-rw-r--r--  cont.c                          |   2
-rw-r--r--  test/ruby/test_yjit.rb          | 121
-rw-r--r--  yjit.c                          |  23
-rw-r--r--  yjit.rb                         |   6
-rw-r--r--  yjit/bindgen/src/main.rs        |   4
-rw-r--r--  yjit/src/asm/mod.rs             | 179
-rw-r--r--  yjit/src/codegen.rs             |  29
-rw-r--r--  yjit/src/core.rs                |  56
-rw-r--r--  yjit/src/cruby_bindings.inc.rs  |   6
-rw-r--r--  yjit/src/options.rs             |   2
-rw-r--r--  yjit/src/stats.rs               |  29
-rw-r--r--  yjit/src/virtualmem.rs          |  29
12 files changed, 454 insertions, 32 deletions
diff --git a/cont.c b/cont.c
index b3c84d82ac..577a30a57a 100644
--- a/cont.c
+++ b/cont.c
@@ -69,7 +69,7 @@ static VALUE rb_cFiberPool;
#define FIBER_POOL_ALLOCATION_FREE
#endif
-#define jit_cont_enabled mjit_enabled // To be used by YJIT later
+#define jit_cont_enabled (mjit_enabled || rb_yjit_enabled_p())
enum context_type {
CONTINUATION_CONTEXT = 0,
diff --git a/test/ruby/test_yjit.rb b/test/ruby/test_yjit.rb
index 6cafb21698..09b5989a06 100644
--- a/test/ruby/test_yjit.rb
+++ b/test/ruby/test_yjit.rb
@@ -825,12 +825,126 @@ class TestYJIT < Test::Unit::TestCase
RUBY
end
+ def test_code_gc
+ assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+ return :not_paged unless add_pages(100) # prepare freeable pages
+ code_gc # first code GC
+ return :not_compiled1 unless compiles { nil } # should be JITable again
+
+ code_gc # second code GC
+ return :not_compiled2 unless compiles { nil } # should be JITable again
+
+ code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+ return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 2
+
+ :ok
+ RUBY
+ end
+
+ def test_on_stack_code_gc_call
+ assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+ fiber = Fiber.new {
+ # Loop to call the same basic block again after Fiber.yield
+ while true
+ Fiber.yield(nil.to_i)
+ end
+ }
+
+ return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
+ return :broken_resume1 if fiber.resume != 0 # JIT the fiber
+ code_gc # first code GC, which should not free the fiber page
+ return :broken_resume2 if fiber.resume != 0 # The code should be still callable
+
+ code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+ return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 1
+
+ :ok
+ RUBY
+ end
+
+ def test_on_stack_code_gc_twice
+ assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+ fiber = Fiber.new {
+ # Loop to call the same basic block again after Fiber.yield
+ while Fiber.yield(nil.to_i); end
+ }
+
+ return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
+ return :broken_resume1 if fiber.resume(true) != 0 # JIT the fiber
+ code_gc # first code GC, which should not free the fiber page
+
+ return :not_paged2 unless add_pages(300) # add some stuff to be freed
+ # Not calling fiber.resume here, to test the case where the YJIT payload lost some
+ # information at the previous code GC. The payload should still be there, so
+ # we can still tell that the fiber ISEQ is on stack during this second code GC.
+ code_gc # second code GC, which should still not free the fiber page
+
+ return :not_paged3 unless add_pages(200) # attempt to overwrite the fiber page (it shouldn't)
+ return :broken_resume2 if fiber.resume(true) != 0 # The fiber code should be still fine
+
+ return :broken_resume3 if fiber.resume(false) != nil # terminate the fiber
+ code_gc # third code GC, freeing a page that used to be on stack
+
+ return :not_paged4 unless add_pages(100) # check everything still works
+
+ code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+ return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 3
+
+ :ok
+ RUBY
+ end
+
+ def test_code_gc_with_many_iseqs
+ assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok, mem_size: 1)
+ fiber = Fiber.new {
+ # Loop to call the same basic block again after Fiber.yield
+ while true
+ Fiber.yield(nil.to_i)
+ end
+ }
+
+ return :not_paged1 unless add_pages(500) # use some pages
+ return :broken_resume1 if fiber.resume != 0 # leave an on-stack code as well
+
+ add_pages(2000) # use a whole lot of pages to run out of 1MiB
+ return :broken_resume2 if fiber.resume != 0 # on-stack code should be callable
+
+ code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+ return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count == 0
+
+ :ok
+ RUBY
+ end
+
+ private
+
+ def code_gc_helpers
+ <<~'RUBY'
+ def compiles(&block)
+ failures = RubyVM::YJIT.runtime_stats[:compilation_failure]
+ block.call
+ failures == RubyVM::YJIT.runtime_stats[:compilation_failure]
+ end
+
+ def add_pages(num_jits)
+ pages = RubyVM::YJIT.runtime_stats[:compiled_page_count]
+ num_jits.times { return false unless eval('compiles { nil.to_i }') }
+ pages.nil? || pages < RubyVM::YJIT.runtime_stats[:compiled_page_count]
+ end
+
+ def code_gc
+ RubyVM::YJIT.simulate_oom! # bump write_pos
+ eval('proc { nil }.call') # trigger code GC
+ end
+ RUBY
+ end
+
def assert_no_exits(script)
assert_compiles(script)
end
ANY = Object.new
- def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil)
+ def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil, mem_size: nil)
reset_stats = <<~RUBY
RubyVM::YJIT.runtime_stats
RubyVM::YJIT.reset_stats!
@@ -864,7 +978,7 @@ class TestYJIT < Test::Unit::TestCase
#{write_results}
RUBY
- status, out, err, stats = eval_with_jit(script, call_threshold: call_threshold)
+ status, out, err, stats = eval_with_jit(script, call_threshold:, mem_size:)
assert status.success?, "exited with status #{status.to_i}, stderr:\n#{err}"
@@ -918,12 +1032,13 @@ class TestYJIT < Test::Unit::TestCase
s.chars.map { |c| c.ascii_only? ? c : "\\u%x" % c.codepoints[0] }.join
end
- def eval_with_jit(script, call_threshold: 1, timeout: 1000)
+ def eval_with_jit(script, call_threshold: 1, timeout: 1000, mem_size: nil)
args = [
"--disable-gems",
"--yjit-call-threshold=#{call_threshold}",
"--yjit-stats"
]
+ args << "--yjit-exec-mem-size=#{mem_size}" if mem_size
args << "-e" << script_shell_encode(script)
stats_r, stats_w = IO.pipe
out, err, status = EnvUtil.invoke_ruby(args,
diff --git a/yjit.c b/yjit.c
index f6e64aad65..7e6fc9e3fb 100644
--- a/yjit.c
+++ b/yjit.c
@@ -27,6 +27,7 @@
#include "probes_helper.h"
#include "iseq.h"
#include "ruby/debug.h"
+#include "internal/cont.h"
// For mmap(), sysconf()
#ifndef _WIN32
@@ -65,10 +66,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
bool
rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
{
- if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
- return false;
- }
- return true;
+ return mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0;
}
void
@@ -85,6 +83,20 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
}
}
+// Free the specified memory block.
+bool
+rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)
+{
+ // On Linux, you need to use madvise MADV_DONTNEED to free memory.
+ // We might not need to call this on macOS, but it's not really documented.
+ // We generally prefer to do the same thing on both to ease testing too.
+ madvise(mem_block, mem_size, MADV_DONTNEED);
+
+ // On macOS, mprotect PROT_NONE seems to reduce RSS.
+ // We also call this on Linux to avoid executing unused pages.
+ return mprotect(mem_block, mem_size, PROT_NONE) == 0;
+}
+
// `start` is inclusive and `end` is exclusive.
void
rb_yjit_icache_invalidate(void *start, void *end)
@@ -387,6 +399,9 @@ rb_iseq_reset_jit_func(const rb_iseq_t *iseq)
{
RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
iseq->body->jit_func = NULL;
+ // Enable re-compiling this ISEQ. Even when it's invalidated for TracePoint,
+ // we'd like to re-compile ISEQs that haven't been converted to trace_* insns.
+ iseq->body->total_calls = 0;
}
// Get the PC for a given index in an iseq
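The Rust side reaches this helper through the new Allocator::mark_unused hook added in virtualmem.rs further down. A minimal sketch of the same freeing strategy written directly against the libc crate (an assumption for illustration; the real build calls the C helper above through bindgen-generated bindings):

    // Sketch only: release physical pages, then guard against stray execution.
    unsafe fn mark_unused(ptr: *mut libc::c_void, len: libc::size_t) -> bool {
        // MADV_DONTNEED lets the kernel reclaim the backing pages on Linux.
        libc::madvise(ptr, len, libc::MADV_DONTNEED);
        // PROT_NONE also appears to reduce RSS on macOS and blocks execution.
        libc::mprotect(ptr, len, libc::PROT_NONE) == 0
    }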
diff --git a/yjit.rb b/yjit.rb
index b80861dbfb..2a0b3dc6c6 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -212,13 +212,17 @@ module RubyVM::YJIT
$stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
$stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
$stderr.puts "compilation_failure: " + ("%10d" % compilation_failure) if compilation_failure != 0
- $stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
$stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
+ $stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
+ $stderr.puts "compiled_page_count: " + ("%10d" % stats[:compiled_page_count])
$stderr.puts "freed_iseq_count: " + ("%10d" % stats[:freed_iseq_count])
+ $stderr.puts "freed_page_count: " + ("%10d" % stats[:freed_page_count])
$stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
$stderr.puts "constant_state_bumps: " + ("%10d" % stats[:constant_state_bumps])
$stderr.puts "inline_code_size: " + ("%10d" % stats[:inline_code_size])
$stderr.puts "outlined_code_size: " + ("%10d" % stats[:outlined_code_size])
+ $stderr.puts "freed_code_size: " + ("%10d" % stats[:freed_code_size])
+ $stderr.puts "code_gc_count: " + ("%10d" % stats[:code_gc_count])
$stderr.puts "num_gc_obj_refs: " + ("%10d" % stats[:num_gc_obj_refs])
$stderr.puts "total_exit_count: " + ("%10d" % total_exits)
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index f7ebb88577..60a9d1b87d 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -263,6 +263,7 @@ fn main() {
.allowlist_function("rb_yjit_reserve_addr_space")
.allowlist_function("rb_yjit_mark_writable")
.allowlist_function("rb_yjit_mark_executable")
+ .allowlist_function("rb_yjit_mark_unused")
.allowlist_function("rb_yjit_get_page_size")
.allowlist_function("rb_leaf_invokebuiltin_iseq_p")
.allowlist_function("rb_leaf_builtin_function")
@@ -297,6 +298,9 @@ fn main() {
// From internal/compile.h
.allowlist_function("rb_vm_insn_decode")
+ // from internal/cont.h
+ .allowlist_function("rb_jit_cont_each_iseq")
+
// From iseq.h
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index ab7c4d6aba..b68520a767 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -6,6 +6,9 @@ use std::rc::Rc;
use crate::backend::x86_64::JMP_PTR_BYTES;
#[cfg(target_arch = "aarch64")]
use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::core::for_each_on_stack_iseq_payload;
+use crate::invariants::rb_yjit_tracing_invalidate_all;
+use crate::stats::incr_counter;
use crate::virtualmem::WriteError;
#[cfg(feature = "disasm")]
@@ -115,17 +118,23 @@ impl CodeBlock {
pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
let old_write_ptr = self.get_write_ptr();
self.set_write_ptr(base_ptr);
- self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+
+ // Use the freed_pages list if code GC has been used. Otherwise use the next page.
+ let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
+ let current_page = self.write_pos / self.page_size;
+ freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
+ } else {
+ Some(self.write_pos / self.page_size + 1)
+ };
// Move self to the next page
- let next_page_idx = self.write_pos / self.page_size + 1;
- if !self.set_page(next_page_idx, &jmp_ptr) {
+ if next_page_idx.is_none() || !self.set_page(next_page_idx.unwrap(), &jmp_ptr) {
self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
return false;
}
// Move the other CodeBlock to the same page if it's on the furthest page
- self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+ self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
return !self.dropped_bytes;
}
@@ -151,7 +160,7 @@ impl CodeBlock {
// We could remember the last write_pos in page2 and let set_page use that position,
// but you need to waste some space for keeping write_pos for every single page.
// It doesn't seem necessary for performance either. So we're currently not doing it.
- let dst_pos = self.page_size * page_idx + self.page_start();
+ let dst_pos = self.get_page_pos(page_idx);
if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
// Reset dropped_bytes
self.dropped_bytes = false;
@@ -161,6 +170,7 @@ impl CodeBlock {
self.write_pos = dst_pos;
let dst_ptr = self.get_write_ptr();
self.write_pos = src_pos;
+ self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
// Generate jmp_ptr from src_pos to dst_pos
self.without_page_end_reserve(|cb| {
@@ -175,6 +185,53 @@ impl CodeBlock {
!self.dropped_bytes
}
+ /// Free the memory pages of given code page indexes
+ fn free_pages(&mut self, page_idxs: &Vec<usize>) {
+ let mut page_idxs = page_idxs.clone();
+ page_idxs.reverse(); // to loop with pop()
+
+ // Group adjacent page indexes and free them in batches to reduce the # of syscalls.
+ while let Some(page_idx) = page_idxs.pop() {
+ // Group first adjacent page indexes
+ let mut batch_idxs = vec![page_idx];
+ while page_idxs.last() == Some(&(batch_idxs.last().unwrap() + 1)) {
+ batch_idxs.push(page_idxs.pop().unwrap());
+ }
+
+ // Free the grouped pages at once
+ let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size);
+ let batch_size = self.page_size * batch_idxs.len();
+ self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
+ }
+ }
+
+ pub fn page_size(&self) -> usize {
+ self.page_size
+ }
+
+ /// Return the number of code pages that have been allocated by the VirtualMemory.
+ pub fn num_pages(&self) -> usize {
+ let mapped_region_size = self.mem_block.borrow().mapped_region_size();
+ // CodeBlock's page size != VirtualMem's page size on Linux,
+ // so mapped_region_size % self.page_size may not be 0
+ ((mapped_region_size - 1) / self.page_size) + 1
+ }
+
+ /// Return the number of code pages that have been freed and not used yet.
+ pub fn num_freed_pages(&self) -> usize {
+ (0..self.num_pages()).filter(|&page_idx| self.has_freed_page(page_idx)).count()
+ }
+
+ pub fn has_freed_page(&self, page_idx: usize) -> bool {
+ CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+ self.write_pos < page_idx * self.page_size // and not written yet
+ }
+
+ /// Convert a page index to the write_pos for the page start.
+ fn get_page_pos(&self, page_idx: usize) -> usize {
+ self.page_size * page_idx + self.page_start()
+ }
+
/// write_pos of the current page start
pub fn page_start_pos(&self) -> usize {
self.get_write_pos() / self.page_size * self.page_size + self.page_start()
@@ -216,21 +273,48 @@ impl CodeBlock {
/// Return the address ranges of a given address range that this CodeBlock can write.
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
- let mut addrs = vec![];
- let mut start = start_ptr.into_usize();
+ // CodegenGlobals is not initialized when we write initial ocb code
+ let freed_pages = if CodegenGlobals::has_instance() {
+ CodegenGlobals::get_freed_pages().as_ref()
+ } else {
+ None
+ };
+
let region_start = self.get_ptr(0).into_usize();
let region_end = self.get_ptr(self.get_mem_size()).into_usize();
+ let mut start = start_ptr.into_usize();
let end = std::cmp::min(end_ptr.into_usize(), region_end);
+
+ let mut addrs = vec![];
while start < end {
- let current_page = region_start +
- (start.saturating_sub(region_start) / self.page_size * self.page_size);
+ let page_idx = start.saturating_sub(region_start) / self.page_size;
+ let current_page = region_start + (page_idx * self.page_size);
let page_end = std::cmp::min(end, current_page + self.page_end());
- addrs.push((start, page_end));
+ // If code GC has been used, skip pages that are used by past on-stack code
+ if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) {
+ addrs.push((start, page_end));
+ }
start = current_page + self.page_size + self.page_start();
}
addrs
}
+ /// Return the code size that has been used by this CodeBlock.
+ pub fn code_size(&self) -> usize {
+ let mut size = 0;
+ let current_page_idx = self.write_pos / self.page_size;
+ for page_idx in 0..self.num_pages() {
+ if page_idx == current_page_idx {
+ // Count only actually used bytes for the current page.
+ size += (self.write_pos % self.page_size).saturating_sub(self.page_start());
+ } else if !self.has_freed_page(page_idx) {
+ // Count an entire range for any non-freed pages that have been used.
+ size += self.page_end() - self.page_start() + self.page_end_reserve;
+ }
+ }
+ size
+ }
+
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
let page_offset = self.write_pos % self.page_size;
@@ -261,6 +345,11 @@ impl CodeBlock {
self.asm_comments.get(&pos)
}
+ pub fn clear_comments(&mut self) {
+ #[cfg(feature = "disasm")]
+ self.asm_comments.clear();
+ }
+
pub fn get_mem_size(&self) -> usize {
self.mem_size
}
@@ -293,6 +382,24 @@ impl CodeBlock {
self.mem_block.borrow().start_ptr().add_bytes(offset)
}
+ /// Convert an address range to memory page indexes against a num_pages()-sized array.
+ pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
+ let mem_start = self.mem_block.borrow().start_ptr().into_usize();
+ let mem_end = self.mem_block.borrow().end_ptr().into_usize();
+ assert!(mem_start <= start_addr.into_usize());
+ assert!(start_addr.into_usize() <= end_addr.into_usize());
+ assert!(end_addr.into_usize() <= mem_end);
+
+ // Ignore empty code ranges
+ if start_addr == end_addr {
+ return vec![];
+ }
+
+ let start_page = (start_addr.into_usize() - mem_start) / self.page_size;
+ let end_page = (end_addr.into_usize() - mem_start - 1) / self.page_size;
+ (start_page..=end_page).collect() // TODO: consider returning an iterator
+ }
+
/// Get a (possibly dangling) direct pointer to the current write position
pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
@@ -431,6 +538,58 @@ impl CodeBlock {
self.mem_block.borrow_mut().mark_all_executable();
}
+ /// Code GC. Free code pages that are not on stack and reuse them.
+ pub fn code_gc(&mut self) {
+ // The previous code GC failed to free any pages. Give up.
+ if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
+ return;
+ }
+
+ // Check which pages are still in use
+ let mut pages_in_use = vec![false; self.num_pages()];
+ // For each ISEQ, we currently assume that the pages used by its outlined code
+ // are a subset of the pages used by its inline code, so we mark only inline code pages.
+ for_each_on_stack_iseq_payload(|iseq_payload| {
+ for page in &iseq_payload.pages {
+ pages_in_use[*page] = true;
+ }
+ });
+ // Outlined code generated by CodegenGlobals::init() should also be kept.
+ for page in CodegenGlobals::get_ocb_pages() {
+ pages_in_use[*page] = true;
+ }
+
+ // Let VirtualMem free the pages
+ let freed_pages: Vec<usize> = pages_in_use.iter().enumerate()
+ .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect();
+ self.free_pages(&freed_pages);
+
+ // Invalidate everything to have more compact code after code GC.
+ // This currently patches every ISEQ, which works, but in the future,
+ // we could limit that to patch only on-stack ISEQs for optimizing code GC.
+ rb_yjit_tracing_invalidate_all();
+ // By the next code GC, we may have reused pages that sit in between
+ // invalidated pages. To keep them invalidatable, we skip freezing them here.
+ // The bytes frozen by any past invalidation are either freed or never reused,
+ // so this can be safely reset to pass the frozen-bytes check on invalidation.
+ CodegenGlobals::set_inline_frozen_bytes(0);
+
+ if let Some(&first_page) = freed_pages.first() {
+ let mut cb = CodegenGlobals::get_inline_cb();
+ cb.write_pos = cb.get_page_pos(first_page);
+ cb.dropped_bytes = false;
+ cb.clear_comments();
+
+ let mut ocb = CodegenGlobals::get_outlined_cb().unwrap();
+ ocb.write_pos = ocb.get_page_pos(first_page);
+ ocb.dropped_bytes = false;
+ ocb.clear_comments();
+ }
+
+ CodegenGlobals::set_freed_pages(freed_pages);
+ incr_counter!(code_gc_count);
+ }
+
pub fn inline(&self) -> bool {
!self.outlined
}
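The code_gc pass above boils down to a mark-and-sweep over page indexes: mark every page referenced by an on-stack ISEQ payload or by the boot-time outlined stubs, treat everything else as freeable, and batch adjacent indexes so each contiguous run costs one free call. A self-contained sketch with simplified stand-in types (not the real CodeBlock API):

    use std::collections::HashSet;

    /// Pages that must be kept: anything referenced by an on-stack ISEQ
    /// payload or by the outlined code generated at boot.
    fn live_pages(num_pages: usize, on_stack: &[HashSet<usize>], ocb_pages: &[usize]) -> Vec<bool> {
        let mut in_use = vec![false; num_pages];
        for payload_pages in on_stack {
            for &page in payload_pages {
                in_use[page] = true;
            }
        }
        for &page in ocb_pages {
            in_use[page] = true;
        }
        in_use
    }

    /// Group adjacent freeable page indexes into (start_page, page_count)
    /// batches so each contiguous run can be released with a single call.
    fn free_batches(in_use: &[bool]) -> Vec<(usize, usize)> {
        let mut batches = vec![];
        let mut idx = 0;
        while idx < in_use.len() {
            if in_use[idx] {
                idx += 1;
                continue;
            }
            let start = idx;
            while idx < in_use.len() && !in_use[idx] {
                idx += 1;
            }
            batches.push((start, idx - start));
        }
        batches
    }

    fn main() {
        let on_stack = vec![HashSet::from([0, 3])];
        let in_use = live_pages(8, &on_stack, &[0, 1]);
        // Pages 2 and 4..=7 are freeable: batches (2, 1) and (4, 4).
        println!("{:?}", free_batches(&in_use));
    }

After the sweep, compilation starts reusing the freed indexes: next_page picks the first freed page past the current one instead of write_pos / page_size + 1.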
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index e5e6e4ec84..d6ea8996e1 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -643,6 +643,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
if cb.has_dropped_bytes() {
None
} else {
+ // Mark code pages for code GC
+ let iseq_payload = get_or_create_iseq_payload(iseq);
+ for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) {
+ iseq_payload.pages.insert(page);
+ }
Some(code_ptr)
}
}
@@ -6504,6 +6509,12 @@ pub struct CodegenGlobals {
// Methods for generating code for hardcoded (usually C) methods
method_codegen_table: HashMap<usize, MethodGenFn>,
+
+ /// Page indexes for outlined code that are not associated with any ISEQ.
+ ocb_pages: Vec<usize>,
+
+ /// Freed page indexes. None if code GC has not been used.
+ freed_pages: Option<Vec<usize>>,
}
/// For implementing global code invalidation. A position in the inline
@@ -6570,6 +6581,7 @@ impl CodegenGlobals {
#[cfg(test)]
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
+ let ocb_start_addr = ocb.unwrap().get_write_ptr();
let leave_exit_code = gen_leave_exit(&mut ocb);
let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
@@ -6577,6 +6589,9 @@ impl CodegenGlobals {
// Generate full exit code for C func
let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
+ let ocb_end_addr = ocb.unwrap().get_write_ptr();
+ let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr);
+
// Mark all code memory as executable
cb.mark_all_executable();
ocb.unwrap().mark_all_executable();
@@ -6590,6 +6605,8 @@ impl CodegenGlobals {
global_inval_patches: Vec::new(),
inline_frozen_bytes: 0,
method_codegen_table: HashMap::new(),
+ ocb_pages,
+ freed_pages: None,
};
// Register the method codegen functions
@@ -6725,6 +6742,18 @@ impl CodegenGlobals {
Some(&mgf) => Some(mgf), // Deref
}
}
+
+ pub fn get_ocb_pages() -> &'static Vec<usize> {
+ &CodegenGlobals::get_instance().ocb_pages
+ }
+
+ pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
+ &mut CodegenGlobals::get_instance().freed_pages
+ }
+
+ pub fn set_freed_pages(freed_pages: Vec<usize>) {
+ CodegenGlobals::get_instance().freed_pages = Some(freed_pages)
+ }
}
#[cfg(test)]
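gen_entry_prologue above records which pages an ISEQ's entry code landed on through cb.addrs_to_pages, and add_block_version in core.rs does the same for blocks. The index math is plain byte arithmetic against the start of the mapped region; a small sketch with made-up constants (flat parameters instead of CodeBlock fields):

    /// Map a [start, end) byte range inside the code region to the code page
    /// indexes it touches. `end` is exclusive, hence the `end - 1` below.
    fn addrs_to_pages(region_start: usize, page_size: usize, start: usize, end: usize) -> Vec<usize> {
        assert!(region_start <= start && start <= end);
        if start == end {
            return vec![]; // empty ranges touch no page
        }
        let first = (start - region_start) / page_size;
        let last = (end - 1 - region_start) / page_size;
        (first..=last).collect()
    }

    /// Pages needed to cover `mapped_bytes`, rounding up because the mapped
    /// region size need not be a multiple of the code page size.
    fn num_pages(mapped_bytes: usize, page_size: usize) -> usize {
        (mapped_bytes - 1) / page_size + 1
    }

    fn main() {
        // A 10-byte write starting 60 bytes into the region spans pages 3 and 4
        // when pages are 16 bytes long.
        assert_eq!(addrs_to_pages(0, 16, 60, 70), vec![3, 4]);
        assert_eq!(num_pages(70, 16), 5);
    }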
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 705a0c46ef..19272350ed 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -11,6 +11,7 @@ use crate::utils::*;
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
+use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::mem;
use std::rc::{Rc};
@@ -321,7 +322,7 @@ struct Branch {
// Positions where the generated code starts and ends
start_addr: Option<CodePtr>,
- end_addr: Option<CodePtr>,
+ end_addr: Option<CodePtr>, // exclusive
// Context right after the branch instruction
#[allow(unused)] // set but not read at the moment
@@ -475,7 +476,11 @@ impl Eq for BlockRef {}
/// when calling into YJIT
#[derive(Default)]
pub struct IseqPayload {
+ // Basic block versions
version_map: VersionMap,
+
+ // Indexes of code pages used by this ISEQ
+ pub pages: HashSet<usize>,
}
impl IseqPayload {
@@ -498,7 +503,7 @@ pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
}
/// Get the payload object associated with an iseq. Create one if none exists.
-fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
+pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
type VoidPtr = *mut c_void;
let payload_non_null = unsafe {
@@ -537,6 +542,21 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
+/// Iterate over all on-stack ISEQ payloads
+#[cfg(not(test))]
+pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
+ unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
+ let callback: &mut &mut dyn FnMut(&IseqPayload) -> bool = std::mem::transmute(&mut *data);
+ if let Some(iseq_payload) = get_iseq_payload(iseq) {
+ callback(iseq_payload);
+ }
+ }
+ let mut data: &mut dyn FnMut(&IseqPayload) = &mut callback;
+ unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
+}
+#[cfg(test)]
+pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut _callback: F) {}
+
/// Free the per-iseq payload
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
@@ -854,6 +874,12 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
}
incr_counter!(compiled_block_count);
+
+ // Mark code pages for code GC
+ let iseq_payload = get_iseq_payload(block.blockid.iseq).unwrap();
+ for page in cb.addrs_to_pages(block.start_addr.unwrap(), block.end_addr.unwrap()) {
+ iseq_payload.pages.insert(page);
+ }
}
/// Remove a block version from the version map of its parent ISEQ
@@ -1526,7 +1552,11 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
match block {
// Compilation failed
- None => return None,
+ None => {
+ // Trigger code GC. This entry point will be recompiled later.
+ cb.code_gc();
+ return None;
+ }
// If the block contains no Ruby instructions
Some(block) => {
@@ -1776,6 +1806,18 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
block_rc.borrow().start_addr.unwrap()
}
None => {
+ // Code GC needs to borrow blocks for invalidation, so their mutable
+ // borrows must be dropped first.
+ drop(block);
+ drop(branch);
+ // Trigger code GC. The whole ISEQ will be recompiled later.
+ // We shouldn't trigger it in the middle of compilation in branch_stub_hit,
+ // because incomplete code could be used when cb.dropped_bytes is flipped
+ // by code GC. So this point, after all compilation is done, is the safest
+ // place in branch_stub_hit to hook code GC.
+ cb.code_gc();
+ branch = branch_rc.borrow_mut();
+
// Failed to service the stub by generating a new block so now we
// need to exit to the interpreter at the stubbed location. We are
// intentionally *not* restoring original_interp_sp. At the time of
@@ -1793,7 +1835,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
let new_branch_size = branch.code_size();
assert!(
new_branch_size <= branch_size_on_entry,
- "branch stubs should never enlarge branches"
+ "branch stubs should never enlarge branches: (old_size: {}, new_size: {})",
+ branch_size_on_entry, new_branch_size,
);
// Return a pointer to the compiled block version
@@ -1904,7 +1947,10 @@ pub fn gen_branch(
// Get the branch targets or stubs
let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
let dst_addr1 = if let Some(ctx) = ctx1 {
- get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb)
+ match get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb) {
+ Some(dst_addr) => Some(dst_addr),
+ None => return, // avoid unwrap() in gen_fn()
+ }
} else {
None
};
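The explicit drop(block); drop(branch); in branch_stub_hit_body above is a RefCell discipline: code_gc() ends up invalidating blocks, which takes its own borrows, so any borrow still held at that point would panic at runtime. A tiny standalone illustration of the rule, with toy types:

    use std::cell::RefCell;

    // Stand-in for a routine that, like code GC invalidation, must borrow
    // the same RefCell mutably while it runs.
    fn invalidate(blocks: &RefCell<Vec<u32>>) {
        blocks.borrow_mut().clear();
    }

    fn main() {
        let blocks = RefCell::new(vec![1, 2, 3]);

        let borrow = blocks.borrow_mut();
        // Calling invalidate(&blocks) here would panic with a BorrowMutError
        // because `borrow` is still live; dropping it first is what the
        // drop(block); drop(branch); calls above accomplish.
        drop(borrow);
        invalidate(&blocks);

        // Re-borrow afterwards, mirroring `branch = branch_rc.borrow_mut()`.
        let borrow = blocks.borrow_mut();
        assert!(borrow.is_empty());
    }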
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 158bdd80dc..a9242e05ca 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1279,12 +1279,18 @@ extern "C" {
) -> ::std::os::raw::c_int;
}
extern "C" {
+ pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
+}
+extern "C" {
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
}
extern "C" {
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
}
extern "C" {
+ pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
+}
+extern "C" {
pub fn rb_yjit_icache_invalidate(
start: *mut ::std::os::raw::c_void,
end: *mut ::std::os::raw::c_void,
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 303ae4980f..a0cdbbc566 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -91,7 +91,7 @@ macro_rules! get_option_ref {
// Unsafe is ok here because options are initialized
// once before any Ruby code executes
($option_name:ident) => {
- unsafe { &(OPTIONS.$option_name) }
+ unsafe { &($crate::options::OPTIONS.$option_name) }
};
}
pub(crate) use get_option_ref;
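The $crate::options::OPTIONS change is a macro-hygiene fix: with a bare OPTIONS, the macro only expands correctly in modules that happen to have that name in scope. A minimal sketch of the pattern with toy names (and a plain static so the example runs without unsafe):

    #[macro_use]
    mod options {
        pub static OPTIONS: u32 = 7;

        // `$crate::` makes the expansion refer to this crate's options::OPTIONS
        // no matter which module invokes the macro.
        macro_rules! get_option {
            ($name:ident) => {
                $crate::options::$name
            };
        }
    }

    mod stats {
        pub fn report() -> u32 {
            // Works even though this module never imports OPTIONS.
            get_option!(OPTIONS)
        }
    }

    fn main() {
        assert_eq!(stats::report(), 7);
    }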
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 0ad77fc5df..e851d4e4d1 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -253,6 +253,7 @@ make_counters! {
compiled_block_count,
compilation_failure,
freed_iseq_count,
+ code_gc_count,
exit_from_branch_stub,
@@ -351,23 +352,37 @@ fn rb_yjit_gen_stats_dict() -> VALUE {
return Qnil;
}
+ macro_rules! hash_aset_usize {
+ ($hash:ident, $counter_name:expr, $value:expr) => {
+ let key = rust_str_to_sym($counter_name);
+ let value = VALUE::fixnum_from_usize($value);
+ rb_hash_aset($hash, key, value);
+ }
+ }
+
let hash = unsafe { rb_hash_new() };
- // Inline and outlined code size
+ // CodeBlock stats
unsafe {
// Get the inline and outlined code blocks
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
// Inline code size
- let key = rust_str_to_sym("inline_code_size");
- let value = VALUE::fixnum_from_usize(cb.get_write_pos());
- rb_hash_aset(hash, key, value);
+ hash_aset_usize!(hash, "inline_code_size", cb.code_size());
// Outlined code size
- let key = rust_str_to_sym("outlined_code_size");
- let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
- rb_hash_aset(hash, key, value);
+ hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
+
+ // GCed pages
+ let freed_page_count = cb.num_freed_pages();
+ hash_aset_usize!(hash, "freed_page_count", freed_page_count);
+
+ // GCed code size
+ hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
+
+ // Compiled pages
+ hash_aset_usize!(hash, "compiled_page_count", cb.num_pages() - freed_page_count);
}
// If we're not generating stats, the hash is done
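inline_code_size and outlined_code_size now report cb.code_size() rather than the raw write position, so pages released by code GC no longer inflate the numbers. A simplified sketch of that accounting (flat parameters and made-up sizes instead of CodeBlock fields):

    use std::collections::HashSet;

    /// Count live code bytes: partial usage on the page currently being
    /// written, the full usable range on other non-freed pages, and nothing
    /// for freed pages.
    fn code_size(
        write_pos: usize,
        page_size: usize,
        page_start: usize, // reserved bytes at the start of every page
        num_pages: usize,
        freed: &HashSet<usize>,
    ) -> usize {
        let current_page = write_pos / page_size;
        let mut size = 0;
        for page in 0..num_pages {
            if page == current_page {
                size += (write_pos % page_size).saturating_sub(page_start);
            } else if !freed.contains(&page) {
                size += page_size - page_start;
            }
        }
        size
    }

    fn main() {
        // 16-byte pages with 2 reserved bytes each; page 1 was freed by code
        // GC, and the write position is 6 bytes into page 2.
        let freed = HashSet::from([1usize]);
        assert_eq!(code_size(38, 16, 2, 3, &freed), 14 + 0 + 4);
    }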
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
index 5234963872..1d80983c9e 100644
--- a/yjit/src/virtualmem.rs
+++ b/yjit/src/virtualmem.rs
@@ -51,6 +51,8 @@ pub trait Allocator {
fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
fn mark_executable(&mut self, ptr: *const u8, size: u32);
+
+ fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
}
/// Pointer into a [VirtualMemory].
@@ -91,6 +93,15 @@ impl<A: Allocator> VirtualMemory<A> {
CodePtr(self.region_start)
}
+ pub fn end_ptr(&self) -> CodePtr {
+ CodePtr(self.region_start.wrapping_add(self.mapped_region_bytes))
+ }
+
+ /// Size of the region in bytes that we have allocated physical memory for.
+ pub fn mapped_region_size(&self) -> usize {
+ self.mapped_region_bytes
+ }
+
/// Size of the region in bytes where writes could be attempted.
pub fn virtual_region_size(&self) -> usize {
self.region_size_bytes
@@ -177,6 +188,12 @@ impl<A: Allocator> VirtualMemory<A> {
// Make mapped region executable
self.allocator.mark_executable(region_start, mapped_region_bytes);
}
+
+ /// Free a range of bytes. start_ptr must be memory page-aligned.
+ pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+ assert_eq!(start_ptr.into_usize() % self.page_size_bytes, 0);
+ self.allocator.mark_unused(start_ptr.0, size);
+ }
}
impl CodePtr {
@@ -235,6 +252,10 @@ mod sys {
fn mark_executable(&mut self, ptr: *const u8, size: u32) {
unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
}
+
+ fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool {
+ unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) }
+ }
}
}
@@ -258,6 +279,7 @@ pub mod tests {
enum AllocRequest {
MarkWritable{ start_idx: usize, length: usize },
MarkExecutable{ start_idx: usize, length: usize },
+ MarkUnused{ start_idx: usize, length: usize },
}
use AllocRequest::*;
@@ -298,6 +320,13 @@ pub mod tests {
// We don't try to execute generated code in cfg(test)
// so no need to actually request executable memory.
}
+
+ fn mark_unused(&mut self, ptr: *const u8, length: u32) -> bool {
+ let index = self.bounds_check_request(ptr, length);
+ self.requests.push(MarkUnused { start_idx: index, length: length.as_usize() });
+
+ true
+ }
}
// Fictional architecture where each page is 4 bytes long