-rw-r--r--  p                               38
-rw-r--r--  version.h                        2
-rw-r--r--  yjit/src/asm/mod.rs             49
-rw-r--r--  yjit/src/backend/arm64/mod.rs  245
-rw-r--r--  yjit/src/codegen.rs             16
5 files changed, 229 insertions, 121 deletions
diff --git a/p b/p
new file mode 100644
index 0000000000..92b251b720
--- /dev/null
+++ b/p
@@ -0,0 +1,38 @@
+[PATCH 1/4] YJIT: Move CodegenGlobals::freed_pages into an Rc
+
+This allows for supplying a freed_pages vec in Rust tests. We need it so we
+can test scenarios that occur after code GC.
+---
+ yjit/src/asm/mod.rs | 48 +++++++++++++++++++++++++++++++++------------
+ yjit/src/codegen.rs | 16 ++++-----------
+ 2 files changed, 39 insertions(+), 25 deletions(-)
+
+Subject: [PATCH 2/4] YJIT: other_cb is None in tests
+
+Since the other cb is in CodegenGlobals, and we want Rust tests to be
+self-contained.
+---
+ yjit/src/asm/mod.rs | 1 +
+ 1 file changed, 1 insertion(+)
+
+Subject: [PATCH 3/4] YJIT: ARM64: Move functions out of arm64_emit()
+
+---
+ yjit/src/backend/arm64/mod.rs | 180 +++++++++++++++++-----------------
+ 1 file changed, 90 insertions(+), 90 deletions(-)
+
+Subject: [PATCH 4/4] YJIT: ARM64: Fix long jumps to labels
+
+Previously, with Code GC, YJIT panicked while trying to emit a B.cond
+instruction with an offset that is not encodable in 19 bits. This only
+happens when the code in an assembler instance straddles two pages.
+
+To fix this, when we detect that a jump to a label can land on a
+different page, we switch to a fresh new page and regenerate all the
+code in the assembler there. We still assume that no one assembler has
+so much code that it wouldn't fit inside a fresh new page.
+
+[Bug #19385]
+---
+ yjit/src/backend/arm64/mod.rs | 65 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 60 insertions(+), 5 deletions(-)
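
As the hunks below show, patch 1 boils down to making the two code blocks share one Rc<Option<Vec<usize>>> handle instead of reaching into CodegenGlobals, so a Rust test can inject its own freed-pages list. A rough sketch of the resulting call shapes, using only names that appear in this diff (mem_block is the shared VirtualMem handle built in codegen.rs):

    // Production: one shared handle, `None` until a code GC has run.
    let freed_pages: Rc<Option<Vec<usize>>> = Rc::new(None);
    let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
    let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));

    // Tests: pretend pages 0 and 65 were freed by a code GC, no CodegenGlobals needed.
    let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, 65]);
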
diff --git a/version.h b/version.h
index 00147ecc6f..a1a86affca 100644
--- a/version.h
+++ b/version.h
@@ -11,7 +11,7 @@
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 25
+#define RUBY_PATCHLEVEL 26
#include "ruby/version.h"
#include "ruby/internal/abi.h"
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 44ae74d470..c814ed8520 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -83,6 +83,10 @@ pub struct CodeBlock {
// for example, when there is not enough space or when a jump
// target is too far away.
dropped_bytes: bool,
+
+ // Keeps track of what pages we can write to after code gc.
+ // `None` means all pages are free.
+ freed_pages: Rc<Option<Vec<usize>>>,
}
/// Set of CodeBlock label states. Used for recovering the previous state.
@@ -94,7 +98,7 @@ pub struct LabelState {
impl CodeBlock {
/// Make a new CodeBlock
- pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool) -> Self {
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self {
let mem_size = mem_block.borrow().virtual_region_size();
let mut cb = Self {
mem_block,
@@ -108,6 +112,7 @@ impl CodeBlock {
asm_comments: BTreeMap::new(),
outlined,
dropped_bytes: false,
+ freed_pages,
};
cb.write_pos = cb.page_start();
cb
@@ -120,7 +125,7 @@ impl CodeBlock {
self.set_write_ptr(base_ptr);
// Use the freed_pages list if code GC has been used. Otherwise use the next page.
- let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
+ let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() {
let current_page = self.write_pos / CODE_PAGE_SIZE;
freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
} else {
@@ -134,6 +139,7 @@ impl CodeBlock {
}
// Move the other CodeBlock to the same page if it's on the furthest page
+ #[cfg(not(test))]
self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
return !self.dropped_bytes;
@@ -234,7 +240,7 @@ impl CodeBlock {
}
pub fn has_freed_page(&self, page_idx: usize) -> bool {
- CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+ self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
self.write_pos < page_idx * CODE_PAGE_SIZE // and not written yet
}
@@ -284,18 +290,12 @@ impl CodeBlock {
/// Return the address ranges of a given address range that this CodeBlock can write.
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
- // CodegenGlobals is not initialized when we write initial ocb code
- let freed_pages = if CodegenGlobals::has_instance() {
- CodegenGlobals::get_freed_pages().as_ref()
- } else {
- None
- };
-
let region_start = self.get_ptr(0).into_usize();
let region_end = self.get_ptr(self.get_mem_size()).into_usize();
let mut start = start_ptr.into_usize();
let end = std::cmp::min(end_ptr.into_usize(), region_end);
+ let freed_pages = self.freed_pages.as_ref().as_ref();
let mut addrs = vec![];
while start < end {
let page_idx = start.saturating_sub(region_start) / CODE_PAGE_SIZE;
@@ -558,7 +558,7 @@ impl CodeBlock {
/// Code GC. Free code pages that are not on stack and reuse them.
pub fn code_gc(&mut self) {
// The previous code GC failed to free any pages. Give up.
- if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
+ if self.freed_pages.as_ref() == &Some(vec![]) {
return;
}
@@ -613,7 +613,13 @@ impl CodeBlock {
ocb.clear_comments();
}
- CodegenGlobals::set_freed_pages(freed_pages);
+ // Track which pages are free.
+ let new_freed_pages = Rc::new(Some(freed_pages));
+ let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages));
+ self.other_cb().unwrap().freed_pages = new_freed_pages;
+ assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate
+
+ CodegenGlobals::incr_code_gc_count();
}
pub fn inline(&self) -> bool {
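
The mem::replace / Rc::strong_count dance in code_gc above is the usual way to hand a new shared value to both owners and prove the old one is about to be freed. A self-contained illustration of just that pattern (plain Rust, not YJIT code):

    use std::mem;
    use std::rc::Rc;

    struct Cb { freed_pages: Rc<Option<Vec<usize>>> }

    fn main() {
        // Two code blocks share one handle, like cb and ocb do.
        let shared: Rc<Option<Vec<usize>>> = Rc::new(None);
        let mut cb = Cb { freed_pages: Rc::clone(&shared) };
        let mut ocb = Cb { freed_pages: Rc::clone(&shared) };
        drop(shared); // only the two blocks hold it now

        // Code GC: point both owners at the new list...
        let new_freed_pages = Rc::new(Some(vec![3usize, 5]));
        let old = mem::replace(&mut cb.freed_pages, Rc::clone(&new_freed_pages));
        ocb.freed_pages = new_freed_pages;

        // ...leaving `old` as the sole owner of the previous list, so it is
        // deallocated as soon as it goes out of scope.
        assert_eq!(1, Rc::strong_count(&old));
    }
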
@@ -643,7 +649,24 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
- Self::new(Rc::new(RefCell::new(virt_mem)), false)
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+ }
+
+ /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
+ pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self {
+ use std::ptr::NonNull;
+ use crate::virtualmem::*;
+ use crate::virtualmem::tests::TestingAllocator;
+
+ freed_pages.sort_unstable();
+ let mem_size = CODE_PAGE_SIZE *
+ (1 + freed_pages.last().expect("freed_pages vec should not be empty"));
+
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)))
}
}
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index eb096ce677..55460eda9e 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -69,6 +69,96 @@ impl From<&Opnd> for A64Opnd {
}
}
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb that is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ #[cfg(not(test))]
+ let start = cb.get_write_ptr();
+ emit_jmp_ptr(cb, dst_ptr, true);
+ #[cfg(not(test))]
+ {
+ let end = cb.get_write_ptr();
+ use crate::cruby::rb_yjit_icache_invalidate;
+ unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
+ }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+ let src_addr = cb.get_write_ptr().into_i64();
+ let dst_addr = dst_ptr.into_i64();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ if padding {
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ for _ in num_insns..(JMP_PTR_BYTES / 4) {
+ nop(cb);
+ }
+ }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+ let mut current = value;
+
+ if current <= 0xffff {
+ // If the value fits into a single movz
+ // instruction, then we'll use that.
+ movz(cb, rd, A64Opnd::new_uimm(current), 0);
+ return 1;
+ } else if BitmaskImmediate::try_from(current).is_ok() {
+ // Otherwise, if the immediate can be encoded
+ // with the special bitmask immediate encoding,
+ // we'll use that.
+ mov(cb, rd, A64Opnd::new_uimm(current));
+ return 1;
+ } else {
+ // Finally we'll fall back to encoding the value
+ // using movz for the first 16 bits and movk for
+ // each subsequent set of 16 bits as long as
+ // they are necessary.
+ movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+ let mut num_insns = 1;
+
+ // (We're sure this is necessary since we
+ // checked if it only fit into movz above).
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+ num_insns += 1;
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+ num_insns += 1;
+ }
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+ num_insns += 1;
+ }
+ return num_insns;
+ }
+}
+
impl Assembler
{
// A special scratch register for intermediate processing.
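
emit_load_value above assembles wide constants from 16-bit chunks: a movz for the low half-word, then one movk per additional half-word, with a single-instruction shortcut when the value fits a movz or a bitmask immediate. A tiny self-contained counter for the movz/movk path (a sketch for sanity-checking instruction budgets; it ignores the bitmask shortcut and is not the helper the backend itself uses):

    /// Instructions needed to materialize `value` using only movz + movk.
    fn movz_movk_count(value: u64) -> usize {
        if value <= 0xffff {
            return 1;               // a single movz is enough
        }
        let mut rest = value >> 16; // the low 16 bits go in the movz
        let mut count = 2;          // movz + the first movk
        while rest > 0xffff {
            rest >>= 16;
            count += 1;             // one more movk per extra 16-bit chunk
        }
        count
    }

    fn main() {
        assert_eq!(1, movz_movk_count(0xffff));
        assert_eq!(2, movz_movk_count(0x1234_5678));
        assert_eq!(4, movz_movk_count(u64::MAX));
    }
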
@@ -551,9 +641,8 @@ impl Assembler
}
/// Emit platform-specific machine code
- /// Returns a list of GC offsets
- pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
- {
+ /// Returns a list of GC offsets. Can return failure to signal caller to retry.
+ fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Result<Vec<u32>, ()> {
/// Determine how many instructions it will take to represent moving
/// this value into a register. Note that the return value of this
/// function must correspond to how many instructions are used to
@@ -574,52 +663,6 @@ impl Assembler
}
}
- /// Emit the required instructions to load the given value into the
- /// given register. Our goal here is to use as few instructions as
- /// possible to get this value into the register.
- fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
- let mut current = value;
-
- if current <= 0xffff {
- // If the value fits into a single movz
- // instruction, then we'll use that.
- movz(cb, rd, A64Opnd::new_uimm(current), 0);
- return 1;
- } else if BitmaskImmediate::try_from(current).is_ok() {
- // Otherwise, if the immediate can be encoded
- // with the special bitmask immediate encoding,
- // we'll use that.
- mov(cb, rd, A64Opnd::new_uimm(current));
- return 1;
- } else {
- // Finally we'll fall back to encoding the value
- // using movz for the first 16 bits and movk for
- // each subsequent set of 16 bits as long we
- // they are necessary.
- movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
- let mut num_insns = 1;
-
- // (We're sure this is necessary since we
- // checked if it only fit into movz above).
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
- num_insns += 1;
-
- if current > 0xffff {
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
- num_insns += 1;
- }
-
- if current > 0xffff {
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
- num_insns += 1;
- }
- return num_insns;
- }
- }
-
/// Emit a conditional jump instruction to a specific target. This is
/// called when lowering any of the conditional jump instructions.
fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
@@ -691,50 +734,6 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
- fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
- let src_addr = cb.get_write_ptr().into_i64();
- let dst_addr = dst_ptr.into_i64();
-
- // If the offset is short enough, then we'll use the
- // branch instruction. Otherwise, we'll move the
- // destination into a register and use the branch
- // register instruction.
- let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
- b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
- 1
- } else {
- let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
- br(cb, Assembler::SCRATCH0);
- num_insns + 1
- };
-
- if padding {
- // Make sure it's always a consistent number of
- // instructions in case it gets patched and has to
- // use the other branch.
- for _ in num_insns..(JMP_PTR_BYTES / 4) {
- nop(cb);
- }
- }
- }
-
- /// Call emit_jmp_ptr and immediately invalidate the written range.
- /// This is needed when next_page also moves other_cb that is not invalidated
- /// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
- /// more than necessary when other_cb jumps from a position early in the page.
- /// This invalidates a small range of cb twice, but we accept the small cost.
- fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
- #[cfg(not(test))]
- let start = cb.get_write_ptr();
- emit_jmp_ptr(cb, dst_ptr, true);
- #[cfg(not(test))]
- {
- let end = cb.get_write_ptr();
- use crate::cruby::rb_yjit_icache_invalidate;
- unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
- }
- }
-
// dbg!(&self.insns);
// List of GC offsets
@@ -1055,13 +1054,19 @@ impl Assembler
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
+
+ // We don't want label references to cross page boundaries. Signal caller for
+ // retry.
+ if !self.label_names.is_empty() {
+ return Err(());
+ }
} else {
insn_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}
- gc_offsets
+ Ok(gc_offsets)
}
/// Optimize and compile the stored instructions
@@ -1076,7 +1081,16 @@ impl Assembler
}
let start_ptr = cb.get_write_ptr();
- let gc_offsets = asm.arm64_emit(cb);
+ let starting_label_state = cb.get_label_state();
+ let gc_offsets = asm.arm64_emit(cb)
+ .unwrap_or_else(|_err| {
+ // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB
+ // range limit. We can easily exceed the limit in case the jump straddles two pages.
+ // In this case, we retry with a fresh page.
+ cb.set_label_state(starting_label_state);
+ cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
+ asm.arm64_emit(cb).expect("should not fail when writing to a fresh code page")
+ });
if cb.has_dropped_bytes() {
cb.clear_labels();
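
The 1 MiB figure in the comment above comes straight from the encoding: B.cond stores a signed 19-bit instruction offset scaled by 4 bytes, so its reach is roughly ±2^18 × 4 = ±1 MiB, while the unconditional B used for the page-to-page jump carries a 26-bit offset (±128 MiB). An illustrative range check (a sketch, not the backend's own fits-bits helper):

    /// Can `byte_offset` be encoded in B.cond's signed 19-bit instruction offset?
    fn bcond_offset_fits(byte_offset: i64) -> bool {
        let insns = byte_offset / 4;                    // offsets count instructions
        (-(1i64 << 18)..(1i64 << 18)).contains(&insns)  // ±2^18 instructions = ±1 MiB
    }

    fn main() {
        assert!(bcond_offset_fits(1024 * 1024 - 4));  // just under +1 MiB
        assert!(!bcond_offset_fits(2 * 1024 * 1024)); // a cross-page jump can exceed it
    }
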
@@ -1401,6 +1415,47 @@ mod tests {
}
#[test]
+ fn test_bcond_straddling_code_pages() {
+ const LANDING_PAGE: usize = 65;
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
+
+ // Skip to near the end of the page. Room for two instructions.
+ cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);
+
+ let end = asm.new_label("end");
+ // Start with a conditional jump...
+ asm.jz(end);
+
+ // A few instructions, enough to cause a page switch.
+ let sum = asm.add(399.into(), 111.into());
+ let xorred = asm.xor(sum, 859.into());
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
+ asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);
+
+ // The branch target. It should be in the landing page.
+ asm.write_label(end);
+ asm.cret(xorred);
+
+ // [Bug #19385]
+ // This used to panic with "The offset must be 19 bits or less."
+ // due to attempting to lower the `asm.jz` above to a `b.e` with an offset that's > 1 MiB.
+ let starting_pos = cb.get_write_pos();
+ asm.compile_with_num_regs(&mut cb, 2);
+ let gap = cb.get_write_pos() - starting_pos;
+ assert!(gap > 0b1111111111111111111);
+
+ let instruction_at_starting_pos: [u8; 4] = unsafe {
+ std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
+ }.try_into().unwrap();
+ assert_eq!(
+ 0b000101 << 26_u32,
+ u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
+ "starting instruction should be an unconditional branch to the new page (B)"
+ );
+ }
+
+ #[test]
fn test_emit_xor() {
let (mut asm, mut cb) = setup_asm();
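
The closing assertion in test_bcond_straddling_code_pages above relies on the A64 encoding of the unconditional branch: B puts the opcode 0b000101 in bits [31:26] and the signed 26-bit offset below it, so masking the top six bits is enough to tell a B apart from anything else. A self-contained version of that check:

    /// True if `insn` is an A64 unconditional `B` (opcode 0b000101 in bits [31:26]).
    fn is_unconditional_branch(insn: u32) -> bool {
        (insn >> 26) == 0b000101
    }

    fn main() {
        assert!(is_unconditional_branch(0b000101 << 26));  // B with a zero offset
        assert!(!is_unconditional_branch(0b100101 << 26)); // BL has a different top bit
    }
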
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index bf68576d7c..22f8da7667 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -7176,9 +7176,6 @@ pub struct CodegenGlobals {
/// Page indexes for outlined code that are not associated to any ISEQ.
ocb_pages: Vec<usize>,
- /// Freed page indexes. None if code GC has not been used.
- freed_pages: Option<Vec<usize>>,
-
/// How many times code GC has been executed.
code_gc_count: usize,
}
@@ -7232,8 +7229,9 @@ impl CodegenGlobals {
);
let mem_block = Rc::new(RefCell::new(mem_block));
- let cb = CodeBlock::new(mem_block.clone(), false);
- let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true));
+ let freed_pages = Rc::new(None);
+ let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));
assert_eq!(cb.page_size() % page_size.as_usize(), 0, "code page size is not page-aligned");
@@ -7275,7 +7273,6 @@ impl CodegenGlobals {
inline_frozen_bytes: 0,
method_codegen_table: HashMap::new(),
ocb_pages,
- freed_pages: None,
code_gc_count: 0,
};
@@ -7423,12 +7420,7 @@ impl CodegenGlobals {
&CodegenGlobals::get_instance().ocb_pages
}
- pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
- &mut CodegenGlobals::get_instance().freed_pages
- }
-
- pub fn set_freed_pages(freed_pages: Vec<usize>) {
- CodegenGlobals::get_instance().freed_pages = Some(freed_pages);
+ pub fn incr_code_gc_count() {
CodegenGlobals::get_instance().code_gc_count += 1;
}