-rw-r--r--   p                               38
-rw-r--r--   version.h                        2
-rw-r--r--   yjit/src/asm/mod.rs             49
-rw-r--r--   yjit/src/backend/arm64/mod.rs  245
-rw-r--r--   yjit/src/codegen.rs             16
5 files changed, 229 insertions, 121 deletions
diff --git a/p b/p
new file mode 100644
--- /dev/null
+++ b/p
@@ -0,0 +1,38 @@
+[PATCH 1/4] YJIT: Move CodegenGlobals::freed_pages into an Rc
+
+This allows for supplying a freed_pages vec in Rust tests. We need it so we
+can test scenarios that occur after code GC.
+---
+ yjit/src/asm/mod.rs | 48 +++++++++++++++++++++++++++++++++------------
+ yjit/src/codegen.rs | 16 ++++-----------
+ 2 files changed, 39 insertions(+), 25 deletions(-)
+
+Subject: [PATCH 2/4] YJIT: other_cb is None in tests
+
+Since the other cb is in CodegenGlobals, and we want Rust tests to be
+self-contained.
+---
+ yjit/src/asm/mod.rs | 1 +
+ 1 file changed, 1 insertion(+)
+
+Subject: [PATCH 3/4] YJIT: ARM64: Move functions out of arm64_emit()
+
+---
+ yjit/src/backend/arm64/mod.rs | 180 +++++++++++++++++-----------------
+ 1 file changed, 90 insertions(+), 90 deletions(-)
+
+Subject: [PATCH 4/4] YJIT: ARM64: Fix long jumps to labels
+
+Previously, with Code GC, YJIT panicked while trying to emit a B.cond
+instruction with an offset that is not encodable in 19 bits. This only
+happens when the code in an assembler instance straddles two pages.
+
+To fix this, when we detect that a jump to a label can land on a
+different page, we switch to a fresh new page and regenerate all the
+code in the assembler there. We still assume that no one assembler has
+so much code that it wouldn't fit inside a fresh new page.
+
+[Bug #19385]
+---
+ yjit/src/backend/arm64/mod.rs | 65 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 60 insertions(+), 5 deletions(-)
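A note on the encoding limit behind patch 4: an A64 B.cond instruction stores its target as a signed 19-bit offset counted in 4-byte instructions, so a conditional branch can only reach roughly 1 MiB in either direction. A minimal sketch of that range check, with a hypothetical helper name (YJIT's encoder enforces the same bound internally):

    /// Hypothetical helper mirroring the bound YJIT's A64 encoder enforces:
    /// B.cond carries a signed 19-bit offset counted in 4-byte instructions.
    fn bcond_offset_encodable(byte_offset: i64) -> bool {
        debug_assert_eq!(byte_offset % 4, 0, "A64 instructions are 4-byte aligned");
        let insn_offset = byte_offset / 4;
        (-(1 << 18)..(1 << 18)).contains(&insn_offset)
    }

    fn main() {
        assert!(bcond_offset_encodable((1 << 20) - 4)); // farthest forward target
        assert!(!bcond_offset_encodable(1 << 20)); // one instruction out of range
    }

This is why code that straddles two pages can break: the assembler batch itself is small, but the gap between the freed pages it lands on can easily exceed that range.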
diff --git a/version.h b/version.h
--- a/version.h
+++ b/version.h
@@ -11,7 +11,7 @@
 # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
 #define RUBY_VERSION_TEENY 0
 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 25
+#define RUBY_PATCHLEVEL 26
 
 #include "ruby/version.h"
 #include "ruby/internal/abi.h"
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 44ae74d470..c814ed8520 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -83,6 +83,10 @@ pub struct CodeBlock {
     // for example, when there is not enough space or when a jump
     // target is too far away.
     dropped_bytes: bool,
+
+    // Keeps track of what pages we can write to after code gc.
+    // `None` means all pages are free.
+    freed_pages: Rc<Option<Vec<usize>>>,
 }
 
 /// Set of CodeBlock label states. Used for recovering the previous state.
@@ -94,7 +98,7 @@ pub struct LabelState {
 
 impl CodeBlock {
     /// Make a new CodeBlock
-    pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool) -> Self {
+    pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self {
         let mem_size = mem_block.borrow().virtual_region_size();
         let mut cb = Self {
             mem_block,
@@ -108,6 +112,7 @@ impl CodeBlock {
             asm_comments: BTreeMap::new(),
             outlined,
             dropped_bytes: false,
+            freed_pages,
         };
         cb.write_pos = cb.page_start();
         cb
@@ -120,7 +125,7 @@ impl CodeBlock {
         self.set_write_ptr(base_ptr);
 
         // Use the freed_pages list if code GC has been used. Otherwise use the next page.
-        let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
+        let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() {
             let current_page = self.write_pos / CODE_PAGE_SIZE;
             freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
         } else {
@@ -134,6 +139,7 @@ impl CodeBlock {
         }
 
         // Move the other CodeBlock to the same page if it's on the furthest page
+        #[cfg(not(test))]
         self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
 
         return !self.dropped_bytes;
@@ -234,7 +240,7 @@ impl CodeBlock {
     }
 
     pub fn has_freed_page(&self, page_idx: usize) -> bool {
-        CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+        self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
             self.write_pos < page_idx * CODE_PAGE_SIZE // and not written yet
     }
 
@@ -284,18 +290,12 @@ impl CodeBlock {
     /// Return the address ranges of a given address range that this CodeBlock can write.
     #[cfg(any(feature = "disasm", target_arch = "aarch64"))]
    pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
-        // CodegenGlobals is not initialized when we write initial ocb code
-        let freed_pages = if CodegenGlobals::has_instance() {
-            CodegenGlobals::get_freed_pages().as_ref()
-        } else {
-            None
-        };
-
         let region_start = self.get_ptr(0).into_usize();
         let region_end = self.get_ptr(self.get_mem_size()).into_usize();
         let mut start = start_ptr.into_usize();
         let end = std::cmp::min(end_ptr.into_usize(), region_end);
 
+        let freed_pages = self.freed_pages.as_ref().as_ref();
         let mut addrs = vec![];
         while start < end {
             let page_idx = start.saturating_sub(region_start) / CODE_PAGE_SIZE;
@@ -558,7 +558,7 @@ impl CodeBlock {
     /// Code GC. Free code pages that are not on stack and reuse them.
     pub fn code_gc(&mut self) {
         // The previous code GC failed to free any pages. Give up.
-        if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
+        if self.freed_pages.as_ref() == &Some(vec![]) {
             return;
         }
 
@@ -613,7 +613,13 @@ impl CodeBlock {
             ocb.clear_comments();
         }
 
-        CodegenGlobals::set_freed_pages(freed_pages);
+        // Track which pages are free.
+        let new_freed_pages = Rc::new(Some(freed_pages));
+        let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages));
+        self.other_cb().unwrap().freed_pages = new_freed_pages;
+        assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate
+
+        CodegenGlobals::incr_code_gc_count();
     }
 
     pub fn inline(&self) -> bool {
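The assert_eq! on Rc::strong_count above encodes the invariant that makes the swap safe: once this CodeBlock and the other one both point at the new list, the handle returned by mem::replace must hold the last reference to the old one, so dropping it deallocates the old vec. A self-contained sketch of the same ownership dance, with illustrative names:

    use std::mem;
    use std::rc::Rc;

    fn main() {
        // Two owners share one freed-pages list, like cb and ocb do.
        let mut cb_pages: Rc<Option<Vec<usize>>> = Rc::new(Some(vec![1, 2]));
        let mut ocb_pages = Rc::clone(&cb_pages);
        assert_eq!(2, Rc::strong_count(&ocb_pages));

        // Code GC computed a fresh list; install it in both owners.
        let new_pages = Rc::new(Some(vec![3]));
        let old_pages = mem::replace(&mut cb_pages, Rc::clone(&new_pages));
        ocb_pages = new_pages;

        // The handle we got back is the last reference to the old list,
        // so dropping it frees the old allocation.
        assert_eq!(1, Rc::strong_count(&old_pages));
        assert_eq!(2, Rc::strong_count(&ocb_pages));
    }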
@@ -643,7 +649,24 @@ impl CodeBlock {
         let mem_start: *const u8 = alloc.mem_start();
         let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
 
-        Self::new(Rc::new(RefCell::new(virt_mem)), false)
+        Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+    }
+
+    /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
+    pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self {
+        use std::ptr::NonNull;
+        use crate::virtualmem::*;
+        use crate::virtualmem::tests::TestingAllocator;
+
+        freed_pages.sort_unstable();
+        let mem_size = CODE_PAGE_SIZE *
+            (1 + freed_pages.last().expect("freed_pages vec should not be empty"));
+
+        let alloc = TestingAllocator::new(mem_size);
+        let mem_start: *const u8 = alloc.mem_start();
+        let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+
+        Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)))
     }
 }
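The new constructor makes post-code-GC states reproducible in unit tests without running an actual code GC. A hypothetical use (the test name and page indexes here are made up; the real consumer is test_bcond_straddling_code_pages further down):

    #[test]
    fn dummy_code_block_reports_freed_pages() {
        // Pretend a code GC freed pages 0 and 3; the constructor sizes
        // the backing memory to cover the highest freed page.
        let cb = CodeBlock::new_dummy_with_freed_pages(vec![0, 3]);

        // Page 3 is in the freed list and nothing has been written that
        // far, so it is reported as reusable.
        assert!(cb.has_freed_page(3));
    }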
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index eb096ce677..55460eda9e 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -69,6 +69,96 @@ impl From<&Opnd> for A64Opnd {
     }
 }
 
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb that is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+    #[cfg(not(test))]
+    let start = cb.get_write_ptr();
+    emit_jmp_ptr(cb, dst_ptr, true);
+    #[cfg(not(test))]
+    {
+        let end = cb.get_write_ptr();
+        use crate::cruby::rb_yjit_icache_invalidate;
+        unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
+    }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+    let src_addr = cb.get_write_ptr().into_i64();
+    let dst_addr = dst_ptr.into_i64();
+
+    // If the offset is short enough, then we'll use the
+    // branch instruction. Otherwise, we'll move the
+    // destination into a register and use the branch
+    // register instruction.
+    let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+        b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+        1
+    } else {
+        let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+        br(cb, Assembler::SCRATCH0);
+        num_insns + 1
+    };
+
+    if padding {
+        // Make sure it's always a consistent number of
+        // instructions in case it gets patched and has to
+        // use the other branch.
+        for _ in num_insns..(JMP_PTR_BYTES / 4) {
+            nop(cb);
+        }
+    }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+    let mut current = value;
+
+    if current <= 0xffff {
+        // If the value fits into a single movz
+        // instruction, then we'll use that.
+        movz(cb, rd, A64Opnd::new_uimm(current), 0);
+        return 1;
+    } else if BitmaskImmediate::try_from(current).is_ok() {
+        // Otherwise, if the immediate can be encoded
+        // with the special bitmask immediate encoding,
+        // we'll use that.
+        mov(cb, rd, A64Opnd::new_uimm(current));
+        return 1;
+    } else {
+        // Finally we'll fall back to encoding the value
+        // using movz for the first 16 bits and movk for
+        // each subsequent set of 16 bits as long as
+        // they are necessary.
+        movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+        let mut num_insns = 1;
+
+        // (We're sure this is necessary since we
+        // checked if it only fit into movz above).
+        current >>= 16;
+        movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+        num_insns += 1;
+
+        if current > 0xffff {
+            current >>= 16;
+            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+            num_insns += 1;
+        }
+
+        if current > 0xffff {
+            current >>= 16;
+            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+            num_insns += 1;
+        }
+        return num_insns;
+    }
+}
+
 impl Assembler
 {
     // A special scratch register for intermediate processing.
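To make emit_load_value's tiers concrete: 0x7fff fits a single movz; a repeating pattern such as 0x0101_0101_0101_0101 is a valid bitmask immediate and costs a single mov; anything else takes a movz plus one movk per additional 16-bit chunk up to the highest non-zero one, so a full 64-bit constant costs at most four instructions. A hypothetical count-only mirror of that decision tree (is_bitmask_immediate stands in for BitmaskImmediate::try_from(value).is_ok()):

    /// Hypothetical mirror of emit_load_value that only counts instructions.
    fn load_insn_count(value: u64, is_bitmask_immediate: bool) -> usize {
        if value <= 0xffff {
            1 // single movz
        } else if is_bitmask_immediate {
            1 // single mov, bitmask-immediate form
        } else {
            // movz for bits 0..16, then one movk per remaining 16-bit chunk
            // up to the highest chunk that has bits set.
            1 + (3 - value.leading_zeros() as usize / 16)
        }
    }

    fn main() {
        // movz, movk #16, movk #32: emission stops after the highest
        // non-zero chunk.
        assert_eq!(3, load_insn_count(0x1_0000_0000, false));
        assert_eq!(4, load_insn_count(u64::MAX, false));
    }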
@@ -551,9 +641,8 @@ impl Assembler
     }
 
     /// Emit platform-specific machine code
-    /// Returns a list of GC offsets
-    pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
-    {
+    /// Returns a list of GC offsets. Can return failure to signal the caller to retry.
+    fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Result<Vec<u32>, ()> {
         /// Determine how many instructions it will take to represent moving
         /// this value into a register. Note that the return value of this
         /// function must correspond to how many instructions are used to
@@ -574,52 +663,6 @@ impl Assembler
             }
         }
 
-        /// Emit the required instructions to load the given value into the
-        /// given register. Our goal here is to use as few instructions as
-        /// possible to get this value into the register.
-        fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
-            let mut current = value;
-
-            if current <= 0xffff {
-                // If the value fits into a single movz
-                // instruction, then we'll use that.
-                movz(cb, rd, A64Opnd::new_uimm(current), 0);
-                return 1;
-            } else if BitmaskImmediate::try_from(current).is_ok() {
-                // Otherwise, if the immediate can be encoded
-                // with the special bitmask immediate encoding,
-                // we'll use that.
-                mov(cb, rd, A64Opnd::new_uimm(current));
-                return 1;
-            } else {
-                // Finally we'll fall back to encoding the value
-                // using movz for the first 16 bits and movk for
-                // each subsequent set of 16 bits as long we
-                // they are necessary.
-                movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
-                let mut num_insns = 1;
-
-                // (We're sure this is necessary since we
-                // checked if it only fit into movz above).
-                current >>= 16;
-                movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
-                num_insns += 1;
-
-                if current > 0xffff {
-                    current >>= 16;
-                    movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
-                    num_insns += 1;
-                }
-
-                if current > 0xffff {
-                    current >>= 16;
-                    movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
-                    num_insns += 1;
-                }
-                return num_insns;
-            }
-        }
-
         /// Emit a conditional jump instruction to a specific target. This is
         /// called when lowering any of the conditional jump instructions.
         fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
@@ -691,50 +734,6 @@ impl Assembler
             ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
         }
 
-        fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
-            let src_addr = cb.get_write_ptr().into_i64();
-            let dst_addr = dst_ptr.into_i64();
-
-            // If the offset is short enough, then we'll use the
-            // branch instruction. Otherwise, we'll move the
-            // destination into a register and use the branch
-            // register instruction.
-            let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
-                b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
-                1
-            } else {
-                let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
-                br(cb, Assembler::SCRATCH0);
-                num_insns + 1
-            };
-
-            if padding {
-                // Make sure it's always a consistent number of
-                // instructions in case it gets patched and has to
-                // use the other branch.
-                for _ in num_insns..(JMP_PTR_BYTES / 4) {
-                    nop(cb);
-                }
-            }
-        }
-
-        /// Call emit_jmp_ptr and immediately invalidate the written range.
-        /// This is needed when next_page also moves other_cb that is not invalidated
-        /// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
-        /// more than necessary when other_cb jumps from a position early in the page.
-        /// This invalidates a small range of cb twice, but we accept the small cost.
-        fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
-            #[cfg(not(test))]
-            let start = cb.get_write_ptr();
-            emit_jmp_ptr(cb, dst_ptr, true);
-            #[cfg(not(test))]
-            {
-                let end = cb.get_write_ptr();
-                use crate::cruby::rb_yjit_icache_invalidate;
-                unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
-            }
-        }
-
         // dbg!(&self.insns);
 
         // List of GC offsets
@@ -1055,13 +1054,19 @@ impl Assembler
             if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) {
                 // Reset cb states before retrying the current Insn
                 cb.set_label_state(old_label_state);
+
+                // We don't want label references to cross page boundaries. Signal caller for
+                // retry.
+                if !self.label_names.is_empty() {
+                    return Err(());
+                }
             } else {
                 insn_idx += 1;
                 gc_offsets.append(&mut insn_gc_offsets);
             }
         }
 
-        gc_offsets
+        Ok(gc_offsets)
     }
 
     /// Optimize and compile the stored instructions
@@ -1076,7 +1081,16 @@ impl Assembler
         }
 
         let start_ptr = cb.get_write_ptr();
-        let gc_offsets = asm.arm64_emit(cb);
+        let starting_label_state = cb.get_label_state();
+        let gc_offsets = asm.arm64_emit(cb)
+            .unwrap_or_else(|_err| {
+                // We want to lower jumps to labels to b.cond instructions, which have a 1 MiB
+                // range limit. We can easily exceed the limit in case the jump straddles two pages.
+                // In this case, we retry with a fresh page.
+                cb.set_label_state(starting_label_state);
+                cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
+                asm.arm64_emit(cb).expect("should not fail when writing to a fresh code page")
+            });
 
         if cb.has_dropped_bytes() {
             cb.clear_labels();
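The shape of the fix: arm64_emit now reports Err(()) instead of panicking when it had to switch pages while labels were still unresolved, and compile rolls the label state back and retries exactly once on a fresh page, where the whole batch is assumed to fit. A stripped-down sketch of that retry protocol (PageError and the closures are stand-ins, not YJIT's real types):

    struct PageError;

    fn emit_with_retry(
        mut emit: impl FnMut() -> Result<Vec<u32>, PageError>,
        mut restart_on_fresh_page: impl FnMut(),
    ) -> Vec<u32> {
        emit().ok().unwrap_or_else(|| {
            // Roll back label state and regenerate the whole batch on a
            // fresh page, where it is assumed to fit.
            restart_on_fresh_page();
            emit().ok().expect("should not fail on a fresh code page")
        })
    }

Keeping the retry in the caller rather than looping inside arm64_emit makes the single-retry policy explicit: a second failure is a bug, not something to paper over.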
@@ -1401,6 +1415,47 @@ mod tests {
     }
 
     #[test]
+    fn test_bcond_straddling_code_pages() {
+        const LANDING_PAGE: usize = 65;
+        let mut asm = Assembler::new();
+        let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
+
+        // Skip to near the end of the page. Room for two instructions.
+        cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);
+
+        let end = asm.new_label("end");
+        // Start with a conditional jump...
+        asm.jz(end);
+
+        // A few instructions, enough to cause a page switch.
+        let sum = asm.add(399.into(), 111.into());
+        let xorred = asm.xor(sum, 859.into());
+        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
+        asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);
+
+        // The branch target. It should be in the landing page.
+        asm.write_label(end);
+        asm.cret(xorred);
+
+        // [Bug #19385]
+        // This used to panic with "The offset must be 19 bits or less."
+        // due to attempting to lower the `asm.jz` above to a `b.eq` with an offset that's > 1 MiB.
+        let starting_pos = cb.get_write_pos();
+        asm.compile_with_num_regs(&mut cb, 2);
+        let gap = cb.get_write_pos() - starting_pos;
+        assert!(gap > 0b1111111111111111111);
+
+        let instruction_at_starting_pos: [u8; 4] = unsafe {
+            std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
+        }.try_into().unwrap();
+        assert_eq!(
+            0b000101 << 26_u32,
+            u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
+            "starting instruction should be an unconditional branch to the new page (B)"
+        );
+    }
+
+    #[test]
     fn test_emit_xor() {
         let (mut asm, mut cb) = setup_asm();
 
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index bf68576d7c..22f8da7667 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -7176,9 +7176,6 @@ pub struct CodegenGlobals {
     /// Page indexes for outlined code that are not associated to any ISEQ.
     ocb_pages: Vec<usize>,
 
-    /// Freed page indexes. None if code GC has not been used.
-    freed_pages: Option<Vec<usize>>,
-
     /// How many times code GC has been executed.
     code_gc_count: usize,
 }
@@ -7232,8 +7229,9 @@ impl CodegenGlobals {
         );
         let mem_block = Rc::new(RefCell::new(mem_block));
 
-        let cb = CodeBlock::new(mem_block.clone(), false);
-        let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true));
+        let freed_pages = Rc::new(None);
+        let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
+        let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));
 
         assert_eq!(cb.page_size() % page_size.as_usize(), 0, "code page size is not page-aligned");
 
@@ -7275,7 +7273,6 @@ impl CodegenGlobals {
             inline_frozen_bytes: 0,
             method_codegen_table: HashMap::new(),
             ocb_pages,
-            freed_pages: None,
             code_gc_count: 0,
         };
 
@@ -7423,12 +7420,7 @@ impl CodegenGlobals {
         &CodegenGlobals::get_instance().ocb_pages
     }
 
-    pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
-        &mut CodegenGlobals::get_instance().freed_pages
-    }
-
-    pub fn set_freed_pages(freed_pages: Vec<usize>) {
-        CodegenGlobals::get_instance().freed_pages = Some(freed_pages);
+    pub fn incr_code_gc_count() {
         CodegenGlobals::get_instance().code_gc_count += 1;
     }
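On the test's final assertion: A64 instructions are 32-bit words stored little-endian, and an unconditional branch (B) carries opcode 0b000101 in its top six bits, which is exactly what the 0b111111 << 26 mask extracts. The same check as a standalone sketch (hypothetical helper; the test inlines this logic):

    /// An A64 unconditional branch (B) is a 32-bit little-endian word
    /// with 0b000101 in bits [31:26].
    fn is_unconditional_branch(bytes: [u8; 4]) -> bool {
        u32::from_le_bytes(bytes) >> 26 == 0b000101
    }

    fn main() {
        // `b #4` (branch forward one instruction) encodes as 0x14000001.
        assert!(is_unconditional_branch(0x14000001u32.to_le_bytes()));
    }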