author     NARUSE, Yui <naruse@airemix.jp>    2023-02-03 14:13:02 +0900
committer  NARUSE, Yui <naruse@airemix.jp>    2023-02-03 14:13:09 +0900
commit     535d863f34e6c36a2378683e7c2d3b7369e3d076 (patch)
tree       c14dd8f2b407e868bad684b5011236d00a2145f8
parent     f4e6e78410136100ef5f285136a66df8d6004a61 (diff)
download   ruby-535d863f34e6c36a2378683e7c2d3b7369e3d076.tar.gz
merge revision(s) 188688a53e7708d25ab80e14d05e70ffcf792e13: [Backport #19385]

[PATCH 1/4] YJIT: Move CodegenGlobals::freed_pages into an Rc

This allows for supplying a freed_pages vec in Rust tests. We need it so we
can test scenarios that occur after code GC.
---
 yjit/src/asm/mod.rs | 48 +++++++++++++++++++++++++++++++++------------
 yjit/src/codegen.rs | 16 ++++-----------
 2 files changed, 39 insertions(+), 25 deletions(-)

Subject: [PATCH 2/4] YJIT: other_cb is None in tests

Since the other cb is in CodegenGlobals, and we want Rust tests to be
self-contained.
---
 yjit/src/asm/mod.rs | 1 +
 1 file changed, 1 insertion(+)

Subject: [PATCH 3/4] YJIT: ARM64: Move functions out of arm64_emit()

---
 yjit/src/backend/arm64/mod.rs | 180 +++++++++++++++++-----------------
 1 file changed, 90 insertions(+), 90 deletions(-)

Subject: [PATCH 4/4] YJIT: ARM64: Fix long jumps to labels

Previously, with Code GC, YJIT panicked while trying to emit a B.cond
instruction with an offset that is not encodable in 19 bits. This only
happens when the code in an assembler instance straddles two pages.

To fix this, when we detect that a jump to a label can land on a
different page, we switch to a fresh new page and regenerate all the
code in the assembler there. We still assume that no one assembler has
so much code that it wouldn't fit inside a fresh new page.

[Bug #19385]
---
 yjit/src/backend/arm64/mod.rs | 65 ++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 5 deletions(-)
-rw-r--r--  p                               38
-rw-r--r--  version.h                        2
-rw-r--r--  yjit/src/asm/mod.rs             49
-rw-r--r--  yjit/src/backend/arm64/mod.rs  245
-rw-r--r--  yjit/src/codegen.rs             16
5 files changed, 229 insertions(+), 121 deletions(-)
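Patch 1/4 above replaces a global freed-pages list with an Rc<Option<Vec<usize>>> that the inline and outlined code blocks share, so Rust tests can supply their own list. Below is a minimal standalone sketch of that ownership pattern; Block and its methods are hypothetical stand-ins for YJIT's CodeBlock, not the real API.

```rust
use std::rc::Rc;

// Hypothetical stand-in for YJIT's CodeBlock; only the freed_pages field matters here.
struct Block {
    freed_pages: Rc<Option<Vec<usize>>>,
}

impl Block {
    fn new(freed_pages: Rc<Option<Vec<usize>>>) -> Self {
        Block { freed_pages }
    }

    // A page is reusable only if code GC has run and listed it as free.
    fn page_is_free(&self, page_idx: usize) -> bool {
        self.freed_pages
            .as_ref()
            .as_ref()
            .map_or(false, |pages| pages.contains(&page_idx))
    }
}

fn main() {
    // Production-style wiring: no code GC yet, both blocks share the same `None`.
    let shared = Rc::new(None);
    let cb = Block::new(Rc::clone(&shared));
    let ocb = Block::new(shared);
    assert!(!cb.page_is_free(3) && !ocb.page_is_free(3));

    // Test-style wiring: inject a freed-page list without touching any global state.
    let test_cb = Block::new(Rc::new(Some(vec![0, 65])));
    assert!(test_cb.page_is_free(65));
}
```

Cloning the Rc keeps both blocks looking at the same list, and a test can hand in Rc::new(Some(...)) directly, which is what new_dummy_with_freed_pages does further down in the diff.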
diff --git a/p b/p
new file mode 100644
index 0000000000..92b251b720
--- /dev/null
+++ b/p
@@ -0,0 +1,38 @@
+[PATCH 1/4] YJIT: Move CodegenGlobals::freed_pages into an Rc
+
+This allows for supplying a freed_pages vec in Rust tests. We need it so we
+can test scenarios that occur after code GC.
+---
+ yjit/src/asm/mod.rs | 48 +++++++++++++++++++++++++++++++++------------
+ yjit/src/codegen.rs | 16 ++++-----------
+ 2 files changed, 39 insertions(+), 25 deletions(-)
+
+Subject: [PATCH 2/4] YJIT: other_cb is None in tests
+
+Since the other cb is in CodegenGlobals, and we want Rust tests to be
+self-contained.
+---
+ yjit/src/asm/mod.rs | 1 +
+ 1 file changed, 1 insertion(+)
+
+Subject: [PATCH 3/4] YJIT: ARM64: Move functions out of arm64_emit()
+
+---
+ yjit/src/backend/arm64/mod.rs | 180 +++++++++++++++++-----------------
+ 1 file changed, 90 insertions(+), 90 deletions(-)
+
+Subject: [PATCH 4/4] YJIT: ARM64: Fix long jumps to labels
+
+Previously, with Code GC, YJIT panicked while trying to emit a B.cond
+instruction with an offset that is not encodable in 19 bits. This only
+happens when the code in an assembler instance straddles two pages.
+
+To fix this, when we detect that a jump to a label can land on a
+different page, we switch to a fresh new page and regenerate all the
+code in the assembler there. We still assume that no one assembler has
+so much code that it wouldn't fit inside a fresh new page.
+
+[Bug #19385]
+---
+ yjit/src/backend/arm64/mod.rs | 65 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 60 insertions(+), 5 deletions(-)
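Patch 4/4 makes arm64_emit signal failure when a jump to a label could cross a page boundary, and the caller retries on a fresh page. The sketch below shows only that retry shape under simplified assumptions; Emitter and EmitterState are hypothetical stand-ins, not the YJIT assembler or code block.

```rust
// Hypothetical, simplified stand-ins: the real code threads a CodeBlock, label
// state, and GC offsets through arm64_emit and compile_with_regs.
#[derive(Clone)]
struct EmitterState {
    write_pos: usize,
}

struct Emitter {
    code: Vec<u8>,
    has_labels: bool,
}

impl Emitter {
    // Pretend emit: refuse to emit if pending label jumps could cross a page boundary.
    fn emit(&self, state: &mut EmitterState, page_size: usize) -> Result<(), ()> {
        let end = state.write_pos + self.code.len();
        if self.has_labels && state.write_pos / page_size != (end - 1) / page_size {
            return Err(()); // a jump to a label could land on a different page
        }
        state.write_pos = end;
        Ok(())
    }

    // Move the write position to the start of the next page.
    fn jump_to_fresh_page(&self, state: &mut EmitterState, page_size: usize) {
        state.write_pos = (state.write_pos / page_size + 1) * page_size;
    }
}

fn compile(emitter: &Emitter, state: &mut EmitterState, page_size: usize) {
    let saved = state.clone();
    if emitter.emit(state, page_size).is_err() {
        // Roll back, switch to a fresh page, and regenerate all the code there.
        *state = saved;
        emitter.jump_to_fresh_page(state, page_size);
        emitter
            .emit(state, page_size)
            .expect("should not fail when starting from a fresh page");
    }
}

fn main() {
    // 64 bytes of code starting 8 bytes before the end of a 4 KiB page would straddle it.
    let emitter = Emitter { code: vec![0; 64], has_labels: true };
    let mut state = EmitterState { write_pos: 4096 - 8 };
    compile(&emitter, &mut state, 4096);
    assert_eq!(4096 + 64, state.write_pos); // the code was regenerated on the next page
}
```

This relies on the same assumption the commit message states: a single assembler never holds more code than fits inside one fresh page, so the second attempt cannot fail.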
diff --git a/version.h b/version.h
index 00147ecc6f..a1a86affca 100644
--- a/version.h
+++ b/version.h
@@ -11,7 +11,7 @@
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 25
+#define RUBY_PATCHLEVEL 26
#include "ruby/version.h"
#include "ruby/internal/abi.h"
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 44ae74d470..c814ed8520 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -83,6 +83,10 @@ pub struct CodeBlock {
// for example, when there is not enough space or when a jump
// target is too far away.
dropped_bytes: bool,
+
+ // Keeps track of what pages we can write to after code gc.
+ // `None` means all pages are free.
+ freed_pages: Rc<Option<Vec<usize>>>,
}
/// Set of CodeBlock label states. Used for recovering the previous state.
@@ -94,7 +98,7 @@ pub struct LabelState {
impl CodeBlock {
/// Make a new CodeBlock
- pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool) -> Self {
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self {
let mem_size = mem_block.borrow().virtual_region_size();
let mut cb = Self {
mem_block,
@@ -108,6 +112,7 @@ impl CodeBlock {
asm_comments: BTreeMap::new(),
outlined,
dropped_bytes: false,
+ freed_pages,
};
cb.write_pos = cb.page_start();
cb
@@ -120,7 +125,7 @@ impl CodeBlock {
self.set_write_ptr(base_ptr);
// Use the freed_pages list if code GC has been used. Otherwise use the next page.
- let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
+ let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() {
let current_page = self.write_pos / CODE_PAGE_SIZE;
freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
} else {
@@ -134,6 +139,7 @@ impl CodeBlock {
}
// Move the other CodeBlock to the same page if it'S on the furthest page
+ #[cfg(not(test))]
self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
return !self.dropped_bytes;
@@ -234,7 +240,7 @@ impl CodeBlock {
}
pub fn has_freed_page(&self, page_idx: usize) -> bool {
- CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+ self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
self.write_pos < page_idx * CODE_PAGE_SIZE // and not written yet
}
@@ -284,18 +290,12 @@ impl CodeBlock {
/// Return the address ranges of a given address range that this CodeBlock can write.
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
- // CodegenGlobals is not initialized when we write initial ocb code
- let freed_pages = if CodegenGlobals::has_instance() {
- CodegenGlobals::get_freed_pages().as_ref()
- } else {
- None
- };
-
let region_start = self.get_ptr(0).into_usize();
let region_end = self.get_ptr(self.get_mem_size()).into_usize();
let mut start = start_ptr.into_usize();
let end = std::cmp::min(end_ptr.into_usize(), region_end);
+ let freed_pages = self.freed_pages.as_ref().as_ref();
let mut addrs = vec![];
while start < end {
let page_idx = start.saturating_sub(region_start) / CODE_PAGE_SIZE;
@@ -558,7 +558,7 @@ impl CodeBlock {
/// Code GC. Free code pages that are not on stack and reuse them.
pub fn code_gc(&mut self) {
// The previous code GC failed to free any pages. Give up.
- if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
+ if self.freed_pages.as_ref() == &Some(vec![]) {
return;
}
@@ -613,7 +613,13 @@ impl CodeBlock {
ocb.clear_comments();
}
- CodegenGlobals::set_freed_pages(freed_pages);
+ // Track which pages are free.
+ let new_freed_pages = Rc::new(Some(freed_pages));
+ let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages));
+ self.other_cb().unwrap().freed_pages = new_freed_pages;
+ assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate
+
+ CodegenGlobals::incr_code_gc_count();
}
pub fn inline(&self) -> bool {
@@ -643,7 +649,24 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
- Self::new(Rc::new(RefCell::new(virt_mem)), false)
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+ }
+
+ /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
+ pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self {
+ use std::ptr::NonNull;
+ use crate::virtualmem::*;
+ use crate::virtualmem::tests::TestingAllocator;
+
+ freed_pages.sort_unstable();
+ let mem_size = CODE_PAGE_SIZE *
+ (1 + freed_pages.last().expect("freed_pages vec should not be empty"));
+
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)))
}
}
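The code_gc hunk above publishes the new freed-page list by swapping it into both code blocks and asserting that the old list is about to be deallocated. Here is a self-contained sketch of that mem::replace / Rc::strong_count pattern, with a hypothetical Holder type in place of CodeBlock:

```rust
use std::mem;
use std::rc::Rc;

// Hypothetical stand-in for a CodeBlock; only the shared freed_pages field matters here.
struct Holder {
    freed_pages: Rc<Option<Vec<usize>>>,
}

// Publish a new freed-page list to both blocks and check that the old one gets freed.
fn publish_freed_pages(cb: &mut Holder, ocb: &mut Holder, freed_pages: Vec<usize>) {
    let new_freed_pages = Rc::new(Some(freed_pages));
    // Swap the new list into the first block, holding on to the old Rc for a moment.
    let old_freed_pages = mem::replace(&mut cb.freed_pages, Rc::clone(&new_freed_pages));
    // Point the second block at the same list; its old reference is dropped here.
    ocb.freed_pages = new_freed_pages;
    // Only our local handle still refers to the old list, so it is about to be deallocated.
    assert_eq!(1, Rc::strong_count(&old_freed_pages));
}

fn main() {
    let initial = Rc::new(None);
    let mut cb = Holder { freed_pages: Rc::clone(&initial) };
    let mut ocb = Holder { freed_pages: initial };

    publish_freed_pages(&mut cb, &mut ocb, vec![2, 5, 7]);
    assert!(Rc::ptr_eq(&cb.freed_pages, &ocb.freed_pages));
}
```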
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index eb096ce677..55460eda9e 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -69,6 +69,96 @@ impl From<&Opnd> for A64Opnd {
}
}
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb that is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ #[cfg(not(test))]
+ let start = cb.get_write_ptr();
+ emit_jmp_ptr(cb, dst_ptr, true);
+ #[cfg(not(test))]
+ {
+ let end = cb.get_write_ptr();
+ use crate::cruby::rb_yjit_icache_invalidate;
+ unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
+ }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+ let src_addr = cb.get_write_ptr().into_i64();
+ let dst_addr = dst_ptr.into_i64();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ if padding {
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ for _ in num_insns..(JMP_PTR_BYTES / 4) {
+ nop(cb);
+ }
+ }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+ let mut current = value;
+
+ if current <= 0xffff {
+ // If the value fits into a single movz
+ // instruction, then we'll use that.
+ movz(cb, rd, A64Opnd::new_uimm(current), 0);
+ return 1;
+ } else if BitmaskImmediate::try_from(current).is_ok() {
+ // Otherwise, if the immediate can be encoded
+ // with the special bitmask immediate encoding,
+ // we'll use that.
+ mov(cb, rd, A64Opnd::new_uimm(current));
+ return 1;
+ } else {
+ // Finally we'll fall back to encoding the value
+ // using movz for the first 16 bits and movk for
+ // each subsequent set of 16 bits as long we
+ // they are necessary.
+ movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+ let mut num_insns = 1;
+
+ // (We're sure this is necessary since we
+ // checked if it only fit into movz above).
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+ num_insns += 1;
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+ num_insns += 1;
+ }
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+ num_insns += 1;
+ }
+ return num_insns;
+ }
+}
+
impl Assembler
{
// A special scratch register for intermediate processing.
@@ -551,9 +641,8 @@ impl Assembler
}
/// Emit platform-specific machine code
- /// Returns a list of GC offsets
- pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
- {
+ /// Returns a list of GC offsets. Can return failure to signal caller to retry.
+ fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Result<Vec<u32>, ()> {
/// Determine how many instructions it will take to represent moving
/// this value into a register. Note that the return value of this
/// function must correspond to how many instructions are used to
@@ -574,52 +663,6 @@ impl Assembler
}
}
- /// Emit the required instructions to load the given value into the
- /// given register. Our goal here is to use as few instructions as
- /// possible to get this value into the register.
- fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
- let mut current = value;
-
- if current <= 0xffff {
- // If the value fits into a single movz
- // instruction, then we'll use that.
- movz(cb, rd, A64Opnd::new_uimm(current), 0);
- return 1;
- } else if BitmaskImmediate::try_from(current).is_ok() {
- // Otherwise, if the immediate can be encoded
- // with the special bitmask immediate encoding,
- // we'll use that.
- mov(cb, rd, A64Opnd::new_uimm(current));
- return 1;
- } else {
- // Finally we'll fall back to encoding the value
- // using movz for the first 16 bits and movk for
- // each subsequent set of 16 bits as long we
- // they are necessary.
- movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
- let mut num_insns = 1;
-
- // (We're sure this is necessary since we
- // checked if it only fit into movz above).
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
- num_insns += 1;
-
- if current > 0xffff {
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
- num_insns += 1;
- }
-
- if current > 0xffff {
- current >>= 16;
- movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
- num_insns += 1;
- }
- return num_insns;
- }
- }
-
/// Emit a conditional jump instruction to a specific target. This is
/// called when lowering any of the conditional jump instructions.
fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
@@ -691,50 +734,6 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
- fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
- let src_addr = cb.get_write_ptr().into_i64();
- let dst_addr = dst_ptr.into_i64();
-
- // If the offset is short enough, then we'll use the
- // branch instruction. Otherwise, we'll move the
- // destination into a register and use the branch
- // register instruction.
- let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
- b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
- 1
- } else {
- let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
- br(cb, Assembler::SCRATCH0);
- num_insns + 1
- };
-
- if padding {
- // Make sure it's always a consistent number of
- // instructions in case it gets patched and has to
- // use the other branch.
- for _ in num_insns..(JMP_PTR_BYTES / 4) {
- nop(cb);
- }
- }
- }
-
- /// Call emit_jmp_ptr and immediately invalidate the written range.
- /// This is needed when next_page also moves other_cb that is not invalidated
- /// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
- /// more than necessary when other_cb jumps from a position early in the page.
- /// This invalidates a small range of cb twice, but we accept the small cost.
- fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
- #[cfg(not(test))]
- let start = cb.get_write_ptr();
- emit_jmp_ptr(cb, dst_ptr, true);
- #[cfg(not(test))]
- {
- let end = cb.get_write_ptr();
- use crate::cruby::rb_yjit_icache_invalidate;
- unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
- }
- }
-
// dbg!(&self.insns);
// List of GC offsets
@@ -1055,13 +1054,19 @@ impl Assembler
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
+
+ // We don't want label references to cross page boundaries. Signal caller for
+ // retry.
+ if !self.label_names.is_empty() {
+ return Err(());
+ }
} else {
insn_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}
- gc_offsets
+ Ok(gc_offsets)
}
/// Optimize and compile the stored instructions
@@ -1076,7 +1081,16 @@ impl Assembler
}
let start_ptr = cb.get_write_ptr();
- let gc_offsets = asm.arm64_emit(cb);
+ let starting_label_state = cb.get_label_state();
+ let gc_offsets = asm.arm64_emit(cb)
+ .unwrap_or_else(|_err| {
+ // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB
+ // range limit. We can easily exceed the limit in case the jump straddles two pages.
+ // In this case, we retry with a fresh page.
+ cb.set_label_state(starting_label_state);
+ cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
+ asm.arm64_emit(cb).expect("should not fail when writing to a fresh code page")
+ });
if cb.has_dropped_bytes() {
cb.clear_labels();
@@ -1401,6 +1415,47 @@ mod tests {
}
#[test]
+ fn test_bcond_straddling_code_pages() {
+ const LANDING_PAGE: usize = 65;
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
+
+ // Skip to near the end of the page. Room for two instructions.
+ cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);
+
+ let end = asm.new_label("end");
+ // Start with a conditional jump...
+ asm.jz(end);
+
+ // A few instructions, enough to cause a page switch.
+ let sum = asm.add(399.into(), 111.into());
+ let xorred = asm.xor(sum, 859.into());
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
+ asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);
+
+ // The branch target. It should be in the landing page.
+ asm.write_label(end);
+ asm.cret(xorred);
+
+ // [Bug #19385]
+ // This used to panic with "The offset must be 19 bits or less."
+ // due to attempting to lower the `asm.jz` above to a `b.e` with an offset that's > 1 MiB.
+ let starting_pos = cb.get_write_pos();
+ asm.compile_with_num_regs(&mut cb, 2);
+ let gap = cb.get_write_pos() - starting_pos;
+ assert!(gap > 0b1111111111111111111);
+
+ let instruction_at_starting_pos: [u8; 4] = unsafe {
+ std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
+ }.try_into().unwrap();
+ assert_eq!(
+ 0b000101 << 26_u32,
+ u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
+ "starting instruction should be an unconditional branch to the new page (B)"
+ );
+ }
+
+ #[test]
fn test_emit_xor() {
let (mut asm, mut cb) = setup_asm();
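For context on the 19-bit limit in the commit message: AArch64 B.cond stores its target as a signed 19-bit offset counted in 4-byte instructions, which is roughly ±1 MiB of code. The sketch below illustrates that encoding constraint; it is not YJIT's own range-check helper, and the 16 KiB page size used in the example is an assumption for illustration.

```rust
/// Returns whether a branch offset, given in 4-byte instruction units, fits in
/// the signed 19-bit immediate of an AArch64 B.cond instruction.
fn bcond_offset_fits(offset_insns: i64) -> bool {
    // 19-bit signed immediate: -2^18 ..= 2^18 - 1 instructions (4 bytes each),
    // i.e. roughly +/- 1 MiB of reachable code.
    (-(1_i64 << 18)..(1 << 18)).contains(&offset_insns)
}

fn main() {
    // A branch within the same code page easily fits.
    assert!(bcond_offset_fits(1000));

    // A label that lands 65 pages away (assuming 16 KiB code pages) needs more
    // than 19 bits, which is the situation that used to trigger the panic.
    let byte_offset: i64 = 65 * 16 * 1024;
    assert!(!bcond_offset_fits(byte_offset / 4));
}
```

This is why the test above places the branch near the end of page 0 and the label on a freed page far away: the conditional jump can no longer be lowered to a short B.cond, so the assembler has to restart on a fresh page instead.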
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index bf68576d7c..22f8da7667 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -7176,9 +7176,6 @@ pub struct CodegenGlobals {
/// Page indexes for outlined code that are not associated to any ISEQ.
ocb_pages: Vec<usize>,
- /// Freed page indexes. None if code GC has not been used.
- freed_pages: Option<Vec<usize>>,
-
/// How many times code GC has been executed.
code_gc_count: usize,
}
@@ -7232,8 +7229,9 @@ impl CodegenGlobals {
);
let mem_block = Rc::new(RefCell::new(mem_block));
- let cb = CodeBlock::new(mem_block.clone(), false);
- let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true));
+ let freed_pages = Rc::new(None);
+ let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));
assert_eq!(cb.page_size() % page_size.as_usize(), 0, "code page size is not page-aligned");
@@ -7275,7 +7273,6 @@ impl CodegenGlobals {
inline_frozen_bytes: 0,
method_codegen_table: HashMap::new(),
ocb_pages,
- freed_pages: None,
code_gc_count: 0,
};
@@ -7423,12 +7420,7 @@ impl CodegenGlobals {
&CodegenGlobals::get_instance().ocb_pages
}
- pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
- &mut CodegenGlobals::get_instance().freed_pages
- }
-
- pub fn set_freed_pages(freed_pages: Vec<usize>) {
- CodegenGlobals::get_instance().freed_pages = Some(freed_pages);
+ pub fn incr_code_gc_count() {
CodegenGlobals::get_instance().code_gc_count += 1;
}