author	Takashi Kokubun <takashikkbn@gmail.com>	2023-03-17 11:53:17 -0700
committer	GitHub <noreply@github.com>	2023-03-17 11:53:17 -0700
commit	9fd94d6a0cc7626c8abfc9ba05d6f156927ee8f7 (patch)
tree	281ac6178534fae0064679b2b1b20e73dc0dd036
parent	3592b24cdc07ed89eecb39161f21fe721a89a5de (diff)
download	ruby-9fd94d6a0cc7626c8abfc9ba05d6f156927ee8f7.tar.gz
YJIT: Support entry for multiple PCs per ISEQ (GH-7535)
-rw-r--r--	yjit.c	6
-rw-r--r--	yjit/bindgen/src/main.rs	1
-rw-r--r--	yjit/src/codegen.rs	64
-rw-r--r--	yjit/src/core.rs	220
-rw-r--r--	yjit/src/cruby.rs	7
-rw-r--r--	yjit/src/cruby_bindings.inc.rs	1
-rw-r--r--	yjit/src/stats.rs	1
7 files changed, 268 insertions, 32 deletions
diff --git a/yjit.c b/yjit.c
index 8e8342971e..85401318ad 100644
--- a/yjit.c
+++ b/yjit.c
@@ -765,6 +765,12 @@ rb_get_ec_cfp(const rb_execution_context_t *ec)
return ec->cfp;
}
+const rb_iseq_t *
+rb_get_cfp_iseq(struct rb_control_frame_struct *cfp)
+{
+ return cfp->iseq;
+}
+
VALUE *
rb_get_cfp_pc(struct rb_control_frame_struct *cfp)
{
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 3dc503f7c3..a2af76a787 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -368,6 +368,7 @@ fn main() {
.allowlist_function("rb_insn_len")
.allowlist_function("rb_yarv_class_of")
.allowlist_function("rb_get_ec_cfp")
+ .allowlist_function("rb_get_cfp_iseq")
.allowlist_function("rb_get_cfp_pc")
.allowlist_function("rb_get_cfp_sp")
.allowlist_function("rb_get_cfp_self")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index e9c17cb537..177bdfe088 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -19,6 +19,7 @@ use std::ffi::CStr;
use std::mem::{self, size_of};
use std::os::raw::{c_int};
use std::ptr;
+use std::rc::Rc;
use std::slice;
pub use crate::virtualmem::CodePtr;
@@ -619,38 +620,36 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr {
}
// Generate a runtime guard that ensures the PC is at the expected
-// instruction index in the iseq, otherwise takes a side-exit.
+// instruction index in the iseq, otherwise jumps to an entry stub
+// that generates another guard and entry.
// This is to handle the situation of optional parameters.
// When a function with optional parameters is called, the entry
// PC for the method isn't necessarily 0.
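// For example, given `def foo(a, b = 1)`, a call like `foo(1)` enters at
// PC 0 to run the default-value assignment for `b`, while `foo(1, 2)`
// enters at the PC just past it.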
-fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u16) {
+pub fn gen_entry_chain_guard(
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ iseq: IseqPtr,
+ insn_idx: u16,
+) -> Option<PendingEntryRef> {
+ let entry = new_pending_entry();
+ let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?;
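+ // gen_entry_stub returns None when the outlined codeblock is out of
+ // space; `?` then aborts compilation of this entry.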
+
let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC);
let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8);
+ asm.comment("guard expected PC");
asm.cmp(pc_opnd, expected_pc_opnd);
- let pc_match = asm.new_label("pc_match");
- asm.je(pc_match);
-
- // We're not starting at the first PC, so we need to exit.
- gen_counter_incr!(asm, leave_start_pc_non_zero);
-
- asm.cpop_into(SP);
- asm.cpop_into(EC);
- asm.cpop_into(CFP);
-
- asm.frame_teardown();
-
- asm.cret(Qundef.into());
-
- // PC should match the expected insn_idx
- asm.write_label(pc_match);
+ asm.mark_entry_start(&entry);
+ asm.jne(stub_addr.into());
+ asm.mark_entry_end(&entry);
+ return Some(entry);
}
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
-pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> Option<CodePtr> {
+pub fn gen_entry_prologue(cb: &mut CodeBlock, ocb: &mut OutlinedCb, iseq: IseqPtr, insn_idx: u16) -> Option<CodePtr> {
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new();
@@ -685,10 +684,13 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> O
// different location depending on the optional parameters. If an iseq
// has optional parameters, we'll add a runtime check that the PC we've
// compiled for is the same PC that the interpreter wants us to run with.
- // If they don't match, then we'll take a side exit.
- if unsafe { get_iseq_flags_has_opt(iseq) } {
- gen_pc_guard(&mut asm, iseq, insn_idx);
- }
+ // If they don't match, then we'll jump to an entry stub and generate
+ // another PC check and entry there.
+ let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } {
+ Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?)
+ } else {
+ None
+ };
asm.compile(cb);
@@ -700,6 +702,12 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> O
for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) {
iseq_payload.pages.insert(page);
}
+ // Write an entry to the heap and push it to the ISEQ
+ if let Some(pending_entry) = pending_entry {
+ let pending_entry = Rc::try_unwrap(pending_entry)
+ .ok().expect("PendingEntry should be unique");
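+ // (unique because asm.compile() above consumed the assembler,
+ // dropping the pos_marker closures that held the other Rc clones)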
+ iseq_payload.entries.push(pending_entry.into_entry());
+ }
Some(code_ptr)
}
}
@@ -7864,6 +7872,9 @@ pub struct CodegenGlobals {
// For servicing branch stubs
branch_stub_hit_trampoline: CodePtr,
+ // For servicing entry stubs
+ entry_stub_hit_trampoline: CodePtr,
+
// Code for full logic of returning from C method and exiting to the interpreter
outline_full_cfunc_return_pos: CodePtr,
@@ -7901,7 +7912,6 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
use std::cell::RefCell;
- use std::rc::Rc;
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
@@ -7949,6 +7959,7 @@ impl CodegenGlobals {
let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb);
+ let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb);
// Generate full exit code for C func
let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
@@ -7967,6 +7978,7 @@ impl CodegenGlobals {
stub_exit_code: stub_exit_code,
outline_full_cfunc_return_pos: cfunc_exit_code,
branch_stub_hit_trampoline,
+ entry_stub_hit_trampoline,
global_inval_patches: Vec::new(),
method_codegen_table: HashMap::new(),
ocb_pages,
@@ -8105,6 +8117,10 @@ impl CodegenGlobals {
CodegenGlobals::get_instance().branch_stub_hit_trampoline
}
+ pub fn get_entry_stub_hit_trampoline() -> CodePtr {
+ CodegenGlobals::get_instance().entry_stub_hit_trampoline
+ }
+
pub fn look_up_codegen_method(method_serial: usize) -> Option<MethodGenFn> {
let table = &CodegenGlobals::get_instance().method_codegen_table;
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 888f795279..c8c945fac3 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -688,7 +688,7 @@ impl PendingBranch {
// The branch struct is uninitialized right now but has a stable address.
// We make sure the stub runs after the branch is initialized.
let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
- let stub_addr = gen_call_branch_stub_hit(ocb, branch_struct_addr, target_idx);
+ let stub_addr = gen_branch_stub(ocb, branch_struct_addr, target_idx);
if let Some(stub_addr) = stub_addr {
// Fill the branch target with a stub
@@ -741,6 +741,35 @@ impl PendingBranch {
}
}
+// Store info about code used on YJIT entry
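+// The recorded range is rewritten later by regenerate_entry to redirect
+// the guard's jne from its entry stub to the next entry in the chain.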
+pub struct Entry {
+ // Positions where the generated code starts and ends
+ start_addr: CodePtr,
+ end_addr: CodePtr, // exclusive
+}
+
+/// An [Entry] that is under construction.
+pub struct PendingEntry {
+ pub uninit_entry: Box<MaybeUninit<Entry>>,
+ start_addr: Cell<Option<CodePtr>>,
+ end_addr: Cell<Option<CodePtr>>, // exclusive
+}
+
+impl PendingEntry {
+ // Construct the entry in the heap
+ pub fn into_entry(mut self) -> EntryRef {
+ // Make the entry
+ let entry = Entry {
+ start_addr: self.start_addr.get().unwrap(),
+ end_addr: self.end_addr.get().unwrap(),
+ };
+ // Move it to the designated place on the heap and unwrap MaybeUninit.
+ self.uninit_entry.write(entry);
+ let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry);
+ NonNull::new(raw_entry as *mut Entry).expect("no null from Box")
+ }
+}
+
// In case a block is invalidated, this helps to remove all pointers to the block.
pub type CmePtr = *const rb_callable_method_entry_t;
@@ -813,6 +842,9 @@ pub type BlockRef = NonNull<Block>;
/// proper usage.
pub type BranchRef = NonNull<Branch>;
+/// Pointer to an entry that is already added to an ISEQ
+pub type EntryRef = NonNull<Entry>;
+
/// List of block versions for a given blockid
type VersionList = Vec<BlockRef>;
@@ -860,6 +892,9 @@ pub struct IseqPayload {
// Indexes of code pages used by this ISEQ
pub pages: HashSet<usize>,
+ // List of ISEQ entry codes
+ pub entries: Vec<EntryRef>,
+
// Blocks that are invalidated but are not yet deallocated.
// The code GC will free them later.
pub dead_blocks: Vec<BlockRef>,
@@ -998,6 +1033,12 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
unsafe { free_block(block, false) };
}
+ // Free all entries
+ for entryref in payload.entries.iter() {
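+ // Box::from_raw takes back ownership of the heap-allocated Entry,
+ // so dropping the Box frees it.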
+ let entry = unsafe { Box::from_raw(entryref.as_ptr()) };
+ mem::drop(entry);
+ }
+
// Increment the freed iseq count
incr_counter!(freed_iseq_count);
}
@@ -1969,9 +2010,8 @@ fn gen_block_series_body(
pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
// Compute the current instruction index based on the current PC
let insn_idx: u16 = unsafe {
- let pc_zero = rb_iseq_pc_at_idx(iseq, 0);
let ec_pc = get_cfp_pc(get_ec_cfp(ec));
- ec_pc.offset_from(pc_zero).try_into().ok()?
+ iseq_pc_to_insn_idx(iseq, ec_pc)?
};
// The entry context makes no assumptions about types
@@ -1985,7 +2025,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
let ocb = CodegenGlobals::get_outlined_cb();
// Write the interpreter entry prologue. Might be NULL when out of memory.
- let code_ptr = gen_entry_prologue(cb, iseq, insn_idx);
+ let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx);
// Try to generate code for the entry block
let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb);
@@ -2014,6 +2054,150 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
return code_ptr;
}
+// Change the entry's jump target from an entry stub to the next entry
+pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
+ let mut asm = Assembler::new();
+ asm.comment("regenerate_entry");
+
+ // gen_entry_chain_guard generates cmp + jne. We're rewriting only the jne.
+ asm.jne(next_entry.into());
+
+ // Move write_pos to rewrite the entry
+ let old_write_pos = cb.get_write_pos();
+ let old_dropped_bytes = cb.has_dropped_bytes();
+ cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr);
+ cb.set_dropped_bytes(false);
+ asm.compile(cb);
+
+ // Rewind write_pos to the original one
+ assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr);
+ cb.set_pos(old_write_pos);
+ cb.set_dropped_bytes(old_dropped_bytes);
+}
+
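+// Shared via Rc: mark_entry_start/mark_entry_end move clones into
+// pos_marker closures, while the caller keeps one reference to turn
+// into an Entry after compilation.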
+pub type PendingEntryRef = Rc<PendingEntry>;
+
+/// Create a new entry reference for an ISEQ
+pub fn new_pending_entry() -> PendingEntryRef {
+ let entry = PendingEntry {
+ uninit_entry: Box::new(MaybeUninit::uninit()),
+ start_addr: Cell::new(None),
+ end_addr: Cell::new(None),
+ };
+ return Rc::new(entry);
+}
+
+c_callable! {
+ /// Generated code calls this function with the SysV calling convention.
+ /// See [gen_entry_stub].
+ fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 {
+ with_vm_lock(src_loc!(), || {
+ match entry_stub_hit_body(entry_ptr, ec) {
+ Some(addr) => addr,
+ // Failed to service the stub by generating a new block so now we
+ // need to exit to the interpreter at the stubbed location.
+ None => return CodegenGlobals::get_stub_exit_code().raw_ptr(),
+ }
+ })
+ }
+}
+
+/// Called by the generated code when an entry stub is executed
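+/// It compiles a new entry guard for the PC the interpreter is actually at,
+/// patches the previous guard's jne to chain to it, and returns the address
+/// of the target block, compiling the block first if it doesn't exist yet.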
+fn entry_stub_hit_body(entry_ptr: *const c_void, ec: EcPtr) -> Option<*const u8> {
+ // Get ISEQ and insn_idx from the current ec->cfp
+ let cfp = unsafe { get_ec_cfp(ec) };
+ let iseq = unsafe { get_cfp_iseq(cfp) };
+ let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?;
+
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // Compile a new entry guard as the next entry
+ let next_entry = cb.get_write_ptr();
+ let mut asm = Assembler::new();
+ let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?;
+ asm.compile(cb);
+
+ // Try to find an existing compiled version of this block
+ let blockid = BlockId { iseq, idx: insn_idx };
+ let ctx = Context::default();
+ let blockref = match find_block_version(blockid, &ctx) {
+ // If an existing block is found, generate a jump to the block.
+ Some(blockref) => {
+ let mut asm = Assembler::new();
+ asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
+ asm.compile(cb);
+ blockref
+ }
+ // If this block hasn't yet been compiled, generate blocks after the entry guard.
+ None => match gen_block_series(blockid, &ctx, ec, cb, ocb) {
+ Some(blockref) => blockref,
+ None => { // No space
+ // Trigger code GC. This entry point will be recompiled later.
+ cb.code_gc();
+ return None;
+ }
+ }
+ };
+
+ // Regenerate the previous entry
+ assert!(!entry_ptr.is_null());
+ let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null");
+ regenerate_entry(cb, &entryref, next_entry);
+
+ // Write an entry to the heap and push it to the ISEQ
+ let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique");
+ get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ // Let the stub jump to the block
+ Some(unsafe { blockref.as_ref() }.start_addr.raw_ptr())
+}
+
+/// Generate a stub that calls entry_stub_hit
+pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let stub_addr = ocb.get_write_ptr();
+
+ let mut asm = Assembler::new();
+ asm.comment("entry stub hit");
+
+ asm.mov(C_ARG_OPNDS[0], entry_address.into());
+
+ // Jump to trampoline to call entry_stub_hit()
+ // Not really a side exit; we just don't need a padded jump here.
+ asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit());
+
+ asm.compile(ocb);
+
+ if ocb.has_dropped_bytes() {
+ return None; // No space
+ } else {
+ return Some(stub_addr);
+ }
+}
+
+/// A trampoline used by gen_entry_stub. entry_stub_hit may trigger a code GC,
+/// so it's useful to call entry_stub_hit through globally shared code, which
+/// the code GC does not free.
+pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
+ let ocb = ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+ let mut asm = Assembler::new();
+
+ // See gen_entry_stub for how it's used.
+ asm.comment("entry_stub_hit() trampoline");
+ let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]);
+
+ // Jump to the address returned by the entry_stub_hit() call
+ asm.jmp_opnd(jump_addr);
+
+ asm.compile(ocb);
+
+ code_ptr
+}
+
/// Generate code for a branch, possibly rewriting and changing the size of it
fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
// Remove old comments
@@ -2088,7 +2272,7 @@ fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchR
c_callable! {
/// Generated code calls this function with the SysV calling convention.
- /// See [gen_call_branch_stub_hit].
+ /// See [gen_branch_stub].
fn branch_stub_hit(
branch_ptr: *const c_void,
target_idx: u32,
@@ -2257,7 +2441,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
/// Generate a "stub", a piece of code that calls the compiler back when run.
/// A piece of code that redeems for more code; a thunk for code.
-fn gen_call_branch_stub_hit(
+fn gen_branch_stub(
ocb: &mut OutlinedCb,
branch_struct_address: usize,
target_idx: u32,
@@ -2324,6 +2508,28 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
impl Assembler
{
+ /// Mark the start position of a patchable entry point in the machine code
+ pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) {
+ // We need our own clone of the entry Rc
+ // so that we can move it into the closure below
+ let entryref = entryref.clone();
+
+ self.pos_marker(move |code_ptr| {
+ entryref.start_addr.set(Some(code_ptr));
+ });
+ }
+
+ /// Mark the end position of a patchable entry point in the machine code
+ pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) {
+ // We need to create our own entry rc object
+ // so that we can move the closure below
+ let entryref = entryref.clone();
+
+ self.pos_marker(move |code_ptr| {
+ entryref.end_addr.set(Some(code_ptr));
+ });
+ }
+
// Mark the start position of a patchable branch in the machine code
fn mark_branch_start(&mut self, branchref: &PendingBranchRef)
{
@@ -2658,7 +2864,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// Create a stub for this branch target
- let stub_addr = gen_call_branch_stub_hit(ocb, branchref.as_ptr() as usize, target_idx as u32);
+ let stub_addr = gen_branch_stub(ocb, branchref.as_ptr() as usize, target_idx as u32);
// In case we were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index 570cc24719..5dd6cf5d69 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -143,6 +143,7 @@ pub use rb_insn_name as raw_insn_name;
pub use rb_insn_len as raw_insn_len;
pub use rb_yarv_class_of as CLASS_OF;
pub use rb_get_ec_cfp as get_ec_cfp;
+pub use rb_get_cfp_iseq as get_cfp_iseq;
pub use rb_get_cfp_pc as get_cfp_pc;
pub use rb_get_cfp_sp as get_cfp_sp;
pub use rb_get_cfp_self as get_cfp_self;
@@ -243,6 +244,12 @@ pub struct VALUE(pub usize);
/// Pointer to an ISEQ
pub type IseqPtr = *const rb_iseq_t;
+/// Given an ISEQ pointer, convert PC to insn_idx
+pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> {
+ let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
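+ // offset_from counts in units of VALUE; try_into().ok() maps a negative
+ // or out-of-u16-range offset to None.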
+ unsafe { pc.offset_from(pc_zero) }.try_into().ok()
+}
+
/// Opaque execution-context type from vm_core.h
#[repr(C)]
pub struct rb_execution_context_struct {
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 19cea5e682..5d16ad13f2 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1283,6 +1283,7 @@ extern "C" {
pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE;
pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct;
+ pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t;
pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 785dc9b0f9..5f8f841ffc 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -283,7 +283,6 @@ make_counters! {
leave_se_interrupt,
leave_interp_return,
- leave_start_pc_non_zero,
getivar_se_self_not_heap,
getivar_idx_out_of_range,