diff options
author | Takashi Kokubun <takashikkbn@gmail.com> | 2023-03-17 11:53:17 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-17 11:53:17 -0700 |
commit | 9fd94d6a0cc7626c8abfc9ba05d6f156927ee8f7 (patch) | |
tree | 281ac6178534fae0064679b2b1b20e73dc0dd036 | |
parent | 3592b24cdc07ed89eecb39161f21fe721a89a5de (diff) | |
download | ruby-9fd94d6a0cc7626c8abfc9ba05d6f156927ee8f7.tar.gz |
YJIT: Support entry for multiple PCs per ISEQ (GH-7535)
-rw-r--r-- | yjit.c | 6 | ||||
-rw-r--r-- | yjit/bindgen/src/main.rs | 1 | ||||
-rw-r--r-- | yjit/src/codegen.rs | 64 | ||||
-rw-r--r-- | yjit/src/core.rs | 220 | ||||
-rw-r--r-- | yjit/src/cruby.rs | 7 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 1 | ||||
-rw-r--r-- | yjit/src/stats.rs | 1 |
7 files changed, 268 insertions, 32 deletions
@@ -765,6 +765,12 @@ rb_get_ec_cfp(const rb_execution_context_t *ec) return ec->cfp; } +const rb_iseq_t * +rb_get_cfp_iseq(struct rb_control_frame_struct *cfp) +{ + return cfp->iseq; +} + VALUE * rb_get_cfp_pc(struct rb_control_frame_struct *cfp) { diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 3dc503f7c3..a2af76a787 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -368,6 +368,7 @@ fn main() { .allowlist_function("rb_insn_len") .allowlist_function("rb_yarv_class_of") .allowlist_function("rb_get_ec_cfp") + .allowlist_function("rb_get_cfp_iseq") .allowlist_function("rb_get_cfp_pc") .allowlist_function("rb_get_cfp_sp") .allowlist_function("rb_get_cfp_self") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e9c17cb537..177bdfe088 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -19,6 +19,7 @@ use std::ffi::CStr; use std::mem::{self, size_of}; use std::os::raw::{c_int}; use std::ptr; +use std::rc::Rc; use std::slice; pub use crate::virtualmem::CodePtr; @@ -619,38 +620,36 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { } // Generate a runtime guard that ensures the PC is at the expected -// instruction index in the iseq, otherwise takes a side-exit. +// instruction index in the iseq, otherwise takes an entry stub +// that generates another check and entry. // This is to handle the situation of optional parameters. // When a function with optional parameters is called, the entry // PC for the method isn't necessarily 0. -fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u16) { +pub fn gen_entry_chain_guard( + asm: &mut Assembler, + ocb: &mut OutlinedCb, + iseq: IseqPtr, + insn_idx: u16, +) -> Option<PendingEntryRef> { + let entry = new_pending_entry(); + let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?; + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); + asm.comment("guard expected PC"); asm.cmp(pc_opnd, expected_pc_opnd); - let pc_match = asm.new_label("pc_match"); - asm.je(pc_match); - - // We're not starting at the first PC, so we need to exit. - gen_counter_incr!(asm, leave_start_pc_non_zero); - - asm.cpop_into(SP); - asm.cpop_into(EC); - asm.cpop_into(CFP); - - asm.frame_teardown(); - - asm.cret(Qundef.into()); - - // PC should match the expected insn_idx - asm.write_label(pc_match); + asm.mark_entry_start(&entry); + asm.jne(stub_addr.into()); + asm.mark_entry_end(&entry); + return Some(entry); } /// Compile an interpreter entry block to be inserted into an iseq /// Returns None if compilation fails. -pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> Option<CodePtr> { +pub fn gen_entry_prologue(cb: &mut CodeBlock, ocb: &mut OutlinedCb, iseq: IseqPtr, insn_idx: u16) -> Option<CodePtr> { let code_ptr = cb.get_write_ptr(); let mut asm = Assembler::new(); @@ -685,10 +684,13 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> O // different location depending on the optional parameters. If an iseq // has optional parameters, we'll add a runtime check that the PC we've // compiled for is the same PC that the interpreter wants us to run with. - // If they don't match, then we'll take a side exit. - if unsafe { get_iseq_flags_has_opt(iseq) } { - gen_pc_guard(&mut asm, iseq, insn_idx); - } + // If they don't match, then we'll jump to an entry stub and generate + // another PC check and entry there. + let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } { + Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?) + } else { + None + }; asm.compile(cb); @@ -700,6 +702,12 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u16) -> O for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) { iseq_payload.pages.insert(page); } + // Write an entry to the heap and push it to the ISEQ + if let Some(pending_entry) = pending_entry { + let pending_entry = Rc::try_unwrap(pending_entry) + .ok().expect("PendingEntry should be unique"); + iseq_payload.entries.push(pending_entry.into_entry()); + } Some(code_ptr) } } @@ -7864,6 +7872,9 @@ pub struct CodegenGlobals { // For servicing branch stubs branch_stub_hit_trampoline: CodePtr, + // For servicing entry stubs + entry_stub_hit_trampoline: CodePtr, + // Code for full logic of returning from C method and exiting to the interpreter outline_full_cfunc_return_pos: CodePtr, @@ -7901,7 +7912,6 @@ impl CodegenGlobals { #[cfg(not(test))] let (mut cb, mut ocb) = { use std::cell::RefCell; - use std::rc::Rc; let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; @@ -7949,6 +7959,7 @@ impl CodegenGlobals { let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb); + let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb); // Generate full exit code for C func let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); @@ -7967,6 +7978,7 @@ impl CodegenGlobals { stub_exit_code: stub_exit_code, outline_full_cfunc_return_pos: cfunc_exit_code, branch_stub_hit_trampoline, + entry_stub_hit_trampoline, global_inval_patches: Vec::new(), method_codegen_table: HashMap::new(), ocb_pages, @@ -8105,6 +8117,10 @@ impl CodegenGlobals { CodegenGlobals::get_instance().branch_stub_hit_trampoline } + pub fn get_entry_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().entry_stub_hit_trampoline + } + pub fn look_up_codegen_method(method_serial: usize) -> Option<MethodGenFn> { let table = &CodegenGlobals::get_instance().method_codegen_table; diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 888f795279..c8c945fac3 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -688,7 +688,7 @@ impl PendingBranch { // The branch struct is uninitialized right now but as a stable address. // We make sure the stub runs after the branch is initialized. let branch_struct_addr = self.uninit_branch.as_ptr() as usize; - let stub_addr = gen_call_branch_stub_hit(ocb, branch_struct_addr, target_idx); + let stub_addr = gen_branch_stub(ocb, branch_struct_addr, target_idx); if let Some(stub_addr) = stub_addr { // Fill the branch target with a stub @@ -741,6 +741,35 @@ impl PendingBranch { } } +// Store info about code used on YJIT entry +pub struct Entry { + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: CodePtr, // exclusive +} + +/// A [Branch] for a [Block] that is under construction. +pub struct PendingEntry { + pub uninit_entry: Box<MaybeUninit<Entry>>, + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive +} + +impl PendingEntry { + // Construct the entry in the heap + pub fn into_entry(mut self) -> EntryRef { + // Make the entry + let entry = Entry { + start_addr: self.start_addr.get().unwrap(), + end_addr: self.end_addr.get().unwrap(), + }; + // Move it to the designated place on the heap and unwrap MaybeUninit. + self.uninit_entry.write(entry); + let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry); + NonNull::new(raw_entry as *mut Entry).expect("no null from Box") + } +} + // In case a block is invalidated, this helps to remove all pointers to the block. pub type CmePtr = *const rb_callable_method_entry_t; @@ -813,6 +842,9 @@ pub type BlockRef = NonNull<Block>; /// proper usage. pub type BranchRef = NonNull<Branch>; +/// Pointer to an entry that is already added to an ISEQ +pub type EntryRef = NonNull<Entry>; + /// List of block versions for a given blockid type VersionList = Vec<BlockRef>; @@ -860,6 +892,9 @@ pub struct IseqPayload { // Indexes of code pages used by this this ISEQ pub pages: HashSet<usize>, + // List of ISEQ entry codes + pub entries: Vec<EntryRef>, + // Blocks that are invalidated but are not yet deallocated. // The code GC will free them later. pub dead_blocks: Vec<BlockRef>, @@ -998,6 +1033,12 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { unsafe { free_block(block, false) }; } + // Free all entries + for entryref in payload.entries.iter() { + let entry = unsafe { Box::from_raw(entryref.as_ptr()) }; + mem::drop(entry); + } + // Increment the freed iseq count incr_counter!(freed_iseq_count); } @@ -1969,9 +2010,8 @@ fn gen_block_series_body( pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { // Compute the current instruction index based on the current PC let insn_idx: u16 = unsafe { - let pc_zero = rb_iseq_pc_at_idx(iseq, 0); let ec_pc = get_cfp_pc(get_ec_cfp(ec)); - ec_pc.offset_from(pc_zero).try_into().ok()? + iseq_pc_to_insn_idx(iseq, ec_pc)? }; // The entry context makes no assumptions about types @@ -1985,7 +2025,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { let ocb = CodegenGlobals::get_outlined_cb(); // Write the interpreter entry prologue. Might be NULL when out of memory. - let code_ptr = gen_entry_prologue(cb, iseq, insn_idx); + let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx); // Try to generate code for the entry block let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb); @@ -2014,6 +2054,150 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { return code_ptr; } +// Change the entry's jump target from an entry stub to a next entry +pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { + let mut asm = Assembler::new(); + asm.comment("regenerate_entry"); + + // gen_entry_guard generates cmp + jne. We're rewriting only jne. + asm.jne(next_entry.into()); + + // Move write_pos to rewrite the entry + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb); + + // Rewind write_pos to the original one + assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr); + cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); +} + +pub type PendingEntryRef = Rc<PendingEntry>; + +/// Create a new entry reference for an ISEQ +pub fn new_pending_entry() -> PendingEntryRef { + let entry = PendingEntry { + uninit_entry: Box::new(MaybeUninit::uninit()), + start_addr: Cell::new(None), + end_addr: Cell::new(None), + }; + return Rc::new(entry); +} + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_call_entry_stub_hit]. + fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 { + with_vm_lock(src_loc!(), || { + match entry_stub_hit_body(entry_ptr, ec) { + Some(addr) => addr, + // Failed to service the stub by generating a new block so now we + // need to exit to the interpreter at the stubbed location. + None => return CodegenGlobals::get_stub_exit_code().raw_ptr(), + } + }) + } +} + +/// Called by the generated code when an entry stub is executed +fn entry_stub_hit_body(entry_ptr: *const c_void, ec: EcPtr) -> Option<*const u8> { + // Get ISEQ and insn_idx from the current ec->cfp + let cfp = unsafe { get_ec_cfp(ec) }; + let iseq = unsafe { get_cfp_iseq(cfp) }; + let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; + + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + // Compile a new entry guard as a next entry + let next_entry = cb.get_write_ptr(); + let mut asm = Assembler::new(); + let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; + asm.compile(cb); + + // Try to find an existing compiled version of this block + let blockid = BlockId { iseq, idx: insn_idx }; + let ctx = Context::default(); + let blockref = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb); + blockref + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. + None => match gen_block_series(blockid, &ctx, ec, cb, ocb) { + Some(blockref) => blockref, + None => { // No space + // Trigger code GC. This entry point will be recompiled later. + cb.code_gc(); + return None; + } + } + }; + + // Regenerate the previous entry + assert!(!entry_ptr.is_null()); + let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); + regenerate_entry(cb, &entryref, next_entry); + + // Write an entry to the heap and push it to the ISEQ + let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); + get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + // Let the stub jump to the block + Some(unsafe { blockref.as_ref() }.start_addr.raw_ptr()) +} + +/// Generate a stub that calls entry_stub_hit +pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let stub_addr = ocb.get_write_ptr(); + + let mut asm = Assembler::new(); + asm.comment("entry stub hit"); + + asm.mov(C_ARG_OPNDS[0], entry_address.into()); + + // Jump to trampoline to call entry_stub_hit() + // Not really a side exit, just don't need a padded jump here. + asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit()); + + asm.compile(ocb); + + if ocb.has_dropped_bytes() { + return None; // No space + } else { + return Some(stub_addr); + } +} + +/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so +/// it's useful for Code GC to call entry_stub_hit from a globally shared code. +pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr { + let ocb = ocb.unwrap(); + let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); + + // See gen_entry_guard for how it's used. + asm.comment("entry_stub_hit() trampoline"); + let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]); + + // Jump to the address returned by the entry_stub_hit() call + asm.jmp_opnd(jump_addr); + + asm.compile(ocb); + + code_ptr +} + /// Generate code for a branch, possibly rewriting and changing the size of it fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { // Remove old comments @@ -2088,7 +2272,7 @@ fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchR c_callable! { /// Generated code calls this function with the SysV calling convention. - /// See [gen_call_branch_stub_hit]. + /// See [gen_branch_stub]. fn branch_stub_hit( branch_ptr: *const c_void, target_idx: u32, @@ -2257,7 +2441,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - /// Generate a "stub", a piece of code that calls the compiler back when run. /// A piece of code that redeems for more code; a thunk for code. -fn gen_call_branch_stub_hit( +fn gen_branch_stub( ocb: &mut OutlinedCb, branch_struct_address: usize, target_idx: u32, @@ -2324,6 +2508,28 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr { impl Assembler { + /// Mark the start position of a patchable entry point in the machine code + pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr| { + entryref.start_addr.set(Some(code_ptr)); + }); + } + + /// Mark the end position of a patchable entry point in the machine code + pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr| { + entryref.end_addr.set(Some(code_ptr)); + }); + } + // Mark the start position of a patchable branch in the machine code fn mark_branch_start(&mut self, branchref: &PendingBranchRef) { @@ -2658,7 +2864,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // Create a stub for this branch target - let stub_addr = gen_call_branch_stub_hit(ocb, branchref.as_ptr() as usize, target_idx as u32); + let stub_addr = gen_branch_stub(ocb, branchref.as_ptr() as usize, target_idx as u32); // In case we were unable to generate a stub (e.g. OOM). Use the block's // exit instead of a stub for the block. It's important that we diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 570cc24719..5dd6cf5d69 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -143,6 +143,7 @@ pub use rb_insn_name as raw_insn_name; pub use rb_insn_len as raw_insn_len; pub use rb_yarv_class_of as CLASS_OF; pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_iseq as get_cfp_iseq; pub use rb_get_cfp_pc as get_cfp_pc; pub use rb_get_cfp_sp as get_cfp_sp; pub use rb_get_cfp_self as get_cfp_self; @@ -243,6 +244,12 @@ pub struct VALUE(pub usize); /// Pointer to an ISEQ pub type IseqPtr = *const rb_iseq_t; +// Given an ISEQ pointer, convert PC to insn_idx +pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { + let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; + unsafe { pc.offset_from(pc_zero) }.try_into().ok() +} + /// Opaque execution-context type from vm_core.h #[repr(C)] pub struct rb_execution_context_struct { diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 19cea5e682..5d16ad13f2 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1283,6 +1283,7 @@ extern "C" { pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; + pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t; pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 785dc9b0f9..5f8f841ffc 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -283,7 +283,6 @@ make_counters! { leave_se_interrupt, leave_interp_return, - leave_start_pc_non_zero, getivar_se_self_not_heap, getivar_idx_out_of_range, |