diff options
author | Jimmy Miller <jimmy.miller@shopify.com> | 2022-09-14 10:32:22 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-14 10:32:22 -0400 |
commit | 758a1d730230ad0f4adfd7681c1fe4c8ac398bde (patch) | |
tree | 876164dea874742b50ad53ee14fa30473c20b930 | |
parent | 8f37e9c91814357f79911e208ef4d0d56dfa9433 (diff) | |
download | ruby-758a1d730230ad0f4adfd7681c1fe4c8ac398bde.tar.gz |
Initial support for VM_CALL_ARGS_SPLAT (#6341)
* Initial support for VM_CALL_ARGS_SPLAT
This implements support for calls with splat (*) for some methods. In
benchmarks this made very little difference for most benchmarks, but a
large difference for binarytrees. Looking at side exits, many
benchmarks now don't exit for splat, but exit for some other
reason. Binarytrees however had a number of calls that used splat args
that are now much faster. In my non-scientific benchmarking this made
splat args performance on par with not using splat args at all.
* Fix wording and whitespace
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
* Get rid of side_effect reassignment
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
-rw-r--r-- | yjit.c | 6 | ||||
-rw-r--r-- | yjit/bindgen/src/main.rs | 1 | ||||
-rw-r--r-- | yjit/src/codegen.rs | 200 | ||||
-rw-r--r-- | yjit/src/cruby.rs | 2 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 3 | ||||
-rw-r--r-- | yjit/src/stats.rs | 7 |
6 files changed, 182 insertions, 37 deletions
@@ -604,6 +604,12 @@ rb_get_iseq_flags_has_rest(const rb_iseq_t *iseq) } bool +rb_get_iseq_flags_ruby2_keywords(const rb_iseq_t *iseq) +{ + return iseq->body->param.flags.ruby2_keywords; +} + +bool rb_get_iseq_flags_has_block(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_block; diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index eaf030a1de..294da21378 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -342,6 +342,7 @@ fn main() { .allowlist_function("rb_get_iseq_flags_has_kwrest") .allowlist_function("rb_get_iseq_flags_has_block") .allowlist_function("rb_get_iseq_flags_has_accepts_no_kwarg") + .allowlist_function("rb_get_iseq_flags_ruby2_keywords") .allowlist_function("rb_get_iseq_body_local_table_size") .allowlist_function("rb_get_iseq_body_param_keyword") .allowlist_function("rb_get_iseq_body_param_size") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ca9ec655d1..f13505365e 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -482,7 +482,7 @@ fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) - // // No guards change the logic for reconstructing interpreter state at the // moment, so there is one unique side exit for each context. Note that -// it's incorrect to jump to the side exit after any ctx stack push/pop operations +// it's incorrect to jump to the side exit after any ctx stack push operations // since they change the logic required for reconstructing interpreter state. fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr { match jit.side_exit_for_pc { @@ -1427,7 +1427,7 @@ fn gen_expandarray( return KeepCompiling; } - // Move the array from the stack into REG0 and check that it's an array. + // Move the array from the stack and check that it's an array. let array_reg = asm.load(array_opnd); guard_object_is_heap( asm, @@ -3911,6 +3911,12 @@ fn gen_send_cfunc( ) -> CodegenStatus { let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let mut argc = argc; + + // Create a side-exit to fall back to the interpreter + let side_exit = get_side_exit(jit, ocb, ctx); + + let flags = unsafe { vm_ci_flag(ci) }; // If the function expects a Ruby array of arguments if cfunc_argc < 0 && cfunc_argc != -1 { @@ -3925,25 +3931,6 @@ fn gen_send_cfunc( unsafe { get_cikw_keyword_len(kw_arg) } }; - // Number of args which will be passed through to the callee - // This is adjusted by the kwargs being combined into a hash. - let passed_argc = if kw_arg.is_null() { - argc - } else { - argc - kw_arg_num + 1 - }; - - // If the argument count doesn't match - if cfunc_argc >= 0 && cfunc_argc != passed_argc { - gen_counter_incr!(asm, send_cfunc_argc_mismatch); - return CantCompile; - } - - // Don't JIT functions that need C stack arguments for now - if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { - gen_counter_incr!(asm, send_cfunc_toomany_args); - return CantCompile; - } if c_method_tracing_currently_enabled(jit) { // Don't JIT if tracing c_call or c_return @@ -3964,9 +3951,6 @@ fn gen_send_cfunc( } } - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - // Check for interrupts gen_check_ints(asm, side_exit); @@ -3978,6 +3962,38 @@ fn gen_send_cfunc( asm.cmp(CFP, stack_limit); asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); + if flags & VM_CALL_ARGS_SPLAT != 0 { + if cfunc_argc > 0 { + // push_splat_args does a ctx.stack_push so we can no longer side exit + argc = push_splat_args(argc, ctx, asm, ocb, side_exit, cfunc_argc as u32); + } else { + // This is a variadic c function and we'd need to pop + // each of the elements off the array, but the array may be dynamically sized + gen_counter_incr!(asm, send_args_splat_variadic); + return CantCompile + } + } + + // Number of args which will be passed through to the callee + // This is adjusted by the kwargs being combined into a hash. + let passed_argc = if kw_arg.is_null() { + argc + } else { + argc - kw_arg_num + 1 + }; + + // If the argument count doesn't match + if cfunc_argc >= 0 && cfunc_argc != passed_argc { + gen_counter_incr!(asm, send_cfunc_argc_mismatch); + return CantCompile; + } + + // Don't JIT functions that need C stack arguments for now + if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { + gen_counter_incr!(asm, send_cfunc_toomany_args); + return CantCompile; + } + // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); @@ -4152,6 +4168,85 @@ fn gen_return_branch( } } + +/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args) +/// It optimistically compiles to a static size that is the exact number of arguments +/// needed for the function. +fn push_splat_args(argc: i32, ctx: &mut Context, asm: &mut Assembler, ocb: &mut OutlinedCb, side_exit: CodePtr, num_params: u32) -> i32 { + + let mut argc = argc; + + asm.comment("push_splat_args"); + + let array_opnd = ctx.stack_opnd(0); + + argc = argc - 1; + + let array_reg = asm.load(array_opnd); + guard_object_is_heap( + asm, + array_reg, + counted_exit!(ocb, side_exit, send_splat_not_array), + ); + guard_object_is_array( + asm, + array_reg, + counted_exit!(ocb, side_exit, send_splat_not_array), + ); + + // Pull out the embed flag to check if it's an embedded array. + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + + // Get the length of the array + let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); + let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); + + // Conditionally move the length of the heap array + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + let array_len_opnd = Opnd::mem( + (8 * size_of::<std::os::raw::c_long>()) as u8, + asm.load(array_opnd), + RUBY_OFFSET_RARRAY_AS_HEAP_LEN, + ); + let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd); + + let required_args = num_params - argc as u32; + + // Only handle the case where the number of values in the array is equal to the number requested + asm.cmp(array_len_opnd, required_args.into()); + asm.jne(counted_exit!(ocb, side_exit, send_splatarray_rhs_too_small).into()); + + let array_opnd = ctx.stack_pop(1); + + if required_args > 0 { + + // Load the address of the embedded array + // (struct RArray *)(obj)->as.ary + let array_reg = asm.load(array_opnd); + let ary_opnd = asm.lea(Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); + + // Conditionally load the address of the heap array + // (struct RArray *)(obj)->as.heap.ptr + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64)); + let heap_ptr_opnd = Opnd::mem( + (8 * size_of::<usize>()) as u8, + asm.load(array_opnd), + RUBY_OFFSET_RARRAY_AS_HEAP_PTR, + ); + + let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); + + for i in (0..required_args as i32) { + let top = ctx.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i * (SIZEOF_VALUE as i32))); + argc += 1; + } + } + argc +} + fn gen_send_iseq( jit: &mut JITState, ctx: &mut Context, @@ -4165,6 +4260,11 @@ fn gen_send_iseq( let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; let mut argc = argc; + let flags = unsafe { vm_ci_flag(ci) }; + + // Create a side-exit to fall back to the interpreter + let side_exit = get_side_exit(jit, ocb, ctx); + // When you have keyword arguments, there is an extra object that gets // placed on the stack the represents a bitmap of the keywords that were not // specified at the call site. We need to keep track of the fact that this @@ -4190,6 +4290,18 @@ fn gen_send_iseq( return CantCompile; } + // In order to handle backwards compatibility between ruby 3 and 2 + // ruby2_keywords was introduced. It is called only on methods + // with splat and changes they way they handle them. + // We are just going to not compile these. + // https://www.rubydoc.info/stdlib/core/Proc:ruby2_keywords + if unsafe { + get_iseq_flags_ruby2_keywords(jit.iseq) + } { + gen_counter_incr!(asm, send_iseq_ruby2_keywords); + return CantCompile; + } + // If we have keyword arguments being passed to a callee that only takes // positionals, then we need to allocate a hash. For now we're going to // call that too complex and bail. @@ -4221,6 +4333,16 @@ fn gen_send_iseq( } } + + if flags & VM_CALL_ARGS_SPLAT != 0 && flags & VM_CALL_ZSUPER != 0 { + // zsuper methods are super calls without any arguments. + // They are also marked as splat, but don't actually have an array + // they pull arguments from, instead we need to change to call + // a different method with the current stack. + gen_counter_incr!(asm, send_iseq_zsuper); + return CantCompile; + } + let mut start_pc_offset = 0; let required_num = unsafe { get_iseq_body_param_lead_num(iseq) }; @@ -4238,6 +4360,11 @@ fn gen_send_iseq( let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) }; let opts_missing: i32 = opt_num - opts_filled; + if opt_num > 0 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr!(asm, send_iseq_complex_callee); + return CantCompile; + } + if opts_filled < 0 || opts_filled > opt_num { gen_counter_incr!(asm, send_iseq_arity_error); return CantCompile; @@ -4334,8 +4461,7 @@ fn gen_send_iseq( // Number of locals that are not parameters let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32); - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); + // Check for interrupts gen_check_ints(asm, side_exit); @@ -4384,6 +4510,11 @@ fn gen_send_iseq( asm.cmp(CFP, stack_limit); asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); + // push_splat_args does a ctx.stack_push so we can no longer side exit + if flags & VM_CALL_ARGS_SPLAT != 0 { + argc = push_splat_args(argc, ctx, asm, ocb, side_exit, num_params); + } + if doing_kw_call { // Here we're calling a method with keyword arguments and specifying // keyword arguments at this call site. @@ -4787,12 +4918,7 @@ fn gen_send_general( return CantCompile; } - // Don't JIT calls that aren't simple - // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. - if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat); - return CantCompile; - } + if flags & VM_CALL_ARGS_BLOCKARG != 0 { gen_counter_incr!(asm, send_block_arg); return CantCompile; @@ -4865,6 +4991,11 @@ fn gen_send_general( // To handle the aliased method case (VM_METHOD_TYPE_ALIAS) loop { let def_type = unsafe { get_cme_def_type(cme) }; + if flags & VM_CALL_ARGS_SPLAT != 0 && (def_type != VM_METHOD_TYPE_ISEQ && def_type != VM_METHOD_TYPE_CFUNC) { + // We can't handle splat calls to non-iseq methods + return CantCompile; + } + match def_type { VM_METHOD_TYPE_ISEQ => { return gen_send_iseq(jit, ctx, asm, ocb, ci, cme, block, argc); @@ -5087,10 +5218,7 @@ fn gen_invokesuper( // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. - if ci_flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat); - return CantCompile; - } + if ci_flags & VM_CALL_KWARG != 0 { gen_counter_incr!(asm, send_keywords); return CantCompile; diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 2f823e1b61..25149ab730 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -164,6 +164,7 @@ pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; +pub use rb_get_iseq_flags_ruby2_keywords as get_iseq_flags_ruby2_keywords; pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; @@ -619,6 +620,7 @@ mod manual_defs { pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit; pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit; pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit; + pub const VM_CALL_ZSUPER : u32 = 1 << VM_CALL_ZSUPER_bit; // From internal/struct.h - in anonymous enum, so we can't easily import it pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 9869998788..e3dbdc0d4b 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1146,6 +1146,9 @@ extern "C" { pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool; } extern "C" { + pub fn rb_get_iseq_flags_ruby2_keywords(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool; } extern "C" { diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 4843cecf92..56aa0b4081 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -164,7 +164,8 @@ make_counters! { send_keywords, send_kw_splat, - send_args_splat, + send_args_splat_super, + send_iseq_zsuper, send_block_arg, send_ivar_set_method, send_zsuper_method, @@ -192,6 +193,10 @@ make_counters! { send_getter_arity, send_se_cf_overflow, send_se_protected_check_failed, + send_splatarray_rhs_too_small, + send_splat_not_array, + send_args_splat_variadic, + send_iseq_ruby2_keywords, traced_cfunc_return, |