diff options
author | Aaron Patterson <tenderlove@ruby-lang.org> | 2022-11-15 11:46:18 -0800 |
---|---|---|
committer | Aaron Patterson <aaron.patterson@gmail.com> | 2022-11-18 12:04:10 -0800 |
commit | 9e067df76bda7384e8f3ddda2b348f3a742eb784 (patch) | |
tree | c231960ee347399930d6aa304b148d208a372611 | |
parent | 6582f34831cc665b2adcf7d475aceb9b918badb6 (diff) | |
download | ruby-9e067df76bda7384e8f3ddda2b348f3a742eb784.tar.gz |
32 bit comparison on shape id
This commit changes the shape id comparisons to use a 32 bit comparison
rather than 64 bit. That means we don't need to load the shape id into a
register on x86 machines.
Given the following program:
```ruby
class Foo
def initialize
@foo = 1
@bar = 1
end
def read
[@foo, @bar]
end
end
foo = Foo.new
foo.read
foo.read
foo.read
foo.read
foo.read
puts RubyVM::YJIT.disasm(Foo.instance_method(:read))
```
The machine code we generated _before_ this change is like this:
```
== BLOCK 1/4, ISEQ RANGE [0,3), 65 bytes ======================
# getinstancevariable
0x559a18623023: mov rax, qword ptr [r13 + 0x18]
# guard object is heap
0x559a18623027: test al, 7
0x559a1862302a: jne 0x559a1862502d
0x559a18623030: cmp rax, 4
0x559a18623034: jbe 0x559a1862502d
# guard shape, embedded, and T_OBJECT
0x559a1862303a: mov rcx, qword ptr [rax]
0x559a1862303d: movabs r11, 0xffff00000000201f
0x559a18623047: and rcx, r11
0x559a1862304a: movabs r11, 0xb000000002001
0x559a18623054: cmp rcx, r11
0x559a18623057: jne 0x559a18625046
0x559a1862305d: mov rax, qword ptr [rax + 0x18]
0x559a18623061: mov qword ptr [rbx], rax
== BLOCK 2/4, ISEQ RANGE [3,6), 0 bytes =======================
== BLOCK 3/4, ISEQ RANGE [3,6), 47 bytes ======================
# gen_direct_jmp: fallthrough
# getinstancevariable
# regenerate_branch
# getinstancevariable
# regenerate_branch
0x559a18623064: mov rax, qword ptr [r13 + 0x18]
# guard shape, embedded, and T_OBJECT
0x559a18623068: mov rcx, qword ptr [rax]
0x559a1862306b: movabs r11, 0xffff00000000201f
0x559a18623075: and rcx, r11
0x559a18623078: movabs r11, 0xb000000002001
0x559a18623082: cmp rcx, r11
0x559a18623085: jne 0x559a18625099
0x559a1862308b: mov rax, qword ptr [rax + 0x20]
0x559a1862308f: mov qword ptr [rbx + 8], rax
```
After this change, it's like this:
```
== BLOCK 1/4, ISEQ RANGE [0,3), 41 bytes ======================
# getinstancevariable
0x5560c986d023: mov rax, qword ptr [r13 + 0x18]
# guard object is heap
0x5560c986d027: test al, 7
0x5560c986d02a: jne 0x5560c986f02d
0x5560c986d030: cmp rax, 4
0x5560c986d034: jbe 0x5560c986f02d
# guard shape
0x5560c986d03a: cmp word ptr [rax + 6], 0x19
0x5560c986d03f: jne 0x5560c986f046
0x5560c986d045: mov rax, qword ptr [rax + 0x10]
0x5560c986d049: mov qword ptr [rbx], rax
== BLOCK 2/4, ISEQ RANGE [3,6), 0 bytes =======================
== BLOCK 3/4, ISEQ RANGE [3,6), 23 bytes ======================
# gen_direct_jmp: fallthrough
# getinstancevariable
# regenerate_branch
# getinstancevariable
# regenerate_branch
0x5560c986d04c: mov rax, qword ptr [r13 + 0x18]
# guard shape
0x5560c986d050: cmp word ptr [rax + 6], 0x19
0x5560c986d055: jne 0x5560c986f099
0x5560c986d05b: mov rax, qword ptr [rax + 0x18]
0x5560c986d05f: mov qword ptr [rbx + 8], rax
```
The first ivar read is a bit more complex, but the second ivar read is
much simpler. I think eventually we could teach the context about the
shape, then emit only one shape guard.
-rw-r--r-- | shape.c | 6 | ||||
-rw-r--r-- | shape.h | 1 | ||||
-rw-r--r-- | yjit/bindgen/src/main.rs | 2 | ||||
-rw-r--r-- | yjit/src/asm/arm64/mod.rs | 2 | ||||
-rw-r--r-- | yjit/src/backend/arm64/mod.rs | 19 | ||||
-rw-r--r-- | yjit/src/backend/ir.rs | 6 | ||||
-rw-r--r-- | yjit/src/codegen.rs | 15 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 6 |
8 files changed, 35 insertions, 22 deletions
@@ -325,10 +325,10 @@ rb_shape_set_shape(VALUE obj, rb_shape_t* shape) rb_shape_set_shape_id(obj, rb_shape_id(shape)); } -VALUE -rb_shape_flags_mask(void) +uint8_t +rb_shape_id_num_bits(void) { - return SHAPE_FLAG_MASK; + return SHAPE_ID_NUM_BITS; } rb_shape_t * @@ -132,6 +132,7 @@ static inline shape_id_t RCLASS_SHAPE_ID(VALUE obj) { bool rb_shape_root_shape_p(rb_shape_t* shape); rb_shape_t * rb_shape_get_root_shape(void); +uint8_t rb_shape_id_num_bits(void); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); rb_shape_t * rb_shape_get_parent(rb_shape_t * shape); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 2b94d95608..9afba864d2 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -85,7 +85,7 @@ fn main() { // From shape.h .allowlist_function("rb_shape_get_shape_id") .allowlist_function("rb_shape_get_shape_by_id") - .allowlist_function("rb_shape_flags_mask") + .allowlist_function("rb_shape_id_num_bits") .allowlist_function("rb_shape_get_iv_index") // From ruby/internal/intern/object.h diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 683711259e..d463613943 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -540,8 +540,6 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { pub fn ldurh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { - assert!(rt.num_bits == 32, "Rt should be a 32 bit register"); - assert!(rn.num_bits == 64, "Rn should be a 64 bit register"); assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::ldurh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c899f6871f..cbda5eec05 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -344,7 +344,14 @@ impl Assembler Insn::Cmp { left, right } => { let opnd0 
= split_load_operand(asm, left); let opnd0 = split_less_than_32_cmp(asm, opnd0); - let opnd1 = split_shifted_immediate(asm, right); + let split_right = split_shifted_immediate(asm, right); + let opnd1 = match split_right { + Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { + split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() + }, + _ => split_right + }; + asm.cmp(opnd0, opnd1); }, Insn::CRet(opnd) => { @@ -828,6 +835,7 @@ impl Assembler Opnd::Mem(_) => { match opnd.rm_num_bits() { 64 | 32 => ldur(cb, out.into(), opnd.into()), + 16 => ldurh(cb, out.into(), opnd.into()), 8 => ldurb(cb, out.into(), opnd.into()), num_bits => panic!("unexpected num_bits: {}", num_bits) }; @@ -1357,6 +1365,15 @@ mod tests { } #[test] + fn test_32_bit_register_with_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.cmp(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] fn test_emit_xor() { let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 0aa087630d..973fe89b6d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -109,8 +109,8 @@ impl Opnd }) }, - Opnd::InsnOut{idx, num_bits } => { - assert!(num_bits == 64); + Opnd::InsnOut{idx, num_bits: out_num_bits } => { + assert!(num_bits <= out_num_bits); Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp: disp, @@ -143,7 +143,7 @@ impl Opnd } /// Get the size in bits for this operand if there is one. - fn num_bits(&self) -> Option<u8> { + pub fn num_bits(&self) -> Option<u8> { match *self { Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits), Opnd::Mem(Mem { num_bits, .. 
}) => Some(num_bits), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index be4fc423b6..c66a293322 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2040,16 +2040,13 @@ fn gen_get_ivar( // Compile time self is embedded and the ivar index lands within the object let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; - let flags_mask: usize = unsafe { rb_shape_flags_mask() }.as_usize(); - let expected_flags_mask: usize = (RUBY_T_MASK as usize) | !flags_mask | (ROBJECT_EMBED as usize); - let expected_flags = comptime_receiver.builtin_flags() & expected_flags_mask; + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_bit_size = unsafe { rb_shape_id_num_bits() }; // either 16 or 32 depending on RUBY_DEBUG + let shape_byte_size = shape_bit_size / 8; + let shape_opnd = Opnd::mem(shape_bit_size, recv, RUBY_OFFSET_RBASIC_FLAGS + (8 - shape_byte_size as i32)); - // Combined guard for all flags: shape, embeddedness, and T_OBJECT - let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); - - asm.comment("guard shape, embedded, and T_OBJECT"); - let flags_opnd = asm.and(flags_opnd, Opnd::UImm(expected_flags_mask as u64)); - asm.cmp(flags_opnd, Opnd::UImm(expected_flags as u64)); + asm.comment("guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic).into(); jit_chain_guard( JCC_JNE, diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 17e5a84c3c..78967cd4ce 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -425,6 +425,9 @@ pub struct rb_shape { } pub type rb_shape_t = rb_shape; extern "C" { + pub fn rb_shape_id_num_bits() -> u8; +} +extern "C" { pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t; } extern "C" { @@ -433,9 +436,6 @@ extern "C" { extern "C" { pub fn 
rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; } -extern "C" { - pub fn rb_shape_flags_mask() -> VALUE; -} pub const idDot2: ruby_method_ids = 128; pub const idDot3: ruby_method_ids = 129; pub const idUPlus: ruby_method_ids = 132; |