diff options
author | Aaron Patterson <tenderlove@ruby-lang.org> | 2022-11-15 11:46:18 -0800 |
---|---|---|
committer | Aaron Patterson <aaron.patterson@gmail.com> | 2022-11-18 12:04:10 -0800 |
commit | 9e067df76bda7384e8f3ddda2b348f3a742eb784 (patch) | |
tree | c231960ee347399930d6aa304b148d208a372611 | |
parent | 6582f34831cc665b2adcf7d475aceb9b918badb6 (diff) | |
download | ruby-9e067df76bda7384e8f3ddda2b348f3a742eb784.tar.gz |
32 bit comparison on shape id
This commit changes the shape id comparisons to use a 32 bit comparison
rather than 64 bit. That means we don't need to load the shape id into a
register on x86 machines.
Given the following program:
```ruby
class Foo
def initialize
@foo = 1
@bar = 1
end
def read
[@foo, @bar]
end
end
foo = Foo.new
foo.read
foo.read
foo.read
foo.read
foo.read
puts RubyVM::YJIT.disasm(Foo.instance_method(:read))
```
The machine code we generated _before_ this change is like this:
```
== BLOCK 1/4, ISEQ RANGE [0,3), 65 bytes ======================
# getinstancevariable
0x559a18623023: mov rax, qword ptr [r13 + 0x18]
# guard object is heap
0x559a18623027: test al, 7
0x559a1862302a: jne 0x559a1862502d
0x559a18623030: cmp rax, 4
0x559a18623034: jbe 0x559a1862502d
# guard shape, embedded, and T_OBJECT
0x559a1862303a: mov rcx, qword ptr [rax]
0x559a1862303d: movabs r11, 0xffff00000000201f
0x559a18623047: and rcx, r11
0x559a1862304a: movabs r11, 0xb000000002001
0x559a18623054: cmp rcx, r11
0x559a18623057: jne 0x559a18625046
0x559a1862305d: mov rax, qword ptr [rax + 0x18]
0x559a18623061: mov qword ptr [rbx], rax
== BLOCK 2/4, ISEQ RANGE [3,6), 0 bytes =======================
== BLOCK 3/4, ISEQ RANGE [3,6), 47 bytes ======================
# gen_direct_jmp: fallthrough
# getinstancevariable
# regenerate_branch
# getinstancevariable
# regenerate_branch
0x559a18623064: mov rax, qword ptr [r13 + 0x18]
# guard shape, embedded, and T_OBJECT
0x559a18623068: mov rcx, qword ptr [rax]
0x559a1862306b: movabs r11, 0xffff00000000201f
0x559a18623075: and rcx, r11
0x559a18623078: movabs r11, 0xb000000002001
0x559a18623082: cmp rcx, r11
0x559a18623085: jne 0x559a18625099
0x559a1862308b: mov rax, qword ptr [rax + 0x20]
0x559a1862308f: mov qword ptr [rbx + 8], rax
```
After this change, it's like this:
```
== BLOCK 1/4, ISEQ RANGE [0,3), 41 bytes ======================
# getinstancevariable
0x5560c986d023: mov rax, qword ptr [r13 + 0x18]
# guard object is heap
0x5560c986d027: test al, 7
0x5560c986d02a: jne 0x5560c986f02d
0x5560c986d030: cmp rax, 4
0x5560c986d034: jbe 0x5560c986f02d
# guard shape
0x5560c986d03a: cmp word ptr [rax + 6], 0x19
0x5560c986d03f: jne 0x5560c986f046
0x5560c986d045: mov rax, qword ptr [rax + 0x10]
0x5560c986d049: mov qword ptr [rbx], rax
== BLOCK 2/4, ISEQ RANGE [3,6), 0 bytes =======================
== BLOCK 3/4, ISEQ RANGE [3,6), 23 bytes ======================
# gen_direct_jmp: fallthrough
# getinstancevariable
# regenerate_branch
# getinstancevariable
# regenerate_branch
0x5560c986d04c: mov rax, qword ptr [r13 + 0x18]
# guard shape
0x5560c986d050: cmp word ptr [rax + 6], 0x19
0x5560c986d055: jne 0x5560c986f099
0x5560c986d05b: mov rax, qword ptr [rax + 0x18]
0x5560c986d05f: mov qword ptr [rbx + 8], rax
```
The first ivar read is a bit more complex, but the second ivar read is
much simpler. I think eventually we could teach the context about the
shape, then emit only one shape guard.
-rw-r--r-- | shape.c | 6 | ||||
-rw-r--r-- | shape.h | 1 | ||||
-rw-r--r-- | yjit/bindgen/src/main.rs | 2 | ||||
-rw-r--r-- | yjit/src/asm/arm64/mod.rs | 2 | ||||
-rw-r--r-- | yjit/src/backend/arm64/mod.rs | 19 | ||||
-rw-r--r-- | yjit/src/backend/ir.rs | 6 | ||||
-rw-r--r-- | yjit/src/codegen.rs | 15 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 6 |
8 files changed, 35 insertions, 22 deletions
@@ -325,10 +325,10 @@ rb_shape_set_shape(VALUE obj, rb_shape_t* shape) rb_shape_set_shape_id(obj, rb_shape_id(shape)); } -VALUE -rb_shape_flags_mask(void) +uint8_t +rb_shape_id_num_bits(void) { - return SHAPE_FLAG_MASK; + return SHAPE_ID_NUM_BITS; } rb_shape_t * @@ -132,6 +132,7 @@ static inline shape_id_t RCLASS_SHAPE_ID(VALUE obj) { bool rb_shape_root_shape_p(rb_shape_t* shape); rb_shape_t * rb_shape_get_root_shape(void); +uint8_t rb_shape_id_num_bits(void); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); rb_shape_t * rb_shape_get_parent(rb_shape_t * shape); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 2b94d95608..9afba864d2 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -85,7 +85,7 @@ fn main() { // From shape.h .allowlist_function("rb_shape_get_shape_id") .allowlist_function("rb_shape_get_shape_by_id") - .allowlist_function("rb_shape_flags_mask") + .allowlist_function("rb_shape_id_num_bits") .allowlist_function("rb_shape_get_iv_index") // From ruby/internal/intern/object.h diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 683711259e..d463613943 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -540,8 +540,6 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { pub fn ldurh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { - assert!(rt.num_bits == 32, "Rt should be a 32 bit register"); - assert!(rn.num_bits == 64, "Rn should be a 64 bit register"); assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::ldurh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c899f6871f..cbda5eec05 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -344,7 +344,14 @@ impl Assembler Insn::Cmp { left, right } => { let opnd0 
= split_load_operand(asm, left); let opnd0 = split_less_than_32_cmp(asm, opnd0); - let opnd1 = split_shifted_immediate(asm, right); + let split_right = split_shifted_immediate(asm, right); + let opnd1 = match split_right { + Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { + split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() + }, + _ => split_right + }; + asm.cmp(opnd0, opnd1); }, Insn::CRet(opnd) => { @@ -828,6 +835,7 @@ impl Assembler Opnd::Mem(_) => { match opnd.rm_num_bits() { 64 | 32 => ldur(cb, out.into(), opnd.into()), + 16 => ldurh(cb, out.into(), opnd.into()), 8 => ldurb(cb, out.into(), opnd.into()), num_bits => panic!("unexpected num_bits: {}", num_bits) }; @@ -1357,6 +1365,15 @@ mod tests { } #[test] + fn test_32_bit_register_with_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.cmp(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] fn test_emit_xor() { let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 0aa087630d..973fe89b6d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -109,8 +109,8 @@ impl Opnd }) }, - Opnd::InsnOut{idx, num_bits } => { - assert!(num_bits == 64); + Opnd::InsnOut{idx, num_bits: out_num_bits } => { + assert!(num_bits <= out_num_bits); Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp: disp, @@ -143,7 +143,7 @@ impl Opnd } /// Get the size in bits for this operand if there is one. - fn num_bits(&self) -> Option<u8> { + pub fn num_bits(&self) -> Option<u8> { match *self { Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits), Opnd::Mem(Mem { num_bits, .. 
}) => Some(num_bits), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index be4fc423b6..c66a293322 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2040,16 +2040,13 @@ fn gen_get_ivar( // Compile time self is embedded and the ivar index lands within the object let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; - let flags_mask: usize = unsafe { rb_shape_flags_mask() }.as_usize(); - let expected_flags_mask: usize = (RUBY_T_MASK as usize) | !flags_mask | (ROBJECT_EMBED as usize); - let expected_flags = comptime_receiver.builtin_flags() & expected_flags_mask; + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_bit_size = unsafe { rb_shape_id_num_bits() }; // either 16 or 32 depending on RUBY_DEBUG + let shape_byte_size = shape_bit_size / 8; + let shape_opnd = Opnd::mem(shape_bit_size, recv, RUBY_OFFSET_RBASIC_FLAGS + (8 - shape_byte_size as i32)); - // Combined guard for all flags: shape, embeddedness, and T_OBJECT - let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); - - asm.comment("guard shape, embedded, and T_OBJECT"); - let flags_opnd = asm.and(flags_opnd, Opnd::UImm(expected_flags_mask as u64)); - asm.cmp(flags_opnd, Opnd::UImm(expected_flags as u64)); + asm.comment("guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic).into(); jit_chain_guard( JCC_JNE, diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 17e5a84c3c..78967cd4ce 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -425,6 +425,9 @@ pub struct rb_shape { } pub type rb_shape_t = rb_shape; extern "C" { + pub fn rb_shape_id_num_bits() -> u8; +} +extern "C" { pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t; } extern "C" { @@ -433,9 +436,6 @@ extern "C" { extern "C" { pub fn 
rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; } -extern "C" { - pub fn rb_shape_flags_mask() -> VALUE; -} pub const idDot2: ruby_method_ids = 128; pub const idDot3: ruby_method_ids = 129; pub const idUPlus: ruby_method_ids = 132; |