author     Kevin Newton <kddnewton@gmail.com>       2022-07-05 16:04:19 -0400
committer  Takashi Kokubun <takashikkbn@gmail.com>  2022-08-29 08:46:58 -0700
commit     7a9b581e0896d4aa7a037da90c837b830213c8e8 (patch)
tree       8d613c9cca2af21aa17840270b23acb233b9f3ff
parent     b272c57f27628ab114206c777d5b274713d31079 (diff)
Arm64 progress (https://github.com/Shopify/ruby/pull/304)
* Get initial wiring up
* Split IncrCounter instruction
* Breakpoints in Arm64
* Support for ORR
* MOV instruction encodings
* Implement JmpOpnd and CRet
* Add ORN
* Add MVN
* PUSH, POP, CCALL for Arm64
* Some formatting and implement Op::Not for Arm64
* Consistent constants when working with the Arm64 SP
* Allow OR-ing values into the memory buffer
* Test lowering Arm64 ADD
* Emit unconditional jumps consistently in Arm64
* Begin emitting conditional jumps for A64
* Back out some labelref changes
* Remove label API that no longer exists
* Use a trait for the label encoders
* Encode nop
* Add in nops so jumps are the same width no matter what on Arm64
* Op::Jbe for CodePtr
* Pass src_addr and dst_addr instead of calculated offset to label refs
* Even more jump work for Arm64
* Fix up jumps to use consistent assertions
* Handle splitting Add, Sub, and Not insns for Arm64
* More Arm64 splits and various fixes
* PR feedback for Arm64 support
* Split up jumps and conditional jump logic
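
The encoders this work adds are small and self-contained, so one is worth sketching in isolation. Below is a minimal standalone model of the B/BL encoding implemented by yjit/src/asm/arm64/inst/call.rs in the diff that follows; the helper names are local to this sketch, and the expected machine words are taken from the commit's own tests.

```rust
/// True when a signed value fits in the branch instruction's 26-bit field.
fn imm26_fits(value: i64) -> bool {
    (-(1_i64 << 25)..=(1_i64 << 25) - 1).contains(&value)
}

/// Encode B (link = false) or BL (link = true). `imm26` counts instructions,
/// i.e. the byte offset divided by four.
fn encode_branch(imm26: i32, link: bool) -> u32 {
    assert!(imm26_fits(imm26 as i64));
    let op: u32 = if link { 1 } else { 0 }; // bit 31: the only B/BL difference
    let family: u32 = 0b101;                // unconditional-branch family
    let imm = (imm26 as u32) & ((1 << 26) - 1);
    (op << 31) | (family << 26) | imm
}

fn main() {
    assert_eq!(encode_branch(256, false), 0x14000100); // B  +256 instructions
    assert_eq!(encode_branch(-256, true), 0x97ffff00); // BL -256 instructions
}
```

Bit 31 is the only difference between the two instructions, which is why the patch folds both into a single `Call` struct parameterized by an `Op` field.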
-rw-r--r--  yjit/src/asm/arm64/inst/breakpoint.rs  |  55
-rw-r--r--  yjit/src/asm/arm64/inst/call.rs        |  51
-rw-r--r--  yjit/src/asm/arm64/inst/logical_imm.rs |  37
-rw-r--r--  yjit/src/asm/arm64/inst/logical_reg.rs |  80
-rw-r--r--  yjit/src/asm/arm64/inst/mod.rs         |   4
-rw-r--r--  yjit/src/asm/arm64/inst/nop.rs         |  44
-rw-r--r--  yjit/src/asm/arm64/mod.rs              | 198
-rw-r--r--  yjit/src/asm/arm64/opnd.rs             |  48
-rw-r--r--  yjit/src/asm/mod.rs                    |  33
-rw-r--r--  yjit/src/asm/x86_64/mod.rs             | 101
-rw-r--r--  yjit/src/backend/arm64/mod.rs          | 410
-rw-r--r--  yjit/src/backend/ir.rs                 |   7
-rw-r--r--  yjit/src/backend/mod.rs                |   7
-rw-r--r--  yjit/src/backend/x86_64/mod.rs         |   3
14 files changed, 939 insertions, 139 deletions
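
Much of the churn in yjit/src/asm/mod.rs and the x86_64 assembler below comes from reworking `CodeBlock::label_ref`: instead of hard-coding a 32-bit x86 offset, each label reference now reserves a fixed number of bytes and carries a boxed closure that encodes the branch once the label address is known. The following is a simplified, self-contained model of that scheme; `Buf` and its bookkeeping are stand-ins for the real `CodeBlock`, which also handles capacity, memory mapping, and label names.

```rust
/// A pending reference to a label whose address is not yet known.
struct LabelRef {
    pos: usize,                                  // where the patch slot starts
    label_idx: usize,                            // label this reference targets
    num_bytes: usize,                            // size reserved for the patch
    encode: Box<dyn FnOnce(&mut Buf, i64, i64)>, // called with (src_addr, dst_addr)
}

struct Buf {
    bytes: Vec<u8>,
    write_pos: usize,
    label_addrs: Vec<usize>,
    label_refs: Vec<LabelRef>,
}

impl Buf {
    fn write_byte(&mut self, byte: u8) {
        if self.write_pos < self.bytes.len() {
            self.bytes[self.write_pos] = byte; // patching a reserved slot
        } else {
            self.bytes.push(byte);             // appending fresh code
        }
        self.write_pos += 1;
    }

    fn write_int(&mut self, val: u64, num_bits: u32) {
        for i in 0..(num_bits / 8) {
            self.write_byte((val >> (8 * i)) as u8); // little-endian
        }
    }

    /// Record a reference and reserve its patch slot up front, so the slot
    /// has the same size no matter how the branch is later encoded.
    fn label_ref<E>(&mut self, label_idx: usize, num_bytes: usize, encode: E)
    where
        E: FnOnce(&mut Buf, i64, i64) + 'static,
    {
        let pos = self.write_pos;
        self.label_refs.push(LabelRef { pos, label_idx, num_bytes, encode: Box::new(encode) });
        for _ in 0..num_bytes {
            self.write_byte(0);
        }
    }

    /// Once every label address is known, revisit each slot and let its
    /// closure emit the final instruction.
    fn link_labels(&mut self) {
        let orig_pos = self.write_pos;
        for r in std::mem::take(&mut self.label_refs) {
            let src_addr = (r.pos + r.num_bytes) as i64; // end of the slot
            let dst_addr = self.label_addrs[r.label_idx] as i64;
            self.write_pos = r.pos;
            (r.encode)(self, src_addr, dst_addr);
        }
        self.write_pos = orig_pos;
    }
}

fn main() {
    let mut buf = Buf { bytes: vec![], write_pos: 0, label_addrs: vec![16], label_refs: vec![] };

    // Mirrors the patched x86_64 jmp_label: one opcode byte plus a rel32.
    buf.label_ref(0, 5, |cb, src_addr, dst_addr| {
        cb.write_byte(0xE9);
        cb.write_int((dst_addr - src_addr) as u64, 32);
    });

    buf.link_labels();
    assert_eq!(buf.bytes, vec![0xE9, 11, 0, 0, 0]); // jump from byte 5 to byte 16
}
```

Under this shape, the patched x86_64 `jmp_label` reserves five bytes (opcode plus rel32) and `write_jcc` reserves six, while the Arm64 backend reserves a single four-byte slot and encodes a `b` or `b.cond` in place.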
diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs new file mode 100644 index 0000000000..be4920ac76 --- /dev/null +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -0,0 +1,55 @@ +/// The struct that represents an A64 breakpoint instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 | +/// | imm16.................................................. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Breakpoint { + /// The value to be captured by ESR_ELx.ISS + imm16: u16 +} + +impl Breakpoint { + /// BRK + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + pub fn brk(imm16: u16) -> Self { + Self { imm16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +const FAMILY: u32 = 0b101; + +impl From<Breakpoint> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Breakpoint) -> Self { + let imm16 = inst.imm16 as u32; + + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 21) + | (imm16 << 5) + } +} + +impl From<Breakpoint> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Breakpoint) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_brk() { + let result: u32 = Breakpoint::brk(7).into(); + assert_eq!(0xd42000e0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 6f23acf9f5..8d65359f77 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -1,22 +1,41 @@ -/// The struct that represents an A64 branch with link instruction that can be -/// encoded. +/// The operation to perform for this instruction. +enum Op { + /// Branch directly, with a hint that this is not a subroutine call or + /// return. + Branch = 0, + + /// Branch directly, with a hint that this is a subroutine call or return. + BranchWithLink = 1 +} + +/// The struct that represents an A64 branch with our without link instruction +/// that can be encoded. /// /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 0 0 1 0 1 | -/// | imm26.................................................................................... | +/// | 0 0 1 0 1 | +/// | op imm26.................................................................................... | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct Call { /// The PC-relative offset to jump to (which will be multiplied by 4). - imm26: i32 + imm26: i32, + + /// The operation to perform for this instruction. 
+ op: Op } impl Call { + /// B + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + pub fn b(imm26: i32) -> Self { + Self { imm26, op: Op::Branch } + } + /// BL /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en pub fn bl(imm26: i32) -> Self { - Self { imm26 } + Self { imm26, op: Op::BranchWithLink } } } @@ -29,7 +48,7 @@ impl From<Call> for u32 { let imm26 = (inst.imm26 as u32) & ((1 << 26) - 1); 0 - | (1 << 31) + | ((inst.op as u32) << 31) | (FAMILY << 26) | imm26 } @@ -64,4 +83,22 @@ mod tests { let result: u32 = Call::bl(-256).into(); assert_eq!(0x97ffff00, result); } + + #[test] + fn test_b() { + let result: u32 = Call::b(0).into(); + assert_eq!(0x14000000, result); + } + + #[test] + fn test_b_positive() { + let result: u32 = Call::b(256).into(); + assert_eq!(0x14000100, result); + } + + #[test] + fn test_b_negative() { + let result: u32 = Call::b(-256).into(); + assert_eq!(0x17ffff00, result); + } } diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index cc2a16cbdc..13865697f6 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -5,6 +5,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -12,7 +15,7 @@ enum Opc { /// The struct that represents an A64 bitwise immediate instruction that can be /// encoded. /// -/// AND/ANDS (immediate) +/// AND/ORR/ANDS (immediate) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | /// | 1 0 0 1 0 0 | @@ -37,19 +40,31 @@ pub struct LogicalImm { } impl LogicalImm { - /// AND (immediate) + /// AND (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } } - /// ANDS (immediate) + /// ANDS (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } - /// TST (immediate) + /// MOV (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self::ands(31, rn, imm, num_bits) @@ -101,6 +116,20 @@ mod tests { } #[test] + fn test_mov() { + let inst = 
LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb200f3e0, result); + } + + #[test] + fn test_orr() { + let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb2400820, result); + } + + #[test] fn test_tst() { let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 3feb3350ab..5d7954c587 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -1,5 +1,14 @@ use super::super::arg::Sf; +/// Whether or not this is a NOT instruction. +enum N { + /// This is not a NOT instruction. + No = 0, + + /// This is a NOT instruction. + Yes = 1 +} + /// The type of shift to perform on the second operand register. enum Shift { LSL = 0b00, // logical shift left (unsigned) @@ -13,6 +22,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -20,11 +32,11 @@ enum Opc { /// The struct that represents an A64 logical register instruction that can be /// encoded. /// -/// AND/ANDS (shifted register) +/// AND/ORR/ANDS (shifted register) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 0 1 0 1 0 0 | -/// | sf opc.. shift rm.............. imm6............... rn.............. rd.............. | +/// | 0 1 0 1 0 | +/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct LogicalReg { @@ -40,6 +52,9 @@ pub struct LogicalReg { /// The register number of the second operand register. rm: u8, + /// Whether or not this is a NOT instruction. + n: N, + /// The type of shift to perform on the second operand register. 
shift: Shift, @@ -54,19 +69,43 @@ impl LogicalReg { /// AND (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } } /// ANDS (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// MOV (register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// MVN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// TST (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd: 31, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } } @@ -83,6 +122,7 @@ impl From<LogicalReg> for u32 { | ((inst.opc as u32) << 29) | (FAMILY << 25) | ((inst.shift as u32) << 22) + | ((inst.n as u32) << 21) | ((inst.rm as u32) << 16) | (imm6 << 10) | ((inst.rn as u32) << 5) @@ -117,6 +157,34 @@ mod tests { } #[test] + fn test_mov() { + let inst = LogicalReg::mov(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa0103e0, result); + } + + #[test] + fn test_mvn() { + let inst = LogicalReg::mvn(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa2103e0, result); + } + + #[test] + fn test_orn() { + let inst = LogicalReg::orn(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa220020, result); + } + + #[test] + fn test_orr() { + let inst = LogicalReg::orr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa020020, result); + } + + #[test] fn test_tst() { let inst = 
LogicalReg::tst(0, 1, 64); let result: u32 = inst.into(); diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 2f0e708999..ae589ca564 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -4,6 +4,7 @@ mod atomic; mod branch; mod branch_cond; +mod breakpoint; mod call; mod data_imm; mod data_reg; @@ -11,12 +12,14 @@ mod load; mod logical_imm; mod logical_reg; mod mov; +mod nop; mod shift_imm; mod store; pub use atomic::Atomic; pub use branch::Branch; pub use branch_cond::BranchCond; +pub use breakpoint::Breakpoint; pub use call::Call; pub use data_imm::DataImm; pub use data_reg::DataReg; @@ -24,5 +27,6 @@ pub use load::Load; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; +pub use nop::Nop; pub use shift_imm::ShiftImm; pub use store::Store; diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs new file mode 100644 index 0000000000..a99f8d34b7 --- /dev/null +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -0,0 +1,44 @@ +/// The struct that represents an A64 nop instruction that can be encoded. +/// +/// NOP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Nop; + +impl Nop { + /// NOP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + pub fn nop() -> Self { + Self {} + } +} + +impl From<Nop> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Nop) -> Self { + 0b11010101000000110010000000011111 + } +} + +impl From<Nop> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Nop) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nop() { + let inst = Nop::nop(); + let result: u32 = inst.into(); + assert_eq!(0xd503201f, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 24f349d589..ced8b262c5 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -6,12 +6,15 @@ mod arg; mod inst; mod opnd; -use arg::*; use inst::*; -use opnd::*; + +// We're going to make these public to make using these things easier in the +// backend (so they don't have to have knowledge about the submodule). +pub use arg::*; +pub use opnd::*; /// Checks that a signed value fits within the specified number of bits. -const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { +pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; @@ -19,7 +22,7 @@ const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { } /// Checks that an unsigned value fits within the specified number of bits. 
-const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { +pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; uimm <= maximum @@ -115,12 +118,39 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// Whether or not the offset between two instructions fits into the branch with +/// or without link instruction. If it doesn't, then we have to load the value +/// into a register first. +pub const fn b_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 26) +} + +/// B - branch without link (offset is number of instructions to jump) +pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); + + Call::b(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to b instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset between two instructions fits into the b.cond +/// instruction. If it doesn't, then we have to load the value into a register +/// first, then use the b.cond instruction to skip past a direct jump. +pub const fn bcond_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 21) && (offset & 0b11 == 0) +} + /// B.cond - branch to target if condition is true pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { let bytes: [u8; 4] = match byte_offset { A64Opnd::Imm(imm) => { - assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); - assert!(imm & 0b11 == 0, "The immediate operand must be aligned to a 2-bit boundary."); + assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); BranchCond::bcond(cond, imm as i32).into() }, @@ -134,7 +164,7 @@ pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { let bytes: [u8; 4] = match imm26 { A64Opnd::Imm(imm26) => { - assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); Call::bl(imm26 as i32).into() }, @@ -154,6 +184,20 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// BRK - create a breakpoint +pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { + let bytes: [u8; 4] = match imm16 { + A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::UImm(imm16) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + Breakpoint::brk(imm16 as u16).into() + }, + _ => panic!("Invalid operand combination to brk instruction.") + }; + + cb.write_bytes(&bytes); +} + /// CMP - compare rn and rm, update flags pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { @@ -196,6 +240,11 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + + Load::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); 
assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); @@ -238,6 +287,23 @@ pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { cb.write_bytes(&bytes); } +/// MOV - move a value in a register to another register +pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { + LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mov instruction") + }; + + cb.write_bytes(&bytes); +} + /// MOVK - move a 16 bit immediate into a register, keep the other bits in place pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -266,6 +332,63 @@ pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MVN - move a value in a register to another register, negating it +pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mvn instruction") + }; + + cb.write_bytes(&bytes); +} + +/// NOP - no-operation, used for alignment purposes +pub fn nop(cb: &mut CodeBlock) { + let bytes: [u8; 4] = Nop::nop().into(); + + cb.write_bytes(&bytes); +} + +/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags +pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orn instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags +pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -435,6 +558,11 @@ mod tests { } #[test] + fn test_b() { + check_bytes("00040014", |cb| b(cb, A64Opnd::new_imm(1024))); + } + + #[test] fn test_bl() { check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); } @@ -445,6 +573,16 @@ mod tests { } #[test] + fn test_brk_none() { + check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + } + + #[test] + fn test_brk_uimm() { + check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14))); + } + + #[test] fn test_cmp_register() { check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); } @@ -460,8 +598,13 @@ mod tests { } #[test] - fn test_ldur() { - check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); + fn test_ldur_memory() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_register() { + check_bytes("200040f8", |cb| ldur(cb, X0, X1)); } #[test] @@ -475,6 +618,16 @@ mod tests { } #[test] + fn test_mov_registers() { + check_bytes("ea030baa", |cb| mov(cb, X10, X11)); + } + + #[test] + fn test_mov_immediate() { + check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); + } + + #[test] fn test_movk() { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); } @@ -485,6 +638,31 @@ mod tests { } #[test] + fn test_mvn() { + check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); + } + + #[test] + fn test_nop() { + check_bytes("1f2003d5", |cb| nop(cb)); + } + + #[test] + fn test_orn() { + check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_register() { + check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_immediate() { + check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] fn test_ret_none() { check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); } @@ -496,7 +674,7 @@ mod tests { #[test] fn test_stur() { - check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); } #[test] diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index aa73d438fe..6c06d2db3c 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -11,6 +11,15 @@ pub struct A64Reg pub reg_no: u8, } +impl A64Reg { + pub fn sub_reg(&self, num_bits: u8) -> Self { + assert!(num_bits == 32 || num_bits == 64); + assert!(num_bits <= self.num_bits); + + Self { num_bits, reg_no: self.reg_no } + } +} + #[derive(Clone, Copy, Debug)] pub struct A64Mem { @@ -25,14 +34,10 @@ pub struct A64Mem } impl A64Mem { - pub fn new(reg: A64Opnd, disp: i32) -> Self { + pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { match reg { A64Opnd::Reg(reg) => { - Self { - num_bits: reg.num_bits, - base_reg_no: reg.reg_no, - disp - } + Self { num_bits, base_reg_no: reg.reg_no, disp } }, _ => panic!("Expected register operand") } @@ -70,8 +75,8 @@ impl A64Opnd { } /// Creates a new memory operand. 
- pub fn new_mem(reg: A64Opnd, disp: i32) -> Self { - A64Opnd::Mem(A64Mem::new(reg, disp)) + pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { + A64Opnd::Mem(A64Mem::new(num_bits, reg, disp)) } /// Convenience function to check if this operand is a register. @@ -87,23 +92,32 @@ pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; +pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; +pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; +pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; +pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 }; +pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 }; pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; +pub const X24_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 24 }; +pub const X25_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 25 }; +pub const X26_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 26 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); pub const X2: A64Opnd = A64Opnd::Reg(X2_REG); pub const X3: A64Opnd = A64Opnd::Reg(X3_REG); -pub const X4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 4 }); -pub const X5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 5 }); +pub const X4: A64Opnd = A64Opnd::Reg(X4_REG); +pub const X5: A64Opnd = A64Opnd::Reg(X5_REG); pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 }); pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 }); pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 }); -pub const X9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 9 }); -pub const X10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 10 }); -pub const X11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 11 }); +pub const X9: A64Opnd = A64Opnd::Reg(X9_REG); +pub const X10: A64Opnd = A64Opnd::Reg(X10_REG); +pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); pub const X14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 14 }); @@ -116,13 +130,14 @@ pub const X20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 20 }); pub const X21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 21 }); pub const X22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 22 }); pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); -pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); -pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); -pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X24: A64Opnd = A64Opnd::Reg(X24_REG); +pub const X25: A64Opnd = A64Opnd::Reg(X25_REG); +pub const X26: A64Opnd = A64Opnd::Reg(X26_REG); pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); +pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); // 32-bit registers pub const W0: A64Reg = A64Reg { 
num_bits: 32, reg_no: 0 }; @@ -156,6 +171,7 @@ pub const W27: A64Reg = A64Reg { num_bits: 32, reg_no: 27 }; pub const W28: A64Reg = A64Reg { num_bits: 32, reg_no: 28 }; pub const W29: A64Reg = A64Reg { num_bits: 32, reg_no: 29 }; pub const W30: A64Reg = A64Reg { num_bits: 32, reg_no: 30 }; +pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 }; // C argument registers pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index b54fc362b4..5723406aec 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -23,6 +23,14 @@ struct LabelRef { // Label which this refers to label_idx: usize, + + /// The number of bytes that this label reference takes up in the memory. + /// It's necessary to know this ahead of time so that when we come back to + /// patch it it takes the same amount of space. + num_bytes: usize, + + /// The object that knows how to encode the branch instruction. + encode: Box<dyn FnOnce(&mut CodeBlock, i64, i64)> } /// Block of memory into which instructions can be assembled @@ -154,7 +162,7 @@ impl CodeBlock { self.get_ptr(self.write_pos) } - // Write a single byte at the current position + /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); @@ -165,15 +173,15 @@ impl CodeBlock { } } - // Write multiple bytes starting from the current position - pub fn write_bytes(&mut self, bytes: &[u8]) { + /// Write multiple bytes starting from the current position. + fn write_bytes(&mut self, bytes: &[u8]) { for byte in bytes { self.write_byte(*byte); } } - // Write a signed integer over a given number of bits at the current position - pub fn write_int(&mut self, val: u64, num_bits: u32) { + /// Write an integer over the given number of bits at the current position. 
+ fn write_int(&mut self, val: u64, num_bits: u32) { assert!(num_bits > 0); assert!(num_bits % 8 == 0); @@ -219,14 +227,14 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label_idx: usize) { + pub fn label_ref<E: 'static>(&mut self, label_idx: usize, num_bytes: usize, encode: E) where E: FnOnce(&mut CodeBlock, i64, i64) { assert!(label_idx < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { - pos: self.write_pos, - label_idx, - }); + self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode: Box::new(encode) }); + + // Move past however many bytes the instruction takes up + self.write_pos += num_bytes; } // Link internal label references @@ -242,11 +250,8 @@ impl CodeBlock { let label_addr = self.label_addrs[label_idx]; assert!(label_addr < self.mem_size); - // Compute the offset from the reference's end to the label - let offset = (label_addr as i64) - ((ref_pos + 4) as i64); - self.set_pos(ref_pos); - self.write_int(offset as u64, 32); + (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); } self.write_pos = orig_pos; diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 9869b79e23..a2a3b47f82 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -703,14 +703,10 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { /// call - Call to label with 32-bit offset pub fn call_label(cb: &mut CodeBlock, label_idx: usize) { - // Write the opcode - cb.write_byte(0xE8); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int(0, 32); + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE8); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// call - Indirect call with an R/M operand @@ -801,55 +797,54 @@ pub fn int3(cb: &mut CodeBlock) { cb.write_byte(0xcc); } -// Encode a relative jump to a label (direct or conditional) +// Encode a conditional relative jump to a label // Note: this always encodes a 32-bit offset -fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) { - // Write the opcode - if op0 != 0xff { - cb.write_byte(op0); - } - - cb.write_byte(op1); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int( 0, 32); +fn write_jcc(cb: &mut CodeBlock, op: u8, label_idx: usize) { + cb.label_ref(label_idx, 6, move |cb, src_addr, dst_addr| { + cb.write_byte(0x0F); + cb.write_byte(op); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// jcc - relative jumps to a label -pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jl_label (cb: &mut CodeBlock, label_idx: 
usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); } -pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); } -pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); } -pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); } -pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); } -pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); } -pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); } +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jnc_label (cb: &mut 
CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x81, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8b, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x89, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x80, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8B, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x88, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } + +pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE9); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} /// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 4e4c553c9d..061d21d19b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -7,26 +7,51 @@ use crate::asm::arm64::*; use crate::codegen::{JITState}; use crate::cruby::*; use crate::backend::ir::*; +use crate::virtualmem::CodePtr; // Use the arm64 register type for this platform pub type Reg = A64Reg; // Callee-saved registers -pub const _CFP: Opnd = Opnd::Reg(X9); -pub const _EC: Opnd = Opnd::Reg(X10); -pub const _SP: Opnd = Opnd::Reg(X11); +pub const _CFP: Opnd = Opnd::Reg(X24_REG); +pub const _EC: Opnd = Opnd::Reg(X25_REG); +pub const _SP: Opnd = Opnd::Reg(X26_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; // C return value register on this platform -pub const RET_REG: Reg = X0; +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// These constants define the way we work with Arm64's stack pointer. The stack +// pointer always needs to be aligned to a 16-byte boundary. 
+pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: A64Opnd = A64Opnd::UImm(16); /// Map Opnd to A64Opnd impl From<Opnd> for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { - Opnd::UImm(val) => uimm_opnd(val), - Opnd::Imm(val) => imm_opnd(val), + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), - _ => panic!("unsupported arm64 operand type") + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::None => panic!("attempted to lower an Opnd::None"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), } } } @@ -43,39 +68,368 @@ impl Assembler } /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. fn arm64_split(mut self) -> Assembler { - // The transformations done here are meant to make our lives simpler in later - // stages of the compilation pipeline. - // Here we may want to make sure that all instructions (except load and store) - // have no memory operands. + self.forward_pass(|asm, index, op, opnds, target| { + match op { + Op::Add | Op::Sub => { + // Check if one of the operands is a register. If it is, + // then we'll make that the first operand. + match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + let opnd0 = asm.load(opnds[0]); + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnd0, opnd1], target); + }, + (mem_opnd @ Opnd::Mem(_), other_opnd) | + (other_opnd, mem_opnd @ Opnd::Mem(_)) => { + let opnd0 = asm.load(mem_opnd); + asm.push_insn(op, vec![opnd0, other_opnd], target); + }, + _ => { + asm.push_insn(op, opnds, target); + } + } + }, + Op::IncrCounter => { + // Every operand to the IncrCounter instruction need to be a + // register once it gets there. So here we're going to load + // anything that isn't a register first. + let new_opnds: Vec<Opnd> = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Mem(_) | Opnd::Imm(_) | Opnd::UImm(_) => asm.load(opnd), + _ => opnd, + } + }).collect(); + + asm.incr_counter(new_opnds[0], new_opnds[1]); + }, + Op::Mov => { + // The value that is being moved must be either a register + // or an immediate that can be encoded as a bitmask + // immediate. Otherwise, we'll need to split the move into + // multiple instructions. + let value = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), + Opnd::UImm(uimm) => { + if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { + opnds[1] + } else { + asm.load(opnds[1]) + } + }, + _ => unreachable!() + }; + + /// If we're attempting to load into a memory operand, then + /// we'll switch over to the store instruction. Otherwise + /// we'll use the normal mov instruction. + match opnds[0] { + Opnd::Mem(_) => asm.store(opnds[0], value), + _ => asm.mov(opnds[0], value) + }; + }, + Op::Not => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. 
+ let opnd0 = match opnds[0] { + Opnd::Mem(_) => asm.load(opnds[0]), + _ => opnds[0] + }; + + asm.not(opnd0); + }, + Op::Store => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + _ => asm.load(opnds[1]) + }; - todo!(); + asm.store(opnds[0], opnd1); + }, + _ => { + asm.push_insn(op, opnds, target); + } + }; + }) } /// Emit platform-specific machine code /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> { - // NOTE: dear Kevin, - // for arm, you may want to reserve 1 or 2 caller-save registers - // to use as scracth registers (during the last phase of the codegen) - // These registers will not be allocated to anything by the register - // allocator, they're just useful because arm is slightly trickier - // than x86 to generate code for. - // For example, if you want to jump far away, you may want to store - // the jump target address in a register first. - - todo!(); + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { + match target { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + let offset = dst_addr - src_addr; + + // If the jump offset fits into the conditional jump as an + // immediate value and it's properly aligned, then we can + // use the b.cond instruction directly. Otherwise, we need + // to load the address into a register and use the branch + // register instruction. + if bcond_offset_fits_bits(offset) { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + } else { + // If the condition is met, then we'll skip past the + // next instruction, put the address in a register, and + // jump to it. + bcond(cb, condition, A64Opnd::new_imm(4)); + + // If the offset fits into a direct jump, then we'll use + // that and the number of instructions will be shorter. + // Otherwise we'll use the branch register instruction. + if b_offset_fits_bits(offset) { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that performs the direct jump. + b(cb, A64Opnd::new_imm(4)); + + // Here we'll perform the direct jump to the target. + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that perform the direct jump. + b(cb, A64Opnd::new_imm(8)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. 
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + }); + }, + Target::FunPtr(_) => unreachable!() + }; + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // For each instruction + for insn in &self.insns { + match insn.op { + Op::Comment => { + if cfg!(feature = "asm_comments") { + cb.add_comment(&insn.text.as_ref().unwrap()); + } + }, + Op::Label => { + cb.write_label(insn.target.unwrap().unwrap_label_idx()); + }, + Op::Add => { + add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Sub => { + sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::And => { + and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { + mvn(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::Store => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); + }, + Op::Load => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value being + // loaded is a heap object, we'll report that back out to + // the gc_offsets list. + if let Opnd::Value(val) = insn.opnds[0] { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + } + }, + Op::Mov => { + mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Lea => { + ldur(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CPush => { + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + mov(cb, A64Opnd::new_mem(64, C_SP_REG, 0), insn.opnds[0].into()); + }, + Op::CPop => { + mov(cb, insn.out.into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + }, + Op::CCall => { + // Temporary + assert!(insn.opnds.len() < C_ARG_REGS.len()); + + // For each operand + for (idx, opnd) in insn.opnds.iter().enumerate() { + mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + } + + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; + + // The offset between the two instructions in bytes. Note + // that when we encode this into a bl instruction, we'll + // divide by 4 because it accepts the number of instructions + // to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the branch + // link instruction. Otherwise, we'll move the destination + // and return address into appropriate registers and use the + // branch register instruction. 
+ if b_offset_fits_bits(offset) { + bl(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X30, A64Opnd::new_uimm(src_addr as u64)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Op::CRet => { + // TODO: bias allocation towards return register + if insn.opnds[0] != Opnd::Reg(C_RET_REG) { + mov(cb, C_RET_OPND.into(), insn.opnds[0].into()); + } + + ret(cb, A64Opnd::None); + }, + Op::Cmp => { + cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Test => { + tst(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::JmpOpnd => { + br(cb, insn.opnds[0].into()); + }, + Op::Jmp => { + match insn.target.unwrap() { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + + // The offset between the two instructions in bytes. + // Note that when we encode this into a b + // instruction, we'll divide by 4 because it accepts + // the number of instructions to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + if b_offset_fits_bits(offset) { + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4)); + }); + }, + _ => unreachable!() + }; + }, + Op::Je => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jbe => { + emit_conditional_jump(cb, Condition::LS, insn.target.unwrap()); + }, + Op::Jz => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jnz => { + emit_conditional_jump(cb, Condition::NE, insn.target.unwrap()); + }, + Op::Jo => { + emit_conditional_jump(cb, Condition::VS, insn.target.unwrap()); + }, + Op::IncrCounter => { + ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Breakpoint => { + brk(cb, A64Opnd::None); + } + }; + } + + gc_offsets } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> { - self - .arm64_split() - .split_loads() - .alloc_regs(regs) - .arm64_emit(jit, cb) + let mut asm = self.arm64_split().split_loads().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.arm64_emit(cb); + cb.link_labels(); + + gc_offsets + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, vec![X3_REG]); + + let insns = cb.get_ptr(0).raw_ptr() as *const u32; + assert_eq!(0x8b010003, unsafe { *insns }); } } diff --git a/yjit/src/backend/ir.rs 
b/yjit/src/backend/ir.rs index 4e8ed0b8a4..bdefe1c6bc 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -276,6 +276,13 @@ pub enum Target impl Target { + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 0841c9ffa5..4794695094 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -1,3 +1,8 @@ +#[cfg(target_arch = "x86_64")] pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + pub mod ir; -mod tests;
\ No newline at end of file
+mod tests;
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index f4e0d4f53a..19b5096a26 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -186,6 +186,9 @@ impl Assembler
                 for (idx, opnd) in insn.opnds.iter().enumerate() {
                     mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into());
                 }
+
+                let ptr = insn.target.unwrap().unwrap_fun_ptr();
+                call_ptr(cb, RAX, ptr);
             },

             Op::CRet => {
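
One detail worth pulling out of yjit/src/backend/arm64/mod.rs above: `emit_conditional_jump` degrades through three encodings depending on how far away the target is. Here is the selection logic restated as a standalone classifier; the enum and function names are invented for this sketch, while the two range checks mirror `bcond_offset_fits_bits` and `b_offset_fits_bits` from the diff.

```rust
/// Which instruction sequence emit_conditional_jump would pick for a given
/// byte offset from the jump site to the target.
#[derive(Debug, PartialEq)]
enum JumpKind {
    BCond,          // one b.cond straight to the target
    BCondOverB,     // b.cond skips over an unconditional b to the target
    BCondOverMovBr, // b.cond skips over mov (target into register) + br
}

/// Does a signed value fit in a two's-complement field of `num_bits` bits?
fn imm_fits_bits(imm: i64, num_bits: u8) -> bool {
    let min = -(1_i64 << (num_bits as u32 - 1));
    let max = (1_i64 << (num_bits as u32 - 1)) - 1;
    (min..=max).contains(&imm)
}

fn classify_conditional_jump(offset: i64) -> JumpKind {
    if imm_fits_bits(offset, 21) && offset & 0b11 == 0 {
        JumpKind::BCond // fits b.cond's 19-bit, 4-byte-aligned immediate field
    } else if imm_fits_bits(offset, 26) {
        JumpKind::BCondOverB
    } else {
        JumpKind::BCondOverMovBr
    }
}

fn main() {
    assert_eq!(classify_conditional_jump(1 << 10), JumpKind::BCond);
    assert_eq!(classify_conditional_jump(1 << 22), JumpKind::BCondOverB);
    assert_eq!(classify_conditional_jump(1 << 30), JumpKind::BCondOverMovBr);
}
```

Keeping `b.cond` as the first instruction of every tier means the conditional test itself never needs a far-branch form; only the landing sequence after it grows, which appears to be the motivation for the "nops so jumps are the same width" bullet in the commit message.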