author     Kevin Newton <kddnewton@gmail.com>       2022-07-05 16:04:19 -0400
committer  Takashi Kokubun <takashikkbn@gmail.com>  2022-08-29 08:46:58 -0700
commit     7a9b581e0896d4aa7a037da90c837b830213c8e8 (patch)
tree       8d613c9cca2af21aa17840270b23acb233b9f3ff
parent     b272c57f27628ab114206c777d5b274713d31079 (diff)
Arm64 progress (https://github.com/Shopify/ruby/pull/304)
* Get initial wiring up
* Split IncrCounter instruction
* Breakpoints in Arm64
* Support for ORR
* MOV instruction encodings
* Implement JmpOpnd and CRet
* Add ORN
* Add MVN
* PUSH, POP, CCALL for Arm64
* Some formatting and implement Op::Not for Arm64
* Consistent constants when working with the Arm64 SP
* Allow OR-ing values into the memory buffer
* Test lowering Arm64 ADD
* Emit unconditional jumps consistently in Arm64
* Begin emitting conditional jumps for A64
* Back out some labelref changes
* Remove label API that no longer exists
* Use a trait for the label encoders
* Encode nop
* Add in nops so jumps are the same width no matter what on Arm64
* Op::Jbe for CodePtr
* Pass src_addr and dst_addr instead of calculated offset to label refs
* Even more jump work for Arm64
* Fix up jumps to use consistent assertions
* Handle splitting Add, Sub, and Not insns for Arm64
* More Arm64 splits and various fixes
* PR feedback for Arm64 support
* Split up jumps and conditional jump logic
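
The encoders this work adds are small and self-contained, so one is worth sketching in isolation. Below is a minimal standalone model of the B/BL encoding implemented by yjit/src/asm/arm64/inst/call.rs in the diff that follows; the helper names are local to this sketch, and the expected machine words are taken from the commit's own tests.

```rust
/// True when a signed value fits in the branch instruction's 26-bit field.
fn imm26_fits(value: i64) -> bool {
    (-(1_i64 << 25)..=(1_i64 << 25) - 1).contains(&value)
}

/// Encode B (link = false) or BL (link = true). `imm26` counts instructions,
/// i.e. the byte offset divided by four.
fn encode_branch(imm26: i32, link: bool) -> u32 {
    assert!(imm26_fits(imm26 as i64));
    let op: u32 = if link { 1 } else { 0 }; // bit 31: the only B/BL difference
    let family: u32 = 0b101;                // unconditional-branch family
    let imm = (imm26 as u32) & ((1 << 26) - 1);
    (op << 31) | (family << 26) | imm
}

fn main() {
    assert_eq!(encode_branch(256, false), 0x14000100); // B  +256 instructions
    assert_eq!(encode_branch(-256, true), 0x97ffff00); // BL -256 instructions
}
```

Bit 31 is the only difference between the two instructions, which is why the patch folds both into a single `Call` struct parameterized by an `Op` field.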
-rw-r--r--  yjit/src/asm/arm64/inst/breakpoint.rs  |  55
-rw-r--r--  yjit/src/asm/arm64/inst/call.rs        |  51
-rw-r--r--  yjit/src/asm/arm64/inst/logical_imm.rs |  37
-rw-r--r--  yjit/src/asm/arm64/inst/logical_reg.rs |  80
-rw-r--r--  yjit/src/asm/arm64/inst/mod.rs         |   4
-rw-r--r--  yjit/src/asm/arm64/inst/nop.rs         |  44
-rw-r--r--  yjit/src/asm/arm64/mod.rs              | 198
-rw-r--r--  yjit/src/asm/arm64/opnd.rs             |  48
-rw-r--r--  yjit/src/asm/mod.rs                    |  33
-rw-r--r--  yjit/src/asm/x86_64/mod.rs             | 101
-rw-r--r--  yjit/src/backend/arm64/mod.rs          | 410
-rw-r--r--  yjit/src/backend/ir.rs                 |   7
-rw-r--r--  yjit/src/backend/mod.rs                |   7
-rw-r--r--  yjit/src/backend/x86_64/mod.rs         |   3
14 files changed, 939 insertions, 139 deletions
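
Much of the churn in yjit/src/asm/mod.rs and the x86_64 assembler below comes from reworking `CodeBlock::label_ref`: instead of hard-coding a 32-bit x86 offset, each label reference now reserves a fixed number of bytes and carries a boxed closure that encodes the branch once the label address is known. The following is a simplified, self-contained model of that scheme; `Buf` and its bookkeeping are stand-ins for the real `CodeBlock`, which also handles capacity, memory mapping, and label names.

```rust
/// A pending reference to a label whose address is not yet known.
struct LabelRef {
    pos: usize,                                  // where the patch slot starts
    label_idx: usize,                            // label this reference targets
    num_bytes: usize,                            // size reserved for the patch
    encode: Box<dyn FnOnce(&mut Buf, i64, i64)>, // called with (src_addr, dst_addr)
}

struct Buf {
    bytes: Vec<u8>,
    write_pos: usize,
    label_addrs: Vec<usize>,
    label_refs: Vec<LabelRef>,
}

impl Buf {
    fn write_byte(&mut self, byte: u8) {
        if self.write_pos < self.bytes.len() {
            self.bytes[self.write_pos] = byte; // patching a reserved slot
        } else {
            self.bytes.push(byte);             // appending fresh code
        }
        self.write_pos += 1;
    }

    fn write_int(&mut self, val: u64, num_bits: u32) {
        for i in 0..(num_bits / 8) {
            self.write_byte((val >> (8 * i)) as u8); // little-endian
        }
    }

    /// Record a reference and reserve its patch slot up front, so the slot
    /// has the same size no matter how the branch is later encoded.
    fn label_ref<E>(&mut self, label_idx: usize, num_bytes: usize, encode: E)
    where
        E: FnOnce(&mut Buf, i64, i64) + 'static,
    {
        let pos = self.write_pos;
        self.label_refs.push(LabelRef { pos, label_idx, num_bytes, encode: Box::new(encode) });
        for _ in 0..num_bytes {
            self.write_byte(0);
        }
    }

    /// Once every label address is known, revisit each slot and let its
    /// closure emit the final instruction.
    fn link_labels(&mut self) {
        let orig_pos = self.write_pos;
        for r in std::mem::take(&mut self.label_refs) {
            let src_addr = (r.pos + r.num_bytes) as i64; // end of the slot
            let dst_addr = self.label_addrs[r.label_idx] as i64;
            self.write_pos = r.pos;
            (r.encode)(self, src_addr, dst_addr);
        }
        self.write_pos = orig_pos;
    }
}

fn main() {
    let mut buf = Buf { bytes: vec![], write_pos: 0, label_addrs: vec![16], label_refs: vec![] };

    // Mirrors the patched x86_64 jmp_label: one opcode byte plus a rel32.
    buf.label_ref(0, 5, |cb, src_addr, dst_addr| {
        cb.write_byte(0xE9);
        cb.write_int((dst_addr - src_addr) as u64, 32);
    });

    buf.link_labels();
    assert_eq!(buf.bytes, vec![0xE9, 11, 0, 0, 0]); // jump from byte 5 to byte 16
}
```

Under this shape, the patched x86_64 `jmp_label` reserves five bytes (opcode plus rel32) and `write_jcc` reserves six, while the Arm64 backend reserves a single four-byte slot and encodes a `b` or `b.cond` in place.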
diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs new file mode 100644 index 0000000000..be4920ac76 --- /dev/null +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -0,0 +1,55 @@ +/// The struct that represents an A64 breakpoint instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 | +/// | imm16.................................................. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Breakpoint { + /// The value to be captured by ESR_ELx.ISS + imm16: u16 +} + +impl Breakpoint { + /// BRK + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + pub fn brk(imm16: u16) -> Self { + Self { imm16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +const FAMILY: u32 = 0b101; + +impl From<Breakpoint> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Breakpoint) -> Self { + let imm16 = inst.imm16 as u32; + + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 21) + | (imm16 << 5) + } +} + +impl From<Breakpoint> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Breakpoint) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_brk() { + let result: u32 = Breakpoint::brk(7).into(); + assert_eq!(0xd42000e0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 6f23acf9f5..8d65359f77 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -1,22 +1,41 @@ -/// The struct that represents an A64 branch with link instruction that can be -/// encoded. +/// The operation to perform for this instruction. +enum Op { + /// Branch directly, with a hint that this is not a subroutine call or + /// return. + Branch = 0, + + /// Branch directly, with a hint that this is a subroutine call or return. + BranchWithLink = 1 +} + +/// The struct that represents an A64 branch with our without link instruction +/// that can be encoded. /// /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 0 0 1 0 1 | -/// | imm26.................................................................................... | +/// | 0 0 1 0 1 | +/// | op imm26.................................................................................... | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct Call { /// The PC-relative offset to jump to (which will be multiplied by 4). - imm26: i32 + imm26: i32, + + /// The operation to perform for this instruction. 
+ op: Op } impl Call { + /// B + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + pub fn b(imm26: i32) -> Self { + Self { imm26, op: Op::Branch } + } + /// BL /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en pub fn bl(imm26: i32) -> Self { - Self { imm26 } + Self { imm26, op: Op::BranchWithLink } } } @@ -29,7 +48,7 @@ impl From<Call> for u32 { let imm26 = (inst.imm26 as u32) & ((1 << 26) - 1); 0 - | (1 << 31) + | ((inst.op as u32) << 31) | (FAMILY << 26) | imm26 } @@ -64,4 +83,22 @@ mod tests { let result: u32 = Call::bl(-256).into(); assert_eq!(0x97ffff00, result); } + + #[test] + fn test_b() { + let result: u32 = Call::b(0).into(); + assert_eq!(0x14000000, result); + } + + #[test] + fn test_b_positive() { + let result: u32 = Call::b(256).into(); + assert_eq!(0x14000100, result); + } + + #[test] + fn test_b_negative() { + let result: u32 = Call::b(-256).into(); + assert_eq!(0x17ffff00, result); + } } diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index cc2a16cbdc..13865697f6 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -5,6 +5,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -12,7 +15,7 @@ enum Opc { /// The struct that represents an A64 bitwise immediate instruction that can be /// encoded. /// -/// AND/ANDS (immediate) +/// AND/ORR/ANDS (immediate) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | /// | 1 0 0 1 0 0 | @@ -37,19 +40,31 @@ pub struct LogicalImm { } impl LogicalImm { - /// AND (immediate) + /// AND (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } } - /// ANDS (immediate) + /// ANDS (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } - /// TST (immediate) + /// MOV (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self::ands(31, rn, imm, num_bits) @@ -101,6 +116,20 @@ mod tests { } #[test] + fn test_mov() { + let inst = 
LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb200f3e0, result); + } + + #[test] + fn test_orr() { + let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb2400820, result); + } + + #[test] fn test_tst() { let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 3feb3350ab..5d7954c587 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -1,5 +1,14 @@ use super::super::arg::Sf; +/// Whether or not this is a NOT instruction. +enum N { + /// This is not a NOT instruction. + No = 0, + + /// This is a NOT instruction. + Yes = 1 +} + /// The type of shift to perform on the second operand register. enum Shift { LSL = 0b00, // logical shift left (unsigned) @@ -13,6 +22,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -20,11 +32,11 @@ enum Opc { /// The struct that represents an A64 logical register instruction that can be /// encoded. /// -/// AND/ANDS (shifted register) +/// AND/ORR/ANDS (shifted register) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 0 1 0 1 0 0 | -/// | sf opc.. shift rm.............. imm6............... rn.............. rd.............. | +/// | 0 1 0 1 0 | +/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct LogicalReg { @@ -40,6 +52,9 @@ pub struct LogicalReg { /// The register number of the second operand register. rm: u8, + /// Whether or not this is a NOT instruction. + n: N, + /// The type of shift to perform on the second operand register. 
shift: Shift, @@ -54,19 +69,43 @@ impl LogicalReg { /// AND (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } } /// ANDS (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// MOV (register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// MVN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// TST (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd: 31, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } } @@ -83,6 +122,7 @@ impl From<LogicalReg> for u32 { | ((inst.opc as u32) << 29) | (FAMILY << 25) | ((inst.shift as u32) << 22) + | ((inst.n as u32) << 21) | ((inst.rm as u32) << 16) | (imm6 << 10) | ((inst.rn as u32) << 5) @@ -117,6 +157,34 @@ mod tests { } #[test] + fn test_mov() { + let inst = LogicalReg::mov(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa0103e0, result); + } + + #[test] + fn test_mvn() { + let inst = LogicalReg::mvn(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa2103e0, result); + } + + #[test] + fn test_orn() { + let inst = LogicalReg::orn(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa220020, result); + } + + #[test] + fn test_orr() { + let inst = LogicalReg::orr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa020020, result); + } + + #[test] fn test_tst() { let inst = 
LogicalReg::tst(0, 1, 64); let result: u32 = inst.into(); diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 2f0e708999..ae589ca564 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -4,6 +4,7 @@ mod atomic; mod branch; mod branch_cond; +mod breakpoint; mod call; mod data_imm; mod data_reg; @@ -11,12 +12,14 @@ mod load; mod logical_imm; mod logical_reg; mod mov; +mod nop; mod shift_imm; mod store; pub use atomic::Atomic; pub use branch::Branch; pub use branch_cond::BranchCond; +pub use breakpoint::Breakpoint; pub use call::Call; pub use data_imm::DataImm; pub use data_reg::DataReg; @@ -24,5 +27,6 @@ pub use load::Load; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; +pub use nop::Nop; pub use shift_imm::ShiftImm; pub use store::Store; diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs new file mode 100644 index 0000000000..a99f8d34b7 --- /dev/null +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -0,0 +1,44 @@ +/// The struct that represents an A64 nop instruction that can be encoded. +/// +/// NOP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Nop; + +impl Nop { + /// NOP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + pub fn nop() -> Self { + Self {} + } +} + +impl From<Nop> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Nop) -> Self { + 0b11010101000000110010000000011111 + } +} + +impl From<Nop> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Nop) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nop() { + let inst = Nop::nop(); + let result: u32 = inst.into(); + assert_eq!(0xd503201f, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 24f349d589..ced8b262c5 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -6,12 +6,15 @@ mod arg; mod inst; mod opnd; -use arg::*; use inst::*; -use opnd::*; + +// We're going to make these public to make using these things easier in the +// backend (so they don't have to have knowledge about the submodule). +pub use arg::*; +pub use opnd::*; /// Checks that a signed value fits within the specified number of bits. -const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { +pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; @@ -19,7 +22,7 @@ const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { } /// Checks that an unsigned value fits within the specified number of bits. 
-const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { +pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; uimm <= maximum @@ -115,12 +118,39 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// Whether or not the offset between two instructions fits into the branch with +/// or without link instruction. If it doesn't, then we have to load the value +/// into a register first. +pub const fn b_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 26) +} + +/// B - branch without link (offset is number of instructions to jump) +pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); + + Call::b(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to b instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset between two instructions fits into the b.cond +/// instruction. If it doesn't, then we have to load the value into a register +/// first, then use the b.cond instruction to skip past a direct jump. +pub const fn bcond_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 21) && (offset & 0b11 == 0) +} + /// B.cond - branch to target if condition is true pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { let bytes: [u8; 4] = match byte_offset { A64Opnd::Imm(imm) => { - assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); - assert!(imm & 0b11 == 0, "The immediate operand must be aligned to a 2-bit boundary."); + assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); BranchCond::bcond(cond, imm as i32).into() }, @@ -134,7 +164,7 @@ pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { let bytes: [u8; 4] = match imm26 { A64Opnd::Imm(imm26) => { - assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); Call::bl(imm26 as i32).into() }, @@ -154,6 +184,20 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// BRK - create a breakpoint +pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { + let bytes: [u8; 4] = match imm16 { + A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::UImm(imm16) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + Breakpoint::brk(imm16 as u16).into() + }, + _ => panic!("Invalid operand combination to brk instruction.") + }; + + cb.write_bytes(&bytes); +} + /// CMP - compare rn and rm, update flags pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { @@ -196,6 +240,11 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + + Load::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); 
assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); @@ -238,6 +287,23 @@ pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { cb.write_bytes(&bytes); } +/// MOV - move a value in a register to another register +pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { + LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mov instruction") + }; + + cb.write_bytes(&bytes); +} + /// MOVK - move a 16 bit immediate into a register, keep the other bits in place pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -266,6 +332,63 @@ pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MVN - move a value in a register to another register, negating it +pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mvn instruction") + }; + + cb.write_bytes(&bytes); +} + +/// NOP - no-operation, used for alignment purposes +pub fn nop(cb: &mut CodeBlock) { + let bytes: [u8; 4] = Nop::nop().into(); + + cb.write_bytes(&bytes); +} + +/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags +pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orn instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags +pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -435,6 +558,11 @@ mod tests { } #[test] + fn test_b() { + check_bytes("00040014", |cb| b(cb, A64Opnd::new_imm(1024))); + } + + #[test] fn test_bl() { check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); } @@ -445,6 +573,16 @@ mod tests { } #[test] + fn test_brk_none() { + check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + } + + #[test] + fn test_brk_uimm() { + check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14))); + } + + #[test] fn test_cmp_register() { check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); } @@ -460,8 +598,13 @@ mod tests { } #[test] - fn test_ldur() { - check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); + fn test_ldur_memory() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_register() { + check_bytes("200040f8", |cb| ldur(cb, X0, X1)); } #[test] @@ -475,6 +618,16 @@ mod tests { } #[test] + fn test_mov_registers() { + check_bytes("ea030baa", |cb| mov(cb, X10, X11)); + } + + #[test] + fn test_mov_immediate() { + check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); + } + + #[test] fn test_movk() { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); } @@ -485,6 +638,31 @@ mod tests { } #[test] + fn test_mvn() { + check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); + } + + #[test] + fn test_nop() { + check_bytes("1f2003d5", |cb| nop(cb)); + } + + #[test] + fn test_orn() { + check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_register() { + check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_immediate() { + check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] fn test_ret_none() { check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); } @@ -496,7 +674,7 @@ mod tests { #[test] fn test_stur() { - check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); } #[test] diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index aa73d438fe..6c06d2db3c 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -11,6 +11,15 @@ pub struct A64Reg pub reg_no: u8, } +impl A64Reg { + pub fn sub_reg(&self, num_bits: u8) -> Self { + assert!(num_bits == 32 || num_bits == 64); + assert!(num_bits <= self.num_bits); + + Self { num_bits, reg_no: self.reg_no } + } +} + #[derive(Clone, Copy, Debug)] pub struct A64Mem { @@ -25,14 +34,10 @@ pub struct A64Mem } impl A64Mem { - pub fn new(reg: A64Opnd, disp: i32) -> Self { + pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { match reg { A64Opnd::Reg(reg) => { - Self { - num_bits: reg.num_bits, - base_reg_no: reg.reg_no, - disp - } + Self { num_bits, base_reg_no: reg.reg_no, disp } }, _ => panic!("Expected register operand") } @@ -70,8 +75,8 @@ impl A64Opnd { } /// Creates a new memory operand. 
- pub fn new_mem(reg: A64Opnd, disp: i32) -> Self { - A64Opnd::Mem(A64Mem::new(reg, disp)) + pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { + A64Opnd::Mem(A64Mem::new(num_bits, reg, disp)) } /// Convenience function to check if this operand is a register. @@ -87,23 +92,32 @@ pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; +pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; +pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; +pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; +pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 }; +pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 }; pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; +pub const X24_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 24 }; +pub const X25_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 25 }; +pub const X26_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 26 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); pub const X2: A64Opnd = A64Opnd::Reg(X2_REG); pub const X3: A64Opnd = A64Opnd::Reg(X3_REG); -pub const X4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 4 }); -pub const X5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 5 }); +pub const X4: A64Opnd = A64Opnd::Reg(X4_REG); +pub const X5: A64Opnd = A64Opnd::Reg(X5_REG); pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 }); pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 }); pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 }); -pub const X9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 9 }); -pub const X10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 10 }); -pub const X11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 11 }); +pub const X9: A64Opnd = A64Opnd::Reg(X9_REG); +pub const X10: A64Opnd = A64Opnd::Reg(X10_REG); +pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); pub const X14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 14 }); @@ -116,13 +130,14 @@ pub const X20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 20 }); pub const X21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 21 }); pub const X22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 22 }); pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); -pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); -pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); -pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X24: A64Opnd = A64Opnd::Reg(X24_REG); +pub const X25: A64Opnd = A64Opnd::Reg(X25_REG); +pub const X26: A64Opnd = A64Opnd::Reg(X26_REG); pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); +pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); // 32-bit registers pub const W0: A64Reg = A64Reg { 
num_bits: 32, reg_no: 0 }; @@ -156,6 +171,7 @@ pub const W27: A64Reg = A64Reg { num_bits: 32, reg_no: 27 }; pub const W28: A64Reg = A64Reg { num_bits: 32, reg_no: 28 }; pub const W29: A64Reg = A64Reg { num_bits: 32, reg_no: 29 }; pub const W30: A64Reg = A64Reg { num_bits: 32, reg_no: 30 }; +pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 }; // C argument registers pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index b54fc362b4..5723406aec 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -23,6 +23,14 @@ struct LabelRef { // Label which this refers to label_idx: usize, + + /// The number of bytes that this label reference takes up in the memory. + /// It's necessary to know this ahead of time so that when we come back to + /// patch it it takes the same amount of space. + num_bytes: usize, + + /// The object that knows how to encode the branch instruction. + encode: Box<dyn FnOnce(&mut CodeBlock, i64, i64)> } /// Block of memory into which instructions can be assembled @@ -154,7 +162,7 @@ impl CodeBlock { self.get_ptr(self.write_pos) } - // Write a single byte at the current position + /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); @@ -165,15 +173,15 @@ impl CodeBlock { } } - // Write multiple bytes starting from the current position - pub fn write_bytes(&mut self, bytes: &[u8]) { + /// Write multiple bytes starting from the current position. + fn write_bytes(&mut self, bytes: &[u8]) { for byte in bytes { self.write_byte(*byte); } } - // Write a signed integer over a given number of bits at the current position - pub fn write_int(&mut self, val: u64, num_bits: u32) { + /// Write an integer over the given number of bits at the current position. 
+ fn write_int(&mut self, val: u64, num_bits: u32) { assert!(num_bits > 0); assert!(num_bits % 8 == 0); @@ -219,14 +227,14 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label_idx: usize) { + pub fn label_ref<E: 'static>(&mut self, label_idx: usize, num_bytes: usize, encode: E) where E: FnOnce(&mut CodeBlock, i64, i64) { assert!(label_idx < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { - pos: self.write_pos, - label_idx, - }); + self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode: Box::new(encode) }); + + // Move past however many bytes the instruction takes up + self.write_pos += num_bytes; } // Link internal label references @@ -242,11 +250,8 @@ impl CodeBlock { let label_addr = self.label_addrs[label_idx]; assert!(label_addr < self.mem_size); - // Compute the offset from the reference's end to the label - let offset = (label_addr as i64) - ((ref_pos + 4) as i64); - self.set_pos(ref_pos); - self.write_int(offset as u64, 32); + (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); } self.write_pos = orig_pos; diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 9869b79e23..a2a3b47f82 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -703,14 +703,10 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { /// call - Call to label with 32-bit offset pub fn call_label(cb: &mut CodeBlock, label_idx: usize) { - // Write the opcode - cb.write_byte(0xE8); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int(0, 32); + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE8); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// call - Indirect call with an R/M operand @@ -801,55 +797,54 @@ pub fn int3(cb: &mut CodeBlock) { cb.write_byte(0xcc); } -// Encode a relative jump to a label (direct or conditional) +// Encode a conditional relative jump to a label // Note: this always encodes a 32-bit offset -fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) { - // Write the opcode - if op0 != 0xff { - cb.write_byte(op0); - } - - cb.write_byte(op1); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int( 0, 32); +fn write_jcc(cb: &mut CodeBlock, op: u8, label_idx: usize) { + cb.label_ref(label_idx, 6, move |cb, src_addr, dst_addr| { + cb.write_byte(0x0F); + cb.write_byte(op); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// jcc - relative jumps to a label -pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jl_label (cb: &mut CodeBlock, label_idx: 
usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); } -pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); } -pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); } -pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); } -pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); } -pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); } -pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); } +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jnc_label (cb: &mut 
CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x81, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8b, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x89, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x80, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8B, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x88, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } + +pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE9); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} /// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 4e4c553c9d..061d21d19b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -7,26 +7,51 @@ use crate::asm::arm64::*; use crate::codegen::{JITState}; use crate::cruby::*; use crate::backend::ir::*; +use crate::virtualmem::CodePtr; // Use the arm64 register type for this platform pub type Reg = A64Reg; // Callee-saved registers -pub const _CFP: Opnd = Opnd::Reg(X9); -pub const _EC: Opnd = Opnd::Reg(X10); -pub const _SP: Opnd = Opnd::Reg(X11); +pub const _CFP: Opnd = Opnd::Reg(X24_REG); +pub const _EC: Opnd = Opnd::Reg(X25_REG); +pub const _SP: Opnd = Opnd::Reg(X26_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; // C return value register on this platform -pub const RET_REG: Reg = X0; +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// These constants define the way we work with Arm64's stack pointer. The stack +// pointer always needs to be aligned to a 16-byte boundary. 
+pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: A64Opnd = A64Opnd::UImm(16); /// Map Opnd to A64Opnd impl From<Opnd> for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { - Opnd::UImm(val) => uimm_opnd(val), - Opnd::Imm(val) => imm_opnd(val), + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), - _ => panic!("unsupported arm64 operand type") + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::None => panic!("attempted to lower an Opnd::None"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), } } } @@ -43,39 +68,368 @@ impl Assembler } /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. fn arm64_split(mut self) -> Assembler { - // The transformations done here are meant to make our lives simpler in later - // stages of the compilation pipeline. - // Here we may want to make sure that all instructions (except load and store) - // have no memory operands. + self.forward_pass(|asm, index, op, opnds, target| { + match op { + Op::Add | Op::Sub => { + // Check if one of the operands is a register. If it is, + // then we'll make that the first operand. + match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + let opnd0 = asm.load(opnds[0]); + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnd0, opnd1], target); + }, + (mem_opnd @ Opnd::Mem(_), other_opnd) | + (other_opnd, mem_opnd @ Opnd::Mem(_)) => { + let opnd0 = asm.load(mem_opnd); + asm.push_insn(op, vec![opnd0, other_opnd], target); + }, + _ => { + asm.push_insn(op, opnds, target); + } + } + }, + Op::IncrCounter => { + // Every operand to the IncrCounter instruction need to be a + // register once it gets there. So here we're going to load + // anything that isn't a register first. + let new_opnds: Vec<Opnd> = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Mem(_) | Opnd::Imm(_) | Opnd::UImm(_) => asm.load(opnd), + _ => opnd, + } + }).collect(); + + asm.incr_counter(new_opnds[0], new_opnds[1]); + }, + Op::Mov => { + // The value that is being moved must be either a register + // or an immediate that can be encoded as a bitmask + // immediate. Otherwise, we'll need to split the move into + // multiple instructions. + let value = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), + Opnd::UImm(uimm) => { + if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { + opnds[1] + } else { + asm.load(opnds[1]) + } + }, + _ => unreachable!() + }; + + /// If we're attempting to load into a memory operand, then + /// we'll switch over to the store instruction. Otherwise + /// we'll use the normal mov instruction. + match opnds[0] { + Opnd::Mem(_) => asm.store(opnds[0], value), + _ => asm.mov(opnds[0], value) + }; + }, + Op::Not => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. 
+ let opnd0 = match opnds[0] { + Opnd::Mem(_) => asm.load(opnds[0]), + _ => opnds[0] + }; + + asm.not(opnd0); + }, + Op::Store => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + _ => asm.load(opnds[1]) + }; - todo!(); + asm.store(opnds[0], opnd1); + }, + _ => { + asm.push_insn(op, opnds, target); + } + }; + }) } /// Emit platform-specific machine code /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> { - // NOTE: dear Kevin, - // for arm, you may want to reserve 1 or 2 caller-save registers - // to use as scracth registers (during the last phase of the codegen) - // These registers will not be allocated to anything by the register - // allocator, they're just useful because arm is slightly trickier - // than x86 to generate code for. - // For example, if you want to jump far away, you may want to store - // the jump target address in a register first. - - todo!(); + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { + match target { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + let offset = dst_addr - src_addr; + + // If the jump offset fits into the conditional jump as an + // immediate value and it's properly aligned, then we can + // use the b.cond instruction directly. Otherwise, we need + // to load the address into a register and use the branch + // register instruction. + if bcond_offset_fits_bits(offset) { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + } else { + // If the condition is met, then we'll skip past the + // next instruction, put the address in a register, and + // jump to it. + bcond(cb, condition, A64Opnd::new_imm(4)); + + // If the offset fits into a direct jump, then we'll use + // that and the number of instructions will be shorter. + // Otherwise we'll use the branch register instruction. + if b_offset_fits_bits(offset) { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that performs the direct jump. + b(cb, A64Opnd::new_imm(4)); + + // Here we'll perform the direct jump to the target. + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that perform the direct jump. + b(cb, A64Opnd::new_imm(8)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. 
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + }); + }, + Target::FunPtr(_) => unreachable!() + }; + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // For each instruction + for insn in &self.insns { + match insn.op { + Op::Comment => { + if cfg!(feature = "asm_comments") { + cb.add_comment(&insn.text.as_ref().unwrap()); + } + }, + Op::Label => { + cb.write_label(insn.target.unwrap().unwrap_label_idx()); + }, + Op::Add => { + add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Sub => { + sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::And => { + and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { + mvn(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::Store => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); + }, + Op::Load => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value being + // loaded is a heap object, we'll report that back out to + // the gc_offsets list. + if let Opnd::Value(val) = insn.opnds[0] { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + } + }, + Op::Mov => { + mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Lea => { + ldur(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CPush => { + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + mov(cb, A64Opnd::new_mem(64, C_SP_REG, 0), insn.opnds[0].into()); + }, + Op::CPop => { + mov(cb, insn.out.into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + }, + Op::CCall => { + // Temporary + assert!(insn.opnds.len() < C_ARG_REGS.len()); + + // For each operand + for (idx, opnd) in insn.opnds.iter().enumerate() { + mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + } + + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; + + // The offset between the two instructions in bytes. Note + // that when we encode this into a bl instruction, we'll + // divide by 4 because it accepts the number of instructions + // to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the branch + // link instruction. Otherwise, we'll move the destination + // and return address into appropriate registers and use the + // branch register instruction. 
+ if b_offset_fits_bits(offset) { + bl(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X30, A64Opnd::new_uimm(src_addr as u64)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Op::CRet => { + // TODO: bias allocation towards return register + if insn.opnds[0] != Opnd::Reg(C_RET_REG) { + mov(cb, C_RET_OPND.into(), insn.opnds[0].into()); + } + + ret(cb, A64Opnd::None); + }, + Op::Cmp => { + cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Test => { + tst(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::JmpOpnd => { + br(cb, insn.opnds[0].into()); + }, + Op::Jmp => { + match insn.target.unwrap() { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + + // The offset between the two instructions in bytes. + // Note that when we encode this into a b + // instruction, we'll divide by 4 because it accepts + // the number of instructions to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + if b_offset_fits_bits(offset) { + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4)); + }); + }, + _ => unreachable!() + }; + }, + Op::Je => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jbe => { + emit_conditional_jump(cb, Condition::LS, insn.target.unwrap()); + }, + Op::Jz => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jnz => { + emit_conditional_jump(cb, Condition::NE, insn.target.unwrap()); + }, + Op::Jo => { + emit_conditional_jump(cb, Condition::VS, insn.target.unwrap()); + }, + Op::IncrCounter => { + ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Breakpoint => { + brk(cb, A64Opnd::None); + } + }; + } + + gc_offsets } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> { - self - .arm64_split() - .split_loads() - .alloc_regs(regs) - .arm64_emit(jit, cb) + let mut asm = self.arm64_split().split_loads().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.arm64_emit(cb); + cb.link_labels(); + + gc_offsets + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, vec![X3_REG]); + + let insns = cb.get_ptr(0).raw_ptr() as *const u32; + assert_eq!(0x8b010003, unsafe { *insns }); } } diff --git a/yjit/src/backend/ir.rs 
b/yjit/src/backend/ir.rs index 4e8ed0b8a4..bdefe1c6bc 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -276,6 +276,13 @@ pub enum Target impl Target { + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 0841c9ffa5..4794695094 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -1,3 +1,8 @@ +#[cfg(target_arch = "x86_64")] pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + pub mod ir; -mod tests;
\ No newline at end of file
+mod tests;
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index f4e0d4f53a..19b5096a26 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -186,6 +186,9 @@ impl Assembler
                 for (idx, opnd) in insn.opnds.iter().enumerate() {
                     mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into());
                 }
+
+                let ptr = insn.target.unwrap().unwrap_fun_ptr();
+                call_ptr(cb, RAX, ptr);
             },

             Op::CRet => {
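
One detail worth pulling out of yjit/src/backend/arm64/mod.rs above: `emit_conditional_jump` degrades through three encodings depending on how far away the target is. Here is the selection logic restated as a standalone classifier; the enum and function names are invented for this sketch, while the two range checks mirror `bcond_offset_fits_bits` and `b_offset_fits_bits` from the diff.

```rust
/// Which instruction sequence emit_conditional_jump would pick for a given
/// byte offset from the jump site to the target.
#[derive(Debug, PartialEq)]
enum JumpKind {
    BCond,          // one b.cond straight to the target
    BCondOverB,     // b.cond skips over an unconditional b to the target
    BCondOverMovBr, // b.cond skips over mov (target into register) + br
}

/// Does a signed value fit in a two's-complement field of `num_bits` bits?
fn imm_fits_bits(imm: i64, num_bits: u8) -> bool {
    let min = -(1_i64 << (num_bits as u32 - 1));
    let max = (1_i64 << (num_bits as u32 - 1)) - 1;
    (min..=max).contains(&imm)
}

fn classify_conditional_jump(offset: i64) -> JumpKind {
    if imm_fits_bits(offset, 21) && offset & 0b11 == 0 {
        JumpKind::BCond // fits b.cond's 19-bit, 4-byte-aligned immediate field
    } else if imm_fits_bits(offset, 26) {
        JumpKind::BCondOverB
    } else {
        JumpKind::BCondOverMovBr
    }
}

fn main() {
    assert_eq!(classify_conditional_jump(1 << 10), JumpKind::BCond);
    assert_eq!(classify_conditional_jump(1 << 22), JumpKind::BCondOverB);
    assert_eq!(classify_conditional_jump(1 << 30), JumpKind::BCondOverMovBr);
}
```

Keeping `b.cond` as the first instruction of every tier means the conditional test itself never needs a far-branch form; only the landing sequence after it grows, which appears to be the motivation for the "nops so jumps are the same width" bullet in the commit message.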