diff options
author | Ben Gamari <ben@smart-cactus.org> | 2023-01-29 08:43:08 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2023-02-14 11:30:27 -0500 |
commit | 759304244f15992a098a924ebd93f295971da422 (patch) | |
tree | 6f40f077494dc7fff6fa9805eead5d842e6dd7bf | |
parent | 11de324aae17794c8753a8f7cb50c4140785fe27 (diff) | |
download | haskell-759304244f15992a098a924ebd93f295971da422.tar.gz |
nativeGen/AArch64: Emit Atomic{Read,Write} inline
Previously the AtomicRead and AtomicWrite operations were emitted as
out-of-line calls. However, these operations tend to be very important
for performance, especially in the RELAXED case (which exists only for
ThreadSanitizer checking).
Fixes #22115.
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/CodeGen.hs | 29 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/Instr.hs | 8 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/Ppr.hs | 2 |
3 files changed, 37 insertions, 2 deletions
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index 2d6a6d7c05..8f8864c516 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -1532,9 +1532,34 @@ genCCall target dest_regs arg_regs bid = do MO_BRev w -> mkCCall (bRevLabel w) -- -- Atomic read-modify-write. + MO_AtomicRead w ord + | [p_reg] <- arg_regs + , [dst_reg] <- dest_regs -> do + (p, _fmt_p, code_p) <- getSomeReg p_reg + platform <- getPlatform + let instr = case ord of + MemOrderRelaxed -> LDR + _ -> LDAR + dst = getRegisterReg platform (CmmLocal dst_reg) + code = + code_p `snocOL` + instr (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p) + return (code, Nothing) + | otherwise -> panic "mal-formed AtomicRead" + MO_AtomicWrite w ord + | [p_reg, val_reg] <- arg_regs -> do + (p, _fmt_p, code_p) <- getSomeReg p_reg + (val, fmt_val, code_val) <- getSomeReg val_reg + let instr = case ord of + MemOrderRelaxed -> STR + _ -> STLR + code = + code_p `appOL` + code_val `snocOL` + instr fmt_val (OpReg w val) (OpAddr $ AddrReg p) + return (code, Nothing) + | otherwise -> panic "mal-formed AtomicWrite" MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop) - MO_AtomicRead w _ -> mkCCall (atomicReadLabel w) - MO_AtomicWrite w _ -> mkCCall (atomicWriteLabel w) MO_Cmpxchg w -> mkCCall (cmpxchgLabel w) -- -- Should be an AtomicRMW variant eventually. -- -- Sequential consistent. diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs index d9bec45439..ce28163477 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -127,7 +127,9 @@ regUsageOfInstr platform instr = case instr of CBNZ src _ -> usage (regOp src, []) -- 7. 
Load and Store Instructions -------------------------------------------- STR _ src dst -> usage (regOp src ++ regOp dst, []) + STLR _ src dst -> usage (regOp src ++ regOp dst, []) LDR _ dst src -> usage (regOp src, regOp dst) + LDAR _ dst src -> usage (regOp src, regOp dst) -- TODO is this right? see STR, which I'm only partial about being right? STP _ src1 src2 dst -> usage (regOp src1 ++ regOp src2 ++ regOp dst, []) LDP _ dst1 dst2 src -> usage (regOp src, regOp dst1 ++ regOp dst2) @@ -263,7 +265,9 @@ patchRegsOfInstr instr env = case instr of CBNZ o l -> CBNZ (patchOp o) l -- 7. Load and Store Instructions ------------------------------------------ STR f o1 o2 -> STR f (patchOp o1) (patchOp o2) + STLR f o1 o2 -> STLR f (patchOp o1) (patchOp o2) LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2) + LDAR f o1 o2 -> LDAR f (patchOp o1) (patchOp o2) STP f o1 o2 o3 -> STP f (patchOp o1) (patchOp o2) (patchOp o3) LDP f o1 o2 o3 -> LDP f (patchOp o1) (patchOp o2) (patchOp o3) @@ -616,7 +620,9 @@ data Instr -- Load and stores. -- TODO STR/LDR might want to change to STP/LDP with XZR for the second register. 
| STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr + | STLR Format Operand Operand -- stlr Xn, address-mode // Xn -> *addr | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr + | LDAR Format Operand Operand -- ldar Xn, address-mode // Xn <- *addr | STP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn -> *addr, Xm -> *(addr + 8) | LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8) @@ -691,7 +697,9 @@ instrCon i = ROR{} -> "ROR" TST{} -> "TST" STR{} -> "STR" + STLR{} -> "STLR" LDR{} -> "LDR" + LDAR{} -> "LDAR" STP{} -> "STP" LDP{} -> "LDP" CSET{} -> "CSET" diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs index e34dcfeae9..475324afce 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -469,6 +469,7 @@ pprInstr platform instr = case instr of STR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 -> op2 (text "\tstrh") o1 o2 STR _f o1 o2 -> op2 (text "\tstr") o1 o2 + STLR _f o1 o2 -> op2 (text "\tstlr") o1 o2 #if defined(darwin_HOST_OS) LDR _f o1 (OpImm (ImmIndex lbl' off)) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' -> @@ -533,6 +534,7 @@ pprInstr platform instr = case instr of LDR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 -> op2 (text "\tldrh") o1 o2 LDR _f o1 o2 -> op2 (text "\tldr") o1 o2 + LDAR _f o1 o2 -> op2 (text "\tldar") o1 o2 STP _f o1 o2 o3 -> op3 (text "\tstp") o1 o2 o3 LDP _f o1 o2 o3 -> op3 (text "\tldp") o1 o2 o3 |