summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2023-01-29 08:43:08 -0500
committerBen Gamari <ben@smart-cactus.org>2023-02-09 09:50:55 -0500
commit6e52fcb915baa1acb38ad2b1f313e8e6a89899f5 (patch)
tree89dc020ec41042b4199468f875d765ab37685783
parenta9912de75400b7006fc2eb6cb31c9bf5ae6dacd6 (diff)
downloadhaskell-wip/T22115.tar.gz
nativeGen/AArch64: Emit Atomic{Read,Write} inlinewip/T22115
Previously the AtomicRead and AtomicWrite operations were emitted as out-of-line calls. However, these tend to be very important for performance, especially the RELAXED case (which only exists for ThreadSanitizer checking). Fixes #22115.
-rw-r--r--compiler/GHC/CmmToAsm/AArch64/CodeGen.hs29
-rw-r--r--compiler/GHC/CmmToAsm/AArch64/Instr.hs8
-rw-r--r--compiler/GHC/CmmToAsm/AArch64/Ppr.hs2
3 files changed, 37 insertions, 2 deletions
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index 2d6a6d7c05..8f8864c516 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -1532,9 +1532,34 @@ genCCall target dest_regs arg_regs bid = do
MO_BRev w -> mkCCall (bRevLabel w)
-- -- Atomic read-modify-write.
+ MO_AtomicRead w ord
+ | [p_reg] <- arg_regs
+ , [dst_reg] <- dest_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ platform <- getPlatform
+ let instr = case ord of
+ MemOrderRelaxed -> LDR
+ _ -> LDAR
+ dst = getRegisterReg platform (CmmLocal dst_reg)
+ code =
+ code_p `snocOL`
+ instr (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)
+ return (code, Nothing)
+ | otherwise -> panic "mal-formed AtomicRead"
+ MO_AtomicWrite w ord
+ | [p_reg, val_reg] <- arg_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ (val, fmt_val, code_val) <- getSomeReg val_reg
+ let instr = case ord of
+ MemOrderRelaxed -> STR
+ _ -> STLR
+ code =
+ code_p `appOL`
+ code_val `snocOL`
+ instr fmt_val (OpReg w val) (OpAddr $ AddrReg p)
+ return (code, Nothing)
+ | otherwise -> panic "mal-formed AtomicWrite"
MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
- MO_AtomicRead w _ -> mkCCall (atomicReadLabel w)
- MO_AtomicWrite w _ -> mkCCall (atomicWriteLabel w)
MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
-- -- Should be an AtomicRMW variant eventually.
-- -- Sequential consistent.
diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs
index d9bec45439..ce28163477 100644
--- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs
@@ -127,7 +127,9 @@ regUsageOfInstr platform instr = case instr of
CBNZ src _ -> usage (regOp src, [])
-- 7. Load and Store Instructions --------------------------------------------
STR _ src dst -> usage (regOp src ++ regOp dst, [])
+ STLR _ src dst -> usage (regOp src ++ regOp dst, [])
LDR _ dst src -> usage (regOp src, regOp dst)
+ LDAR _ dst src -> usage (regOp src, regOp dst)
-- TODO is this right? see STR, which I'm only partial about being right?
STP _ src1 src2 dst -> usage (regOp src1 ++ regOp src2 ++ regOp dst, [])
LDP _ dst1 dst2 src -> usage (regOp src, regOp dst1 ++ regOp dst2)
@@ -263,7 +265,9 @@ patchRegsOfInstr instr env = case instr of
CBNZ o l -> CBNZ (patchOp o) l
-- 7. Load and Store Instructions ------------------------------------------
STR f o1 o2 -> STR f (patchOp o1) (patchOp o2)
+ STLR f o1 o2 -> STLR f (patchOp o1) (patchOp o2)
LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2)
+ LDAR f o1 o2 -> LDAR f (patchOp o1) (patchOp o2)
STP f o1 o2 o3 -> STP f (patchOp o1) (patchOp o2) (patchOp o3)
LDP f o1 o2 o3 -> LDP f (patchOp o1) (patchOp o2) (patchOp o3)
@@ -616,7 +620,9 @@ data Instr
-- Load and stores.
-- TODO STR/LDR might want to change to STP/LDP with XZR for the second register.
| STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr
+ | STLR Format Operand Operand -- stlr Xn, address-mode // Xn -> *addr
| LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr
+ | LDAR Format Operand Operand -- ldar Xn, address-mode // Xn <- *addr
| STP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn -> *addr, Xm -> *(addr + 8)
| LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8)
@@ -691,7 +697,9 @@ instrCon i =
ROR{} -> "ROR"
TST{} -> "TST"
STR{} -> "STR"
+ STLR{} -> "STLR"
LDR{} -> "LDR"
+ LDAR{} -> "LDAR"
STP{} -> "STP"
LDP{} -> "LDP"
CSET{} -> "CSET"
diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
index e34dcfeae9..475324afce 100644
--- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
@@ -469,6 +469,7 @@ pprInstr platform instr = case instr of
STR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
op2 (text "\tstrh") o1 o2
STR _f o1 o2 -> op2 (text "\tstr") o1 o2
+ STLR _f o1 o2 -> op2 (text "\tstlr") o1 o2
#if defined(darwin_HOST_OS)
LDR _f o1 (OpImm (ImmIndex lbl' off)) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' ->
@@ -533,6 +534,7 @@ pprInstr platform instr = case instr of
LDR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
op2 (text "\tldrh") o1 o2
LDR _f o1 o2 -> op2 (text "\tldr") o1 o2
+ LDAR _f o1 o2 -> op2 (text "\tldar") o1 o2
STP _f o1 o2 o3 -> op3 (text "\tstp") o1 o2 o3
LDP _f o1 o2 o3 -> op3 (text "\tldp") o1 o2 o3