summaryrefslogtreecommitdiff
path: root/compiler/GHC
diff options
context:
space:
mode:
authorPeter Trommler <ptrommler@acm.org>2021-04-17 17:59:44 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-08-02 04:11:27 -0400
commitb4d39adbb5884c764c6c11b2614a340c78cc078e (patch)
tree57eb45d9078c90c34f8743b961bf87789e292ae8 /compiler/GHC
parent7e8c578ed9d3469d6a5c1481f9482982c42f10ea (diff)
downloadhaskell-b4d39adbb5884c764c6c11b2614a340c78cc078e.tar.gz
PrimOps: Add CAS op for all int sizes
PPC NCG: Implement CAS inline for 32 and 64 bit testsuite: Add tests for smaller atomic CAS X86 NCG: Catch calls to CAS C fallback Primops: Add atomicCasWord[8|16|32|64]Addr# Add tests for atomicCasWord[8|16|32|64]Addr# Add changelog entry for new primops X86 NCG: Fix MO-Cmpxchg W64 on 32-bit arch ghc-prim: 64-bit CAS C fallback on all archs
Diffstat (limited to 'compiler/GHC')
-rw-r--r--compiler/GHC/Builtin/primops.txt.pp96
-rw-r--r--compiler/GHC/CmmToAsm/PPC/CodeGen.hs33
-rw-r--r--compiler/GHC/CmmToAsm/X86/CodeGen.hs7
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs18
4 files changed, 150 insertions, 4 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index 5f5cd64cfa..b07c344e18 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -1927,6 +1927,46 @@ primop CasByteArrayOp_Int "casIntArray#" GenPrimOp
with has_side_effects = True
can_fail = True
+primop CasByteArrayOp_Int8 "casInt8Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int8# -> Int8# -> State# s -> (# State# s, Int8# #)
+ {Given an array, an offset in bytes, the expected old value, and
+ the new value, perform an atomic compare and swap i.e. write the new
+ value if the current value matches the provided old value. Returns
+ the value of the element before the operation. Implies a full memory
+ barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasByteArrayOp_Int16 "casInt16Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int16# -> Int16# -> State# s -> (# State# s, Int16# #)
+ {Given an array, an offset in 16 bit units, the expected old value, and
+ the new value, perform an atomic compare and swap i.e. write the new
+ value if the current value matches the provided old value. Returns
+ the value of the element before the operation. Implies a full memory
+ barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasByteArrayOp_Int32 "casInt32Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int32# -> Int32# -> State# s -> (# State# s, Int32# #)
+ {Given an array, an offset in 32 bit units, the expected old value, and
+ the new value, perform an atomic compare and swap i.e. write the new
+ value if the current value matches the provided old value. Returns
+ the value of the element before the operation. Implies a full memory
+ barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasByteArrayOp_Int64 "casInt64Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> INT64 -> INT64 -> State# s -> (# State# s, INT64 #)
+ {Given an array, an offset in 64 bit units, the expected old value, and
+ the new value, perform an atomic compare and swap i.e. write the new
+ value if the current value matches the provided old value. Returns
+ the value of the element before the operation. Implies a full memory
+ barrier.}
+ with has_side_effects = True
+ can_fail = True
+
primop FetchAddByteArrayOp_Int "fetchAddIntArray#" GenPrimOp
MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
{Given an array, and offset in machine words, and a value to add,
@@ -2387,6 +2427,62 @@ primop CasAddrOp_Word "atomicCasWordAddr#" GenPrimOp
with has_side_effects = True
can_fail = True
+primop CasAddrOp_Word8 "atomicCasWord8Addr#" GenPrimOp
+ Addr# -> Word8# -> Word8# -> State# s -> (# State# s, Word8# #)
+ { Compare and swap on a 8 bit-sized and aligned memory location.
+
+ Use as: \s -> atomicCasWordAddr8# location expected desired s
+
+ This version always returns the old value read. This follows the normal
+ protocol for CAS operations (and matches the underlying instruction on
+ most architectures).
+
+ Implies a full memory barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasAddrOp_Word16 "atomicCasWord16Addr#" GenPrimOp
+ Addr# -> Word16# -> Word16# -> State# s -> (# State# s, Word16# #)
+ { Compare and swap on a 16 bit-sized and aligned memory location.
+
+ Use as: \s -> atomicCasWordAddr16# location expected desired s
+
+ This version always returns the old value read. This follows the normal
+ protocol for CAS operations (and matches the underlying instruction on
+ most architectures).
+
+ Implies a full memory barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasAddrOp_Word32 "atomicCasWord32Addr#" GenPrimOp
+ Addr# -> Word32# -> Word32# -> State# s -> (# State# s, Word32# #)
+ { Compare and swap on a 32 bit-sized and aligned memory location.
+
+ Use as: \s -> atomicCasWordAddr32# location expected desired s
+
+ This version always returns the old value read. This follows the normal
+ protocol for CAS operations (and matches the underlying instruction on
+ most architectures).
+
+ Implies a full memory barrier.}
+ with has_side_effects = True
+ can_fail = True
+
+primop CasAddrOp_Word64 "atomicCasWord64Addr#" GenPrimOp
+ Addr# -> WORD64 -> WORD64 -> State# s -> (# State# s, WORD64 #)
+ { Compare and swap on a 64 bit-sized and aligned memory location.
+
+ Use as: \s -> atomicCasWordAddr64# location expected desired s
+
+ This version always returns the old value read. This follows the normal
+ protocol for CAS operations (and matches the underlying instruction on
+ most architectures).
+
+ Implies a full memory barrier.}
+ with has_side_effects = True
+ can_fail = True
+
primop FetchAddAddrOp_Word "fetchAddWordAddr#" GenPrimOp
Addr# -> Word# -> State# s -> (# State# s, Word# #)
{Given an address, and a value to add,
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
index 67bc3d9bdb..1c3b244980 100644
--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
@@ -1221,7 +1221,38 @@ genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
code <- assignMem_IntCode (intFormat width) addr val
- return $ unitOL(HWSYNC) `appOL` code
+ return $ unitOL HWSYNC `appOL` code
+
+genCCall (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new]
+ | width == W32 || width == W64
+ = do
+ platform <- getPlatform
+ (old_reg, old_code) <- getSomeReg old
+ (new_reg, new_code) <- getSomeReg new
+ (addr_reg, addr_code) <- getSomeReg addr
+ lbl_retry <- getBlockIdNat
+ lbl_eq <- getBlockIdNat
+ lbl_end <- getBlockIdNat
+ let reg_dst = getRegisterReg platform (CmmLocal dst)
+ code = toOL
+ [ HWSYNC
+ , BCC ALWAYS lbl_retry Nothing
+ , NEWBLOCK lbl_retry
+ , LDR format reg_dst (AddrRegReg r0 addr_reg)
+ , CMP format reg_dst (RIReg old_reg)
+ , BCC NE lbl_end Nothing
+ , BCC ALWAYS lbl_eq Nothing
+ , NEWBLOCK lbl_eq
+ , STC format new_reg (AddrRegReg r0 addr_reg)
+ , BCC NE lbl_retry Nothing
+ , BCC ALWAYS lbl_end Nothing
+ , NEWBLOCK lbl_end
+ , ISYNC
+ ]
+ return $ addr_code `appOL` new_code `appOL` old_code `appOL` code
+ where
+ format = intFormat width
+
genCCall (PrimTarget (MO_Clz width)) [dst] [src]
= do platform <- getPlatform
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 5e7c261cbb..1ab24c4a25 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -2595,10 +2595,11 @@ genCCall' _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
code <- assignMem_IntCode (intFormat width) addr val
return $ code `snocOL` MFENCE
-genCCall' _ is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do
+genCCall' _ is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _
-- On x86 we don't have enough registers to use cmpxchg with a
-- complicated addressing mode, so on that architecture we
-- pre-compute the address first.
+ | not (is32Bit && width == W64) = do
Amode amode addr_code <- getSimpleAmode is32Bit addr
newval <- getNewRegNat format
newval_code <- getAnyReg new
@@ -3441,7 +3442,9 @@ outOfLineCmmOp bid mop res args
MO_AtomicRMW _ _ -> fsLit "atomicrmw"
MO_AtomicRead _ -> fsLit "atomicread"
MO_AtomicWrite _ -> fsLit "atomicwrite"
- MO_Cmpxchg _ -> fsLit "cmpxchg"
+ MO_Cmpxchg w -> cmpxchgLabel w -- for W64 on 32-bit
+ -- TODO: implement
+ -- cmpxchg8b instr
MO_Xchg _ -> should_be_inline
MO_UF_Conv _ -> unsupported
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index d61880a0e2..290ace9f01 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -872,6 +872,14 @@ emitPrimOp dflags primop = case primop of
emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
CasAddrOp_Word -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
emitPrimCall [res] (MO_Cmpxchg (wordWidth platform)) [dst, expected, new]
+ CasAddrOp_Word8 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
+ emitPrimCall [res] (MO_Cmpxchg W8) [dst, expected, new]
+ CasAddrOp_Word16 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
+ emitPrimCall [res] (MO_Cmpxchg W16) [dst, expected, new]
+ CasAddrOp_Word32 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
+ emitPrimCall [res] (MO_Cmpxchg W32) [dst, expected, new]
+ CasAddrOp_Word64 -> \[dst, expected, new] -> opIntoRegs $ \[res] ->
+ emitPrimCall [res] (MO_Cmpxchg W64) [dst, expected, new]
-- SIMD primops
(VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do
@@ -1075,6 +1083,14 @@ emitPrimOp dflags primop = case primop of
doAtomicWriteByteArray mba ix (bWord platform) val
CasByteArrayOp_Int -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
doCasByteArray res mba ix (bWord platform) old new
+ CasByteArrayOp_Int8 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
+ doCasByteArray res mba ix b8 old new
+ CasByteArrayOp_Int16 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
+ doCasByteArray res mba ix b16 old new
+ CasByteArrayOp_Int32 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
+ doCasByteArray res mba ix b32 old new
+ CasByteArrayOp_Int64 -> \[mba, ix, old, new] -> opIntoRegs $ \[res] ->
+ doCasByteArray res mba ix b64 old new
-- The rest just translate straightforwardly
@@ -3092,7 +3108,7 @@ doCasByteArray
doCasByteArray res mba idx idx_ty old new = do
profile <- getProfile
platform <- getPlatform
- let width = (typeWidth idx_ty)
+ let width = typeWidth idx_ty
addr = cmmIndexOffExpr platform (arrWordsHdrSize profile)
width mba idx
emitPrimCall