diff options
author | Ben Gamari <bgamari.foss@gmail.com> | 2015-10-02 15:48:55 +0200 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-10-02 15:51:09 +0200 |
commit | bd41eb2ad9507d3f408e25c8dece61f389f11a2a (patch) | |
tree | e5477baad577afef9f8a357837fe00c3b222aba3 /compiler | |
parent | b29f20edb1ca7f1763ceb001e2bb2d5f2f11bec3 (diff) | |
download | haskell-bd41eb2ad9507d3f408e25c8dece61f389f11a2a.tar.gz |
LLVM: Implement atomic operations in terms of LLVM primitives
This fixes Trac #7883.
This adds proper support for,
* `MO_AtomicRMW`
* `MO_AtomicWrite`
* `MO_CmpXChg`
Test Plan: Validate
Reviewers: rrnewton, austin
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D1282
GHC Trac Issues: #7883
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/llvmGen/Llvm.hs | 3 | ||||
-rw-r--r-- | compiler/llvmGen/Llvm/AbsSyn.hs | 42 | ||||
-rw-r--r-- | compiler/llvmGen/Llvm/PpLlvm.hs | 26 | ||||
-rw-r--r-- | compiler/llvmGen/Llvm/Types.hs | 2 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 68 |
5 files changed, 124 insertions, 17 deletions
diff --git a/compiler/llvmGen/Llvm.hs b/compiler/llvmGen/Llvm.hs index 85095997ae..b245422dbc 100644 --- a/compiler/llvmGen/Llvm.hs +++ b/compiler/llvmGen/Llvm.hs @@ -20,6 +20,9 @@ module Llvm ( LlvmBlocks, LlvmBlock(..), LlvmBlockId, LlvmParamAttr(..), LlvmParameter, + -- * Atomic operations + LlvmAtomicOp(..), + -- * Fence synchronization LlvmSyncOrdering(..), diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs index 8a53df00fe..774e555170 100644 --- a/compiler/llvmGen/Llvm/AbsSyn.hs +++ b/compiler/llvmGen/Llvm/AbsSyn.hs @@ -87,6 +87,22 @@ data LlvmSyncOrdering | SyncSeqCst deriving (Show, Eq) +-- | LLVM atomic operations. Please see the @atomicrmw@ instruction in +-- the LLVM documentation for a complete description. +data LlvmAtomicOp + = LAO_Xchg + | LAO_Add + | LAO_Sub + | LAO_And + | LAO_Nand + | LAO_Or + | LAO_Xor + | LAO_Max + | LAO_Min + | LAO_Umax + | LAO_Umin + deriving (Show, Eq) + -- | Llvm Statements data LlvmStatement {- | @@ -250,8 +266,8 @@ data LlvmExpression | GetElemPtr Bool LlvmVar [LlvmVar] {- | - Cast the variable from to the to type. This is an abstraction of three - cast operators in Llvm, inttoptr, prttoint and bitcast. + Cast the variable from to the to type. This is an abstraction of three + cast operators in Llvm, inttoptr, prttoint and bitcast. * cast: Cast type * from: Variable to cast * to: type to cast to @@ -259,6 +275,28 @@ data LlvmExpression | Cast LlvmCastOp LlvmVar LlvmType {- | + Atomic read-modify-write operation + * op: Atomic operation + * addr: Address to modify + * operand: Operand to operation + * ordering: Ordering requirement + -} + | AtomicRMW LlvmAtomicOp LlvmVar LlvmVar LlvmSyncOrdering + + {- | + Compare-and-exchange operation + * addr: Address to modify + * old: Expected value + * new: New value + * suc_ord: Ordering required in success case + * fail_ord: Ordering required in failure case, can be no stronger than + suc_ord + + Result is an @i1@, true if store was successful. + -} + | CmpXChg LlvmVar LlvmVar LlvmVar LlvmSyncOrdering LlvmSyncOrdering + + {- | Call a function. The result is the value of the expression. * tailJumps: CallType to signal if the function should be tail called * fnptrval: An LLVM value containing a pointer to a function to be diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs index db9ef1fccf..8476b9d585 100644 --- a/compiler/llvmGen/Llvm/PpLlvm.hs +++ b/compiler/llvmGen/Llvm/PpLlvm.hs @@ -245,6 +245,8 @@ ppLlvmExpression expr Load ptr -> ppLoad ptr ALoad ord st ptr -> ppALoad ord st ptr Malloc tp amount -> ppMalloc tp amount + AtomicRMW aop tgt src ordering -> ppAtomicRMW aop tgt src ordering + CmpXChg addr old new s_ord f_ord -> ppCmpXChg addr old new s_ord f_ord Phi tp precessors -> ppPhi tp precessors Asm asm c ty v se sk -> ppAsm asm c ty v se sk MExpr meta expr -> ppMetaExpr meta expr @@ -327,6 +329,30 @@ ppSyncOrdering SyncRelease = text "release" ppSyncOrdering SyncAcqRel = text "acq_rel" ppSyncOrdering SyncSeqCst = text "seq_cst" +ppAtomicOp :: LlvmAtomicOp -> SDoc +ppAtomicOp LAO_Xchg = text "xchg" +ppAtomicOp LAO_Add = text "add" +ppAtomicOp LAO_Sub = text "sub" +ppAtomicOp LAO_And = text "and" +ppAtomicOp LAO_Nand = text "nand" +ppAtomicOp LAO_Or = text "or" +ppAtomicOp LAO_Xor = text "xor" +ppAtomicOp LAO_Max = text "max" +ppAtomicOp LAO_Min = text "min" +ppAtomicOp LAO_Umax = text "umax" +ppAtomicOp LAO_Umin = text "umin" + +ppAtomicRMW :: LlvmAtomicOp -> LlvmVar -> LlvmVar -> LlvmSyncOrdering -> SDoc +ppAtomicRMW aop tgt src ordering = + text "atomicrmw" <+> ppAtomicOp aop <+> ppr tgt <> comma + <+> ppr src <+> ppSyncOrdering ordering + +ppCmpXChg :: LlvmVar -> LlvmVar -> LlvmVar + -> LlvmSyncOrdering -> LlvmSyncOrdering -> SDoc +ppCmpXChg addr old new s_ord f_ord = + text "cmpxchg" <+> ppr addr <> comma <+> ppr old <> comma <+> ppr new + <+> ppSyncOrdering s_ord <+> ppSyncOrdering f_ord + -- XXX: On x86, vector types need to be 16-byte aligned for aligned access, but -- we have no way of guaranteeing that this is true with GHC (we would need to -- modify the layout of the stack and closures, change the storage manager, diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs index b3b173096b..9780bf39cf 100644 --- a/compiler/llvmGen/Llvm/Types.hs +++ b/compiler/llvmGen/Llvm/Types.hs @@ -262,7 +262,7 @@ pLift LMVoid = error "Voids are unliftable" pLift LMMetadata = error "Metadatas are unliftable" pLift x = LMPointer x --- | Lower a variable of 'LMPointer' type. +-- | Lift a variable to 'LMPointer' type. pVarLift :: LlvmVar -> LlvmVar pVarLift (LMGlobalVar s t l x a c) = LMGlobalVar s (pLift t) l x a c pVarLift (LMLocalVar s t ) = LMLocalVar s (pLift t) diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index ed046be891..6e516b8766 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -15,7 +15,6 @@ import BlockId import CodeGen.Platform ( activeStgRegs, callerSaves ) import CLabel import Cmm -import CPrim import PprCmm import CmmUtils import CmmSwitch @@ -226,16 +225,58 @@ genCall t@(PrimTarget (MO_Ctz w)) dsts args = genCall t@(PrimTarget (MO_BSwap w)) dsts args = genCallSimpleCast w t dsts args -genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do - dstV <- getCmmReg (CmmLocal dst) - (v1, stmts, top) <- genLoad True addr (localRegType dst) - let stmt1 = Store v1 dstV - return (stmts `snocOL` stmt1, top) +genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do + (addrVar, stmts1, decls1) <- exprToVar addr + (nVar, stmts2, decls2) <- exprToVar n + let targetTy = widthToLlvmInt width + ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy) + (ptrVar, stmt3) <- doExpr (pLift targetTy) ptrExpr + dstVar <- getCmmReg (CmmLocal dst) + let op = case amop of + AMO_Add -> LAO_Add + AMO_Sub -> LAO_Sub + AMO_And -> LAO_And + AMO_Nand -> LAO_Nand + AMO_Or -> LAO_Or + AMO_Xor -> LAO_Xor + (retVar, stmt4) <- doExpr targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst + let stmt5 = Store retVar dstVar + let stmts = stmts1 `appOL` stmts2 `snocOL` + stmt3 `snocOL` stmt4 `snocOL` stmt5 + return (stmts, decls1++decls2) --- TODO: implement these properly rather than calling to RTS functions. --- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined --- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined --- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined +genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do + dstV <- getCmmReg (CmmLocal dst) + (v1, stmts, top) <- genLoad True addr (localRegType dst) + let stmt1 = Store v1 dstV + return (stmts `snocOL` stmt1, top) + +genCall (PrimTarget (MO_Cmpxchg _width)) [dst] [addr, old, new] = do + (addrVar, stmts1, decls1) <- exprToVar addr + (oldVar, stmts2, decls2) <- exprToVar old + (newVar, stmts3, decls3) <- exprToVar new + let targetTy = getVarType oldVar + ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy) + (ptrVar, stmt4) <- doExpr (pLift targetTy) ptrExpr + dstVar <- getCmmReg (CmmLocal dst) + (retVar, stmt5) <- doExpr (LMStructU [targetTy,i1]) + $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst + (retVar', stmt6) <- doExpr targetTy $ ExtractV retVar 0 + let stmt7 = Store retVar' dstVar + stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` + stmt4 `snocOL` stmt5 `snocOL` stmt6 `snocOL` stmt7 + return (stmts, decls1 ++ decls2 ++ decls3) + +genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = do + (addrVar, stmts1, decls1) <- exprToVar addr + (valVar, stmts2, decls2) <- exprToVar val + let ptrTy = pLift $ getVarType valVar + ptrExpr = Cast LM_Inttoptr addrVar ptrTy + (ptrVar, stmt3) <- doExpr ptrTy ptrExpr + let stmts4 = unitOL $ Expr + $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst + stmts = stmts1 `appOL` stmts2 `snocOL` stmt3 `appOL` stmts4 + return (stmts, decls1++decls2) -- Handle memcpy function specifically since llvm's intrinsic version takes -- some extra parameters. @@ -715,10 +756,9 @@ cmmPrimOpFunctions mop = do MO_UF_Conv _ -> unsupported MO_AtomicRead _ -> unsupported - - MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop - MO_Cmpxchg w -> fsLit $ cmpxchgLabel w - MO_AtomicWrite w -> fsLit $ atomicWriteLabel w + MO_AtomicRMW _ _ -> unsupported + MO_AtomicWrite _ -> unsupported + MO_Cmpxchg _ -> unsupported -- | Tail function calls genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData |