diff options
author | Johan Tibell <johan.tibell@gmail.com> | 2014-06-27 13:48:24 +0200 |
---|---|---|
committer | Johan Tibell <johan.tibell@gmail.com> | 2014-06-30 22:12:45 +0200 |
commit | 4ee4ab01c1d97845aecb7707ad2f9a80933e7a49 (patch) | |
tree | 08991e97b0109b81447968ba62e4753e8209a738 /compiler/llvmGen | |
parent | aed1723f97e0539d5ab35222b180c1552a5f4cfc (diff) | |
download | haskell-4ee4ab01c1d97845aecb7707ad2f9a80933e7a49.tar.gz |
Re-add more primops for atomic ops on byte arrays
This is the second attempt to add this functionality. The first
attempt was reverted in 950fcae46a82569e7cd1fba1637a23b419e00ecd, due
to register allocator failure on x86. Given how the register
allocator currently works, we don't have enough registers on x86 to
support cmpxchg using complicated addressing modes. Instead we fall
back to a simpler addressing mode on x86.
Adds the following primops:
* atomicReadIntArray#
* atomicWriteIntArray#
* fetchSubIntArray#
* fetchOrIntArray#
* fetchXorIntArray#
* fetchAndIntArray#
Makes these pre-existing out-of-line primops inline:
* fetchAddIntArray#
* casIntArray#
Diffstat (limited to 'compiler/llvmGen')
-rw-r--r-- | compiler/llvmGen/Llvm/AbsSyn.hs | 7 | ||||
-rw-r--r-- | compiler/llvmGen/Llvm/PpLlvm.hs | 18 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 71 |
3 files changed, 66 insertions, 30 deletions
diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs index f92bd89c5c..24d0856ea3 100644 --- a/compiler/llvmGen/Llvm/AbsSyn.hs +++ b/compiler/llvmGen/Llvm/AbsSyn.hs @@ -65,6 +65,8 @@ data LlvmFunction = LlvmFunction { type LlvmFunctions = [LlvmFunction] +type SingleThreaded = Bool + -- | LLVM ordering types for synchronization purposes. (Introduced in LLVM -- 3.0). Please see the LLVM documentation for a better description. data LlvmSyncOrdering @@ -224,6 +226,11 @@ data LlvmExpression | Load LlvmVar {- | + Atomic load of the value at location ptr + -} + | ALoad LlvmSyncOrdering SingleThreaded LlvmVar + + {- | Navigate in an structure, selecting elements * inbound: Is the pointer inbounds? (computed pointer doesn't overflow) * ptr: Location of the structure diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs index 025078226d..73077257f8 100644 --- a/compiler/llvmGen/Llvm/PpLlvm.hs +++ b/compiler/llvmGen/Llvm/PpLlvm.hs @@ -239,6 +239,7 @@ ppLlvmExpression expr Insert vec elt idx -> ppInsert vec elt idx GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes Load ptr -> ppLoad ptr + ALoad ord st ptr -> ppALoad ord st ptr Malloc tp amount -> ppMalloc tp amount Phi tp precessors -> ppPhi tp precessors Asm asm c ty v se sk -> ppAsm asm c ty v se sk @@ -327,13 +328,18 @@ ppSyncOrdering SyncSeqCst = text "seq_cst" -- of specifying alignment. ppLoad :: LlvmVar -> SDoc -ppLoad var - | isVecPtrVar var = text "load" <+> ppr var <> - comma <+> text "align 1" - | otherwise = text "load" <+> ppr var +ppLoad var = text "load" <+> ppr var <> align where - isVecPtrVar :: LlvmVar -> Bool - isVecPtrVar = isVector . pLower . getVarType + align | isVector . pLower . getVarType $ var = text ", align 1" + | otherwise = empty + +ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc +ppALoad ord st var = sdocWithDynFlags $ \dflags -> + let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8 + align = text ", align" <+> ppr alignment + sThreaded | st = text " singlethread" + | otherwise = empty + in text "load atomic" <+> ppr var <> sThreaded <+> ppSyncOrdering ord <> align ppStore :: LlvmVar -> LlvmVar -> SDoc ppStore val dst diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 517553516b..4a56600937 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -15,6 +15,7 @@ import BlockId import CodeGen.Platform ( activeStgRegs, callerSaves ) import CLabel import Cmm +import CPrim import PprCmm import CmmUtils import Hoopl @@ -32,6 +33,7 @@ import Unique import Data.List ( nub ) import Data.Maybe ( catMaybes ) +type Atomic = Bool type LlvmStatements = OrdList LlvmStatement -- ----------------------------------------------------------------------------- @@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args = genCall t@(PrimTarget (MO_BSwap w)) dsts args = genCallSimpleCast w t dsts args +genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do + dstV <- getCmmReg (CmmLocal dst) + (v1, stmts, top) <- genLoad True addr (localRegType dst) + let stmt1 = Store v1 dstV + return (stmts `snocOL` stmt1, top) + +-- TODO: implement these properly rather than calling to RTS functions. +-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined +-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined +-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined + -- Handle memcpy function specifically since llvm's intrinsic version takes -- some extra parameters. genCall t@(PrimTarget op) [] args' @@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch" - MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported @@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do MO_Touch -> unsupported MO_UF_Conv _ -> unsupported + MO_AtomicRead _ -> unsupported + + MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop + MO_Cmpxchg w -> fsLit $ cmpxchgLabel w + MO_AtomicWrite w -> fsLit $ atomicWriteLabel w + -- | Tail function calls genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData @@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of -> genLit opt lit CmmLoad e' ty - -> genLoad e' ty + -> genLoad False e' ty -- Cmmreg in expression is the value, so must load. If you want actual -- reg pointer, call getCmmReg directly. @@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!" -- | Handle CmmLoad expression. -genLoad :: CmmExpr -> CmmType -> LlvmM ExprData +genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData -- First we try to detect a few common cases and produce better code for -- these then the default case. We are mostly trying to detect Cmm code -- like I32[Sp + n] and use 'getelementptr' operations instead of the -- generic case that uses casts and pointer arithmetic -genLoad e@(CmmReg (CmmGlobal r)) ty - = genLoad_fast e r 0 ty +genLoad atomic e@(CmmReg (CmmGlobal r)) ty + = genLoad_fast atomic e r 0 ty -genLoad e@(CmmRegOff (CmmGlobal r) n) ty - = genLoad_fast e r n ty +genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty + = genLoad_fast atomic e r n ty -genLoad e@(CmmMachOp (MO_Add _) [ +genLoad atomic e@(CmmMachOp (MO_Add _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) ty - = genLoad_fast e r (fromInteger n) ty + = genLoad_fast atomic e r (fromInteger n) ty -genLoad e@(CmmMachOp (MO_Sub _) [ +genLoad atomic e@(CmmMachOp (MO_Sub _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) ty - = genLoad_fast e r (negate $ fromInteger n) ty + = genLoad_fast atomic e r (negate $ fromInteger n) ty -- generic case -genLoad e ty +genLoad atomic e ty = do other <- getTBAAMeta otherN - genLoad_slow e ty other + genLoad_slow atomic e ty other -- | Handle CmmLoad expression. -- This is a special case for loading from a global register pointer -- offset such as I32[Sp+8]. -genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType - -> LlvmM ExprData -genLoad_fast e r n ty = do +genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType + -> LlvmM ExprData +genLoad_fast atomic e r n ty = do dflags <- getDynFlags (gv, grt, s1) <- getCmmRegVal (CmmGlobal r) meta <- getTBAARegMeta r @@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do case grt == ty' of -- were fine True -> do - (var, s3) <- doExpr ty' (MExpr meta $ Load ptr) + (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr) return (var, s1 `snocOL` s2 `snocOL` s3, []) @@ -1323,32 +1341,34 @@ genLoad_fast e r n ty = do False -> do let pty = pLift ty' (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty - (var, s4) <- doExpr ty' (MExpr meta $ Load ptr') + (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr') return (var, s1 `snocOL` s2 `snocOL` s3 `snocOL` s4, []) -- If its a bit type then we use the slow method since -- we can't avoid casting anyway. - False -> genLoad_slow e ty meta - + False -> genLoad_slow atomic e ty meta + where + loadInstr ptr | atomic = ALoad SyncSeqCst False ptr + | otherwise = Load ptr -- | Handle Cmm load expression. -- Generic case. Uses casts and pointer arithmetic if needed. -genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData -genLoad_slow e ty meta = do +genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData +genLoad_slow atomic e ty meta = do (iptr, stmts, tops) <- exprToVar e dflags <- getDynFlags case getVarType iptr of LMPointer _ -> do (dvar, load) <- doExpr (cmmToLlvmType ty) - (MExpr meta $ Load iptr) + (MExpr meta $ loadInstr iptr) return (dvar, stmts `snocOL` load, tops) i@(LMInt _) | i == llvmWord dflags -> do let pty = LMPointer $ cmmToLlvmType ty (ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty (dvar, load) <- doExpr (cmmToLlvmType ty) - (MExpr meta $ Load ptr) + (MExpr meta $ loadInstr ptr) return (dvar, stmts `snocOL` cast `snocOL` load, tops) other -> do dflags <- getDynFlags @@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do "Size of Ptr: " ++ show (llvmPtrBits dflags) ++ ", Size of var: " ++ show (llvmWidthInBits dflags other) ++ ", Var: " ++ showSDoc dflags (ppr iptr))) + where + loadInstr ptr | atomic = ALoad SyncSeqCst False ptr + | otherwise = Load ptr -- | Handle CmmReg expression. This will return a pointer to the stack |