author     Johan Tibell <johan.tibell@gmail.com>   2014-06-09 11:43:21 +0200
committer  Johan Tibell <johan.tibell@gmail.com>   2014-06-24 19:06:53 +0200
commit     d8abf85f8ca176854e9d5d0b12371c4bc402aac3 (patch)
tree       9542f2b8a7ee9b9759396e4172ad5fc4ce1d2f3c /compiler/llvmGen
parent     a4a79b5a04658ac542b1e07a6975b488fd589441 (diff)
download   haskell-d8abf85f8ca176854e9d5d0b12371c4bc402aac3.tar.gz
Add more primops for atomic ops on byte arrays
Summary:

Adds the following primops:

 * atomicReadIntArray#
 * atomicWriteIntArray#
 * fetchSubIntArray#
 * fetchOrIntArray#
 * fetchXorIntArray#
 * fetchAndIntArray#

Makes these pre-existing out-of-line primops inline:

 * fetchAddIntArray#
 * casIntArray#
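As a rough usage sketch (not part of the patch), the new primops can be wrapped in IO as below. The wrapper and module names are hypothetical; only the primop names and their byte-array/index arguments come from this commit.

{-# LANGUAGE MagicHash, UnboxedTuples #-}
module AtomicByteArraySketch where

import GHC.Exts
import GHC.IO (IO(..))

-- Atomically read the Int stored at word index i of a mutable byte array.
atomicReadInt :: MutableByteArray# RealWorld -> Int -> IO Int
atomicReadInt mba (I# i#) = IO $ \s ->
  case atomicReadIntArray# mba i# s of
    (# s', n# #) -> (# s', I# n# #)

-- Atomically write n to word index i of a mutable byte array.
atomicWriteInt :: MutableByteArray# RealWorld -> Int -> Int -> IO ()
atomicWriteInt mba (I# i#) (I# n#) = IO $ \s ->
  case atomicWriteIntArray# mba i# n# s of
    s' -> (# s', () #)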
Diffstat (limited to 'compiler/llvmGen')
-rw-r--r--   compiler/llvmGen/Llvm/AbsSyn.hs            7
-rw-r--r--   compiler/llvmGen/Llvm/PpLlvm.hs           18
-rw-r--r--   compiler/llvmGen/LlvmCodeGen/CodeGen.hs   71
3 files changed, 66 insertions, 30 deletions
diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs
index f92bd89c5c..24d0856ea3 100644
--- a/compiler/llvmGen/Llvm/AbsSyn.hs
+++ b/compiler/llvmGen/Llvm/AbsSyn.hs
@@ -65,6 +65,8 @@ data LlvmFunction = LlvmFunction {
type LlvmFunctions = [LlvmFunction]
+type SingleThreaded = Bool
+
-- | LLVM ordering types for synchronization purposes. (Introduced in LLVM
-- 3.0). Please see the LLVM documentation for a better description.
data LlvmSyncOrdering
@@ -224,6 +226,11 @@ data LlvmExpression
| Load LlvmVar
{- |
+ Atomic load of the value at location ptr
+ -}
+ | ALoad LlvmSyncOrdering SingleThreaded LlvmVar
+
+ {- |
Navigate in an structure, selecting elements
* inbound: Is the pointer inbounds? (computed pointer doesn't overflow)
* ptr: Location of the structure
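A minimal sketch (not from the patch) of how the new constructor is intended to be used: an atomic Cmm load maps to ALoad with seq_cst ordering and the cross-thread (False) flag, mirroring the loadInstr helper added to CodeGen.hs later in this commit. The mkLoad name is hypothetical, and the import assumes GHC's in-tree Llvm module is in scope.

import Llvm  -- re-exports Llvm.AbsSyn: LlvmExpression(..), LlvmVar, LlvmSyncOrdering(..)

-- Choose between a plain and an atomic load of a pointer variable.
mkLoad :: Bool -> LlvmVar -> LlvmExpression
mkLoad atomic ptr
  | atomic    = ALoad SyncSeqCst False ptr   -- seq_cst ordering, not singlethread
  | otherwise = Load ptr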
diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs
index 025078226d..73077257f8 100644
--- a/compiler/llvmGen/Llvm/PpLlvm.hs
+++ b/compiler/llvmGen/Llvm/PpLlvm.hs
@@ -239,6 +239,7 @@ ppLlvmExpression expr
Insert vec elt idx -> ppInsert vec elt idx
GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes
Load ptr -> ppLoad ptr
+ ALoad ord st ptr -> ppALoad ord st ptr
Malloc tp amount -> ppMalloc tp amount
Phi tp precessors -> ppPhi tp precessors
Asm asm c ty v se sk -> ppAsm asm c ty v se sk
@@ -327,13 +328,18 @@ ppSyncOrdering SyncSeqCst = text "seq_cst"
-- of specifying alignment.
ppLoad :: LlvmVar -> SDoc
-ppLoad var
- | isVecPtrVar var = text "load" <+> ppr var <>
- comma <+> text "align 1"
- | otherwise = text "load" <+> ppr var
+ppLoad var = text "load" <+> ppr var <> align
where
- isVecPtrVar :: LlvmVar -> Bool
- isVecPtrVar = isVector . pLower . getVarType
+ align | isVector . pLower . getVarType $ var = text ", align 1"
+ | otherwise = empty
+
+ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc
+ppALoad ord st var = sdocWithDynFlags $ \dflags ->
+ let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8
+ align = text ", align" <+> ppr alignment
+ sThreaded | st = text " singlethread"
+ | otherwise = empty
+ in text "load atomic" <+> ppr var <> sThreaded <+> ppSyncOrdering ord <> align
ppStore :: LlvmVar -> LlvmVar -> SDoc
ppStore val dst
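For orientation (not part of the patch): on a 64-bit target, an atomic word-sized load printed via ppALoad above shows up in the emitted .ll roughly as the line below, in the pre-LLVM-3.7 load syntax this backend uses; the assignment and SSA names come from the surrounding statement and are made up here.

  %res = load atomic i64* %ptr seq_cst, align 8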
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 517553516b..4a56600937 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -15,6 +15,7 @@ import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CLabel
import Cmm
+import CPrim
import PprCmm
import CmmUtils
import Hoopl
@@ -32,6 +33,7 @@ import Unique
import Data.List ( nub )
import Data.Maybe ( catMaybes )
+type Atomic = Bool
type LlvmStatements = OrdList LlvmStatement
-- -----------------------------------------------------------------------------
@@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
+genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
+ dstV <- getCmmReg (CmmLocal dst)
+ (v1, stmts, top) <- genLoad True addr (localRegType dst)
+ let stmt1 = Store v1 dstV
+ return (stmts `snocOL` stmt1, top)
+
+-- TODO: implement these properly rather than calling to RTS functions.
+-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined
+-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined
+-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined
+
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
genCall t@(PrimTarget op) [] args'
@@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do
(MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
-
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
@@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do
MO_Touch -> unsupported
MO_UF_Conv _ -> unsupported
+ MO_AtomicRead _ -> unsupported
+
+ MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
+ MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
+ MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
+
-- | Tail function calls
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
@@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of
-> genLit opt lit
CmmLoad e' ty
- -> genLoad e' ty
+ -> genLoad False e' ty
-- Cmmreg in expression is the value, so must load. If you want actual
-- reg pointer, call getCmmReg directly.
@@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!"
-- | Handle CmmLoad expression.
-genLoad :: CmmExpr -> CmmType -> LlvmM ExprData
+genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
-- First we try to detect a few common cases and produce better code for
-- these then the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic
-genLoad e@(CmmReg (CmmGlobal r)) ty
- = genLoad_fast e r 0 ty
+genLoad atomic e@(CmmReg (CmmGlobal r)) ty
+ = genLoad_fast atomic e r 0 ty
-genLoad e@(CmmRegOff (CmmGlobal r) n) ty
- = genLoad_fast e r n ty
+genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
+ = genLoad_fast atomic e r n ty
-genLoad e@(CmmMachOp (MO_Add _) [
+genLoad atomic e@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
- = genLoad_fast e r (fromInteger n) ty
+ = genLoad_fast atomic e r (fromInteger n) ty
-genLoad e@(CmmMachOp (MO_Sub _) [
+genLoad atomic e@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
- = genLoad_fast e r (negate $ fromInteger n) ty
+ = genLoad_fast atomic e r (negate $ fromInteger n) ty
-- generic case
-genLoad e ty
+genLoad atomic e ty
= do other <- getTBAAMeta otherN
- genLoad_slow e ty other
+ genLoad_slow atomic e ty other
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
-genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType
- -> LlvmM ExprData
-genLoad_fast e r n ty = do
+genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
+ -> LlvmM ExprData
+genLoad_fast atomic e r n ty = do
dflags <- getDynFlags
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do
case grt == ty' of
-- were fine
True -> do
- (var, s3) <- doExpr ty' (MExpr meta $ Load ptr)
+ (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
return (var, s1 `snocOL` s2 `snocOL` s3,
[])
@@ -1323,32 +1341,34 @@ genLoad_fast e r n ty = do
False -> do
let pty = pLift ty'
(ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
- (var, s4) <- doExpr ty' (MExpr meta $ Load ptr')
+ (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
return (var, s1 `snocOL` s2 `snocOL` s3
`snocOL` s4, [])
-- If its a bit type then we use the slow method since
-- we can't avoid casting anyway.
- False -> genLoad_slow e ty meta
-
+ False -> genLoad_slow atomic e ty meta
+ where
+ loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
-genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
-genLoad_slow e ty meta = do
+genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
+genLoad_slow atomic e ty meta = do
(iptr, stmts, tops) <- exprToVar e
dflags <- getDynFlags
case getVarType iptr of
LMPointer _ -> do
(dvar, load) <- doExpr (cmmToLlvmType ty)
- (MExpr meta $ Load iptr)
+ (MExpr meta $ loadInstr iptr)
return (dvar, stmts `snocOL` load, tops)
i@(LMInt _) | i == llvmWord dflags -> do
let pty = LMPointer $ cmmToLlvmType ty
(ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty
(dvar, load) <- doExpr (cmmToLlvmType ty)
- (MExpr meta $ Load ptr)
+ (MExpr meta $ loadInstr ptr)
return (dvar, stmts `snocOL` cast `snocOL` load, tops)
other -> do dflags <- getDynFlags
@@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do
"Size of Ptr: " ++ show (llvmPtrBits dflags) ++
", Size of var: " ++ show (llvmWidthInBits dflags other) ++
", Var: " ++ showSDoc dflags (ppr iptr)))
+ where
+ loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr
-- | Handle CmmReg expression. This will return a pointer to the stack