author    Johan Tibell <johan.tibell@gmail.com>    2014-06-27 13:48:24 +0200
committer Johan Tibell <johan.tibell@gmail.com>    2014-06-30 22:12:45 +0200
commit    4ee4ab01c1d97845aecb7707ad2f9a80933e7a49 (patch)
tree      08991e97b0109b81447968ba62e4753e8209a738 /compiler/llvmGen
parent    aed1723f97e0539d5ab35222b180c1552a5f4cfc (diff)
download  haskell-4ee4ab01c1d97845aecb7707ad2f9a80933e7a49.tar.gz
Re-add more primops for atomic ops on byte arrays
This is the second attempt to add this functionality. The first attempt
was reverted in 950fcae46a82569e7cd1fba1637a23b419e00ecd, due to
register allocator failure on x86. Given how the register allocator
currently works, we don't have enough registers on x86 to support
cmpxchg using complicated addressing modes. Instead we fall back to a
simpler addressing mode on x86.

Adds the following primops:

 * atomicReadIntArray#
 * atomicWriteIntArray#
 * fetchSubIntArray#
 * fetchOrIntArray#
 * fetchXorIntArray#
 * fetchAndIntArray#

Makes these pre-existing out-of-line primops inline:

 * fetchAddIntArray#
 * casIntArray#
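As a usage illustration (not part of this patch), the new primops can be
wrapped in IO along the following lines. The wrapper names are made up here;
the primop signatures are the ones exposed through GHC.Exts:

    {-# LANGUAGE MagicHash, UnboxedTuples #-}
    import GHC.Exts
    import GHC.IO (IO(..))

    -- Atomically read the Int at index i (counted in Int-sized elements);
    -- implies a full memory barrier.
    atomicReadInt :: MutableByteArray# RealWorld -> Int -> IO Int
    atomicReadInt mba (I# i) = IO $ \s ->
      case atomicReadIntArray# mba i s of
        (# s', v #) -> (# s', I# v #)

    -- Atomically write n at index i; implies a full memory barrier.
    atomicWriteInt :: MutableByteArray# RealWorld -> Int -> Int -> IO ()
    atomicWriteInt mba (I# i) (I# n) = IO $ \s ->
      case atomicWriteIntArray# mba i n s of
        s' -> (# s', () #)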
Diffstat (limited to 'compiler/llvmGen')
-rw-r--r--  compiler/llvmGen/Llvm/AbsSyn.hs          |  7
-rw-r--r--  compiler/llvmGen/Llvm/PpLlvm.hs          | 18
-rw-r--r--  compiler/llvmGen/LlvmCodeGen/CodeGen.hs  | 71
3 files changed, 66 insertions, 30 deletions
diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs
index f92bd89c5c..24d0856ea3 100644
--- a/compiler/llvmGen/Llvm/AbsSyn.hs
+++ b/compiler/llvmGen/Llvm/AbsSyn.hs
@@ -65,6 +65,8 @@ data LlvmFunction = LlvmFunction {
type LlvmFunctions = [LlvmFunction]
+type SingleThreaded = Bool
+
-- | LLVM ordering types for synchronization purposes. (Introduced in LLVM
-- 3.0). Please see the LLVM documentation for a better description.
data LlvmSyncOrdering
@@ -224,6 +226,11 @@ data LlvmExpression
| Load LlvmVar
{- |
+ Atomic load of the value at location ptr
+ -}
+ | ALoad LlvmSyncOrdering SingleThreaded LlvmVar
+
+ {- |
Navigate in a structure, selecting elements
* inbound: Is the pointer inbounds? (computed pointer doesn't overflow)
* ptr: Location of the structure
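To make the new constructor concrete, here is a hedged example (not in the
patch) of building the exact atomic variant that LlvmCodeGen/CodeGen.hs below
emits: a sequentially consistent load visible across threads.

    -- ptr is assumed to be an LlvmVar of pointer type.
    -- False = not single-threaded; SyncSeqCst = seq_cst ordering.
    atomicLoad :: LlvmVar -> LlvmExpression
    atomicLoad ptr = ALoad SyncSeqCst False ptr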
diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs
index 025078226d..73077257f8 100644
--- a/compiler/llvmGen/Llvm/PpLlvm.hs
+++ b/compiler/llvmGen/Llvm/PpLlvm.hs
@@ -239,6 +239,7 @@ ppLlvmExpression expr
Insert vec elt idx -> ppInsert vec elt idx
GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes
Load ptr -> ppLoad ptr
+ ALoad ord st ptr -> ppALoad ord st ptr
Malloc tp amount -> ppMalloc tp amount
Phi tp precessors -> ppPhi tp precessors
Asm asm c ty v se sk -> ppAsm asm c ty v se sk
@@ -327,13 +328,18 @@ ppSyncOrdering SyncSeqCst = text "seq_cst"
-- of specifying alignment.
ppLoad :: LlvmVar -> SDoc
-ppLoad var
- | isVecPtrVar var = text "load" <+> ppr var <>
- comma <+> text "align 1"
- | otherwise = text "load" <+> ppr var
+ppLoad var = text "load" <+> ppr var <> align
where
- isVecPtrVar :: LlvmVar -> Bool
- isVecPtrVar = isVector . pLower . getVarType
+ align | isVector . pLower . getVarType $ var = text ", align 1"
+ | otherwise = empty
+
+ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc
+ppALoad ord st var = sdocWithDynFlags $ \dflags ->
+ let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8
+ align = text ", align" <+> ppr alignment
+ sThreaded | st = text " singlethread"
+ | otherwise = empty
+ in text "load atomic" <+> ppr var <> sThreaded <+> ppSyncOrdering ord <> align
ppStore :: LlvmVar -> LlvmVar -> SDoc
ppStore val dst
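As a rendering sketch (not in the patch, and assuming ppLlvmExpression from
this module is in scope): for a word-sized pointer on a 64-bit target, the new
ppALoad prints the LLVM 3.x typed-pointer syntax shown in the comments below.

    -- Prints: load atomic i64* %ptr seq_cst, align 8
    -- With the single-threaded flag set it would instead print:
    --   load atomic i64* %ptr singlethread seq_cst, align 8
    renderALoad :: LlvmVar -> SDoc
    renderALoad ptr = ppLlvmExpression (ALoad SyncSeqCst False ptr)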
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 517553516b..4a56600937 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -15,6 +15,7 @@ import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CLabel
import Cmm
+import CPrim
import PprCmm
import CmmUtils
import Hoopl
@@ -32,6 +33,7 @@ import Unique
import Data.List ( nub )
import Data.Maybe ( catMaybes )
+type Atomic = Bool
type LlvmStatements = OrdList LlvmStatement
-- -----------------------------------------------------------------------------
@@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
+genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
+ dstV <- getCmmReg (CmmLocal dst)
+ (v1, stmts, top) <- genLoad True addr (localRegType dst)
+ let stmt1 = Store v1 dstV
+ return (stmts `snocOL` stmt1, top)
+
+-- TODO: implement these properly rather than calling out to RTS functions.
+-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined
+-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined
+-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined
+
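A rough sketch of what this clause assembles (constructor names are from
Llvm.AbsSyn; variable names illustrative): genLoad True yields the statements
computing v1, the atomically loaded value, and the clause appends an ordinary
Store of v1 into the destination register's slot. Only the load is atomic; the
store targets the thread-local register slot, so it needs no ordering.

    -- stmts ends in: Assignment v1 (ALoad SyncSeqCst False ptr)
    -- stmt1 is:      Store v1 dstV   -- plain store into dst's slot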
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
genCall t@(PrimTarget op) [] args'
@@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do
(MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
-
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
@@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do
MO_Touch -> unsupported
MO_UF_Conv _ -> unsupported
+ MO_AtomicRead _ -> unsupported
+
+ MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
+ MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
+ MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
+
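MO_AtomicRead is marked unsupported here because the genCall clause above
always inlines it; the other three still compile to calls to C helpers in
ghc-prim's cbits/atomic.c, with names computed by CPrim. A hedged sketch of
the mapping (the exact strings live in CPrim):

    -- atomicRMWLabel W64 AMO_Add  produces  "hs_atomic_add64"
    -- cmpxchgLabel   W32          produces  "hs_cmpxchg32"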
-- | Tail function calls
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
@@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of
-> genLit opt lit
CmmLoad e' ty
- -> genLoad e' ty
+ -> genLoad False e' ty
-- Cmmreg in expression is the value, so must load. If you want actual
-- reg pointer, call getCmmReg directly.
@@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!"
-- | Handle CmmLoad expression.
-genLoad :: CmmExpr -> CmmType -> LlvmM ExprData
+genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
-- First we try to detect a few common cases and produce better code for
-- these than the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic.
-genLoad e@(CmmReg (CmmGlobal r)) ty
- = genLoad_fast e r 0 ty
+genLoad atomic e@(CmmReg (CmmGlobal r)) ty
+ = genLoad_fast atomic e r 0 ty
-genLoad e@(CmmRegOff (CmmGlobal r) n) ty
- = genLoad_fast e r n ty
+genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
+ = genLoad_fast atomic e r n ty
-genLoad e@(CmmMachOp (MO_Add _) [
+genLoad atomic e@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
- = genLoad_fast e r (fromInteger n) ty
+ = genLoad_fast atomic e r (fromInteger n) ty
-genLoad e@(CmmMachOp (MO_Sub _) [
+genLoad atomic e@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
- = genLoad_fast e r (negate $ fromInteger n) ty
+ = genLoad_fast atomic e r (negate $ fromInteger n) ty
-- generic case
-genLoad e ty
+genLoad atomic e ty
= do other <- getTBAAMeta otherN
- genLoad_slow e ty other
+ genLoad_slow atomic e ty other
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
-genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType
- -> LlvmM ExprData
-genLoad_fast e r n ty = do
+genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
+ -> LlvmM ExprData
+genLoad_fast atomic e r n ty = do
dflags <- getDynFlags
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do
case grt == ty' of
-- we're fine
True -> do
- (var, s3) <- doExpr ty' (MExpr meta $ Load ptr)
+ (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
return (var, s1 `snocOL` s2 `snocOL` s3,
[])
@@ -1323,32 +1341,34 @@ genLoad_fast e r n ty = do
False -> do
let pty = pLift ty'
(ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
- (var, s4) <- doExpr ty' (MExpr meta $ Load ptr')
+ (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
return (var, s1 `snocOL` s2 `snocOL` s3
`snocOL` s4, [])
-- If it's a bit type then we use the slow method since
-- we can't avoid casting anyway.
- False -> genLoad_slow e ty meta
-
+ False -> genLoad_slow atomic e ty meta
+ where
+ loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
-genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
-genLoad_slow e ty meta = do
+genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
+genLoad_slow atomic e ty meta = do
(iptr, stmts, tops) <- exprToVar e
dflags <- getDynFlags
case getVarType iptr of
LMPointer _ -> do
(dvar, load) <- doExpr (cmmToLlvmType ty)
- (MExpr meta $ Load iptr)
+ (MExpr meta $ loadInstr iptr)
return (dvar, stmts `snocOL` load, tops)
i@(LMInt _) | i == llvmWord dflags -> do
let pty = LMPointer $ cmmToLlvmType ty
(ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty
(dvar, load) <- doExpr (cmmToLlvmType ty)
- (MExpr meta $ Load ptr)
+ (MExpr meta $ loadInstr ptr)
return (dvar, stmts `snocOL` cast `snocOL` load, tops)
other -> do dflags <- getDynFlags
@@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do
"Size of Ptr: " ++ show (llvmPtrBits dflags) ++
", Size of var: " ++ show (llvmWidthInBits dflags other) ++
", Var: " ++ showSDoc dflags (ppr iptr)))
+ where
+ loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr
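Putting the pieces together, the end-to-end lowering this patch enables for an
atomic read is roughly the following (a hedged summary; operand names
illustrative):

    -- atomicReadIntArray# arr i
    --   ==> Cmm callish machop MO_AtomicRead, handled by genCall above
    --   ==> genLoad True addr ty
    --   ==> ALoad SyncSeqCst False ptr
    --   ==> textual IR: load atomic i64* %ptr seq_cst, align 8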
-- | Handle CmmReg expression. This will return a pointer to the stack