author    Ben Gamari <ben@smart-cactus.org>    2022-01-27 13:28:32 -0500
committer Marge Bot <ben+marge-bot@smart-cactus.org>    2022-02-04 16:14:47 -0500
commit    7217156c40240c0aed5ffd83ead0fe4ba0484c75 (patch)
tree      1cd6f31d82ba304695189c52939a64a5d9c9f150 /compiler/GHC/CmmToLlvm
parent    58d7faacafc975d522cbc9f56a7db1e46b37d4a1 (diff)
download  haskell-7217156c40240c0aed5ffd83ead0fe4ba0484c75.tar.gz
Introduce alignment in CmmLoad
Diffstat (limited to 'compiler/GHC/CmmToLlvm')
-rw-r--r--    compiler/GHC/CmmToLlvm/CodeGen.hs    82
1 file changed, 50 insertions, 32 deletions
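
Before reading the diff: the patch threads an explicit alignment annotation through every load the LLVM backend emits. The AlignmentSpec type and the extra field on CmmLoad are defined outside this directory and so do not appear in the hunks below; as a rough sketch of the shapes they assume (a reconstruction, not quoted from the patch):

    -- Assumed shape of the new annotation (presumably defined alongside
    -- CmmLoad itself):
    data AlignmentSpec
      = NaturallyAligned   -- aligned to the natural alignment of the type
      | Unaligned          -- no guarantee; LLVM must not assume alignment

    -- CmmLoad gains a third field carrying this annotation, roughly:
    --   CmmLoad CmmExpr CmmType AlignmentSpec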
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index a57a6f79f0..49359939ea 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -267,7 +267,7 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
dstV <- getCmmRegW (CmmLocal dst)
- v1 <- genLoadW True addr (localRegType dst)
+ v1 <- genLoadW True addr (localRegType dst) NaturallyAligned
statement $ Store v1 dstV
genCall (PrimTarget (MO_Cmpxchg _width))
@@ -1357,8 +1357,8 @@ exprToVarOpt opt e = case e of
CmmLit lit
-> genLit opt lit
- CmmLoad e' ty
- -> genLoad False e' ty
+ CmmLoad e' ty align
+ -> genLoad False e' ty align
-- CmmReg in an expression is the value, so we must load. If you want the actual
-- reg pointer, call getCmmReg directly.
@@ -1790,40 +1790,40 @@ genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!"
-- | Handle CmmLoad expression.
-genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
+genLoad :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> LlvmM ExprData
-- First we try to detect a few common cases and produce better code for
-- these than the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic.
-genLoad atomic e@(CmmReg (CmmGlobal r)) ty
- = genLoad_fast atomic e r 0 ty
+genLoad atomic e@(CmmReg (CmmGlobal r)) ty align
+ = genLoad_fast atomic e r 0 ty align
-genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
- = genLoad_fast atomic e r n ty
+genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty align
+ = genLoad_fast atomic e r n ty align
genLoad atomic e@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- ty
- = genLoad_fast atomic e r (fromInteger n) ty
+ ty align
+ = genLoad_fast atomic e r (fromInteger n) ty align
genLoad atomic e@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- ty
- = genLoad_fast atomic e r (negate $ fromInteger n) ty
+ ty align
+ = genLoad_fast atomic e r (negate $ fromInteger n) ty align
-- generic case
-genLoad atomic e ty
- = getTBAAMeta topN >>= genLoad_slow atomic e ty
+genLoad atomic e ty align
+ = getTBAAMeta topN >>= genLoad_slow atomic e ty align
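
To make the fast path above concrete: a load such as I64[Sp + 16] can use getelementptr only when the byte offset is an even multiple of the loaded type's width; otherwise genLoad_slow falls back to casts and pointer arithmetic. A standalone sketch of that divisibility check (hypothetical helper, not part of the patch):

    -- Hypothetical illustration: map a byte offset to a getelementptr index.
    gepIndexFor :: Int        -- byte offset n, e.g. 16 in I64[Sp + 16]
                -> Int        -- element width in bytes, e.g. 8 for I64
                -> Maybe Int  -- getelementptr index, if the offset divides evenly
    gepIndexFor n w
      | w > 0, n `mod` w == 0 = Just (n `div` w)  -- I64[Sp + 16] ==> gep Sp, 2
      | otherwise             = Nothing           -- take the slow path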
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
- -> LlvmM ExprData
-genLoad_fast atomic e r n ty = do
+ -> AlignmentSpec -> LlvmM ExprData
+genLoad_fast atomic e r n ty align = do
platform <- getPlatform
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1836,7 +1836,7 @@ genLoad_fast atomic e r n ty = do
case grt == ty' of
-- we're fine
True -> do
- (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
+ (var, s3) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr align)
return (var, s1 `snocOL` s2 `snocOL` s3,
[])
@@ -1844,43 +1844,61 @@ genLoad_fast atomic e r n ty = do
False -> do
let pty = pLift ty'
(ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
- (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
+ (var, s4) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr' align)
return (var, s1 `snocOL` s2 `snocOL` s3
`snocOL` s4, [])
-- If it's a bit type then we use the slow method since
-- we can't avoid casting anyway.
- False -> genLoad_slow atomic e ty meta
- where
- loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
- | otherwise = Load ptr
+ False -> genLoad_slow atomic e ty align meta
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
-genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
-genLoad_slow atomic e ty meta = do
+genLoad_slow :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> [MetaAnnot]
+ -> LlvmM ExprData
+genLoad_slow atomic e ty align meta = do
platform <- getPlatform
cfg <- getConfig
runExprData $ do
iptr <- exprToVarW e
case getVarType iptr of
LMPointer _ ->
- doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr)
+ doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic iptr align)
i@(LMInt _) | i == llvmWord platform -> do
let pty = LMPointer $ cmmToLlvmType ty
ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty
- doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr)
+ doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic ptr align)
other -> pprPanic "exprToVar: CmmLoad expression is not the right type!"
(PprCmm.pprExpr platform e <+> text (
"Size of Ptr: " ++ show (llvmPtrBits platform) ++
", Size of var: " ++ show (llvmWidthInBits platform other) ++
", Var: " ++ renderWithContext (llvmCgContext cfg) (ppVar cfg iptr)))
- where
- loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
- | otherwise = Load ptr
+{-
+Note [Alignment of vector-typed values]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On x86, vector types need to be 16-byte aligned for aligned
+access, but we have no way of guaranteeing that this is true with GHC
+(we would need to modify the layout of the stack and closures, change
+the storage manager, etc.). So, we blindly tell LLVM that *any* vector
+store or load could be unaligned. In the future we may be able to
+guarantee that certain vector access patterns are aligned, in which
+case we will need a more granular way of specifying alignment.
+-}
+
+mkLoad :: Atomic -> LlvmVar -> AlignmentSpec -> LlvmExpression
+mkLoad atomic ptr alignment
+ | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr align
+ where
+ ty = pLower (getVarType ptr)
+ align = case alignment of
+ -- See Note [Alignment of vector-typed values]
+ _ | isVector ty -> Just 1
+ Unaligned -> Just 1
+ NaturallyAligned -> Nothing
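
For intuition, a hypothetical usage sketch of mkLoad, where vecPtr and wordPtr are illustrative names for LlvmVars whose pointee types are a vector and a native word: the vector load is forced to an explicit align 1 per the Note above even though the caller asked for natural alignment, while the word load carries no alignment annotation. Note also that the atomic branch (ALoad) takes no explicit alignment at all.

    -- Hypothetical sketch, not part of the patch.
    exampleLoads :: LlvmVar -> LlvmVar -> (LlvmExpression, LlvmExpression)
    exampleLoads vecPtr wordPtr =
      ( mkLoad False vecPtr  NaturallyAligned  -- vector: rendered "load ..., align 1"
      , mkLoad False wordPtr NaturallyAligned  -- word: plain load, no align suffix
      )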
-- | Handle CmmReg expression. This will return a pointer to the stack
-- location of the register. Throws an error if it isn't allocated on
@@ -1919,7 +1937,7 @@ getCmmRegVal reg =
where loadFromStack = do
ptr <- getCmmReg reg
let ty = pLower $ getVarType ptr
- (v, s) <- doExpr ty (Load ptr)
+ (v, s) <- doExpr ty (Load ptr Nothing)
return (v, ty, unitOL s)
-- | Allocate a local CmmReg on the stack
@@ -2210,8 +2228,8 @@ runStmtsDecls action = do
getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
getCmmRegW = lift . getCmmReg
-genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
-genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
+genLoadW :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> WriterT LlvmAccum LlvmM LlvmVar
+genLoadW atomic e ty alignment = liftExprData $ genLoad atomic e ty alignment
-- | Return element of single-element list; 'panic' if list is not a single-element list
singletonPanic :: String -> [a] -> a
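
Taken together, call sites now state their alignment expectations explicitly. A hedged sketch of how a caller might use the extended genLoadW (hypothetical wrappers, not from the patch):

    -- Hypothetical wrappers contrasting the two AlignmentSpec choices.
    loadAligned, loadUnaligned :: CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
    loadAligned   e ty = genLoadW False e ty NaturallyAligned  -- ordinary scalar load
    loadUnaligned e ty = genLoadW False e ty Unaligned         -- e.g. vector data; see the Note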