author     Ben Gamari <ben@smart-cactus.org>           2022-01-27 14:47:33 -0500
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2022-02-04 16:14:47 -0500
commit     99ea5f2cfa09f50bf3ea105821dc095942552e59 (patch)
tree       87cfa9dbe92582d390f242bad5971e315cd76c3d
parent     7217156c40240c0aed5ffd83ead0fe4ba0484c75 (diff)
download   haskell-99ea5f2cfa09f50bf3ea105821dc095942552e59.tar.gz
Introduce alignment to CmmStore
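
This patch adds an explicit AlignmentSpec field (NaturallyAligned or Unaligned, the same type CmmLoad already carries) to the CmmStore constructor, updates every Cmm pass and native code generator to match or propagate it, and extends the LLVM backend's Store statement with an LMAlign so that unaligned and vector stores are emitted with an explicit `align 1` annotation, replacing ppStore's previous blanket special case for vector pointers. A sketch of the resulting lowering follows the diffstat below.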
 compiler/GHC/Cmm/CommonBlockElim.hs      |  4
 compiler/GHC/Cmm/Graph.hs                |  3
 compiler/GHC/Cmm/LayoutStack.hs          |  9
 compiler/GHC/Cmm/Lint.hs                 |  2
 compiler/GHC/Cmm/Node.hs                 | 12
 compiler/GHC/Cmm/Ppr.hs                  |  5
 compiler/GHC/Cmm/Sink.hs                 |  2
 compiler/GHC/CmmToAsm.hs                 |  4
 compiler/GHC/CmmToAsm/AArch64/CodeGen.hs |  2
 compiler/GHC/CmmToAsm/PPC/CodeGen.hs     |  2
 compiler/GHC/CmmToAsm/X86/CodeGen.hs     |  2
 compiler/GHC/CmmToC.hs                   |  7
 compiler/GHC/CmmToLlvm/CodeGen.hs        | 99
 compiler/GHC/Driver/GenerateCgIPEStub.hs |  2
 compiler/GHC/Llvm/Ppr.hs                 | 17
 compiler/GHC/Llvm/Syntax.hs              |  2
 compiler/GHC/StgToCmm/CgUtils.hs         |  2
 compiler/GHC/StgToCmm/Monad.hs           |  3
 18 files changed, 99 insertions(+), 80 deletions(-)
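
Before the per-file hunks, here is a minimal, self-contained sketch of the lowering the patch introduces. This is not GHC source: it assumes LMAlign abbreviates Maybe Int (as in GHC.Llvm.Types) and condenses the mkStore helper added to compiler/GHC/CmmToLlvm/CodeGen.hs below, which maps the AlignmentSpec recorded on a CmmStore to LLVM's optional `align` annotation:

    -- A condensed sketch of the alignment lowering; LMAlign and the
    -- vector special case are taken from the hunks below.
    type LMAlign = Maybe Int

    data AlignmentSpec = NaturallyAligned | Unaligned
      deriving (Eq, Show)

    -- Vector-typed stores are pinned to `align 1` regardless of the spec;
    -- see Note [Alignment of vector-typed values] cited in the patch.
    toLlvmAlign :: Bool           -- is the pointee a vector type?
                -> AlignmentSpec  -- alignment recorded on the CmmStore
                -> LMAlign
    toLlvmAlign True  _                = Just 1
    toLlvmAlign False Unaligned        = Just 1
    toLlvmAlign False NaturallyAligned = Nothing  -- LLVM assumes natural alignment

Omitting the annotation in the NaturallyAligned case leaves LLVM to assume the type's ABI alignment, which matches what ppStore previously emitted for every non-vector store.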
diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs
index b18730ed79..7dd4119f83 100644
--- a/compiler/GHC/Cmm/CommonBlockElim.hs
+++ b/compiler/GHC/Cmm/CommonBlockElim.hs
@@ -144,7 +144,7 @@ hash_block block =
hash_node :: CmmNode O x -> Word32
hash_node n | dont_care n = 0 -- don't care
hash_node (CmmAssign r e) = hash_reg r + hash_e e
- hash_node (CmmStore e e') = hash_e e + hash_e e'
+ hash_node (CmmStore e e' _) = hash_e e + hash_e e'
hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as
hash_node (CmmBranch _) = 23 -- NB. ignore the label
hash_node (CmmCondBranch p _ _ _) = hash_e p
@@ -210,7 +210,7 @@ eqMiddleWith :: (BlockId -> BlockId -> Bool)
-> CmmNode O O -> CmmNode O O -> Bool
eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2)
= r1 == r2 && eqExprWith eqBid e1 e2
-eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
+eqMiddleWith eqBid (CmmStore l1 r1 _) (CmmStore l2 r2 _)
= eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
(CmmUnsafeForeignCall t2 r2 a2)
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
index 6761821951..d59658e2af 100644
--- a/compiler/GHC/Cmm/Graph.hs
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -193,8 +193,9 @@ mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
mkAssign l (CmmReg r) | l == r = mkNop
mkAssign l r = mkMiddle $ CmmAssign l r
+-- | Assumes natural alignment
mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
-mkStore l r = mkMiddle $ CmmStore l r
+mkStore l r = mkMiddle $ CmmStore l r NaturallyAligned
---------- Control transfer
mkJump :: Profile -> Convention -> CmmExpr
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
index 0a01081634..0d759f5559 100644
--- a/compiler/GHC/Cmm/LayoutStack.hs
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -605,7 +605,8 @@ fixupStack old_stack new_stack = concatMap move new_locs
move (r,n)
| Just (_,m) <- lookupUFM old_map r, n == m = []
| otherwise = [CmmStore (CmmStackSlot Old n)
- (CmmReg (CmmLocal r))]
+ (CmmReg (CmmLocal r))
+ NaturallyAligned]
@@ -703,7 +704,7 @@ setupStackFrame platform lbl liveness updfr_off ret_args stack0
futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
- f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+ f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _)) _) _
= Just l
f _ r = r
@@ -752,6 +753,7 @@ allocate platform ret_off live stackmap@StackMap{ sm_sp = sp0
select_save to_save (slot:stack)
-> let assig = CmmStore (CmmStackSlot Old n')
(CmmReg (CmmLocal r))
+ NaturallyAligned
n' = plusW platform n 1
in
(to_save', stack', n', assig : assigs, (r,(r,n')):regs)
@@ -786,6 +788,7 @@ allocate platform ret_off live stackmap@StackMap{ sm_sp = sp0
n' = n + localRegBytes platform r
assig = CmmStore (CmmStackSlot Old n')
(CmmReg (CmmLocal r))
+ NaturallyAligned
trim_sp
| not (null push_regs) = push_sp
@@ -995,7 +998,7 @@ elimStackStores stackmap stackmaps area_off nodes
go _stackmap [] = []
go stackmap (n:ns)
= case n of
- CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+ CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) _
| Just (_,off) <- lookupUFM (sm_regs stackmap) r
, area_off area + m == off
-> go stackmap ns
diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs
index 2c3c605240..e76be551f9 100644
--- a/compiler/GHC/Cmm/Lint.hs
+++ b/compiler/GHC/Cmm/Lint.hs
@@ -174,7 +174,7 @@ lintCmmMiddle node = case node of
then return ()
else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
- CmmStore l r -> do
+ CmmStore l r _alignment -> do
_ <- lintCmmExpr l
_ <- lintCmmExpr r
return ()
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
index 40b268931d..841c726b14 100644
--- a/compiler/GHC/Cmm/Node.hs
+++ b/compiler/GHC/Cmm/Node.hs
@@ -74,7 +74,7 @@ data CmmNode e x where
CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
-- Assign to register
- CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
+ CmmStore :: !CmmExpr -> !CmmExpr -> !AlignmentSpec -> CmmNode O O
-- Assign to memory location. Size is
-- given by cmmExprType of the rhs.
@@ -322,7 +322,7 @@ instance UserOfRegs LocalReg (CmmNode e x) where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z n = case n of
CmmAssign _ expr -> fold f z expr
- CmmStore addr rval -> fold f (fold f z addr) rval
+ CmmStore addr rval _ -> fold f (fold f z addr) rval
CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
CmmCondBranch expr _ _ _ -> fold f z expr
CmmSwitch expr _ -> fold f z expr
@@ -337,7 +337,7 @@ instance UserOfRegs GlobalReg (CmmNode e x) where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z n = case n of
CmmAssign _ expr -> fold f z expr
- CmmStore addr rval -> fold f (fold f z addr) rval
+ CmmStore addr rval _ -> fold f (fold f z addr) rval
CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
CmmCondBranch expr _ _ _ -> fold f z expr
CmmSwitch expr _ -> fold f z expr
@@ -474,7 +474,7 @@ mapExp _ m@(CmmComment _) = m
mapExp _ m@(CmmTick _) = m
mapExp f (CmmUnwind regs) = CmmUnwind (map (fmap (fmap f)) regs)
mapExp f (CmmAssign r e) = CmmAssign r (f e)
-mapExp f (CmmStore addr e) = CmmStore (f addr) (f e)
+mapExp f (CmmStore addr e align) = CmmStore (f addr) (f e) align
mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _) = l
mapExp f (CmmCondBranch e ti fi l) = CmmCondBranch (f e) ti fi l
@@ -505,7 +505,7 @@ mapExpM _ (CmmComment _) = Nothing
mapExpM _ (CmmTick _) = Nothing
mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e
-mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
+mapExpM f (CmmStore addr e align) = (\[addr', e'] -> CmmStore addr' e' align) `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _) = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e
@@ -558,7 +558,7 @@ foldExp _ (CmmComment {}) z = z
foldExp _ (CmmTick {}) z = z
foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs)
foldExp f (CmmAssign _ e) z = f e z
-foldExp f (CmmStore addr e) z = f addr $ f e z
+foldExp f (CmmStore addr e _) z = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z = z
foldExp f (CmmCondBranch e _ _ _) z = f e z
diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs
index 455a7d639a..c7a1579962 100644
--- a/compiler/GHC/Cmm/Ppr.hs
+++ b/compiler/GHC/Cmm/Ppr.hs
@@ -215,8 +215,11 @@ pprNode platform node = pp_node <+> pp_debug
CmmAssign reg expr -> ppr reg <+> equals <+> pdoc platform expr <> semi
-- rep[lv] = expr;
- CmmStore lv expr -> rep <> brackets (pdoc platform lv) <+> equals <+> pdoc platform expr <> semi
+ CmmStore lv expr align -> rep <> align_mark <> brackets (pdoc platform lv) <+> equals <+> pdoc platform expr <> semi
where
+ align_mark = case align of
+ Unaligned -> text "^"
+ NaturallyAligned -> empty
rep = ppr ( cmmExprType platform expr )
-- call "ccall" foo(x, y)[r1, r2];
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 4d4cdb6d4c..84f9317f21 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -663,7 +663,7 @@ conflicts platform (r, rhs, addr) node
| foldRegsUsed platform (\b r' -> r == r' || b) False node = True
-- (3) a store to an address conflicts with a read of the same memory
- | CmmStore addr' e <- node
+ | CmmStore addr' e _ <- node
, memConflicts addr (loadAddr platform addr' (cmmExprWidth platform e)) = True
-- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index ddfcce8460..29ec7559f6 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -1023,10 +1023,10 @@ cmmStmtConFold stmt
CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
new_src -> CmmAssign reg new_src
- CmmStore addr src
+ CmmStore addr src align
-> do addr' <- cmmExprConFold DataReference addr
src' <- cmmExprConFold DataReference src
- return $ CmmStore addr' src'
+ return $ CmmStore addr' src' align
CmmCall { cml_target = addr }
-> do addr' <- cmmExprConFold JumpReference addr
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index f645720de7..507d5243b9 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -290,7 +290,7 @@ stmtToInstrs bid stmt = do
where ty = cmmRegType platform reg
format = cmmTypeFormat ty
- CmmStore addr src
+ CmmStore addr src _alignment
| isFloatType ty -> assignMem_FltCode format addr src
| otherwise -> assignMem_IntCode format addr src
where ty = cmmExprType platform src
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
index 430189d442..02308c59e0 100644
--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
@@ -174,7 +174,7 @@ stmtToInstrs stmt = do
where ty = cmmRegType platform reg
format = cmmTypeFormat ty
- CmmStore addr src
+ CmmStore addr src _alignment
| isFloatType ty -> assignMem_FltCode format addr src
| target32Bit platform &&
isWord64 ty -> assignMem_I64Code addr src
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 392d07c62e..bf799590cd 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -338,7 +338,7 @@ stmtToInstrs bid stmt = do
where ty = cmmRegType platform reg
format = cmmTypeFormat ty
- CmmStore addr src
+ CmmStore addr src _alignment
| isFloatType ty -> assignMem_FltCode format addr src
| is32Bit && isWord64 ty -> assignMem_I64Code addr src
| otherwise -> assignMem_IntCode format addr src
diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs
index 65077dcc0b..9539208883 100644
--- a/compiler/GHC/CmmToC.hs
+++ b/compiler/GHC/CmmToC.hs
@@ -214,15 +214,14 @@ pprStmt platform stmt =
CmmAssign dest src -> pprAssign platform dest src
- CmmStore dest src
+ CmmStore dest src align
| typeWidth rep == W64 && wordWidth platform /= W64
-> (if isFloatType rep then text "ASSIGN_DBL"
else text "ASSIGN_Word64") <>
parens (mkP_ <> pprExpr1 platform dest <> comma <> pprExpr platform src) <> semi
| otherwise
- -> hsep [ pprExpr platform (CmmLoad dest rep NaturallyAligned), equals, pprExpr platform src <> semi ]
- -- TODO: Is this right?
+ -> hsep [ pprExpr platform (CmmLoad dest rep align), equals, pprExpr platform src <> semi ]
where
rep = cmmExprType platform src
@@ -1271,7 +1270,7 @@ te_Lit _ = return ()
te_Stmt :: CmmNode e x -> TE ()
te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e
-te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r
+te_Stmt (CmmStore l r _) = te_Expr l >> te_Expr r
te_Stmt (CmmUnsafeForeignCall target rs es)
= do te_Target target
mapM_ te_temp rs
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index 49359939ea..08b1478331 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -126,7 +126,8 @@ stmtToInstrs stmt = case stmt of
CmmUnwind {} -> return (nilOL, [])
CmmAssign reg src -> genAssign reg src
- CmmStore addr src -> genStore addr src
+ CmmStore addr src align
+ -> genStore addr src align
CmmBranch id -> genBranch id
CmmCondBranch arg true false likely
@@ -207,7 +208,7 @@ genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
castV <- lift $ mkLocalVar ty
ve <- exprToVarW e
statement $ Assignment castV $ Cast LM_Uitofp ve width
- statement $ Store castV dstV
+ statement $ Store castV dstV Nothing
genCall (PrimTarget (MO_UF_Conv _)) [_] args =
panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
@@ -263,12 +264,12 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
AMO_Or -> LAO_Or
AMO_Xor -> LAO_Xor
retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
- statement $ Store retVar dstVar
+ statement $ Store retVar dstVar Nothing
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
dstV <- getCmmRegW (CmmLocal dst)
v1 <- genLoadW True addr (localRegType dst) NaturallyAligned
- statement $ Store v1 dstV
+ statement $ Store v1 dstV Nothing
genCall (PrimTarget (MO_Cmpxchg _width))
[dst] [addr, old, new] = runStmtsDecls $ do
@@ -282,7 +283,7 @@ genCall (PrimTarget (MO_Cmpxchg _width))
retVar <- doExprW (LMStructU [targetTy,i1])
$ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
retVar' <- doExprW targetTy $ ExtractV retVar 0
- statement $ Store retVar' dstVar
+ statement $ Store retVar' dstVar Nothing
genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
dstV <- getCmmRegW (CmmLocal dst) :: WriterT LlvmAccum LlvmM LlvmVar
@@ -292,7 +293,7 @@ genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
ptrExpr = Cast LM_Inttoptr addrVar ptrTy
ptrVar <- doExprW ptrTy ptrExpr
resVar <- doExprW (getVarType valVar) (AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst)
- statement $ Store resVar dstV
+ statement $ Store resVar dstV Nothing
genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
addrVar <- exprToVarW addr
@@ -352,8 +353,8 @@ genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
retH <- doExprW width $ Cast LM_Trunc retShifted width
dstRegL <- getCmmRegW (CmmLocal dstL)
dstRegH <- getCmmRegW (CmmLocal dstH)
- statement $ Store retL dstRegL
- statement $ Store retH dstRegH
+ statement $ Store retL dstRegL Nothing
+ statement $ Store retH dstRegH Nothing
genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
let width = widthToLlvmInt w
@@ -384,9 +385,9 @@ genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls
dstRegL <- getCmmRegW (CmmLocal dstL)
dstRegH <- getCmmRegW (CmmLocal dstH)
dstRegC <- getCmmRegW (CmmLocal dstC)
- statement $ Store retL dstRegL
- statement $ Store retH dstRegH
- statement $ Store retC dstRegC
+ statement $ Store retL dstRegL Nothing
+ statement $ Store retH dstRegH Nothing
+ statement $ Store retC dstRegC Nothing
-- MO_U_QuotRem2 is another case we handle by widening the registers to double
-- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
@@ -420,8 +421,8 @@ genCall (PrimTarget (MO_U_QuotRem2 w))
retRem <- narrow retExtRem
dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
dstRegR <- lift $ getCmmReg (CmmLocal dstR)
- statement $ Store retDiv dstRegQ
- statement $ Store retRem dstRegR
+ statement $ Store retDiv dstRegQ Nothing
+ statement $ Store retRem dstRegR Nothing
-- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
-- which we need to extract the actual values.
@@ -528,7 +529,7 @@ genCall target res args = do
vreg <- getCmmRegW (CmmLocal creg)
if retTy == pLower (getVarType vreg)
then do
- statement $ Store v1 vreg
+ statement $ Store v1 vreg Nothing
doReturn
else do
let ty = pLower $ getVarType vreg
@@ -540,7 +541,7 @@ genCall target res args = do
++ " returned type!"
v2 <- doExprW ty $ Cast op v1 ty
- statement $ Store v2 vreg
+ statement $ Store v2 vreg Nothing
doReturn
-- | Generate a call to an LLVM intrinsic that performs arithmetic operation
@@ -569,8 +570,8 @@ genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
(overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
dstRegV <- getCmmReg (CmmLocal dstV)
dstRegO <- getCmmReg (CmmLocal dstO)
- let storeV = Store value dstRegV
- storeO = Store overflow dstRegO
+ let storeV = Store value dstRegV Nothing
+ storeO = Store overflow dstRegO Nothing
return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
genCallWithOverflow _ _ _ _ =
panic "genCallExtract: wrong ForeignTarget or number of arguments"
@@ -635,7 +636,7 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
(retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
let retV' = singletonPanic "genCallSimpleCast" retVs'
- let s2 = Store retV' dstV
+ let s2 = Store retV' dstV Nothing
let stmts = stmts2 `appOL` stmts4 `snocOL`
s1 `appOL` stmts5 `snocOL` s2
@@ -667,7 +668,7 @@ genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
(retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
let retV' = singletonPanic "genCallSimpleCast2" retVs'
- let s2 = Store retV' dstV
+ let s2 = Store retV' dstV Nothing
let stmts = stmts2 `appOL` stmts4 `snocOL`
s1 `appOL` stmts5 `snocOL` s2
@@ -1097,54 +1098,54 @@ genAssign reg val = do
-- Some registers are pointer types, so need to cast value to pointer
LMPointer _ | getVarType vval == llvmWord platform -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
- let s2 = Store v vreg
+ let s2 = Store v vreg Nothing
return (stmts `snocOL` s1 `snocOL` s2, top2)
LMVector _ _ -> do
(v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
- let s2 = Store v vreg
+ let s2 = mkStore v vreg NaturallyAligned
return (stmts `snocOL` s1 `snocOL` s2, top2)
_ -> do
- let s1 = Store vval vreg
+ let s1 = Store vval vreg Nothing
return (stmts `snocOL` s1, top2)
-- | CmmStore operation
-genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
+genStore :: CmmExpr -> CmmExpr -> AlignmentSpec -> LlvmM StmtData
-- First we try to detect a few common cases and produce better code for
-- these than the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic
-genStore addr@(CmmReg (CmmGlobal r)) val
- = genStore_fast addr r 0 val
+genStore addr@(CmmReg (CmmGlobal r)) val alignment
+ = genStore_fast addr r 0 val alignment
-genStore addr@(CmmRegOff (CmmGlobal r) n) val
- = genStore_fast addr r n val
+genStore addr@(CmmRegOff (CmmGlobal r) n) val alignment
+ = genStore_fast addr r n val alignment
genStore addr@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- val
- = genStore_fast addr r (fromInteger n) val
+ val alignment
+ = genStore_fast addr r (fromInteger n) val alignment
genStore addr@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- val
- = genStore_fast addr r (negate $ fromInteger n) val
+ val alignment
+ = genStore_fast addr r (negate $ fromInteger n) val alignment
-- generic case
-genStore addr val
- = getTBAAMeta topN >>= genStore_slow addr val
+genStore addr val alignment
+ = getTBAAMeta topN >>= genStore_slow addr val alignment
-- | CmmStore operation
-- This is a special case for storing to a global register pointer
-- offset such as I32[Sp+8].
-genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
+genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr -> AlignmentSpec
-> LlvmM StmtData
-genStore_fast addr r n val
+genStore_fast addr r n val alignment
= do platform <- getPlatform
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1157,7 +1158,7 @@ genStore_fast addr r n val
case pLower grt == getVarType vval of
-- we're fine
True -> do
- let s3 = MetaStmt meta $ Store vval ptr
+ let s3 = MetaStmt meta $ mkStore vval ptr alignment
return (stmts `appOL` s1 `snocOL` s2
`snocOL` s3, top)
@@ -1165,19 +1166,19 @@ genStore_fast addr r n val
False -> do
let ty = (pLift . getVarType) vval
(ptr', s3) <- doExpr ty $ Cast LM_Bitcast ptr ty
- let s4 = MetaStmt meta $ Store vval ptr'
+ let s4 = MetaStmt meta $ mkStore vval ptr' alignment
return (stmts `appOL` s1 `snocOL` s2
`snocOL` s3 `snocOL` s4, top)
-- If it's a bit type then we use the slow method since
-- we can't avoid casting anyway.
- False -> genStore_slow addr val meta
+ False -> genStore_slow addr val alignment meta
-- | CmmStore operation
-- Generic case. Uses casts and pointer arithmetic if needed.
-genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
-genStore_slow addr val meta = do
+genStore_slow :: CmmExpr -> CmmExpr -> AlignmentSpec -> [MetaAnnot] -> LlvmM StmtData
+genStore_slow addr val alignment meta = do
(vaddr, stmts1, top1) <- exprToVar addr
(vval, stmts2, top2) <- exprToVar val
@@ -1188,17 +1189,17 @@ genStore_slow addr val meta = do
-- sometimes we need to cast an int to a pointer before storing
LMPointer ty@(LMPointer _) | getVarType vval == llvmWord platform -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
- let s2 = MetaStmt meta $ Store v vaddr
+ let s2 = MetaStmt meta $ mkStore v vaddr alignment
return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
LMPointer _ -> do
- let s1 = MetaStmt meta $ Store vval vaddr
+ let s1 = MetaStmt meta $ mkStore vval vaddr alignment
return (stmts `snocOL` s1, top1 ++ top2)
i@(LMInt _) | i == llvmWord platform -> do
let vty = pLift $ getVarType vval
(vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty
- let s2 = MetaStmt meta $ Store vval vptr
+ let s2 = MetaStmt meta $ mkStore vval vptr alignment
return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
other ->
@@ -1208,6 +1209,16 @@ genStore_slow addr val meta = do
", Size of var: " ++ show (llvmWidthInBits platform other) ++
", Var: " ++ renderWithContext (llvmCgContext cfg) (ppVar cfg vaddr)))
+mkStore :: LlvmVar -> LlvmVar -> AlignmentSpec -> LlvmStatement
+mkStore vval vptr alignment =
+ Store vval vptr align
+ where
+ is_vector = isVector (pLower (getVarType vptr))
+ align = case alignment of
+ -- See Note [Alignment of vector-typed values]
+ _ | is_vector -> Just 1
+ Unaligned -> Just 1
+ NaturallyAligned -> Nothing
-- | Unconditional branch
genBranch :: BlockId -> LlvmM StmtData
@@ -2064,7 +2075,7 @@ funPrologue live cmmBlocks = do
rval = if isLive r then arg else trash
alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
markStackReg r
- return $ toOL [alloc, Store rval reg]
+ return $ toOL [alloc, Store rval reg Nothing]
return (concatOL stmtss `snocOL` jumpToEntry, [])
where
diff --git a/compiler/GHC/Driver/GenerateCgIPEStub.hs b/compiler/GHC/Driver/GenerateCgIPEStub.hs
index ca609e9fcf..e1a751e762 100644
--- a/compiler/GHC/Driver/GenerateCgIPEStub.hs
+++ b/compiler/GHC/Driver/GenerateCgIPEStub.hs
@@ -261,7 +261,7 @@ generateCgIPEStub hsc_env this_mod denv s = do
where
find :: CLabel -> [CmmNode O O] -> Maybe IpeSourceLocation -> Maybe IpeSourceLocation
find label (b : blocks) lastTick = case b of
- (CmmStore _ (CmmLit (CmmLabel l))) -> if label == l then lastTick else find label blocks lastTick
+ (CmmStore _ (CmmLit (CmmLabel l)) _) -> if label == l then lastTick else find label blocks lastTick
(CmmTick (SourceNote span name)) -> find label blocks $ Just (span, name)
_ -> find label blocks lastTick
find _ [] _ = Nothing
diff --git a/compiler/GHC/Llvm/Ppr.hs b/compiler/GHC/Llvm/Ppr.hs
index 405296f79f..f02d2e1024 100644
--- a/compiler/GHC/Llvm/Ppr.hs
+++ b/compiler/GHC/Llvm/Ppr.hs
@@ -219,7 +219,8 @@ ppLlvmStatement opts stmt =
BranchIf cond ifT ifF -> ind $ ppBranchIf opts cond ifT ifF
Comment comments -> ind $ ppLlvmComments comments
MkLabel label -> ppLlvmBlockLabel label
- Store value ptr -> ind $ ppStore opts value ptr
+ Store value ptr align
+ -> ind $ ppStore opts value ptr align
Switch scrut def tgs -> ind $ ppSwitch opts scrut def tgs
Return result -> ind $ ppReturn opts result
Expr expr -> ind $ ppLlvmExpression opts expr
@@ -386,14 +387,14 @@ ppALoad opts ord st var =
in text "load atomic" <+> ppr derefType <> comma <+> ppVar opts var <> sThreaded
<+> ppSyncOrdering ord <> align
-ppStore :: LlvmCgConfig -> LlvmVar -> LlvmVar -> SDoc
-ppStore opts val dst
- | isVecPtrVar dst = text "store" <+> ppVar opts val <> comma <+> ppVar opts dst <>
- comma <+> text "align 1"
- | otherwise = text "store" <+> ppVar opts val <> comma <+> ppVar opts dst
+ppStore :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LMAlign -> SDoc
+ppStore opts val dst alignment =
+ text "store" <+> ppVar opts val <> comma <+> ppVar opts dst <> align
where
- isVecPtrVar :: LlvmVar -> Bool
- isVecPtrVar = isVector . pLower . getVarType
+ align =
+ case alignment of
+ Just n -> text ", align" <+> ppr n
+ Nothing -> empty
ppCast :: LlvmCgConfig -> LlvmCastOp -> LlvmVar -> LlvmType -> SDoc
diff --git a/compiler/GHC/Llvm/Syntax.hs b/compiler/GHC/Llvm/Syntax.hs
index befac77734..115f5b58c3 100644
--- a/compiler/GHC/Llvm/Syntax.hs
+++ b/compiler/GHC/Llvm/Syntax.hs
@@ -150,7 +150,7 @@ data LlvmStatement
* value: Variable/Constant to store.
* ptr: Location to store the value in
-}
- | Store LlvmVar LlvmVar
+ | Store LlvmVar LlvmVar LMAlign
{- |
Multiway branch
diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs
index bc42a11d1c..4718cbf74a 100644
--- a/compiler/GHC/StgToCmm/CgUtils.hs
+++ b/compiler/GHC/StgToCmm/CgUtils.hs
@@ -152,7 +152,7 @@ fixStgRegStmt platform stmt = fixAssign $ mapExpDeep fixExpr stmt
let baseAddr = get_GlobalReg_addr platform reg
in case reg `elem` activeStgRegs platform of
True -> CmmAssign (CmmGlobal reg) src
- False -> CmmStore baseAddr src
+ False -> CmmStore baseAddr src NaturallyAligned
other_stmt -> other_stmt
fixExpr expr = case expr of
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index 5ab12a4634..04654d40d3 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -724,8 +724,9 @@ emitUnwind regs = do
emitAssign :: CmmReg -> CmmExpr -> FCode ()
emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
+-- | Assumes natural alignment.
emitStore :: CmmExpr -> CmmExpr -> FCode ()
-emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
+emitStore l r = emitCgStmt (CgStmt (CmmStore l r NaturallyAligned))
emit :: CmmAGraph -> FCode ()
emit ag