author     Ben Gamari <ben@smart-cactus.org>           2022-01-27 13:28:32 -0500
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2022-02-04 16:14:47 -0500
commit     7217156c40240c0aed5ffd83ead0fe4ba0484c75 (patch)
tree       1cd6f31d82ba304695189c52939a64a5d9c9f150
parent     58d7faacafc975d522cbc9f56a7db1e46b37d4a1 (diff)
download   haskell-7217156c40240c0aed5ffd83ead0fe4ba0484c75.tar.gz
Introduce alignment in CmmLoad
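
In short: CmmLoad gains a third field recording whether the address is known to respect the natural alignment of the loaded type. A minimal sketch of the new shape, assembled from the changes to compiler/GHC/Cmm/Expr.hs and compiler/GHC/Cmm/Utils.hs below:

    data AlignmentSpec = NaturallyAligned | Unaligned
      deriving (Eq, Ord, Show)

    -- CmmLoad now carries the alignment of the access:
    --   CmmLoad !CmmExpr !CmmType !AlignmentSpec
    -- e.g. a naturally-aligned non-pointer word load is built as:
    cmmLoadBWord :: Platform -> CmmExpr -> CmmExpr
    cmmLoadBWord platform ptr = CmmLoad ptr (bWord platform) NaturallyAligned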
 compiler/GHC/Cmm/CommonBlockElim.hs       |   4
 compiler/GHC/Cmm/DebugBlock.hs            |   2
 compiler/GHC/Cmm/Expr.hs                  |  17
 compiler/GHC/Cmm/Graph.hs                 |   4
 compiler/GHC/Cmm/Info.hs                  |   8
 compiler/GHC/Cmm/LayoutStack.hs           |   5
 compiler/GHC/Cmm/Lint.hs                  |   2
 compiler/GHC/Cmm/Node.hs                  |  18
 compiler/GHC/Cmm/Parser.y                 |   5
 compiler/GHC/Cmm/Ppr/Expr.hs              |   7
 compiler/GHC/Cmm/Sink.hs                  |  12
 compiler/GHC/Cmm/Utils.hs                 |  20
 compiler/GHC/CmmToAsm.hs                  |   6
 compiler/GHC/CmmToAsm/AArch64/CodeGen.hs  |   2
 compiler/GHC/CmmToAsm/PPC/CodeGen.hs      |  24
 compiler/GHC/CmmToAsm/X86/CodeGen.hs      |  48
 compiler/GHC/CmmToC.hs                    |  18
 compiler/GHC/CmmToLlvm/CodeGen.hs         |  82
 compiler/GHC/Llvm/Ppr.hs                  |  20
 compiler/GHC/Llvm/Syntax.hs               |   2
 compiler/GHC/StgToCmm/CgUtils.hs          |   2
 compiler/GHC/StgToCmm/Foreign.hs          |  21
 compiler/GHC/StgToCmm/Hpc.hs              |   2
 compiler/GHC/StgToCmm/Prim.hs             |   2
 compiler/GHC/StgToCmm/Prof.hs             |   5
 compiler/GHC/StgToCmm/Utils.hs            |  13
26 files changed, 193 insertions(+), 158 deletions(-)
diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs
index 10eb9ccb3d..b18730ed79 100644
--- a/compiler/GHC/Cmm/CommonBlockElim.hs
+++ b/compiler/GHC/Cmm/CommonBlockElim.hs
@@ -159,7 +159,7 @@ hash_block block =
hash_e :: CmmExpr -> Word32
hash_e (CmmLit l) = hash_lit l
- hash_e (CmmLoad e _) = 67 + hash_e e
+ hash_e (CmmLoad e _ _) = 67 + hash_e e
hash_e (CmmReg r) = hash_reg r
hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check
hash_e (CmmRegOff r i) = hash_reg r + cvt i
@@ -222,7 +222,7 @@ eqExprWith :: (BlockId -> BlockId -> Bool)
eqExprWith eqBid = eq
where
CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2
- CmmLoad e1 t1 `eq` CmmLoad e2 t2 = t1 `cmmEqType` t2 && e1 `eq` e2
+ CmmLoad e1 t1 a1 `eq` CmmLoad e2 t2 a2 = t1 `cmmEqType` t2 && e1 `eq` e2 && a1==a2
CmmReg r1 `eq` CmmReg r2 = r1==r2
CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2
CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2
diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs
index e6d1f2735e..06c1f9aace 100644
--- a/compiler/GHC/Cmm/DebugBlock.hs
+++ b/compiler/GHC/Cmm/DebugBlock.hs
@@ -546,7 +546,7 @@ toUnwindExpr _ (CmmLit (CmmInt i _)) = UwConst (fromIntegral i)
toUnwindExpr _ (CmmLit (CmmLabel l)) = UwLabel l
toUnwindExpr _ (CmmRegOff (CmmGlobal g) i) = UwReg g i
toUnwindExpr _ (CmmReg (CmmGlobal g)) = UwReg g 0
-toUnwindExpr platform (CmmLoad e _) = UwDeref (toUnwindExpr platform e)
+toUnwindExpr platform (CmmLoad e _ _) = UwDeref (toUnwindExpr platform e)
toUnwindExpr platform e@(CmmMachOp op [e1, e2]) =
case (op, toUnwindExpr platform e1, toUnwindExpr platform e2) of
(MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y)
diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs
index f63ef62dab..f910e65f04 100644
--- a/compiler/GHC/Cmm/Expr.hs
+++ b/compiler/GHC/Cmm/Expr.hs
@@ -9,6 +9,7 @@ module GHC.Cmm.Expr
( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr
, CmmReg(..), cmmRegType, cmmRegWidth
, CmmLit(..), cmmLitType
+ , AlignmentSpec(..)
, LocalReg(..), localRegType
, GlobalReg(..), isArgReg, globalRegType
, spReg, hpReg, spLimReg, hpLimReg, nodeReg
@@ -53,12 +54,13 @@ import GHC.Types.Basic (Alignment, mkAlignment, alignmentOf)
-----------------------------------------------------------------------------
data CmmExpr
- = CmmLit !CmmLit -- Literal
- | CmmLoad !CmmExpr !CmmType -- Read memory location
+ = CmmLit !CmmLit -- Literal
+ | CmmLoad !CmmExpr !CmmType !AlignmentSpec
+ -- Read memory location
| CmmReg !CmmReg -- Contents of register
| CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
| CmmStackSlot Area {-# UNPACK #-} !Int
- -- addressing expression of a stack slot
+ -- Addressing expression of a stack slot
-- See Note [CmmStackSlot aliasing]
| CmmRegOff !CmmReg !Int
-- CmmRegOff reg i
@@ -69,13 +71,16 @@ data CmmExpr
instance Eq CmmExpr where -- Equality ignores the types
CmmLit l1 == CmmLit l2 = l1==l2
- CmmLoad e1 _ == CmmLoad e2 _ = e1==e2
+ CmmLoad e1 _ _ == CmmLoad e2 _ _ = e1==e2
CmmReg r1 == CmmReg r2 = r1==r2
CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2
CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2
CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
_e1 == _e2 = False
+data AlignmentSpec = NaturallyAligned | Unaligned
+ deriving (Eq, Ord, Show)
+
data CmmReg
= CmmLocal {-# UNPACK #-} !LocalReg
| CmmGlobal GlobalReg
@@ -225,7 +230,7 @@ instance Outputable CmmLit where
cmmExprType :: Platform -> CmmExpr -> CmmType
cmmExprType platform = \case
(CmmLit lit) -> cmmLitType platform lit
- (CmmLoad _ rep) -> rep
+ (CmmLoad _ rep _) -> rep
(CmmReg reg) -> cmmRegType platform reg
(CmmMachOp op args) -> machOpResultType platform op (map (cmmExprType platform) args)
(CmmRegOff reg _) -> cmmRegType platform reg
@@ -385,7 +390,7 @@ instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z e = expr z e
where expr z (CmmLit _) = z
- expr z (CmmLoad addr _) = foldRegsUsed platform f z addr
+ expr z (CmmLoad addr _ _) = foldRegsUsed platform f z addr
expr z (CmmReg r) = foldRegsUsed platform f z r
expr z (CmmMachOp _ exprs) = foldRegsUsed platform f z exprs
expr z (CmmRegOff r _) = foldRegsUsed platform f z r
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
index ff9391a7fe..6761821951 100644
--- a/compiler/GHC/Cmm/Graph.hs
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -333,14 +333,14 @@ copyIn profile conv area formals extra_stk
| isBitsType $ localRegType reg
, typeWidth (localRegType reg) < wordWidth platform =
let
- stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth platform))
+ stack_slot = CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth platform) NaturallyAligned
local = CmmLocal reg
width = cmmRegWidth platform local
expr = CmmMachOp (MO_XX_Conv (wordWidth platform) width) [stack_slot]
in CmmAssign local expr
| otherwise =
- CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
+ CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty NaturallyAligned)
where ty = localRegType reg
init_offset = widthInBytes (wordWidth platform) -- infotable
diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs
index db6d92ced6..71c8ddfb42 100644
--- a/compiler/GHC/Cmm/Info.hs
+++ b/compiler/GHC/Cmm/Info.hs
@@ -498,24 +498,24 @@ infoTableConstrTag = infoTableSrtBitmap
-- field of the info table
infoTableSrtBitmap :: Profile -> CmmExpr -> CmmExpr
infoTableSrtBitmap profile info_tbl
- = CmmLoad (cmmOffsetB platform info_tbl (stdSrtBitmapOffset profile)) (bHalfWord platform)
+ = CmmLoad (cmmOffsetB platform info_tbl (stdSrtBitmapOffset profile)) (bHalfWord platform) NaturallyAligned
where platform = profilePlatform profile
-- | Takes an info table pointer (from infoTable) and returns the closure type
-- field of the info table.
infoTableClosureType :: Profile -> CmmExpr -> CmmExpr
infoTableClosureType profile info_tbl
- = CmmLoad (cmmOffsetB platform info_tbl (stdClosureTypeOffset profile)) (bHalfWord platform)
+ = CmmLoad (cmmOffsetB platform info_tbl (stdClosureTypeOffset profile)) (bHalfWord platform) NaturallyAligned
where platform = profilePlatform profile
infoTablePtrs :: Profile -> CmmExpr -> CmmExpr
infoTablePtrs profile info_tbl
- = CmmLoad (cmmOffsetB platform info_tbl (stdPtrsOffset profile)) (bHalfWord platform)
+ = CmmLoad (cmmOffsetB platform info_tbl (stdPtrsOffset profile)) (bHalfWord platform) NaturallyAligned
where platform = profilePlatform profile
infoTableNonPtrs :: Profile -> CmmExpr -> CmmExpr
infoTableNonPtrs profile info_tbl
- = CmmLoad (cmmOffsetB platform info_tbl (stdNonPtrsOffset profile)) (bHalfWord platform)
+ = CmmLoad (cmmOffsetB platform info_tbl (stdNonPtrsOffset profile)) (bHalfWord platform) NaturallyAligned
where platform = profilePlatform profile
-- | Takes the info pointer of a function, and returns a pointer to the first
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
index f23af80d7e..0a01081634 100644
--- a/compiler/GHC/Cmm/LayoutStack.hs
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -397,7 +397,7 @@ collectContInfo blocks
procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
= case node of
- CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _ _)
-> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
where loc = getStackLoc area off stackmaps
CmmAssign (CmmLocal r) _other
@@ -1085,7 +1085,8 @@ insertReloads platform stackmap live =
-- This cmmOffset basically corresponds to manifesting
-- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
(CmmLoad (cmmOffset platform spExpr (sp_off - reg_off))
- (localRegType reg))
+ (localRegType reg)
+ NaturallyAligned)
| (reg, reg_off) <- stackSlotRegs stackmap
, reg `elemRegSet` live
]
diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs
index 82d7b56d14..2c3c605240 100644
--- a/compiler/GHC/Cmm/Lint.hs
+++ b/compiler/GHC/Cmm/Lint.hs
@@ -88,7 +88,7 @@ lintCmmBlock labels block
-- byte/word mismatches.
lintCmmExpr :: CmmExpr -> CmmLint CmmType
-lintCmmExpr (CmmLoad expr rep) = do
+lintCmmExpr (CmmLoad expr rep _alignment) = do
_ <- lintCmmExpr expr
-- Disabled, if we have the inlining phase before the lint phase,
-- we can have funny offsets due to pointer tagging. -- EZY
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
index d7d35a8bfc..40b268931d 100644
--- a/compiler/GHC/Cmm/Node.hs
+++ b/compiler/GHC/Cmm/Node.hs
@@ -464,9 +464,9 @@ wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
-- Take a transformer on expressions and apply it recursively.
-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
-- then uses f to rewrite the resulting expression
-wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
-wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
-wrapRecExp f e = f e
+wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
+wrapRecExp f (CmmLoad addr ty align) = f (CmmLoad (wrapRecExp f addr) ty align)
+wrapRecExp f e = f e
mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{}) = f
@@ -495,9 +495,9 @@ mapForeignTargetM _ (PrimTarget _) = Nothing
wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
-- then gives f a chance to rewrite the resulting expression
-wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
-wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
-wrapRecExpM f e = f e
+wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
+wrapRecExpM f n@(CmmLoad addr ty align) = maybe (f n) (\addr' -> f $ CmmLoad addr' ty align) (wrapRecExpM f addr)
+wrapRecExpM f e = f e
mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{}) = Nothing
@@ -548,9 +548,9 @@ foldExpForeignTarget _ (PrimTarget _) z = z
-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad
-- itself, delegating all the other CmmExpr forms to 'f'.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
-wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
-wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
-wrapRecExpf f e z = f e z
+wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
+wrapRecExpf f e@(CmmLoad addr _ _) z = wrapRecExpf f addr (f e z)
+wrapRecExpf f e z = f e z
foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z = z
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index 065737922f..5a855ba069 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -794,10 +794,13 @@ expr0 :: { CmmParse CmmExpr }
| STRING { do s <- code (newStringCLit $1);
return (CmmLit s) }
| reg { $1 }
- | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) }
+ | type dereference { do (align, ptr) <- $2; return (CmmLoad ptr $1 align) }
| '%' NAME '(' exprs0 ')' {% exprOp $2 $4 }
| '(' expr ')' { $2 }
+dereference :: { CmmParse (AlignmentSpec, CmmExpr) }
+ : '^' '[' expr ']' { do ptr <- $3; return (Unaligned, ptr) }
+ | '[' expr ']' { do ptr <- $2; return (NaturallyAligned, ptr) }
-- leaving out the type of a literal gives you the native word size in C--
maybe_ty :: { CmmType }
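
The new dereference nonterminal lets hand-written Cmm mark a load as unaligned with a '^' before the brackets, mirroring the pretty-printer change below. An illustrative pair (the operand e is hypothetical):

    -- In Cmm concrete syntax:
    --   bits64[e]    parses to  CmmLoad e b64 NaturallyAligned
    --   bits64^[e]   parses to  CmmLoad e b64 Unaligned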
diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs
index 7828daf803..3ef4b07af5 100644
--- a/compiler/GHC/Cmm/Ppr/Expr.hs
+++ b/compiler/GHC/Cmm/Ppr/Expr.hs
@@ -149,7 +149,12 @@ pprExpr9 :: Platform -> CmmExpr -> SDoc
pprExpr9 platform e =
case e of
CmmLit lit -> pprLit1 platform lit
- CmmLoad expr rep -> ppr rep <> brackets (pdoc platform expr)
+ CmmLoad expr rep align
+ -> let align_mark =
+ case align of
+ NaturallyAligned -> empty
+ Unaligned -> text "^"
+ in ppr rep <> align_mark <> brackets (pdoc platform expr)
CmmReg reg -> ppr reg
CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off)
CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off)
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 0f3d979716..4d4cdb6d4c 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -632,9 +632,9 @@ regsUsedIn ls e = go ls e False
use _ls _ z = z
go :: LRegSet -> CmmExpr -> Bool -> Bool
- go ls (CmmMachOp _ es) z = foldr (go ls) z es
- go ls (CmmLoad addr _) z = go ls addr z
- go ls e z = use ls e z
+ go ls (CmmMachOp _ es) z = foldr (go ls) z es
+ go ls (CmmLoad addr _ _) z = go ls addr z
+ go ls e z = use ls e z
-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers. This is a HACK to avoid global registers
@@ -845,9 +845,9 @@ memConflicts (SpMem o1 w1) (SpMem o2 w2)
memConflicts _ _ = True
exprMem :: Platform -> CmmExpr -> AbsMem
-exprMem platform (CmmLoad addr w) = bothMems (loadAddr platform addr (typeWidth w)) (exprMem platform addr)
-exprMem platform (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem platform) es)
-exprMem _ _ = NoMem
+exprMem platform (CmmLoad addr w _) = bothMems (loadAddr platform addr (typeWidth w)) (exprMem platform addr)
+exprMem platform (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem platform) es)
+exprMem _ _ = NoMem
loadAddr :: Platform -> CmmExpr -> Width -> AbsMem
loadAddr platform e w =
diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs
index b541d7a95c..9e9566b334 100644
--- a/compiler/GHC/Cmm/Utils.hs
+++ b/compiler/GHC/Cmm/Utils.hs
@@ -305,15 +305,16 @@ cmmIndexExpr platform width base idx =
byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr platform (widthInLog width)]
cmmLoadIndex :: Platform -> CmmType -> CmmExpr -> Int -> CmmExpr
-cmmLoadIndex platform ty expr ix = CmmLoad (cmmIndex platform (typeWidth ty) expr ix) ty
+cmmLoadIndex platform ty expr ix =
+ CmmLoad (cmmIndex platform (typeWidth ty) expr ix) ty NaturallyAligned -- TODO: Audit uses
--- | Load a non-pointer word.
+-- | Load a naturally-aligned non-pointer word.
cmmLoadBWord :: Platform -> CmmExpr -> CmmExpr
-cmmLoadBWord platform ptr = CmmLoad ptr (bWord platform)
+cmmLoadBWord platform ptr = CmmLoad ptr (bWord platform) NaturallyAligned
--- | Load a GC pointer.
+-- | Load a naturally-aligned GC pointer.
cmmLoadGCWord :: Platform -> CmmExpr -> CmmExpr
-cmmLoadGCWord platform ptr = CmmLoad ptr (gcWord platform)
+cmmLoadGCWord platform ptr = CmmLoad ptr (gcWord platform) NaturallyAligned
-- The "B" variants take byte offsets
cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr
@@ -352,7 +353,8 @@ cmmLabelOffW :: Platform -> CLabel -> WordOff -> CmmLit
cmmLabelOffW platform lbl wd_off = cmmLabelOffB lbl (wordsToBytes platform wd_off)
cmmLoadIndexW :: Platform -> CmmExpr -> Int -> CmmType -> CmmExpr
-cmmLoadIndexW platform base off ty = CmmLoad (cmmOffsetW platform base off) ty
+cmmLoadIndexW platform base off ty =
+ CmmLoad (cmmOffsetW platform base off) ty NaturallyAligned -- TODO: Audit uses
-----------------------
cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord,
@@ -403,7 +405,7 @@ cmmMkAssign platform expr uq =
---------------------------------------------------
isTrivialCmmExpr :: CmmExpr -> Bool
-isTrivialCmmExpr (CmmLoad _ _) = False
+isTrivialCmmExpr (CmmLoad _ _ _) = False
isTrivialCmmExpr (CmmMachOp _ _) = False
isTrivialCmmExpr (CmmLit _) = True
isTrivialCmmExpr (CmmReg _) = True
@@ -411,7 +413,7 @@ isTrivialCmmExpr (CmmRegOff _ _) = True
isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot"
hasNoGlobalRegs :: CmmExpr -> Bool
-hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e
+hasNoGlobalRegs (CmmLoad e _ _) = hasNoGlobalRegs e
hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es
hasNoGlobalRegs (CmmLit _) = True
hasNoGlobalRegs (CmmReg (CmmLocal _)) = True
@@ -483,7 +485,7 @@ regsOverlap _ reg reg' = reg == reg'
regUsedIn :: Platform -> CmmReg -> CmmExpr -> Bool
regUsedIn platform = regUsedIn_ where
_ `regUsedIn_` CmmLit _ = False
- reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e
+ reg `regUsedIn_` CmmLoad e _ _ = reg `regUsedIn_` e
reg `regUsedIn_` CmmReg reg' = regsOverlap platform reg reg'
reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap platform reg reg'
reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index 88c72f6b16..ddfcce8460 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -1067,7 +1067,7 @@ cmmExprConFold referenceKind expr = do
cmmExprNative referenceKind expr'
cmmExprCon :: NCGConfig -> CmmExpr -> CmmExpr
-cmmExprCon config (CmmLoad addr rep) = CmmLoad (cmmExprCon config addr) rep
+cmmExprCon config (CmmLoad addr rep align) = CmmLoad (cmmExprCon config addr) rep align
cmmExprCon config (CmmMachOp mop args)
= cmmMachOpFold (ncgPlatform config) mop (map (cmmExprCon config) args)
cmmExprCon _ other = other
@@ -1080,9 +1080,9 @@ cmmExprNative referenceKind expr = do
let platform = ncgPlatform config
arch = platformArch platform
case expr of
- CmmLoad addr rep
+ CmmLoad addr rep align
-> do addr' <- cmmExprNative DataReference addr
- return $ CmmLoad addr' rep
+ return $ CmmLoad addr' rep align
CmmMachOp mop args
-> do args' <- mapM (cmmExprNative DataReference) args
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index 2698e6f17f..f645720de7 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -598,7 +598,7 @@ getRegister' config plat expr
CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
- CmmLoad mem rep -> do
+ CmmLoad mem rep _ -> do
Amode addr addr_code <- getAmode plat (typeWidth rep) mem
let format = cmmTypeFormat rep
return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
index cd88a9f078..430189d442 100644
--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
@@ -338,7 +338,7 @@ assignReg_I64Code _ _
iselExpr64 :: CmmExpr -> NatM ChildCode64
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
(hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
(rlo, rhi) <- getNewRegPairNat II32
let mov_hi = LD II32 rhi hi_addr
@@ -462,7 +462,7 @@ getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32) [x])
ChildCode64 code rlo <- iselExpr64 x
return $ Fixed II32 rlo code
-getRegister' _ platform (CmmLoad mem pk)
+getRegister' _ platform (CmmLoad mem pk _)
| not (isWord64 pk) = do
Amode addr addr_code <- getAmode D mem
let code dst = assert ((targetClassOfReg platform dst == RcDouble) == isFloatType pk) $
@@ -476,45 +476,45 @@ getRegister' _ platform (CmmLoad mem pk)
where format = cmmTypeFormat pk
-- catch simple cases of zero- or sign-extended load
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
-getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _ _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
+getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _ _]) = do
-- lwa is DS-form. See Note [Power instruction format]
Amode addr addr_code <- getAmode DS mem
return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 028887a56f..392d07c62e 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -526,7 +526,7 @@ iselExpr64 (CmmLit (CmmInt i _)) = do
]
return (ChildCode64 code rlo)
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
Amode addr addr_code <- getAmode addrTree
(rlo,rhi) <- getNewRegPairNat II32
let
@@ -692,49 +692,49 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
loadFloatAmode w addr code
-- catch simple cases of zero- or sign-extended load
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _ _]) = do
code <- intLoadCode (MOVZxL II8) addr
return (Any II32 code)
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
+getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _ _]) = do
code <- intLoadCode (MOVSxL II8) addr
return (Any II32 code)
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
+getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _ _]) = do
code <- intLoadCode (MOVZxL II16) addr
return (Any II32 code)
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
+getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _ _]) = do
code <- intLoadCode (MOVSxL II16) addr
return (Any II32 code)
-- catch simple cases of zero- or sign-extended load
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOVZxL II8) addr
return (Any II64 code)
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II8) addr
return (Any II64 code)
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOVZxL II16) addr
return (Any II64 code)
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II16) addr
return (Any II64 code)
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
return (Any II64 code)
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
+getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II32) addr
return (Any II64 code)
@@ -1114,13 +1114,13 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
return (Fixed format result code)
-getRegister' _ _ (CmmLoad mem pk)
+getRegister' _ _ (CmmLoad mem pk _)
| isFloatType pk
= do
Amode addr mem_code <- getAmode mem
loadFloatAmode (typeWidth pk) addr mem_code
-getRegister' _ is32Bit (CmmLoad mem pk)
+getRegister' _ is32Bit (CmmLoad mem pk _)
| is32Bit && not (isWord64 pk)
= do
code <- intLoadCode instr mem
@@ -1138,7 +1138,7 @@ getRegister' _ is32Bit (CmmLoad mem pk)
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
-- Simpler memory load code on x86_64
-getRegister' _ is32Bit (CmmLoad mem pk)
+getRegister' _ is32Bit (CmmLoad mem pk _)
| not is32Bit
= do
code <- intLoadCode (MOV format) mem
@@ -1388,7 +1388,7 @@ getNonClobberedOperand (CmmLit lit) =
then return (OpImm (litToImm lit), nilOL)
else getNonClobberedOperand_generic (CmmLit lit)
-getNonClobberedOperand (CmmLoad mem pk) = do
+getNonClobberedOperand (CmmLoad mem pk _) = do
is32Bit <- is32BitPlatform
-- this logic could be simplified
-- TODO FIXME
@@ -1412,7 +1412,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do
return (OpAddr src', mem_code `appOL` save_code)
else
-- if its a word or gcptr on 32bit?
- getNonClobberedOperand_generic (CmmLoad mem pk)
+ getNonClobberedOperand_generic (CmmLoad mem pk NaturallyAligned)
getNonClobberedOperand e = getNonClobberedOperand_generic e
@@ -1447,7 +1447,7 @@ getOperand (CmmLit lit) = do
then return (OpImm (litToImm lit), nilOL)
else getOperand_generic (CmmLit lit)
-getOperand (CmmLoad mem pk) = do
+getOperand (CmmLoad mem pk _) = do
is32Bit <- is32BitPlatform
use_sse2 <- sse2Enabled
if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
@@ -1455,7 +1455,7 @@ getOperand (CmmLoad mem pk) = do
Amode src mem_code <- getAmode mem
return (OpAddr src, mem_code)
else
- getOperand_generic (CmmLoad mem pk)
+ getOperand_generic (CmmLoad mem pk NaturallyAligned)
getOperand e = getOperand_generic e
@@ -1465,7 +1465,7 @@ getOperand_generic e = do
return (OpReg reg, code)
isOperand :: Bool -> CmmExpr -> Bool
-isOperand _ (CmmLoad _ _) = True
+isOperand _ (CmmLoad _ _ _) = True
isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
|| isSuitableFloatingPointLit lit
isOperand _ _ = False
@@ -1523,7 +1523,7 @@ isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem e@(CmmLoad mem pk) = do
+getRegOrMem e@(CmmLoad mem pk _) = do
is32Bit <- is32BitPlatform
use_sse2 <- sse2Enabled
if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
@@ -1610,7 +1610,7 @@ condIntCode cond x y = do is32Bit <- is32BitPlatform
condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-- memory vs immediate
-condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
+condIntCode' is32Bit cond (CmmLoad x pk _) (CmmLit lit)
| is32BitLit is32Bit lit = do
Amode x_addr x_code <- getAmode x
let
@@ -1723,7 +1723,7 @@ assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
-assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _ _,
CmmLit (CmmInt i _)])
| addr == addr2, pk /= II64 || is32BitInteger i,
Just instr <- check op
@@ -1762,7 +1762,7 @@ assignMem_IntCode pk addr src = do
-- Assign; dst is a reg, rhs is mem
-assignReg_IntCode pk reg (CmmLoad src _) = do
+assignReg_IntCode pk reg (CmmLoad src _ _) = do
load_code <- intLoadCode (MOV pk) src
platform <- ncgPlatform <$> getConfig
return (load_code (getRegisterReg platform reg))
@@ -1794,7 +1794,7 @@ assignReg_FltCode _ reg src = do
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
-genJump (CmmLoad mem _) regs = do
+genJump (CmmLoad mem _ _) regs = do
Amode target code <- getAmode mem
return (code `snocOL` JMP (OpAddr target) regs)
diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs
index a6a036c290..65077dcc0b 100644
--- a/compiler/GHC/CmmToC.hs
+++ b/compiler/GHC/CmmToC.hs
@@ -221,7 +221,8 @@ pprStmt platform stmt =
parens (mkP_ <> pprExpr1 platform dest <> comma <> pprExpr platform src) <> semi
| otherwise
- -> hsep [ pprExpr platform (CmmLoad dest rep), equals, pprExpr platform src <> semi ]
+ -> hsep [ pprExpr platform (CmmLoad dest rep NaturallyAligned), equals, pprExpr platform src <> semi ]
+ -- TODO: Is this right?
where
rep = cmmExprType platform src
@@ -376,10 +377,10 @@ pprSwitch platform e ids
pprExpr :: Platform -> CmmExpr -> SDoc
pprExpr platform e = case e of
- CmmLit lit -> pprLit platform lit
- CmmLoad e ty -> pprLoad platform e ty
- CmmReg reg -> pprCastReg reg
- CmmRegOff reg 0 -> pprCastReg reg
+ CmmLit lit -> pprLit platform lit
+ CmmLoad e ty align -> pprLoad platform e ty align
+ CmmReg reg -> pprCastReg reg
+ CmmRegOff reg 0 -> pprCastReg reg
-- CmmRegOff is an alias of MO_Add
CmmRegOff reg i -> pprCastReg reg <> char '+' <>
@@ -390,13 +391,14 @@ pprExpr platform e = case e of
CmmStackSlot _ _ -> panic "pprExpr: CmmStackSlot not supported!"
-pprLoad :: Platform -> CmmExpr -> CmmType -> SDoc
-pprLoad platform e ty
+pprLoad :: Platform -> CmmExpr -> CmmType -> AlignmentSpec -> SDoc
+pprLoad platform e ty _align
| width == W64, wordWidth platform /= W64
= (if isFloatType ty then text "PK_DBL"
else text "PK_Word64")
<> parens (mkP_ <> pprExpr1 platform e)
+ -- TODO: exploit natural-alignment where possible
| otherwise
= case e of
CmmReg r | isPtrReg r && width == wordWidth platform && not (isFloatType ty)
@@ -1285,7 +1287,7 @@ te_Target (PrimTarget{}) = return ()
te_Expr :: CmmExpr -> TE ()
te_Expr (CmmLit lit) = te_Lit lit
-te_Expr (CmmLoad e _) = te_Expr e
+te_Expr (CmmLoad e _ _) = te_Expr e
te_Expr (CmmReg r) = te_Reg r
te_Expr (CmmMachOp _ es) = mapM_ te_Expr es
te_Expr (CmmRegOff r _) = te_Reg r
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index a57a6f79f0..49359939ea 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -267,7 +267,7 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
dstV <- getCmmRegW (CmmLocal dst)
- v1 <- genLoadW True addr (localRegType dst)
+ v1 <- genLoadW True addr (localRegType dst) NaturallyAligned
statement $ Store v1 dstV
genCall (PrimTarget (MO_Cmpxchg _width))
@@ -1357,8 +1357,8 @@ exprToVarOpt opt e = case e of
CmmLit lit
-> genLit opt lit
- CmmLoad e' ty
- -> genLoad False e' ty
+ CmmLoad e' ty align
+ -> genLoad False e' ty align
-- Cmmreg in expression is the value, so must load. If you want actual
-- reg pointer, call getCmmReg directly.
@@ -1790,40 +1790,40 @@ genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!"
-- | Handle CmmLoad expression.
-genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
+genLoad :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> LlvmM ExprData
-- First we try to detect a few common cases and produce better code for
-- these then the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic
-genLoad atomic e@(CmmReg (CmmGlobal r)) ty
- = genLoad_fast atomic e r 0 ty
+genLoad atomic e@(CmmReg (CmmGlobal r)) ty align
+ = genLoad_fast atomic e r 0 ty align
-genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
- = genLoad_fast atomic e r n ty
+genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty align
+ = genLoad_fast atomic e r n ty align
genLoad atomic e@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- ty
- = genLoad_fast atomic e r (fromInteger n) ty
+ ty align
+ = genLoad_fast atomic e r (fromInteger n) ty align
genLoad atomic e@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
- ty
- = genLoad_fast atomic e r (negate $ fromInteger n) ty
+ ty align
+ = genLoad_fast atomic e r (negate $ fromInteger n) ty align
-- generic case
-genLoad atomic e ty
- = getTBAAMeta topN >>= genLoad_slow atomic e ty
+genLoad atomic e ty align
+ = getTBAAMeta topN >>= genLoad_slow atomic e ty align
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
- -> LlvmM ExprData
-genLoad_fast atomic e r n ty = do
+ -> AlignmentSpec -> LlvmM ExprData
+genLoad_fast atomic e r n ty align = do
platform <- getPlatform
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1836,7 +1836,7 @@ genLoad_fast atomic e r n ty = do
case grt == ty' of
-- were fine
True -> do
- (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
+ (var, s3) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr align)
return (var, s1 `snocOL` s2 `snocOL` s3,
[])
@@ -1844,43 +1844,61 @@ genLoad_fast atomic e r n ty = do
False -> do
let pty = pLift ty'
(ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
- (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
+ (var, s4) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr' align)
return (var, s1 `snocOL` s2 `snocOL` s3
`snocOL` s4, [])
-- If its a bit type then we use the slow method since
-- we can't avoid casting anyway.
- False -> genLoad_slow atomic e ty meta
- where
- loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
- | otherwise = Load ptr
+ False -> genLoad_slow atomic e ty align meta
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
-genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
-genLoad_slow atomic e ty meta = do
+genLoad_slow :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> [MetaAnnot]
+ -> LlvmM ExprData
+genLoad_slow atomic e ty align meta = do
platform <- getPlatform
cfg <- getConfig
runExprData $ do
iptr <- exprToVarW e
case getVarType iptr of
LMPointer _ ->
- doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr)
+ doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic iptr align)
i@(LMInt _) | i == llvmWord platform -> do
let pty = LMPointer $ cmmToLlvmType ty
ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty
- doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr)
+ doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic ptr align)
other -> pprPanic "exprToVar: CmmLoad expression is not right type!"
(PprCmm.pprExpr platform e <+> text (
"Size of Ptr: " ++ show (llvmPtrBits platform) ++
", Size of var: " ++ show (llvmWidthInBits platform other) ++
", Var: " ++ renderWithContext (llvmCgContext cfg) (ppVar cfg iptr)))
- where
- loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
- | otherwise = Load ptr
+{-
+Note [Alignment of vector-typed values]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On x86, vector types need to be 16-byte aligned for aligned
+access, but we have no way of guaranteeing that this is true with GHC
+(we would need to modify the layout of the stack and closures, change
+the storage manager, etc.). So, we blindly tell LLVM that *any* vector
+store or load could be unaligned. In the future we may be able to
+guarantee that certain vector access patterns are aligned, in which
+case we will need a more granular way of specifying alignment.
+-}
+
+mkLoad :: Atomic -> LlvmVar -> AlignmentSpec -> LlvmExpression
+mkLoad atomic ptr alignment
+ | atomic = ALoad SyncSeqCst False ptr
+ | otherwise = Load ptr align
+ where
+ ty = pLower (getVarType ptr)
+ is_vector = isVector ty
+ align = case alignment of
+ -- See Note [Alignment of vector-typed values]
+ _ | is_vector -> Just 1
+ Unaligned -> Just 1
+ NaturallyAligned -> Nothing
-- | Handle CmmReg expression. This will return a pointer to the stack
-- location of the register. Throws an error if it isn't allocated on
@@ -1919,7 +1937,7 @@ getCmmRegVal reg =
where loadFromStack = do
ptr <- getCmmReg reg
let ty = pLower $ getVarType ptr
- (v, s) <- doExpr ty (Load ptr)
+ (v, s) <- doExpr ty (Load ptr Nothing)
return (v, ty, unitOL s)
-- | Allocate a local CmmReg on the stack
@@ -2210,8 +2228,8 @@ runStmtsDecls action = do
getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
getCmmRegW = lift . getCmmReg
-genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
-genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
+genLoadW :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> WriterT LlvmAccum LlvmM LlvmVar
+genLoadW atomic e ty alignment = liftExprData $ genLoad atomic e ty alignment
-- | Return element of single-element list; 'panic' if list is not a single-element list
singletonPanic :: String -> [a] -> a
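
Taken together, mkLoad is now the single place where an AlignmentSpec becomes an LLVM-level alignment. A sketch of its value-level behaviour for a non-vector pointer p, under the definitions above:

    -- mkLoad False p Unaligned        == Load p (Just 1)
    -- mkLoad False p NaturallyAligned == Load p Nothing
    -- mkLoad True  p _                == ALoad SyncSeqCst False p
    -- Vector-typed pointers always get (Just 1); see
    -- Note [Alignment of vector-typed values] above.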
diff --git a/compiler/GHC/Llvm/Ppr.hs b/compiler/GHC/Llvm/Ppr.hs
index 8ec3f58db2..405296f79f 100644
--- a/compiler/GHC/Llvm/Ppr.hs
+++ b/compiler/GHC/Llvm/Ppr.hs
@@ -242,7 +242,7 @@ ppLlvmExpression opts expr
ExtractV struct idx -> ppExtractV opts struct idx
Insert vec elt idx -> ppInsert opts vec elt idx
GetElemPtr inb ptr indexes -> ppGetElementPtr opts inb ptr indexes
- Load ptr -> ppLoad opts ptr
+ Load ptr align -> ppLoad opts ptr align
ALoad ord st ptr -> ppALoad opts ord st ptr
Malloc tp amount -> ppMalloc opts tp amount
AtomicRMW aop tgt src ordering -> ppAtomicRMW opts aop tgt src ordering
@@ -365,20 +365,16 @@ ppCmpXChg opts addr old new s_ord f_ord =
text "cmpxchg" <+> ppVar opts addr <> comma <+> ppVar opts old <> comma <+> ppVar opts new
<+> ppSyncOrdering s_ord <+> ppSyncOrdering f_ord
--- XXX: On x86, vector types need to be 16-byte aligned for aligned access, but
--- we have no way of guaranteeing that this is true with GHC (we would need to
--- modify the layout of the stack and closures, change the storage manager,
--- etc.). So, we blindly tell LLVM that *any* vector store or load could be
--- unaligned. In the future we may be able to guarantee that certain vector
--- access patterns are aligned, in which case we will need a more granular way
--- of specifying alignment.
-ppLoad :: LlvmCgConfig -> LlvmVar -> SDoc
-ppLoad opts var = text "load" <+> ppr derefType <> comma <+> ppVar opts var <> align
+ppLoad :: LlvmCgConfig -> LlvmVar -> LMAlign -> SDoc
+ppLoad opts var alignment =
+ text "load" <+> ppr derefType <> comma <+> ppVar opts var <> align
where
derefType = pLower $ getVarType var
- align | isVector . pLower . getVarType $ var = text ", align 1"
- | otherwise = empty
+ align =
+ case alignment of
+ Just n -> text ", align" <+> ppr n
+ Nothing -> empty
ppALoad :: LlvmCgConfig -> LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc
ppALoad opts ord st var =
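
With the alignment threaded through to the printer, ppLoad emits it directly rather than special-casing vector types itself. For instance (IR value name and type hypothetical):

    -- Load p (Just 1) renders as:  load i64, i64* %p, align 1
    -- Load p Nothing  renders as:  load i64, i64* %p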
diff --git a/compiler/GHC/Llvm/Syntax.hs b/compiler/GHC/Llvm/Syntax.hs
index 12e0073c7a..befac77734 100644
--- a/compiler/GHC/Llvm/Syntax.hs
+++ b/compiler/GHC/Llvm/Syntax.hs
@@ -252,7 +252,7 @@ data LlvmExpression
{- |
Load the value at location ptr
-}
- | Load LlvmVar
+ | Load LlvmVar LMAlign
{- |
Atomic load of the value at location ptr
diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs
index 6722a4c288..bc42a11d1c 100644
--- a/compiler/GHC/StgToCmm/CgUtils.hs
+++ b/compiler/GHC/StgToCmm/CgUtils.hs
@@ -171,7 +171,7 @@ fixStgRegStmt platform stmt = fixAssign $ mapExpDeep fixExpr stmt
let baseAddr = get_GlobalReg_addr platform reg
in case reg of
BaseReg -> baseAddr
- _other -> CmmLoad baseAddr (globalRegType platform reg)
+ _other -> CmmLoad baseAddr (globalRegType platform reg) NaturallyAligned
CmmRegOff (CmmGlobal reg) offset ->
-- RegOf leaves are just a shorthand form. If the reg maps
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index e3cd4d8db1..cc33576240 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -395,7 +395,7 @@ emitPopTupleRegs regs_live = do
(widthInBytes $ typeWidth reg_ty)
adj_sp = mkAssign spReg
(cmmOffset platform spExpr width)
- restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty)
+ restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty NaturallyAligned)
in mkCmmIfThen cond $ catAGraphs [restore_reg, adj_sp]
emit . catAGraphs =<< mapM save_arg regs
@@ -451,7 +451,7 @@ closeNursery profile tso = do
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (MO_Sub W64)
- [ CmmLoad alloc_limit b64
+ [ CmmLoad alloc_limit b64 NaturallyAligned
, CmmMachOp (mo_WordTo64 platform) [alloc] ])
]
@@ -485,9 +485,8 @@ loadThreadState profile = do
open_nursery,
-- and load the current cost centre stack from the TSO when profiling:
if profileIsProfiling profile
- then storeCurCCS
- (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso))
- (tso_CCCS profile)) (ccsType platform))
+ then let ccs_ptr = cmmOffset platform (CmmReg (CmmLocal tso)) (tso_CCCS profile)
+ in storeCurCCS (CmmLoad ccs_ptr (ccsType platform) NaturallyAligned)
else mkNop
]
@@ -555,11 +554,11 @@ openNursery profile tso = do
(cmmOffsetExpr platform
(CmmReg bdstartreg)
(cmmOffset platform
- (CmmMachOp (mo_wordMul platform) [
- CmmMachOp (MO_SS_Conv W32 (wordWidth platform))
- [CmmLoad (nursery_bdescr_blocks platform cnreg) b32],
- mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform))
- ])
+ (CmmMachOp (mo_wordMul platform)
+ [ CmmMachOp (MO_SS_Conv W32 (wordWidth platform))
+ [CmmLoad (nursery_bdescr_blocks platform cnreg) b32 NaturallyAligned]
+ , mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform))
+ ])
(-1)
)
),
@@ -573,7 +572,7 @@ openNursery profile tso = do
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (MO_Add W64)
- [ CmmLoad alloc_limit b64
+ [ CmmLoad alloc_limit b64 NaturallyAligned
, CmmMachOp (mo_WordTo64 platform) [alloc] ])
]
diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs
index 23e7519d80..fd2a84c6e2 100644
--- a/compiler/GHC/StgToCmm/Hpc.hs
+++ b/compiler/GHC/StgToCmm/Hpc.hs
@@ -27,7 +27,7 @@ import Control.Monad
mkTickBox :: Platform -> Module -> Int -> CmmAGraph
mkTickBox platform mod n
= mkStore tick_box (CmmMachOp (MO_Add W64)
- [ CmmLoad tick_box b64
+ [ CmmLoad tick_box b64 NaturallyAligned
, CmmLit (CmmInt 1 W64)
])
where
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 72f42ba831..17fe2d9373 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -2222,7 +2222,7 @@ cmmLoadIndexOffExpr :: Platform
-> CmmExpr -- Index
-> CmmExpr
cmmLoadIndexOffExpr platform off ty base idx_ty idx
- = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty
+ = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty NaturallyAligned
setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index bb3d4a8696..79e08071fa 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -85,7 +85,7 @@ mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
costCentreFrom :: Platform
-> CmmExpr -- A closure pointer
-> CmmExpr -- The cost centre from that closure
-costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeader_ccs (platformConstants platform))) (ccsType platform)
+costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeader_ccs (platformConstants platform))) (ccsType platform) NaturallyAligned
-- | The profiling header words in a static closure
staticProfHdr :: Profile -> CostCentreStack -> [CmmLit]
@@ -410,7 +410,8 @@ ldvEnter cl_ptr = do
loadEra :: Platform -> CmmExpr
loadEra platform = CmmMachOp (MO_UU_Conv (cIntWidth platform) (wordWidth platform))
[CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "era")))
- (cInt platform)]
+ (cInt platform)
+ NaturallyAligned]
-- | Takes the address of a closure, and returns
-- the address of the LDV word in the closure
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 22e34dddae..270699b9ed 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -104,18 +104,18 @@ addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl))
-- | @addToMem rep ptr n@ adds @n@ to the integer pointed-to by @ptr@.
addToMem :: CmmType -- rep of the counter
- -> CmmExpr -- Address
+ -> CmmExpr -- Naturally-aligned address
-> Int -- What to add (a word)
-> CmmAGraph
addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep)))
-- | @addToMemE rep ptr n@ adds @n@ to the integer pointed-to by @ptr@.
addToMemE :: CmmType -- rep of the counter
- -> CmmExpr -- Address
+ -> CmmExpr -- Naturally-aligned address
-> CmmExpr -- What to add (a word-typed expression)
-> CmmAGraph
addToMemE rep ptr n
- = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n])
+ = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep NaturallyAligned, n])
-------------------------------------------------------------------------
@@ -135,7 +135,8 @@ mkTaggedObjectLoad platform reg base offset tag
(CmmLoad (cmmOffsetB platform
(CmmReg (CmmLocal base))
(offset - tag))
- (localRegType reg))
+ (localRegType reg)
+ NaturallyAligned)
-------------------------------------------------------------------------
--
@@ -238,7 +239,9 @@ callerSaveGlobalReg platform reg
callerRestoreGlobalReg :: Platform -> GlobalReg -> CmmAGraph
callerRestoreGlobalReg platform reg
= mkAssign (CmmGlobal reg)
- (CmmLoad (get_GlobalReg_addr platform reg) (globalRegType platform reg))
+ (CmmLoad (get_GlobalReg_addr platform reg)
+ (globalRegType platform reg)
+ NaturallyAligned)
-------------------------------------------------------------------------