diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-01-27 13:28:32 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-04 16:14:47 -0500 |
commit | 7217156c40240c0aed5ffd83ead0fe4ba0484c75 (patch) | |
tree | 1cd6f31d82ba304695189c52939a64a5d9c9f150 /compiler/GHC | |
parent | 58d7faacafc975d522cbc9f56a7db1e46b37d4a1 (diff) | |
download | haskell-7217156c40240c0aed5ffd83ead0fe4ba0484c75.tar.gz |
Introduce alignment in CmmLoad
Diffstat (limited to 'compiler/GHC')
26 files changed, 193 insertions, 158 deletions
diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs index 10eb9ccb3d..b18730ed79 100644 --- a/compiler/GHC/Cmm/CommonBlockElim.hs +++ b/compiler/GHC/Cmm/CommonBlockElim.hs @@ -159,7 +159,7 @@ hash_block block = hash_e :: CmmExpr -> Word32 hash_e (CmmLit l) = hash_lit l - hash_e (CmmLoad e _) = 67 + hash_e e + hash_e (CmmLoad e _ _) = 67 + hash_e e hash_e (CmmReg r) = hash_reg r hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check hash_e (CmmRegOff r i) = hash_reg r + cvt i @@ -222,7 +222,7 @@ eqExprWith :: (BlockId -> BlockId -> Bool) eqExprWith eqBid = eq where CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 - CmmLoad e1 t1 `eq` CmmLoad e2 t2 = t1 `cmmEqType` t2 && e1 `eq` e2 + CmmLoad e1 t1 a1 `eq` CmmLoad e2 t2 a2 = t1 `cmmEqType` t2 && e1 `eq` e2 && a1==a2 CmmReg r1 `eq` CmmReg r2 = r1==r2 CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs index e6d1f2735e..06c1f9aace 100644 --- a/compiler/GHC/Cmm/DebugBlock.hs +++ b/compiler/GHC/Cmm/DebugBlock.hs @@ -546,7 +546,7 @@ toUnwindExpr _ (CmmLit (CmmInt i _)) = UwConst (fromIntegral i) toUnwindExpr _ (CmmLit (CmmLabel l)) = UwLabel l toUnwindExpr _ (CmmRegOff (CmmGlobal g) i) = UwReg g i toUnwindExpr _ (CmmReg (CmmGlobal g)) = UwReg g 0 -toUnwindExpr platform (CmmLoad e _) = UwDeref (toUnwindExpr platform e) +toUnwindExpr platform (CmmLoad e _ _) = UwDeref (toUnwindExpr platform e) toUnwindExpr platform e@(CmmMachOp op [e1, e2]) = case (op, toUnwindExpr platform e1, toUnwindExpr platform e2) of (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y) diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs index f63ef62dab..f910e65f04 100644 --- a/compiler/GHC/Cmm/Expr.hs +++ b/compiler/GHC/Cmm/Expr.hs @@ -9,6 +9,7 @@ module GHC.Cmm.Expr ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr , CmmReg(..), cmmRegType, cmmRegWidth , CmmLit(..), cmmLitType + , AlignmentSpec(..) , LocalReg(..), localRegType , GlobalReg(..), isArgReg, globalRegType , spReg, hpReg, spLimReg, hpLimReg, nodeReg @@ -53,12 +54,13 @@ import GHC.Types.Basic (Alignment, mkAlignment, alignmentOf) ----------------------------------------------------------------------------- data CmmExpr - = CmmLit !CmmLit -- Literal - | CmmLoad !CmmExpr !CmmType -- Read memory location + = CmmLit !CmmLit -- Literal + | CmmLoad !CmmExpr !CmmType !AlignmentSpec + -- Read memory location | CmmReg !CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) | CmmStackSlot Area {-# UNPACK #-} !Int - -- addressing expression of a stack slot + -- Addressing expression of a stack slot -- See Note [CmmStackSlot aliasing] | CmmRegOff !CmmReg !Int -- CmmRegOff reg i @@ -69,13 +71,16 @@ data CmmExpr instance Eq CmmExpr where -- Equality ignores the types CmmLit l1 == CmmLit l2 = l1==l2 - CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmLoad e1 _ _ == CmmLoad e2 _ _ = e1==e2 CmmReg r1 == CmmReg r2 = r1==r2 CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 _e1 == _e2 = False +data AlignmentSpec = NaturallyAligned | Unaligned + deriving (Eq, Ord, Show) + data CmmReg = CmmLocal {-# UNPACK #-} !LocalReg | CmmGlobal GlobalReg @@ -225,7 +230,7 @@ instance Outputable CmmLit where cmmExprType :: Platform -> CmmExpr -> CmmType cmmExprType platform = \case (CmmLit lit) -> cmmLitType platform lit - (CmmLoad _ rep) -> rep + (CmmLoad _ rep _) -> rep (CmmReg reg) -> cmmRegType platform reg (CmmMachOp op args) -> machOpResultType platform op (map (cmmExprType platform) args) (CmmRegOff reg _) -> cmmRegType platform reg @@ -385,7 +390,7 @@ instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where {-# INLINEABLE foldRegsUsed #-} foldRegsUsed platform f !z e = expr z e where expr z (CmmLit _) = z - expr z (CmmLoad addr _) = foldRegsUsed platform f z addr + expr z (CmmLoad addr _ _) = foldRegsUsed platform f z addr expr z (CmmReg r) = foldRegsUsed platform f z r expr z (CmmMachOp _ exprs) = foldRegsUsed platform f z exprs expr z (CmmRegOff r _) = foldRegsUsed platform f z r diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs index ff9391a7fe..6761821951 100644 --- a/compiler/GHC/Cmm/Graph.hs +++ b/compiler/GHC/Cmm/Graph.hs @@ -333,14 +333,14 @@ copyIn profile conv area formals extra_stk | isBitsType $ localRegType reg , typeWidth (localRegType reg) < wordWidth platform = let - stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth platform)) + stack_slot = CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth platform) NaturallyAligned local = CmmLocal reg width = cmmRegWidth platform local expr = CmmMachOp (MO_XX_Conv (wordWidth platform) width) [stack_slot] in CmmAssign local expr | otherwise = - CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) + CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty NaturallyAligned) where ty = localRegType reg init_offset = widthInBytes (wordWidth platform) -- infotable diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs index db6d92ced6..71c8ddfb42 100644 --- a/compiler/GHC/Cmm/Info.hs +++ b/compiler/GHC/Cmm/Info.hs @@ -498,24 +498,24 @@ infoTableConstrTag = infoTableSrtBitmap -- field of the info table infoTableSrtBitmap :: Profile -> CmmExpr -> CmmExpr infoTableSrtBitmap profile info_tbl - = CmmLoad (cmmOffsetB platform info_tbl (stdSrtBitmapOffset profile)) (bHalfWord platform) + = CmmLoad (cmmOffsetB platform info_tbl (stdSrtBitmapOffset profile)) (bHalfWord platform) NaturallyAligned where platform = profilePlatform profile -- | Takes an info table pointer (from infoTable) and returns the closure type -- field of the info table. infoTableClosureType :: Profile -> CmmExpr -> CmmExpr infoTableClosureType profile info_tbl - = CmmLoad (cmmOffsetB platform info_tbl (stdClosureTypeOffset profile)) (bHalfWord platform) + = CmmLoad (cmmOffsetB platform info_tbl (stdClosureTypeOffset profile)) (bHalfWord platform) NaturallyAligned where platform = profilePlatform profile infoTablePtrs :: Profile -> CmmExpr -> CmmExpr infoTablePtrs profile info_tbl - = CmmLoad (cmmOffsetB platform info_tbl (stdPtrsOffset profile)) (bHalfWord platform) + = CmmLoad (cmmOffsetB platform info_tbl (stdPtrsOffset profile)) (bHalfWord platform) NaturallyAligned where platform = profilePlatform profile infoTableNonPtrs :: Profile -> CmmExpr -> CmmExpr infoTableNonPtrs profile info_tbl - = CmmLoad (cmmOffsetB platform info_tbl (stdNonPtrsOffset profile)) (bHalfWord platform) + = CmmLoad (cmmOffsetB platform info_tbl (stdNonPtrsOffset profile)) (bHalfWord platform) NaturallyAligned where platform = profilePlatform profile -- | Takes the info pointer of a function, and returns a pointer to the first diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs index f23af80d7e..0a01081634 100644 --- a/compiler/GHC/Cmm/LayoutStack.hs +++ b/compiler/GHC/Cmm/LayoutStack.hs @@ -397,7 +397,7 @@ collectContInfo blocks procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap procMiddle stackmaps node sm = case node of - CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _ _) -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } where loc = getStackLoc area off stackmaps CmmAssign (CmmLocal r) _other @@ -1085,7 +1085,8 @@ insertReloads platform stackmap live = -- This cmmOffset basically corresponds to manifesting -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets] (CmmLoad (cmmOffset platform spExpr (sp_off - reg_off)) - (localRegType reg)) + (localRegType reg) + NaturallyAligned) | (reg, reg_off) <- stackSlotRegs stackmap , reg `elemRegSet` live ] diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs index 82d7b56d14..2c3c605240 100644 --- a/compiler/GHC/Cmm/Lint.hs +++ b/compiler/GHC/Cmm/Lint.hs @@ -88,7 +88,7 @@ lintCmmBlock labels block -- byte/word mismatches. lintCmmExpr :: CmmExpr -> CmmLint CmmType -lintCmmExpr (CmmLoad expr rep) = do +lintCmmExpr (CmmLoad expr rep _alignment) = do _ <- lintCmmExpr expr -- Disabled, if we have the inlining phase before the lint phase, -- we can have funny offsets due to pointer tagging. -- EZY diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs index d7d35a8bfc..40b268931d 100644 --- a/compiler/GHC/Cmm/Node.hs +++ b/compiler/GHC/Cmm/Node.hs @@ -464,9 +464,9 @@ wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr -- Take a transformer on expressions and apply it recursively. -- (wrapRecExp f e) first recursively applies itself to sub-expressions of e -- then uses f to rewrite the resulting expression -wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es) -wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty) -wrapRecExp f e = f e +wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es) +wrapRecExp f (CmmLoad addr ty align) = f (CmmLoad (wrapRecExp f addr) ty align) +wrapRecExp f e = f e mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x mapExp _ f@(CmmEntry{}) = f @@ -495,9 +495,9 @@ mapForeignTargetM _ (PrimTarget _) = Nothing wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr) -- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e -- then gives f a chance to rewrite the resulting expression -wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) -wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr) -wrapRecExpM f e = f e +wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) +wrapRecExpM f n@(CmmLoad addr ty align) = maybe (f n) (\addr' -> f $ CmmLoad addr' ty align) (wrapRecExpM f addr) +wrapRecExpM f e = f e mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) mapExpM _ (CmmEntry{}) = Nothing @@ -548,9 +548,9 @@ foldExpForeignTarget _ (PrimTarget _) z = z -- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad -- itself, delegating all the other CmmExpr forms to 'f'. wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z -wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es -wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z) -wrapRecExpf f e z = f e z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es +wrapRecExpf f e@(CmmLoad addr _ _) z = wrapRecExpf f addr (f e z) +wrapRecExpf f e z = f e z foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z foldExp _ (CmmEntry {}) z = z diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 065737922f..5a855ba069 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -794,10 +794,13 @@ expr0 :: { CmmParse CmmExpr } | STRING { do s <- code (newStringCLit $1); return (CmmLit s) } | reg { $1 } - | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | type dereference { do (align, ptr) <- $2; return (CmmLoad ptr $1 align) } | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } | '(' expr ')' { $2 } +dereference :: { CmmParse (AlignmentSpec, CmmExpr) } + : '^' '[' expr ']' { do ptr <- $3; return (Unaligned, ptr) } + | '[' expr ']' { do ptr <- $2; return (NaturallyAligned, ptr) } -- leaving out the type of a literal gives you the native word size in C-- maybe_ty :: { CmmType } diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs index 7828daf803..3ef4b07af5 100644 --- a/compiler/GHC/Cmm/Ppr/Expr.hs +++ b/compiler/GHC/Cmm/Ppr/Expr.hs @@ -149,7 +149,12 @@ pprExpr9 :: Platform -> CmmExpr -> SDoc pprExpr9 platform e = case e of CmmLit lit -> pprLit1 platform lit - CmmLoad expr rep -> ppr rep <> brackets (pdoc platform expr) + CmmLoad expr rep align + -> let align_mark = + case align of + NaturallyAligned -> empty + Unaligned -> text "^" + in ppr rep <> align_mark <> brackets (pdoc platform expr) CmmReg reg -> ppr reg CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off) diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs index 0f3d979716..4d4cdb6d4c 100644 --- a/compiler/GHC/Cmm/Sink.hs +++ b/compiler/GHC/Cmm/Sink.hs @@ -632,9 +632,9 @@ regsUsedIn ls e = go ls e False use _ls _ z = z go :: LRegSet -> CmmExpr -> Bool -> Bool - go ls (CmmMachOp _ es) z = foldr (go ls) z es - go ls (CmmLoad addr _) z = go ls addr z - go ls e z = use ls e z + go ls (CmmMachOp _ es) z = foldr (go ls) z es + go ls (CmmLoad addr _ _) z = go ls addr z + go ls e z = use ls e z -- we don't inline into CmmUnsafeForeignCall if the expression refers -- to global registers. This is a HACK to avoid global registers @@ -845,9 +845,9 @@ memConflicts (SpMem o1 w1) (SpMem o2 w2) memConflicts _ _ = True exprMem :: Platform -> CmmExpr -> AbsMem -exprMem platform (CmmLoad addr w) = bothMems (loadAddr platform addr (typeWidth w)) (exprMem platform addr) -exprMem platform (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem platform) es) -exprMem _ _ = NoMem +exprMem platform (CmmLoad addr w _) = bothMems (loadAddr platform addr (typeWidth w)) (exprMem platform addr) +exprMem platform (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem platform) es) +exprMem _ _ = NoMem loadAddr :: Platform -> CmmExpr -> Width -> AbsMem loadAddr platform e w = diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs index b541d7a95c..9e9566b334 100644 --- a/compiler/GHC/Cmm/Utils.hs +++ b/compiler/GHC/Cmm/Utils.hs @@ -305,15 +305,16 @@ cmmIndexExpr platform width base idx = byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr platform (widthInLog width)] cmmLoadIndex :: Platform -> CmmType -> CmmExpr -> Int -> CmmExpr -cmmLoadIndex platform ty expr ix = CmmLoad (cmmIndex platform (typeWidth ty) expr ix) ty +cmmLoadIndex platform ty expr ix = + CmmLoad (cmmIndex platform (typeWidth ty) expr ix) ty NaturallyAligned -- TODO: Audit uses --- | Load a non-pointer word. +-- | Load a naturally-aligned non-pointer word. cmmLoadBWord :: Platform -> CmmExpr -> CmmExpr -cmmLoadBWord platform ptr = CmmLoad ptr (bWord platform) +cmmLoadBWord platform ptr = CmmLoad ptr (bWord platform) NaturallyAligned --- | Load a GC pointer. +-- | Load a naturally-aligned GC pointer. cmmLoadGCWord :: Platform -> CmmExpr -> CmmExpr -cmmLoadGCWord platform ptr = CmmLoad ptr (gcWord platform) +cmmLoadGCWord platform ptr = CmmLoad ptr (gcWord platform) NaturallyAligned -- The "B" variants take byte offsets cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr @@ -352,7 +353,8 @@ cmmLabelOffW :: Platform -> CLabel -> WordOff -> CmmLit cmmLabelOffW platform lbl wd_off = cmmLabelOffB lbl (wordsToBytes platform wd_off) cmmLoadIndexW :: Platform -> CmmExpr -> Int -> CmmType -> CmmExpr -cmmLoadIndexW platform base off ty = CmmLoad (cmmOffsetW platform base off) ty +cmmLoadIndexW platform base off ty = + CmmLoad (cmmOffsetW platform base off) ty NaturallyAligned -- TODO: Audit ses ----------------------- cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, @@ -403,7 +405,7 @@ cmmMkAssign platform expr uq = --------------------------------------------------- isTrivialCmmExpr :: CmmExpr -> Bool -isTrivialCmmExpr (CmmLoad _ _) = False +isTrivialCmmExpr (CmmLoad _ _ _) = False isTrivialCmmExpr (CmmMachOp _ _) = False isTrivialCmmExpr (CmmLit _) = True isTrivialCmmExpr (CmmReg _) = True @@ -411,7 +413,7 @@ isTrivialCmmExpr (CmmRegOff _ _) = True isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot" hasNoGlobalRegs :: CmmExpr -> Bool -hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e +hasNoGlobalRegs (CmmLoad e _ _) = hasNoGlobalRegs e hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es hasNoGlobalRegs (CmmLit _) = True hasNoGlobalRegs (CmmReg (CmmLocal _)) = True @@ -483,7 +485,7 @@ regsOverlap _ reg reg' = reg == reg' regUsedIn :: Platform -> CmmReg -> CmmExpr -> Bool regUsedIn platform = regUsedIn_ where _ `regUsedIn_` CmmLit _ = False - reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmLoad e _ _ = reg `regUsedIn_` e reg `regUsedIn_` CmmReg reg' = regsOverlap platform reg reg' reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap platform reg reg' reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs index 88c72f6b16..ddfcce8460 100644 --- a/compiler/GHC/CmmToAsm.hs +++ b/compiler/GHC/CmmToAsm.hs @@ -1067,7 +1067,7 @@ cmmExprConFold referenceKind expr = do cmmExprNative referenceKind expr' cmmExprCon :: NCGConfig -> CmmExpr -> CmmExpr -cmmExprCon config (CmmLoad addr rep) = CmmLoad (cmmExprCon config addr) rep +cmmExprCon config (CmmLoad addr rep align) = CmmLoad (cmmExprCon config addr) rep align cmmExprCon config (CmmMachOp mop args) = cmmMachOpFold (ncgPlatform config) mop (map (cmmExprCon config) args) cmmExprCon _ other = other @@ -1080,9 +1080,9 @@ cmmExprNative referenceKind expr = do let platform = ncgPlatform config arch = platformArch platform case expr of - CmmLoad addr rep + CmmLoad addr rep align -> do addr' <- cmmExprNative DataReference addr - return $ CmmLoad addr' rep + return $ CmmLoad addr' rep align CmmMachOp mop args -> do args' <- mapM (cmmExprNative DataReference) args diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index 2698e6f17f..f645720de7 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -598,7 +598,7 @@ getRegister' config plat expr CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) - CmmLoad mem rep -> do + CmmLoad mem rep _ -> do Amode addr addr_code <- getAmode plat (typeWidth rep) mem let format = cmmTypeFormat rep return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr))) diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index cd88a9f078..430189d442 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -338,7 +338,7 @@ assignReg_I64Code _ _ iselExpr64 :: CmmExpr -> NatM ChildCode64 -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do +iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree (rlo, rhi) <- getNewRegPairNat II32 let mov_hi = LD II32 rhi hi_addr @@ -462,7 +462,7 @@ getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32) [x]) ChildCode64 code rlo <- iselExpr64 x return $ Fixed II32 rlo code -getRegister' _ platform (CmmLoad mem pk) +getRegister' _ platform (CmmLoad mem pk _) | not (isWord64 pk) = do Amode addr addr_code <- getAmode D mem let code dst = assert ((targetClassOfReg platform dst == RcDouble) == isFloatType pk) $ @@ -476,45 +476,45 @@ getRegister' _ platform (CmmLoad mem pk) where format = cmmTypeFormat pk -- catch simple cases of zero- or sign-extended load -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) -getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) -getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) -- Note: there is no Load Byte Arithmetic instruction, so no signed case here -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) -getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _ _]) = do Amode addr addr_code <- getAmode D mem return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) -getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do +getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _ _]) = do -- lwa is DS-form. See Note [Power instruction format] Amode addr addr_code <- getAmode DS mem return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 028887a56f..392d07c62e 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -526,7 +526,7 @@ iselExpr64 (CmmLit (CmmInt i _)) = do ] return (ChildCode64 code rlo) -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do +iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do Amode addr addr_code <- getAmode addrTree (rlo,rhi) <- getNewRegPairNat II32 let @@ -692,49 +692,49 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = loadFloatAmode w addr code -- catch simple cases of zero- or sign-extended load -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _ _]) = do code <- intLoadCode (MOVZxL II8) addr return (Any II32 code) -getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do +getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _ _]) = do code <- intLoadCode (MOVSxL II8) addr return (Any II32 code) -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do +getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _ _]) = do code <- intLoadCode (MOVZxL II16) addr return (Any II32 code) -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do +getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _ _]) = do code <- intLoadCode (MOVSxL II16) addr return (Any II32 code) -- catch simple cases of zero- or sign-extended load -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOVZxL II8) addr return (Any II64 code) -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOVSxL II8) addr return (Any II64 code) -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOVZxL II16) addr return (Any II64 code) -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOVSxL II16) addr return (Any II64 code) -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend return (Any II64 code) -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) +getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _]) | not is32Bit = do code <- intLoadCode (MOVSxL II32) addr return (Any II64 code) @@ -1114,13 +1114,13 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps return (Fixed format result code) -getRegister' _ _ (CmmLoad mem pk) +getRegister' _ _ (CmmLoad mem pk _) | isFloatType pk = do Amode addr mem_code <- getAmode mem loadFloatAmode (typeWidth pk) addr mem_code -getRegister' _ is32Bit (CmmLoad mem pk) +getRegister' _ is32Bit (CmmLoad mem pk _) | is32Bit && not (isWord64 pk) = do code <- intLoadCode instr mem @@ -1138,7 +1138,7 @@ getRegister' _ is32Bit (CmmLoad mem pk) -- simpler we do our 8-bit arithmetic with full 32-bit registers. -- Simpler memory load code on x86_64 -getRegister' _ is32Bit (CmmLoad mem pk) +getRegister' _ is32Bit (CmmLoad mem pk _) | not is32Bit = do code <- intLoadCode (MOV format) mem @@ -1388,7 +1388,7 @@ getNonClobberedOperand (CmmLit lit) = then return (OpImm (litToImm lit), nilOL) else getNonClobberedOperand_generic (CmmLit lit) -getNonClobberedOperand (CmmLoad mem pk) = do +getNonClobberedOperand (CmmLoad mem pk _) = do is32Bit <- is32BitPlatform -- this logic could be simplified -- TODO FIXME @@ -1412,7 +1412,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do return (OpAddr src', mem_code `appOL` save_code) else -- if its a word or gcptr on 32bit? - getNonClobberedOperand_generic (CmmLoad mem pk) + getNonClobberedOperand_generic (CmmLoad mem pk NaturallyAligned) getNonClobberedOperand e = getNonClobberedOperand_generic e @@ -1447,7 +1447,7 @@ getOperand (CmmLit lit) = do then return (OpImm (litToImm lit), nilOL) else getOperand_generic (CmmLit lit) -getOperand (CmmLoad mem pk) = do +getOperand (CmmLoad mem pk _) = do is32Bit <- is32BitPlatform use_sse2 <- sse2Enabled if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True) @@ -1455,7 +1455,7 @@ getOperand (CmmLoad mem pk) = do Amode src mem_code <- getAmode mem return (OpAddr src, mem_code) else - getOperand_generic (CmmLoad mem pk) + getOperand_generic (CmmLoad mem pk NaturallyAligned) getOperand e = getOperand_generic e @@ -1465,7 +1465,7 @@ getOperand_generic e = do return (OpReg reg, code) isOperand :: Bool -> CmmExpr -> Bool -isOperand _ (CmmLoad _ _) = True +isOperand _ (CmmLoad _ _ _) = True isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit || isSuitableFloatingPointLit lit isOperand _ _ = False @@ -1523,7 +1523,7 @@ isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 isSuitableFloatingPointLit _ = False getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) -getRegOrMem e@(CmmLoad mem pk) = do +getRegOrMem e@(CmmLoad mem pk _) = do is32Bit <- is32BitPlatform use_sse2 <- sse2Enabled if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True) @@ -1610,7 +1610,7 @@ condIntCode cond x y = do is32Bit <- is32BitPlatform condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode -- memory vs immediate -condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit) +condIntCode' is32Bit cond (CmmLoad x pk _) (CmmLit lit) | is32BitLit is32Bit lit = do Amode x_addr x_code <- getAmode x let @@ -1723,7 +1723,7 @@ assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock -- specific case of adding/subtracting an integer to a particular address. -- ToDo: catch other cases where we can use an operation directly on a memory -- address. -assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, +assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _ _, CmmLit (CmmInt i _)]) | addr == addr2, pk /= II64 || is32BitInteger i, Just instr <- check op @@ -1762,7 +1762,7 @@ assignMem_IntCode pk addr src = do -- Assign; dst is a reg, rhs is mem -assignReg_IntCode pk reg (CmmLoad src _) = do +assignReg_IntCode pk reg (CmmLoad src _ _) = do load_code <- intLoadCode (MOV pk) src platform <- ncgPlatform <$> getConfig return (load_code (getRegisterReg platform reg)) @@ -1794,7 +1794,7 @@ assignReg_FltCode _ reg src = do genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock -genJump (CmmLoad mem _) regs = do +genJump (CmmLoad mem _ _) regs = do Amode target code <- getAmode mem return (code `snocOL` JMP (OpAddr target) regs) diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs index a6a036c290..65077dcc0b 100644 --- a/compiler/GHC/CmmToC.hs +++ b/compiler/GHC/CmmToC.hs @@ -221,7 +221,8 @@ pprStmt platform stmt = parens (mkP_ <> pprExpr1 platform dest <> comma <> pprExpr platform src) <> semi | otherwise - -> hsep [ pprExpr platform (CmmLoad dest rep), equals, pprExpr platform src <> semi ] + -> hsep [ pprExpr platform (CmmLoad dest rep NaturallyAligned), equals, pprExpr platform src <> semi ] + -- TODO: Is this right? where rep = cmmExprType platform src @@ -376,10 +377,10 @@ pprSwitch platform e ids pprExpr :: Platform -> CmmExpr -> SDoc pprExpr platform e = case e of - CmmLit lit -> pprLit platform lit - CmmLoad e ty -> pprLoad platform e ty - CmmReg reg -> pprCastReg reg - CmmRegOff reg 0 -> pprCastReg reg + CmmLit lit -> pprLit platform lit + CmmLoad e ty align -> pprLoad platform e ty align + CmmReg reg -> pprCastReg reg + CmmRegOff reg 0 -> pprCastReg reg -- CmmRegOff is an alias of MO_Add CmmRegOff reg i -> pprCastReg reg <> char '+' <> @@ -390,13 +391,14 @@ pprExpr platform e = case e of CmmStackSlot _ _ -> panic "pprExpr: CmmStackSlot not supported!" -pprLoad :: Platform -> CmmExpr -> CmmType -> SDoc -pprLoad platform e ty +pprLoad :: Platform -> CmmExpr -> CmmType -> AlignmentSpec -> SDoc +pprLoad platform e ty _align | width == W64, wordWidth platform /= W64 = (if isFloatType ty then text "PK_DBL" else text "PK_Word64") <> parens (mkP_ <> pprExpr1 platform e) + -- TODO: exploit natural-alignment where possible | otherwise = case e of CmmReg r | isPtrReg r && width == wordWidth platform && not (isFloatType ty) @@ -1285,7 +1287,7 @@ te_Target (PrimTarget{}) = return () te_Expr :: CmmExpr -> TE () te_Expr (CmmLit lit) = te_Lit lit -te_Expr (CmmLoad e _) = te_Expr e +te_Expr (CmmLoad e _ _) = te_Expr e te_Expr (CmmReg r) = te_Reg r te_Expr (CmmMachOp _ es) = mapM_ te_Expr es te_Expr (CmmRegOff r _) = te_Reg r diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs index a57a6f79f0..49359939ea 100644 --- a/compiler/GHC/CmmToLlvm/CodeGen.hs +++ b/compiler/GHC/CmmToLlvm/CodeGen.hs @@ -267,7 +267,7 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do dstV <- getCmmRegW (CmmLocal dst) - v1 <- genLoadW True addr (localRegType dst) + v1 <- genLoadW True addr (localRegType dst) NaturallyAligned statement $ Store v1 dstV genCall (PrimTarget (MO_Cmpxchg _width)) @@ -1357,8 +1357,8 @@ exprToVarOpt opt e = case e of CmmLit lit -> genLit opt lit - CmmLoad e' ty - -> genLoad False e' ty + CmmLoad e' ty align + -> genLoad False e' ty align -- Cmmreg in expression is the value, so must load. If you want actual -- reg pointer, call getCmmReg directly. @@ -1790,40 +1790,40 @@ genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!" -- | Handle CmmLoad expression. -genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData +genLoad :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> LlvmM ExprData -- First we try to detect a few common cases and produce better code for -- these then the default case. We are mostly trying to detect Cmm code -- like I32[Sp + n] and use 'getelementptr' operations instead of the -- generic case that uses casts and pointer arithmetic -genLoad atomic e@(CmmReg (CmmGlobal r)) ty - = genLoad_fast atomic e r 0 ty +genLoad atomic e@(CmmReg (CmmGlobal r)) ty align + = genLoad_fast atomic e r 0 ty align -genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty - = genLoad_fast atomic e r n ty +genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty align + = genLoad_fast atomic e r n ty align genLoad atomic e@(CmmMachOp (MO_Add _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) - ty - = genLoad_fast atomic e r (fromInteger n) ty + ty align + = genLoad_fast atomic e r (fromInteger n) ty align genLoad atomic e@(CmmMachOp (MO_Sub _) [ (CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]) - ty - = genLoad_fast atomic e r (negate $ fromInteger n) ty + ty align + = genLoad_fast atomic e r (negate $ fromInteger n) ty align -- generic case -genLoad atomic e ty - = getTBAAMeta topN >>= genLoad_slow atomic e ty +genLoad atomic e ty align + = getTBAAMeta topN >>= genLoad_slow atomic e ty align -- | Handle CmmLoad expression. -- This is a special case for loading from a global register pointer -- offset such as I32[Sp+8]. genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType - -> LlvmM ExprData -genLoad_fast atomic e r n ty = do + -> AlignmentSpec -> LlvmM ExprData +genLoad_fast atomic e r n ty align = do platform <- getPlatform (gv, grt, s1) <- getCmmRegVal (CmmGlobal r) meta <- getTBAARegMeta r @@ -1836,7 +1836,7 @@ genLoad_fast atomic e r n ty = do case grt == ty' of -- were fine True -> do - (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr) + (var, s3) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr align) return (var, s1 `snocOL` s2 `snocOL` s3, []) @@ -1844,43 +1844,61 @@ genLoad_fast atomic e r n ty = do False -> do let pty = pLift ty' (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty - (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr') + (var, s4) <- doExpr ty' (MExpr meta $ mkLoad atomic ptr' align) return (var, s1 `snocOL` s2 `snocOL` s3 `snocOL` s4, []) -- If its a bit type then we use the slow method since -- we can't avoid casting anyway. - False -> genLoad_slow atomic e ty meta - where - loadInstr ptr | atomic = ALoad SyncSeqCst False ptr - | otherwise = Load ptr + False -> genLoad_slow atomic e ty align meta -- | Handle Cmm load expression. -- Generic case. Uses casts and pointer arithmetic if needed. -genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData -genLoad_slow atomic e ty meta = do +genLoad_slow :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> [MetaAnnot] + -> LlvmM ExprData +genLoad_slow atomic e ty align meta = do platform <- getPlatform cfg <- getConfig runExprData $ do iptr <- exprToVarW e case getVarType iptr of LMPointer _ -> - doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr) + doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic iptr align) i@(LMInt _) | i == llvmWord platform -> do let pty = LMPointer $ cmmToLlvmType ty ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty - doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr) + doExprW (cmmToLlvmType ty) (MExpr meta $ mkLoad atomic ptr align) other -> pprPanic "exprToVar: CmmLoad expression is not right type!" (PprCmm.pprExpr platform e <+> text ( "Size of Ptr: " ++ show (llvmPtrBits platform) ++ ", Size of var: " ++ show (llvmWidthInBits platform other) ++ ", Var: " ++ renderWithContext (llvmCgContext cfg) (ppVar cfg iptr))) - where - loadInstr ptr | atomic = ALoad SyncSeqCst False ptr - | otherwise = Load ptr +{- +Note [Alignment of vector-typed values] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +On x86, vector types need to be 16-byte aligned for aligned +access, but we have no way of guaranteeing that this is true with GHC +(we would need to modify the layout of the stack and closures, change +the storage manager, etc.). So, we blindly tell LLVM that *any* vector +store or load could be unaligned. In the future we may be able to +guarantee that certain vector access patterns are aligned, in which +case we will need a more granular way of specifying alignment. +-} + +mkLoad :: Atomic -> LlvmVar -> AlignmentSpec -> LlvmExpression +mkLoad atomic ptr alignment + | atomic = ALoad SyncSeqCst False ptr + | otherwise = Load ptr align + where + ty = pLower (getVarType ptr) + align = case alignment of + -- See Note [Alignment of vector-typed values] + _ | is_vector -> Just 1 + Unaligned -> Just 1 + NaturallyAligned -> Nothing -- | Handle CmmReg expression. This will return a pointer to the stack -- location of the register. Throws an error if it isn't allocated on @@ -1919,7 +1937,7 @@ getCmmRegVal reg = where loadFromStack = do ptr <- getCmmReg reg let ty = pLower $ getVarType ptr - (v, s) <- doExpr ty (Load ptr) + (v, s) <- doExpr ty (Load ptr Nothing) return (v, ty, unitOL s) -- | Allocate a local CmmReg on the stack @@ -2210,8 +2228,8 @@ runStmtsDecls action = do getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar getCmmRegW = lift . getCmmReg -genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar -genLoadW atomic e ty = liftExprData $ genLoad atomic e ty +genLoadW :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> WriterT LlvmAccum LlvmM LlvmVar +genLoadW atomic e ty alignment = liftExprData $ genLoad atomic e ty alignment -- | Return element of single-element list; 'panic' if list is not a single-element list singletonPanic :: String -> [a] -> a diff --git a/compiler/GHC/Llvm/Ppr.hs b/compiler/GHC/Llvm/Ppr.hs index 8ec3f58db2..405296f79f 100644 --- a/compiler/GHC/Llvm/Ppr.hs +++ b/compiler/GHC/Llvm/Ppr.hs @@ -242,7 +242,7 @@ ppLlvmExpression opts expr ExtractV struct idx -> ppExtractV opts struct idx Insert vec elt idx -> ppInsert opts vec elt idx GetElemPtr inb ptr indexes -> ppGetElementPtr opts inb ptr indexes - Load ptr -> ppLoad opts ptr + Load ptr align -> ppLoad opts ptr align ALoad ord st ptr -> ppALoad opts ord st ptr Malloc tp amount -> ppMalloc opts tp amount AtomicRMW aop tgt src ordering -> ppAtomicRMW opts aop tgt src ordering @@ -365,20 +365,16 @@ ppCmpXChg opts addr old new s_ord f_ord = text "cmpxchg" <+> ppVar opts addr <> comma <+> ppVar opts old <> comma <+> ppVar opts new <+> ppSyncOrdering s_ord <+> ppSyncOrdering f_ord --- XXX: On x86, vector types need to be 16-byte aligned for aligned access, but --- we have no way of guaranteeing that this is true with GHC (we would need to --- modify the layout of the stack and closures, change the storage manager, --- etc.). So, we blindly tell LLVM that *any* vector store or load could be --- unaligned. In the future we may be able to guarantee that certain vector --- access patterns are aligned, in which case we will need a more granular way --- of specifying alignment. -ppLoad :: LlvmCgConfig -> LlvmVar -> SDoc -ppLoad opts var = text "load" <+> ppr derefType <> comma <+> ppVar opts var <> align +ppLoad :: LlvmCgConfig -> LlvmVar -> LMAlign -> SDoc +ppLoad opts var alignment = + text "load" <+> ppr derefType <> comma <+> ppVar opts var <> align where derefType = pLower $ getVarType var - align | isVector . pLower . getVarType $ var = text ", align 1" - | otherwise = empty + align = + case alignment of + Just n -> text ", align" <+> ppr n + Nothing -> empty ppALoad :: LlvmCgConfig -> LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc ppALoad opts ord st var = diff --git a/compiler/GHC/Llvm/Syntax.hs b/compiler/GHC/Llvm/Syntax.hs index 12e0073c7a..befac77734 100644 --- a/compiler/GHC/Llvm/Syntax.hs +++ b/compiler/GHC/Llvm/Syntax.hs @@ -252,7 +252,7 @@ data LlvmExpression {- | Load the value at location ptr -} - | Load LlvmVar + | Load LlvmVar LMAlign {- | Atomic load of the value at location ptr diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs index 6722a4c288..bc42a11d1c 100644 --- a/compiler/GHC/StgToCmm/CgUtils.hs +++ b/compiler/GHC/StgToCmm/CgUtils.hs @@ -171,7 +171,7 @@ fixStgRegStmt platform stmt = fixAssign $ mapExpDeep fixExpr stmt let baseAddr = get_GlobalReg_addr platform reg in case reg of BaseReg -> baseAddr - _other -> CmmLoad baseAddr (globalRegType platform reg) + _other -> CmmLoad baseAddr (globalRegType platform reg) NaturallyAligned CmmRegOff (CmmGlobal reg) offset -> -- RegOf leaves are just a shorthand form. If the reg maps diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index e3cd4d8db1..cc33576240 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -395,7 +395,7 @@ emitPopTupleRegs regs_live = do (widthInBytes $ typeWidth reg_ty) adj_sp = mkAssign spReg (cmmOffset platform spExpr width) - restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty) + restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty NaturallyAligned) in mkCmmIfThen cond $ catAGraphs [restore_reg, adj_sp] emit . catAGraphs =<< mapM save_arg regs @@ -451,7 +451,7 @@ closeNursery profile tso = do -- tso->alloc_limit += alloc mkStore alloc_limit (CmmMachOp (MO_Sub W64) - [ CmmLoad alloc_limit b64 + [ CmmLoad alloc_limit b64 NaturallyAligned , CmmMachOp (mo_WordTo64 platform) [alloc] ]) ] @@ -485,9 +485,8 @@ loadThreadState profile = do open_nursery, -- and load the current cost centre stack from the TSO when profiling: if profileIsProfiling profile - then storeCurCCS - (CmmLoad (cmmOffset platform (CmmReg (CmmLocal tso)) - (tso_CCCS profile)) (ccsType platform)) + then let ccs_ptr = cmmOffset platform (CmmReg (CmmLocal tso)) (tso_CCCS profile) + in storeCurCCS (CmmLoad ccs_ptr (ccsType platform) NaturallyAligned) else mkNop ] @@ -555,11 +554,11 @@ openNursery profile tso = do (cmmOffsetExpr platform (CmmReg bdstartreg) (cmmOffset platform - (CmmMachOp (mo_wordMul platform) [ - CmmMachOp (MO_SS_Conv W32 (wordWidth platform)) - [CmmLoad (nursery_bdescr_blocks platform cnreg) b32], - mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform)) - ]) + (CmmMachOp (mo_wordMul platform) + [ CmmMachOp (MO_SS_Conv W32 (wordWidth platform)) + [CmmLoad (nursery_bdescr_blocks platform cnreg) b32 NaturallyAligned] + , mkIntExpr platform (pc_BLOCK_SIZE (platformConstants platform)) + ]) (-1) ) ), @@ -573,7 +572,7 @@ openNursery profile tso = do -- tso->alloc_limit += alloc mkStore alloc_limit (CmmMachOp (MO_Add W64) - [ CmmLoad alloc_limit b64 + [ CmmLoad alloc_limit b64 NaturallyAligned , CmmMachOp (mo_WordTo64 platform) [alloc] ]) ] diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs index 23e7519d80..fd2a84c6e2 100644 --- a/compiler/GHC/StgToCmm/Hpc.hs +++ b/compiler/GHC/StgToCmm/Hpc.hs @@ -27,7 +27,7 @@ import Control.Monad mkTickBox :: Platform -> Module -> Int -> CmmAGraph mkTickBox platform mod n = mkStore tick_box (CmmMachOp (MO_Add W64) - [ CmmLoad tick_box b64 + [ CmmLoad tick_box b64 NaturallyAligned , CmmLit (CmmInt 1 W64) ]) where diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index 72f42ba831..17fe2d9373 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -2222,7 +2222,7 @@ cmmLoadIndexOffExpr :: Platform -> CmmExpr -- Index -> CmmExpr cmmLoadIndexOffExpr platform off ty base idx_ty idx - = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty + = CmmLoad (cmmIndexOffExpr platform off (typeWidth idx_ty) base idx) ty NaturallyAligned setInfo :: CmmExpr -> CmmExpr -> CmmAGraph setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index bb3d4a8696..79e08071fa 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -85,7 +85,7 @@ mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) costCentreFrom :: Platform -> CmmExpr -- A closure pointer -> CmmExpr -- The cost centre from that closure -costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeader_ccs (platformConstants platform))) (ccsType platform) +costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeader_ccs (platformConstants platform))) (ccsType platform) NaturallyAligned -- | The profiling header words in a static closure staticProfHdr :: Profile -> CostCentreStack -> [CmmLit] @@ -410,7 +410,8 @@ ldvEnter cl_ptr = do loadEra :: Platform -> CmmExpr loadEra platform = CmmMachOp (MO_UU_Conv (cIntWidth platform) (wordWidth platform)) [CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "era"))) - (cInt platform)] + (cInt platform) + NaturallyAligned] -- | Takes the address of a closure, and returns -- the address of the LDV word in the closure diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 22e34dddae..270699b9ed 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -104,18 +104,18 @@ addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl)) -- | @addToMem rep ptr n@ adds @n@ to the integer pointed-to by @ptr@. addToMem :: CmmType -- rep of the counter - -> CmmExpr -- Address + -> CmmExpr -- Naturally-aligned address -> Int -- What to add (a word) -> CmmAGraph addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep))) -- | @addToMemE rep ptr n@ adds @n@ to the integer pointed-to by @ptr@. addToMemE :: CmmType -- rep of the counter - -> CmmExpr -- Address + -> CmmExpr -- Naturally-aligned address -> CmmExpr -- What to add (a word-typed expression) -> CmmAGraph addToMemE rep ptr n - = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n]) + = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep NaturallyAligned, n]) ------------------------------------------------------------------------- @@ -135,7 +135,8 @@ mkTaggedObjectLoad platform reg base offset tag (CmmLoad (cmmOffsetB platform (CmmReg (CmmLocal base)) (offset - tag)) - (localRegType reg)) + (localRegType reg) + NaturallyAligned) ------------------------------------------------------------------------- -- @@ -238,7 +239,9 @@ callerSaveGlobalReg platform reg callerRestoreGlobalReg :: Platform -> GlobalReg -> CmmAGraph callerRestoreGlobalReg platform reg = mkAssign (CmmGlobal reg) - (CmmLoad (get_GlobalReg_addr platform reg) (globalRegType platform reg)) + (CmmLoad (get_GlobalReg_addr platform reg) + (globalRegType platform reg) + NaturallyAligned) ------------------------------------------------------------------------- |