diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-10-04 13:44:55 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-10-04 22:27:54 -0400 |
commit | adcb5fb47c0942671d409b940d8884daa9359ca4 (patch) | |
tree | 4d4591d46c195de5e92b0f8b57375883ade89b92 | |
parent | 07083fc44ebf3f0510ae1d71ae5c9c88c87ae1d8 (diff) | |
download | haskell-adcb5fb47c0942671d409b940d8884daa9359ca4.tar.gz |
Multiple fixes / improvements for LLVM backend
- Fix for #13904 -- stop "trashing" callee-saved registers, since it is
not actually doing anything useful.
- Fix for #14251 -- fixes the calling convention for functions passing
raw SSE-register values by adding padding as needed to get the values
in the right registers. This problem cropped up when some args were
unused an dropped from the live list.
- Fixed a typo in 'readnone' attribute
- Added 'lower-expect' pass to level 0 LLVM optimization passes to
improve block layout in LLVM for stack checks, etc.
Test Plan: `make test WAYS=optllvm` and `make test WAYS=llvm`
Reviewers: bgamari, simonmar, angerman
Reviewed By: angerman
Subscribers: rwbarton, carter
GHC Trac Issues: #13904, #14251
Differential Revision: https://phabricator.haskell.org/D5190
-rw-r--r-- | compiler/llvmGen/Llvm/Types.hs | 2 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 62 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 60 | ||||
-rw-r--r-- | llvm-passes | 2 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 3 |
5 files changed, 67 insertions, 62 deletions
diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs index bc7bbaab1b..a45004d3a8 100644 --- a/compiler/llvmGen/Llvm/Types.hs +++ b/compiler/llvmGen/Llvm/Types.hs @@ -564,7 +564,7 @@ instance Outputable LlvmFuncAttr where ppr OptSize = text "optsize" ppr NoReturn = text "noreturn" ppr NoUnwind = text "nounwind" - ppr ReadNone = text "readnon" + ppr ReadNone = text "readnone" ppr ReadOnly = text "readonly" ppr Ssp = text "ssp" ppr SspReq = text "ssqreq" diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index 6e20da48c1..ec91bacc4c 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -26,7 +26,7 @@ module LlvmCodeGen.Base ( cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy, llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign, - llvmPtrBits, tysToParams, llvmFunSection, + llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isSSE, strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm, getGlobalPtr, generateExternDecls, @@ -58,6 +58,8 @@ import ErrUtils import qualified Stream import Control.Monad (ap) +import Data.List (sort) +import Data.Maybe (mapMaybe) -- ---------------------------------------------------------------------------- -- * Some Data Types @@ -147,16 +149,58 @@ llvmFunSection dflags lbl -- | A Function's arguments llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar] llvmFunArgs dflags live = - map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform)) + map (lmGlobalRegArg dflags) (filter isPassed allRegs) where platform = targetPlatform dflags - isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live + allRegs = activeStgRegs platform + paddedLive = map (\(_,r) -> r) $ padLiveArgs live + isLive r = r `elem` alwaysLive || r `elem` paddedLive isPassed r = not (isSSE r) || isLive r - isSSE (FloatReg _) = True - isSSE (DoubleReg _) = True - isSSE (XmmReg _) = True - isSSE (YmmReg _) = True - isSSE (ZmmReg _) = True - isSSE _ = False + + +isSSE :: GlobalReg -> Bool +isSSE (FloatReg _) = True +isSSE (DoubleReg _) = True +isSSE (XmmReg _) = True +isSSE (YmmReg _) = True +isSSE (ZmmReg _) = True +isSSE _ = False + +sseRegNum :: GlobalReg -> Maybe Int +sseRegNum (FloatReg i) = Just i +sseRegNum (DoubleReg i) = Just i +sseRegNum (XmmReg i) = Just i +sseRegNum (YmmReg i) = Just i +sseRegNum (ZmmReg i) = Just i +sseRegNum _ = Nothing + +-- the bool indicates whether the global reg was added as padding. +-- the returned list is not sorted in any particular order, +-- but does indicate the set of live registers needed, with SSE padding. +padLiveArgs :: LiveGlobalRegs -> [(Bool, GlobalReg)] +padLiveArgs live = allRegs + where + sseRegNums = sort $ mapMaybe sseRegNum live + (_, padding) = foldl assignSlots (1, []) $ sseRegNums + allRegs = padding ++ map (\r -> (False, r)) live + + assignSlots (i, acc) regNum + | i == regNum = -- don't need padding here + (i+1, acc) + | i < regNum = let -- add padding for slots i .. regNum-1 + numNeeded = regNum-i + acc' = genPad i numNeeded ++ acc + in + (regNum+1, acc') + | otherwise = error "padLiveArgs -- i > regNum ??" + + genPad start n = + take n $ flip map (iterate (+1) start) (\i -> + (True, FloatReg i)) + -- NOTE: Picking float should be fine for the following reasons: + -- (1) Float aliases with all the other SSE register types on + -- the given platform. + -- (2) The argument is not live anyways. + -- | Llvm standard fun attributes llvmStdFunAttrs :: [LlvmFuncAttr] diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 81791628d5..18734009c6 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -14,7 +14,7 @@ import LlvmCodeGen.Base import LlvmCodeGen.Regs import BlockId -import CodeGen.Platform ( activeStgRegs, callerSaves ) +import CodeGen.Platform ( activeStgRegs ) import CLabel import Cmm import PprCmm @@ -211,7 +211,6 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args fptr <- liftExprData $ getFunPtr funTy t argVars' <- castVarsW Signed $ zip argVars argTy - doTrashStmts let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1] statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) [] | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt) @@ -294,7 +293,6 @@ genCall t@(PrimTarget op) [] args fptr <- getFunPtrW funTy t argVars' <- castVarsW Signed $ zip argVars argTy - doTrashStmts let alignVal = mkIntLit i32 align arguments = argVars' ++ (alignVal:isVolVal) statement $ Expr $ Call StdCall fptr arguments [] @@ -449,7 +447,6 @@ genCall target res args = runStmtsDecls $ do | never_returns = statement $ Unreachable | otherwise = return () - doTrashStmts -- make the actual call case retTy of @@ -1786,12 +1783,9 @@ genLit _ CmmHighStackMark funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData funPrologue live cmmBlocks = do - trash <- getTrashRegs let getAssignedRegs :: CmmNode O O -> [CmmReg] getAssignedRegs (CmmAssign reg _) = [reg] - -- Calls will trash all registers. Unfortunately, this needs them to - -- be stack-allocated in the first place. - getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs + getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmLocal rs getAssignedRegs _ = [] getRegsBlock (_, body, _) = concatMap getAssignedRegs $ blockToList body assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks @@ -1821,14 +1815,9 @@ funPrologue live cmmBlocks = do funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements) funEpilogue live = do - -- Have information and liveness optimisation is enabled? - let liveRegs = alwaysLive ++ live - isSSE (FloatReg _) = True - isSSE (DoubleReg _) = True - isSSE (XmmReg _) = True - isSSE (YmmReg _) = True - isSSE (ZmmReg _) = True - isSSE _ = False + -- the bool indicates whether the register is padding. + let alwaysNeeded = map (\r -> (False, r)) alwaysLive + livePadded = alwaysNeeded ++ padLiveArgs live -- Set to value or "undef" depending on whether the register is -- actually live @@ -1840,39 +1829,17 @@ funEpilogue live = do let ty = (pLower . getVarType $ lmGlobalRegVar dflags r) return (Just $ LMLitVar $ LMUndefLit ty, nilOL) platform <- getDynFlag targetPlatform - loads <- flip mapM (activeStgRegs platform) $ \r -> case () of - _ | r `elem` liveRegs -> loadExpr r - | not (isSSE r) -> loadUndef r + let allRegs = activeStgRegs platform + loads <- flip mapM allRegs $ \r -> case () of + _ | (False, r) `elem` livePadded + -> loadExpr r -- if r is not padding, load it + | not (isSSE r) || (True, r) `elem` livePadded + -> loadUndef r | otherwise -> return (Nothing, nilOL) let (vars, stmts) = unzip loads return (catMaybes vars, concatOL stmts) - --- | A series of statements to trash all the STG registers. --- --- In LLVM we pass the STG registers around everywhere in function calls. --- So this means LLVM considers them live across the entire function, when --- in reality they usually aren't. For Caller save registers across C calls --- the saving and restoring of them is done by the Cmm code generator, --- using Cmm local vars. So to stop LLVM saving them as well (and saving --- all of them since it thinks they're always live, we trash them just --- before the call by assigning the 'undef' value to them. The ones we --- need are restored from the Cmm local var and the ones we don't need --- are fine to be trashed. -getTrashStmts :: LlvmM LlvmStatements -getTrashStmts = do - regs <- getTrashRegs - stmts <- flip mapM regs $ \ r -> do - reg <- getCmmReg (CmmGlobal r) - let ty = (pLower . getVarType) reg - return $ Store (LMLitVar $ LMUndefLit ty) reg - return $ toOL stmts - -getTrashRegs :: LlvmM [GlobalReg] -getTrashRegs = do plat <- getLlvmPlatform - return $ filter (callerSaves plat) (activeStgRegs plat) - -- | Get a function pointer to the CLabel specified. -- -- This is for Haskell functions, function type is assumed, so doesn't work @@ -1994,11 +1961,6 @@ getCmmRegW = lift . getCmmReg genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar genLoadW atomic e ty = liftExprData $ genLoad atomic e ty -doTrashStmts :: WriterT LlvmAccum LlvmM () -doTrashStmts = do - stmts <- lift getTrashStmts - tell $ LlvmAccum stmts mempty - -- | Return element of single-element list; 'panic' if list is not a single-element list singletonPanic :: String -> [a] -> a singletonPanic _ [x] = x diff --git a/llvm-passes b/llvm-passes index 5183c9f2ab..14eb62d87c 100644 --- a/llvm-passes +++ b/llvm-passes @@ -1,5 +1,5 @@ [ -(0, "-mem2reg -globalopt"), +(0, "-mem2reg -globalopt -lower-expect"), (1, "-O1 -globalopt"), (2, "-O2") ] diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 49592951bc..bd1521d6d8 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -172,5 +172,4 @@ test('T13825-unit', test('T14619', normal, compile_and_run, ['']) test('T14754', normal, compile_and_run, ['']) test('T14346', only_ways(['threaded1','threaded2']), compile_and_run, ['-O -threaded']) -test('T14251', [expect_broken_for(14251, ['optllvm'])], - compile_and_run, ['']) +test('T14251', normal, compile_and_run, ['']) |