diff options
author | Ben Gamari <ben@smart-cactus.org> | 2020-01-06 12:29:14 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-01-20 15:32:52 -0500 |
commit | e90ecc932c4857c774405dc398c940290f70f371 (patch) | |
tree | 1f2925748b99003cc106958dbb8ddce9f193381e | |
parent | 442751c605d8a9792014a9e3e694db5b312e80c3 (diff) | |
download | haskell-e90ecc932c4857c774405dc398c940290f70f371.tar.gz |
llvmGen: Fix #14251
Fixes the calling convention for functions passing raw SSE-register
values by adding padding as needed to get the values in the right
registers. This problem cropped up when some args were unused an dropped
from the live list.
This folds together 2e23e1c7de01c92b038e55ce53d11bf9db993dd4 and
73273be476a8cc6c13368660b042b3b0614fd928 previously from @kavon.
Metric Increase:
T12707
ManyConstructors
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 116 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 22 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 3 |
3 files changed, 116 insertions, 25 deletions
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index 9ddb23aade..ce9f22052f 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -28,7 +28,7 @@ module LlvmCodeGen.Base ( cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy, llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign, - llvmPtrBits, tysToParams, llvmFunSection, + llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isFPR, strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm, getGlobalPtr, generateExternDecls, @@ -49,6 +49,7 @@ import GHC.Platform.Regs ( activeStgRegs ) import DynFlags import FastString import Cmm hiding ( succ ) +import CmmUtils ( regsOverlap ) import Outputable as Outp import GHC.Platform import UniqFM @@ -62,7 +63,7 @@ import qualified Stream import Data.Maybe (fromJust) import Control.Monad (ap) import Data.Char (isDigit) -import Data.List (intercalate) +import Data.List (sort, groupBy, intercalate) import qualified Data.List.NonEmpty as NE -- ---------------------------------------------------------------------------- @@ -152,16 +153,109 @@ llvmFunSection dflags lbl -- | A Function's arguments llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar] llvmFunArgs dflags live = - map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform)) + map (lmGlobalRegArg dflags) (filter isPassed allRegs) where platform = targetPlatform dflags - isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live - isPassed r = not (isSSE r) || isLive r - isSSE (FloatReg _) = True - isSSE (DoubleReg _) = True - isSSE (XmmReg _) = True - isSSE (YmmReg _) = True - isSSE (ZmmReg _) = True - isSSE _ = False + allRegs = activeStgRegs platform + paddedLive = map (\(_,r) -> r) $ padLiveArgs dflags live + isLive r = r `elem` alwaysLive || r `elem` paddedLive + isPassed r = not (isFPR r) || isLive r + + +isFPR :: GlobalReg -> Bool +isFPR (FloatReg _) = True +isFPR (DoubleReg _) = True +isFPR (XmmReg _) = True +isFPR (YmmReg _) = True +isFPR (ZmmReg _) = True +isFPR _ = False + +sameFPRClass :: GlobalReg -> GlobalReg -> Bool +sameFPRClass (FloatReg _) (FloatReg _) = True +sameFPRClass (DoubleReg _) (DoubleReg _) = True +sameFPRClass (XmmReg _) (XmmReg _) = True +sameFPRClass (YmmReg _) (YmmReg _) = True +sameFPRClass (ZmmReg _) (ZmmReg _) = True +sameFPRClass _ _ = False + +normalizeFPRNum :: GlobalReg -> GlobalReg +normalizeFPRNum (FloatReg _) = FloatReg 1 +normalizeFPRNum (DoubleReg _) = DoubleReg 1 +normalizeFPRNum (XmmReg _) = XmmReg 1 +normalizeFPRNum (YmmReg _) = YmmReg 1 +normalizeFPRNum (ZmmReg _) = ZmmReg 1 +normalizeFPRNum _ = error "normalizeFPRNum expected only FPR regs" + +getFPRCtor :: GlobalReg -> Int -> GlobalReg +getFPRCtor (FloatReg _) = FloatReg +getFPRCtor (DoubleReg _) = DoubleReg +getFPRCtor (XmmReg _) = XmmReg +getFPRCtor (YmmReg _) = YmmReg +getFPRCtor (ZmmReg _) = ZmmReg +getFPRCtor _ = error "getFPRCtor expected only FPR regs" + +fprRegNum :: GlobalReg -> Int +fprRegNum (FloatReg i) = i +fprRegNum (DoubleReg i) = i +fprRegNum (XmmReg i) = i +fprRegNum (YmmReg i) = i +fprRegNum (ZmmReg i) = i +fprRegNum _ = error "fprRegNum expected only FPR regs" + +-- | Input: dynflags, and the list of live registers +-- +-- Output: An augmented list of live registers, where padding was +-- added to the list of registers to ensure the calling convention is +-- correctly used by LLVM. +-- +-- Each global reg in the returned list is tagged with a bool, which +-- indicates whether the global reg was added as padding, or was an original +-- live register. +-- +-- That is, True => padding, False => a real, live global register. +-- +-- Also, the returned list is not sorted in any particular order. +-- +padLiveArgs :: DynFlags -> LiveGlobalRegs -> [(Bool, GlobalReg)] +padLiveArgs dflags live = + if platformUnregisterised plat + then taggedLive -- not using GHC's register convention for platform. + else padding ++ taggedLive + where + taggedLive = map (\x -> (False, x)) live + plat = targetPlatform dflags + + fprLive = filter isFPR live + padding = concatMap calcPad $ groupBy sharesClass fprLive + + sharesClass :: GlobalReg -> GlobalReg -> Bool + sharesClass a b = sameFPRClass a b || overlappingClass + where + overlappingClass = regsOverlap dflags (norm a) (norm b) + norm = CmmGlobal . normalizeFPRNum + + calcPad :: [GlobalReg] -> [(Bool, GlobalReg)] + calcPad rs = getFPRPadding (getFPRCtor $ head rs) rs + +getFPRPadding :: (Int -> GlobalReg) -> LiveGlobalRegs -> [(Bool, GlobalReg)] +getFPRPadding paddingCtor live = padding + where + fprRegNums = sort $ map fprRegNum live + (_, padding) = foldl assignSlots (1, []) $ fprRegNums + + assignSlots (i, acc) regNum + | i == regNum = -- don't need padding here + (i+1, acc) + | i < regNum = let -- add padding for slots i .. regNum-1 + numNeeded = regNum-i + acc' = genPad i numNeeded ++ acc + in + (regNum+1, acc') + | otherwise = error "padLiveArgs -- i > regNum ??" + + genPad start n = + take n $ flip map (iterate (+1) start) (\i -> + (True, paddingCtor i)) + -- | Llvm standard fun attributes llvmStdFunAttrs :: [LlvmFuncAttr] diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 7a6e3386ae..bfaf7706d1 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -1841,19 +1841,14 @@ funPrologue live cmmBlocks = do -- STG Liveness optimisation done here. funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements) funEpilogue live = do + dflags <- getDynFlags - -- Have information and liveness optimisation is enabled? - let liveRegs = alwaysLive ++ live - isSSE (FloatReg _) = True - isSSE (DoubleReg _) = True - isSSE (XmmReg _) = True - isSSE (YmmReg _) = True - isSSE (ZmmReg _) = True - isSSE _ = False + -- the bool indicates whether the register is padding. + let alwaysNeeded = map (\r -> (False, r)) alwaysLive + livePadded = alwaysNeeded ++ padLiveArgs dflags live -- Set to value or "undef" depending on whether the register is -- actually live - dflags <- getDynFlags let loadExpr r = do (v, _, s) <- getCmmRegVal (CmmGlobal r) return (Just $ v, s) @@ -1861,9 +1856,12 @@ funEpilogue live = do let ty = (pLower . getVarType $ lmGlobalRegVar dflags r) return (Just $ LMLitVar $ LMUndefLit ty, nilOL) platform <- getDynFlag targetPlatform - loads <- flip mapM (activeStgRegs platform) $ \r -> case () of - _ | r `elem` liveRegs -> loadExpr r - | not (isSSE r) -> loadUndef r + let allRegs = activeStgRegs platform + loads <- flip mapM allRegs $ \r -> case () of + _ | (False, r) `elem` livePadded + -> loadExpr r -- if r is not padding, load it + | not (isFPR r) || (True, r) `elem` livePadded + -> loadUndef r | otherwise -> return (Nothing, nilOL) let (vars, stmts) = unzip loads diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index c6c3a5c9ba..98d77b7289 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -185,8 +185,7 @@ test('T13825-unit', test('T14619', normal, compile_and_run, ['']) test('T14754', normal, compile_and_run, ['']) test('T14346', only_ways(['threaded1','threaded2']), compile_and_run, ['-O -threaded']) -test('T14251', [expect_broken_for(14251, ['optllvm'])], - compile_and_run, ['']) +test('T14251', normal, compile_and_run, ['']) # These actually used to fail with all optimisation settings, but adding -O just # to make sure |