summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2020-01-06 12:29:14 -0500
committerBen Gamari <ben@smart-cactus.org>2020-01-12 07:16:58 -0500
commit90e985498a421784975b40b50d5dbea2b40b1856 (patch)
tree49ecdb6becd5f430db6102551a9bb7ce87dbeed4
parent34cf0394d7d7b3f51ee13dff8f4853c244636d83 (diff)
downloadhaskell-wip/T14251.tar.gz
llvmGen: Fix #14251wip/T14251
Fixes the calling convention for functions passing raw SSE-register values by adding padding as needed to get the values in the right registers. This problem cropped up when some args were unused an dropped from the live list. This folds together 2e23e1c7de01c92b038e55ce53d11bf9db993dd4 and 73273be476a8cc6c13368660b042b3b0614fd928 previously from @kavon. Metric Increase: T12707 ManyConstructors
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Base.hs116
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs22
-rw-r--r--testsuite/tests/codeGen/should_run/all.T3
3 files changed, 116 insertions, 25 deletions
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index 9ddb23aade..ce9f22052f 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -28,7 +28,7 @@ module LlvmCodeGen.Base (
cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy,
llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign,
- llvmPtrBits, tysToParams, llvmFunSection,
+ llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isFPR,
strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm,
getGlobalPtr, generateExternDecls,
@@ -49,6 +49,7 @@ import GHC.Platform.Regs ( activeStgRegs )
import DynFlags
import FastString
import Cmm hiding ( succ )
+import CmmUtils ( regsOverlap )
import Outputable as Outp
import GHC.Platform
import UniqFM
@@ -62,7 +63,7 @@ import qualified Stream
import Data.Maybe (fromJust)
import Control.Monad (ap)
import Data.Char (isDigit)
-import Data.List (intercalate)
+import Data.List (sort, groupBy, intercalate)
import qualified Data.List.NonEmpty as NE
-- ----------------------------------------------------------------------------
@@ -152,16 +153,109 @@ llvmFunSection dflags lbl
-- | A Function's arguments
llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live =
- map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
+ map (lmGlobalRegArg dflags) (filter isPassed allRegs)
where platform = targetPlatform dflags
- isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
- isPassed r = not (isSSE r) || isLive r
- isSSE (FloatReg _) = True
- isSSE (DoubleReg _) = True
- isSSE (XmmReg _) = True
- isSSE (YmmReg _) = True
- isSSE (ZmmReg _) = True
- isSSE _ = False
+ allRegs = activeStgRegs platform
+ paddedLive = map (\(_,r) -> r) $ padLiveArgs dflags live
+ isLive r = r `elem` alwaysLive || r `elem` paddedLive
+ isPassed r = not (isFPR r) || isLive r
+
+
+isFPR :: GlobalReg -> Bool
+isFPR (FloatReg _) = True
+isFPR (DoubleReg _) = True
+isFPR (XmmReg _) = True
+isFPR (YmmReg _) = True
+isFPR (ZmmReg _) = True
+isFPR _ = False
+
+sameFPRClass :: GlobalReg -> GlobalReg -> Bool
+sameFPRClass (FloatReg _) (FloatReg _) = True
+sameFPRClass (DoubleReg _) (DoubleReg _) = True
+sameFPRClass (XmmReg _) (XmmReg _) = True
+sameFPRClass (YmmReg _) (YmmReg _) = True
+sameFPRClass (ZmmReg _) (ZmmReg _) = True
+sameFPRClass _ _ = False
+
+normalizeFPRNum :: GlobalReg -> GlobalReg
+normalizeFPRNum (FloatReg _) = FloatReg 1
+normalizeFPRNum (DoubleReg _) = DoubleReg 1
+normalizeFPRNum (XmmReg _) = XmmReg 1
+normalizeFPRNum (YmmReg _) = YmmReg 1
+normalizeFPRNum (ZmmReg _) = ZmmReg 1
+normalizeFPRNum _ = error "normalizeFPRNum expected only FPR regs"
+
+getFPRCtor :: GlobalReg -> Int -> GlobalReg
+getFPRCtor (FloatReg _) = FloatReg
+getFPRCtor (DoubleReg _) = DoubleReg
+getFPRCtor (XmmReg _) = XmmReg
+getFPRCtor (YmmReg _) = YmmReg
+getFPRCtor (ZmmReg _) = ZmmReg
+getFPRCtor _ = error "getFPRCtor expected only FPR regs"
+
+fprRegNum :: GlobalReg -> Int
+fprRegNum (FloatReg i) = i
+fprRegNum (DoubleReg i) = i
+fprRegNum (XmmReg i) = i
+fprRegNum (YmmReg i) = i
+fprRegNum (ZmmReg i) = i
+fprRegNum _ = error "fprRegNum expected only FPR regs"
+
+-- | Input: dynflags, and the list of live registers
+--
+-- Output: An augmented list of live registers, where padding was
+-- added to the list of registers to ensure the calling convention is
+-- correctly used by LLVM.
+--
+-- Each global reg in the returned list is tagged with a bool, which
+-- indicates whether the global reg was added as padding, or was an original
+-- live register.
+--
+-- That is, True => padding, False => a real, live global register.
+--
+-- Also, the returned list is not sorted in any particular order.
+--
+padLiveArgs :: DynFlags -> LiveGlobalRegs -> [(Bool, GlobalReg)]
+padLiveArgs dflags live =
+ if platformUnregisterised plat
+ then taggedLive -- not using GHC's register convention for platform.
+ else padding ++ taggedLive
+ where
+ taggedLive = map (\x -> (False, x)) live
+ plat = targetPlatform dflags
+
+ fprLive = filter isFPR live
+ padding = concatMap calcPad $ groupBy sharesClass fprLive
+
+ sharesClass :: GlobalReg -> GlobalReg -> Bool
+ sharesClass a b = sameFPRClass a b || overlappingClass
+ where
+ overlappingClass = regsOverlap dflags (norm a) (norm b)
+ norm = CmmGlobal . normalizeFPRNum
+
+ calcPad :: [GlobalReg] -> [(Bool, GlobalReg)]
+ calcPad rs = getFPRPadding (getFPRCtor $ head rs) rs
+
+getFPRPadding :: (Int -> GlobalReg) -> LiveGlobalRegs -> [(Bool, GlobalReg)]
+getFPRPadding paddingCtor live = padding
+ where
+ fprRegNums = sort $ map fprRegNum live
+ (_, padding) = foldl assignSlots (1, []) $ fprRegNums
+
+ assignSlots (i, acc) regNum
+ | i == regNum = -- don't need padding here
+ (i+1, acc)
+ | i < regNum = let -- add padding for slots i .. regNum-1
+ numNeeded = regNum-i
+ acc' = genPad i numNeeded ++ acc
+ in
+ (regNum+1, acc')
+ | otherwise = error "padLiveArgs -- i > regNum ??"
+
+ genPad start n =
+ take n $ flip map (iterate (+1) start) (\i ->
+ (True, paddingCtor i))
+
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 84a6963234..6a23ea772e 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -1841,19 +1841,14 @@ funPrologue live cmmBlocks = do
-- STG Liveness optimisation done here.
funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
funEpilogue live = do
+ dflags <- getDynFlags
- -- Have information and liveness optimisation is enabled?
- let liveRegs = alwaysLive ++ live
- isSSE (FloatReg _) = True
- isSSE (DoubleReg _) = True
- isSSE (XmmReg _) = True
- isSSE (YmmReg _) = True
- isSSE (ZmmReg _) = True
- isSSE _ = False
+ -- the bool indicates whether the register is padding.
+ let alwaysNeeded = map (\r -> (False, r)) alwaysLive
+ livePadded = alwaysNeeded ++ padLiveArgs dflags live
-- Set to value or "undef" depending on whether the register is
-- actually live
- dflags <- getDynFlags
let loadExpr r = do
(v, _, s) <- getCmmRegVal (CmmGlobal r)
return (Just $ v, s)
@@ -1861,9 +1856,12 @@ funEpilogue live = do
let ty = (pLower . getVarType $ lmGlobalRegVar dflags r)
return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
platform <- getDynFlag targetPlatform
- loads <- flip mapM (activeStgRegs platform) $ \r -> case () of
- _ | r `elem` liveRegs -> loadExpr r
- | not (isSSE r) -> loadUndef r
+ let allRegs = activeStgRegs platform
+ loads <- flip mapM allRegs $ \r -> case () of
+ _ | (False, r) `elem` livePadded
+ -> loadExpr r -- if r is not padding, load it
+ | not (isFPR r) || (True, r) `elem` livePadded
+ -> loadUndef r
| otherwise -> return (Nothing, nilOL)
let (vars, stmts) = unzip loads
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index c6c3a5c9ba..98d77b7289 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -185,8 +185,7 @@ test('T13825-unit',
test('T14619', normal, compile_and_run, [''])
test('T14754', normal, compile_and_run, [''])
test('T14346', only_ways(['threaded1','threaded2']), compile_and_run, ['-O -threaded'])
-test('T14251', [expect_broken_for(14251, ['optllvm'])],
- compile_and_run, [''])
+test('T14251', normal, compile_and_run, [''])
# These actually used to fail with all optimisation settings, but adding -O just
# to make sure