diff options
-rw-r--r-- | compiler/simplCore/CSE.hs | 68 | ||||
-rw-r--r-- | testsuite/tests/numeric/should_compile/T14465.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/numeric/should_compile/T7116.stdout | 4 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T15445.hs | 8 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T15445.stderr | 13 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T15445a.hs | 10 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 4 |
7 files changed, 94 insertions, 15 deletions
diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs index 7e44e2e14d..96fbd07454 100644 --- a/compiler/simplCore/CSE.hs +++ b/compiler/simplCore/CSE.hs @@ -15,7 +15,8 @@ import GhcPrelude import CoreSubst import Var ( Var ) import VarEnv ( elemInScopeSet, mkInScopeSet ) -import Id ( Id, idType, idInlineActivation, isDeadBinder +import Id ( Id, idType, isDeadBinder + , idInlineActivation, setInlineActivation , zapIdOccInfo, zapIdUsageInfo, idInlinePragma , isJoinId, isJoinId_maybe ) import CoreUtils ( mkAltExpr, eqExpr @@ -25,9 +26,7 @@ import CoreFVs ( exprFreeVars ) import Type ( tyConAppArgs ) import CoreSyn import Outputable -import BasicTypes ( TopLevelFlag(..), isTopLevel - , isAlwaysActive, isAnyInlinePragma, - inlinePragmaSpec, noUserInlineSpec ) +import BasicTypes import CoreMap import Util ( filterOut ) import Data.List ( mapAccumL ) @@ -335,14 +334,16 @@ cseBind toplevel env (NonRec b e) (env1, b1) = addBinder env b (env2, (b2, e2)) = cse_bind toplevel env1 (b,e) b1 -cseBind _ env (Rec [(in_id, rhs)]) +cseBind toplevel env (Rec [(in_id, rhs)]) | noCSE in_id = (env1, Rec [(out_id, rhs')]) -- See Note [CSE for recursive bindings] | Just previous <- lookupCSRecEnv env out_id rhs'' , let previous' = mkTicks ticks previous - = (extendCSSubst env1 in_id previous', NonRec out_id previous') + out_id' = delayInlining toplevel out_id + = -- We have a hit in the recursive-binding cache + (extendCSSubst env1 in_id previous', NonRec out_id' previous') | otherwise = (extendCSRecEnv env1 out_id rhs'' id_expr', Rec [(zapped_id, rhs')]) @@ -382,10 +383,21 @@ cse_bind toplevel env (in_id, in_rhs) out_id in (env, (out_id, mkLams params' out_body)) | otherwise - = (env', (out_id', out_rhs)) + = (env', (out_id'', out_rhs)) where (env', out_id') = addBinding env in_id out_id out_rhs - out_rhs = tryForCSE env in_rhs + (cse_done, out_rhs) = try_for_cse env in_rhs + out_id'' | cse_done = delayInlining toplevel out_id' + | otherwise = out_id' + +delayInlining :: TopLevelFlag -> Id -> Id +-- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already +delayInlining top_lvl bndr + | isTopLevel top_lvl + , isAlwaysActive (idInlineActivation bndr) + = bndr `setInlineActivation` activeAfterInitial + | otherwise + = bndr addBinding :: CSEnv -- Includes InId->OutId cloning -> InVar -- Could be a let-bound type @@ -464,12 +476,46 @@ The net effect is that for the y-binding we want to - but leave the original binding for y undisturbed This is done by cse_bind. I got it wrong the first time (Trac #13367). + +Note [Delay inlining after CSE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose (Trac #15445) we have + f,g :: Num a => a -> a + f x = ...f (x-1)..... + g y = ...g (y-1) .... + +and we make some specialisations of 'g', either automatically, or via +a SPECIALISE pragma. Then CSE kicks in and notices that the RHSs of +'f' and 'g' are identical, so we get + f x = ...f (x-1)... + g = f + {-# RULES g @Int _ = $sg #-} + +Now there is terrible danger that, in an importing module, we'll inline +'g' before we have a chance to run its specialisation! + +Solution: during CSE, when adding a top-level + g = f +binding after a "hit" in the CSE cache, add a NOINLINE[2] activation +to it, to ensure it's not inlined right away. + +Why top level only? Because for nested bindings we are already past +phase 2 and will never return there. -} tryForCSE :: CSEnv -> InExpr -> OutExpr -tryForCSE env expr - | Just e <- lookupCSEnv env expr'' = mkTicks ticks e - | otherwise = expr' +tryForCSE env expr = snd (try_for_cse env expr) + +try_for_cse :: CSEnv -> InExpr -> (Bool, OutExpr) +-- (False, e') => We did not CSE the entire expression, +-- but we might have CSE'd some sub-expressions, +-- yielding e' +-- +-- (True, te') => We CSE'd the entire expression, +-- yielding the trivial expression te' +try_for_cse env expr + | Just e <- lookupCSEnv env expr'' = (True, mkTicks ticks e) + | otherwise = (False, expr') -- The varToCoreExpr is needed if we have -- case e of xco { ...case e of yco { ... } ... } -- Then CSE will substitute yco -> xco; diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout index 32cf35639c..e6dfa5fc6c 100644 --- a/testsuite/tests/numeric/should_compile/T14465.stdout +++ b/testsuite/tests/numeric/should_compile/T14465.stdout @@ -93,7 +93,7 @@ plusOne :: Natural -> Natural plusOne = \ (n :: Natural) -> plusNatural n M.minusOne1 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -one :: Natural +one [InlPrag=NOUSERINLINE[2]] :: Natural [GblId, Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout index 681d171350..fc0159a1ec 100644 --- a/testsuite/tests/numeric/should_compile/T7116.stdout +++ b/testsuite/tests/numeric/should_compile/T7116.stdout @@ -65,7 +65,7 @@ dr case x of { GHC.Types.D# x1 -> GHC.Types.D# (GHC.Prim.+## x1 x1) } -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -dl :: Double -> Double +dl [InlPrag=NOUSERINLINE[2]] :: Double -> Double [GblId, Arity=1, Caf=NoCafRefs, @@ -97,7 +97,7 @@ fr } -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -fl :: Float -> Float +fl [InlPrag=NOUSERINLINE[2]] :: Float -> Float [GblId, Arity=1, Caf=NoCafRefs, diff --git a/testsuite/tests/simplCore/should_compile/T15445.hs b/testsuite/tests/simplCore/should_compile/T15445.hs new file mode 100644 index 0000000000..36bf61dbbb --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T15445.hs @@ -0,0 +1,8 @@ +module T15445 where + +import T15445a + + +foo :: IO () +foo = do { print (plusTwoRec [1..10 :: Int]) + ; print (plusTwoRec' [1..20 :: Int]) } diff --git a/testsuite/tests/simplCore/should_compile/T15445.stderr b/testsuite/tests/simplCore/should_compile/T15445.stderr new file mode 100644 index 0000000000..d5deac5a59 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T15445.stderr @@ -0,0 +1,13 @@ +Rule fired: Class op + (BUILTIN) +Rule fired: Class op fromInteger (BUILTIN) +Rule fired: integerToInt (BUILTIN) +Rule fired: SPEC plusTwoRec (T15445a) +Rule fired: SPEC $fShow[] (GHC.Show) +Rule fired: Class op >> (BUILTIN) +Rule fired: Class op show (BUILTIN) +Rule fired: SPEC plusTwoRec (T15445a) +Rule fired: Class op enumFromTo (BUILTIN) +Rule fired: Class op show (BUILTIN) +Rule fired: Class op enumFromTo (BUILTIN) +Rule fired: eftIntList (GHC.Enum) +Rule fired: eftIntList (GHC.Enum) diff --git a/testsuite/tests/simplCore/should_compile/T15445a.hs b/testsuite/tests/simplCore/should_compile/T15445a.hs new file mode 100644 index 0000000000..02e5baceb5 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T15445a.hs @@ -0,0 +1,10 @@ +module T15445a where + +{-# SPECIALIZE plusTwoRec :: [Int] -> [Int] #-} +plusTwoRec :: Num a => [a] -> [a] +plusTwoRec [] = [] +plusTwoRec (x:xs) = x+2:plusTwoRec xs + +plusTwoRec' :: Num a => [a] -> [a] +plusTwoRec' [] = [] +plusTwoRec' (x:xs) = x+2:plusTwoRec' xs diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 188f6432fa..1284b7c3d4 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -318,6 +318,8 @@ test('T15005', normal, compile, ['-O']) # we omit profiling because it affects the optimiser and makes the test fail test('T15056', [extra_files(['T15056a.hs']), omit_ways(['profasm'])], multimod_compile, ['T15056', '-O -v0 -ddump-rule-firings']) test('T15186', normal, multimod_compile, ['T15186', '-v0']) -test('T15453', normal, compile, ['-O1']) test('T15517', normal, compile, ['-O0']) test('T15517a', normal, compile, ['-O0']) +test('T15453', normal, compile, ['-dcore-lint -O1']) +test('T15445', normal, multimod_compile, ['T15445', '-v0 -O -ddump-rule-firings']) + |