From 0592d1ad634130cb577fa98d3fbb548338407a63 Mon Sep 17 00:00:00 2001 From: Simon Peyton Jones Date: Tue, 29 Oct 2019 09:19:14 +0000 Subject: Make CSE delay inlining less CSE delays inlining a little bit, to avoid losing vital specialisations; see Note [Delay inlining after CSE] in CSE. But it was being over-enthusiastic. This patch makes the delay only apply to Ids with specialisation rules, which avoids unnecessary delay (#17409). --- compiler/simplCore/CSE.hs | 56 +++++++++++++++++++--- .../tests/numeric/should_compile/T14465.stdout | 2 +- .../tests/numeric/should_compile/T7116.stdout | 4 +- testsuite/tests/perf/compiler/T16473.stdout | 1 - testsuite/tests/simplCore/should_compile/Makefile | 5 ++ testsuite/tests/simplCore/should_compile/T17409.hs | 9 ++++ .../tests/simplCore/should_compile/T17409.stdout | 2 + testsuite/tests/simplCore/should_compile/all.T | 5 +- 8 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 testsuite/tests/simplCore/should_compile/T17409.hs create mode 100644 testsuite/tests/simplCore/should_compile/T17409.stdout diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs index 0758ce930a..35862aeabe 100644 --- a/compiler/simplCore/CSE.hs +++ b/compiler/simplCore/CSE.hs @@ -15,7 +15,7 @@ import GhcPrelude import CoreSubst import Var ( Var ) import VarEnv ( elemInScopeSet, mkInScopeSet ) -import Id ( Id, idType, isDeadBinder +import Id ( Id, idType, isDeadBinder, idHasRules , idInlineActivation, setInlineActivation , zapIdOccInfo, zapIdUsageInfo, idInlinePragma , isJoinId, isJoinId_maybe ) @@ -392,9 +392,15 @@ cse_bind toplevel env (in_id, in_rhs) out_id delayInlining :: TopLevelFlag -> Id -> Id -- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already +-- See Note [Delay inlining after CSE] delayInlining top_lvl bndr | isTopLevel top_lvl , isAlwaysActive (idInlineActivation bndr) + , idHasRules bndr -- Only if the Id has some RULES, + -- which might otherwise get lost + -- These rules are probably auto-generated specialisations, + -- since Ids with manual rules usually have manually-inserted + -- delayed inlining anyway = bndr `setInlineActivation` activeAfterInitial | otherwise = bndr @@ -494,13 +500,49 @@ a SPECIALISE pragma. Then CSE kicks in and notices that the RHSs of Now there is terrible danger that, in an importing module, we'll inline 'g' before we have a chance to run its specialisation! -Solution: during CSE, when adding a top-level - g = f -binding after a "hit" in the CSE cache, add a NOINLINE[2] activation -to it, to ensure it's not inlined right away. +Solution: during CSE, afer a "hit" in the CSE cache + * when adding a binding + g = f + * for a top-level function g + * and g has specialisation RULES +add a NOINLINE[2] activation to it, to ensure it's not inlined +right away. + +Notes: +* Why top level only? Because for nested bindings we are already past + phase 2 and will never return there. + +* Why "only if g has RULES"? Because there is no point in + doing this if there are no RULES; and other things being + equal it delays optimisation to delay inlining (#17409) + + +---- Historical note --- + +This patch is simpler and more direct than an earlier +version: + + commit 2110738b280543698407924a16ac92b6d804dc36 + Author: Simon Peyton Jones + Date: Mon Jul 30 13:43:56 2018 +0100 + + Don't inline functions with RULES too early + +We had to revert this patch because it made GHC itself slower. + +Why? It delayed inlining of /all/ functions with RULES, and that was +very bad in TcFlatten.flatten_ty_con_app + +* It delayed inlining of liftM +* That delayed the unravelling of the recursion in some dictionary + bindings. +* That delayed some eta expansion, leaving + flatten_ty_con_app = \x y. let in \z. blah +* That allowed the float-out pass to put sguff between + the \y and \z. +* And that permanently stopped eta expasion of the function, + even once was simplified. -Why top level only? Because for nested bindings we are already past -phase 2 and will never return there. -} tryForCSE :: CSEnv -> InExpr -> OutExpr diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout index 936507532a..df97060635 100644 --- a/testsuite/tests/numeric/should_compile/T14465.stdout +++ b/testsuite/tests/numeric/should_compile/T14465.stdout @@ -98,7 +98,7 @@ plusOne :: Natural -> Natural plusOne = \ (n :: Natural) -> plusNatural n M.minusOne1 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -one [InlPrag=NOUSERINLINE[2]] :: Natural +one :: Natural [GblId, Unf=Unf{Src=, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout index a1adeb180a..171d9bc7f4 100644 --- a/testsuite/tests/numeric/should_compile/T7116.stdout +++ b/testsuite/tests/numeric/should_compile/T7116.stdout @@ -65,7 +65,7 @@ dr case x of { GHC.Types.D# x1 -> GHC.Types.D# (GHC.Prim.+## x1 x1) } -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -dl [InlPrag=NOUSERINLINE[2]] :: Double -> Double +dl :: Double -> Double [GblId, Arity=1, Caf=NoCafRefs, @@ -97,7 +97,7 @@ fr } -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -fl [InlPrag=NOUSERINLINE[2]] :: Float -> Float +fl :: Float -> Float [GblId, Arity=1, Caf=NoCafRefs, diff --git a/testsuite/tests/perf/compiler/T16473.stdout b/testsuite/tests/perf/compiler/T16473.stdout index 3a1f5a571b..4427d39a45 100644 --- a/testsuite/tests/perf/compiler/T16473.stdout +++ b/testsuite/tests/perf/compiler/T16473.stdout @@ -136,4 +136,3 @@ Rule fired: Class op $p1Monad (BUILTIN) Rule fired: Class op pure (BUILTIN) Rule fired: SPEC/Main $fMonadStateT @ Identity _ (Main) Rule fired: SPEC go @ (StateT (Sum Int) Identity) (Main) -Rule fired: Class op fmap (BUILTIN) diff --git a/testsuite/tests/simplCore/should_compile/Makefile b/testsuite/tests/simplCore/should_compile/Makefile index c27458c46c..1daf834381 100644 --- a/testsuite/tests/simplCore/should_compile/Makefile +++ b/testsuite/tests/simplCore/should_compile/Makefile @@ -2,6 +2,11 @@ TOP=../../.. include $(TOP)/mk/boilerplate.mk include $(TOP)/mk/test.mk +T17409: + $(RM) -f T17409.o T17409.hi + - '$(TEST_HC)' $(TEST_HC_OPTS) -O -c -dverbose-core2core -dsuppress-uniques T17409.hs 2> /dev/null | grep '\' + # Expecting 'id' to be inlined in the 'gentle' pass + T14978: $(RM) -f T14978.o T14978.hi -'$(TEST_HC)' $(TEST_HC_OPTS) -O -c -ddump-simpl T14978.hs -dsuppress-coercions | grep 'foo' diff --git a/testsuite/tests/simplCore/should_compile/T17409.hs b/testsuite/tests/simplCore/should_compile/T17409.hs new file mode 100644 index 0000000000..512f152cf1 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T17409.hs @@ -0,0 +1,9 @@ +module T17409 where + +-- The bug was that id was inlined only after the +-- "gentle" simplifier pass, beucause CSE in GHC.Base +-- had commoned-up 'id' with 'breakpoint', and added +-- a NOINLINE[2] to the former. + +-- The test just checks that id is inlined early. +f x = not (id x) diff --git a/testsuite/tests/simplCore/should_compile/T17409.stdout b/testsuite/tests/simplCore/should_compile/T17409.stdout new file mode 100644 index 0000000000..50edd27281 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T17409.stdout @@ -0,0 +1,2 @@ + f = \ (x :: Bool) -> not (id @ Bool x); } in +f = \ (x :: Bool) -> not (id @ Bool x) diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 35933e8be4..838ae93cad 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -311,4 +311,7 @@ test('T16979b', normal, compile, ['-O']) test('T17140', [extra_files(['T17140a.hs'])], makefile_test, - ['T17140']) \ No newline at end of file + ['T17140']) +test('T17409', + normal, + makefile_test, ['T17409']) -- cgit v1.2.1