summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2019-10-29 09:19:14 +0000
committerBen Gamari <ben@smart-cactus.org>2019-10-31 17:04:45 -0400
commit0592d1ad634130cb577fa98d3fbb548338407a63 (patch)
tree28711fde82824fc0f85cfa271c70c1575394aeb3
parent6a606980b40df7e084a7f79ecf2f6eab7a135aeb (diff)
downloadhaskell-wip/T17409.tar.gz
Make CSE delay inlining lesswip/T17409
CSE delays inlining a little bit, to avoid losing vital specialisations; see Note [Delay inlining after CSE] in CSE. But it was being over-enthusiastic. This patch makes the delay only apply to Ids with specialisation rules, which avoids unnecessary delay (#17409).
-rw-r--r--compiler/simplCore/CSE.hs56
-rw-r--r--testsuite/tests/numeric/should_compile/T14465.stdout2
-rw-r--r--testsuite/tests/numeric/should_compile/T7116.stdout4
-rw-r--r--testsuite/tests/perf/compiler/T16473.stdout1
-rw-r--r--testsuite/tests/simplCore/should_compile/Makefile5
-rw-r--r--testsuite/tests/simplCore/should_compile/T17409.hs9
-rw-r--r--testsuite/tests/simplCore/should_compile/T17409.stdout2
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T5
8 files changed, 72 insertions, 12 deletions
diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs
index 0758ce930a..35862aeabe 100644
--- a/compiler/simplCore/CSE.hs
+++ b/compiler/simplCore/CSE.hs
@@ -15,7 +15,7 @@ import GhcPrelude
import CoreSubst
import Var ( Var )
import VarEnv ( elemInScopeSet, mkInScopeSet )
-import Id ( Id, idType, isDeadBinder
+import Id ( Id, idType, isDeadBinder, idHasRules
, idInlineActivation, setInlineActivation
, zapIdOccInfo, zapIdUsageInfo, idInlinePragma
, isJoinId, isJoinId_maybe )
@@ -392,9 +392,15 @@ cse_bind toplevel env (in_id, in_rhs) out_id
delayInlining :: TopLevelFlag -> Id -> Id
-- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already
+-- See Note [Delay inlining after CSE]
delayInlining top_lvl bndr
| isTopLevel top_lvl
, isAlwaysActive (idInlineActivation bndr)
+ , idHasRules bndr -- Only if the Id has some RULES,
+ -- which might otherwise get lost
+ -- These rules are probably auto-generated specialisations,
+ -- since Ids with manual rules usually have manually-inserted
+ -- delayed inlining anyway
= bndr `setInlineActivation` activeAfterInitial
| otherwise
= bndr
@@ -494,13 +500,49 @@ a SPECIALISE pragma. Then CSE kicks in and notices that the RHSs of
Now there is terrible danger that, in an importing module, we'll inline
'g' before we have a chance to run its specialisation!
-Solution: during CSE, when adding a top-level
- g = f
-binding after a "hit" in the CSE cache, add a NOINLINE[2] activation
-to it, to ensure it's not inlined right away.
+Solution: during CSE, afer a "hit" in the CSE cache
+ * when adding a binding
+ g = f
+ * for a top-level function g
+ * and g has specialisation RULES
+add a NOINLINE[2] activation to it, to ensure it's not inlined
+right away.
+
+Notes:
+* Why top level only? Because for nested bindings we are already past
+ phase 2 and will never return there.
+
+* Why "only if g has RULES"? Because there is no point in
+ doing this if there are no RULES; and other things being
+ equal it delays optimisation to delay inlining (#17409)
+
+
+---- Historical note ---
+
+This patch is simpler and more direct than an earlier
+version:
+
+ commit 2110738b280543698407924a16ac92b6d804dc36
+ Author: Simon Peyton Jones <simonpj@microsoft.com>
+ Date: Mon Jul 30 13:43:56 2018 +0100
+
+ Don't inline functions with RULES too early
+
+We had to revert this patch because it made GHC itself slower.
+
+Why? It delayed inlining of /all/ functions with RULES, and that was
+very bad in TcFlatten.flatten_ty_con_app
+
+* It delayed inlining of liftM
+* That delayed the unravelling of the recursion in some dictionary
+ bindings.
+* That delayed some eta expansion, leaving
+ flatten_ty_con_app = \x y. let <stuff> in \z. blah
+* That allowed the float-out pass to put sguff between
+ the \y and \z.
+* And that permanently stopped eta expasion of the function,
+ even once <stuff> was simplified.
-Why top level only? Because for nested bindings we are already past
-phase 2 and will never return there.
-}
tryForCSE :: CSEnv -> InExpr -> OutExpr
diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout
index 936507532a..df97060635 100644
--- a/testsuite/tests/numeric/should_compile/T14465.stdout
+++ b/testsuite/tests/numeric/should_compile/T14465.stdout
@@ -98,7 +98,7 @@ plusOne :: Natural -> Natural
plusOne = \ (n :: Natural) -> plusNatural n M.minusOne1
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-one [InlPrag=NOUSERINLINE[2]] :: Natural
+one :: Natural
[GblId,
Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout
index a1adeb180a..171d9bc7f4 100644
--- a/testsuite/tests/numeric/should_compile/T7116.stdout
+++ b/testsuite/tests/numeric/should_compile/T7116.stdout
@@ -65,7 +65,7 @@ dr
case x of { GHC.Types.D# x1 -> GHC.Types.D# (GHC.Prim.+## x1 x1) }
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-dl [InlPrag=NOUSERINLINE[2]] :: Double -> Double
+dl :: Double -> Double
[GblId,
Arity=1,
Caf=NoCafRefs,
@@ -97,7 +97,7 @@ fr
}
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-fl [InlPrag=NOUSERINLINE[2]] :: Float -> Float
+fl :: Float -> Float
[GblId,
Arity=1,
Caf=NoCafRefs,
diff --git a/testsuite/tests/perf/compiler/T16473.stdout b/testsuite/tests/perf/compiler/T16473.stdout
index 3a1f5a571b..4427d39a45 100644
--- a/testsuite/tests/perf/compiler/T16473.stdout
+++ b/testsuite/tests/perf/compiler/T16473.stdout
@@ -136,4 +136,3 @@ Rule fired: Class op $p1Monad (BUILTIN)
Rule fired: Class op pure (BUILTIN)
Rule fired: SPEC/Main $fMonadStateT @ Identity _ (Main)
Rule fired: SPEC go @ (StateT (Sum Int) Identity) (Main)
-Rule fired: Class op fmap (BUILTIN)
diff --git a/testsuite/tests/simplCore/should_compile/Makefile b/testsuite/tests/simplCore/should_compile/Makefile
index c27458c46c..1daf834381 100644
--- a/testsuite/tests/simplCore/should_compile/Makefile
+++ b/testsuite/tests/simplCore/should_compile/Makefile
@@ -2,6 +2,11 @@ TOP=../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
+T17409:
+ $(RM) -f T17409.o T17409.hi
+ - '$(TEST_HC)' $(TEST_HC_OPTS) -O -c -dverbose-core2core -dsuppress-uniques T17409.hs 2> /dev/null | grep '\<id\>'
+ # Expecting 'id' to be inlined in the 'gentle' pass
+
T14978:
$(RM) -f T14978.o T14978.hi
-'$(TEST_HC)' $(TEST_HC_OPTS) -O -c -ddump-simpl T14978.hs -dsuppress-coercions | grep 'foo'
diff --git a/testsuite/tests/simplCore/should_compile/T17409.hs b/testsuite/tests/simplCore/should_compile/T17409.hs
new file mode 100644
index 0000000000..512f152cf1
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T17409.hs
@@ -0,0 +1,9 @@
+module T17409 where
+
+-- The bug was that id was inlined only after the
+-- "gentle" simplifier pass, beucause CSE in GHC.Base
+-- had commoned-up 'id' with 'breakpoint', and added
+-- a NOINLINE[2] to the former.
+
+-- The test just checks that id is inlined early.
+f x = not (id x)
diff --git a/testsuite/tests/simplCore/should_compile/T17409.stdout b/testsuite/tests/simplCore/should_compile/T17409.stdout
new file mode 100644
index 0000000000..50edd27281
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T17409.stdout
@@ -0,0 +1,2 @@
+ f = \ (x :: Bool) -> not (id @ Bool x); } in
+f = \ (x :: Bool) -> not (id @ Bool x)
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 35933e8be4..838ae93cad 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -311,4 +311,7 @@ test('T16979b', normal, compile, ['-O'])
test('T17140',
[extra_files(['T17140a.hs'])],
makefile_test,
- ['T17140']) \ No newline at end of file
+ ['T17140'])
+test('T17409',
+ normal,
+ makefile_test, ['T17409'])