Make CSE delay inlining lesswip/T17409

CSE delays inlining a little bit, to avoid losing vital specialisations; see Note [Delay inlining after CSE] in CSE. But it was being over-enthusiastic. This patch makes the delay only apply to Ids with specialisation rules, which avoids unnecessary delay (#17409).
author: Simon Peyton Jones <simonpj@microsoft.com> 2019-10-29 09:19:14 +0000
committer: Ben Gamari <ben@smart-cactus.org> 2019-10-31 17:04:45 -0400
commit: 0592d1ad634130cb577fa98d3fbb548338407a63 (patch)
tree: 28711fde82824fc0f85cfa271c70c1575394aeb3
parent: 6a606980b40df7e084a7f79ecf2f6eab7a135aeb (diff)
download: haskell-wip/T17409.tar.gz
8 files changed, 72 insertions, 12 deletions
diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs
index 0758ce930a..35862aeabe 100644
--- a/compiler/simplCore/CSE.hs
+++ b/compiler/simplCore/CSE.hs
@@ -15,7 +15,7 @@ import GhcPrelude
 import CoreSubst
 import Var              ( Var )
 import VarEnv           ( elemInScopeSet, mkInScopeSet )
-import Id               ( Id, idType, isDeadBinder
+import Id               ( Id, idType, isDeadBinder, idHasRules
                         , idInlineActivation, setInlineActivation
                         , zapIdOccInfo, zapIdUsageInfo, idInlinePragma
                         , isJoinId, isJoinId_maybe )
@@ -392,9 +392,15 @@ cse_bind toplevel env (in_id, in_rhs) out_id
 
 delayInlining :: TopLevelFlag -> Id -> Id
 -- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already
+-- See Note [Delay inlining after CSE]
 delayInlining top_lvl bndr
   | isTopLevel top_lvl
   , isAlwaysActive (idInlineActivation bndr)
+  , idHasRules bndr  -- Only if the Id has some RULES,
+                     -- which might otherwise get lost
+       -- These rules are probably auto-generated specialisations,
+       -- since Ids with manual rules usually have manually-inserted
+       -- delayed inlining anyway
   = bndr `setInlineActivation` activeAfterInitial
   | otherwise
   = bndr
@@ -494,13 +500,49 @@ a SPECIALISE pragma.  Then CSE kicks in and notices that the RHSs of
 Now there is terrible danger that, in an importing module, we'll inline
 'g' before we have a chance to run its specialisation!
 
-Solution: during CSE, when adding a top-level
-  g = f
-binding after a "hit" in the CSE cache, add a NOINLINE[2] activation
-to it, to ensure it's not inlined right away.
+Solution: during CSE, afer a "hit" in the CSE cache
+  * when adding a binding
+        g = f
+  * for a top-level function g
+  * and g has specialisation RULES
+add a NOINLINE[2] activation to it, to ensure it's not inlined
+right away.
+
+Notes:
+* Why top level only?  Because for nested bindings we are already past
+  phase 2 and will never return there.
+
+* Why "only if g has RULES"?  Because there is no point in
+  doing this if there are no RULES; and other things being
+  equal it delays optimisation to delay inlining (#17409)
+
+
+---- Historical note ---
+
+This patch is simpler and more direct than an earlier
+version:
+
+  commit 2110738b280543698407924a16ac92b6d804dc36
+  Author: Simon Peyton Jones <simonpj@microsoft.com>
+  Date:   Mon Jul 30 13:43:56 2018 +0100
+
+  Don't inline functions with RULES too early
+
+We had to revert this patch because it made GHC itself slower.
+
+Why? It delayed inlining of /all/ functions with RULES, and that was
+very bad in TcFlatten.flatten_ty_con_app
+
+* It delayed inlining of liftM
+* That delayed the unravelling of the recursion in some dictionary
+  bindings.
+* That delayed some eta expansion, leaving
+     flatten_ty_con_app = \x y. let <stuff> in \z. blah
+* That allowed the float-out pass to put sguff between
+  the \y and \z.
+* And that permanently stopped eta expasion of the function,
+  even once <stuff> was simplified.
 
-Why top level only?  Because for nested bindings we are already past
-phase 2 and will never return there.
 -}
 
 tryForCSE :: CSEnv -> InExpr -> OutExpr
diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout
index 936507532a..df97060635 100644
--- a/testsuite/tests/numeric/should_compile/T14465.stdout
+++ b/testsuite/tests/numeric/should_compile/T14465.stdout
@@ -98,7 +98,7 @@ plusOne :: Natural -> Natural
 plusOne = \ (n :: Natural) -> plusNatural n M.minusOne1
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-one [InlPrag=NOUSERINLINE[2]] :: Natural
+one :: Natural
 [GblId,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout
index a1adeb180a..171d9bc7f4 100644
--- a/testsuite/tests/numeric/should_compile/T7116.stdout
+++ b/testsuite/tests/numeric/should_compile/T7116.stdout
@@ -65,7 +65,7 @@ dr
       case x of { GHC.Types.D# x1 -> GHC.Types.D# (GHC.Prim.+## x1 x1) }
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-dl [InlPrag=NOUSERINLINE[2]] :: Double -> Double
+dl :: Double -> Double
 [GblId,
  Arity=1,
  Caf=NoCafRefs,
@@ -97,7 +97,7 @@ fr
       }
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
-fl [InlPrag=NOUSERINLINE[2]] :: Float -> Float
+fl :: Float -> Float
 [GblId,
  Arity=1,
  Caf=NoCafRefs,
diff --git a/testsuite/tests/perf/compiler/T16473.stdout b/testsuite/tests/perf/compiler/T16473.stdout
index 3a1f5a571b..4427d39a45 100644
--- a/testsuite/tests/perf/compiler/T16473.stdout
+++ b/testsuite/tests/perf/compiler/T16473.stdout
@@ -136,4 +136,3 @@ Rule fired: Class op $p1Monad (BUILTIN)
 Rule fired: Class op pure (BUILTIN)
 Rule fired: SPEC/Main $fMonadStateT @ Identity _ (Main)
 Rule fired: SPEC go @ (StateT (Sum Int) Identity) (Main)
-Rule fired: Class op fmap (BUILTIN)
diff --git a/testsuite/tests/simplCore/should_compile/Makefile b/testsuite/tests/simplCore/should_compile/Makefile
index c27458c46c..1daf834381 100644
--- a/testsuite/tests/simplCore/should_compile/Makefile
+++ b/testsuite/tests/simplCore/should_compile/Makefile
@@ -2,6 +2,11 @@ TOP=../../..
 include $(TOP)/mk/boilerplate.mk
 include $(TOP)/mk/test.mk
 
+T17409:
+	$(RM) -f T17409.o T17409.hi
+	- '$(TEST_HC)' $(TEST_HC_OPTS) -O -c -dverbose-core2core -dsuppress-uniques T17409.hs 2> /dev/null | grep '\<id\>'
+        # Expecting 'id' to be inlined in the 'gentle' pass
+
 T14978:
 	$(RM) -f T14978.o T14978.hi
 	-'$(TEST_HC)' $(TEST_HC_OPTS) -O -c -ddump-simpl T14978.hs -dsuppress-coercions | grep 'foo'
diff --git a/testsuite/tests/simplCore/should_compile/T17409.hs b/testsuite/tests/simplCore/should_compile/T17409.hs
new file mode 100644
index 0000000000..512f152cf1
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T17409.hs
@@ -0,0 +1,9 @@
+module T17409 where
+
+-- The bug was that id was inlined only after the
+-- "gentle" simplifier pass, beucause CSE in GHC.Base
+-- had commoned-up 'id' with 'breakpoint', and added
+-- a NOINLINE[2] to the former.
+
+-- The test just checks that id is inlined early.
+f x = not (id x)
diff --git a/testsuite/tests/simplCore/should_compile/T17409.stdout b/testsuite/tests/simplCore/should_compile/T17409.stdout
new file mode 100644
index 0000000000..50edd27281
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T17409.stdout
@@ -0,0 +1,2 @@
+      f = \ (x :: Bool) -> not (id @ Bool x); } in
+f = \ (x :: Bool) -> not (id @ Bool x)
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 35933e8be4..838ae93cad 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -311,4 +311,7 @@ test('T16979b', normal, compile, ['-O'])
 test('T17140',
      [extra_files(['T17140a.hs'])],
      makefile_test,
-     ['T17140'])
-\ No newline at end of file
+     ['T17140'])
+test('T17409',
+     normal,
+     makefile_test, ['T17409'])
author	Simon Peyton Jones <simonpj@microsoft.com>	2019-10-29 09:19:14 +0000
committer	Ben Gamari <ben@smart-cactus.org>	2019-10-31 17:04:45 -0400
commit	0592d1ad634130cb577fa98d3fbb548338407a63 (patch)
tree	28711fde82824fc0f85cfa271c70c1575394aeb3
parent	6a606980b40df7e084a7f79ecf2f6eab7a135aeb (diff)
download	haskell-wip/T17409.tar.gz