summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2021-07-02 08:50:16 +0100
committerSimon Peyton Jones <simonpj@microsoft.com>2021-07-19 10:59:38 +0100
commit5b187575dac64e0a202f0700bdb7a1d1604f9438 (patch)
tree9f24e24b67c08dfab0e1cf44fda2edb195382889
parent41d6cfc4d36ba93d82f16f9a83ea69f4e02c3810 (diff)
downloadhaskell-5b187575dac64e0a202f0700bdb7a1d1604f9438.tar.gz
Better sharing of join points (#19996)wip/T19557
This patch, provoked by regressions in the text package (#19557), improves sharing of join points. This also fixes the terrible behaviour in #20049. See Note [Duplicating join points] in GHC.Core.Opt.Simplify. * In the StrictArg case of mkDupableContWithDmds, don't use Plan A for data constructors * In postInlineUnconditionally, don't inline JoinIds Avoids inlining join $j x = Just x in case blah of A -> $j x1 B -> $j x2 C -> $j x3 * In mkDupableStrictBind and mkDupableStrictAlt, create join points (much) more often: exprIsTrivial rather than exprIsDupable. This may be much, but we'll see. Metric Decrease: T12545 T13253-spj T13719 T18140 T18282 T18304 T18698a T18698b Metric Increase: T16577 T18923 T9961
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs116
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs5
-rw-r--r--testsuite/tests/perf/compiler/T20049.hs53
-rw-r--r--testsuite/tests/perf/compiler/all.T4
4 files changed, 149 insertions, 29 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 6c7379faa2..28c5548745 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -57,15 +57,13 @@ import GHC.Types.Unique ( hasKey )
import GHC.Types.Basic
import GHC.Types.Tickish
import GHC.Types.Var ( isTyCoVar )
-
import GHC.Builtin.PrimOps ( PrimOp (SeqOp) )
import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
import GHC.Builtin.Names( runRWKey )
-import GHC.Data.Maybe ( orElse )
+import GHC.Data.Maybe ( isNothing, orElse )
import GHC.Data.FastString
import GHC.Unit.Module ( moduleName, pprModuleName )
-
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
@@ -3428,7 +3426,8 @@ mkDupableContWithDmds env _
(StrictArg { sc_fun = fun, sc_cont = cont
, sc_fun_ty = fun_ty })
-- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
- | thumbsUpPlanA cont
+ | isNothing (isDataConId_maybe (ai_fun fun))
+ , thumbsUpPlanA cont -- See point (3) of Note [Duplicating join points]
= -- Use Plan A of Note [Duplicating StrictArg]
do { let (_ : dmds) = ai_dmds fun
; (floats1, cont') <- mkDupableContWithDmds env dmds cont
@@ -3537,7 +3536,7 @@ mkDupableContWithDmds env _
mkDupableStrictBind :: SimplEnv -> OutId -> OutExpr -> OutType
-> SimplM (SimplFloats, SimplCont)
mkDupableStrictBind env arg_bndr join_rhs res_ty
- | exprIsDupable (targetPlatform (seDynFlags env)) join_rhs
+ | exprIsTrivial join_rhs -- See point (2) of Note [Duplicating join points]
= return (emptyFloats env
, StrictBind { sc_bndr = arg_bndr, sc_bndrs = []
, sc_body = join_rhs
@@ -3564,8 +3563,8 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty
mkDupableAlt :: Platform -> OutId
-> JoinFloats -> OutAlt
-> SimplM (JoinFloats, OutAlt)
-mkDupableAlt platform case_bndr jfloats (Alt con bndrs' rhs')
- | exprIsDupable platform rhs' -- Note [Small alternative rhs]
+mkDupableAlt _platform case_bndr jfloats (Alt con bndrs' rhs')
+ | exprIsTrivial rhs' -- See point (2) of Note [Duplicating join points]
= return (jfloats, Alt con bndrs' rhs')
| otherwise
@@ -3632,6 +3631,77 @@ the case rn cancels with.
See #4957 a fuller example.
+Note [Duplicating join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+IN #19996 we discovered that we want to be really careful about
+inlining join points. Consider
+ case (join $j x = K f x )
+ (in case v of )
+ ( p1 -> $j x1 ) of
+ ( p2 -> $j x2 )
+ ( p3 -> $j x3 )
+ K g y -> blah[g,y]
+
+Here the join-point RHS is very small, just a constructor
+application (K x y). So we might inline it to get
+ case (case v of )
+ ( p1 -> K f x1 ) of
+ ( p2 -> K f x2 )
+ ( p3 -> K f x3 )
+ K g y -> blah[g,y]
+
+But now we have to make `blah` into a join point, /abstracted/
+over `g` and `y`. In contrast, if we /don't/ inline $j we
+don't need a join point for `blah` and we'll get
+ join $j x = let g=f, y=x in blah[g,y]
+ in case v of
+ p1 -> $j x1
+ p2 -> $j x2
+ p3 -> $j x3
+
+This can make a /massive/ difference, because `blah` can see
+what `f` is, instead of lambda-abstracting over it.
+
+To achieve this:
+
+1. Do not postInlineUnconditionally a join point, until the Final
+ phase. (The Final phase is still quite early, so we might consider
+ delaying still more.)
+
+2. In mkDupableAlt and mkDupableStrictBind, generate an alterative for
+ all alternatives, except for exprIsTrival RHSs. Previously we used
+ exprIsDupable. This generates a lot more join points, but makes
+ them much more case-of-case friendly.
+
+ It is definitely worth checking for exprIsTrivial, otherwise we get
+ an extra Simplifier iteration, because it is inlined in the next
+ round.
+
+3. By the same token we want to use Plan B in
+ Note [Duplicating StrictArg] when the RHS of the new join point
+ is a data constructor application. That same Note explains why we
+ want Plan A when the RHS of the new join point would be a
+ non-data-constructor application
+
+4. You might worry that $j will be inlined by the call-site inliner,
+ but it won't because the call-site context for a join is usually
+ extremely boring (the arguments come from the pattern match).
+ And if not, then perhaps inlining it would be a good idea.
+
+ You might also wonder if we get UnfWhen, because the RHS of the
+ join point is no bigger than the call. But in the cases we care
+ about it will be a little bigger, because of that free `f` in
+ $j x = K f x
+ So for now we don't do anything special in callSiteInline
+
+There is a bit of tension between (2) and (3). Do we want to retain
+the join point only when the RHS is
+* a constructor application? or
+* just non-trivial?
+Currently, a bit ad-hoc, but we definitely want to retain the join
+point for data constructors in mkDupalbleALt (point 2); that is the
+whole point of #19996 described above.
+
Historical Note [Case binders and join points]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NB: this entire Note is now irrelevant. In Jun 21 we stopped
@@ -3686,24 +3756,6 @@ we'd lose that when zapping the subst-env. We could have a per-alt subst-env,
but zapping it (as we do in mkDupableCont, the Select case) is safe, and
at worst delays the join-point inlining.
-Note [Small alternative rhs]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is worth checking for a small RHS because otherwise we
-get extra let bindings that may cause an extra iteration of the simplifier to
-inline back in place. Quite often the rhs is just a variable or constructor.
-The Ord instance of Maybe in PrelMaybe.hs, for example, took several extra
-iterations because the version with the let bindings looked big, and so wasn't
-inlined, but after the join points had been inlined it looked smaller, and so
-was inlined.
-
-NB: we have to check the size of rhs', not rhs.
-Duplicating a small InAlt might invalidate occurrence information
-However, if it *is* dupable, we return the *un* simplified alternative,
-because otherwise we'd need to pair it up with an empty subst-env....
-but we only have one env shared between all the alts.
-(Remember we must zap the subst-env before re-simplifying something).
-Rather than do this we simply agree to re-simplify the original (small) thing later.
-
Note [Funky mkLamTypes]
~~~~~~~~~~~~~~~~~~~~~~
Notice the funky mkLamTypes. If the constructor has existentials
@@ -3749,10 +3801,18 @@ There are two ways to make it duplicable.
join $j x = f e1 x e3
in case x of { True -> jump $j r1
; False -> jump $j r2 }
- Notice that Plan B is very like the way we handle strict
- bindings; see Note [Duplicating StrictBind].
-Plan A is good. Here's an example from #3116
+ Notice that Plan B is very like the way we handle strict bindings;
+ see Note [Duplicating StrictBind]. And Plan B is exactly what we'd
+ get if we turned use a case expression to evaluate the strict arg:
+
+ case (case x of { True -> r1; False -> r2 }) of
+ r -> f e1 r e3
+
+ So, looking at Note [Duplicating join points], we also want Plan B
+ when `f` is a data constructor.
+
+Plan A is often good. Here's an example from #3116
go (n+1) (case l of
1 -> bs'
_ -> Chunk p fpc (o+1) (l-1) bs')
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 549ee2365f..39f62d8744 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -1386,6 +1386,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs
| isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
| isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
| exprIsTrivial rhs = True
+ | isJoinId bndr -- See point (1) of Note [Duplicating join points]
+ , not (phase == FinalPhase) = False -- in Simplify.hs
| otherwise
= case occ_info of
OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
@@ -1440,7 +1442,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs
where
unfolding = idUnfolding bndr
uf_opts = seUnfoldingOpts env
- active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+ phase = sm_phase (getMode env)
+ active = isActive phase (idInlineActivation bndr)
-- See Note [pre/postInlineUnconditionally in gentle mode]
{- Note [Inline small things to avoid creating a thunk]
diff --git a/testsuite/tests/perf/compiler/T20049.hs b/testsuite/tests/perf/compiler/T20049.hs
new file mode 100644
index 0000000000..2a6c1c56fc
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T20049.hs
@@ -0,0 +1,53 @@
+{-# OPTIONS_GHC -O1 #-}
+module Repro where
+
+data A = A
+ { a1 :: (Maybe ())
+ , a2 :: (Maybe ())
+ , a3 :: (Maybe ())
+ , a4 :: (Maybe ())
+ , a5 :: (Maybe ())
+ , a6 :: (Maybe ())
+ , a7 :: (Maybe ())
+ , a8 :: (Maybe ())
+ , a9 :: (Maybe ())
+ , a10 :: (Maybe ())
+ , a11 :: (Maybe ())
+ , a12 :: (Maybe ())
+ }
+
+data B = B
+ { b1 :: !Bool
+ , b2 :: !Bool
+ , b3 :: !Bool
+ , b4 :: !Bool
+ , b5 :: !Bool
+ , b6 :: !Bool
+ , b7 :: !Bool
+ , b8 :: !Bool
+ , b9 :: !Bool
+ , b10 :: !Bool
+ , b11 :: !Bool
+ , b12 :: !Bool
+ }
+
+f :: Maybe () -> Bool
+f (Just ()) = True
+f Nothing = False
+
+g :: A -> B
+g (A a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12) =
+ B {
+ b1 = f a1
+ , b2 = f a2
+ , b3 = f a3
+ , b4 = f a4
+ , b5 = f a5
+ , b6 = f a6
+ , b7 = f a7
+ , b8 = f a8
+ , b9 = f a9
+ , b10 = f a10
+ , b11 = f a11
+ , b12 = f a12
+ }
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index ee764f15cb..6628fb377e 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -477,3 +477,7 @@ test('T11545',
test('T15304',
[ collect_compiler_stats('all', 10) ],
compile, ['-O'])
+test ('T20049',
+ [ collect_compiler_stats('bytes allocated',2) ],
+ compile,
+ [''])