diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2021-07-02 08:50:16 +0100 |
---|---|---|
committer | Simon Peyton Jones <simonpj@microsoft.com> | 2021-07-19 10:59:38 +0100 |
commit | 5b187575dac64e0a202f0700bdb7a1d1604f9438 (patch) | |
tree | 9f24e24b67c08dfab0e1cf44fda2edb195382889 | |
parent | 41d6cfc4d36ba93d82f16f9a83ea69f4e02c3810 (diff) | |
download | haskell-5b187575dac64e0a202f0700bdb7a1d1604f9438.tar.gz |
Better sharing of join points (#19996)wip/T19557
This patch, provoked by regressions in the text package
(#19557), improves sharing of join points. This also fixes
the terrible behaviour in #20049.
See Note [Duplicating join points] in GHC.Core.Opt.Simplify.
* In the StrictArg case of mkDupableContWithDmds, don't
use Plan A for data constructors
* In postInlineUnconditionally, don't inline JoinIds
Avoids inlining join $j x = Just x
in case blah of
A -> $j x1
B -> $j x2
C -> $j x3
* In mkDupableStrictBind and mkDupableStrictAlt, create
join points (much) more often: exprIsTrivial rather than
exprIsDupable. This may be much, but we'll see.
Metric Decrease:
T12545
T13253-spj
T13719
T18140
T18282
T18304
T18698a
T18698b
Metric Increase:
T16577
T18923
T9961
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 116 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 5 | ||||
-rw-r--r-- | testsuite/tests/perf/compiler/T20049.hs | 53 | ||||
-rw-r--r-- | testsuite/tests/perf/compiler/all.T | 4 |
4 files changed, 149 insertions, 29 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index 6c7379faa2..28c5548745 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -57,15 +57,13 @@ import GHC.Types.Unique ( hasKey ) import GHC.Types.Basic import GHC.Types.Tickish import GHC.Types.Var ( isTyCoVar ) - import GHC.Builtin.PrimOps ( PrimOp (SeqOp) ) import GHC.Builtin.Types.Prim( realWorldStatePrimTy ) import GHC.Builtin.Names( runRWKey ) -import GHC.Data.Maybe ( orElse ) +import GHC.Data.Maybe ( isNothing, orElse ) import GHC.Data.FastString import GHC.Unit.Module ( moduleName, pprModuleName ) - import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain @@ -3428,7 +3426,8 @@ mkDupableContWithDmds env _ (StrictArg { sc_fun = fun, sc_cont = cont , sc_fun_ty = fun_ty }) -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable - | thumbsUpPlanA cont + | isNothing (isDataConId_maybe (ai_fun fun)) + , thumbsUpPlanA cont -- See point (3) of Note [Duplicating join points] = -- Use Plan A of Note [Duplicating StrictArg] do { let (_ : dmds) = ai_dmds fun ; (floats1, cont') <- mkDupableContWithDmds env dmds cont @@ -3537,7 +3536,7 @@ mkDupableContWithDmds env _ mkDupableStrictBind :: SimplEnv -> OutId -> OutExpr -> OutType -> SimplM (SimplFloats, SimplCont) mkDupableStrictBind env arg_bndr join_rhs res_ty - | exprIsDupable (targetPlatform (seDynFlags env)) join_rhs + | exprIsTrivial join_rhs -- See point (2) of Note [Duplicating join points] = return (emptyFloats env , StrictBind { sc_bndr = arg_bndr, sc_bndrs = [] , sc_body = join_rhs @@ -3564,8 +3563,8 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty mkDupableAlt :: Platform -> OutId -> JoinFloats -> OutAlt -> SimplM (JoinFloats, OutAlt) -mkDupableAlt platform case_bndr jfloats (Alt con bndrs' rhs') - | exprIsDupable platform rhs' -- Note [Small alternative rhs] +mkDupableAlt _platform case_bndr jfloats (Alt con bndrs' rhs') + | exprIsTrivial rhs' -- See point (2) of Note [Duplicating join points] = return (jfloats, Alt con bndrs' rhs') | otherwise @@ -3632,6 +3631,77 @@ the case rn cancels with. See #4957 a fuller example. +Note [Duplicating join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +IN #19996 we discovered that we want to be really careful about +inlining join points. Consider + case (join $j x = K f x ) + (in case v of ) + ( p1 -> $j x1 ) of + ( p2 -> $j x2 ) + ( p3 -> $j x3 ) + K g y -> blah[g,y] + +Here the join-point RHS is very small, just a constructor +application (K x y). So we might inline it to get + case (case v of ) + ( p1 -> K f x1 ) of + ( p2 -> K f x2 ) + ( p3 -> K f x3 ) + K g y -> blah[g,y] + +But now we have to make `blah` into a join point, /abstracted/ +over `g` and `y`. In contrast, if we /don't/ inline $j we +don't need a join point for `blah` and we'll get + join $j x = let g=f, y=x in blah[g,y] + in case v of + p1 -> $j x1 + p2 -> $j x2 + p3 -> $j x3 + +This can make a /massive/ difference, because `blah` can see +what `f` is, instead of lambda-abstracting over it. + +To achieve this: + +1. Do not postInlineUnconditionally a join point, until the Final + phase. (The Final phase is still quite early, so we might consider + delaying still more.) + +2. In mkDupableAlt and mkDupableStrictBind, generate an alterative for + all alternatives, except for exprIsTrival RHSs. Previously we used + exprIsDupable. This generates a lot more join points, but makes + them much more case-of-case friendly. + + It is definitely worth checking for exprIsTrivial, otherwise we get + an extra Simplifier iteration, because it is inlined in the next + round. + +3. By the same token we want to use Plan B in + Note [Duplicating StrictArg] when the RHS of the new join point + is a data constructor application. That same Note explains why we + want Plan A when the RHS of the new join point would be a + non-data-constructor application + +4. You might worry that $j will be inlined by the call-site inliner, + but it won't because the call-site context for a join is usually + extremely boring (the arguments come from the pattern match). + And if not, then perhaps inlining it would be a good idea. + + You might also wonder if we get UnfWhen, because the RHS of the + join point is no bigger than the call. But in the cases we care + about it will be a little bigger, because of that free `f` in + $j x = K f x + So for now we don't do anything special in callSiteInline + +There is a bit of tension between (2) and (3). Do we want to retain +the join point only when the RHS is +* a constructor application? or +* just non-trivial? +Currently, a bit ad-hoc, but we definitely want to retain the join +point for data constructors in mkDupalbleALt (point 2); that is the +whole point of #19996 described above. + Historical Note [Case binders and join points] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NB: this entire Note is now irrelevant. In Jun 21 we stopped @@ -3686,24 +3756,6 @@ we'd lose that when zapping the subst-env. We could have a per-alt subst-env, but zapping it (as we do in mkDupableCont, the Select case) is safe, and at worst delays the join-point inlining. -Note [Small alternative rhs] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is worth checking for a small RHS because otherwise we -get extra let bindings that may cause an extra iteration of the simplifier to -inline back in place. Quite often the rhs is just a variable or constructor. -The Ord instance of Maybe in PrelMaybe.hs, for example, took several extra -iterations because the version with the let bindings looked big, and so wasn't -inlined, but after the join points had been inlined it looked smaller, and so -was inlined. - -NB: we have to check the size of rhs', not rhs. -Duplicating a small InAlt might invalidate occurrence information -However, if it *is* dupable, we return the *un* simplified alternative, -because otherwise we'd need to pair it up with an empty subst-env.... -but we only have one env shared between all the alts. -(Remember we must zap the subst-env before re-simplifying something). -Rather than do this we simply agree to re-simplify the original (small) thing later. - Note [Funky mkLamTypes] ~~~~~~~~~~~~~~~~~~~~~~ Notice the funky mkLamTypes. If the constructor has existentials @@ -3749,10 +3801,18 @@ There are two ways to make it duplicable. join $j x = f e1 x e3 in case x of { True -> jump $j r1 ; False -> jump $j r2 } - Notice that Plan B is very like the way we handle strict - bindings; see Note [Duplicating StrictBind]. -Plan A is good. Here's an example from #3116 + Notice that Plan B is very like the way we handle strict bindings; + see Note [Duplicating StrictBind]. And Plan B is exactly what we'd + get if we turned use a case expression to evaluate the strict arg: + + case (case x of { True -> r1; False -> r2 }) of + r -> f e1 r e3 + + So, looking at Note [Duplicating join points], we also want Plan B + when `f` is a data constructor. + +Plan A is often good. Here's an example from #3116 go (n+1) (case l of 1 -> bs' _ -> Chunk p fpc (o+1) (l-1) bs') diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index 549ee2365f..39f62d8744 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -1386,6 +1386,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally] | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally] | exprIsTrivial rhs = True + | isJoinId bndr -- See point (1) of Note [Duplicating join points] + , not (phase == FinalPhase) = False -- in Simplify.hs | otherwise = case occ_info of OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br } @@ -1440,7 +1442,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs where unfolding = idUnfolding bndr uf_opts = seUnfoldingOpts env - active = isActive (sm_phase (getMode env)) (idInlineActivation bndr) + phase = sm_phase (getMode env) + active = isActive phase (idInlineActivation bndr) -- See Note [pre/postInlineUnconditionally in gentle mode] {- Note [Inline small things to avoid creating a thunk] diff --git a/testsuite/tests/perf/compiler/T20049.hs b/testsuite/tests/perf/compiler/T20049.hs new file mode 100644 index 0000000000..2a6c1c56fc --- /dev/null +++ b/testsuite/tests/perf/compiler/T20049.hs @@ -0,0 +1,53 @@ +{-# OPTIONS_GHC -O1 #-} +module Repro where + +data A = A + { a1 :: (Maybe ()) + , a2 :: (Maybe ()) + , a3 :: (Maybe ()) + , a4 :: (Maybe ()) + , a5 :: (Maybe ()) + , a6 :: (Maybe ()) + , a7 :: (Maybe ()) + , a8 :: (Maybe ()) + , a9 :: (Maybe ()) + , a10 :: (Maybe ()) + , a11 :: (Maybe ()) + , a12 :: (Maybe ()) + } + +data B = B + { b1 :: !Bool + , b2 :: !Bool + , b3 :: !Bool + , b4 :: !Bool + , b5 :: !Bool + , b6 :: !Bool + , b7 :: !Bool + , b8 :: !Bool + , b9 :: !Bool + , b10 :: !Bool + , b11 :: !Bool + , b12 :: !Bool + } + +f :: Maybe () -> Bool +f (Just ()) = True +f Nothing = False + +g :: A -> B +g (A a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12) = + B { + b1 = f a1 + , b2 = f a2 + , b3 = f a3 + , b4 = f a4 + , b5 = f a5 + , b6 = f a6 + , b7 = f a7 + , b8 = f a8 + , b9 = f a9 + , b10 = f a10 + , b11 = f a11 + , b12 = f a12 + } diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T index ee764f15cb..6628fb377e 100644 --- a/testsuite/tests/perf/compiler/all.T +++ b/testsuite/tests/perf/compiler/all.T @@ -477,3 +477,7 @@ test('T11545', test('T15304', [ collect_compiler_stats('all', 10) ], compile, ['-O']) +test ('T20049', + [ collect_compiler_stats('bytes allocated',2) ], + compile, + ['']) |