diff options
author | Simon Peyton Jones <simon.peytonjones@gmail.com> | 2022-01-24 00:55:34 +0000 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-01-26 19:47:09 -0500 |
commit | ce488c2bdba764baf3fa5fa880b7865600679620 (patch) | |
tree | 6362c472b9a3f372ccc9df9616f5916d9b126354 /compiler | |
parent | a5924b38b0cbe75021e6042b946fe921f2c4afe4 (diff) | |
download | haskell-ce488c2bdba764baf3fa5fa880b7865600679620.tar.gz |
Better occurrence analysis with casts
This patch addresses #20988 by refactoring the way the
occurrence analyser deals with lambdas.
Previously it used collectBinders to split off a group of binders,
and deal with them together. Now I deal with them one at a time
in occAnalLam, which allows me to skip casts easily. See
Note [Occurrence analysis for lambda binders]
about "lambda-groups"
This avoidance of splitting out a list of binders has some good
consequences. Less code, more efficient, and I think, more clear.
The Simplifier needed a similar change, now that lambda-groups
can inlude casts. It turned out that I could simplify the code
here too, in particular elminating the sm_bndrs field of StrictBind.
Simpler, more efficient.
Compile-time metrics improve slightly; here are the ones that are
+/- 0.5% or greater:
Baseline
Test Metric value New value Change
--------------------------------------------------------------------
T11303b(normal) ghc/alloc 40,736,702 40,543,992 -0.5%
T12425(optasm) ghc/alloc 90,443,459 90,034,104 -0.5%
T14683(normal) ghc/alloc 2,991,496,696 2,956,277,288 -1.2%
T16875(normal) ghc/alloc 34,937,866 34,739,328 -0.6%
T17977b(normal) ghc/alloc 37,908,550 37,709,096 -0.5%
T20261(normal) ghc/alloc 621,154,237 618,312,480 -0.5%
T3064(normal) ghc/alloc 190,832,320 189,952,312 -0.5%
T3294(normal) ghc/alloc 1,604,674,178 1,604,608,264 -0.0%
T5321FD(normal) ghc/alloc 270,540,489 251,888,480 -6.9% GOOD
T5321Fun(normal) ghc/alloc 300,707,814 281,856,200 -6.3% GOOD
WWRec(normal) ghc/alloc 588,460,916 585,536,400 -0.5%
geo. mean -0.3%
Metric Decrease:
T5321FD
T5321Fun
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/GHC/Core/Opt/OccurAnal.hs | 479 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 86 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 9 | ||||
-rw-r--r-- | compiler/GHC/Core/SimpleOpt.hs | 15 | ||||
-rw-r--r-- | compiler/GHC/Core/Utils.hs | 29 |
5 files changed, 327 insertions, 291 deletions
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs index 12269b0a29..0a2b7a7929 100644 --- a/compiler/GHC/Core/Opt/OccurAnal.hs +++ b/compiler/GHC/Core/Opt/OccurAnal.hs @@ -16,7 +16,11 @@ The occurrence analyser re-typechecks a core expression, returning a new core expression with (hopefully) improved usage information. -} -module GHC.Core.Opt.OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) where +module GHC.Core.Opt.OccurAnal ( + occurAnalysePgm, + occurAnalyseExpr, + zapLambdaBndrs + ) where import GHC.Prelude @@ -29,7 +33,7 @@ import GHC.Core.Coercion import GHC.Core.Type import GHC.Core.TyCo.FVs( tyCoVarsOfMCo ) -import GHC.Data.Maybe( isJust ) +import GHC.Data.Maybe( isJust, orElse ) import GHC.Data.Graph.Directed ( SCC(..), Node(..) , stronglyConnCompFromEdgedVerticesUniq , stronglyConnCompFromEdgedVerticesUniqR ) @@ -672,7 +676,13 @@ a right-hand side. In particular, we need to a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot lambda, or a non-recursive join point; and - b) call 'markAllNonTail' *unless* the binding is for a join point. + b) call 'markAllNonTail' *unless* the binding is for a join point, and + the RHS has the right arity; e.g. + join j x y = case ... of + A -> j2 p + B -> j2 q + in j a b + Here we want the tail calls to j2 to be tail calls of the whole expression Some examples, with how the free occurrences in e (assumed not to be a value lambda) get marked: @@ -823,7 +833,7 @@ occAnalRec :: OccEnv -> TopLevelFlag -- The NonRec case is just like a Let (NonRec ...) above occAnalRec !_ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs - , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) + , nd_uds = rhs_uds })) (WithUsageDetails body_uds binds) | not (bndr `usedIn` body_uds) = WithUsageDetails body_uds binds -- See Note [Dead code] @@ -833,8 +843,8 @@ occAnalRec !_ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs (NonRec tagged_bndr rhs : binds) where (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr - rhs_uds' = adjustRhsUsage NonRecursive (willBeJoinId_maybe tagged_bndr) - rhs_bndrs rhs_uds + rhs_uds' = adjustRhsUsage mb_join_arity rhs rhs_uds + mb_join_arity = willBeJoinId_maybe tagged_bndr -- The Rec case is the interesting one -- See Note [Recursive bindings: the grand plan] @@ -1308,10 +1318,6 @@ data Details , nd_rhs :: CoreExpr -- RHS, already occ-analysed - , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS - -- INVARIANT: (nd_rhs_bndrs nd, _) == - -- collectBinders (nd_rhs nd) - , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings -- ignoring phase (ie assuming all are active) -- See Note [Forming Rec groups] @@ -1366,7 +1372,6 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs) where details = ND { nd_bndr = bndr' , nd_rhs = rhs' - , nd_rhs_bndrs = bndrs' , nd_uds = scope_uds , nd_inl = inl_fvs , nd_simple = null rules_w_uds && null imp_rule_info @@ -1400,12 +1405,11 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs) --------- Right hand side --------- -- Constructing the edges for the main Rec computation -- See Note [Forming Rec groups] - -- Do not use occAnalRhs because we don't yet know - -- the final answer for mb_join_arity - (bndrs, body) = collectBinders rhs - rhs_env = rhsCtxt env - (WithUsageDetails rhs_uds (bndrs', body')) = occAnalLamOrRhs rhs_env bndrs body - rhs' = mkLams bndrs' body' + -- Do not use occAnalRhs because we don't yet know the final + -- answer for mb_join_arity; instead, do the occAnalLam call from + -- occAnalRhs, and postpone adjustRhsUsage until occAnalRec + rhs_env = rhsCtxt env + (WithUsageDetails rhs_uds rhs') = occAnalLam rhs_env rhs --------- Unfolding --------- -- See Note [Unfoldings and join points] @@ -1451,11 +1455,7 @@ mkLoopBreakerNodes :: OccEnv -> TopLevelFlag mkLoopBreakerNodes !env lvl body_uds details_s = WithUsageDetails final_uds (zipWithEqual "mkLoopBreakerNodes" mk_lb_node details_s bndrs') where - (final_uds, bndrs') - = tagRecBinders lvl body_uds - [ (bndr, uds, rhs_bndrs) - | ND { nd_bndr = bndr, nd_uds = uds, nd_rhs_bndrs = rhs_bndrs } - <- details_s ] + (final_uds, bndrs') = tagRecBinders lvl body_uds details_s mk_lb_node nd@(ND { nd_bndr = old_bndr, nd_inl = inl_fvs }) new_bndr = DigraphNode { node_payload = new_nd @@ -1700,29 +1700,207 @@ So The Plan is this: was a loop breaker last time round Hence the is_lb field of NodeScore +-} -************************************************************************ +{- ********************************************************************* * * - Right hand sides + Lambda groups * * -************************************************************************ +********************************************************************* -} + +{- Note [Occurrence analysis for lambda binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For value lambdas we do a special hack. Consider + (\x. \y. ...x...) +If we did nothing, x is used inside the \y, so would be marked +as dangerous to dup. But in the common case where the abstraction +is applied to two arguments this is over-pessimistic, which delays +inlining x, which forces more simplifier iterations. + +So the occurrence analyser collaborates with the simplifier to treat +a /lambda-group/ specially. A lambda-group is a contiguous run of +lambda and casts, e.g. + Lam x (Lam y (Cast (Lam z body) co)) + +* Occurrence analyser: we just mark each binder in the lambda-group + (here: x,y,z) with its occurrence info in the *body* of the + lambda-group. See occAnalLam. + +* Simplifier. The simplifier is careful when partially applying + lambda-groups. See the call to zapLambdaBndrs in + GHC.Core.Opt.Simplify.simplExprF1 + GHC.Core.SimpleOpt.simple_app + +* Why do we take care to account for intervening casts? Answer: + currently we don't do eta-expansion and cast-swizzling in a stable + unfolding (see Note [Eta-expansion inside stable unfoldings]). + So we can get + f = \x. ((\y. ...x...y...) |> co) + Now, since the lambdas aren't together, the occurrence analyser will + say that x is OnceInLam. Now if we have a call + (f e1 |> co) e2 + we'll end up with + let x = e1 in ...x..e2... + and it'll take an extra iteration of the Simplifier to substitute for x. + +A thought: a lambda-group is pretty much what GHC.Core.Opt.Arity.manifestArity +recognises except that the latter looks through (some) ticks. Maybe a lambda +group should also look through (some) ticks? +-} + +isOneShotFun :: CoreExpr -> Bool +-- The top level lambdas, ignoring casts, of the expression +-- are all one-shot. If there aren't any lambdas at all, this is True +isOneShotFun (Lam b e) = isOneShotBndr b && isOneShotFun e +isOneShotFun (Cast e _) = isOneShotFun e +isOneShotFun _ = True + +zapLambdaBndrs :: CoreExpr -> FullArgCount -> CoreExpr +-- If (\xyz. t) appears under-applied to only two arguments, +-- we must zap the occ-info on x,y, because they appear under the \z +-- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal +-- +-- NB: `arg_count` includes both type and value args +zapLambdaBndrs fun arg_count + = -- If the lambda is fully applied, leave it alone; if not + -- zap the OccInfo on the lambdas that do have arguments, + -- so they beta-reduce to use-many Lets rather than used-once ones. + zap arg_count fun `orElse` fun + where + zap :: FullArgCount -> CoreExpr -> Maybe CoreExpr + -- Nothing => No need to change the occ-info + -- Just e => Had to change + zap 0 e | isOneShotFun e = Nothing -- All remaining lambdas are one-shot + | otherwise = Just e -- in which case no need to zap + zap n (Cast e co) = do { e' <- zap n e; return (Cast e' co) } + zap n (Lam b e) = do { e' <- zap (n-1) e + ; return (Lam (zap_bndr b) e') } + zap _ _ = Nothing -- More arguments than lambdas + + zap_bndr b | isTyVar b = b + | otherwise = zapLamIdInfo b + +occAnalLam :: OccEnv -> CoreExpr -> (WithUsageDetails CoreExpr) +-- See Note [Occurrence analysis for lambda binders] +-- It does the following: +-- * Sets one-shot info on the lambda binder from the OccEnv, and +-- removes that one-shot info from the OccEnv +-- * Sets the OccEnv to OccVanilla when going under a value lambda +-- * Tags each lambda with its occurrence information +-- * Walks through casts +-- This function does /not/ do +-- markAllInsideLam or +-- markAllNonTail +-- The caller does that, either in occAnal (Lam {}), or in adjustRhsUsage +-- See Note [Adjusting right-hand sides] + +occAnalLam env (Lam bndr expr) + | isTyVar bndr + = let (WithUsageDetails usage expr') = occAnalLam env expr + in WithUsageDetails usage (Lam bndr expr') + -- Important: Keep the 'env' unchanged so that with a RHS like + -- \(@ x) -> K @x (f @x) + -- we'll see that (K @x (f @x)) is in a OccRhs, and hence refrain + -- from inlining f. See the beginning of Note [Cascading inlines]. + + | otherwise -- So 'bndr' is an Id + = let (env_one_shots', bndr1) + = case occ_one_shots env of + [] -> ([], bndr) + (os : oss) -> (oss, updOneShotInfo bndr os) + -- Use updOneShotInfo, not setOneShotInfo, as pre-existing + -- one-shot info might be better than what we can infer, e.g. + -- due to explicit use of the magic 'oneShot' function. + -- See Note [The oneShot function] + + env1 = env { occ_encl = OccVanilla, occ_one_shots = env_one_shots' } + env2 = addOneInScope env1 bndr + (WithUsageDetails usage expr') = occAnalLam env2 expr + (usage', bndr2) = tagLamBinder usage bndr1 + in WithUsageDetails usage' (Lam bndr2 expr') + +-- For casts, keep going in the same lambda-group +-- See Note [Occurrence analysis for lambda binders] +occAnalLam env (Cast expr co) + = let (WithUsageDetails usage expr') = occAnalLam env expr + -- usage1: see Note [Gather occurrences of coercion variables] + usage1 = addManyOccs usage (coVarsOfCo co) + + -- usage2: see Note [Occ-anal and cast worker/wrapper] + usage2 = case expr of + Var {} | isRhsEnv env -> markAllMany usage1 + _ -> usage1 + + -- usage3: you might think this was not necessary, because of + -- the markAllNonTail in adjustRhsUsage; but not so! For a + -- join point, adjustRhsUsage doesn't do this; yet if there is + -- a cast, we must! + usage3 = markAllNonTail usage2 + + in WithUsageDetails usage3 (Cast expr' co) + +occAnalLam env expr = occAnal env expr + +{- Note [Occ-anal and cast worker/wrapper] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider y = e; x = y |> co +If we mark y as used-once, we'll inline y into x, and the the Cast +worker/wrapper transform will float it straight back out again. See +Note [Cast worker/wrapper] in GHC.Core.Opt.Simplify. + +So in this particular case we want to mark 'y' as Many. It's very +ad-hoc, but it's also simple. It's also what would happen if we gave +the binding for x a stable unfolding (as we usually do for wrappers, thus + y = e + {-# INLINE x #-} + x = y |> co +Now y appears twice -- once in x's stable unfolding, and once in x's +RHS. So it'll get a Many occ-info. (Maybe Cast w/w should create a stable +unfolding, which would obviate this Note; but that seems a bit of a +heavyweight solution.) + +We only need to this in occAnalLam, not occAnal, because the top leve +of a right hand side is handled by occAnalLam. -} + +{- ********************************************************************* +* * + Right hand sides +* * +********************************************************************* -} + occAnalRhs :: OccEnv -> RecFlag -> Maybe JoinArity -> CoreExpr -- RHS -> WithUsageDetails CoreExpr occAnalRhs !env is_rec mb_join_arity rhs - = let - (bndrs, body) = collectBinders rhs - (WithUsageDetails body_usage (bndrs',body')) = occAnalLamOrRhs env bndrs body - final_bndrs | isRec is_rec = bndrs' - | otherwise = markJoinOneShots mb_join_arity bndrs' - -- For a /non-recursive/ join point we can mark all - -- its join-lambda as one-shot; and it's a good idea to do so + = let (WithUsageDetails usage rhs1) = occAnalLam env rhs + -- We call occAnalLam here, not occAnalExpr, so that it doesn't + -- do the markAllInsideLam and markNonTailCall stuff before + -- we've had a chance to help with join points; that comes next + rhs2 = markJoinOneShots is_rec mb_join_arity rhs1 + rhs_usage = adjustRhsUsage mb_join_arity rhs2 usage + in WithUsageDetails rhs_usage rhs2 + + - -- Final adjustment - rhs_usage = adjustRhsUsage is_rec mb_join_arity final_bndrs body_usage - in WithUsageDetails rhs_usage (mkLams final_bndrs body') +markJoinOneShots :: RecFlag -> Maybe JoinArity -> CoreExpr -> CoreExpr +-- For a /non-recursive/ join point we can mark all +-- its join-lambda as one-shot; and it's a good idea to do so +markJoinOneShots NonRecursive (Just join_arity) rhs + = go join_arity rhs + where + go 0 rhs = rhs + go n (Lam b rhs) = Lam (if isId b then setOneShotLambda b else b) + (go (n-1) rhs) + go _ rhs = rhs -- Not enough lambdas. This can legitimately happen. + -- e.g. let j = case ... in j True + -- This will become an arity-1 join point after the + -- simplifier has eta-expanded it; but it may not have + -- enough lambdas /yet/. (Lint checks that JoinIds do + -- have enough lambdas.) +markJoinOneShots _ _ rhs + = rhs occAnalUnfolding :: OccEnv -> RecFlag @@ -1991,75 +2169,47 @@ occAnal env (Tick tickish body) -- See #14242. occAnal env (Cast expr co) - = let - (WithUsageDetails usage expr') = occAnal env expr - usage1 = markAllManyNonTailIf (isRhsEnv env) usage - -- usage1: if we see let x = y `cast` co - -- then mark y as 'Many' so that we don't - -- immediately inline y again. - usage2 = addManyOccs usage1 (coVarsOfCo co) - -- usage2: see Note [Gather occurrences of coercion variables] - in WithUsageDetails (markAllNonTail usage2) (Cast expr' co) + = let (WithUsageDetails usage expr') = occAnal env expr + usage1 = addManyOccs usage (coVarsOfCo co) + -- usage2: see Note [Gather occurrences of coercion variables] + usage2 = markAllNonTail usage1 + -- usage3: calls inside expr aren't tail calls any more + in WithUsageDetails usage2 (Cast expr' co) occAnal env app@(App _ _) = occAnalApp env (collectArgsTicks tickishFloatable app) --- Ignore type variables altogether --- (a) occurrences inside type lambdas only not marked as InsideLam --- (b) type variables not in environment - -occAnal env (Lam x body) - | isTyVar x - = let - (WithUsageDetails body_usage body') = occAnal env body - in WithUsageDetails (markAllNonTail body_usage) (Lam x body') - -{- Note [Occurrence analysis for lambda binders] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For value lambdas we do a special hack. Consider - (\x. \y. ...x...) -If we did nothing, x is used inside the \y, so would be marked -as dangerous to dup. But in the common case where the abstraction -is applied to two arguments this is over-pessimistic, which delays -inlining x, which forces more simplifier iterations. - -So instead, we just mark each binder with its occurrence info in the -*body* of the multiple lambda. Then, the simplifier is careful when -partially applying lambdas. See the calls to zapLamBndrs in - GHC.Core.Opt.Simplify.simplExprF1 - GHC.Core.SimpleOpt.simple_app --} - -occAnal env expr@(Lam _ _) - = -- See Note [Occurrence analysis for lambda binders] - let - (bndrs, body) = collectBinders expr - (WithUsageDetails usage (tagged_bndrs, body')) = occAnalLamOrRhs env bndrs body - expr' = mkLams tagged_bndrs body' - usage1 = markAllNonTail usage - one_shot_gp = all isOneShotBndr tagged_bndrs - final_usage = markAllInsideLamIf (not one_shot_gp) usage1 - `addLamCoVarOccs` bndrs - -- See Note [Gather occurrences of coercion variables] +occAnal env expr@(Lam {}) + = let (WithUsageDetails usage expr') = occAnalLam env expr + final_usage = markAllInsideLamIf (not (isOneShotFun expr')) $ + markAllNonTail usage in WithUsageDetails final_usage expr' occAnal env (Case scrut bndr ty alts) = let (WithUsageDetails scrut_usage scrut') = occAnal (scrutCtxt env alts) scrut - alt_env = addBndrSwap scrut' bndr $ env { occ_encl = OccVanilla } `addInScope` [bndr] - (alts_usage_s, alts') = mapAndUnzip ((\(WithUsageDetails uds a) -> (uds,a)) . occAnalAlt alt_env) alts + alt_env = addBndrSwap scrut' bndr $ env { occ_encl = OccVanilla } `addOneInScope` bndr + (alts_usage_s, alts') = mapAndUnzip (do_alt alt_env) alts alts_usage = foldr orUDs emptyDetails alts_usage_s (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr total_usage = markAllNonTail scrut_usage `andUDs` alts_usage1 -- Alts can have tail calls, but the scrutinee can't in WithUsageDetails total_usage (Case scrut' tagged_bndr ty alts') + where + do_alt !env (Alt con bndrs rhs) + = let + (WithUsageDetails rhs_usage1 rhs1) = occAnal (env `addInScope` bndrs) rhs + (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs + in -- See Note [Binders in case alternatives] + (alt_usg, Alt con tagged_bndrs rhs1) occAnal env (Let bind body) = let - (WithUsageDetails body_usage body') = occAnal (env `addInScope` bindersOf bind) body - (WithUsageDetails final_usage new_binds) = occAnalBind env NotTopLevel + body_env = env { occ_encl = OccVanilla } `addInScope` bindersOf bind + (WithUsageDetails body_usage body') = occAnal body_env body + (WithUsageDetails final_usage binds') = occAnalBind env NotTopLevel noImpRuleEdges bind body_usage - in WithUsageDetails final_usage (mkLets new_binds body') + in WithUsageDetails final_usage (mkLets binds' body') occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr] -> [OneShots] -> WithUsageDetails CoreExpr -- The `fun` argument is just an accumulating parameter, @@ -2269,44 +2419,6 @@ life, because it binds 'y' to (a,b) (imagine got inlined and scrutinised y). -} -occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr - -> WithUsageDetails ([CoreBndr], CoreExpr) --- Tags the returned binders with their OccInfo, but does --- not do any markInsideLam to the returned usage details -occAnalLamOrRhs !env [] body - = let (WithUsageDetails body_usage body') = occAnal env body - in WithUsageDetails body_usage ([], body') - -- RHS of thunk or nullary join point - -occAnalLamOrRhs env (bndr:bndrs) body - | isTyVar bndr - = -- Important: Keep the environment so that we don't inline into an RHS like - -- \(@ x) -> C @x (f @x) - -- (see the beginning of Note [Cascading inlines]). - let - (WithUsageDetails body_usage (bndrs',body')) = occAnalLamOrRhs env bndrs body - in WithUsageDetails body_usage (bndr:bndrs', body') - -occAnalLamOrRhs env binders body - = let - (WithUsageDetails body_usage body') = occAnal env_body body - (final_usage, tagged_binders) = tagLamBinders body_usage binders' - -- Use binders' to put one-shot info on the lambdas - in - WithUsageDetails final_usage (tagged_binders, body') - where - env1 = env `addInScope` binders - (env_body, binders') = oneShotGroup env1 binders - -occAnalAlt :: OccEnv - -> CoreAlt -> WithUsageDetails (Alt IdWithOccInfo) -occAnalAlt !env (Alt con bndrs rhs) - = let - (WithUsageDetails rhs_usage1 rhs1) = occAnal (env `addInScope` bndrs) rhs - (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs - in -- See Note [Binders in case alternatives] - WithUsageDetails alt_usg (Alt con tagged_bndrs rhs1) - {- ************************************************************************ * * @@ -2405,61 +2517,19 @@ isRhsEnv (OccEnv { occ_encl = cxt }) = case cxt of OccRhs -> True _ -> False +addOneInScope :: OccEnv -> CoreBndr -> OccEnv +addOneInScope env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars }) bndr + | bndr `elemVarSet` rng_vars = env { occ_bs_env = emptyVarEnv, occ_bs_rng = emptyVarSet } + | otherwise = env { occ_bs_env = swap_env `delVarEnv` bndr } + addInScope :: OccEnv -> [Var] -> OccEnv -- See Note [The binder-swap substitution] +-- It's only neccessary to call this on in-scope Ids, +-- but harmless to include TyVars too addInScope env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars }) bndrs | any (`elemVarSet` rng_vars) bndrs = env { occ_bs_env = emptyVarEnv, occ_bs_rng = emptyVarSet } | otherwise = env { occ_bs_env = swap_env `delVarEnvList` bndrs } -oneShotGroup :: OccEnv -> [CoreBndr] - -> ( OccEnv - , [CoreBndr] ) - -- The result binders have one-shot-ness set that they might not have had originally. - -- This happens in (build (\c n -> e)). Here the occurrence analyser - -- linearity context knows that c,n are one-shot, and it records that fact in - -- the binder. This is useful to guide subsequent float-in/float-out transformations - -oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs - = go ctxt bndrs [] - where - go ctxt [] rev_bndrs - = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla } - , reverse rev_bndrs ) - - go [] bndrs rev_bndrs - = ( env { occ_one_shots = [], occ_encl = OccVanilla } - , reverse rev_bndrs ++ bndrs ) - - go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs - | isId bndr = go ctxt' bndrs (bndr': rev_bndrs) - | otherwise = go ctxt bndrs (bndr : rev_bndrs) - where - bndr' = updOneShotInfo bndr one_shot - -- Use updOneShotInfo, not setOneShotInfo, as pre-existing - -- one-shot info might be better than what we can infer, e.g. - -- due to explicit use of the magic 'oneShot' function. - -- See Note [The oneShot function] - - -markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var] --- Mark the lambdas of a non-recursive join point as one-shot. --- This is good to prevent gratuitous float-out etc -markJoinOneShots mb_join_arity bndrs - = case mb_join_arity of - Nothing -> bndrs - Just n -> go n bndrs - where - go 0 bndrs = bndrs - go _ [] = [] -- This can legitimately happen. - -- e.g. let j = case ... in j True - -- This will become an arity-1 join point after the - -- simplifier has eta-expanded it; but it may not have - -- enough lambdas /yet/. (Lint checks that JoinIds do - -- have enough lambdas.) - go n (b:bs) = b' : go (n-1) bs - where - b' | isId b = setOneShotLambda b - | otherwise = b -------------------- transClosureFV :: VarEnv VarSet -> VarEnv VarSet @@ -2841,7 +2911,7 @@ andUDs, orUDs andUDs = combineUsageDetailsWith addOccInfo orUDs = combineUsageDetailsWith orOccInfo -mkOneOcc ::Id -> InterestingCxt -> JoinArity -> UsageDetails +mkOneOcc :: Id -> InterestingCxt -> JoinArity -> UsageDetails mkOneOcc id int_cxt arity | isLocalId id = emptyDetails { ud_env = unitVarEnv id occ_info } @@ -2911,12 +2981,6 @@ markAllNonTailIf False ud = ud markAllManyNonTail = markAllMany . markAllNonTail -- effectively sets to noOccInfo -markAllManyNonTailIf :: Bool -- If this is true - -> UsageDetails -- Then do markAllManyNonTail on this - -> UsageDetails -markAllManyNonTailIf True uds = markAllManyNonTail uds -markAllManyNonTailIf False uds = uds - lookupDetails :: UsageDetails -> Id -> OccInfo lookupDetails ud id = case lookupVarEnv (ud_env ud) id of @@ -2980,22 +3044,19 @@ flattenUsageDetails ud@(UD { ud_env = env }) ------------------- -- See Note [Adjusting right-hand sides] -adjustRhsUsage :: RecFlag -> Maybe JoinArity - -> [CoreBndr] -- Outer lambdas, AFTER occ anal +adjustRhsUsage :: Maybe JoinArity + -> CoreExpr -- Rhs, AFTER occ anal -> UsageDetails -- From body of lambda -> UsageDetails -adjustRhsUsage is_rec mb_join_arity bndrs usage - = markAllInsideLamIf (not one_shot) $ +adjustRhsUsage mb_join_arity rhs usage + = -- c.f. occAnal (Lam {}) + markAllInsideLamIf (not one_shot) $ markAllNonTailIf (not exact_join) $ usage where - one_shot = case mb_join_arity of - Just join_arity - | isRec is_rec -> False - | otherwise -> all isOneShotBndr (drop join_arity bndrs) - Nothing -> all isOneShotBndr bndrs - + one_shot = isOneShotFun rhs exact_join = exactJoin mb_join_arity bndrs + (bndrs,_) = collectBinders rhs exactJoin :: Maybe JoinArity -> [a] -> Bool exactJoin Nothing _ = False @@ -3053,17 +3114,16 @@ tagNonRecBinder lvl usage binder tagRecBinders :: TopLevelFlag -- At top level? -> UsageDetails -- Of body of let ONLY - -> [(CoreBndr, -- Binder - UsageDetails, -- RHS usage details - [CoreBndr])] -- Lambdas in new RHS + -> [Details] -> (UsageDetails, -- Adjusted details for whole scope, -- with binders removed [IdWithOccInfo]) -- Tagged binders -- Substantially more complicated than non-recursive case. Need to adjust RHS -- details *before* tagging binders (because the tags depend on the RHSes). -tagRecBinders lvl body_uds triples +tagRecBinders lvl body_uds details_s = let - (bndrs, rhs_udss, _) = unzip3 triples + bndrs = map nd_bndr details_s + rhs_udss = map nd_uds details_s -- 1. Determine join-point-hood of whole group, as determined by -- the *unadjusted* usage details @@ -3072,20 +3132,21 @@ tagRecBinders lvl body_uds triples -- 2. Adjust usage details of each RHS, taking into account the -- join-point-hood decision - rhs_udss' = map adjust triples - adjust (bndr, rhs_uds, rhs_bndrs) - = adjustRhsUsage Recursive mb_join_arity rhs_bndrs rhs_uds - where - -- Can't use willBeJoinId_maybe here because we haven't tagged the - -- binder yet (the tag depends on these adjustments!) - mb_join_arity - | will_be_joins - , let occ = lookupDetails unadj_uds bndr - , AlwaysTailCalled arity <- tailCallInfo occ - = Just arity - | otherwise - = assert (not will_be_joins) -- Should be AlwaysTailCalled if - Nothing -- we are making join points! + rhs_udss' = [ adjustRhsUsage (mb_join_arity bndr) rhs rhs_uds + | ND { nd_bndr = bndr, nd_uds = rhs_uds + , nd_rhs = rhs } <- details_s ] + + mb_join_arity :: Id -> Maybe JoinArity + mb_join_arity bndr + -- Can't use willBeJoinId_maybe here because we haven't tagged + -- the binder yet (the tag depends on these adjustments!) + | will_be_joins + , let occ = lookupDetails unadj_uds bndr + , AlwaysTailCalled arity <- tailCallInfo occ + = Just arity + | otherwise + = assert (not will_be_joins) -- Should be AlwaysTailCalled if + Nothing -- we are making join points! -- 3. Compute final usage details from adjusted RHS details adj_uds = foldr andUDs body_uds rhs_udss' diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index f08f96b6bc..b1112be288 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -21,7 +21,7 @@ import GHC.Core.Opt.Simplify.Monad import GHC.Core.Type hiding ( substTy, substTyVar, extendTvSubst, extendCvSubst ) import GHC.Core.Opt.Simplify.Env import GHC.Core.Opt.Simplify.Utils -import GHC.Core.Opt.OccurAnal ( occurAnalyseExpr ) +import GHC.Core.Opt.OccurAnal ( occurAnalyseExpr, zapLambdaBndrs ) import GHC.Core.Make ( FloatBind, mkImpossibleExpr, castBottomExpr ) import qualified GHC.Core.Make import GHC.Core.Coercion hiding ( substCo, substCoVar ) @@ -1153,16 +1153,14 @@ simplExprF1 env (App fun arg) cont simplExprF1 env expr@(Lam {}) cont = {-#SCC "simplExprF1-Lam" #-} - simplLam env zapped_bndrs body cont - -- The main issue here is under-saturated lambdas + simplLam env (zapLambdaBndrs expr n_args) cont + -- zapLambdaBndrs: the issue here is under-saturated lambdas -- (\x1. \x2. e) arg1 -- Here x1 might have "occurs-once" occ-info, because occ-info -- is computed assuming that a group of lambdas is applied -- all at once. If there are too few args, we must zap the -- occ-info, UNLESS the remaining binders are one-shot where - (bndrs, body) = collectBinders expr - zapped_bndrs = zapLamBndrs n_args bndrs n_args = countArgs cont -- NB: countArgs counts all the args (incl type args) -- and likewise drop counts all binders (incl type lambdas) @@ -1191,7 +1189,7 @@ simplExprF1 env (Let (NonRec bndr rhs) body) cont = {-#SCC "simplNonRecJoinPoint" #-} simplNonRecJoinPoint env bndr' rhs' body cont | otherwise - = {-#SCC "simplNonRecE" #-} simplNonRecE env bndr (rhs, env) ([], body) cont + = {-#SCC "simplNonRecE" #-} simplNonRecE env bndr (rhs, env) body cont {- Note [Avoiding space leaks in OutType] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1449,12 +1447,12 @@ rebuild env expr cont StrictArg { sc_fun = fun, sc_cont = cont, sc_fun_ty = fun_ty } -> rebuildCall env (addValArgTo fun expr fun_ty ) cont - StrictBind { sc_bndr = b, sc_bndrs = bs, sc_body = body + StrictBind { sc_bndr = b, sc_body = body , sc_env = se, sc_cont = cont } -> do { (floats1, env') <- simplNonRecX (se `setInScopeFromE` env) b expr -- expr satisfies let/app since it started life -- in a call to simplNonRecE - ; (floats2, expr') <- simplLam env' bs body cont + ; (floats2, expr') <- simplLam env' body cont ; return (floats1 `addFloats` floats2, expr') } ApplyToTy { sc_arg_ty = ty, sc_cont = cont} @@ -1598,41 +1596,47 @@ simplArg env dup_flag arg_env arg ************************************************************************ -} -simplLam :: SimplEnv -> [InId] -> InExpr -> SimplCont +simplLam :: SimplEnv -> InExpr -> SimplCont -> SimplM (SimplFloats, OutExpr) -simplLam env [] body cont - = simplExprF env body cont +simplLam env (Lam bndr body) cont = simpl_lam env bndr body cont +simplLam env expr cont = simplExprF env expr cont -simplLam env (bndr:bndrs) body (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont }) +simpl_lam :: SimplEnv -> InBndr -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +-- Type beta-reduction +simpl_lam env bndr body (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont }) = do { tick (BetaReduction bndr) - ; simplLam (extendTvSubst env bndr arg_ty) bndrs body cont } + ; simplLam (extendTvSubst env bndr arg_ty) body cont } -simplLam env (bndr:bndrs) body (ApplyToVal { sc_arg = arg, sc_env = arg_se - , sc_cont = cont, sc_dup = dup }) +-- Value beta-reduction +simpl_lam env bndr body (ApplyToVal { sc_arg = arg, sc_env = arg_se + , sc_cont = cont, sc_dup = dup }) | isSimplified dup -- Don't re-simplify if we've simplified it once -- See Note [Avoiding exponential behaviour] = do { tick (BetaReduction bndr) ; (floats1, env') <- simplNonRecX env bndr arg - ; (floats2, expr') <- simplLam env' bndrs body cont + ; (floats2, expr') <- simplLam env' body cont ; return (floats1 `addFloats` floats2, expr') } | otherwise = do { tick (BetaReduction bndr) - ; simplNonRecE env bndr (arg, arg_se) (bndrs, body) cont } + ; simplNonRecE env bndr (arg, arg_se) body cont } - -- Discard a non-counting tick on a lambda. This may change the - -- cost attribution slightly (moving the allocation of the - -- lambda elsewhere), but we don't care: optimisation changes - -- cost attribution all the time. -simplLam env bndrs body (TickIt tickish cont) +-- Discard a non-counting tick on a lambda. This may change the +-- cost attribution slightly (moving the allocation of the +-- lambda elsewhere), but we don't care: optimisation changes +-- cost attribution all the time. +simpl_lam env bndr body (TickIt tickish cont) | not (tickishCounts tickish) - = simplLam env bndrs body cont + = simpl_lam env bndr body cont - -- Not enough args, so there are real lambdas left to put in the result -simplLam env bndrs body cont - = do { (env', bndrs') <- simplLamBndrs env bndrs - ; body' <- simplExpr env' body +-- Not enough args, so there are real lambdas left to put in the result +simpl_lam env bndr body cont + = do { let (inner_bndrs, inner_body) = collectBinders body + ; (env', bndrs') <- simplLamBndrs env (bndr:inner_bndrs) + ; body' <- simplExpr env' inner_body ; new_lam <- mkLam env' bndrs' body' cont ; rebuild env' new_lam cont } @@ -1652,8 +1656,7 @@ simplNonRecE :: SimplEnv -> InId -- The binder, always an Id -- Never a join point -> (InExpr, SimplEnv) -- Rhs of binding (or arg of lambda) - -> ([InBndr], InExpr) -- Body of the let/lambda - -- \xs.e + -> InExpr -- Body of the let/lambda -> SimplCont -> SimplM (SimplFloats, OutExpr) @@ -1661,27 +1664,22 @@ simplNonRecE :: SimplEnv -- * non-top-level non-recursive non-join-point lets in expressions -- * beta reduction -- --- simplNonRec env b (rhs, rhs_se) (bs, body) k +-- simplNonRec env b (rhs, rhs_se) body k -- = let env in --- cont< let b = rhs_se(rhs) in \bs.body > +-- cont< let b = rhs_se(rhs) in body > -- -- It deals with strict bindings, via the StrictBind continuation, -- which may abort the whole process -- -- Precondition: rhs satisfies the let/app invariant -- Note [Core let/app invariant] in GHC.Core --- --- The "body" of the binding comes as a pair of ([InId],InExpr) --- representing a lambda; so we recurse back to simplLam --- Why? Because of the binder-occ-info-zapping done before --- the call to simplLam in simplExprF (Lam ...) -simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont +simplNonRecE env bndr (rhs, rhs_se) body cont | assert (isId bndr && not (isJoinId bndr) ) True , Just env' <- preInlineUnconditionally env NotTopLevel bndr rhs rhs_se = do { tick (PreInlineUnconditionally bndr) ; -- pprTrace "preInlineUncond" (ppr bndr <+> ppr rhs) $ - simplLam env' bndrs body cont } + simplLam env' body cont } | otherwise = do { (env1, bndr1) <- simplNonRecBndr env bndr @@ -1690,14 +1688,14 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont -- See Note [Dark corner with representation polymorphism] ; if isStrictId bndr1 && sm_case_case (getMode env) then simplExprF (rhs_se `setInScopeFromE` env) rhs - (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body + (StrictBind { sc_bndr = bndr, sc_body = body , sc_env = env, sc_cont = cont, sc_dup = NoDup }) -- Deal with lazy bindings else do { (env2, bndr2) <- addBndrRules env1 bndr bndr1 Nothing ; (floats1, env3) <- simplLazyBind env2 NotTopLevel NonRecursive bndr bndr2 rhs rhs_se - ; (floats2, expr') <- simplLam env3 bndrs body cont + ; (floats2, expr') <- simplLam env3 body cont ; return (floats1 `addFloats` floats2, expr') } } ------------------ @@ -3433,14 +3431,14 @@ mkDupableContWithDmds env dmds (TickIt t cont) ; return (floats, TickIt t cont') } mkDupableContWithDmds env _ - (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs - , sc_body = body, sc_env = se, sc_cont = cont}) + (StrictBind { sc_bndr = bndr, sc_body = body + , sc_env = se, sc_cont = cont}) -- See Note [Duplicating StrictBind] -- K[ let x = <> in b ] --> join j x = K[ b ] -- j <> = do { let sb_env = se `setInScopeFromE` env ; (sb_env1, bndr') <- simplBinder sb_env bndr - ; (floats1, join_inner) <- simplLam sb_env1 bndrs body cont + ; (floats1, join_inner) <- simplLam sb_env1 body cont -- No need to use mkDupableCont before simplLam; we -- use cont once here, and then share the result if necessary @@ -3565,7 +3563,7 @@ mkDupableStrictBind :: SimplEnv -> OutId -> OutExpr -> OutType mkDupableStrictBind env arg_bndr join_rhs res_ty | exprIsTrivial join_rhs -- See point (2) of Note [Duplicating join points] = return (emptyFloats env - , StrictBind { sc_bndr = arg_bndr, sc_bndrs = [] + , StrictBind { sc_bndr = arg_bndr , sc_body = join_rhs , sc_env = zapSubstEnv env -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index 375ba8a975..e288646d74 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -149,11 +149,10 @@ data SimplCont , sc_cont :: SimplCont } -- The two strict forms have no DupFlag, because we never duplicate them - | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b] - -- or, equivalently, = K[ (\x xs.b) e ] + | StrictBind -- (StrictBind x b K)[e] = let x = e in K[b] + -- or, equivalently, = K[ (\x.b) e ] { sc_dup :: DupFlag -- See Note [DupFlag invariants] , sc_bndr :: InId - , sc_bndrs :: [InBndr] , sc_body :: InExpr , sc_env :: StaticEnv -- See Note [StaticEnv invariant] , sc_cont :: SimplCont } @@ -483,9 +482,11 @@ contHoleScaling (ApplyToVal { sc_cont = k }) = contHoleScaling k contHoleScaling (TickIt _ k) = contHoleScaling k ------------------- countArgs :: SimplCont -> Int --- Count all arguments, including types, coercions, and other values +-- Count all arguments, including types, coercions, +-- and other values; skipping over casts. countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont +countArgs (CastIt _ cont) = countArgs cont countArgs _ = 0 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont) diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs index 720bc895c8..92f1c7987e 100644 --- a/compiler/GHC/Core/SimpleOpt.hs +++ b/compiler/GHC/Core/SimpleOpt.hs @@ -28,7 +28,7 @@ import GHC.Core.FVs import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Core.Make ( FloatBind(..) ) -import GHC.Core.Opt.OccurAnal( occurAnalyseExpr, occurAnalysePgm ) +import GHC.Core.Opt.OccurAnal( occurAnalyseExpr, occurAnalysePgm, zapLambdaBndrs ) import GHC.Types.Literal import GHC.Types.Id import GHC.Types.Id.Info ( realUnfoldingInfo, setUnfoldingInfo, setRuleInfo, IdInfo (..) ) @@ -332,20 +332,21 @@ simple_app env (App e1 e2) as = simple_app env e1 ((env, e2) : as) simple_app env e@(Lam {}) as@(_:_) - | (bndrs, body) <- collectBinders e - , let zapped_bndrs = zapLamBndrs (length as) bndrs + = do_beta env (zapLambdaBndrs e n_args) as -- Be careful to zap the lambda binders if necessary -- c.f. the Lam case of simplExprF1 in GHC.Core.Opt.Simplify -- Lacking this zap caused #19347, when we had a redex -- (\ a b. K a b) e1 e2 -- where (as it happens) the eta-expanded K is produced by -- Note [Typechecking data constructors] in GHC.Tc.Gen.Head - = do_beta env zapped_bndrs body as where - do_beta env (b:bs) body (a:as) + n_args = length as + + do_beta env (Lam b body) (a:as) | (env', mb_pr) <- simple_bind_pair env b Nothing a NotTopLevel - = wrapLet mb_pr $ do_beta env' bs body as - do_beta env bs body as = simple_app env (mkLams bs body) as + = wrapLet mb_pr $ do_beta env' body as + do_beta env body as + = simple_app env body as simple_app env (Tick t e) as -- Okay to do "(Tick t e) x ==> Tick t (e x)"? diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs index d9e34d851b..244f5f4b42 100644 --- a/compiler/GHC/Core/Utils.hs +++ b/compiler/GHC/Core/Utils.hs @@ -38,7 +38,7 @@ module GHC.Core.Utils ( diffBinds, -- * Lambdas and eta reduction - tryEtaReduce, zapLamBndrs, + tryEtaReduce, -- * Manipulating data constructors and types exprToType, exprToCoercion_maybe, @@ -93,7 +93,7 @@ import GHC.Types.Tickish import GHC.Types.Id import GHC.Types.Id.Info import GHC.Types.Unique -import GHC.Types.Basic ( Arity, FullArgCount ) +import GHC.Types.Basic ( Arity ) import GHC.Types.Unique.Set import GHC.Data.FastString @@ -2507,31 +2507,6 @@ This turned up in #7542. {- ********************************************************************* * * - Zapping lambda binders -* * -********************************************************************* -} - -zapLamBndrs :: FullArgCount -> [Var] -> [Var] --- If (\xyz. t) appears under-applied to only two arguments, --- we must zap the occ-info on x,y, because they appear under the \x --- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal --- --- NB: both `arg_count` and `bndrs` include both type and value args/bndrs -zapLamBndrs arg_count bndrs - | no_need_to_zap = bndrs - | otherwise = zap_em arg_count bndrs - where - no_need_to_zap = all isOneShotBndr (drop arg_count bndrs) - - zap_em :: FullArgCount -> [Var] -> [Var] - zap_em 0 bs = bs - zap_em _ [] = [] - zap_em n (b:bs) | isTyVar b = b : zap_em (n-1) bs - | otherwise = zapLamIdInfo b : zap_em (n-1) bs - - -{- ********************************************************************* -* * \subsection{Determining non-updatable right-hand-sides} * * ************************************************************************ |