diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-03-17 09:45:29 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-03-18 10:06:43 -0400 |
commit | 528df8ecb4e2f9c78b1ae4ab7ff8230644e9b643 (patch) | |
tree | 86cd4522d35c4c8fd3a17db5f4e6b138f8be70df /compiler/GHC | |
parent | 53ff2cd0c49735e8f709ac8a5ceab68483eb89df (diff) | |
download | haskell-528df8ecb4e2f9c78b1ae4ab7ff8230644e9b643.tar.gz |
Modules: Core operations (#13009)
Diffstat (limited to 'compiler/GHC')
48 files changed, 30319 insertions, 76 deletions
diff --git a/compiler/GHC/Core.hs b/compiler/GHC/Core.hs index b4af2b2eea..79e71f9526 100644 --- a/compiler/GHC/Core.hs +++ b/compiler/GHC/Core.hs @@ -354,7 +354,7 @@ an unlifted literal, like all the others. Also, we do not permit case analysis with literal patterns on floating-point types. See #9238 and Note [Rules for floating-point comparisons] in -PrelRules for the rationale for this restriction. +GHC.Core.Op.ConstantFold for the rationale for this restriction. -------------------------- GHC.Core INVARIANTS --------------------------- @@ -508,7 +508,7 @@ checked by Core Lint. 5. Floating-point values must not be scrutinised against literals. See #9238 and Note [Rules for floating-point comparisons] - in PrelRules for rationale. Checked in lintCaseExpr; + in GHC.Core.Op.ConstantFold for rationale. Checked in lintCaseExpr; see the call to isFloatingTy. 6. The 'ty' field of (Case scrut bndr ty alts) is the type of the @@ -784,7 +784,7 @@ is crucial for understanding how case-of-case interacts with join points: _ -> False The simplifier will pull the case into the join point (see Note [Join points -and case-of-case] in Simplify): +and case-of-case] in GHC.Core.Op.Simplify): join j :: Int -> Bool -> Bool -- changed! @@ -1810,9 +1810,9 @@ the occurrence info is wrong -} -- The Ord is needed for the FiniteMap used in the lookForConstructor --- in SimplEnv. If you declared that lookForConstructor *ignores* --- constructor-applications with LitArg args, then you could get --- rid of this Ord. +-- in GHC.Core.Op.Simplify.Env. If you declared that lookForConstructor +-- *ignores* constructor-applications with LitArg args, then you could get rid +-- of this Ord. instance Outputable AltCon where ppr (DataAlt dc) = ppr dc diff --git a/compiler/GHC/Core/Coercion.hs b/compiler/GHC/Core/Coercion.hs index 3e59a6ef85..626b1bbc78 100644 --- a/compiler/GHC/Core/Coercion.hs +++ b/compiler/GHC/Core/Coercion.hs @@ -1499,7 +1499,7 @@ mkPiCo r v co | isTyVar v = mkHomoForAllCos [v] co -- We didn't call mkForAllCo here because if v does not appear -- in co, the argement coercion will be nominal. But here we -- want it to be r. It is only called in 'mkPiCos', which is - -- only used in SimplUtils, where we are sure for + -- only used in GHC.Core.Op.Simplify.Utils, where we are sure for -- now (Aug 2018) v won't occur in co. mkFunCo r (mkReflCo r (varType v)) co | otherwise = mkFunCo r (mkReflCo r (varType v)) co diff --git a/compiler/GHC/Core/FamInstEnv.hs b/compiler/GHC/Core/FamInstEnv.hs index 10dc63eb50..1343544612 100644 --- a/compiler/GHC/Core/FamInstEnv.hs +++ b/compiler/GHC/Core/FamInstEnv.hs @@ -314,7 +314,7 @@ Nevertheless it is still useful to have data families in the FamInstEnv: - For finding overlaps and conflicts - For finding the representation type...see FamInstEnv.topNormaliseType - and its call site in Simplify + and its call site in GHC.Core.Op.Simplify - In standalone deriving instance Eq (T [Int]) we need to find the representation type for T [Int] diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs index 92e9a25a6f..5777940ba5 100644 --- a/compiler/GHC/Core/Lint.hs +++ b/compiler/GHC/Core/Lint.hs @@ -29,7 +29,7 @@ import GHC.Core import GHC.Core.FVs import GHC.Core.Utils import GHC.Core.Stats ( coreBindsStats ) -import CoreMonad +import GHC.Core.Op.Monad import Bag import Literal import GHC.Core.DataCon @@ -167,7 +167,7 @@ In the desugarer, it's very very convenient to be able to say (in effect) let x::a = True in <body> That is, use a type let. See Note [Type let] in CoreSyn. One place it is used is in mkWwArgs; see Note [Join points and beta-redexes] -in WwLib. (Maybe there are other "clients" of this feature; I'm not sure). +in GHC.Core.Op.WorkWrap.Lib. (Maybe there are other "clients" of this feature; I'm not sure). * Hence when linting <body> we need to remember that a=Int, else we might reject a correct program. So we carry a type substitution (in @@ -639,7 +639,7 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs) -- We used to check that the dmdTypeDepth of a demand signature never -- exceeds idArity, but that is an unnecessary complication, see - -- Note [idArity varies independently of dmdTypeDepth] in DmdAnal + -- Note [idArity varies independently of dmdTypeDepth] in GHC.Core.Op.DmdAnal -- Check that the binder's arity is within the bounds imposed by -- the type and the strictness signature. See Note [exprArity invariant] @@ -1146,7 +1146,7 @@ lintCaseExpr scrut var alt_ty alts = -- Check that the scrutinee is not a floating-point type -- if there are any literal alternatives -- See GHC.Core Note [Case expression invariants] item (5) - -- See Note [Rules for floating-point comparisons] in PrelRules + -- See Note [Rules for floating-point comparisons] in GHC.Core.Op.ConstantFold ; let isLitPat (LitAlt _, _ , _) = True isLitPat _ = False ; checkL (not $ isFloatingTy scrut_ty && any isLitPat alts) @@ -2838,7 +2838,7 @@ lintAnnots pname pass guts = do let binds = flattenBinds $ mg_binds nguts binds' = flattenBinds $ mg_binds nguts' (diffs,_) = diffBinds True (mkRnEnv2 emptyInScopeSet) binds binds' - when (not (null diffs)) $ CoreMonad.putMsg $ vcat + when (not (null diffs)) $ GHC.Core.Op.Monad.putMsg $ vcat [ lint_banner "warning" pname , text "Core changes with annotations:" , withPprStyle (defaultDumpStyle dflags) $ nest 2 $ vcat diffs diff --git a/compiler/GHC/Core/Make.hs b/compiler/GHC/Core/Make.hs index 17fc146608..538344b946 100644 --- a/compiler/GHC/Core/Make.hs +++ b/compiler/GHC/Core/Make.hs @@ -193,7 +193,7 @@ mkWildEvBinder pred = mkWildValBinder pred -- that you expect to use only at a *binding* site. Do not use it at -- occurrence sites because it has a single, fixed unique, and it's very -- easy to get into difficulties with shadowing. That's why it is used so little. --- See Note [WildCard binders] in SimplEnv +-- See Note [WildCard binders] in GHC.Core.Op.Simplify.Env mkWildValBinder :: Type -> Id mkWildValBinder ty = mkLocalIdOrCoVar wildCardName ty -- "OrCoVar" since a coercion can be a scrutinee with -fdefer-type-errors @@ -576,7 +576,7 @@ data FloatBind = FloatLet CoreBind | FloatCase CoreExpr Id AltCon [Var] -- case e of y { C ys -> ... } - -- See Note [Floating single-alternative cases] in SetLevels + -- See Note [Floating single-alternative cases] in GHC.Core.Op.SetLevels instance Outputable FloatBind where ppr (FloatLet b) = text "LET" <+> ppr b @@ -880,7 +880,7 @@ the first. But the stable-unfolding for f looks like \x. case x of MkT a b -> g ($WMkT b a) where $WMkT is the wrapper for MkT that evaluates its arguments. We apply the same w/w split to this unfolding (see Note [Worker-wrapper -for INLINEABLE functions] in WorkWrap) so the template ends up like +for INLINEABLE functions] in GHC.Core.Op.WorkWrap) so the template ends up like \b. let a = absentError "blah" x = MkT a b in case x of MkT a b -> g ($WMkT b a) @@ -925,7 +925,7 @@ aBSENT_ERROR_ID where absent_ty = mkSpecForAllTys [alphaTyVar] (mkVisFunTy addrPrimTy alphaTy) -- Not runtime-rep polymorphic. aBSENT_ERROR_ID is only used for - -- lifted-type things; see Note [Absent errors] in WwLib + -- lifted-type things; see Note [Absent errors] in GHC.Core.Op.WorkWrap.Lib arity_info = vanillaIdInfo `setArityInfo` 1 -- NB: no bottoming strictness info, unlike other error-ids. -- See Note [aBSENT_ERROR_ID] diff --git a/compiler/GHC/Core/Op/CSE.hs b/compiler/GHC/Core/Op/CSE.hs new file mode 100644 index 0000000000..dc93dacf07 --- /dev/null +++ b/compiler/GHC/Core/Op/CSE.hs @@ -0,0 +1,799 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +\section{Common subexpression} +-} + +{-# LANGUAGE CPP #-} + +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module GHC.Core.Op.CSE (cseProgram, cseOneExpr) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core.Subst +import Var ( Var ) +import VarEnv ( mkInScopeSet ) +import Id ( Id, idType, idHasRules + , idInlineActivation, setInlineActivation + , zapIdOccInfo, zapIdUsageInfo, idInlinePragma + , isJoinId, isJoinId_maybe ) +import GHC.Core.Utils ( mkAltExpr, eqExpr + , exprIsTickedString + , stripTicksE, stripTicksT, mkTicks ) +import GHC.Core.FVs ( exprFreeVars ) +import GHC.Core.Type ( tyConAppArgs ) +import GHC.Core +import Outputable +import BasicTypes +import GHC.Core.Map +import Util ( filterOut, equalLength, debugIsOn ) +import Data.List ( mapAccumL ) + +{- + Simple common sub-expression + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we see + x1 = C a b + x2 = C x1 b +we build up a reverse mapping: C a b -> x1 + C x1 b -> x2 +and apply that to the rest of the program. + +When we then see + y1 = C a b + y2 = C y1 b +we replace the C a b with x1. But then we *dont* want to +add x1 -> y1 to the mapping. Rather, we want the reverse, y1 -> x1 +so that a subsequent binding + y2 = C y1 b +will get transformed to C x1 b, and then to x2. + +So we carry an extra var->var substitution which we apply *before* looking up in the +reverse mapping. + + +Note [Shadowing] +~~~~~~~~~~~~~~~~ +We have to be careful about shadowing. +For example, consider + f = \x -> let y = x+x in + h = \x -> x+x + in ... + +Here we must *not* do CSE on the inner x+x! The simplifier used to guarantee no +shadowing, but it doesn't any more (it proved too hard), so we clone as we go. +We can simply add clones to the substitution already described. + + +Note [CSE for bindings] +~~~~~~~~~~~~~~~~~~~~~~~ +Let-bindings have two cases, implemented by addBinding. + +* SUBSTITUTE: applies when the RHS is a variable + + let x = y in ...(h x).... + + Here we want to extend the /substitution/ with x -> y, so that the + (h x) in the body might CSE with an enclosing (let v = h y in ...). + NB: the substitution maps InIds, so we extend the substitution with + a binding for the original InId 'x' + + How can we have a variable on the RHS? Doesn't the simplifier inline them? + + - First, the original RHS might have been (g z) which has CSE'd + with an enclosing (let y = g z in ...). This is super-important. + See #5996: + x1 = C a b + x2 = C x1 b + y1 = C a b + y2 = C y1 b + Here we CSE y1's rhs to 'x1', and then we must add (y1->x1) to + the substitution so that we can CSE the binding for y2. + + - Second, we use addBinding for case expression scrutinees too; + see Note [CSE for case expressions] + +* EXTEND THE REVERSE MAPPING: applies in all other cases + + let x = h y in ...(h y)... + + Here we want to extend the /reverse mapping (cs_map)/ so that + we CSE the (h y) call to x. + + Note that we use EXTEND even for a trivial expression, provided it + is not a variable or literal. In particular this /includes/ type + applications. This can be important (#13156); e.g. + case f @ Int of { r1 -> + case f @ Int of { r2 -> ... + Here we want to common-up the two uses of (f @ Int) so we can + remove one of the case expressions. + + See also Note [Corner case for case expressions] for another + reason not to use SUBSTITUTE for all trivial expressions. + +Notice that + - The SUBSTITUTE situation extends the substitution (cs_subst) + - The EXTEND situation extends the reverse mapping (cs_map) + +Notice also that in the SUBSTITUTE case we leave behind a binding + x = y +even though we /also/ carry a substitution x -> y. Can we just drop +the binding instead? Well, not at top level! See Note [Top level and +postInlineUnconditionally] in GHC.Core.Op.Simplify.Utils; and in any +case CSE applies only to the /bindings/ of the program, and we leave +it to the simplifier to propate effects to the RULES. Finally, it +doesn't seem worth the effort to discard the nested bindings because +the simplifier will do it next. + +Note [CSE for case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + case scrut_expr of x { ...alts... } +This is very like a strict let-binding + let !x = scrut_expr in ... +So we use (addBinding x scrut_expr) to process scrut_expr and x, and as a +result all the stuff under Note [CSE for bindings] applies directly. + +For example: + +* Trivial scrutinee + f = \x -> case x of wild { + (a:as) -> case a of wild1 { + (p,q) -> ...(wild1:as)... + + Here, (wild1:as) is morally the same as (a:as) and hence equal to + wild. But that's not quite obvious. In the rest of the compiler we + want to keep it as (wild1:as), but for CSE purpose that's a bad + idea. + + By using addBinding we add the binding (wild1 -> a) to the substitution, + which does exactly the right thing. + + (Notice this is exactly backwards to what the simplifier does, which + is to try to replaces uses of 'a' with uses of 'wild1'.) + + This is the main reason that addBinding is called with a trivial rhs. + +* Non-trivial scrutinee + case (f x) of y { pat -> ...let z = f x in ... } + + By using addBinding we'll add (f x :-> y) to the cs_map, and + thereby CSE the inner (f x) to y. + +Note [CSE for INLINE and NOINLINE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are some subtle interactions of CSE with functions that the user +has marked as INLINE or NOINLINE. (Examples from Roman Leshchinskiy.) +Consider + + yes :: Int {-# NOINLINE yes #-} + yes = undefined + + no :: Int {-# NOINLINE no #-} + no = undefined + + foo :: Int -> Int -> Int {-# NOINLINE foo #-} + foo m n = n + + {-# RULES "foo/no" foo no = id #-} + + bar :: Int -> Int + bar = foo yes + +We do not expect the rule to fire. But if we do CSE, then we risk +getting yes=no, and the rule does fire. Actually, it won't because +NOINLINE means that 'yes' will never be inlined, not even if we have +yes=no. So that's fine (now; perhaps in the olden days, yes=no would +have substituted even if 'yes' was NOINLINE). + +But we do need to take care. Consider + + {-# NOINLINE bar #-} + bar = <rhs> -- Same rhs as foo + + foo = <rhs> + +If CSE produces + foo = bar +then foo will never be inlined to <rhs> (when it should be, if <rhs> +is small). The conclusion here is this: + + We should not add + <rhs> :-> bar + to the CSEnv if 'bar' has any constraints on when it can inline; + that is, if its 'activation' not always active. Otherwise we + might replace <rhs> by 'bar', and then later be unable to see that it + really was <rhs>. + +An except to the rule is when the INLINE pragma is not from the user, e.g. from +WorkWrap (see Note [Wrapper activation]). We can tell because noUserInlineSpec +is then true. + +Note that we do not (currently) do CSE on the unfolding stored inside +an Id, even if it is a 'stable' unfolding. That means that when an +unfolding happens, it is always faithful to what the stable unfolding +originally was. + +Note [CSE for stable unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + {-# Unf = Stable (\pq. build blah) #-} + foo = x + +Here 'foo' has a stable unfolding, but its (optimised) RHS is trivial. +(Turns out that this actually happens for the enumFromTo method of +the Integer instance of Enum in GHC.Enum.) Suppose moreover that foo's +stable unfolding originates from an INLINE or INLINEABLE pragma on foo. +Then we obviously do NOT want to extend the substitution with (foo->x), +because we promised to inline foo as what the user wrote. See similar Note +[Stable unfoldings and postInlineUnconditionally] in GHC.Core.Op.Simplify.Utils. + +Nor do we want to change the reverse mapping. Suppose we have + + {-# Unf = Stable (\pq. build blah) #-} + foo = <expr> + bar = <expr> + +There could conceivably be merit in rewriting the RHS of bar: + bar = foo +but now bar's inlining behaviour will change, and importing +modules might see that. So it seems dodgy and we don't do it. + +Stable unfoldings are also created during worker/wrapper when we decide +that a function's definition is so small that it should always inline. +In this case we still want to do CSE (#13340). Hence the use of +isAnyInlinePragma rather than isStableUnfolding. + +Note [Corner case for case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here is another reason that we do not use SUBSTITUTE for +all trivial expressions. Consider + case x |> co of (y::Array# Int) { ... } + +We do not want to extend the substitution with (y -> x |> co); since y +is of unlifted type, this would destroy the let/app invariant if (x |> +co) was not ok-for-speculation. + +But surely (x |> co) is ok-for-speculation, because it's a trivial +expression, and x's type is also unlifted, presumably. Well, maybe +not if you are using unsafe casts. I actually found a case where we +had + (x :: HValue) |> (UnsafeCo :: HValue ~ Array# Int) + +Note [CSE for join points?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must not be naive about join points in CSE: + join j = e in + if b then jump j else 1 + e +The expression (1 + jump j) is not good (see Note [Invariants on join points] in +GHC.Core). This seems to come up quite seldom, but it happens (first seen +compiling ppHtml in Haddock.Backends.Xhtml). + +We could try and be careful by tracking which join points are still valid at +each subexpression, but since join points aren't allocated or shared, there's +less to gain by trying to CSE them. (#13219) + +Note [Look inside join-point binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Another way how CSE for join points is tricky is + + let join foo x = (x, 42) + join bar x = (x, 42) + in … jump foo 1 … jump bar 2 … + +naively, CSE would turn this into + + let join foo x = (x, 42) + join bar = foo + in … jump foo 1 … jump bar 2 … + +but now bar is a join point that claims arity one, but its right-hand side +is not a lambda, breaking the join-point invariant (this was #15002). + +So `cse_bind` must zoom past the lambdas of a join point (using +`collectNBinders`) and resume searching for CSE opportunities only in +the body of the join point. + +Note [CSE for recursive bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f = \x ... f.... + g = \y ... g ... +where the "..." are identical. Could we CSE them? In full generality +with mutual recursion it's quite hard; but for self-recursive bindings +(which are very common) it's rather easy: + +* Maintain a separate cs_rec_map, that maps + (\f. (\x. ...f...) ) -> f + Note the \f in the domain of the mapping! + +* When we come across the binding for 'g', look up (\g. (\y. ...g...)) + Bingo we get a hit. So we can replace the 'g' binding with + g = f + +We can't use cs_map for this, because the key isn't an expression of +the program; it's a kind of synthetic key for recursive bindings. + + +************************************************************************ +* * +\section{Common subexpression} +* * +************************************************************************ +-} + +cseProgram :: CoreProgram -> CoreProgram +cseProgram binds = snd (mapAccumL (cseBind TopLevel) emptyCSEnv binds) + +cseBind :: TopLevelFlag -> CSEnv -> CoreBind -> (CSEnv, CoreBind) +cseBind toplevel env (NonRec b e) + = (env2, NonRec b2 e2) + where + (env1, b1) = addBinder env b + (env2, (b2, e2)) = cse_bind toplevel env1 (b,e) b1 + +cseBind toplevel env (Rec [(in_id, rhs)]) + | noCSE in_id + = (env1, Rec [(out_id, rhs')]) + + -- See Note [CSE for recursive bindings] + | Just previous <- lookupCSRecEnv env out_id rhs'' + , let previous' = mkTicks ticks previous + out_id' = delayInlining toplevel out_id + = -- We have a hit in the recursive-binding cache + (extendCSSubst env1 in_id previous', NonRec out_id' previous') + + | otherwise + = (extendCSRecEnv env1 out_id rhs'' id_expr', Rec [(zapped_id, rhs')]) + + where + (env1, [out_id]) = addRecBinders env [in_id] + rhs' = cseExpr env1 rhs + rhs'' = stripTicksE tickishFloatable rhs' + ticks = stripTicksT tickishFloatable rhs' + id_expr' = varToCoreExpr out_id + zapped_id = zapIdUsageInfo out_id + +cseBind toplevel env (Rec pairs) + = (env2, Rec pairs') + where + (env1, bndrs1) = addRecBinders env (map fst pairs) + (env2, pairs') = mapAccumL do_one env1 (zip pairs bndrs1) + + do_one env (pr, b1) = cse_bind toplevel env pr b1 + +-- | Given a binding of @in_id@ to @in_rhs@, and a fresh name to refer +-- to @in_id@ (@out_id@, created from addBinder or addRecBinders), +-- first try to CSE @in_rhs@, and then add the resulting (possibly CSE'd) +-- binding to the 'CSEnv', so that we attempt to CSE any expressions +-- which are equal to @out_rhs@. +cse_bind :: TopLevelFlag -> CSEnv -> (InId, InExpr) -> OutId -> (CSEnv, (OutId, OutExpr)) +cse_bind toplevel env (in_id, in_rhs) out_id + | isTopLevel toplevel, exprIsTickedString in_rhs + -- See Note [Take care with literal strings] + = (env', (out_id', in_rhs)) + + | Just arity <- isJoinId_maybe in_id + -- See Note [Look inside join-point binders] + = let (params, in_body) = collectNBinders arity in_rhs + (env', params') = addBinders env params + out_body = tryForCSE env' in_body + in (env, (out_id, mkLams params' out_body)) + + | otherwise + = (env', (out_id'', out_rhs)) + where + (env', out_id') = addBinding env in_id out_id out_rhs + (cse_done, out_rhs) = try_for_cse env in_rhs + out_id'' | cse_done = delayInlining toplevel out_id' + | otherwise = out_id' + +delayInlining :: TopLevelFlag -> Id -> Id +-- Add a NOINLINE[2] if the Id doesn't have an INLNE pragma already +-- See Note [Delay inlining after CSE] +delayInlining top_lvl bndr + | isTopLevel top_lvl + , isAlwaysActive (idInlineActivation bndr) + , idHasRules bndr -- Only if the Id has some RULES, + -- which might otherwise get lost + -- These rules are probably auto-generated specialisations, + -- since Ids with manual rules usually have manually-inserted + -- delayed inlining anyway + = bndr `setInlineActivation` activeAfterInitial + | otherwise + = bndr + +addBinding :: CSEnv -- Includes InId->OutId cloning + -> InVar -- Could be a let-bound type + -> OutId -> OutExpr -- Processed binding + -> (CSEnv, OutId) -- Final env, final bndr +-- Extend the CSE env with a mapping [rhs -> out-id] +-- unless we can instead just substitute [in-id -> rhs] +-- +-- It's possible for the binder to be a type variable (see +-- Note [Type-let] in GHC.Core), in which case we can just substitute. +addBinding env in_id out_id rhs' + | not (isId in_id) = (extendCSSubst env in_id rhs', out_id) + | noCSE in_id = (env, out_id) + | use_subst = (extendCSSubst env in_id rhs', out_id) + | otherwise = (extendCSEnv env rhs' id_expr', zapped_id) + where + id_expr' = varToCoreExpr out_id + zapped_id = zapIdUsageInfo out_id + -- Putting the Id into the cs_map makes it possible that + -- it'll become shared more than it is now, which would + -- invalidate (the usage part of) its demand info. + -- This caused #100218. + -- Easiest thing is to zap the usage info; subsequently + -- performing late demand-analysis will restore it. Don't zap + -- the strictness info; it's not necessary to do so, and losing + -- it is bad for performance if you don't do late demand + -- analysis + + -- Should we use SUBSTITUTE or EXTEND? + -- See Note [CSE for bindings] + use_subst = case rhs' of + Var {} -> True + _ -> False + +-- | Given a binder `let x = e`, this function +-- determines whether we should add `e -> x` to the cs_map +noCSE :: InId -> Bool +noCSE id = not (isAlwaysActive (idInlineActivation id)) && + not (noUserInlineSpec (inlinePragmaSpec (idInlinePragma id))) + -- See Note [CSE for INLINE and NOINLINE] + || isAnyInlinePragma (idInlinePragma id) + -- See Note [CSE for stable unfoldings] + || isJoinId id + -- See Note [CSE for join points?] + + +{- Note [Take care with literal strings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this example: + + x = "foo"# + y = "foo"# + ...x...y...x...y.... + +We would normally turn this into: + + x = "foo"# + y = x + ...x...x...x...x.... + +But this breaks an invariant of Core, namely that the RHS of a top-level binding +of type Addr# must be a string literal, not another variable. See Note +[Core top-level string literals] in GHC.Core. + +For this reason, we special case top-level bindings to literal strings and leave +the original RHS unmodified. This produces: + + x = "foo"# + y = "foo"# + ...x...x...x...x.... + +Now 'y' will be discarded as dead code, and we are done. + +The net effect is that for the y-binding we want to + - Use SUBSTITUTE, by extending the substitution with y :-> x + - but leave the original binding for y undisturbed + +This is done by cse_bind. I got it wrong the first time (#13367). + +Note [Delay inlining after CSE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose (#15445) we have + f,g :: Num a => a -> a + f x = ...f (x-1)..... + g y = ...g (y-1) .... + +and we make some specialisations of 'g', either automatically, or via +a SPECIALISE pragma. Then CSE kicks in and notices that the RHSs of +'f' and 'g' are identical, so we get + f x = ...f (x-1)... + g = f + {-# RULES g @Int _ = $sg #-} + +Now there is terrible danger that, in an importing module, we'll inline +'g' before we have a chance to run its specialisation! + +Solution: during CSE, after a "hit" in the CSE cache + * when adding a binding + g = f + * for a top-level function g + * and g has specialisation RULES +add a NOINLINE[2] activation to it, to ensure it's not inlined +right away. + +Notes: +* Why top level only? Because for nested bindings we are already past + phase 2 and will never return there. + +* Why "only if g has RULES"? Because there is no point in + doing this if there are no RULES; and other things being + equal it delays optimisation to delay inlining (#17409) + + +---- Historical note --- + +This patch is simpler and more direct than an earlier +version: + + commit 2110738b280543698407924a16ac92b6d804dc36 + Author: Simon Peyton Jones <simonpj@microsoft.com> + Date: Mon Jul 30 13:43:56 2018 +0100 + + Don't inline functions with RULES too early + +We had to revert this patch because it made GHC itself slower. + +Why? It delayed inlining of /all/ functions with RULES, and that was +very bad in TcFlatten.flatten_ty_con_app + +* It delayed inlining of liftM +* That delayed the unravelling of the recursion in some dictionary + bindings. +* That delayed some eta expansion, leaving + flatten_ty_con_app = \x y. let <stuff> in \z. blah +* That allowed the float-out pass to put sguff between + the \y and \z. +* And that permanently stopped eta expansion of the function, + even once <stuff> was simplified. + +-} + +tryForCSE :: CSEnv -> InExpr -> OutExpr +tryForCSE env expr = snd (try_for_cse env expr) + +try_for_cse :: CSEnv -> InExpr -> (Bool, OutExpr) +-- (False, e') => We did not CSE the entire expression, +-- but we might have CSE'd some sub-expressions, +-- yielding e' +-- +-- (True, te') => We CSE'd the entire expression, +-- yielding the trivial expression te' +try_for_cse env expr + | Just e <- lookupCSEnv env expr'' = (True, mkTicks ticks e) + | otherwise = (False, expr') + -- The varToCoreExpr is needed if we have + -- case e of xco { ...case e of yco { ... } ... } + -- Then CSE will substitute yco -> xco; + -- but these are /coercion/ variables + where + expr' = cseExpr env expr + expr'' = stripTicksE tickishFloatable expr' + ticks = stripTicksT tickishFloatable expr' + -- We don't want to lose the source notes when a common sub + -- expression gets eliminated. Hence we push all (!) of them on + -- top of the replaced sub-expression. This is probably not too + -- useful in practice, but upholds our semantics. + +-- | Runs CSE on a single expression. +-- +-- This entry point is not used in the compiler itself, but is provided +-- as a convenient entry point for users of the GHC API. +cseOneExpr :: InExpr -> OutExpr +cseOneExpr e = cseExpr env e + where env = emptyCSEnv {cs_subst = mkEmptySubst (mkInScopeSet (exprFreeVars e)) } + +cseExpr :: CSEnv -> InExpr -> OutExpr +cseExpr env (Type t) = Type (substTy (csEnvSubst env) t) +cseExpr env (Coercion c) = Coercion (substCo (csEnvSubst env) c) +cseExpr _ (Lit lit) = Lit lit +cseExpr env (Var v) = lookupSubst env v +cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a) +cseExpr env (Tick t e) = Tick t (cseExpr env e) +cseExpr env (Cast e co) = Cast (tryForCSE env e) (substCo (csEnvSubst env) co) +cseExpr env (Lam b e) = let (env', b') = addBinder env b + in Lam b' (cseExpr env' e) +cseExpr env (Let bind e) = let (env', bind') = cseBind NotTopLevel env bind + in Let bind' (cseExpr env' e) +cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts + +cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr +cseCase env scrut bndr ty alts + = Case scrut1 bndr3 ty' $ + combineAlts alt_env (map cse_alt alts) + where + ty' = substTy (csEnvSubst env) ty + scrut1 = tryForCSE env scrut + + bndr1 = zapIdOccInfo bndr + -- Zapping the OccInfo is needed because the extendCSEnv + -- in cse_alt may mean that a dead case binder + -- becomes alive, and Lint rejects that + (env1, bndr2) = addBinder env bndr1 + (alt_env, bndr3) = addBinding env1 bndr bndr2 scrut1 + -- addBinding: see Note [CSE for case expressions] + + con_target :: OutExpr + con_target = lookupSubst alt_env bndr + + arg_tys :: [OutType] + arg_tys = tyConAppArgs (idType bndr3) + + -- See Note [CSE for case alternatives] + cse_alt (DataAlt con, args, rhs) + = (DataAlt con, args', tryForCSE new_env rhs) + where + (env', args') = addBinders alt_env args + new_env = extendCSEnv env' con_expr con_target + con_expr = mkAltExpr (DataAlt con) args' arg_tys + + cse_alt (con, args, rhs) + = (con, args', tryForCSE env' rhs) + where + (env', args') = addBinders alt_env args + +combineAlts :: CSEnv -> [OutAlt] -> [OutAlt] +-- See Note [Combine case alternatives] +combineAlts env alts + | (Just alt1, rest_alts) <- find_bndr_free_alt alts + , (_,bndrs1,rhs1) <- alt1 + , let filtered_alts = filterOut (identical_alt rhs1) rest_alts + , not (equalLength rest_alts filtered_alts) + = ASSERT2( null bndrs1, ppr alts ) + (DEFAULT, [], rhs1) : filtered_alts + + | otherwise + = alts + where + in_scope = substInScope (csEnvSubst env) + + find_bndr_free_alt :: [CoreAlt] -> (Maybe CoreAlt, [CoreAlt]) + -- The (Just alt) is a binder-free alt + -- See Note [Combine case alts: awkward corner] + find_bndr_free_alt [] + = (Nothing, []) + find_bndr_free_alt (alt@(_,bndrs,_) : alts) + | null bndrs = (Just alt, alts) + | otherwise = case find_bndr_free_alt alts of + (mb_bf, alts) -> (mb_bf, alt:alts) + + identical_alt rhs1 (_,_,rhs) = eqExpr in_scope rhs1 rhs + -- Even if this alt has binders, they will have been cloned + -- If any of these binders are mentioned in 'rhs', then + -- 'rhs' won't compare equal to 'rhs1' (which is from an + -- alt with no binders). + +{- Note [CSE for case alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider case e of x + K1 y -> ....(K1 y)... + K2 -> ....K2.... + +We definitely want to CSE that (K1 y) into just x. + +But what about the lone K2? At first you would think "no" because +turning K2 into 'x' increases the number of live variables. But + +* Turning K2 into x increases the chance of combining identical alts. + Example case xs of + (_:_) -> f xs + [] -> f [] + See #17901 and simplCore/should_compile/T17901 for more examples + of this kind. + +* The next run of the simplifier will turn 'x' back into K2, so we won't + permanently bloat the free-var count. + + +Note [Combine case alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +combineAlts is just a more heavyweight version of the use of +combineIdenticalAlts in GHC.Core.Op.Simplify.Utils.prepareAlts. The basic idea is +to transform + + DEFAULT -> e1 + K x -> e1 + W y z -> e2 +===> + DEFAULT -> e1 + W y z -> e2 + +In the simplifier we use cheapEqExpr, because it is called a lot. +But here in CSE we use the full eqExpr. After all, two alternatives usually +differ near the root, so it probably isn't expensive to compare the full +alternative. It seems like the same kind of thing that CSE is supposed +to be doing, which is why I put it here. + +I actually saw some examples in the wild, where some inlining made e1 too +big for cheapEqExpr to catch it. + +Note [Combine case alts: awkward corner] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We would really like to check isDeadBinder on the binders in the +alternative. But alas, the simplifer zaps occ-info on binders in case +alternatives; see Note [Case alternative occ info] in GHC.Core.Op.Simplify. + +* One alternative (perhaps a good one) would be to do OccAnal + just before CSE. Then perhaps we could get rid of combineIdenticalAlts + in the Simplifier, which might save work. + +* Another would be for CSE to return free vars as it goes. + +* But the current solution is to find a nullary alternative (including + the DEFAULT alt, if any). This will not catch + case x of + A y -> blah + B z p -> blah + where no alternative is nullary or DEFAULT. But the current + solution is at least cheap. + + +************************************************************************ +* * +\section{The CSE envt} +* * +************************************************************************ +-} + +data CSEnv + = CS { cs_subst :: Subst -- Maps InBndrs to OutExprs + -- The substitution variables to + -- /trivial/ OutExprs, not arbitrary expressions + + , cs_map :: CoreMap OutExpr -- The reverse mapping + -- Maps a OutExpr to a /trivial/ OutExpr + -- The key of cs_map is stripped of all Ticks + + , cs_rec_map :: CoreMap OutExpr + -- See Note [CSE for recursive bindings] + } + +emptyCSEnv :: CSEnv +emptyCSEnv = CS { cs_map = emptyCoreMap, cs_rec_map = emptyCoreMap + , cs_subst = emptySubst } + +lookupCSEnv :: CSEnv -> OutExpr -> Maybe OutExpr +lookupCSEnv (CS { cs_map = csmap }) expr + = lookupCoreMap csmap expr + +extendCSEnv :: CSEnv -> OutExpr -> OutExpr -> CSEnv +extendCSEnv cse expr triv_expr + = cse { cs_map = extendCoreMap (cs_map cse) sexpr triv_expr } + where + sexpr = stripTicksE tickishFloatable expr + +extendCSRecEnv :: CSEnv -> OutId -> OutExpr -> OutExpr -> CSEnv +-- See Note [CSE for recursive bindings] +extendCSRecEnv cse bndr expr triv_expr + = cse { cs_rec_map = extendCoreMap (cs_rec_map cse) (Lam bndr expr) triv_expr } + +lookupCSRecEnv :: CSEnv -> OutId -> OutExpr -> Maybe OutExpr +-- See Note [CSE for recursive bindings] +lookupCSRecEnv (CS { cs_rec_map = csmap }) bndr expr + = lookupCoreMap csmap (Lam bndr expr) + +csEnvSubst :: CSEnv -> Subst +csEnvSubst = cs_subst + +lookupSubst :: CSEnv -> Id -> OutExpr +lookupSubst (CS { cs_subst = sub}) x = lookupIdSubst (text "CSE.lookupSubst") sub x + +extendCSSubst :: CSEnv -> Id -> CoreExpr -> CSEnv +extendCSSubst cse x rhs = cse { cs_subst = extendSubst (cs_subst cse) x rhs } + +-- | Add clones to the substitution to deal with shadowing. See +-- Note [Shadowing] for more details. You should call this whenever +-- you go under a binder. +addBinder :: CSEnv -> Var -> (CSEnv, Var) +addBinder cse v = (cse { cs_subst = sub' }, v') + where + (sub', v') = substBndr (cs_subst cse) v + +addBinders :: CSEnv -> [Var] -> (CSEnv, [Var]) +addBinders cse vs = (cse { cs_subst = sub' }, vs') + where + (sub', vs') = substBndrs (cs_subst cse) vs + +addRecBinders :: CSEnv -> [Id] -> (CSEnv, [Id]) +addRecBinders cse vs = (cse { cs_subst = sub' }, vs') + where + (sub', vs') = substRecBndrs (cs_subst cse) vs diff --git a/compiler/GHC/Core/Op/CallArity.hs b/compiler/GHC/Core/Op/CallArity.hs new file mode 100644 index 0000000000..aaf3372071 --- /dev/null +++ b/compiler/GHC/Core/Op/CallArity.hs @@ -0,0 +1,763 @@ +-- +-- Copyright (c) 2014 Joachim Breitner +-- + +module GHC.Core.Op.CallArity + ( callArityAnalProgram + , callArityRHS -- for testing + ) where + +import GhcPrelude + +import VarSet +import VarEnv +import GHC.Driver.Session ( DynFlags ) + +import BasicTypes +import GHC.Core +import Id +import GHC.Core.Arity ( typeArity ) +import GHC.Core.Utils ( exprIsCheap, exprIsTrivial ) +import UnVarGraph +import Demand +import Util + +import Control.Arrow ( first, second ) + + +{- +%************************************************************************ +%* * + Call Arity Analysis +%* * +%************************************************************************ + +Note [Call Arity: The goal] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The goal of this analysis is to find out if we can eta-expand a local function, +based on how it is being called. The motivating example is this code, +which comes up when we implement foldl using foldr, and do list fusion: + + let go = \x -> let d = case ... of + False -> go (x+1) + True -> id + in \z -> d (x + z) + in go 1 0 + +If we do not eta-expand `go` to have arity 2, we are going to allocate a lot of +partial function applications, which would be bad. + +The function `go` has a type of arity two, but only one lambda is manifest. +Furthermore, an analysis that only looks at the RHS of go cannot be sufficient +to eta-expand go: If `go` is ever called with one argument (and the result used +multiple times), we would be doing the work in `...` multiple times. + +So `callArityAnalProgram` looks at the whole let expression to figure out if +all calls are nice, i.e. have a high enough arity. It then stores the result in +the `calledArity` field of the `IdInfo` of `go`, which the next simplifier +phase will eta-expand. + +The specification of the `calledArity` field is: + + No work will be lost if you eta-expand me to the arity in `calledArity`. + +What we want to know for a variable +----------------------------------- + +For every let-bound variable we'd like to know: + 1. A lower bound on the arity of all calls to the variable, and + 2. whether the variable is being called at most once or possible multiple + times. + +It is always ok to lower the arity, or pretend that there are multiple calls. +In particular, "Minimum arity 0 and possible called multiple times" is always +correct. + + +What we want to know from an expression +--------------------------------------- + +In order to obtain that information for variables, we analyze expression and +obtain bits of information: + + I. The arity analysis: + For every variable, whether it is absent, or called, + and if called, which what arity. + + II. The Co-Called analysis: + For every two variables, whether there is a possibility that both are being + called. + We obtain as a special case: For every variables, whether there is a + possibility that it is being called twice. + +For efficiency reasons, we gather this information only for a set of +*interesting variables*, to avoid spending time on, e.g., variables from pattern matches. + +The two analysis are not completely independent, as a higher arity can improve +the information about what variables are being called once or multiple times. + +Note [Analysis I: The arity analysis] +------------------------------------ + +The arity analysis is quite straight forward: The information about an +expression is an + VarEnv Arity +where absent variables are bound to Nothing and otherwise to a lower bound to +their arity. + +When we analyze an expression, we analyze it with a given context arity. +Lambdas decrease and applications increase the incoming arity. Analysizing a +variable will put that arity in the environment. In lets or cases all the +results from the various subexpressions are lubed, which takes the point-wise +minimum (considering Nothing an infinity). + + +Note [Analysis II: The Co-Called analysis] +------------------------------------------ + +The second part is more sophisticated. For reasons explained below, it is not +sufficient to simply know how often an expression evaluates a variable. Instead +we need to know which variables are possibly called together. + +The data structure here is an undirected graph of variables, which is provided +by the abstract + UnVarGraph + +It is safe to return a larger graph, i.e. one with more edges. The worst case +(i.e. the least useful and always correct result) is the complete graph on all +free variables, which means that anything can be called together with anything +(including itself). + +Notation for the following: +C(e) is the co-called result for e. +G₁∪G₂ is the union of two graphs +fv is the set of free variables (conveniently the domain of the arity analysis result) +S₁×S₂ is the complete bipartite graph { {a,b} | a ∈ S₁, b ∈ S₂ } +S² is the complete graph on the set of variables S, S² = S×S +C'(e) is a variant for bound expression: + If e is called at most once, or it is and stays a thunk (after the analysis), + it is simply C(e). Otherwise, the expression can be called multiple times + and we return (fv e)² + +The interesting cases of the analysis: + * Var v: + No other variables are being called. + Return {} (the empty graph) + * Lambda v e, under arity 0: + This means that e can be evaluated many times and we cannot get + any useful co-call information. + Return (fv e)² + * Case alternatives alt₁,alt₂,...: + Only one can be execuded, so + Return (alt₁ ∪ alt₂ ∪...) + * App e₁ e₂ (and analogously Case scrut alts), with non-trivial e₂: + We get the results from both sides, with the argument evaluated at most once. + Additionally, anything called by e₁ can possibly be called with anything + from e₂. + Return: C(e₁) ∪ C(e₂) ∪ (fv e₁) × (fv e₂) + * App e₁ x: + As this is already in A-normal form, CorePrep will not separately lambda + bind (and hence share) x. So we conservatively assume multiple calls to x here + Return: C(e₁) ∪ (fv e₁) × {x} ∪ {(x,x)} + * Let v = rhs in body: + In addition to the results from the subexpressions, add all co-calls from + everything that the body calls together with v to everything that is called + by v. + Return: C'(rhs) ∪ C(body) ∪ (fv rhs) × {v'| {v,v'} ∈ C(body)} + * Letrec v₁ = rhs₁ ... vₙ = rhsₙ in body + Tricky. + We assume that it is really mutually recursive, i.e. that every variable + calls one of the others, and that this is strongly connected (otherwise we + return an over-approximation, so that's ok), see note [Recursion and fixpointing]. + + Let V = {v₁,...vₙ}. + Assume that the vs have been analysed with an incoming demand and + cardinality consistent with the final result (this is the fixed-pointing). + Again we can use the results from all subexpressions. + In addition, for every variable vᵢ, we need to find out what it is called + with (call this set Sᵢ). There are two cases: + * If vᵢ is a function, we need to go through all right-hand-sides and bodies, + and collect every variable that is called together with any variable from V: + Sᵢ = {v' | j ∈ {1,...,n}, {v',vⱼ} ∈ C'(rhs₁) ∪ ... ∪ C'(rhsₙ) ∪ C(body) } + * If vᵢ is a thunk, then its rhs is evaluated only once, so we need to + exclude it from this set: + Sᵢ = {v' | j ∈ {1,...,n}, j≠i, {v',vⱼ} ∈ C'(rhs₁) ∪ ... ∪ C'(rhsₙ) ∪ C(body) } + Finally, combine all this: + Return: C(body) ∪ + C'(rhs₁) ∪ ... ∪ C'(rhsₙ) ∪ + (fv rhs₁) × S₁) ∪ ... ∪ (fv rhsₙ) × Sₙ) + +Using the result: Eta-Expansion +------------------------------- + +We use the result of these two analyses to decide whether we can eta-expand the +rhs of a let-bound variable. + +If the variable is already a function (exprIsCheap), and all calls to the +variables have a higher arity than the current manifest arity (i.e. the number +of lambdas), expand. + +If the variable is a thunk we must be careful: Eta-Expansion will prevent +sharing of work, so this is only safe if there is at most one call to the +function. Therefore, we check whether {v,v} ∈ G. + + Example: + + let n = case .. of .. -- A thunk! + in n 0 + n 1 + + vs. + + let n = case .. of .. + in case .. of T -> n 0 + F -> n 1 + + We are only allowed to eta-expand `n` if it is going to be called at most + once in the body of the outer let. So we need to know, for each variable + individually, that it is going to be called at most once. + + +Why the co-call graph? +---------------------- + +Why is it not sufficient to simply remember which variables are called once and +which are called multiple times? It would be in the previous example, but consider + + let n = case .. of .. + in case .. of + True -> let go = \y -> case .. of + True -> go (y + n 1) + False > n + in go 1 + False -> n + +vs. + + let n = case .. of .. + in case .. of + True -> let go = \y -> case .. of + True -> go (y+1) + False > n + in go 1 + False -> n + +In both cases, the body and the rhs of the inner let call n at most once. +But only in the second case that holds for the whole expression! The +crucial difference is that in the first case, the rhs of `go` can call +*both* `go` and `n`, and hence can call `n` multiple times as it recurses, +while in the second case find out that `go` and `n` are not called together. + + +Why co-call information for functions? +-------------------------------------- + +Although for eta-expansion we need the information only for thunks, we still +need to know whether functions are being called once or multiple times, and +together with what other functions. + + Example: + + let n = case .. of .. + f x = n (x+1) + in f 1 + f 2 + + vs. + + let n = case .. of .. + f x = n (x+1) + in case .. of T -> f 0 + F -> f 1 + + Here, the body of f calls n exactly once, but f itself is being called + multiple times, so eta-expansion is not allowed. + + +Note [Analysis type signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The work-hourse of the analysis is the function `callArityAnal`, with the +following type: + + type CallArityRes = (UnVarGraph, VarEnv Arity) + callArityAnal :: + Arity -> -- The arity this expression is called with + VarSet -> -- The set of interesting variables + CoreExpr -> -- The expression to analyse + (CallArityRes, CoreExpr) + +and the following specification: + + ((coCalls, callArityEnv), expr') = callArityEnv arity interestingIds expr + + <=> + + Assume the expression `expr` is being passed `arity` arguments. Then it holds that + * The domain of `callArityEnv` is a subset of `interestingIds`. + * Any variable from `interestingIds` that is not mentioned in the `callArityEnv` + is absent, i.e. not called at all. + * Every call from `expr` to a variable bound to n in `callArityEnv` has at + least n value arguments. + * For two interesting variables `v1` and `v2`, they are not adjacent in `coCalls`, + then in no execution of `expr` both are being called. + Furthermore, expr' is expr with the callArity field of the `IdInfo` updated. + + +Note [Which variables are interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The analysis would quickly become prohibitive expensive if we would analyse all +variables; for most variables we simply do not care about how often they are +called, i.e. variables bound in a pattern match. So interesting are variables that are + * top-level or let bound + * and possibly functions (typeArity > 0) + +Note [Taking boring variables into account] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If we decide that the variable bound in `let x = e1 in e2` is not interesting, +the analysis of `e2` will not report anything about `x`. To ensure that +`callArityBind` does still do the right thing we have to take that into account +every time we would be lookup up `x` in the analysis result of `e2`. + * Instead of calling lookupCallArityRes, we return (0, True), indicating + that this variable might be called many times with no arguments. + * Instead of checking `calledWith x`, we assume that everything can be called + with it. + * In the recursive case, when calclulating the `cross_calls`, if there is + any boring variable in the recursive group, we ignore all co-call-results + and directly go to a very conservative assumption. + +The last point has the nice side effect that the relatively expensive +integration of co-call results in a recursive groups is often skipped. This +helped to avoid the compile time blowup in some real-world code with large +recursive groups (#10293). + +Note [Recursion and fixpointing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For a mutually recursive let, we begin by + 1. analysing the body, using the same incoming arity as for the whole expression. + 2. Then we iterate, memoizing for each of the bound variables the last + analysis call, i.e. incoming arity, whether it is called once, and the CallArityRes. + 3. We combine the analysis result from the body and the memoized results for + the arguments (if already present). + 4. For each variable, we find out the incoming arity and whether it is called + once, based on the current analysis result. If this differs from the + memoized results, we re-analyse the rhs and update the memoized table. + 5. If nothing had to be reanalyzed, we are done. + Otherwise, repeat from step 3. + + +Note [Thunks in recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We never eta-expand a thunk in a recursive group, on the grounds that if it is +part of a recursive group, then it will be called multiple times. + +This is not necessarily true, e.g. it would be safe to eta-expand t2 (but not +t1) in the following code: + + let go x = t1 + t1 = if ... then t2 else ... + t2 = if ... then go 1 else ... + in go 0 + +Detecting this would require finding out what variables are only ever called +from thunks. While this is certainly possible, we yet have to see this to be +relevant in the wild. + + +Note [Analysing top-level binds] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We can eta-expand top-level-binds if they are not exported, as we see all calls +to them. The plan is as follows: Treat the top-level binds as nested lets around +a body representing “all external calls”, which returns a pessimistic +CallArityRes (the co-call graph is the complete graph, all arityies 0). + +Note [Trimming arity] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the Call Arity papers, we are working on an untyped lambda calculus with no +other id annotations, where eta-expansion is always possible. But this is not +the case for Core! + 1. We need to ensure the invariant + callArity e <= typeArity (exprType e) + for the same reasons that exprArity needs this invariant (see Note + [exprArity invariant] in GHC.Core.Arity). + + If we are not doing that, a too-high arity annotation will be stored with + the id, confusing the simplifier later on. + + 2. Eta-expanding a right hand side might invalidate existing annotations. In + particular, if an id has a strictness annotation of <...><...>b, then + passing two arguments to it will definitely bottom out, so the simplifier + will throw away additional parameters. This conflicts with Call Arity! So + we ensure that we never eta-expand such a value beyond the number of + arguments mentioned in the strictness signature. + See #10176 for a real-world-example. + +Note [What is a thunk] +~~~~~~~~~~~~~~~~~~~~~~ + +Originally, everything that is not in WHNF (`exprIsWHNF`) is considered a +thunk, not eta-expanded, to avoid losing any sharing. This is also how the +published papers on Call Arity describe it. + +In practice, there are thunks that do a just little work, such as +pattern-matching on a variable, and the benefits of eta-expansion likely +outweigh the cost of doing that repeatedly. Therefore, this implementation of +Call Arity considers everything that is not cheap (`exprIsCheap`) as a thunk. + +Note [Call Arity and Join Points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Call Arity analysis does not care about join points, and treats them just +like normal functions. This is ok. + +The analysis *could* make use of the fact that join points are always evaluated +in the same context as the join-binding they are defined in and are always +one-shot, and handle join points separately, as suggested in +https://gitlab.haskell.org/ghc/ghc/issues/13479#note_134870. +This *might* be more efficient (for example, join points would not have to be +considered interesting variables), but it would also add redundant code. So for +now we do not do that. + +The simplifier never eta-expands join points (it instead pushes extra arguments from +an eta-expanded context into the join point’s RHS), so the call arity +annotation on join points is not actually used. As it would be equally valid +(though less efficient) to eta-expand join points, this is the simplifier's +choice, and hence Call Arity sets the call arity for join points as well. +-} + +-- Main entry point + +callArityAnalProgram :: DynFlags -> CoreProgram -> CoreProgram +callArityAnalProgram _dflags binds = binds' + where + (_, binds') = callArityTopLvl [] emptyVarSet binds + +-- See Note [Analysing top-level-binds] +callArityTopLvl :: [Var] -> VarSet -> [CoreBind] -> (CallArityRes, [CoreBind]) +callArityTopLvl exported _ [] + = ( calledMultipleTimes $ (emptyUnVarGraph, mkVarEnv $ [(v, 0) | v <- exported]) + , [] ) +callArityTopLvl exported int1 (b:bs) + = (ae2, b':bs') + where + int2 = bindersOf b + exported' = filter isExportedId int2 ++ exported + int' = int1 `addInterestingBinds` b + (ae1, bs') = callArityTopLvl exported' int' bs + (ae2, b') = callArityBind (boringBinds b) ae1 int1 b + + +callArityRHS :: CoreExpr -> CoreExpr +callArityRHS = snd . callArityAnal 0 emptyVarSet + +-- The main analysis function. See Note [Analysis type signature] +callArityAnal :: + Arity -> -- The arity this expression is called with + VarSet -> -- The set of interesting variables + CoreExpr -> -- The expression to analyse + (CallArityRes, CoreExpr) + -- How this expression uses its interesting variables + -- and the expression with IdInfo updated + +-- The trivial base cases +callArityAnal _ _ e@(Lit _) + = (emptyArityRes, e) +callArityAnal _ _ e@(Type _) + = (emptyArityRes, e) +callArityAnal _ _ e@(Coercion _) + = (emptyArityRes, e) +-- The transparent cases +callArityAnal arity int (Tick t e) + = second (Tick t) $ callArityAnal arity int e +callArityAnal arity int (Cast e co) + = second (\e -> Cast e co) $ callArityAnal arity int e + +-- The interesting case: Variables, Lambdas, Lets, Applications, Cases +callArityAnal arity int e@(Var v) + | v `elemVarSet` int + = (unitArityRes v arity, e) + | otherwise + = (emptyArityRes, e) + +-- Non-value lambdas are ignored +callArityAnal arity int (Lam v e) | not (isId v) + = second (Lam v) $ callArityAnal arity (int `delVarSet` v) e + +-- We have a lambda that may be called multiple times, so its free variables +-- can all be co-called. +callArityAnal 0 int (Lam v e) + = (ae', Lam v e') + where + (ae, e') = callArityAnal 0 (int `delVarSet` v) e + ae' = calledMultipleTimes ae +-- We have a lambda that we are calling. decrease arity. +callArityAnal arity int (Lam v e) + = (ae, Lam v e') + where + (ae, e') = callArityAnal (arity - 1) (int `delVarSet` v) e + +-- Application. Increase arity for the called expression, nothing to know about +-- the second +callArityAnal arity int (App e (Type t)) + = second (\e -> App e (Type t)) $ callArityAnal arity int e +callArityAnal arity int (App e1 e2) + = (final_ae, App e1' e2') + where + (ae1, e1') = callArityAnal (arity + 1) int e1 + (ae2, e2') = callArityAnal 0 int e2 + -- If the argument is trivial (e.g. a variable), then it will _not_ be + -- let-bound in the Core to STG transformation (CorePrep actually), + -- so no sharing will happen here, and we have to assume many calls. + ae2' | exprIsTrivial e2 = calledMultipleTimes ae2 + | otherwise = ae2 + final_ae = ae1 `both` ae2' + +-- Case expression. +callArityAnal arity int (Case scrut bndr ty alts) + = -- pprTrace "callArityAnal:Case" + -- (vcat [ppr scrut, ppr final_ae]) + (final_ae, Case scrut' bndr ty alts') + where + (alt_aes, alts') = unzip $ map go alts + go (dc, bndrs, e) = let (ae, e') = callArityAnal arity int e + in (ae, (dc, bndrs, e')) + alt_ae = lubRess alt_aes + (scrut_ae, scrut') = callArityAnal 0 int scrut + final_ae = scrut_ae `both` alt_ae + +-- For lets, use callArityBind +callArityAnal arity int (Let bind e) + = -- pprTrace "callArityAnal:Let" + -- (vcat [ppr v, ppr arity, ppr n, ppr final_ae ]) + (final_ae, Let bind' e') + where + int_body = int `addInterestingBinds` bind + (ae_body, e') = callArityAnal arity int_body e + (final_ae, bind') = callArityBind (boringBinds bind) ae_body int bind + +-- Which bindings should we look at? +-- See Note [Which variables are interesting] +isInteresting :: Var -> Bool +isInteresting v = not $ null (typeArity (idType v)) + +interestingBinds :: CoreBind -> [Var] +interestingBinds = filter isInteresting . bindersOf + +boringBinds :: CoreBind -> VarSet +boringBinds = mkVarSet . filter (not . isInteresting) . bindersOf + +addInterestingBinds :: VarSet -> CoreBind -> VarSet +addInterestingBinds int bind + = int `delVarSetList` bindersOf bind -- Possible shadowing + `extendVarSetList` interestingBinds bind + +-- Used for both local and top-level binds +-- Second argument is the demand from the body +callArityBind :: VarSet -> CallArityRes -> VarSet -> CoreBind -> (CallArityRes, CoreBind) +-- Non-recursive let +callArityBind boring_vars ae_body int (NonRec v rhs) + | otherwise + = -- pprTrace "callArityBind:NonRec" + -- (vcat [ppr v, ppr ae_body, ppr int, ppr ae_rhs, ppr safe_arity]) + (final_ae, NonRec v' rhs') + where + is_thunk = not (exprIsCheap rhs) -- see note [What is a thunk] + -- If v is boring, we will not find it in ae_body, but always assume (0, False) + boring = v `elemVarSet` boring_vars + + (arity, called_once) + | boring = (0, False) -- See Note [Taking boring variables into account] + | otherwise = lookupCallArityRes ae_body v + safe_arity | called_once = arity + | is_thunk = 0 -- A thunk! Do not eta-expand + | otherwise = arity + + -- See Note [Trimming arity] + trimmed_arity = trimArity v safe_arity + + (ae_rhs, rhs') = callArityAnal trimmed_arity int rhs + + + ae_rhs'| called_once = ae_rhs + | safe_arity == 0 = ae_rhs -- If it is not a function, its body is evaluated only once + | otherwise = calledMultipleTimes ae_rhs + + called_by_v = domRes ae_rhs' + called_with_v + | boring = domRes ae_body + | otherwise = calledWith ae_body v `delUnVarSet` v + final_ae = addCrossCoCalls called_by_v called_with_v $ ae_rhs' `lubRes` resDel v ae_body + + v' = v `setIdCallArity` trimmed_arity + + +-- Recursive let. See Note [Recursion and fixpointing] +callArityBind boring_vars ae_body int b@(Rec binds) + = -- (if length binds > 300 then + -- pprTrace "callArityBind:Rec" + -- (vcat [ppr (Rec binds'), ppr ae_body, ppr int, ppr ae_rhs]) else id) $ + (final_ae, Rec binds') + where + -- See Note [Taking boring variables into account] + any_boring = any (`elemVarSet` boring_vars) [ i | (i, _) <- binds] + + int_body = int `addInterestingBinds` b + (ae_rhs, binds') = fix initial_binds + final_ae = bindersOf b `resDelList` ae_rhs + + initial_binds = [(i,Nothing,e) | (i,e) <- binds] + + fix :: [(Id, Maybe (Bool, Arity, CallArityRes), CoreExpr)] -> (CallArityRes, [(Id, CoreExpr)]) + fix ann_binds + | -- pprTrace "callArityBind:fix" (vcat [ppr ann_binds, ppr any_change, ppr ae]) $ + any_change + = fix ann_binds' + | otherwise + = (ae, map (\(i, _, e) -> (i, e)) ann_binds') + where + aes_old = [ (i,ae) | (i, Just (_,_,ae), _) <- ann_binds ] + ae = callArityRecEnv any_boring aes_old ae_body + + rerun (i, mbLastRun, rhs) + | i `elemVarSet` int_body && not (i `elemUnVarSet` domRes ae) + -- No call to this yet, so do nothing + = (False, (i, Nothing, rhs)) + + | Just (old_called_once, old_arity, _) <- mbLastRun + , called_once == old_called_once + , new_arity == old_arity + -- No change, no need to re-analyze + = (False, (i, mbLastRun, rhs)) + + | otherwise + -- We previously analyzed this with a different arity (or not at all) + = let is_thunk = not (exprIsCheap rhs) -- see note [What is a thunk] + + safe_arity | is_thunk = 0 -- See Note [Thunks in recursive groups] + | otherwise = new_arity + + -- See Note [Trimming arity] + trimmed_arity = trimArity i safe_arity + + (ae_rhs, rhs') = callArityAnal trimmed_arity int_body rhs + + ae_rhs' | called_once = ae_rhs + | safe_arity == 0 = ae_rhs -- If it is not a function, its body is evaluated only once + | otherwise = calledMultipleTimes ae_rhs + + i' = i `setIdCallArity` trimmed_arity + + in (True, (i', Just (called_once, new_arity, ae_rhs'), rhs')) + where + -- See Note [Taking boring variables into account] + (new_arity, called_once) | i `elemVarSet` boring_vars = (0, False) + | otherwise = lookupCallArityRes ae i + + (changes, ann_binds') = unzip $ map rerun ann_binds + any_change = or changes + +-- Combining the results from body and rhs, (mutually) recursive case +-- See Note [Analysis II: The Co-Called analysis] +callArityRecEnv :: Bool -> [(Var, CallArityRes)] -> CallArityRes -> CallArityRes +callArityRecEnv any_boring ae_rhss ae_body + = -- (if length ae_rhss > 300 then pprTrace "callArityRecEnv" (vcat [ppr ae_rhss, ppr ae_body, ppr ae_new]) else id) $ + ae_new + where + vars = map fst ae_rhss + + ae_combined = lubRess (map snd ae_rhss) `lubRes` ae_body + + cross_calls + -- See Note [Taking boring variables into account] + | any_boring = completeGraph (domRes ae_combined) + -- Also, calculating cross_calls is expensive. Simply be conservative + -- if the mutually recursive group becomes too large. + | lengthExceeds ae_rhss 25 = completeGraph (domRes ae_combined) + | otherwise = unionUnVarGraphs $ map cross_call ae_rhss + cross_call (v, ae_rhs) = completeBipartiteGraph called_by_v called_with_v + where + is_thunk = idCallArity v == 0 + -- What rhs are relevant as happening before (or after) calling v? + -- If v is a thunk, everything from all the _other_ variables + -- If v is not a thunk, everything can happen. + ae_before_v | is_thunk = lubRess (map snd $ filter ((/= v) . fst) ae_rhss) `lubRes` ae_body + | otherwise = ae_combined + -- What do we want to know from these? + -- Which calls can happen next to any recursive call. + called_with_v + = unionUnVarSets $ map (calledWith ae_before_v) vars + called_by_v = domRes ae_rhs + + ae_new = first (cross_calls `unionUnVarGraph`) ae_combined + +-- See Note [Trimming arity] +trimArity :: Id -> Arity -> Arity +trimArity v a = minimum [a, max_arity_by_type, max_arity_by_strsig] + where + max_arity_by_type = length (typeArity (idType v)) + max_arity_by_strsig + | isBotDiv result_info = length demands + | otherwise = a + + (demands, result_info) = splitStrictSig (idStrictness v) + +--------------------------------------- +-- Functions related to CallArityRes -- +--------------------------------------- + +-- Result type for the two analyses. +-- See Note [Analysis I: The arity analysis] +-- and Note [Analysis II: The Co-Called analysis] +type CallArityRes = (UnVarGraph, VarEnv Arity) + +emptyArityRes :: CallArityRes +emptyArityRes = (emptyUnVarGraph, emptyVarEnv) + +unitArityRes :: Var -> Arity -> CallArityRes +unitArityRes v arity = (emptyUnVarGraph, unitVarEnv v arity) + +resDelList :: [Var] -> CallArityRes -> CallArityRes +resDelList vs ae = foldr resDel ae vs + +resDel :: Var -> CallArityRes -> CallArityRes +resDel v (g, ae) = (g `delNode` v, ae `delVarEnv` v) + +domRes :: CallArityRes -> UnVarSet +domRes (_, ae) = varEnvDom ae + +-- In the result, find out the minimum arity and whether the variable is called +-- at most once. +lookupCallArityRes :: CallArityRes -> Var -> (Arity, Bool) +lookupCallArityRes (g, ae) v + = case lookupVarEnv ae v of + Just a -> (a, not (g `hasLoopAt` v)) + Nothing -> (0, False) + +calledWith :: CallArityRes -> Var -> UnVarSet +calledWith (g, _) v = neighbors g v + +addCrossCoCalls :: UnVarSet -> UnVarSet -> CallArityRes -> CallArityRes +addCrossCoCalls set1 set2 = first (completeBipartiteGraph set1 set2 `unionUnVarGraph`) + +-- Replaces the co-call graph by a complete graph (i.e. no information) +calledMultipleTimes :: CallArityRes -> CallArityRes +calledMultipleTimes res = first (const (completeGraph (domRes res))) res + +-- Used for application and cases +both :: CallArityRes -> CallArityRes -> CallArityRes +both r1 r2 = addCrossCoCalls (domRes r1) (domRes r2) $ r1 `lubRes` r2 + +-- Used when combining results from alternative cases; take the minimum +lubRes :: CallArityRes -> CallArityRes -> CallArityRes +lubRes (g1, ae1) (g2, ae2) = (g1 `unionUnVarGraph` g2, ae1 `lubArityEnv` ae2) + +lubArityEnv :: VarEnv Arity -> VarEnv Arity -> VarEnv Arity +lubArityEnv = plusVarEnv_C min + +lubRess :: [CallArityRes] -> CallArityRes +lubRess = foldl' lubRes emptyArityRes diff --git a/compiler/GHC/Core/Op/ConstantFold.hs b/compiler/GHC/Core/Op/ConstantFold.hs new file mode 100644 index 0000000000..ae9ba8f262 --- /dev/null +++ b/compiler/GHC/Core/Op/ConstantFold.hs @@ -0,0 +1,2257 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section[ConFold]{Constant Folder} + +Conceptually, constant folding should be parameterized with the kind +of target machine to get identical behaviour during compilation time +and runtime. We cheat a little bit here... + +ToDo: + check boundaries before folding, e.g. we can fold the Float addition + (i1 + i2) only if it results in a valid Float. +-} + +{-# LANGUAGE CPP, RankNTypes, PatternSynonyms, ViewPatterns, RecordWildCards, + DeriveFunctor #-} +{-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE -Wno-incomplete-uni-patterns #-} + +module GHC.Core.Op.ConstantFold + ( primOpRules + , builtinRules + , caseRules + ) +where + +#include "HsVersions.h" + +import GhcPrelude + +import {-# SOURCE #-} MkId ( mkPrimOpId, magicDictId ) + +import GHC.Core +import GHC.Core.Make +import Id +import Literal +import GHC.Core.SimpleOpt ( exprIsLiteral_maybe ) +import PrimOp ( PrimOp(..), tagToEnumKey ) +import TysWiredIn +import TysPrim +import GHC.Core.TyCon + ( tyConDataCons_maybe, isAlgTyCon, isEnumerationTyCon + , isNewTyCon, unwrapNewTyCon_maybe, tyConDataCons + , tyConFamilySize ) +import GHC.Core.DataCon ( dataConTagZ, dataConTyCon, dataConWrapId, dataConWorkId ) +import GHC.Core.Utils ( cheapEqExpr, cheapEqExpr', exprIsHNF, exprType + , stripTicksTop, stripTicksTopT, mkTicks ) +import GHC.Core.Unfold ( exprIsConApp_maybe ) +import GHC.Core.Type +import OccName ( occNameFS ) +import PrelNames +import Maybes ( orElse ) +import Name ( Name, nameOccName ) +import Outputable +import FastString +import BasicTypes +import GHC.Driver.Session +import GHC.Platform +import Util +import GHC.Core.Coercion (mkUnbranchedAxInstCo,mkSymCo,Role(..)) + +import Control.Applicative ( Alternative(..) ) + +import Control.Monad +import qualified Control.Monad.Fail as MonadFail +import Data.Bits as Bits +import qualified Data.ByteString as BS +import Data.Int +import Data.Ratio +import Data.Word + +{- +Note [Constant folding] +~~~~~~~~~~~~~~~~~~~~~~~ +primOpRules generates a rewrite rule for each primop +These rules do what is often called "constant folding" +E.g. the rules for +# might say + 4 +# 5 = 9 +Well, of course you'd need a lot of rules if you did it +like that, so we use a BuiltinRule instead, so that we +can match in any two literal values. So the rule is really +more like + (Lit x) +# (Lit y) = Lit (x+#y) +where the (+#) on the rhs is done at compile time + +That is why these rules are built in here. +-} + +primOpRules :: Name -> PrimOp -> Maybe CoreRule + -- ToDo: something for integer-shift ops? + -- NotOp +primOpRules nm TagToEnumOp = mkPrimOpRule nm 2 [ tagToEnumRule ] +primOpRules nm DataToTagOp = mkPrimOpRule nm 2 [ dataToTagRule ] + +-- Int operations +primOpRules nm IntAddOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 (+)) + , identityDynFlags zeroi + , numFoldingRules IntAddOp intPrimOps + ] +primOpRules nm IntSubOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 (-)) + , rightIdentityDynFlags zeroi + , equalArgs >> retLit zeroi + , numFoldingRules IntSubOp intPrimOps + ] +primOpRules nm IntAddCOp = mkPrimOpRule nm 2 [ binaryLit (intOpC2 (+)) + , identityCDynFlags zeroi ] +primOpRules nm IntSubCOp = mkPrimOpRule nm 2 [ binaryLit (intOpC2 (-)) + , rightIdentityCDynFlags zeroi + , equalArgs >> retLitNoC zeroi ] +primOpRules nm IntMulOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 (*)) + , zeroElem zeroi + , identityDynFlags onei + , numFoldingRules IntMulOp intPrimOps + ] +primOpRules nm IntQuotOp = mkPrimOpRule nm 2 [ nonZeroLit 1 >> binaryLit (intOp2 quot) + , leftZero zeroi + , rightIdentityDynFlags onei + , equalArgs >> retLit onei ] +primOpRules nm IntRemOp = mkPrimOpRule nm 2 [ nonZeroLit 1 >> binaryLit (intOp2 rem) + , leftZero zeroi + , do l <- getLiteral 1 + dflags <- getDynFlags + guard (l == onei dflags) + retLit zeroi + , equalArgs >> retLit zeroi + , equalArgs >> retLit zeroi ] +primOpRules nm AndIOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 (.&.)) + , idempotent + , zeroElem zeroi ] +primOpRules nm OrIOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 (.|.)) + , idempotent + , identityDynFlags zeroi ] +primOpRules nm XorIOp = mkPrimOpRule nm 2 [ binaryLit (intOp2 xor) + , identityDynFlags zeroi + , equalArgs >> retLit zeroi ] +primOpRules nm NotIOp = mkPrimOpRule nm 1 [ unaryLit complementOp + , inversePrimOp NotIOp ] +primOpRules nm IntNegOp = mkPrimOpRule nm 1 [ unaryLit negOp + , inversePrimOp IntNegOp ] +primOpRules nm ISllOp = mkPrimOpRule nm 2 [ shiftRule (const Bits.shiftL) + , rightIdentityDynFlags zeroi ] +primOpRules nm ISraOp = mkPrimOpRule nm 2 [ shiftRule (const Bits.shiftR) + , rightIdentityDynFlags zeroi ] +primOpRules nm ISrlOp = mkPrimOpRule nm 2 [ shiftRule shiftRightLogical + , rightIdentityDynFlags zeroi ] + +-- Word operations +primOpRules nm WordAddOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (+)) + , identityDynFlags zerow + , numFoldingRules WordAddOp wordPrimOps + ] +primOpRules nm WordSubOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (-)) + , rightIdentityDynFlags zerow + , equalArgs >> retLit zerow + , numFoldingRules WordSubOp wordPrimOps + ] +primOpRules nm WordAddCOp = mkPrimOpRule nm 2 [ binaryLit (wordOpC2 (+)) + , identityCDynFlags zerow ] +primOpRules nm WordSubCOp = mkPrimOpRule nm 2 [ binaryLit (wordOpC2 (-)) + , rightIdentityCDynFlags zerow + , equalArgs >> retLitNoC zerow ] +primOpRules nm WordMulOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (*)) + , identityDynFlags onew + , numFoldingRules WordMulOp wordPrimOps + ] +primOpRules nm WordQuotOp = mkPrimOpRule nm 2 [ nonZeroLit 1 >> binaryLit (wordOp2 quot) + , rightIdentityDynFlags onew ] +primOpRules nm WordRemOp = mkPrimOpRule nm 2 [ nonZeroLit 1 >> binaryLit (wordOp2 rem) + , leftZero zerow + , do l <- getLiteral 1 + dflags <- getDynFlags + guard (l == onew dflags) + retLit zerow + , equalArgs >> retLit zerow ] +primOpRules nm AndOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (.&.)) + , idempotent + , zeroElem zerow ] +primOpRules nm OrOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (.|.)) + , idempotent + , identityDynFlags zerow ] +primOpRules nm XorOp = mkPrimOpRule nm 2 [ binaryLit (wordOp2 xor) + , identityDynFlags zerow + , equalArgs >> retLit zerow ] +primOpRules nm NotOp = mkPrimOpRule nm 1 [ unaryLit complementOp + , inversePrimOp NotOp ] +primOpRules nm SllOp = mkPrimOpRule nm 2 [ shiftRule (const Bits.shiftL) ] +primOpRules nm SrlOp = mkPrimOpRule nm 2 [ shiftRule shiftRightLogical ] + +-- coercions +primOpRules nm Word2IntOp = mkPrimOpRule nm 1 [ liftLitDynFlags word2IntLit + , inversePrimOp Int2WordOp ] +primOpRules nm Int2WordOp = mkPrimOpRule nm 1 [ liftLitDynFlags int2WordLit + , inversePrimOp Word2IntOp ] +primOpRules nm Narrow8IntOp = mkPrimOpRule nm 1 [ liftLit narrow8IntLit + , subsumedByPrimOp Narrow8IntOp + , Narrow8IntOp `subsumesPrimOp` Narrow16IntOp + , Narrow8IntOp `subsumesPrimOp` Narrow32IntOp + , narrowSubsumesAnd AndIOp Narrow8IntOp 8 ] +primOpRules nm Narrow16IntOp = mkPrimOpRule nm 1 [ liftLit narrow16IntLit + , subsumedByPrimOp Narrow8IntOp + , subsumedByPrimOp Narrow16IntOp + , Narrow16IntOp `subsumesPrimOp` Narrow32IntOp + , narrowSubsumesAnd AndIOp Narrow16IntOp 16 ] +primOpRules nm Narrow32IntOp = mkPrimOpRule nm 1 [ liftLit narrow32IntLit + , subsumedByPrimOp Narrow8IntOp + , subsumedByPrimOp Narrow16IntOp + , subsumedByPrimOp Narrow32IntOp + , removeOp32 + , narrowSubsumesAnd AndIOp Narrow32IntOp 32 ] +primOpRules nm Narrow8WordOp = mkPrimOpRule nm 1 [ liftLit narrow8WordLit + , subsumedByPrimOp Narrow8WordOp + , Narrow8WordOp `subsumesPrimOp` Narrow16WordOp + , Narrow8WordOp `subsumesPrimOp` Narrow32WordOp + , narrowSubsumesAnd AndOp Narrow8WordOp 8 ] +primOpRules nm Narrow16WordOp = mkPrimOpRule nm 1 [ liftLit narrow16WordLit + , subsumedByPrimOp Narrow8WordOp + , subsumedByPrimOp Narrow16WordOp + , Narrow16WordOp `subsumesPrimOp` Narrow32WordOp + , narrowSubsumesAnd AndOp Narrow16WordOp 16 ] +primOpRules nm Narrow32WordOp = mkPrimOpRule nm 1 [ liftLit narrow32WordLit + , subsumedByPrimOp Narrow8WordOp + , subsumedByPrimOp Narrow16WordOp + , subsumedByPrimOp Narrow32WordOp + , removeOp32 + , narrowSubsumesAnd AndOp Narrow32WordOp 32 ] +primOpRules nm OrdOp = mkPrimOpRule nm 1 [ liftLit char2IntLit + , inversePrimOp ChrOp ] +primOpRules nm ChrOp = mkPrimOpRule nm 1 [ do [Lit lit] <- getArgs + guard (litFitsInChar lit) + liftLit int2CharLit + , inversePrimOp OrdOp ] +primOpRules nm Float2IntOp = mkPrimOpRule nm 1 [ liftLit float2IntLit ] +primOpRules nm Int2FloatOp = mkPrimOpRule nm 1 [ liftLit int2FloatLit ] +primOpRules nm Double2IntOp = mkPrimOpRule nm 1 [ liftLit double2IntLit ] +primOpRules nm Int2DoubleOp = mkPrimOpRule nm 1 [ liftLit int2DoubleLit ] +-- SUP: Not sure what the standard says about precision in the following 2 cases +primOpRules nm Float2DoubleOp = mkPrimOpRule nm 1 [ liftLit float2DoubleLit ] +primOpRules nm Double2FloatOp = mkPrimOpRule nm 1 [ liftLit double2FloatLit ] + +-- Float +primOpRules nm FloatAddOp = mkPrimOpRule nm 2 [ binaryLit (floatOp2 (+)) + , identity zerof ] +primOpRules nm FloatSubOp = mkPrimOpRule nm 2 [ binaryLit (floatOp2 (-)) + , rightIdentity zerof ] +primOpRules nm FloatMulOp = mkPrimOpRule nm 2 [ binaryLit (floatOp2 (*)) + , identity onef + , strengthReduction twof FloatAddOp ] + -- zeroElem zerof doesn't hold because of NaN +primOpRules nm FloatDivOp = mkPrimOpRule nm 2 [ guardFloatDiv >> binaryLit (floatOp2 (/)) + , rightIdentity onef ] +primOpRules nm FloatNegOp = mkPrimOpRule nm 1 [ unaryLit negOp + , inversePrimOp FloatNegOp ] + +-- Double +primOpRules nm DoubleAddOp = mkPrimOpRule nm 2 [ binaryLit (doubleOp2 (+)) + , identity zerod ] +primOpRules nm DoubleSubOp = mkPrimOpRule nm 2 [ binaryLit (doubleOp2 (-)) + , rightIdentity zerod ] +primOpRules nm DoubleMulOp = mkPrimOpRule nm 2 [ binaryLit (doubleOp2 (*)) + , identity oned + , strengthReduction twod DoubleAddOp ] + -- zeroElem zerod doesn't hold because of NaN +primOpRules nm DoubleDivOp = mkPrimOpRule nm 2 [ guardDoubleDiv >> binaryLit (doubleOp2 (/)) + , rightIdentity oned ] +primOpRules nm DoubleNegOp = mkPrimOpRule nm 1 [ unaryLit negOp + , inversePrimOp DoubleNegOp ] + +-- Relational operators + +primOpRules nm IntEqOp = mkRelOpRule nm (==) [ litEq True ] +primOpRules nm IntNeOp = mkRelOpRule nm (/=) [ litEq False ] +primOpRules nm CharEqOp = mkRelOpRule nm (==) [ litEq True ] +primOpRules nm CharNeOp = mkRelOpRule nm (/=) [ litEq False ] + +primOpRules nm IntGtOp = mkRelOpRule nm (>) [ boundsCmp Gt ] +primOpRules nm IntGeOp = mkRelOpRule nm (>=) [ boundsCmp Ge ] +primOpRules nm IntLeOp = mkRelOpRule nm (<=) [ boundsCmp Le ] +primOpRules nm IntLtOp = mkRelOpRule nm (<) [ boundsCmp Lt ] + +primOpRules nm CharGtOp = mkRelOpRule nm (>) [ boundsCmp Gt ] +primOpRules nm CharGeOp = mkRelOpRule nm (>=) [ boundsCmp Ge ] +primOpRules nm CharLeOp = mkRelOpRule nm (<=) [ boundsCmp Le ] +primOpRules nm CharLtOp = mkRelOpRule nm (<) [ boundsCmp Lt ] + +primOpRules nm FloatGtOp = mkFloatingRelOpRule nm (>) +primOpRules nm FloatGeOp = mkFloatingRelOpRule nm (>=) +primOpRules nm FloatLeOp = mkFloatingRelOpRule nm (<=) +primOpRules nm FloatLtOp = mkFloatingRelOpRule nm (<) +primOpRules nm FloatEqOp = mkFloatingRelOpRule nm (==) +primOpRules nm FloatNeOp = mkFloatingRelOpRule nm (/=) + +primOpRules nm DoubleGtOp = mkFloatingRelOpRule nm (>) +primOpRules nm DoubleGeOp = mkFloatingRelOpRule nm (>=) +primOpRules nm DoubleLeOp = mkFloatingRelOpRule nm (<=) +primOpRules nm DoubleLtOp = mkFloatingRelOpRule nm (<) +primOpRules nm DoubleEqOp = mkFloatingRelOpRule nm (==) +primOpRules nm DoubleNeOp = mkFloatingRelOpRule nm (/=) + +primOpRules nm WordGtOp = mkRelOpRule nm (>) [ boundsCmp Gt ] +primOpRules nm WordGeOp = mkRelOpRule nm (>=) [ boundsCmp Ge ] +primOpRules nm WordLeOp = mkRelOpRule nm (<=) [ boundsCmp Le ] +primOpRules nm WordLtOp = mkRelOpRule nm (<) [ boundsCmp Lt ] +primOpRules nm WordEqOp = mkRelOpRule nm (==) [ litEq True ] +primOpRules nm WordNeOp = mkRelOpRule nm (/=) [ litEq False ] + +primOpRules nm AddrAddOp = mkPrimOpRule nm 2 [ rightIdentityDynFlags zeroi ] + +primOpRules nm SeqOp = mkPrimOpRule nm 4 [ seqRule ] +primOpRules nm SparkOp = mkPrimOpRule nm 4 [ sparkRule ] + +primOpRules _ _ = Nothing + +{- +************************************************************************ +* * +\subsection{Doing the business} +* * +************************************************************************ +-} + +-- useful shorthands +mkPrimOpRule :: Name -> Int -> [RuleM CoreExpr] -> Maybe CoreRule +mkPrimOpRule nm arity rules = Just $ mkBasicRule nm arity (msum rules) + +mkRelOpRule :: Name -> (forall a . Ord a => a -> a -> Bool) + -> [RuleM CoreExpr] -> Maybe CoreRule +mkRelOpRule nm cmp extra + = mkPrimOpRule nm 2 $ + binaryCmpLit cmp : equal_rule : extra + where + -- x `cmp` x does not depend on x, so + -- compute it for the arbitrary value 'True' + -- and use that result + equal_rule = do { equalArgs + ; dflags <- getDynFlags + ; return (if cmp True True + then trueValInt dflags + else falseValInt dflags) } + +{- Note [Rules for floating-point comparisons] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We need different rules for floating-point values because for floats +it is not true that x = x (for NaNs); so we do not want the equal_rule +rule that mkRelOpRule uses. + +Note also that, in the case of equality/inequality, we do /not/ +want to switch to a case-expression. For example, we do not want +to convert + case (eqFloat# x 3.8#) of + True -> this + False -> that +to + case x of + 3.8#::Float# -> this + _ -> that +See #9238. Reason: comparing floating-point values for equality +delicate, and we don't want to implement that delicacy in the code for +case expressions. So we make it an invariant of Core that a case +expression never scrutinises a Float# or Double#. + +This transformation is what the litEq rule does; +see Note [The litEq rule: converting equality to case]. +So we /refrain/ from using litEq for mkFloatingRelOpRule. +-} + +mkFloatingRelOpRule :: Name -> (forall a . Ord a => a -> a -> Bool) + -> Maybe CoreRule +-- See Note [Rules for floating-point comparisons] +mkFloatingRelOpRule nm cmp + = mkPrimOpRule nm 2 [binaryCmpLit cmp] + +-- common constants +zeroi, onei, zerow, onew :: DynFlags -> Literal +zeroi dflags = mkLitInt dflags 0 +onei dflags = mkLitInt dflags 1 +zerow dflags = mkLitWord dflags 0 +onew dflags = mkLitWord dflags 1 + +zerof, onef, twof, zerod, oned, twod :: Literal +zerof = mkLitFloat 0.0 +onef = mkLitFloat 1.0 +twof = mkLitFloat 2.0 +zerod = mkLitDouble 0.0 +oned = mkLitDouble 1.0 +twod = mkLitDouble 2.0 + +cmpOp :: DynFlags -> (forall a . Ord a => a -> a -> Bool) + -> Literal -> Literal -> Maybe CoreExpr +cmpOp dflags cmp = go + where + done True = Just $ trueValInt dflags + done False = Just $ falseValInt dflags + + -- These compares are at different types + go (LitChar i1) (LitChar i2) = done (i1 `cmp` i2) + go (LitFloat i1) (LitFloat i2) = done (i1 `cmp` i2) + go (LitDouble i1) (LitDouble i2) = done (i1 `cmp` i2) + go (LitNumber nt1 i1 _) (LitNumber nt2 i2 _) + | nt1 /= nt2 = Nothing + | otherwise = done (i1 `cmp` i2) + go _ _ = Nothing + +-------------------------- + +negOp :: DynFlags -> Literal -> Maybe CoreExpr -- Negate +negOp _ (LitFloat 0.0) = Nothing -- can't represent -0.0 as a Rational +negOp dflags (LitFloat f) = Just (mkFloatVal dflags (-f)) +negOp _ (LitDouble 0.0) = Nothing +negOp dflags (LitDouble d) = Just (mkDoubleVal dflags (-d)) +negOp dflags (LitNumber nt i t) + | litNumIsSigned nt = Just (Lit (mkLitNumberWrap dflags nt (-i) t)) +negOp _ _ = Nothing + +complementOp :: DynFlags -> Literal -> Maybe CoreExpr -- Binary complement +complementOp dflags (LitNumber nt i t) = + Just (Lit (mkLitNumberWrap dflags nt (complement i) t)) +complementOp _ _ = Nothing + +-------------------------- +intOp2 :: (Integral a, Integral b) + => (a -> b -> Integer) + -> DynFlags -> Literal -> Literal -> Maybe CoreExpr +intOp2 = intOp2' . const + +intOp2' :: (Integral a, Integral b) + => (DynFlags -> a -> b -> Integer) + -> DynFlags -> Literal -> Literal -> Maybe CoreExpr +intOp2' op dflags (LitNumber LitNumInt i1 _) (LitNumber LitNumInt i2 _) = + let o = op dflags + in intResult dflags (fromInteger i1 `o` fromInteger i2) +intOp2' _ _ _ _ = Nothing -- Could find LitLit + +intOpC2 :: (Integral a, Integral b) + => (a -> b -> Integer) + -> DynFlags -> Literal -> Literal -> Maybe CoreExpr +intOpC2 op dflags (LitNumber LitNumInt i1 _) (LitNumber LitNumInt i2 _) = do + intCResult dflags (fromInteger i1 `op` fromInteger i2) +intOpC2 _ _ _ _ = Nothing -- Could find LitLit + +shiftRightLogical :: DynFlags -> Integer -> Int -> Integer +-- Shift right, putting zeros in rather than sign-propagating as Bits.shiftR would do +-- Do this by converting to Word and back. Obviously this won't work for big +-- values, but its ok as we use it here +shiftRightLogical dflags x n = + case platformWordSize (targetPlatform dflags) of + PW4 -> fromIntegral (fromInteger x `shiftR` n :: Word32) + PW8 -> fromIntegral (fromInteger x `shiftR` n :: Word64) + +-------------------------- +retLit :: (DynFlags -> Literal) -> RuleM CoreExpr +retLit l = do dflags <- getDynFlags + return $ Lit $ l dflags + +retLitNoC :: (DynFlags -> Literal) -> RuleM CoreExpr +retLitNoC l = do dflags <- getDynFlags + let lit = l dflags + let ty = literalType lit + return $ mkCoreUbxTup [ty, ty] [Lit lit, Lit (zeroi dflags)] + +wordOp2 :: (Integral a, Integral b) + => (a -> b -> Integer) + -> DynFlags -> Literal -> Literal -> Maybe CoreExpr +wordOp2 op dflags (LitNumber LitNumWord w1 _) (LitNumber LitNumWord w2 _) + = wordResult dflags (fromInteger w1 `op` fromInteger w2) +wordOp2 _ _ _ _ = Nothing -- Could find LitLit + +wordOpC2 :: (Integral a, Integral b) + => (a -> b -> Integer) + -> DynFlags -> Literal -> Literal -> Maybe CoreExpr +wordOpC2 op dflags (LitNumber LitNumWord w1 _) (LitNumber LitNumWord w2 _) = + wordCResult dflags (fromInteger w1 `op` fromInteger w2) +wordOpC2 _ _ _ _ = Nothing -- Could find LitLit + +shiftRule :: (DynFlags -> Integer -> Int -> Integer) -> RuleM CoreExpr +-- Shifts take an Int; hence third arg of op is Int +-- Used for shift primops +-- ISllOp, ISraOp, ISrlOp :: Word# -> Int# -> Word# +-- SllOp, SrlOp :: Word# -> Int# -> Word# +shiftRule shift_op + = do { dflags <- getDynFlags + ; [e1, Lit (LitNumber LitNumInt shift_len _)] <- getArgs + ; case e1 of + _ | shift_len == 0 + -> return e1 + -- See Note [Guarding against silly shifts] + | shift_len < 0 || shift_len > wordSizeInBits dflags + -> return $ Lit $ mkLitNumberWrap dflags LitNumInt 0 (exprType e1) + + -- Do the shift at type Integer, but shift length is Int + Lit (LitNumber nt x t) + | 0 < shift_len + , shift_len <= wordSizeInBits dflags + -> let op = shift_op dflags + y = x `op` fromInteger shift_len + in liftMaybe $ Just (Lit (mkLitNumberWrap dflags nt y t)) + + _ -> mzero } + +wordSizeInBits :: DynFlags -> Integer +wordSizeInBits dflags = toInteger (platformWordSizeInBits (targetPlatform dflags)) + +-------------------------- +floatOp2 :: (Rational -> Rational -> Rational) + -> DynFlags -> Literal -> Literal + -> Maybe (Expr CoreBndr) +floatOp2 op dflags (LitFloat f1) (LitFloat f2) + = Just (mkFloatVal dflags (f1 `op` f2)) +floatOp2 _ _ _ _ = Nothing + +-------------------------- +doubleOp2 :: (Rational -> Rational -> Rational) + -> DynFlags -> Literal -> Literal + -> Maybe (Expr CoreBndr) +doubleOp2 op dflags (LitDouble f1) (LitDouble f2) + = Just (mkDoubleVal dflags (f1 `op` f2)) +doubleOp2 _ _ _ _ = Nothing + +-------------------------- +{- Note [The litEq rule: converting equality to case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This stuff turns + n ==# 3# +into + case n of + 3# -> True + m -> False + +This is a Good Thing, because it allows case-of case things +to happen, and case-default absorption to happen. For +example: + + if (n ==# 3#) || (n ==# 4#) then e1 else e2 +will transform to + case n of + 3# -> e1 + 4# -> e1 + m -> e2 +(modulo the usual precautions to avoid duplicating e1) +-} + +litEq :: Bool -- True <=> equality, False <=> inequality + -> RuleM CoreExpr +litEq is_eq = msum + [ do [Lit lit, expr] <- getArgs + dflags <- getDynFlags + do_lit_eq dflags lit expr + , do [expr, Lit lit] <- getArgs + dflags <- getDynFlags + do_lit_eq dflags lit expr ] + where + do_lit_eq dflags lit expr = do + guard (not (litIsLifted lit)) + return (mkWildCase expr (literalType lit) intPrimTy + [(DEFAULT, [], val_if_neq), + (LitAlt lit, [], val_if_eq)]) + where + val_if_eq | is_eq = trueValInt dflags + | otherwise = falseValInt dflags + val_if_neq | is_eq = falseValInt dflags + | otherwise = trueValInt dflags + + +-- | Check if there is comparison with minBound or maxBound, that is +-- always true or false. For instance, an Int cannot be smaller than its +-- minBound, so we can replace such comparison with False. +boundsCmp :: Comparison -> RuleM CoreExpr +boundsCmp op = do + dflags <- getDynFlags + [a, b] <- getArgs + liftMaybe $ mkRuleFn dflags op a b + +data Comparison = Gt | Ge | Lt | Le + +mkRuleFn :: DynFlags -> Comparison -> CoreExpr -> CoreExpr -> Maybe CoreExpr +mkRuleFn dflags Gt (Lit lit) _ | isMinBound dflags lit = Just $ falseValInt dflags +mkRuleFn dflags Le (Lit lit) _ | isMinBound dflags lit = Just $ trueValInt dflags +mkRuleFn dflags Ge _ (Lit lit) | isMinBound dflags lit = Just $ trueValInt dflags +mkRuleFn dflags Lt _ (Lit lit) | isMinBound dflags lit = Just $ falseValInt dflags +mkRuleFn dflags Ge (Lit lit) _ | isMaxBound dflags lit = Just $ trueValInt dflags +mkRuleFn dflags Lt (Lit lit) _ | isMaxBound dflags lit = Just $ falseValInt dflags +mkRuleFn dflags Gt _ (Lit lit) | isMaxBound dflags lit = Just $ falseValInt dflags +mkRuleFn dflags Le _ (Lit lit) | isMaxBound dflags lit = Just $ trueValInt dflags +mkRuleFn _ _ _ _ = Nothing + +isMinBound :: DynFlags -> Literal -> Bool +isMinBound _ (LitChar c) = c == minBound +isMinBound dflags (LitNumber nt i _) = case nt of + LitNumInt -> i == tARGET_MIN_INT dflags + LitNumInt64 -> i == toInteger (minBound :: Int64) + LitNumWord -> i == 0 + LitNumWord64 -> i == 0 + LitNumNatural -> i == 0 + LitNumInteger -> False +isMinBound _ _ = False + +isMaxBound :: DynFlags -> Literal -> Bool +isMaxBound _ (LitChar c) = c == maxBound +isMaxBound dflags (LitNumber nt i _) = case nt of + LitNumInt -> i == tARGET_MAX_INT dflags + LitNumInt64 -> i == toInteger (maxBound :: Int64) + LitNumWord -> i == tARGET_MAX_WORD dflags + LitNumWord64 -> i == toInteger (maxBound :: Word64) + LitNumNatural -> False + LitNumInteger -> False +isMaxBound _ _ = False + +-- | Create an Int literal expression while ensuring the given Integer is in the +-- target Int range +intResult :: DynFlags -> Integer -> Maybe CoreExpr +intResult dflags result = Just (intResult' dflags result) + +intResult' :: DynFlags -> Integer -> CoreExpr +intResult' dflags result = Lit (mkLitIntWrap dflags result) + +-- | Create an unboxed pair of an Int literal expression, ensuring the given +-- Integer is in the target Int range and the corresponding overflow flag +-- (@0#@/@1#@) if it wasn't. +intCResult :: DynFlags -> Integer -> Maybe CoreExpr +intCResult dflags result = Just (mkPair [Lit lit, Lit c]) + where + mkPair = mkCoreUbxTup [intPrimTy, intPrimTy] + (lit, b) = mkLitIntWrapC dflags result + c = if b then onei dflags else zeroi dflags + +-- | Create a Word literal expression while ensuring the given Integer is in the +-- target Word range +wordResult :: DynFlags -> Integer -> Maybe CoreExpr +wordResult dflags result = Just (wordResult' dflags result) + +wordResult' :: DynFlags -> Integer -> CoreExpr +wordResult' dflags result = Lit (mkLitWordWrap dflags result) + +-- | Create an unboxed pair of a Word literal expression, ensuring the given +-- Integer is in the target Word range and the corresponding carry flag +-- (@0#@/@1#@) if it wasn't. +wordCResult :: DynFlags -> Integer -> Maybe CoreExpr +wordCResult dflags result = Just (mkPair [Lit lit, Lit c]) + where + mkPair = mkCoreUbxTup [wordPrimTy, intPrimTy] + (lit, b) = mkLitWordWrapC dflags result + c = if b then onei dflags else zeroi dflags + +inversePrimOp :: PrimOp -> RuleM CoreExpr +inversePrimOp primop = do + [Var primop_id `App` e] <- getArgs + matchPrimOpId primop primop_id + return e + +subsumesPrimOp :: PrimOp -> PrimOp -> RuleM CoreExpr +this `subsumesPrimOp` that = do + [Var primop_id `App` e] <- getArgs + matchPrimOpId that primop_id + return (Var (mkPrimOpId this) `App` e) + +subsumedByPrimOp :: PrimOp -> RuleM CoreExpr +subsumedByPrimOp primop = do + [e@(Var primop_id `App` _)] <- getArgs + matchPrimOpId primop primop_id + return e + +-- | narrow subsumes bitwise `and` with full mask (cf #16402): +-- +-- narrowN (x .&. m) +-- m .&. (2^N-1) = 2^N-1 +-- ==> narrowN x +-- +-- e.g. narrow16 (x .&. 0xFFFF) +-- ==> narrow16 x +-- +narrowSubsumesAnd :: PrimOp -> PrimOp -> Int -> RuleM CoreExpr +narrowSubsumesAnd and_primop narrw n = do + [Var primop_id `App` x `App` y] <- getArgs + matchPrimOpId and_primop primop_id + let mask = bit n -1 + g v (Lit (LitNumber _ m _)) = do + guard (m .&. mask == mask) + return (Var (mkPrimOpId narrw) `App` v) + g _ _ = mzero + g x y <|> g y x + +idempotent :: RuleM CoreExpr +idempotent = do [e1, e2] <- getArgs + guard $ cheapEqExpr e1 e2 + return e1 + +{- +Note [Guarding against silly shifts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this code: + + import Data.Bits( (.|.), shiftL ) + chunkToBitmap :: [Bool] -> Word32 + chunkToBitmap chunk = foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ] + +This optimises to: +Shift.$wgo = \ (w_sCS :: GHC.Prim.Int#) (w1_sCT :: [GHC.Types.Bool]) -> + case w1_sCT of _ { + [] -> 0##; + : x_aAW xs_aAX -> + case x_aAW of _ { + GHC.Types.False -> + case w_sCS of wild2_Xh { + __DEFAULT -> Shift.$wgo (GHC.Prim.+# wild2_Xh 1) xs_aAX; + 9223372036854775807 -> 0## }; + GHC.Types.True -> + case GHC.Prim.>=# w_sCS 64 of _ { + GHC.Types.False -> + case w_sCS of wild3_Xh { + __DEFAULT -> + case Shift.$wgo (GHC.Prim.+# wild3_Xh 1) xs_aAX of ww_sCW { __DEFAULT -> + GHC.Prim.or# (GHC.Prim.narrow32Word# + (GHC.Prim.uncheckedShiftL# 1## wild3_Xh)) + ww_sCW + }; + 9223372036854775807 -> + GHC.Prim.narrow32Word# +!!!!--> (GHC.Prim.uncheckedShiftL# 1## 9223372036854775807) + }; + GHC.Types.True -> + case w_sCS of wild3_Xh { + __DEFAULT -> Shift.$wgo (GHC.Prim.+# wild3_Xh 1) xs_aAX; + 9223372036854775807 -> 0## + } } } } + +Note the massive shift on line "!!!!". It can't happen, because we've checked +that w < 64, but the optimiser didn't spot that. We DO NOT want to constant-fold this! +Moreover, if the programmer writes (n `uncheckedShiftL` 9223372036854775807), we +can't constant fold it, but if it gets to the assembler we get + Error: operand type mismatch for `shl' + +So the best thing to do is to rewrite the shift with a call to error, +when the second arg is large. However, in general we cannot do this; consider +this case + + let x = I# (uncheckedIShiftL# n 80) + in ... + +Here x contains an invalid shift and consequently we would like to rewrite it +as follows: + + let x = I# (error "invalid shift) + in ... + +This was originally done in the fix to #16449 but this breaks the let/app +invariant (see Note [Core let/app invariant] in GHC.Core) as noted in #16742. +For the reasons discussed in Note [Checking versus non-checking primops] (in +the PrimOp module) there is no safe way rewrite the argument of I# such that +it bottoms. + +Consequently we instead take advantage of the fact that large shifts are +undefined behavior (see associated documentation in primops.txt.pp) and +transform the invalid shift into an "obviously incorrect" value. + +There are two cases: + +- Shifting fixed-width things: the primops ISll, Sll, etc + These are handled by shiftRule. + + We are happy to shift by any amount up to wordSize but no more. + +- Shifting Integers: the function shiftLInteger, shiftRInteger + from the 'integer' library. These are handled by rule_shift_op, + and match_Integer_shift_op. + + Here we could in principle shift by any amount, but we arbitrary + limit the shift to 4 bits; in particular we do not want shift by a + huge amount, which can happen in code like that above. + +The two cases are more different in their code paths that is comfortable, +but that is only a historical accident. + + +************************************************************************ +* * +\subsection{Vaguely generic functions} +* * +************************************************************************ +-} + +mkBasicRule :: Name -> Int -> RuleM CoreExpr -> CoreRule +-- Gives the Rule the same name as the primop itself +mkBasicRule op_name n_args rm + = BuiltinRule { ru_name = occNameFS (nameOccName op_name), + ru_fn = op_name, + ru_nargs = n_args, + ru_try = runRuleM rm } + +newtype RuleM r = RuleM + { runRuleM :: DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe r } + deriving (Functor) + +instance Applicative RuleM where + pure x = RuleM $ \_ _ _ _ -> Just x + (<*>) = ap + +instance Monad RuleM where + RuleM f >>= g + = RuleM $ \dflags iu fn args -> + case f dflags iu fn args of + Nothing -> Nothing + Just r -> runRuleM (g r) dflags iu fn args + +#if !MIN_VERSION_base(4,13,0) + fail = MonadFail.fail +#endif + +instance MonadFail.MonadFail RuleM where + fail _ = mzero + +instance Alternative RuleM where + empty = RuleM $ \_ _ _ _ -> Nothing + RuleM f1 <|> RuleM f2 = RuleM $ \dflags iu fn args -> + f1 dflags iu fn args <|> f2 dflags iu fn args + +instance MonadPlus RuleM + +instance HasDynFlags RuleM where + getDynFlags = RuleM $ \dflags _ _ _ -> Just dflags + +liftMaybe :: Maybe a -> RuleM a +liftMaybe Nothing = mzero +liftMaybe (Just x) = return x + +liftLit :: (Literal -> Literal) -> RuleM CoreExpr +liftLit f = liftLitDynFlags (const f) + +liftLitDynFlags :: (DynFlags -> Literal -> Literal) -> RuleM CoreExpr +liftLitDynFlags f = do + dflags <- getDynFlags + [Lit lit] <- getArgs + return $ Lit (f dflags lit) + +removeOp32 :: RuleM CoreExpr +removeOp32 = do + dflags <- getDynFlags + case platformWordSize (targetPlatform dflags) of + PW4 -> do + [e] <- getArgs + return e + PW8 -> + mzero + +getArgs :: RuleM [CoreExpr] +getArgs = RuleM $ \_ _ _ args -> Just args + +getInScopeEnv :: RuleM InScopeEnv +getInScopeEnv = RuleM $ \_ iu _ _ -> Just iu + +getFunction :: RuleM Id +getFunction = RuleM $ \_ _ fn _ -> Just fn + +-- return the n-th argument of this rule, if it is a literal +-- argument indices start from 0 +getLiteral :: Int -> RuleM Literal +getLiteral n = RuleM $ \_ _ _ exprs -> case drop n exprs of + (Lit l:_) -> Just l + _ -> Nothing + +unaryLit :: (DynFlags -> Literal -> Maybe CoreExpr) -> RuleM CoreExpr +unaryLit op = do + dflags <- getDynFlags + [Lit l] <- getArgs + liftMaybe $ op dflags (convFloating dflags l) + +binaryLit :: (DynFlags -> Literal -> Literal -> Maybe CoreExpr) -> RuleM CoreExpr +binaryLit op = do + dflags <- getDynFlags + [Lit l1, Lit l2] <- getArgs + liftMaybe $ op dflags (convFloating dflags l1) (convFloating dflags l2) + +binaryCmpLit :: (forall a . Ord a => a -> a -> Bool) -> RuleM CoreExpr +binaryCmpLit op = do + dflags <- getDynFlags + binaryLit (\_ -> cmpOp dflags op) + +leftIdentity :: Literal -> RuleM CoreExpr +leftIdentity id_lit = leftIdentityDynFlags (const id_lit) + +rightIdentity :: Literal -> RuleM CoreExpr +rightIdentity id_lit = rightIdentityDynFlags (const id_lit) + +identity :: Literal -> RuleM CoreExpr +identity lit = leftIdentity lit `mplus` rightIdentity lit + +leftIdentityDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +leftIdentityDynFlags id_lit = do + dflags <- getDynFlags + [Lit l1, e2] <- getArgs + guard $ l1 == id_lit dflags + return e2 + +-- | Left identity rule for PrimOps like 'IntAddC' and 'WordAddC', where, in +-- addition to the result, we have to indicate that no carry/overflow occurred. +leftIdentityCDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +leftIdentityCDynFlags id_lit = do + dflags <- getDynFlags + [Lit l1, e2] <- getArgs + guard $ l1 == id_lit dflags + let no_c = Lit (zeroi dflags) + return (mkCoreUbxTup [exprType e2, intPrimTy] [e2, no_c]) + +rightIdentityDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +rightIdentityDynFlags id_lit = do + dflags <- getDynFlags + [e1, Lit l2] <- getArgs + guard $ l2 == id_lit dflags + return e1 + +-- | Right identity rule for PrimOps like 'IntSubC' and 'WordSubC', where, in +-- addition to the result, we have to indicate that no carry/overflow occurred. +rightIdentityCDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +rightIdentityCDynFlags id_lit = do + dflags <- getDynFlags + [e1, Lit l2] <- getArgs + guard $ l2 == id_lit dflags + let no_c = Lit (zeroi dflags) + return (mkCoreUbxTup [exprType e1, intPrimTy] [e1, no_c]) + +identityDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +identityDynFlags lit = + leftIdentityDynFlags lit `mplus` rightIdentityDynFlags lit + +-- | Identity rule for PrimOps like 'IntAddC' and 'WordAddC', where, in addition +-- to the result, we have to indicate that no carry/overflow occurred. +identityCDynFlags :: (DynFlags -> Literal) -> RuleM CoreExpr +identityCDynFlags lit = + leftIdentityCDynFlags lit `mplus` rightIdentityCDynFlags lit + +leftZero :: (DynFlags -> Literal) -> RuleM CoreExpr +leftZero zero = do + dflags <- getDynFlags + [Lit l1, _] <- getArgs + guard $ l1 == zero dflags + return $ Lit l1 + +rightZero :: (DynFlags -> Literal) -> RuleM CoreExpr +rightZero zero = do + dflags <- getDynFlags + [_, Lit l2] <- getArgs + guard $ l2 == zero dflags + return $ Lit l2 + +zeroElem :: (DynFlags -> Literal) -> RuleM CoreExpr +zeroElem lit = leftZero lit `mplus` rightZero lit + +equalArgs :: RuleM () +equalArgs = do + [e1, e2] <- getArgs + guard $ e1 `cheapEqExpr` e2 + +nonZeroLit :: Int -> RuleM () +nonZeroLit n = getLiteral n >>= guard . not . isZeroLit + +-- When excess precision is not requested, cut down the precision of the +-- Rational value to that of Float/Double. We confuse host architecture +-- and target architecture here, but it's convenient (and wrong :-). +convFloating :: DynFlags -> Literal -> Literal +convFloating dflags (LitFloat f) | not (gopt Opt_ExcessPrecision dflags) = + LitFloat (toRational (fromRational f :: Float )) +convFloating dflags (LitDouble d) | not (gopt Opt_ExcessPrecision dflags) = + LitDouble (toRational (fromRational d :: Double)) +convFloating _ l = l + +guardFloatDiv :: RuleM () +guardFloatDiv = do + [Lit (LitFloat f1), Lit (LitFloat f2)] <- getArgs + guard $ (f1 /=0 || f2 > 0) -- see Note [negative zero] + && f2 /= 0 -- avoid NaN and Infinity/-Infinity + +guardDoubleDiv :: RuleM () +guardDoubleDiv = do + [Lit (LitDouble d1), Lit (LitDouble d2)] <- getArgs + guard $ (d1 /=0 || d2 > 0) -- see Note [negative zero] + && d2 /= 0 -- avoid NaN and Infinity/-Infinity +-- Note [negative zero] Avoid (0 / -d), otherwise 0/(-1) reduces to +-- zero, but we might want to preserve the negative zero here which +-- is representable in Float/Double but not in (normalised) +-- Rational. (#3676) Perhaps we should generate (0 :% (-1)) instead? + +strengthReduction :: Literal -> PrimOp -> RuleM CoreExpr +strengthReduction two_lit add_op = do -- Note [Strength reduction] + arg <- msum [ do [arg, Lit mult_lit] <- getArgs + guard (mult_lit == two_lit) + return arg + , do [Lit mult_lit, arg] <- getArgs + guard (mult_lit == two_lit) + return arg ] + return $ Var (mkPrimOpId add_op) `App` arg `App` arg + +-- Note [Strength reduction] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- This rule turns floating point multiplications of the form 2.0 * x and +-- x * 2.0 into x + x addition, because addition costs less than multiplication. +-- See #7116 + +-- Note [What's true and false] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- trueValInt and falseValInt represent true and false values returned by +-- comparison primops for Char, Int, Word, Integer, Double, Float and Addr. +-- True is represented as an unboxed 1# literal, while false is represented +-- as 0# literal. +-- We still need Bool data constructors (True and False) to use in a rule +-- for constant folding of equal Strings + +trueValInt, falseValInt :: DynFlags -> Expr CoreBndr +trueValInt dflags = Lit $ onei dflags -- see Note [What's true and false] +falseValInt dflags = Lit $ zeroi dflags + +trueValBool, falseValBool :: Expr CoreBndr +trueValBool = Var trueDataConId -- see Note [What's true and false] +falseValBool = Var falseDataConId + +ltVal, eqVal, gtVal :: Expr CoreBndr +ltVal = Var ordLTDataConId +eqVal = Var ordEQDataConId +gtVal = Var ordGTDataConId + +mkIntVal :: DynFlags -> Integer -> Expr CoreBndr +mkIntVal dflags i = Lit (mkLitInt dflags i) +mkFloatVal :: DynFlags -> Rational -> Expr CoreBndr +mkFloatVal dflags f = Lit (convFloating dflags (LitFloat f)) +mkDoubleVal :: DynFlags -> Rational -> Expr CoreBndr +mkDoubleVal dflags d = Lit (convFloating dflags (LitDouble d)) + +matchPrimOpId :: PrimOp -> Id -> RuleM () +matchPrimOpId op id = do + op' <- liftMaybe $ isPrimOpId_maybe id + guard $ op == op' + +{- +************************************************************************ +* * +\subsection{Special rules for seq, tagToEnum, dataToTag} +* * +************************************************************************ + +Note [tagToEnum#] +~~~~~~~~~~~~~~~~~ +Nasty check to ensure that tagToEnum# is applied to a type that is an +enumeration TyCon. Unification may refine the type later, but this +check won't see that, alas. It's crude but it works. + +Here's are two cases that should fail + f :: forall a. a + f = tagToEnum# 0 -- Can't do tagToEnum# at a type variable + + g :: Int + g = tagToEnum# 0 -- Int is not an enumeration + +We used to make this check in the type inference engine, but it's quite +ugly to do so, because the delayed constraint solving means that we don't +really know what's going on until the end. It's very much a corner case +because we don't expect the user to call tagToEnum# at all; we merely +generate calls in derived instances of Enum. So we compromise: a +rewrite rule rewrites a bad instance of tagToEnum# to an error call, +and emits a warning. +-} + +tagToEnumRule :: RuleM CoreExpr +-- If data T a = A | B | C +-- then tagToEnum# (T ty) 2# --> B ty +tagToEnumRule = do + [Type ty, Lit (LitNumber LitNumInt i _)] <- getArgs + case splitTyConApp_maybe ty of + Just (tycon, tc_args) | isEnumerationTyCon tycon -> do + let tag = fromInteger i + correct_tag dc = (dataConTagZ dc) == tag + (dc:rest) <- return $ filter correct_tag (tyConDataCons_maybe tycon `orElse` []) + ASSERT(null rest) return () + return $ mkTyApps (Var (dataConWorkId dc)) tc_args + + -- See Note [tagToEnum#] + _ -> WARN( True, text "tagToEnum# on non-enumeration type" <+> ppr ty ) + return $ mkRuntimeErrorApp rUNTIME_ERROR_ID ty "tagToEnum# on non-enumeration type" + +------------------------------ +dataToTagRule :: RuleM CoreExpr +-- See Note [dataToTag#] in primops.txt.pp +dataToTagRule = a `mplus` b + where + -- dataToTag (tagToEnum x) ==> x + a = do + [Type ty1, Var tag_to_enum `App` Type ty2 `App` tag] <- getArgs + guard $ tag_to_enum `hasKey` tagToEnumKey + guard $ ty1 `eqType` ty2 + return tag + + -- dataToTag (K e1 e2) ==> tag-of K + -- This also works (via exprIsConApp_maybe) for + -- dataToTag x + -- where x's unfolding is a constructor application + b = do + dflags <- getDynFlags + [_, val_arg] <- getArgs + in_scope <- getInScopeEnv + (_,floats, dc,_,_) <- liftMaybe $ exprIsConApp_maybe in_scope val_arg + ASSERT( not (isNewTyCon (dataConTyCon dc)) ) return () + return $ wrapFloats floats (mkIntVal dflags (toInteger (dataConTagZ dc))) + +{- Note [dataToTag# magic] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +The primop dataToTag# is unusual because it evaluates its argument. +Only `SeqOp` shares that property. (Other primops do not do anything +as fancy as argument evaluation.) The special handling for dataToTag# +is: + +* GHC.Core.Utils.exprOkForSpeculation has a special case for DataToTagOp, + (actually in app_ok). Most primops with lifted arguments do not + evaluate those arguments, but DataToTagOp and SeqOp are two + exceptions. We say that they are /never/ ok-for-speculation, + regardless of the evaluated-ness of their argument. + See GHC.Core.Utils Note [exprOkForSpeculation and SeqOp/DataToTagOp] + +* There is a special case for DataToTagOp in GHC.StgToCmm.Expr.cgExpr, + that evaluates its argument and then extracts the tag from + the returned value. + +* An application like (dataToTag# (Just x)) is optimised by + dataToTagRule in GHC.Core.Op.ConstantFold. + +* A case expression like + case (dataToTag# e) of <alts> + gets transformed t + case e of <transformed alts> + by GHC.Core.Op.ConstantFold.caseRules; see Note [caseRules for dataToTag] + +See #15696 for a long saga. +-} + +{- ********************************************************************* +* * + unsafeEqualityProof +* * +********************************************************************* -} + +-- unsafeEqualityProof k t t ==> UnsafeRefl (Refl t) +-- That is, if the two types are equal, it's not unsafe! + +unsafeEqualityProofRule :: RuleM CoreExpr +unsafeEqualityProofRule + = do { [Type rep, Type t1, Type t2] <- getArgs + ; guard (t1 `eqType` t2) + ; fn <- getFunction + ; let (_, ue) = splitForAllTys (idType fn) + tc = tyConAppTyCon ue -- tycon: UnsafeEquality + (dc:_) = tyConDataCons tc -- data con: UnsafeRefl + -- UnsafeRefl :: forall (r :: RuntimeRep) (a :: TYPE r). + -- UnsafeEquality r a a + ; return (mkTyApps (Var (dataConWrapId dc)) [rep, t1]) } + + +{- ********************************************************************* +* * + Rules for seq# and spark# +* * +********************************************************************* -} + +{- Note [seq# magic] +~~~~~~~~~~~~~~~~~~~~ +The primop + seq# :: forall a s . a -> State# s -> (# State# s, a #) + +is /not/ the same as the Prelude function seq :: a -> b -> b +as you can see from its type. In fact, seq# is the implementation +mechanism for 'evaluate' + + evaluate :: a -> IO a + evaluate a = IO $ \s -> seq# a s + +The semantics of seq# is + * evaluate its first argument + * and return it + +Things to note + +* Why do we need a primop at all? That is, instead of + case seq# x s of (# x, s #) -> blah + why not instead say this? + case x of { DEFAULT -> blah) + + Reason (see #5129): if we saw + catch# (\s -> case x of { DEFAULT -> raiseIO# exn s }) handler + + then we'd drop the 'case x' because the body of the case is bottom + anyway. But we don't want to do that; the whole /point/ of + seq#/evaluate is to evaluate 'x' first in the IO monad. + + In short, we /always/ evaluate the first argument and never + just discard it. + +* Why return the value? So that we can control sharing of seq'd + values: in + let x = e in x `seq` ... x ... + We don't want to inline x, so better to represent it as + let x = e in case seq# x RW of (# _, x' #) -> ... x' ... + also it matches the type of rseq in the Eval monad. + +Implementing seq#. The compiler has magic for SeqOp in + +- GHC.Core.Op.ConstantFold.seqRule: eliminate (seq# <whnf> s) + +- GHC.StgToCmm.Expr.cgExpr, and cgCase: special case for seq# + +- GHC.Core.Utils.exprOkForSpeculation; + see Note [exprOkForSpeculation and SeqOp/DataToTagOp] in GHC.Core.Utils + +- Simplify.addEvals records evaluated-ness for the result; see + Note [Adding evaluatedness info to pattern-bound variables] + in GHC.Core.Op.Simplify +-} + +seqRule :: RuleM CoreExpr +seqRule = do + [Type ty_a, Type _ty_s, a, s] <- getArgs + guard $ exprIsHNF a + return $ mkCoreUbxTup [exprType s, ty_a] [s, a] + +-- spark# :: forall a s . a -> State# s -> (# State# s, a #) +sparkRule :: RuleM CoreExpr +sparkRule = seqRule -- reduce on HNF, just the same + -- XXX perhaps we shouldn't do this, because a spark eliminated by + -- this rule won't be counted as a dud at runtime? + +{- +************************************************************************ +* * +\subsection{Built in rules} +* * +************************************************************************ + +Note [Scoping for Builtin rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When compiling a (base-package) module that defines one of the +functions mentioned in the RHS of a built-in rule, there's a danger +that we'll see + + f = ...(eq String x).... + + ....and lower down... + + eqString = ... + +Then a rewrite would give + + f = ...(eqString x)... + ....and lower down... + eqString = ... + +and lo, eqString is not in scope. This only really matters when we +get to code generation. But the occurrence analyser does a GlomBinds +step when necessary, that does a new SCC analysis on the whole set of +bindings (see occurAnalysePgm), which sorts out the dependency, so all +is fine. +-} + +builtinRules :: [CoreRule] +-- Rules for non-primops that can't be expressed using a RULE pragma +builtinRules + = [BuiltinRule { ru_name = fsLit "AppendLitString", + ru_fn = unpackCStringFoldrName, + ru_nargs = 4, ru_try = match_append_lit }, + BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName, + ru_nargs = 2, ru_try = match_eq_string }, + BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName, + ru_nargs = 2, ru_try = \_ _ _ -> match_inline }, + BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId, + ru_nargs = 4, ru_try = \_ _ _ -> match_magicDict }, + + mkBasicRule unsafeEqualityProofName 3 unsafeEqualityProofRule, + + mkBasicRule divIntName 2 $ msum + [ nonZeroLit 1 >> binaryLit (intOp2 div) + , leftZero zeroi + , do + [arg, Lit (LitNumber LitNumInt d _)] <- getArgs + Just n <- return $ exactLog2 d + dflags <- getDynFlags + return $ Var (mkPrimOpId ISraOp) `App` arg `App` mkIntVal dflags n + ], + + mkBasicRule modIntName 2 $ msum + [ nonZeroLit 1 >> binaryLit (intOp2 mod) + , leftZero zeroi + , do + [arg, Lit (LitNumber LitNumInt d _)] <- getArgs + Just _ <- return $ exactLog2 d + dflags <- getDynFlags + return $ Var (mkPrimOpId AndIOp) + `App` arg `App` mkIntVal dflags (d - 1) + ] + ] + ++ builtinIntegerRules + ++ builtinNaturalRules +{-# NOINLINE builtinRules #-} +-- there is no benefit to inlining these yet, despite this, GHC produces +-- unfoldings for this regardless since the floated list entries look small. + +builtinIntegerRules :: [CoreRule] +builtinIntegerRules = + [rule_IntToInteger "smallInteger" smallIntegerName, + rule_WordToInteger "wordToInteger" wordToIntegerName, + rule_Int64ToInteger "int64ToInteger" int64ToIntegerName, + rule_Word64ToInteger "word64ToInteger" word64ToIntegerName, + rule_convert "integerToWord" integerToWordName mkWordLitWord, + rule_convert "integerToInt" integerToIntName mkIntLitInt, + rule_convert "integerToWord64" integerToWord64Name (\_ -> mkWord64LitWord64), + rule_convert "integerToInt64" integerToInt64Name (\_ -> mkInt64LitInt64), + rule_binop "plusInteger" plusIntegerName (+), + rule_binop "minusInteger" minusIntegerName (-), + rule_binop "timesInteger" timesIntegerName (*), + rule_unop "negateInteger" negateIntegerName negate, + rule_binop_Prim "eqInteger#" eqIntegerPrimName (==), + rule_binop_Prim "neqInteger#" neqIntegerPrimName (/=), + rule_unop "absInteger" absIntegerName abs, + rule_unop "signumInteger" signumIntegerName signum, + rule_binop_Prim "leInteger#" leIntegerPrimName (<=), + rule_binop_Prim "gtInteger#" gtIntegerPrimName (>), + rule_binop_Prim "ltInteger#" ltIntegerPrimName (<), + rule_binop_Prim "geInteger#" geIntegerPrimName (>=), + rule_binop_Ordering "compareInteger" compareIntegerName compare, + rule_encodeFloat "encodeFloatInteger" encodeFloatIntegerName mkFloatLitFloat, + rule_convert "floatFromInteger" floatFromIntegerName (\_ -> mkFloatLitFloat), + rule_encodeFloat "encodeDoubleInteger" encodeDoubleIntegerName mkDoubleLitDouble, + rule_decodeDouble "decodeDoubleInteger" decodeDoubleIntegerName, + rule_convert "doubleFromInteger" doubleFromIntegerName (\_ -> mkDoubleLitDouble), + rule_rationalTo "rationalToFloat" rationalToFloatName mkFloatExpr, + rule_rationalTo "rationalToDouble" rationalToDoubleName mkDoubleExpr, + rule_binop "gcdInteger" gcdIntegerName gcd, + rule_binop "lcmInteger" lcmIntegerName lcm, + rule_binop "andInteger" andIntegerName (.&.), + rule_binop "orInteger" orIntegerName (.|.), + rule_binop "xorInteger" xorIntegerName xor, + rule_unop "complementInteger" complementIntegerName complement, + rule_shift_op "shiftLInteger" shiftLIntegerName shiftL, + rule_shift_op "shiftRInteger" shiftRIntegerName shiftR, + rule_bitInteger "bitInteger" bitIntegerName, + -- See Note [Integer division constant folding] in libraries/base/GHC/Real.hs + rule_divop_one "quotInteger" quotIntegerName quot, + rule_divop_one "remInteger" remIntegerName rem, + rule_divop_one "divInteger" divIntegerName div, + rule_divop_one "modInteger" modIntegerName mod, + rule_divop_both "divModInteger" divModIntegerName divMod, + rule_divop_both "quotRemInteger" quotRemIntegerName quotRem, + -- These rules below don't actually have to be built in, but if we + -- put them in the Haskell source then we'd have to duplicate them + -- between all Integer implementations + rule_XToIntegerToX "smallIntegerToInt" integerToIntName smallIntegerName, + rule_XToIntegerToX "wordToIntegerToWord" integerToWordName wordToIntegerName, + rule_XToIntegerToX "int64ToIntegerToInt64" integerToInt64Name int64ToIntegerName, + rule_XToIntegerToX "word64ToIntegerToWord64" integerToWord64Name word64ToIntegerName, + rule_smallIntegerTo "smallIntegerToWord" integerToWordName Int2WordOp, + rule_smallIntegerTo "smallIntegerToFloat" floatFromIntegerName Int2FloatOp, + rule_smallIntegerTo "smallIntegerToDouble" doubleFromIntegerName Int2DoubleOp + ] + where rule_convert str name convert + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_Integer_convert convert } + rule_IntToInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_IntToInteger } + rule_WordToInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_WordToInteger } + rule_Int64ToInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_Int64ToInteger } + rule_Word64ToInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_Word64ToInteger } + rule_unop str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_Integer_unop op } + rule_bitInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_bitInteger } + rule_binop str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_binop op } + rule_divop_both str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_divop_both op } + rule_divop_one str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_divop_one op } + rule_shift_op str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_shift_op op } + rule_binop_Prim str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_binop_Prim op } + rule_binop_Ordering str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_binop_Ordering op } + rule_encodeFloat str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Integer_Int_encodeFloat op } + rule_decodeDouble str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_decodeDouble } + rule_XToIntegerToX str name toIntegerName + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_XToIntegerToX toIntegerName } + rule_smallIntegerTo str name primOp + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_smallIntegerTo primOp } + rule_rationalTo str name mkLit + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_rationalTo mkLit } + +builtinNaturalRules :: [CoreRule] +builtinNaturalRules = + [rule_binop "plusNatural" plusNaturalName (+) + ,rule_partial_binop "minusNatural" minusNaturalName (\a b -> if a >= b then Just (a - b) else Nothing) + ,rule_binop "timesNatural" timesNaturalName (*) + ,rule_NaturalFromInteger "naturalFromInteger" naturalFromIntegerName + ,rule_NaturalToInteger "naturalToInteger" naturalToIntegerName + ,rule_WordToNatural "wordToNatural" wordToNaturalName + ] + where rule_binop str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Natural_binop op } + rule_partial_binop str name op + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 2, + ru_try = match_Natural_partial_binop op } + rule_NaturalToInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_NaturalToInteger } + rule_NaturalFromInteger str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_NaturalFromInteger } + rule_WordToNatural str name + = BuiltinRule { ru_name = fsLit str, ru_fn = name, ru_nargs = 1, + ru_try = match_WordToNatural } + +--------------------------------------------------- +-- The rule is this: +-- unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n) +-- = unpackFoldrCString# "foobaz" c n + +match_append_lit :: RuleFun +match_append_lit _ id_unf _ + [ Type ty1 + , lit1 + , c1 + , e2 + ] + -- N.B. Ensure that we strip off any ticks (e.g. source notes) from the + -- `lit` and `c` arguments, lest this may fail to fire when building with + -- -g3. See #16740. + | (strTicks, Var unpk `App` Type ty2 + `App` lit2 + `App` c2 + `App` n) <- stripTicksTop tickishFloatable e2 + , unpk `hasKey` unpackCStringFoldrIdKey + , cheapEqExpr' tickishFloatable c1 c2 + , (c1Ticks, c1') <- stripTicksTop tickishFloatable c1 + , c2Ticks <- stripTicksTopT tickishFloatable c2 + , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1 + , Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2 + = ASSERT( ty1 `eqType` ty2 ) + Just $ mkTicks strTicks + $ Var unpk `App` Type ty1 + `App` Lit (LitString (s1 `BS.append` s2)) + `App` mkTicks (c1Ticks ++ c2Ticks) c1' + `App` n + +match_append_lit _ _ _ _ = Nothing + +--------------------------------------------------- +-- The rule is this: +-- eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2)) = s1==s2 + +match_eq_string :: RuleFun +match_eq_string _ id_unf _ + [Var unpk1 `App` lit1, Var unpk2 `App` lit2] + | unpk1 `hasKey` unpackCStringIdKey + , unpk2 `hasKey` unpackCStringIdKey + , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1 + , Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2 + = Just (if s1 == s2 then trueValBool else falseValBool) + +match_eq_string _ _ _ _ = Nothing + + +--------------------------------------------------- +-- The rule is this: +-- inline f_ty (f a b c) = <f's unfolding> a b c +-- (if f has an unfolding, EVEN if it's a loop breaker) +-- +-- It's important to allow the argument to 'inline' to have args itself +-- (a) because its more forgiving to allow the programmer to write +-- inline f a b c +-- or inline (f a b c) +-- (b) because a polymorphic f wll get a type argument that the +-- programmer can't avoid +-- +-- Also, don't forget about 'inline's type argument! +match_inline :: [Expr CoreBndr] -> Maybe (Expr CoreBndr) +match_inline (Type _ : e : _) + | (Var f, args1) <- collectArgs e, + Just unf <- maybeUnfoldingTemplate (realIdUnfolding f) + -- Ignore the IdUnfoldingFun here! + = Just (mkApps unf args1) + +match_inline _ = Nothing + + +-- See Note [magicDictId magic] in `basicTypes/MkId.hs` +-- for a description of what is going on here. +match_magicDict :: [Expr CoreBndr] -> Maybe (Expr CoreBndr) +match_magicDict [Type _, Var wrap `App` Type a `App` Type _ `App` f, x, y ] + | Just (fieldTy, _) <- splitFunTy_maybe $ dropForAlls $ idType wrap + , Just (dictTy, _) <- splitFunTy_maybe fieldTy + , Just dictTc <- tyConAppTyCon_maybe dictTy + , Just (_,_,co) <- unwrapNewTyCon_maybe dictTc + = Just + $ f `App` Cast x (mkSymCo (mkUnbranchedAxInstCo Representational co [a] [])) + `App` y + +match_magicDict _ = Nothing + +------------------------------------------------- +-- Integer rules +-- smallInteger (79::Int#) = 79::Integer +-- wordToInteger (79::Word#) = 79::Integer +-- Similarly Int64, Word64 + +match_IntToInteger :: RuleFun +match_IntToInteger = match_IntToInteger_unop id + +match_WordToInteger :: RuleFun +match_WordToInteger _ id_unf id [xl] + | Just (LitNumber LitNumWord x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType id) of + Just (_, integerTy) -> + Just (Lit (mkLitInteger x integerTy)) + _ -> + panic "match_WordToInteger: Id has the wrong type" +match_WordToInteger _ _ _ _ = Nothing + +match_Int64ToInteger :: RuleFun +match_Int64ToInteger _ id_unf id [xl] + | Just (LitNumber LitNumInt64 x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType id) of + Just (_, integerTy) -> + Just (Lit (mkLitInteger x integerTy)) + _ -> + panic "match_Int64ToInteger: Id has the wrong type" +match_Int64ToInteger _ _ _ _ = Nothing + +match_Word64ToInteger :: RuleFun +match_Word64ToInteger _ id_unf id [xl] + | Just (LitNumber LitNumWord64 x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType id) of + Just (_, integerTy) -> + Just (Lit (mkLitInteger x integerTy)) + _ -> + panic "match_Word64ToInteger: Id has the wrong type" +match_Word64ToInteger _ _ _ _ = Nothing + +match_NaturalToInteger :: RuleFun +match_NaturalToInteger _ id_unf id [xl] + | Just (LitNumber LitNumNatural x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType id) of + Just (_, naturalTy) -> + Just (Lit (LitNumber LitNumInteger x naturalTy)) + _ -> + panic "match_NaturalToInteger: Id has the wrong type" +match_NaturalToInteger _ _ _ _ = Nothing + +match_NaturalFromInteger :: RuleFun +match_NaturalFromInteger _ id_unf id [xl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + , x >= 0 + = case splitFunTy_maybe (idType id) of + Just (_, naturalTy) -> + Just (Lit (LitNumber LitNumNatural x naturalTy)) + _ -> + panic "match_NaturalFromInteger: Id has the wrong type" +match_NaturalFromInteger _ _ _ _ = Nothing + +match_WordToNatural :: RuleFun +match_WordToNatural _ id_unf id [xl] + | Just (LitNumber LitNumWord x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType id) of + Just (_, naturalTy) -> + Just (Lit (LitNumber LitNumNatural x naturalTy)) + _ -> + panic "match_WordToNatural: Id has the wrong type" +match_WordToNatural _ _ _ _ = Nothing + +------------------------------------------------- +{- Note [Rewriting bitInteger] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For most types the bitInteger operation can be implemented in terms of shifts. +The integer-gmp package, however, can do substantially better than this if +allowed to provide its own implementation. However, in so doing it previously lost +constant-folding (see #8832). The bitInteger rule above provides constant folding +specifically for this function. + +There is, however, a bit of trickiness here when it comes to ranges. While the +AST encodes all integers as Integers, `bit` expects the bit +index to be given as an Int. Hence we coerce to an Int in the rule definition. +This will behave a bit funny for constants larger than the word size, but the user +should expect some funniness given that they will have at very least ignored a +warning in this case. +-} + +match_bitInteger :: RuleFun +-- Just for GHC.Integer.Type.bitInteger :: Int# -> Integer +match_bitInteger dflags id_unf fn [arg] + | Just (LitNumber LitNumInt x _) <- exprIsLiteral_maybe id_unf arg + , x >= 0 + , x <= (wordSizeInBits dflags - 1) + -- Make sure x is small enough to yield a decently small integer + -- Attempting to construct the Integer for + -- (bitInteger 9223372036854775807#) + -- would be a bad idea (#14959) + , let x_int = fromIntegral x :: Int + = case splitFunTy_maybe (idType fn) of + Just (_, integerTy) + -> Just (Lit (LitNumber LitNumInteger (bit x_int) integerTy)) + _ -> panic "match_IntToInteger_unop: Id has the wrong type" + +match_bitInteger _ _ _ _ = Nothing + + +------------------------------------------------- +match_Integer_convert :: Num a + => (DynFlags -> a -> Expr CoreBndr) + -> RuleFun +match_Integer_convert convert dflags id_unf _ [xl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + = Just (convert dflags (fromInteger x)) +match_Integer_convert _ _ _ _ _ = Nothing + +match_Integer_unop :: (Integer -> Integer) -> RuleFun +match_Integer_unop unop _ id_unf _ [xl] + | Just (LitNumber LitNumInteger x i) <- exprIsLiteral_maybe id_unf xl + = Just (Lit (LitNumber LitNumInteger (unop x) i)) +match_Integer_unop _ _ _ _ _ = Nothing + +match_IntToInteger_unop :: (Integer -> Integer) -> RuleFun +match_IntToInteger_unop unop _ id_unf fn [xl] + | Just (LitNumber LitNumInt x _) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType fn) of + Just (_, integerTy) -> + Just (Lit (LitNumber LitNumInteger (unop x) integerTy)) + _ -> + panic "match_IntToInteger_unop: Id has the wrong type" +match_IntToInteger_unop _ _ _ _ _ = Nothing + +match_Integer_binop :: (Integer -> Integer -> Integer) -> RuleFun +match_Integer_binop binop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumInteger x i) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + = Just (Lit (mkLitInteger (x `binop` y) i)) +match_Integer_binop _ _ _ _ _ = Nothing + +match_Natural_binop :: (Integer -> Integer -> Integer) -> RuleFun +match_Natural_binop binop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumNatural x i) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumNatural y _) <- exprIsLiteral_maybe id_unf yl + = Just (Lit (mkLitNatural (x `binop` y) i)) +match_Natural_binop _ _ _ _ _ = Nothing + +match_Natural_partial_binop :: (Integer -> Integer -> Maybe Integer) -> RuleFun +match_Natural_partial_binop binop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumNatural x i) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumNatural y _) <- exprIsLiteral_maybe id_unf yl + , Just z <- x `binop` y + = Just (Lit (mkLitNatural z i)) +match_Natural_partial_binop _ _ _ _ _ = Nothing + +-- This helper is used for the quotRem and divMod functions +match_Integer_divop_both + :: (Integer -> Integer -> (Integer, Integer)) -> RuleFun +match_Integer_divop_both divop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumInteger x t) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + , y /= 0 + , (r,s) <- x `divop` y + = Just $ mkCoreUbxTup [t,t] [Lit (mkLitInteger r t), Lit (mkLitInteger s t)] +match_Integer_divop_both _ _ _ _ _ = Nothing + +-- This helper is used for the quot and rem functions +match_Integer_divop_one :: (Integer -> Integer -> Integer) -> RuleFun +match_Integer_divop_one divop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumInteger x i) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + , y /= 0 + = Just (Lit (mkLitInteger (x `divop` y) i)) +match_Integer_divop_one _ _ _ _ _ = Nothing + +match_Integer_shift_op :: (Integer -> Int -> Integer) -> RuleFun +-- Used for shiftLInteger, shiftRInteger :: Integer -> Int# -> Integer +-- See Note [Guarding against silly shifts] +match_Integer_shift_op binop _ id_unf _ [xl,yl] + | Just (LitNumber LitNumInteger x i) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInt y _) <- exprIsLiteral_maybe id_unf yl + , y >= 0 + , y <= 4 -- Restrict constant-folding of shifts on Integers, somewhat + -- arbitrary. We can get huge shifts in inaccessible code + -- (#15673) + = Just (Lit (mkLitInteger (x `binop` fromIntegral y) i)) +match_Integer_shift_op _ _ _ _ _ = Nothing + +match_Integer_binop_Prim :: (Integer -> Integer -> Bool) -> RuleFun +match_Integer_binop_Prim binop dflags id_unf _ [xl, yl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + = Just (if x `binop` y then trueValInt dflags else falseValInt dflags) +match_Integer_binop_Prim _ _ _ _ _ = Nothing + +match_Integer_binop_Ordering :: (Integer -> Integer -> Ordering) -> RuleFun +match_Integer_binop_Ordering binop _ id_unf _ [xl, yl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + = Just $ case x `binop` y of + LT -> ltVal + EQ -> eqVal + GT -> gtVal +match_Integer_binop_Ordering _ _ _ _ _ = Nothing + +match_Integer_Int_encodeFloat :: RealFloat a + => (a -> Expr CoreBndr) + -> RuleFun +match_Integer_Int_encodeFloat mkLit _ id_unf _ [xl,yl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInt y _) <- exprIsLiteral_maybe id_unf yl + = Just (mkLit $ encodeFloat x (fromInteger y)) +match_Integer_Int_encodeFloat _ _ _ _ _ = Nothing + +--------------------------------------------------- +-- constant folding for Float/Double +-- +-- This turns +-- rationalToFloat n d +-- into a literal Float, and similarly for Doubles. +-- +-- it's important to not match d == 0, because that may represent a +-- literal "0/0" or similar, and we can't produce a literal value for +-- NaN or +-Inf +match_rationalTo :: RealFloat a + => (a -> Expr CoreBndr) + -> RuleFun +match_rationalTo mkLit _ id_unf _ [xl, yl] + | Just (LitNumber LitNumInteger x _) <- exprIsLiteral_maybe id_unf xl + , Just (LitNumber LitNumInteger y _) <- exprIsLiteral_maybe id_unf yl + , y /= 0 + = Just (mkLit (fromRational (x % y))) +match_rationalTo _ _ _ _ _ = Nothing + +match_decodeDouble :: RuleFun +match_decodeDouble dflags id_unf fn [xl] + | Just (LitDouble x) <- exprIsLiteral_maybe id_unf xl + = case splitFunTy_maybe (idType fn) of + Just (_, res) + | Just [_lev1, _lev2, integerTy, intHashTy] <- tyConAppArgs_maybe res + -> case decodeFloat (fromRational x :: Double) of + (y, z) -> + Just $ mkCoreUbxTup [integerTy, intHashTy] + [Lit (mkLitInteger y integerTy), + Lit (mkLitInt dflags (toInteger z))] + _ -> + pprPanic "match_decodeDouble: Id has the wrong type" + (ppr fn <+> dcolon <+> ppr (idType fn)) +match_decodeDouble _ _ _ _ = Nothing + +match_XToIntegerToX :: Name -> RuleFun +match_XToIntegerToX n _ _ _ [App (Var x) y] + | idName x == n + = Just y +match_XToIntegerToX _ _ _ _ _ = Nothing + +match_smallIntegerTo :: PrimOp -> RuleFun +match_smallIntegerTo primOp _ _ _ [App (Var x) y] + | idName x == smallIntegerName + = Just $ App (Var (mkPrimOpId primOp)) y +match_smallIntegerTo _ _ _ _ _ = Nothing + + + +-------------------------------------------------------- +-- Note [Constant folding through nested expressions] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We use rewrites rules to perform constant folding. It means that we don't +-- have a global view of the expression we are trying to optimise. As a +-- consequence we only perform local (small-step) transformations that either: +-- 1) reduce the number of operations +-- 2) rearrange the expression to increase the odds that other rules will +-- match +-- +-- We don't try to handle more complex expression optimisation cases that would +-- require a global view. For example, rewriting expressions to increase +-- sharing (e.g., Horner's method); optimisations that require local +-- transformations increasing the number of operations; rearrangements to +-- cancel/factorize terms (e.g., (a+b-a-b) isn't rearranged to reduce to 0). +-- +-- We already have rules to perform constant folding on expressions with the +-- following shape (where a and/or b are literals): +-- +-- D) op +-- /\ +-- / \ +-- / \ +-- a b +-- +-- To support nested expressions, we match three other shapes of expression +-- trees: +-- +-- A) op1 B) op1 C) op1 +-- /\ /\ /\ +-- / \ / \ / \ +-- / \ / \ / \ +-- a op2 op2 c op2 op3 +-- /\ /\ /\ /\ +-- / \ / \ / \ / \ +-- b c a b a b c d +-- +-- +-- R1) +/- simplification: +-- ops = + or -, two literals (not siblings) +-- +-- Examples: +-- A: 5 + (10-x) ==> 15-x +-- B: (10+x) + 5 ==> 15+x +-- C: (5+a)-(5-b) ==> 0+(a+b) +-- +-- R2) * simplification +-- ops = *, two literals (not siblings) +-- +-- Examples: +-- A: 5 * (10*x) ==> 50*x +-- B: (10*x) * 5 ==> 50*x +-- C: (5*a)*(5*b) ==> 25*(a*b) +-- +-- R3) * distribution over +/- +-- op1 = *, op2 = + or -, two literals (not siblings) +-- +-- This transformation doesn't reduce the number of operations but switches +-- the outer and the inner operations so that the outer is (+) or (-) instead +-- of (*). It increases the odds that other rules will match after this one. +-- +-- Examples: +-- A: 5 * (10-x) ==> 50 - (5*x) +-- B: (10+x) * 5 ==> 50 + (5*x) +-- C: Not supported as it would increase the number of operations: +-- (5+a)*(5-b) ==> 25 - 5*b + 5*a - a*b +-- +-- R4) Simple factorization +-- +-- op1 = + or -, op2/op3 = *, +-- one literal for each innermost * operation (except in the D case), +-- the two other terms are equals +-- +-- Examples: +-- A: x - (10*x) ==> (-9)*x +-- B: (10*x) + x ==> 11*x +-- C: (5*x)-(x*3) ==> 2*x +-- D: x+x ==> 2*x +-- +-- R5) +/- propagation +-- +-- ops = + or -, one literal +-- +-- This transformation doesn't reduce the number of operations but propagates +-- the constant to the outer level. It increases the odds that other rules +-- will match after this one. +-- +-- Examples: +-- A: x - (10-y) ==> (x+y) - 10 +-- B: (10+x) - y ==> 10 + (x-y) +-- C: N/A (caught by the A and B cases) +-- +-------------------------------------------------------- + +-- | Rules to perform constant folding into nested expressions +-- +--See Note [Constant folding through nested expressions] +numFoldingRules :: PrimOp -> (DynFlags -> PrimOps) -> RuleM CoreExpr +numFoldingRules op dict = do + [e1,e2] <- getArgs + dflags <- getDynFlags + let PrimOps{..} = dict dflags + if not (gopt Opt_NumConstantFolding dflags) + then mzero + else case BinOpApp e1 op e2 of + -- R1) +/- simplification + x :++: (y :++: v) -> return $ mkL (x+y) `add` v + x :++: (L y :-: v) -> return $ mkL (x+y) `sub` v + x :++: (v :-: L y) -> return $ mkL (x-y) `add` v + L x :-: (y :++: v) -> return $ mkL (x-y) `sub` v + L x :-: (L y :-: v) -> return $ mkL (x-y) `add` v + L x :-: (v :-: L y) -> return $ mkL (x+y) `sub` v + + (y :++: v) :-: L x -> return $ mkL (y-x) `add` v + (L y :-: v) :-: L x -> return $ mkL (y-x) `sub` v + (v :-: L y) :-: L x -> return $ mkL (0-y-x) `add` v + + (x :++: w) :+: (y :++: v) -> return $ mkL (x+y) `add` (w `add` v) + (w :-: L x) :+: (L y :-: v) -> return $ mkL (y-x) `add` (w `sub` v) + (w :-: L x) :+: (v :-: L y) -> return $ mkL (0-x-y) `add` (w `add` v) + (L x :-: w) :+: (L y :-: v) -> return $ mkL (x+y) `sub` (w `add` v) + (L x :-: w) :+: (v :-: L y) -> return $ mkL (x-y) `add` (v `sub` w) + (w :-: L x) :+: (y :++: v) -> return $ mkL (y-x) `add` (w `add` v) + (L x :-: w) :+: (y :++: v) -> return $ mkL (x+y) `add` (v `sub` w) + (y :++: v) :+: (w :-: L x) -> return $ mkL (y-x) `add` (w `add` v) + (y :++: v) :+: (L x :-: w) -> return $ mkL (x+y) `add` (v `sub` w) + + (v :-: L y) :-: (w :-: L x) -> return $ mkL (x-y) `add` (v `sub` w) + (v :-: L y) :-: (L x :-: w) -> return $ mkL (0-x-y) `add` (v `add` w) + (L y :-: v) :-: (w :-: L x) -> return $ mkL (x+y) `sub` (v `add` w) + (L y :-: v) :-: (L x :-: w) -> return $ mkL (y-x) `add` (w `sub` v) + (x :++: w) :-: (y :++: v) -> return $ mkL (x-y) `add` (w `sub` v) + (w :-: L x) :-: (y :++: v) -> return $ mkL (0-y-x) `add` (w `sub` v) + (L x :-: w) :-: (y :++: v) -> return $ mkL (x-y) `sub` (v `add` w) + (y :++: v) :-: (w :-: L x) -> return $ mkL (y+x) `add` (v `sub` w) + (y :++: v) :-: (L x :-: w) -> return $ mkL (y-x) `add` (v `add` w) + + -- R2) * simplification + x :**: (y :**: v) -> return $ mkL (x*y) `mul` v + (x :**: w) :*: (y :**: v) -> return $ mkL (x*y) `mul` (w `mul` v) + + -- R3) * distribution over +/- + x :**: (y :++: v) -> return $ mkL (x*y) `add` (mkL x `mul` v) + x :**: (L y :-: v) -> return $ mkL (x*y) `sub` (mkL x `mul` v) + x :**: (v :-: L y) -> return $ (mkL x `mul` v) `sub` mkL (x*y) + + -- R4) Simple factorization + v :+: w + | w `cheapEqExpr` v -> return $ mkL 2 `mul` v + w :+: (y :**: v) + | w `cheapEqExpr` v -> return $ mkL (1+y) `mul` v + w :-: (y :**: v) + | w `cheapEqExpr` v -> return $ mkL (1-y) `mul` v + (y :**: v) :+: w + | w `cheapEqExpr` v -> return $ mkL (y+1) `mul` v + (y :**: v) :-: w + | w `cheapEqExpr` v -> return $ mkL (y-1) `mul` v + (x :**: w) :+: (y :**: v) + | w `cheapEqExpr` v -> return $ mkL (x+y) `mul` v + (x :**: w) :-: (y :**: v) + | w `cheapEqExpr` v -> return $ mkL (x-y) `mul` v + + -- R5) +/- propagation + w :+: (y :++: v) -> return $ mkL y `add` (w `add` v) + (y :++: v) :+: w -> return $ mkL y `add` (w `add` v) + w :-: (y :++: v) -> return $ (w `sub` v) `sub` mkL y + (y :++: v) :-: w -> return $ mkL y `add` (v `sub` w) + w :-: (L y :-: v) -> return $ (w `add` v) `sub` mkL y + (L y :-: v) :-: w -> return $ mkL y `sub` (w `add` v) + w :+: (L y :-: v) -> return $ mkL y `add` (w `sub` v) + w :+: (v :-: L y) -> return $ (w `add` v) `sub` mkL y + (L y :-: v) :+: w -> return $ mkL y `add` (w `sub` v) + (v :-: L y) :+: w -> return $ (w `add` v) `sub` mkL y + + _ -> mzero + + + +-- | Match the application of a binary primop +pattern BinOpApp :: Arg CoreBndr -> PrimOp -> Arg CoreBndr -> CoreExpr +pattern BinOpApp x op y = OpVal op `App` x `App` y + +-- | Match a primop +pattern OpVal :: PrimOp -> Arg CoreBndr +pattern OpVal op <- Var (isPrimOpId_maybe -> Just op) where + OpVal op = Var (mkPrimOpId op) + + + +-- | Match a literal +pattern L :: Integer -> Arg CoreBndr +pattern L l <- Lit (isLitValue_maybe -> Just l) + +-- | Match an addition +pattern (:+:) :: Arg CoreBndr -> Arg CoreBndr -> CoreExpr +pattern x :+: y <- BinOpApp x (isAddOp -> True) y + +-- | Match an addition with a literal (handle commutativity) +pattern (:++:) :: Integer -> Arg CoreBndr -> CoreExpr +pattern l :++: x <- (isAdd -> Just (l,x)) + +isAdd :: CoreExpr -> Maybe (Integer,CoreExpr) +isAdd e = case e of + L l :+: x -> Just (l,x) + x :+: L l -> Just (l,x) + _ -> Nothing + +-- | Match a multiplication +pattern (:*:) :: Arg CoreBndr -> Arg CoreBndr -> CoreExpr +pattern x :*: y <- BinOpApp x (isMulOp -> True) y + +-- | Match a multiplication with a literal (handle commutativity) +pattern (:**:) :: Integer -> Arg CoreBndr -> CoreExpr +pattern l :**: x <- (isMul -> Just (l,x)) + +isMul :: CoreExpr -> Maybe (Integer,CoreExpr) +isMul e = case e of + L l :*: x -> Just (l,x) + x :*: L l -> Just (l,x) + _ -> Nothing + + +-- | Match a subtraction +pattern (:-:) :: Arg CoreBndr -> Arg CoreBndr -> CoreExpr +pattern x :-: y <- BinOpApp x (isSubOp -> True) y + +isSubOp :: PrimOp -> Bool +isSubOp IntSubOp = True +isSubOp WordSubOp = True +isSubOp _ = False + +isAddOp :: PrimOp -> Bool +isAddOp IntAddOp = True +isAddOp WordAddOp = True +isAddOp _ = False + +isMulOp :: PrimOp -> Bool +isMulOp IntMulOp = True +isMulOp WordMulOp = True +isMulOp _ = False + +-- | Explicit "type-class"-like dictionary for numeric primops +-- +-- Depends on DynFlags because creating a literal value depends on DynFlags +data PrimOps = PrimOps + { add :: CoreExpr -> CoreExpr -> CoreExpr -- ^ Add two numbers + , sub :: CoreExpr -> CoreExpr -> CoreExpr -- ^ Sub two numbers + , mul :: CoreExpr -> CoreExpr -> CoreExpr -- ^ Multiply two numbers + , mkL :: Integer -> CoreExpr -- ^ Create a literal value + } + +intPrimOps :: DynFlags -> PrimOps +intPrimOps dflags = PrimOps + { add = \x y -> BinOpApp x IntAddOp y + , sub = \x y -> BinOpApp x IntSubOp y + , mul = \x y -> BinOpApp x IntMulOp y + , mkL = intResult' dflags + } + +wordPrimOps :: DynFlags -> PrimOps +wordPrimOps dflags = PrimOps + { add = \x y -> BinOpApp x WordAddOp y + , sub = \x y -> BinOpApp x WordSubOp y + , mul = \x y -> BinOpApp x WordMulOp y + , mkL = wordResult' dflags + } + + +-------------------------------------------------------- +-- Constant folding through case-expressions +-- +-- cf Scrutinee Constant Folding in simplCore/GHC.Core.Op.Simplify.Utils +-------------------------------------------------------- + +-- | Match the scrutinee of a case and potentially return a new scrutinee and a +-- function to apply to each literal alternative. +caseRules :: DynFlags + -> CoreExpr -- Scrutinee + -> Maybe ( CoreExpr -- New scrutinee + , AltCon -> Maybe AltCon -- How to fix up the alt pattern + -- Nothing <=> Unreachable + -- See Note [Unreachable caseRules alternatives] + , Id -> CoreExpr) -- How to reconstruct the original scrutinee + -- from the new case-binder +-- e.g case e of b { +-- ...; +-- con bs -> rhs; +-- ... } +-- ==> +-- case e' of b' { +-- ...; +-- fixup_altcon[con] bs -> let b = mk_orig[b] in rhs; +-- ... } + +caseRules dflags (App (App (Var f) v) (Lit l)) -- v `op` x# + | Just op <- isPrimOpId_maybe f + , Just x <- isLitValue_maybe l + , Just adjust_lit <- adjustDyadicRight op x + = Just (v, tx_lit_con dflags adjust_lit + , \v -> (App (App (Var f) (Var v)) (Lit l))) + +caseRules dflags (App (App (Var f) (Lit l)) v) -- x# `op` v + | Just op <- isPrimOpId_maybe f + , Just x <- isLitValue_maybe l + , Just adjust_lit <- adjustDyadicLeft x op + = Just (v, tx_lit_con dflags adjust_lit + , \v -> (App (App (Var f) (Lit l)) (Var v))) + + +caseRules dflags (App (Var f) v ) -- op v + | Just op <- isPrimOpId_maybe f + , Just adjust_lit <- adjustUnary op + = Just (v, tx_lit_con dflags adjust_lit + , \v -> App (Var f) (Var v)) + +-- See Note [caseRules for tagToEnum] +caseRules dflags (App (App (Var f) type_arg) v) + | Just TagToEnumOp <- isPrimOpId_maybe f + = Just (v, tx_con_tte dflags + , \v -> (App (App (Var f) type_arg) (Var v))) + +-- See Note [caseRules for dataToTag] +caseRules _ (App (App (Var f) (Type ty)) v) -- dataToTag x + | Just DataToTagOp <- isPrimOpId_maybe f + , Just (tc, _) <- tcSplitTyConApp_maybe ty + , isAlgTyCon tc + = Just (v, tx_con_dtt ty + , \v -> App (App (Var f) (Type ty)) (Var v)) + +caseRules _ _ = Nothing + + +tx_lit_con :: DynFlags -> (Integer -> Integer) -> AltCon -> Maybe AltCon +tx_lit_con _ _ DEFAULT = Just DEFAULT +tx_lit_con dflags adjust (LitAlt l) = Just $ LitAlt (mapLitValue dflags adjust l) +tx_lit_con _ _ alt = pprPanic "caseRules" (ppr alt) + -- NB: mapLitValue uses mkLitIntWrap etc, to ensure that the + -- literal alternatives remain in Word/Int target ranges + -- (See Note [Word/Int underflow/overflow] in Literal and #13172). + +adjustDyadicRight :: PrimOp -> Integer -> Maybe (Integer -> Integer) +-- Given (x `op` lit) return a function 'f' s.t. f (x `op` lit) = x +adjustDyadicRight op lit + = case op of + WordAddOp -> Just (\y -> y-lit ) + IntAddOp -> Just (\y -> y-lit ) + WordSubOp -> Just (\y -> y+lit ) + IntSubOp -> Just (\y -> y+lit ) + XorOp -> Just (\y -> y `xor` lit) + XorIOp -> Just (\y -> y `xor` lit) + _ -> Nothing + +adjustDyadicLeft :: Integer -> PrimOp -> Maybe (Integer -> Integer) +-- Given (lit `op` x) return a function 'f' s.t. f (lit `op` x) = x +adjustDyadicLeft lit op + = case op of + WordAddOp -> Just (\y -> y-lit ) + IntAddOp -> Just (\y -> y-lit ) + WordSubOp -> Just (\y -> lit-y ) + IntSubOp -> Just (\y -> lit-y ) + XorOp -> Just (\y -> y `xor` lit) + XorIOp -> Just (\y -> y `xor` lit) + _ -> Nothing + + +adjustUnary :: PrimOp -> Maybe (Integer -> Integer) +-- Given (op x) return a function 'f' s.t. f (op x) = x +adjustUnary op + = case op of + NotOp -> Just (\y -> complement y) + NotIOp -> Just (\y -> complement y) + IntNegOp -> Just (\y -> negate y ) + _ -> Nothing + +tx_con_tte :: DynFlags -> AltCon -> Maybe AltCon +tx_con_tte _ DEFAULT = Just DEFAULT +tx_con_tte _ alt@(LitAlt {}) = pprPanic "caseRules" (ppr alt) +tx_con_tte dflags (DataAlt dc) -- See Note [caseRules for tagToEnum] + = Just $ LitAlt $ mkLitInt dflags $ toInteger $ dataConTagZ dc + +tx_con_dtt :: Type -> AltCon -> Maybe AltCon +tx_con_dtt _ DEFAULT = Just DEFAULT +tx_con_dtt ty (LitAlt (LitNumber LitNumInt i _)) + | tag >= 0 + , tag < n_data_cons + = Just (DataAlt (data_cons !! tag)) -- tag is zero-indexed, as is (!!) + | otherwise + = Nothing + where + tag = fromInteger i :: ConTagZ + tc = tyConAppTyCon ty + n_data_cons = tyConFamilySize tc + data_cons = tyConDataCons tc + +tx_con_dtt _ alt = pprPanic "caseRules" (ppr alt) + + +{- Note [caseRules for tagToEnum] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want to transform + case tagToEnum x of + False -> e1 + True -> e2 +into + case x of + 0# -> e1 + 1# -> e2 + +This rule eliminates a lot of boilerplate. For + if (x>y) then e2 else e1 +we generate + case tagToEnum (x ># y) of + False -> e1 + True -> e2 +and it is nice to then get rid of the tagToEnum. + +Beware (#14768): avoid the temptation to map constructor 0 to +DEFAULT, in the hope of getting this + case (x ># y) of + DEFAULT -> e1 + 1# -> e2 +That fails utterly in the case of + data Colour = Red | Green | Blue + case tagToEnum x of + DEFAULT -> e1 + Red -> e2 + +We don't want to get this! + case x of + DEFAULT -> e1 + DEFAULT -> e2 + +Instead, we deal with turning one branch into DEFAULT in GHC.Core.Op.Simplify.Utils +(add_default in mkCase3). + +Note [caseRules for dataToTag] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +See also Note [dataToTag#] in primpops.txt.pp + +We want to transform + case dataToTag x of + DEFAULT -> e1 + 1# -> e2 +into + case x of + DEFAULT -> e1 + (:) _ _ -> e2 + +Note the need for some wildcard binders in +the 'cons' case. + +For the time, we only apply this transformation when the type of `x` is a type +headed by a normal tycon. In particular, we do not apply this in the case of a +data family tycon, since that would require carefully applying coercion(s) +between the data family and the data family instance's representation type, +which caseRules isn't currently engineered to handle (#14680). + +Note [Unreachable caseRules alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Take care if we see something like + case dataToTag x of + DEFAULT -> e1 + -1# -> e2 + 100 -> e3 +because there isn't a data constructor with tag -1 or 100. In this case the +out-of-range alternative is dead code -- we know the range of tags for x. + +Hence caseRules returns (AltCon -> Maybe AltCon), with Nothing indicating +an alternative that is unreachable. + +You may wonder how this can happen: check out #15436. +-} diff --git a/compiler/GHC/Core/Op/CprAnal.hs b/compiler/GHC/Core/Op/CprAnal.hs new file mode 100644 index 0000000000..c8f7e314e9 --- /dev/null +++ b/compiler/GHC/Core/Op/CprAnal.hs @@ -0,0 +1,669 @@ +{-# LANGUAGE CPP #-} + +-- | Constructed Product Result analysis. Identifies functions that surely +-- return heap-allocated records on every code path, so that we can eliminate +-- said heap allocation by performing a worker/wrapper split. +-- +-- See https://www.microsoft.com/en-us/research/publication/constructed-product-result-analysis-haskell/. +-- CPR analysis should happen after strictness analysis. +-- See Note [Phase ordering]. +module GHC.Core.Op.CprAnal ( cprAnalProgram ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core.Op.WorkWrap.Lib ( deepSplitProductType_maybe ) +import GHC.Driver.Session +import Demand +import Cpr +import GHC.Core +import GHC.Core.Seq +import Outputable +import VarEnv +import BasicTypes +import Data.List +import GHC.Core.DataCon +import Id +import IdInfo +import GHC.Core.Utils ( exprIsHNF, dumpIdInfoOfProgram ) +import GHC.Core.TyCon +import GHC.Core.Type +import GHC.Core.FamInstEnv +import Util +import ErrUtils ( dumpIfSet_dyn, DumpFormat (..) ) +import Maybes ( isJust, isNothing ) + +{- Note [Constructed Product Result] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The goal of Constructed Product Result analysis is to identify functions that +surely return heap-allocated records on every code path, so that we can +eliminate said heap allocation by performing a worker/wrapper split. + +@swap@ below is such a function: + + swap (a, b) = (b, a) + +A @case@ on an application of @swap@, like +@case swap (10, 42) of (a, b) -> a + b@ could cancel away +(by case-of-known-constructor) if we "inlined" @swap@ and simplified. We then +say that @swap@ has the CPR property. + +We can't inline recursive functions, but similar reasoning applies there: + + f x n = case n of + 0 -> (x, 0) + _ -> f (x+1) (n-1) + +Inductively, @case f 1 2 of (a, b) -> a + b@ could cancel away the constructed +product with the case. So @f@, too, has the CPR property. But we can't really +"inline" @f@, because it's recursive. Also, non-recursive functions like @swap@ +might be too big to inline (or even marked NOINLINE). We still want to exploit +the CPR property, and that is exactly what the worker/wrapper transformation +can do for us: + + $wf x n = case n of + 0 -> case (x, 0) of -> (a, b) -> (# a, b #) + _ -> case f (x+1) (n-1) of (a, b) -> (# a, b #) + f x n = case $wf x n of (# a, b #) -> (a, b) + +where $wf readily simplifies (by case-of-known-constructor and inlining @f@) to: + + $wf x n = case n of + 0 -> (# x, 0 #) + _ -> $wf (x+1) (n-1) + +Now, a call site like @case f 1 2 of (a, b) -> a + b@ can inline @f@ and +eliminate the heap-allocated pair constructor. + +Note [Phase ordering] +~~~~~~~~~~~~~~~~~~~~~ +We need to perform strictness analysis before CPR analysis, because that might +unbox some arguments, in turn leading to more constructed products. +Ideally, we would want the following pipeline: + +1. Strictness +2. worker/wrapper (for strictness) +3. CPR +4. worker/wrapper (for CPR) + +Currently, we omit 2. and anticipate the results of worker/wrapper. +See Note [CPR in a DataAlt case alternative] and Note [CPR for strict binders]. +An additional w/w pass would simplify things, but probably add slight overhead. +So currently we have + +1. Strictness +2. CPR +3. worker/wrapper (for strictness and CPR) +-} + +-- +-- * Analysing programs +-- + +cprAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram +cprAnalProgram dflags fam_envs binds = do + let env = emptyAnalEnv fam_envs + let binds_plus_cpr = snd $ mapAccumL cprAnalTopBind env binds + dumpIfSet_dyn dflags Opt_D_dump_cpr_signatures "Cpr signatures" FormatText $ + dumpIdInfoOfProgram (ppr . cprInfo) binds_plus_cpr + -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Op.DmdAnal + seqBinds binds_plus_cpr `seq` return binds_plus_cpr + +-- Analyse a (group of) top-level binding(s) +cprAnalTopBind :: AnalEnv + -> CoreBind + -> (AnalEnv, CoreBind) +cprAnalTopBind env (NonRec id rhs) + = (extendAnalEnv env id' (idCprInfo id'), NonRec id' rhs') + where + (id', rhs') = cprAnalBind TopLevel env id rhs + +cprAnalTopBind env (Rec pairs) + = (env', Rec pairs') + where + (env', pairs') = cprFix TopLevel env pairs + +-- +-- * Analysing expressions +-- + +-- | The abstract semantic function ⟦_⟧ : Expr -> Env -> A from +-- "Constructed Product Result Analysis for Haskell" +cprAnal, cprAnal' + :: AnalEnv + -> CoreExpr -- ^ expression to be denoted by a 'CprType' + -> (CprType, CoreExpr) -- ^ the updated expression and its 'CprType' + +cprAnal env e = -- pprTraceWith "cprAnal" (\res -> ppr (fst (res)) $$ ppr e) $ + cprAnal' env e + +cprAnal' _ (Lit lit) = (topCprType, Lit lit) +cprAnal' _ (Type ty) = (topCprType, Type ty) -- Doesn't happen, in fact +cprAnal' _ (Coercion co) = (topCprType, Coercion co) + +cprAnal' env (Var var) = (cprTransform env var, Var var) + +cprAnal' env (Cast e co) + = (cpr_ty, Cast e' co) + where + (cpr_ty, e') = cprAnal env e + +cprAnal' env (Tick t e) + = (cpr_ty, Tick t e') + where + (cpr_ty, e') = cprAnal env e + +cprAnal' env (App fun (Type ty)) + = (fun_ty, App fun' (Type ty)) + where + (fun_ty, fun') = cprAnal env fun + +cprAnal' env (App fun arg) + = (res_ty, App fun' arg') + where + (fun_ty, fun') = cprAnal env fun + -- In contrast to DmdAnal, there is no useful (non-nested) CPR info to be + -- had by looking into the CprType of arg. + (_, arg') = cprAnal env arg + res_ty = applyCprTy fun_ty + +cprAnal' env (Lam var body) + | isTyVar var + , (body_ty, body') <- cprAnal env body + = (body_ty, Lam var body') + | otherwise + = (lam_ty, Lam var body') + where + env' = extendSigsWithLam env var + (body_ty, body') = cprAnal env' body + lam_ty = abstractCprTy body_ty + +cprAnal' env (Case scrut case_bndr ty alts) + = (res_ty, Case scrut' case_bndr ty alts') + where + (_, scrut') = cprAnal env scrut + -- Regardless whether scrut had the CPR property or not, the case binder + -- certainly has it. See 'extendEnvForDataAlt'. + (alt_tys, alts') = mapAndUnzip (cprAnalAlt env scrut case_bndr) alts + res_ty = foldl' lubCprType botCprType alt_tys + +cprAnal' env (Let (NonRec id rhs) body) + = (body_ty, Let (NonRec id' rhs') body') + where + (id', rhs') = cprAnalBind NotTopLevel env id rhs + env' = extendAnalEnv env id' (idCprInfo id') + (body_ty, body') = cprAnal env' body + +cprAnal' env (Let (Rec pairs) body) + = body_ty `seq` (body_ty, Let (Rec pairs') body') + where + (env', pairs') = cprFix NotTopLevel env pairs + (body_ty, body') = cprAnal env' body + +cprAnalAlt + :: AnalEnv + -> CoreExpr -- ^ scrutinee + -> Id -- ^ case binder + -> Alt Var -- ^ current alternative + -> (CprType, Alt Var) +cprAnalAlt env scrut case_bndr (con@(DataAlt dc),bndrs,rhs) + -- See 'extendEnvForDataAlt' and Note [CPR in a DataAlt case alternative] + = (rhs_ty, (con, bndrs, rhs')) + where + env_alt = extendEnvForDataAlt env scrut case_bndr dc bndrs + (rhs_ty, rhs') = cprAnal env_alt rhs +cprAnalAlt env _ _ (con,bndrs,rhs) + = (rhs_ty, (con, bndrs, rhs')) + where + (rhs_ty, rhs') = cprAnal env rhs + +-- +-- * CPR transformer +-- + +cprTransform :: AnalEnv -- ^ The analysis environment + -> Id -- ^ The function + -> CprType -- ^ The demand type of the function +cprTransform env id + = -- pprTrace "cprTransform" (vcat [ppr id, ppr sig]) + sig + where + sig + | isGlobalId id -- imported function or data con worker + = getCprSig (idCprInfo id) + | Just sig <- lookupSigEnv env id -- local let-bound + = getCprSig sig + | otherwise + = topCprType + +-- +-- * Bindings +-- + +-- Recursive bindings +cprFix :: TopLevelFlag + -> AnalEnv -- Does not include bindings for this binding + -> [(Id,CoreExpr)] + -> (AnalEnv, [(Id,CoreExpr)]) -- Binders annotated with stricness info + +cprFix top_lvl env orig_pairs + = loop 1 initial_pairs + where + bot_sig = mkCprSig 0 botCpr + -- See Note [Initialising strictness] in GHC.Core.Op.DmdAnal + initial_pairs | ae_virgin env = [(setIdCprInfo id bot_sig, rhs) | (id, rhs) <- orig_pairs ] + | otherwise = orig_pairs + + -- The fixed-point varies the idCprInfo field of the binders, and terminates if that + -- annotation does not change any more. + loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)]) + loop n pairs + | found_fixpoint = (final_anal_env, pairs') + | otherwise = loop (n+1) pairs' + where + found_fixpoint = map (idCprInfo . fst) pairs' == map (idCprInfo . fst) pairs + first_round = n == 1 + pairs' = step first_round pairs + final_anal_env = extendAnalEnvs env (map fst pairs') + + step :: Bool -> [(Id, CoreExpr)] -> [(Id, CoreExpr)] + step first_round pairs = pairs' + where + -- In all but the first iteration, delete the virgin flag + start_env | first_round = env + | otherwise = nonVirgin env + + start = extendAnalEnvs start_env (map fst pairs) + + (_, pairs') = mapAccumL my_downRhs start pairs + + my_downRhs env (id,rhs) + = (env', (id', rhs')) + where + (id', rhs') = cprAnalBind top_lvl env id rhs + env' = extendAnalEnv env id (idCprInfo id') + +-- | Process the RHS of the binding for a sensible arity, add the CPR signature +-- to the Id, and augment the environment with the signature as well. +cprAnalBind + :: TopLevelFlag + -> AnalEnv + -> Id + -> CoreExpr + -> (Id, CoreExpr) +cprAnalBind top_lvl env id rhs + = (id', rhs') + where + (rhs_ty, rhs') = cprAnal env rhs + -- possibly trim thunk CPR info + rhs_ty' + -- See Note [CPR for thunks] + | stays_thunk = trimCprTy rhs_ty + -- See Note [CPR for sum types] + | returns_sum = trimCprTy rhs_ty + | otherwise = rhs_ty + -- See Note [Arity trimming for CPR signatures] + sig = mkCprSigForArity (idArity id) rhs_ty' + id' = setIdCprInfo id sig + + -- See Note [CPR for thunks] + stays_thunk = is_thunk && not_strict + is_thunk = not (exprIsHNF rhs) && not (isJoinId id) + not_strict = not (isStrictDmd (idDemandInfo id)) + -- See Note [CPR for sum types] + (_, ret_ty) = splitPiTys (idType id) + not_a_prod = isNothing (deepSplitProductType_maybe (ae_fam_envs env) ret_ty) + returns_sum = not (isTopLevel top_lvl) && not_a_prod + +{- Note [Arity trimming for CPR signatures] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Although it doesn't affect correctness of the analysis per se, we have to trim +CPR signatures to idArity. Here's what might happen if we don't: + + f x = if expensive + then \y. Box y + else \z. Box z + g a b = f a b + +The two lambdas will have a CPR type of @1m@ (so construct a product after +applied to one argument). Thus, @f@ will have a CPR signature of @2m@ +(constructs a product after applied to two arguments). +But WW will never eta-expand @f@! In this case that would amount to possibly +duplicating @expensive@ work. + +(Side note: Even if @f@'s 'idArity' happened to be 2, it would not do so, see +Note [Don't eta expand in w/w].) + +So @f@ will not be worker/wrappered. But @g@ also inherited its CPR signature +from @f@'s, so it *will* be WW'd: + + f x = if expensive + then \y. Box y + else \z. Box z + $wg a b = case f a b of Box x -> x + g a b = Box ($wg a b) + +And the case in @g@ can never cancel away, thus we introduced extra reboxing. +Hence we always trim the CPR signature of a binding to idArity. +-} + +data AnalEnv + = AE + { ae_sigs :: SigEnv + -- ^ Current approximation of signatures for local ids + , ae_virgin :: Bool + -- ^ True only on every first iteration in a fixed-point + -- iteration. See Note [Initialising strictness] in "DmdAnal" + , ae_fam_envs :: FamInstEnvs + -- ^ Needed when expanding type families and synonyms of product types. + } + +type SigEnv = VarEnv CprSig + +instance Outputable AnalEnv where + ppr (AE { ae_sigs = env, ae_virgin = virgin }) + = text "AE" <+> braces (vcat + [ text "ae_virgin =" <+> ppr virgin + , text "ae_sigs =" <+> ppr env ]) + +emptyAnalEnv :: FamInstEnvs -> AnalEnv +emptyAnalEnv fam_envs + = AE + { ae_sigs = emptyVarEnv + , ae_virgin = True + , ae_fam_envs = fam_envs + } + +-- | Extend an environment with the strictness IDs attached to the id +extendAnalEnvs :: AnalEnv -> [Id] -> AnalEnv +extendAnalEnvs env ids + = env { ae_sigs = sigs' } + where + sigs' = extendVarEnvList (ae_sigs env) [ (id, idCprInfo id) | id <- ids ] + +extendAnalEnv :: AnalEnv -> Id -> CprSig -> AnalEnv +extendAnalEnv env id sig + = env { ae_sigs = extendVarEnv (ae_sigs env) id sig } + +lookupSigEnv :: AnalEnv -> Id -> Maybe CprSig +lookupSigEnv env id = lookupVarEnv (ae_sigs env) id + +nonVirgin :: AnalEnv -> AnalEnv +nonVirgin env = env { ae_virgin = False } + +extendSigsWithLam :: AnalEnv -> Id -> AnalEnv +-- Extend the AnalEnv when we meet a lambda binder +extendSigsWithLam env id + | isId id + , isStrictDmd (idDemandInfo id) -- See Note [CPR for strict binders] + , Just (dc,_,_,_) <- deepSplitProductType_maybe (ae_fam_envs env) $ idType id + = extendAnalEnv env id (CprSig (conCprType (dataConTag dc))) + | otherwise + = env + +extendEnvForDataAlt :: AnalEnv -> CoreExpr -> Id -> DataCon -> [Var] -> AnalEnv +-- See Note [CPR in a DataAlt case alternative] +extendEnvForDataAlt env scrut case_bndr dc bndrs + = foldl' do_con_arg env' ids_w_strs + where + env' = extendAnalEnv env case_bndr (CprSig case_bndr_ty) + + ids_w_strs = filter isId bndrs `zip` dataConRepStrictness dc + + tycon = dataConTyCon dc + is_product = isJust (isDataProductTyCon_maybe tycon) + is_sum = isJust (isDataSumTyCon_maybe tycon) + case_bndr_ty + | is_product || is_sum = conCprType (dataConTag dc) + -- Any of the constructors had existentials. This is a little too + -- conservative (after all, we only care about the particular data con), + -- but there is no easy way to write is_sum and this won't happen much. + | otherwise = topCprType + + -- We could have much deeper CPR info here with Nested CPR, which could + -- propagate available unboxed things from the scrutinee, getting rid of + -- the is_var_scrut heuristic. See Note [CPR in a DataAlt case alternative]. + -- Giving strict binders the CPR property only makes sense for products, as + -- the arguments in Note [CPR for strict binders] don't apply to sums (yet); + -- we lack WW for strict binders of sum type. + do_con_arg env (id, str) + | let is_strict = isStrictDmd (idDemandInfo id) || isMarkedStrict str + , is_var_scrut && is_strict + , let fam_envs = ae_fam_envs env + , Just (dc,_,_,_) <- deepSplitProductType_maybe fam_envs $ idType id + = extendAnalEnv env id (CprSig (conCprType (dataConTag dc))) + | otherwise + = env + + is_var_scrut = is_var scrut + is_var (Cast e _) = is_var e + is_var (Var v) = isLocalId v + is_var _ = False + +{- Note [Safe abortion in the fixed-point iteration] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fixed-point iteration may fail to terminate. But we cannot simply give up and +return the environment and code unchanged! We still need to do one additional +round, to ensure that all expressions have been traversed at least once, and any +unsound CPR annotations have been updated. + +Note [CPR in a DataAlt case alternative] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a case alternative, we want to give some of the binders the CPR property. +Specifically + + * The case binder; inside the alternative, the case binder always has + the CPR property, meaning that a case on it will successfully cancel. + Example: + f True x = case x of y { I# x' -> if x' ==# 3 + then y + else I# 8 } + f False x = I# 3 + + By giving 'y' the CPR property, we ensure that 'f' does too, so we get + f b x = case fw b x of { r -> I# r } + fw True x = case x of y { I# x' -> if x' ==# 3 then x' else 8 } + fw False x = 3 + + Of course there is the usual risk of re-boxing: we have 'x' available + boxed and unboxed, but we return the unboxed version for the wrapper to + box. If the wrapper doesn't cancel with its caller, we'll end up + re-boxing something that we did have available in boxed form. + + * Any strict binders with product type, can use Note [CPR for strict binders] + to anticipate worker/wrappering for strictness info. + But we can go a little further. Consider + + data T = MkT !Int Int + + f2 (MkT x y) | y>0 = f2 (MkT x (y-1)) + | otherwise = x + + For $wf2 we are going to unbox the MkT *and*, since it is strict, the + first argument of the MkT; see Note [Add demands for strict constructors]. + But then we don't want box it up again when returning it! We want + 'f2' to have the CPR property, so we give 'x' the CPR property. + + * It's a bit delicate because we're brittly anticipating worker/wrapper here. + If the case above is scrutinising something other than an argument the + original function, we really don't have the unboxed version available. E.g + g v = case foo v of + MkT x y | y>0 -> ... + | otherwise -> x + Here we don't have the unboxed 'x' available. Hence the + is_var_scrut test when making use of the strictness annotation. + Slightly ad-hoc, because even if the scrutinee *is* a variable it + might not be a onre of the arguments to the original function, or a + sub-component thereof. But it's simple, and nothing terrible + happens if we get it wrong. e.g. Trac #10694. + +Note [CPR for strict binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If a lambda-bound variable is marked demanded with a strict demand, then give it +a CPR signature, anticipating the results of worker/wrapper. Here's a concrete +example ('f1' in test T10482a), assuming h is strict: + + f1 :: Int -> Int + f1 x = case h x of + A -> x + B -> f1 (x-1) + C -> x+1 + +If we notice that 'x' is used strictly, we can give it the CPR +property; and hence f1 gets the CPR property too. It's sound (doesn't +change strictness) to give it the CPR property because by the time 'x' +is returned (case A above), it'll have been evaluated (by the wrapper +of 'h' in the example). + +Moreover, if f itself is strict in x, then we'll pass x unboxed to +f1, and so the boxed version *won't* be available; in that case it's +very helpful to give 'x' the CPR property. + +Note that + + * We only want to do this for something that definitely + has product type, else we may get over-optimistic CPR results + (e.g. from \x -> x!). + + * See Note [CPR examples] + +Note [CPR for sum types] +~~~~~~~~~~~~~~~~~~~~~~~~ +At the moment we do not do CPR for let-bindings that + * non-top level + * bind a sum type +Reason: I found that in some benchmarks we were losing let-no-escapes, +which messed it all up. Example + let j = \x. .... + in case y of + True -> j False + False -> j True +If we w/w this we get + let j' = \x. .... + in case y of + True -> case j' False of { (# a #) -> Just a } + False -> case j' True of { (# a #) -> Just a } +Notice that j' is not a let-no-escape any more. + +However this means in turn that the *enclosing* function +may be CPR'd (via the returned Justs). But in the case of +sums, there may be Nothing alternatives; and that messes +up the sum-type CPR. + +Conclusion: only do this for products. It's still not +guaranteed OK for products, but sums definitely lose sometimes. + +Note [CPR for thunks] +~~~~~~~~~~~~~~~~~~~~~ +If the rhs is a thunk, we usually forget the CPR info, because +it is presumably shared (else it would have been inlined, and +so we'd lose sharing if w/w'd it into a function). E.g. + + let r = case expensive of + (a,b) -> (b,a) + in ... + +If we marked r as having the CPR property, then we'd w/w into + + let $wr = \() -> case expensive of + (a,b) -> (# b, a #) + r = case $wr () of + (# b,a #) -> (b,a) + in ... + +But now r is a thunk, which won't be inlined, so we are no further ahead. +But consider + + f x = let r = case expensive of (a,b) -> (b,a) + in if foo r then r else (x,x) + +Does f have the CPR property? Well, no. + +However, if the strictness analyser has figured out (in a previous +iteration) that it's strict, then we DON'T need to forget the CPR info. +Instead we can retain the CPR info and do the thunk-splitting transform +(see WorkWrap.splitThunk). + +This made a big difference to PrelBase.modInt, which had something like + modInt = \ x -> let r = ... -> I# v in + ...body strict in r... +r's RHS isn't a value yet; but modInt returns r in various branches, so +if r doesn't have the CPR property then neither does modInt +Another case I found in practice (in Complex.magnitude), looks like this: + let k = if ... then I# a else I# b + in ... body strict in k .... +(For this example, it doesn't matter whether k is returned as part of +the overall result; but it does matter that k's RHS has the CPR property.) +Left to itself, the simplifier will make a join point thus: + let $j k = ...body strict in k... + if ... then $j (I# a) else $j (I# b) +With thunk-splitting, we get instead + let $j x = let k = I#x in ...body strict in k... + in if ... then $j a else $j b +This is much better; there's a good chance the I# won't get allocated. + +But what about botCpr? Consider + lvl = error "boom" + fac -1 = lvl + fac 0 = 1 + fac n = n * fac (n-1) +fac won't have the CPR property here when we trim every thunk! But the +assumption is that error cases are rarely entered and we are diverging anyway, +so WW doesn't hurt. + +Note [CPR examples] +~~~~~~~~~~~~~~~~~~~~ +Here are some examples (stranal/should_compile/T10482a) of the +usefulness of Note [CPR in a DataAlt case alternative]. The main +point: all of these functions can have the CPR property. + + ------- f1 ----------- + -- x is used strictly by h, so it'll be available + -- unboxed before it is returned in the True branch + + f1 :: Int -> Int + f1 x = case h x x of + True -> x + False -> f1 (x-1) + + + ------- f2 ----------- + -- x is a strict field of MkT2, so we'll pass it unboxed + -- to $wf2, so it's available unboxed. This depends on + -- the case expression analysing (a subcomponent of) one + -- of the original arguments to the function, so it's + -- a bit more delicate. + + data T2 = MkT2 !Int Int + + f2 :: T2 -> Int + f2 (MkT2 x y) | y>0 = f2 (MkT2 x (y-1)) + | otherwise = x + + + ------- f3 ----------- + -- h is strict in x, so x will be unboxed before it + -- is rerturned in the otherwise case. + + data T3 = MkT3 Int Int + + f1 :: T3 -> Int + f1 (MkT3 x y) | h x y = f3 (MkT3 x (y-1)) + | otherwise = x + + + ------- f4 ----------- + -- Just like f2, but MkT4 can't unbox its strict + -- argument automatically, as f2 can + + data family Foo a + newtype instance Foo Int = Foo Int + + data T4 a = MkT4 !(Foo a) Int + + f4 :: T4 Int -> Int + f4 (MkT4 x@(Foo v) y) | y>0 = f4 (MkT4 x (y-1)) + | otherwise = v +-} diff --git a/compiler/GHC/Core/Op/DmdAnal.hs b/compiler/GHC/Core/Op/DmdAnal.hs new file mode 100644 index 0000000000..57544519d3 --- /dev/null +++ b/compiler/GHC/Core/Op/DmdAnal.hs @@ -0,0 +1,1265 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + + + ----------------- + A demand analysis + ----------------- +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.DmdAnal ( dmdAnalProgram ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Driver.Session +import GHC.Core.Op.WorkWrap.Lib ( findTypeShape ) +import Demand -- All of it +import GHC.Core +import GHC.Core.Seq ( seqBinds ) +import Outputable +import VarEnv +import BasicTypes +import Data.List ( mapAccumL ) +import GHC.Core.DataCon +import Id +import IdInfo +import GHC.Core.Utils +import GHC.Core.TyCon +import GHC.Core.Type +import GHC.Core.Coercion ( Coercion, coVarsOfCo ) +import GHC.Core.FamInstEnv +import Util +import Maybes ( isJust ) +import TysWiredIn +import TysPrim ( realWorldStatePrimTy ) +import ErrUtils ( dumpIfSet_dyn, DumpFormat (..) ) +import UniqSet + +{- +************************************************************************ +* * +\subsection{Top level stuff} +* * +************************************************************************ +-} + +dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram +dmdAnalProgram dflags fam_envs binds = do + let env = emptyAnalEnv dflags fam_envs + let binds_plus_dmds = snd $ mapAccumL dmdAnalTopBind env binds + dumpIfSet_dyn dflags Opt_D_dump_str_signatures "Strictness signatures" FormatText $ + dumpIdInfoOfProgram (pprIfaceStrictSig . strictnessInfo) binds_plus_dmds + -- See Note [Stamp out space leaks in demand analysis] + seqBinds binds_plus_dmds `seq` return binds_plus_dmds + +-- Analyse a (group of) top-level binding(s) +dmdAnalTopBind :: AnalEnv + -> CoreBind + -> (AnalEnv, CoreBind) +dmdAnalTopBind env (NonRec id rhs) + = (extendAnalEnv TopLevel env id' (idStrictness id'), NonRec id' rhs') + where + ( _, id', rhs') = dmdAnalRhsLetDown Nothing env cleanEvalDmd id rhs + +dmdAnalTopBind env (Rec pairs) + = (env', Rec pairs') + where + (env', _, pairs') = dmdFix TopLevel env cleanEvalDmd pairs + -- We get two iterations automatically + -- c.f. the NonRec case above + +{- Note [Stamp out space leaks in demand analysis] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The demand analysis pass outputs a new copy of the Core program in +which binders have been annotated with demand and strictness +information. It's tiresome to ensure that this information is fully +evaluated everywhere that we produce it, so we just run a single +seqBinds over the output before returning it, to ensure that there are +no references holding on to the input Core program. + +This makes a ~30% reduction in peak memory usage when compiling +DynFlags (cf #9675 and #13426). + +This is particularly important when we are doing late demand analysis, +since we don't do a seqBinds at any point thereafter. Hence code +generation would hold on to an extra copy of the Core program, via +unforced thunks in demand or strictness information; and it is the +most memory-intensive part of the compilation process, so this added +seqBinds makes a big difference in peak memory usage. +-} + + +{- +************************************************************************ +* * +\subsection{The analyser itself} +* * +************************************************************************ + +Note [Ensure demand is strict] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's important not to analyse e with a lazy demand because +a) When we encounter case s of (a,b) -> + we demand s with U(d1d2)... but if the overall demand is lazy + that is wrong, and we'd need to reduce the demand on s, + which is inconvenient +b) More important, consider + f (let x = R in x+x), where f is lazy + We still want to mark x as demanded, because it will be when we + enter the let. If we analyse f's arg with a Lazy demand, we'll + just mark x as Lazy +c) The application rule wouldn't be right either + Evaluating (f x) in a L demand does *not* cause + evaluation of f in a C(L) demand! +-} + +-- If e is complicated enough to become a thunk, its contents will be evaluated +-- at most once, so oneify it. +dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand +dmdTransformThunkDmd e + | exprIsTrivial e = id + | otherwise = oneifyDmd + +-- Do not process absent demands +-- Otherwise act like in a normal demand analysis +-- See ↦* relation in the Cardinality Analysis paper +dmdAnalStar :: AnalEnv + -> Demand -- This one takes a *Demand* + -> CoreExpr -- Should obey the let/app invariant + -> (BothDmdArg, CoreExpr) +dmdAnalStar env dmd e + | (dmd_shell, cd) <- toCleanDmd dmd + , (dmd_ty, e') <- dmdAnal env cd e + = ASSERT2( not (isUnliftedType (exprType e)) || exprOkForSpeculation e, ppr e ) + -- The argument 'e' should satisfy the let/app invariant + -- See Note [Analysing with absent demand] in Demand.hs + (postProcessDmdType dmd_shell dmd_ty, e') + +-- Main Demand Analsysis machinery +dmdAnal, dmdAnal' :: AnalEnv + -> CleanDemand -- The main one takes a *CleanDemand* + -> CoreExpr -> (DmdType, CoreExpr) + +-- The CleanDemand is always strict and not absent +-- See Note [Ensure demand is strict] + +dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $ + dmdAnal' env d e + +dmdAnal' _ _ (Lit lit) = (nopDmdType, Lit lit) +dmdAnal' _ _ (Type ty) = (nopDmdType, Type ty) -- Doesn't happen, in fact +dmdAnal' _ _ (Coercion co) + = (unitDmdType (coercionDmdEnv co), Coercion co) + +dmdAnal' env dmd (Var var) + = (dmdTransform env var dmd, Var var) + +dmdAnal' env dmd (Cast e co) + = (dmd_ty `bothDmdType` mkBothDmdArg (coercionDmdEnv co), Cast e' co) + where + (dmd_ty, e') = dmdAnal env dmd e + +dmdAnal' env dmd (Tick t e) + = (dmd_ty, Tick t e') + where + (dmd_ty, e') = dmdAnal env dmd e + +dmdAnal' env dmd (App fun (Type ty)) + = (fun_ty, App fun' (Type ty)) + where + (fun_ty, fun') = dmdAnal env dmd fun + +-- Lots of the other code is there to make this +-- beautiful, compositional, application rule :-) +dmdAnal' env dmd (App fun arg) + = -- This case handles value arguments (type args handled above) + -- Crucially, coercions /are/ handled here, because they are + -- value arguments (#10288) + let + call_dmd = mkCallDmd dmd + (fun_ty, fun') = dmdAnal env call_dmd fun + (arg_dmd, res_ty) = splitDmdTy fun_ty + (arg_ty, arg') = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg + in +-- pprTrace "dmdAnal:app" (vcat +-- [ text "dmd =" <+> ppr dmd +-- , text "expr =" <+> ppr (App fun arg) +-- , text "fun dmd_ty =" <+> ppr fun_ty +-- , text "arg dmd =" <+> ppr arg_dmd +-- , text "arg dmd_ty =" <+> ppr arg_ty +-- , text "res dmd_ty =" <+> ppr res_ty +-- , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ]) + (res_ty `bothDmdType` arg_ty, App fun' arg') + +dmdAnal' env dmd (Lam var body) + | isTyVar var + = let + (body_ty, body') = dmdAnal env dmd body + in + (body_ty, Lam var body') + + | otherwise + = let (body_dmd, defer_and_use) = peelCallDmd dmd + -- body_dmd: a demand to analyze the body + + (body_ty, body') = dmdAnal env body_dmd body + (lam_ty, var') = annotateLamIdBndr env notArgOfDfun body_ty var + in + (postProcessUnsat defer_and_use lam_ty, Lam var' body') + +dmdAnal' env dmd (Case scrut case_bndr ty [(DataAlt dc, bndrs, rhs)]) + -- Only one alternative with a product constructor + | let tycon = dataConTyCon dc + , isJust (isDataProductTyCon_maybe tycon) + , Just rec_tc' <- checkRecTc (ae_rec_tc env) tycon + = let + env_alt = env { ae_rec_tc = rec_tc' } + (rhs_ty, rhs') = dmdAnal env_alt dmd rhs + (alt_ty1, dmds) = findBndrsDmds env rhs_ty bndrs + (alt_ty2, case_bndr_dmd) = findBndrDmd env False alt_ty1 case_bndr + id_dmds = addCaseBndrDmd case_bndr_dmd dmds + alt_ty3 | io_hack_reqd scrut dc bndrs = deferAfterIO alt_ty2 + | otherwise = alt_ty2 + + -- Compute demand on the scrutinee + -- See Note [Demand on scrutinee of a product case] + scrut_dmd = mkProdDmd id_dmds + (scrut_ty, scrut') = dmdAnal env scrut_dmd scrut + res_ty = alt_ty3 `bothDmdType` toBothDmdArg scrut_ty + case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd + bndrs' = setBndrsDemandInfo bndrs id_dmds + in +-- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut +-- , text "dmd" <+> ppr dmd +-- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr') +-- , text "id_dmds" <+> ppr id_dmds +-- , text "scrut_dmd" <+> ppr scrut_dmd +-- , text "scrut_ty" <+> ppr scrut_ty +-- , text "alt_ty" <+> ppr alt_ty2 +-- , text "res_ty" <+> ppr res_ty ]) $ + (res_ty, Case scrut' case_bndr' ty [(DataAlt dc, bndrs', rhs')]) + +dmdAnal' env dmd (Case scrut case_bndr ty alts) + = let -- Case expression with multiple alternatives + (alt_tys, alts') = mapAndUnzip (dmdAnalAlt env dmd case_bndr) alts + (scrut_ty, scrut') = dmdAnal env cleanEvalDmd scrut + (alt_ty, case_bndr') = annotateBndr env (foldr lubDmdType botDmdType alt_tys) case_bndr + -- NB: Base case is botDmdType, for empty case alternatives + -- This is a unit for lubDmdType, and the right result + -- when there really are no alternatives + res_ty = alt_ty `bothDmdType` toBothDmdArg scrut_ty + in +-- pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut +-- , text "scrut_ty" <+> ppr scrut_ty +-- , text "alt_tys" <+> ppr alt_tys +-- , text "alt_ty" <+> ppr alt_ty +-- , text "res_ty" <+> ppr res_ty ]) $ + (res_ty, Case scrut' case_bndr' ty alts') + +-- Let bindings can be processed in two ways: +-- Down (RHS before body) or Up (body before RHS). +-- The following case handle the up variant. +-- +-- It is very simple. For let x = rhs in body +-- * Demand-analyse 'body' in the current environment +-- * Find the demand, 'rhs_dmd' placed on 'x' by 'body' +-- * Demand-analyse 'rhs' in 'rhs_dmd' +-- +-- This is used for a non-recursive local let without manifest lambdas. +-- This is the LetUp rule in the paper “Higher-Order Cardinality Analysis”. +dmdAnal' env dmd (Let (NonRec id rhs) body) + | useLetUp id + = (final_ty, Let (NonRec id' rhs') body') + where + (body_ty, body') = dmdAnal env dmd body + (body_ty', id_dmd) = findBndrDmd env notArgOfDfun body_ty id + id' = setIdDemandInfo id id_dmd + + (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs + final_ty = body_ty' `bothDmdType` rhs_ty + +dmdAnal' env dmd (Let (NonRec id rhs) body) + = (body_ty2, Let (NonRec id2 rhs') body') + where + (lazy_fv, id1, rhs') = dmdAnalRhsLetDown Nothing env dmd id rhs + env1 = extendAnalEnv NotTopLevel env id1 (idStrictness id1) + (body_ty, body') = dmdAnal env1 dmd body + (body_ty1, id2) = annotateBndr env body_ty id1 + body_ty2 = addLazyFVs body_ty1 lazy_fv -- see Note [Lazy and unleashable free variables] + + -- If the actual demand is better than the vanilla call + -- demand, you might think that we might do better to re-analyse + -- the RHS with the stronger demand. + -- But (a) That seldom happens, because it means that *every* path in + -- the body of the let has to use that stronger demand + -- (b) It often happens temporarily in when fixpointing, because + -- the recursive function at first seems to place a massive demand. + -- But we don't want to go to extra work when the function will + -- probably iterate to something less demanding. + -- In practice, all the times the actual demand on id2 is more than + -- the vanilla call demand seem to be due to (b). So we don't + -- bother to re-analyse the RHS. + +dmdAnal' env dmd (Let (Rec pairs) body) + = let + (env', lazy_fv, pairs') = dmdFix NotTopLevel env dmd pairs + (body_ty, body') = dmdAnal env' dmd body + body_ty1 = deleteFVs body_ty (map fst pairs) + body_ty2 = addLazyFVs body_ty1 lazy_fv -- see Note [Lazy and unleashable free variables] + in + body_ty2 `seq` + (body_ty2, Let (Rec pairs') body') + +io_hack_reqd :: CoreExpr -> DataCon -> [Var] -> Bool +-- See Note [IO hack in the demand analyser] +io_hack_reqd scrut con bndrs + | (bndr:_) <- bndrs + , con == tupleDataCon Unboxed 2 + , idType bndr `eqType` realWorldStatePrimTy + , (fun, _) <- collectArgs scrut + = case fun of + Var f -> not (isPrimOpId f) + _ -> True + | otherwise + = False + +dmdAnalAlt :: AnalEnv -> CleanDemand -> Id -> Alt Var -> (DmdType, Alt Var) +dmdAnalAlt env dmd case_bndr (con,bndrs,rhs) + | null bndrs -- Literals, DEFAULT, and nullary constructors + , (rhs_ty, rhs') <- dmdAnal env dmd rhs + = (rhs_ty, (con, [], rhs')) + + | otherwise -- Non-nullary data constructors + , (rhs_ty, rhs') <- dmdAnal env dmd rhs + , (alt_ty, dmds) <- findBndrsDmds env rhs_ty bndrs + , let case_bndr_dmd = findIdDemand alt_ty case_bndr + id_dmds = addCaseBndrDmd case_bndr_dmd dmds + = (alt_ty, (con, setBndrsDemandInfo bndrs id_dmds, rhs')) + + +{- Note [IO hack in the demand analyser] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There's a hack here for I/O operations. Consider + + case foo x s of { (# s', r #) -> y } + +Is this strict in 'y'? Often not! If foo x s performs some observable action +(including raising an exception with raiseIO#, modifying a mutable variable, or +even ending the program normally), then we must not force 'y' (which may fail +to terminate) until we have performed foo x s. + +Hackish solution: spot the IO-like situation and add a virtual branch, +as if we had + case foo x s of + (# s, r #) -> y + other -> return () +So the 'y' isn't necessarily going to be evaluated + +A more complete example (#148, #1592) where this shows up is: + do { let len = <expensive> ; + ; when (...) (exitWith ExitSuccess) + ; print len } + +However, consider + f x s = case getMaskingState# s of + (# s, r #) -> + case x of I# x2 -> ... + +Here it is terribly sad to make 'f' lazy in 's'. After all, +getMaskingState# is not going to diverge or throw an exception! This +situation actually arises in GHC.IO.Handle.Internals.wantReadableHandle +(on an MVar not an Int), and made a material difference. + +So if the scrutinee is a primop call, we *don't* apply the +state hack: + - If it is a simple, terminating one like getMaskingState, + applying the hack is over-conservative. + - If the primop is raise# then it returns bottom, so + the case alternatives are already discarded. + - If the primop can raise a non-IO exception, like + divide by zero or seg-fault (eg writing an array + out of bounds) then we don't mind evaluating 'x' first. + +Note [Demand on the scrutinee of a product case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When figuring out the demand on the scrutinee of a product case, +we use the demands of the case alternative, i.e. id_dmds. +But note that these include the demand on the case binder; +see Note [Demand on case-alternative binders] in Demand.hs. +This is crucial. Example: + f x = case x of y { (a,b) -> k y a } +If we just take scrut_demand = U(L,A), then we won't pass x to the +worker, so the worker will rebuild + x = (a, absent-error) +and that'll crash. + +Note [Aggregated demand for cardinality] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We use different strategies for strictness and usage/cardinality to +"unleash" demands captured on free variables by bindings. Let us +consider the example: + +f1 y = let {-# NOINLINE h #-} + h = y + in (h, h) + +We are interested in obtaining cardinality demand U1 on |y|, as it is +used only in a thunk, and, therefore, is not going to be updated any +more. Therefore, the demand on |y|, captured and unleashed by usage of +|h| is U1. However, if we unleash this demand every time |h| is used, +and then sum up the effects, the ultimate demand on |y| will be U1 + +U1 = U. In order to avoid it, we *first* collect the aggregate demand +on |h| in the body of let-expression, and only then apply the demand +transformer: + +transf[x](U) = {y |-> U1} + +so the resulting demand on |y| is U1. + +The situation is, however, different for strictness, where this +aggregating approach exhibits worse results because of the nature of +|both| operation for strictness. Consider the example: + +f y c = + let h x = y |seq| x + in case of + True -> h True + False -> y + +It is clear that |f| is strict in |y|, however, the suggested analysis +will infer from the body of |let| that |h| is used lazily (as it is +used in one branch only), therefore lazy demand will be put on its +free variable |y|. Conversely, if the demand on |h| is unleashed right +on the spot, we will get the desired result, namely, that |f| is +strict in |y|. + + +************************************************************************ +* * + Demand transformer +* * +************************************************************************ +-} + +dmdTransform :: AnalEnv -- The strictness environment + -> Id -- The function + -> CleanDemand -- The demand on the function + -> DmdType -- The demand type of the function in this context + -- Returned DmdEnv includes the demand on + -- this function plus demand on its free variables + +dmdTransform env var dmd + | isDataConWorkId var -- Data constructor + = dmdTransformDataConSig (idArity var) (idStrictness var) dmd + + | gopt Opt_DmdTxDictSel (ae_dflags env), + Just _ <- isClassOpId_maybe var -- Dictionary component selector + = dmdTransformDictSelSig (idStrictness var) dmd + + | isGlobalId var -- Imported function + , let res = dmdTransformSig (idStrictness var) dmd + = -- pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res]) + res + + | Just (sig, top_lvl) <- lookupSigEnv env var -- Local letrec bound thing + , let fn_ty = dmdTransformSig sig dmd + = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $ + if isTopLevel top_lvl + then fn_ty -- Don't record top level things + else addVarDmd fn_ty var (mkOnceUsedDmd dmd) + + | otherwise -- Local non-letrec-bound thing + = unitDmdType (unitVarEnv var (mkOnceUsedDmd dmd)) + +{- +************************************************************************ +* * +\subsection{Bindings} +* * +************************************************************************ +-} + +-- Recursive bindings +dmdFix :: TopLevelFlag + -> AnalEnv -- Does not include bindings for this binding + -> CleanDemand + -> [(Id,CoreExpr)] + -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) -- Binders annotated with strictness info + +dmdFix top_lvl env let_dmd orig_pairs + = loop 1 initial_pairs + where + bndrs = map fst orig_pairs + + -- See Note [Initialising strictness] + initial_pairs | ae_virgin env = [(setIdStrictness id botSig, rhs) | (id, rhs) <- orig_pairs ] + | otherwise = orig_pairs + + -- If fixed-point iteration does not yield a result we use this instead + -- See Note [Safe abortion in the fixed-point iteration] + abort :: (AnalEnv, DmdEnv, [(Id,CoreExpr)]) + abort = (env, lazy_fv', zapped_pairs) + where (lazy_fv, pairs') = step True (zapIdStrictness orig_pairs) + -- Note [Lazy and unleashable free variables] + non_lazy_fvs = plusVarEnvList $ map (strictSigDmdEnv . idStrictness . fst) pairs' + lazy_fv' = lazy_fv `plusVarEnv` mapVarEnv (const topDmd) non_lazy_fvs + zapped_pairs = zapIdStrictness pairs' + + -- The fixed-point varies the idStrictness field of the binders, and terminates if that + -- annotation does not change any more. + loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) + loop n pairs + | found_fixpoint = (final_anal_env, lazy_fv, pairs') + | n == 10 = abort + | otherwise = loop (n+1) pairs' + where + found_fixpoint = map (idStrictness . fst) pairs' == map (idStrictness . fst) pairs + first_round = n == 1 + (lazy_fv, pairs') = step first_round pairs + final_anal_env = extendAnalEnvs top_lvl env (map fst pairs') + + step :: Bool -> [(Id, CoreExpr)] -> (DmdEnv, [(Id, CoreExpr)]) + step first_round pairs = (lazy_fv, pairs') + where + -- In all but the first iteration, delete the virgin flag + start_env | first_round = env + | otherwise = nonVirgin env + + start = (extendAnalEnvs top_lvl start_env (map fst pairs), emptyDmdEnv) + + ((_,lazy_fv), pairs') = mapAccumL my_downRhs start pairs + -- mapAccumL: Use the new signature to do the next pair + -- The occurrence analyser has arranged them in a good order + -- so this can significantly reduce the number of iterations needed + + my_downRhs (env, lazy_fv) (id,rhs) + = ((env', lazy_fv'), (id', rhs')) + where + (lazy_fv1, id', rhs') = dmdAnalRhsLetDown (Just bndrs) env let_dmd id rhs + lazy_fv' = plusVarEnv_C bothDmd lazy_fv lazy_fv1 + env' = extendAnalEnv top_lvl env id (idStrictness id') + + + zapIdStrictness :: [(Id, CoreExpr)] -> [(Id, CoreExpr)] + zapIdStrictness pairs = [(setIdStrictness id nopSig, rhs) | (id, rhs) <- pairs ] + +{- +Note [Safe abortion in the fixed-point iteration] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fixed-point iteration may fail to terminate. But we cannot simply give up and +return the environment and code unchanged! We still need to do one additional +round, for two reasons: + + * To get information on used free variables (both lazy and strict!) + (see Note [Lazy and unleashable free variables]) + * To ensure that all expressions have been traversed at least once, and any left-over + strictness annotations have been updated. + +This final iteration does not add the variables to the strictness signature +environment, which effectively assigns them 'nopSig' (see "getStrictness") + +-} + +-- Let bindings can be processed in two ways: +-- Down (RHS before body) or Up (body before RHS). +-- dmdAnalRhsLetDown implements the Down variant: +-- * assuming a demand of <L,U> +-- * looking at the definition +-- * determining a strictness signature +-- +-- It is used for toplevel definition, recursive definitions and local +-- non-recursive definitions that have manifest lambdas. +-- Local non-recursive definitions without a lambda are handled with LetUp. +-- +-- This is the LetDown rule in the paper “Higher-Order Cardinality Analysis”. +dmdAnalRhsLetDown + :: Maybe [Id] -- Just bs <=> recursive, Nothing <=> non-recursive + -> AnalEnv -> CleanDemand + -> Id -> CoreExpr + -> (DmdEnv, Id, CoreExpr) +-- Process the RHS of the binding, add the strictness signature +-- to the Id, and augment the environment with the signature as well. +dmdAnalRhsLetDown rec_flag env let_dmd id rhs + = (lazy_fv, id', rhs') + where + rhs_arity = idArity id + rhs_dmd + -- See Note [Demand analysis for join points] + -- See Note [Invariants on join points] invariant 2b, in GHC.Core + -- rhs_arity matches the join arity of the join point + | isJoinId id + = mkCallDmds rhs_arity let_dmd + | otherwise + -- NB: rhs_arity + -- See Note [Demand signatures are computed for a threshold demand based on idArity] + = mkRhsDmd env rhs_arity rhs + (DmdType rhs_fv rhs_dmds rhs_div, rhs') + = dmdAnal env rhs_dmd rhs + -- TODO: Won't the following line unnecessarily trim down arity for join + -- points returning a lambda in a C(S) context? + sig = mkStrictSigForArity rhs_arity (mkDmdType sig_fv rhs_dmds rhs_div) + id' = set_idStrictness env id sig + -- See Note [NOINLINE and strictness] + + + -- See Note [Aggregated demand for cardinality] + rhs_fv1 = case rec_flag of + Just bs -> reuseEnv (delVarEnvList rhs_fv bs) + Nothing -> rhs_fv + + -- See Note [Lazy and unleashable free variables] + (lazy_fv, sig_fv) = splitFVs is_thunk rhs_fv1 + is_thunk = not (exprIsHNF rhs) && not (isJoinId id) + +-- | @mkRhsDmd env rhs_arity rhs@ creates a 'CleanDemand' for +-- unleashing on the given function's @rhs@, by creating a call demand of +-- @rhs_arity@ with a body demand appropriate for possible product types. +-- See Note [Product demands for function body]. +-- For example, a call of the form @mkRhsDmd _ 2 (\x y -> (x, y))@ returns a +-- clean usage demand of @C1(C1(U(U,U)))@. +mkRhsDmd :: AnalEnv -> Arity -> CoreExpr -> CleanDemand +mkRhsDmd env rhs_arity rhs = + case peelTsFuns rhs_arity (findTypeShape (ae_fam_envs env) (exprType rhs)) of + Just (TsProd tss) -> mkCallDmds rhs_arity (cleanEvalProdDmd (length tss)) + _ -> mkCallDmds rhs_arity cleanEvalDmd + +-- | If given the let-bound 'Id', 'useLetUp' determines whether we should +-- process the binding up (body before rhs) or down (rhs before body). +-- +-- We use LetDown if there is a chance to get a useful strictness signature to +-- unleash at call sites. LetDown is generally more precise than LetUp if we can +-- correctly guess how it will be used in the body, that is, for which incoming +-- demand the strictness signature should be computed, which allows us to +-- unleash higher-order demands on arguments at call sites. This is mostly the +-- case when +-- +-- * The binding takes any arguments before performing meaningful work (cf. +-- 'idArity'), in which case we are interested to see how it uses them. +-- * The binding is a join point, hence acting like a function, not a value. +-- As a big plus, we know *precisely* how it will be used in the body; since +-- it's always tail-called, we can directly unleash the incoming demand of +-- the let binding on its RHS when computing a strictness signature. See +-- [Demand analysis for join points]. +-- +-- Thus, if the binding is not a join point and its arity is 0, we have a thunk +-- and use LetUp, implying that we have no usable demand signature available +-- when we analyse the let body. +-- +-- Since thunk evaluation is memoised, we want to unleash its 'DmdEnv' of free +-- vars at most once, regardless of how many times it was forced in the body. +-- This makes a real difference wrt. usage demands. The other reason is being +-- able to unleash a more precise product demand on its RHS once we know how the +-- thunk was used in the let body. +-- +-- Characteristic examples, always assuming a single evaluation: +-- +-- * @let x = 2*y in x + x@ => LetUp. Compared to LetDown, we find out that +-- the expression uses @y@ at most once. +-- * @let x = (a,b) in fst x@ => LetUp. Compared to LetDown, we find out that +-- @b@ is absent. +-- * @let f x = x*2 in f y@ => LetDown. Compared to LetUp, we find out that +-- the expression uses @y@ strictly, because we have @f@'s demand signature +-- available at the call site. +-- * @join exit = 2*y in if a then exit else if b then exit else 3*y@ => +-- LetDown. Compared to LetUp, we find out that the expression uses @y@ +-- strictly, because we can unleash @exit@'s signature at each call site. +-- * For a more convincing example with join points, see Note [Demand analysis +-- for join points]. +-- +useLetUp :: Var -> Bool +useLetUp f = idArity f == 0 && not (isJoinId f) + +{- Note [Demand analysis for join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + g :: (Int,Int) -> Int + g (p,q) = p+q + + f :: T -> Int -> Int + f x p = g (join j y = (p,y) + in case x of + A -> j 3 + B -> j 4 + C -> (p,7)) + +If j was a vanilla function definition, we'd analyse its body with +evalDmd, and think that it was lazy in p. But for join points we can +do better! We know that j's body will (if called at all) be evaluated +with the demand that consumes the entire join-binding, in this case +the argument demand from g. Whizzo! g evaluates both components of +its argument pair, so p will certainly be evaluated if j is called. + +For f to be strict in p, we need /all/ paths to evaluate p; in this +case the C branch does so too, so we are fine. So, as usual, we need +to transport demands on free variables to the call site(s). Compare +Note [Lazy and unleashable free variables]. + +The implementation is easy. When analysing a join point, we can +analyse its body with the demand from the entire join-binding (written +let_dmd here). + +Another win for join points! #13543. + +However, note that the strictness signature for a join point can +look a little puzzling. E.g. + + (join j x = \y. error "urk") + (in case v of ) + ( A -> j 3 ) x + ( B -> j 4 ) + ( C -> \y. blah ) + +The entire thing is in a C(S) context, so j's strictness signature +will be [A]b +meaning one absent argument, returns bottom. That seems odd because +there's a \y inside. But it's right because when consumed in a C(1) +context the RHS of the join point is indeed bottom. + +Note [Demand signatures are computed for a threshold demand based on idArity] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We compute demand signatures assuming idArity incoming arguments to approximate +behavior for when we have a call site with at least that many arguments. idArity +is /at least/ the number of manifest lambdas, but might be higher for PAPs and +trivial RHS (see Note [Demand analysis for trivial right-hand sides]). + +Because idArity of a function varies independently of its cardinality properties +(cf. Note [idArity varies independently of dmdTypeDepth]), we implicitly encode +the arity for when a demand signature is sound to unleash in its 'dmdTypeDepth' +(cf. Note [Understanding DmdType and StrictSig] in Demand). It is unsound to +unleash a demand signature when the incoming number of arguments is less than +that. See Note [What are demand signatures?] for more details on soundness. + +Why idArity arguments? Because that's a conservative estimate of how many +arguments we must feed a function before it does anything interesting with them. +Also it elegantly subsumes the trivial RHS and PAP case. + +There might be functions for which we might want to analyse for more incoming +arguments than idArity. Example: + + f x = + if expensive + then \y -> ... y ... + else \y -> ... y ... + +We'd analyse `f` under a unary call demand C(S), corresponding to idArity +being 1. That's enough to look under the manifest lambda and find out how a +unary call would use `x`, but not enough to look into the lambdas in the if +branches. + +On the other hand, if we analysed for call demand C(C(S)), we'd get useful +strictness info for `y` (and more precise info on `x`) and possibly CPR +information, but + + * We would no longer be able to unleash the signature at unary call sites + * Performing the worker/wrapper split based on this information would be + implicitly eta-expanding `f`, playing fast and loose with divergence and + even being unsound in the presence of newtypes, so we refrain from doing so. + Also see Note [Don't eta expand in w/w] in GHC.Core.Op.WorkWrap. + +Since we only compute one signature, we do so for arity 1. Computing multiple +signatures for different arities (i.e., polyvariance) would be entirely +possible, if it weren't for the additional runtime and implementation +complexity. + +Note [idArity varies independently of dmdTypeDepth] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used to check in GHC.Core.Lint that dmdTypeDepth <= idArity for a let-bound +identifier. But that means we would have to zap demand signatures every time we +reset or decrease arity. That's an unnecessary dependency, because + + * The demand signature captures a semantic property that is independent of + what the binding's current arity is + * idArity is analysis information itself, thus volatile + * We already *have* dmdTypeDepth, wo why not just use it to encode the + threshold for when to unleash the signature + (cf. Note [Understanding DmdType and StrictSig] in Demand) + +Consider the following expression, for example: + + (let go x y = `x` seq ... in go) |> co + +`go` might have a strictness signature of `<S><L>`. The simplifier will identify +`go` as a nullary join point through `joinPointBinding_maybe` and float the +coercion into the binding, leading to an arity decrease: + + join go = (\x y -> `x` seq ...) |> co in go + +With the CoreLint check, we would have to zap `go`'s perfectly viable strictness +signature. + +Note [What are demand signatures?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Demand analysis interprets expressions in the abstract domain of demand +transformers. Given an incoming demand we put an expression under, its abstract +transformer gives us back a demand type denoting how other things (like +arguments and free vars) were used when the expression was evaluated. +Here's an example: + + f x y = + if x + expensive + then \z -> z + y * ... + else \z -> z * ... + +The abstract transformer (let's call it F_e) of the if expression (let's call it +e) would transform an incoming head demand <S,HU> into a demand type like +{x-><S,1*U>,y-><L,U>}<L,U>. In pictures: + + Demand ---F_e---> DmdType + <S,HU> {x-><S,1*U>,y-><L,U>}<L,U> + +Let's assume that the demand transformers we compute for an expression are +correct wrt. to some concrete semantics for Core. How do demand signatures fit +in? They are strange beasts, given that they come with strict rules when to +it's sound to unleash them. + +Fortunately, we can formalise the rules with Galois connections. Consider +f's strictness signature, {}<S,1*U><L,U>. It's a single-point approximation of +the actual abstract transformer of f's RHS for arity 2. So, what happens is that +we abstract *once more* from the abstract domain we already are in, replacing +the incoming Demand by a simple lattice with two elements denoting incoming +arity: A_2 = {<2, >=2} (where '<2' is the top element and >=2 the bottom +element). Here's the diagram: + + A_2 -----f_f----> DmdType + ^ | + | α γ | + | v + Demand ---F_f---> DmdType + +With + α(C1(C1(_))) = >=2 -- example for usage demands, but similar for strictness + α(_) = <2 + γ(ty) = ty +and F_f being the abstract transformer of f's RHS and f_f being the abstracted +abstract transformer computable from our demand signature simply by + + f_f(>=2) = {}<S,1*U><L,U> + f_f(<2) = postProcessUnsat {}<S,1*U><L,U> + +where postProcessUnsat makes a proper top element out of the given demand type. + +Note [Demand analysis for trivial right-hand sides] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + foo = plusInt |> co +where plusInt is an arity-2 function with known strictness. Clearly +we want plusInt's strictness to propagate to foo! But because it has +no manifest lambdas, it won't do so automatically, and indeed 'co' might +have type (Int->Int->Int) ~ T. + +Fortunately, GHC.Core.Arity gives 'foo' arity 2, which is enough for LetDown to +forward plusInt's demand signature, and all is well (see Note [Newtype arity] in +GHC.Core.Arity)! A small example is the test case NewtypeArity. + + +Note [Product demands for function body] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example comes from shootout/binary_trees: + + Main.check' = \ b z ds. case z of z' { I# ip -> + case ds_d13s of + Main.Nil -> z' + Main.Node s14k s14l s14m -> + Main.check' (not b) + (Main.check' b + (case b { + False -> I# (-# s14h s14k); + True -> I# (+# s14h s14k) + }) + s14l) + s14m } } } + +Here we *really* want to unbox z, even though it appears to be used boxed in +the Nil case. Partly the Nil case is not a hot path. But more specifically, +the whole function gets the CPR property if we do. + +So for the demand on the body of a RHS we use a product demand if it's +a product type. + +************************************************************************ +* * +\subsection{Strictness signatures and types} +* * +************************************************************************ +-} + +unitDmdType :: DmdEnv -> DmdType +unitDmdType dmd_env = DmdType dmd_env [] topDiv + +coercionDmdEnv :: Coercion -> DmdEnv +coercionDmdEnv co = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCo co) + -- The VarSet from coVarsOfCo is really a VarEnv Var + +addVarDmd :: DmdType -> Var -> Demand -> DmdType +addVarDmd (DmdType fv ds res) var dmd + = DmdType (extendVarEnv_C bothDmd fv var dmd) ds res + +addLazyFVs :: DmdType -> DmdEnv -> DmdType +addLazyFVs dmd_ty lazy_fvs + = dmd_ty `bothDmdType` mkBothDmdArg lazy_fvs + -- Using bothDmdType (rather than just both'ing the envs) + -- is vital. Consider + -- let f = \x -> (x,y) + -- in error (f 3) + -- Here, y is treated as a lazy-fv of f, but we must `bothDmd` that L + -- demand with the bottom coming up from 'error' + -- + -- I got a loop in the fixpointer without this, due to an interaction + -- with the lazy_fv filtering in dmdAnalRhsLetDown. Roughly, it was + -- letrec f n x + -- = letrec g y = x `fatbar` + -- letrec h z = z + ...g... + -- in h (f (n-1) x) + -- in ... + -- In the initial iteration for f, f=Bot + -- Suppose h is found to be strict in z, but the occurrence of g in its RHS + -- is lazy. Now consider the fixpoint iteration for g, esp the demands it + -- places on its free variables. Suppose it places none. Then the + -- x `fatbar` ...call to h... + -- will give a x->V demand for x. That turns into a L demand for x, + -- which floats out of the defn for h. Without the modifyEnv, that + -- L demand doesn't get both'd with the Bot coming up from the inner + -- call to f. So we just get an L demand for x for g. + +{- +Note [Do not strictify the argument dictionaries of a dfun] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The typechecker can tie recursive knots involving dfuns, so we do the +conservative thing and refrain from strictifying a dfun's argument +dictionaries. +-} + +setBndrsDemandInfo :: [Var] -> [Demand] -> [Var] +setBndrsDemandInfo (b:bs) (d:ds) + | isTyVar b = b : setBndrsDemandInfo bs (d:ds) + | otherwise = setIdDemandInfo b d : setBndrsDemandInfo bs ds +setBndrsDemandInfo [] ds = ASSERT( null ds ) [] +setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs) + +annotateBndr :: AnalEnv -> DmdType -> Var -> (DmdType, Var) +-- The returned env has the var deleted +-- The returned var is annotated with demand info +-- according to the result demand of the provided demand type +-- No effect on the argument demands +annotateBndr env dmd_ty var + | isId var = (dmd_ty', setIdDemandInfo var dmd) + | otherwise = (dmd_ty, var) + where + (dmd_ty', dmd) = findBndrDmd env False dmd_ty var + +annotateLamIdBndr :: AnalEnv + -> DFunFlag -- is this lambda at the top of the RHS of a dfun? + -> DmdType -- Demand type of body + -> Id -- Lambda binder + -> (DmdType, -- Demand type of lambda + Id) -- and binder annotated with demand + +annotateLamIdBndr env arg_of_dfun dmd_ty id +-- For lambdas we add the demand to the argument demands +-- Only called for Ids + = ASSERT( isId id ) + -- pprTrace "annLamBndr" (vcat [ppr id, ppr _dmd_ty]) $ + (final_ty, setIdDemandInfo id dmd) + where + -- Watch out! See note [Lambda-bound unfoldings] + final_ty = case maybeUnfoldingTemplate (idUnfolding id) of + Nothing -> main_ty + Just unf -> main_ty `bothDmdType` unf_ty + where + (unf_ty, _) = dmdAnalStar env dmd unf + + main_ty = addDemand dmd dmd_ty' + (dmd_ty', dmd) = findBndrDmd env arg_of_dfun dmd_ty id + +deleteFVs :: DmdType -> [Var] -> DmdType +deleteFVs (DmdType fvs dmds res) bndrs + = DmdType (delVarEnvList fvs bndrs) dmds res + +{- +Note [NOINLINE and strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The strictness analyser used to have a HACK which ensured that NOINLNE +things were not strictness-analysed. The reason was unsafePerformIO. +Left to itself, the strictness analyser would discover this strictness +for unsafePerformIO: + unsafePerformIO: C(U(AV)) +But then consider this sub-expression + unsafePerformIO (\s -> let r = f x in + case writeIORef v r s of (# s1, _ #) -> + (# s1, r #) +The strictness analyser will now find that r is sure to be eval'd, +and may then hoist it out. This makes tests/lib/should_run/memo002 +deadlock. + +Solving this by making all NOINLINE things have no strictness info is overkill. +In particular, it's overkill for runST, which is perfectly respectable. +Consider + f x = runST (return x) +This should be strict in x. + +So the new plan is to define unsafePerformIO using the 'lazy' combinator: + + unsafePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r) + +Remember, 'lazy' is a wired-in identity-function Id, of type a->a, which is +magically NON-STRICT, and is inlined after strictness analysis. So +unsafePerformIO will look non-strict, and that's what we want. + +Now we don't need the hack in the strictness analyser. HOWEVER, this +decision does mean that even a NOINLINE function is not entirely +opaque: some aspect of its implementation leaks out, notably its +strictness. For example, if you have a function implemented by an +error stub, but which has RULES, you may want it not to be eliminated +in favour of error! + +Note [Lazy and unleashable free variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We put the strict and once-used FVs in the DmdType of the Id, so +that at its call sites we unleash demands on its strict fvs. +An example is 'roll' in imaginary/wheel-sieve2 +Something like this: + roll x = letrec + go y = if ... then roll (x-1) else x+1 + in + go ms +We want to see that roll is strict in x, which is because +go is called. So we put the DmdEnv for x in go's DmdType. + +Another example: + + f :: Int -> Int -> Int + f x y = let t = x+1 + h z = if z==0 then t else + if z==1 then x+1 else + x + h (z-1) + in h y + +Calling h does indeed evaluate x, but we can only see +that if we unleash a demand on x at the call site for t. + +Incidentally, here's a place where lambda-lifting h would +lose the cigar --- we couldn't see the joint strictness in t/x + + ON THE OTHER HAND + +We don't want to put *all* the fv's from the RHS into the +DmdType. Because + + * it makes the strictness signatures larger, and hence slows down fixpointing + +and + + * it is useless information at the call site anyways: + For lazy, used-many times fv's we will never get any better result than + that, no matter how good the actual demand on the function at the call site + is (unless it is always absent, but then the whole binder is useless). + +Therefore we exclude lazy multiple-used fv's from the environment in the +DmdType. + +But now the signature lies! (Missing variables are assumed to be absent.) To +make up for this, the code that analyses the binding keeps the demand on those +variable separate (usually called "lazy_fv") and adds it to the demand of the +whole binding later. + +What if we decide _not_ to store a strictness signature for a binding at all, as +we do when aborting a fixed-point iteration? The we risk losing the information +that the strict variables are being used. In that case, we take all free variables +mentioned in the (unsound) strictness signature, conservatively approximate the +demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix". + + +Note [Lambda-bound unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We allow a lambda-bound variable to carry an unfolding, a facility that is used +exclusively for join points; see Note [Case binders and join points]. If so, +we must be careful to demand-analyse the RHS of the unfolding! Example + \x. \y{=Just x}. <body> +Then if <body> uses 'y', then transitively it uses 'x', and we must not +forget that fact, otherwise we might make 'x' absent when it isn't. + + +************************************************************************ +* * +\subsection{Strictness signatures} +* * +************************************************************************ +-} + +type DFunFlag = Bool -- indicates if the lambda being considered is in the + -- sequence of lambdas at the top of the RHS of a dfun +notArgOfDfun :: DFunFlag +notArgOfDfun = False + +data AnalEnv + = AE { ae_dflags :: DynFlags + , ae_sigs :: SigEnv + , ae_virgin :: Bool -- True on first iteration only + -- See Note [Initialising strictness] + , ae_rec_tc :: RecTcChecker + , ae_fam_envs :: FamInstEnvs + } + + -- We use the se_env to tell us whether to + -- record info about a variable in the DmdEnv + -- We do so if it's a LocalId, but not top-level + -- + -- The DmdEnv gives the demand on the free vars of the function + -- when it is given enough args to satisfy the strictness signature + +type SigEnv = VarEnv (StrictSig, TopLevelFlag) + +instance Outputable AnalEnv where + ppr (AE { ae_sigs = env, ae_virgin = virgin }) + = text "AE" <+> braces (vcat + [ text "ae_virgin =" <+> ppr virgin + , text "ae_sigs =" <+> ppr env ]) + +emptyAnalEnv :: DynFlags -> FamInstEnvs -> AnalEnv +emptyAnalEnv dflags fam_envs + = AE { ae_dflags = dflags + , ae_sigs = emptySigEnv + , ae_virgin = True + , ae_rec_tc = initRecTc + , ae_fam_envs = fam_envs + } + +emptySigEnv :: SigEnv +emptySigEnv = emptyVarEnv + +-- | Extend an environment with the strictness IDs attached to the id +extendAnalEnvs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv +extendAnalEnvs top_lvl env vars + = env { ae_sigs = extendSigEnvs top_lvl (ae_sigs env) vars } + +extendSigEnvs :: TopLevelFlag -> SigEnv -> [Id] -> SigEnv +extendSigEnvs top_lvl sigs vars + = extendVarEnvList sigs [ (var, (idStrictness var, top_lvl)) | var <- vars] + +extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> StrictSig -> AnalEnv +extendAnalEnv top_lvl env var sig + = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig } + +extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> StrictSig -> SigEnv +extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl) + +lookupSigEnv :: AnalEnv -> Id -> Maybe (StrictSig, TopLevelFlag) +lookupSigEnv env id = lookupVarEnv (ae_sigs env) id + +nonVirgin :: AnalEnv -> AnalEnv +nonVirgin env = env { ae_virgin = False } + +findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> (DmdType, [Demand]) +-- Return the demands on the Ids in the [Var] +findBndrsDmds env dmd_ty bndrs + = go dmd_ty bndrs + where + go dmd_ty [] = (dmd_ty, []) + go dmd_ty (b:bs) + | isId b = let (dmd_ty1, dmds) = go dmd_ty bs + (dmd_ty2, dmd) = findBndrDmd env False dmd_ty1 b + in (dmd_ty2, dmd : dmds) + | otherwise = go dmd_ty bs + +findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> (DmdType, Demand) +-- See Note [Trimming a demand to a type] in Demand.hs +findBndrDmd env arg_of_dfun dmd_ty id + = (dmd_ty', dmd') + where + dmd' = killUsageDemand (ae_dflags env) $ + strictify $ + trimToType starting_dmd (findTypeShape fam_envs id_ty) + + (dmd_ty', starting_dmd) = peelFV dmd_ty id + + id_ty = idType id + + strictify dmd + | gopt Opt_DictsStrict (ae_dflags env) + -- We never want to strictify a recursive let. At the moment + -- annotateBndr is only call for non-recursive lets; if that + -- changes, we need a RecFlag parameter and another guard here. + , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun] + = strictifyDictDmd id_ty dmd + | otherwise + = dmd + + fam_envs = ae_fam_envs env + +set_idStrictness :: AnalEnv -> Id -> StrictSig -> Id +set_idStrictness env id sig + = setIdStrictness id (killUsageSig (ae_dflags env) sig) + +{- Note [Initialising strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +See section 9.2 (Finding fixpoints) of the paper. + +Our basic plan is to initialise the strictness of each Id in a +recursive group to "bottom", and find a fixpoint from there. However, +this group B might be inside an *enclosing* recursive group A, in +which case we'll do the entire fixpoint shebang on for each iteration +of A. This can be illustrated by the following example: + +Example: + + f [] = [] + f (x:xs) = let g [] = f xs + g (y:ys) = y+1 : g ys + in g (h x) + +At each iteration of the fixpoint for f, the analyser has to find a +fixpoint for the enclosed function g. In the meantime, the demand +values for g at each iteration for f are *greater* than those we +encountered in the previous iteration for f. Therefore, we can begin +the fixpoint for g not with the bottom value but rather with the +result of the previous analysis. I.e., when beginning the fixpoint +process for g, we can start from the demand signature computed for g +previously and attached to the binding occurrence of g. + +To speed things up, we initialise each iteration of A (the enclosing +one) from the result of the last one, which is neatly recorded in each +binder. That way we make use of earlier iterations of the fixpoint +algorithm. (Cunning plan.) + +But on the *first* iteration we want to *ignore* the current strictness +of the Id, and start from "bottom". Nowadays the Id can have a current +strictness, because interface files record strictness for nested bindings. +To know when we are in the first iteration, we look at the ae_virgin +field of the AnalEnv. + + +Note [Final Demand Analyser run] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some of the information that the demand analyser determines is not always +preserved by the simplifier. For example, the simplifier will happily rewrite + \y [Demand=1*U] let x = y in x + x +to + \y [Demand=1*U] y + y +which is quite a lie. + +The once-used information is (currently) only used by the code +generator, though. So: + + * We zap the used-once info in the worker-wrapper; + see Note [Zapping Used Once info in WorkWrap] in + GHC.Core.Op.WorkWrap. + If it's not reliable, it's better not to have it at all. + + * Just before TidyCore, we add a pass of the demand analyser, + but WITHOUT subsequent worker/wrapper and simplifier, + right before TidyCore. See SimplCore.getCoreToDo. + + This way, correct information finds its way into the module interface + (strictness signatures!) and the code generator (single-entry thunks!) + +Note that, in contrast, the single-call information (C1(..)) /can/ be +relied upon, as the simplifier tends to be very careful about not +duplicating actual function calls. + +Also see #11731. +-} diff --git a/compiler/GHC/Core/Op/Exitify.hs b/compiler/GHC/Core/Op/Exitify.hs new file mode 100644 index 0000000000..45f9451787 --- /dev/null +++ b/compiler/GHC/Core/Op/Exitify.hs @@ -0,0 +1,499 @@ +module GHC.Core.Op.Exitify ( exitifyProgram ) where + +{- +Note [Exitification] +~~~~~~~~~~~~~~~~~~~~ + +This module implements Exitification. The goal is to pull as much code out of +recursive functions as possible, as the simplifier is better at inlining into +call-sites that are not in recursive functions. + +Example: + + let t = foo bar + joinrec go 0 x y = t (x*x) + go (n-1) x y = jump go (n-1) (x+y) + in … + +We’d like to inline `t`, but that does not happen: Because t is a thunk and is +used in a recursive function, doing so might lose sharing in general. In +this case, however, `t` is on the _exit path_ of `go`, so called at most once. +How do we make this clearly visible to the simplifier? + +A code path (i.e., an expression in a tail-recursive position) in a recursive +function is an exit path if it does not contain a recursive call. We can bind +this expression outside the recursive function, as a join-point. + +Example result: + + let t = foo bar + join exit x = t (x*x) + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + in … + +Now `t` is no longer in a recursive function, and good things happen! +-} + +import GhcPrelude +import Var +import Id +import IdInfo +import GHC.Core +import GHC.Core.Utils +import State +import Unique +import VarSet +import VarEnv +import GHC.Core.FVs +import FastString +import GHC.Core.Type +import Util( mapSnd ) + +import Data.Bifunctor +import Control.Monad + +-- | Traverses the AST, simply to find all joinrecs and call 'exitify' on them. +-- The really interesting function is exitifyRec +exitifyProgram :: CoreProgram -> CoreProgram +exitifyProgram binds = map goTopLvl binds + where + goTopLvl (NonRec v e) = NonRec v (go in_scope_toplvl e) + goTopLvl (Rec pairs) = Rec (map (second (go in_scope_toplvl)) pairs) + -- Top-level bindings are never join points + + in_scope_toplvl = emptyInScopeSet `extendInScopeSetList` bindersOfBinds binds + + go :: InScopeSet -> CoreExpr -> CoreExpr + go _ e@(Var{}) = e + go _ e@(Lit {}) = e + go _ e@(Type {}) = e + go _ e@(Coercion {}) = e + go in_scope (Cast e' c) = Cast (go in_scope e') c + go in_scope (Tick t e') = Tick t (go in_scope e') + go in_scope (App e1 e2) = App (go in_scope e1) (go in_scope e2) + + go in_scope (Lam v e') + = Lam v (go in_scope' e') + where in_scope' = in_scope `extendInScopeSet` v + + go in_scope (Case scrut bndr ty alts) + = Case (go in_scope scrut) bndr ty (map go_alt alts) + where + in_scope1 = in_scope `extendInScopeSet` bndr + go_alt (dc, pats, rhs) = (dc, pats, go in_scope' rhs) + where in_scope' = in_scope1 `extendInScopeSetList` pats + + go in_scope (Let (NonRec bndr rhs) body) + = Let (NonRec bndr (go in_scope rhs)) (go in_scope' body) + where + in_scope' = in_scope `extendInScopeSet` bndr + + go in_scope (Let (Rec pairs) body) + | is_join_rec = mkLets (exitifyRec in_scope' pairs') body' + | otherwise = Let (Rec pairs') body' + where + is_join_rec = any (isJoinId . fst) pairs + in_scope' = in_scope `extendInScopeSetList` bindersOf (Rec pairs) + pairs' = mapSnd (go in_scope') pairs + body' = go in_scope' body + + +-- | State Monad used inside `exitify` +type ExitifyM = State [(JoinId, CoreExpr)] + +-- | Given a recursive group of a joinrec, identifies “exit paths” and binds them as +-- join-points outside the joinrec. +exitifyRec :: InScopeSet -> [(Var,CoreExpr)] -> [CoreBind] +exitifyRec in_scope pairs + = [ NonRec xid rhs | (xid,rhs) <- exits ] ++ [Rec pairs'] + where + -- We need the set of free variables of many subexpressions here, so + -- annotate the AST with them + -- see Note [Calculating free variables] + ann_pairs = map (second freeVars) pairs + + -- Which are the recursive calls? + recursive_calls = mkVarSet $ map fst pairs + + (pairs',exits) = (`runState` []) $ do + forM ann_pairs $ \(x,rhs) -> do + -- go past the lambdas of the join point + let (args, body) = collectNAnnBndrs (idJoinArity x) rhs + body' <- go args body + let rhs' = mkLams args body' + return (x, rhs') + + --------------------- + -- 'go' is the main working function. + -- It goes through the RHS (tail-call positions only), + -- checks if there are no more recursive calls, if so, abstracts over + -- variables bound on the way and lifts it out as a join point. + -- + -- ExitifyM is a state monad to keep track of floated binds + go :: [Var] -- ^ Variables that are in-scope here, but + -- not in scope at the joinrec; that is, + -- we must potentially abstract over them. + -- Invariant: they are kept in dependency order + -> CoreExprWithFVs -- ^ Current expression in tail position + -> ExitifyM CoreExpr + + -- We first look at the expression (no matter what it shape is) + -- and determine if we can turn it into a exit join point + go captured ann_e + | -- An exit expression has no recursive calls + let fvs = dVarSetToVarSet (freeVarsOf ann_e) + , disjointVarSet fvs recursive_calls + = go_exit captured (deAnnotate ann_e) fvs + + -- We could not turn it into a exit join point. So now recurse + -- into all expression where eligible exit join points might sit, + -- i.e. into all tail-call positions: + + -- Case right hand sides are in tail-call position + go captured (_, AnnCase scrut bndr ty alts) = do + alts' <- forM alts $ \(dc, pats, rhs) -> do + rhs' <- go (captured ++ [bndr] ++ pats) rhs + return (dc, pats, rhs') + return $ Case (deAnnotate scrut) bndr ty alts' + + go captured (_, AnnLet ann_bind body) + -- join point, RHS and body are in tail-call position + | AnnNonRec j rhs <- ann_bind + , Just join_arity <- isJoinId_maybe j + = do let (params, join_body) = collectNAnnBndrs join_arity rhs + join_body' <- go (captured ++ params) join_body + let rhs' = mkLams params join_body' + body' <- go (captured ++ [j]) body + return $ Let (NonRec j rhs') body' + + -- rec join point, RHSs and body are in tail-call position + | AnnRec pairs <- ann_bind + , isJoinId (fst (head pairs)) + = do let js = map fst pairs + pairs' <- forM pairs $ \(j,rhs) -> do + let join_arity = idJoinArity j + (params, join_body) = collectNAnnBndrs join_arity rhs + join_body' <- go (captured ++ js ++ params) join_body + let rhs' = mkLams params join_body' + return (j, rhs') + body' <- go (captured ++ js) body + return $ Let (Rec pairs') body' + + -- normal Let, only the body is in tail-call position + | otherwise + = do body' <- go (captured ++ bindersOf bind ) body + return $ Let bind body' + where bind = deAnnBind ann_bind + + -- Cannot be turned into an exit join point, but also has no + -- tail-call subexpression. Nothing to do here. + go _ ann_e = return (deAnnotate ann_e) + + --------------------- + go_exit :: [Var] -- Variables captured locally + -> CoreExpr -- An exit expression + -> VarSet -- Free vars of the expression + -> ExitifyM CoreExpr + -- go_exit deals with a tail expression that is floatable + -- out as an exit point; that is, it mentions no recursive calls + go_exit captured e fvs + -- Do not touch an expression that is already a join jump where all arguments + -- are captured variables. See Note [Idempotency] + -- But _do_ float join jumps with interesting arguments. + -- See Note [Jumps can be interesting] + | (Var f, args) <- collectArgs e + , isJoinId f + , all isCapturedVarArg args + = return e + + -- Do not touch a boring expression (see Note [Interesting expression]) + | not is_interesting + = return e + + -- Cannot float out if local join points are used, as + -- we cannot abstract over them + | captures_join_points + = return e + + -- We have something to float out! + | otherwise + = do { -- Assemble the RHS of the exit join point + let rhs = mkLams abs_vars e + avoid = in_scope `extendInScopeSetList` captured + -- Remember this binding under a suitable name + ; v <- addExit avoid (length abs_vars) rhs + -- And jump to it from here + ; return $ mkVarApps (Var v) abs_vars } + + where + -- Used to detect exit expressions that are already proper exit jumps + isCapturedVarArg (Var v) = v `elem` captured + isCapturedVarArg _ = False + + -- An interesting exit expression has free, non-imported + -- variables from outside the recursive group + -- See Note [Interesting expression] + is_interesting = anyVarSet isLocalId $ + fvs `minusVarSet` mkVarSet captured + + -- The arguments of this exit join point + -- See Note [Picking arguments to abstract over] + abs_vars = snd $ foldr pick (fvs, []) captured + where + pick v (fvs', acc) | v `elemVarSet` fvs' = (fvs' `delVarSet` v, zap v : acc) + | otherwise = (fvs', acc) + + -- We are going to abstract over these variables, so we must + -- zap any IdInfo they have; see #15005 + -- cf. GHC.Core.Op.SetLevels.abstractVars + zap v | isId v = setIdInfo v vanillaIdInfo + | otherwise = v + + -- We cannot abstract over join points + captures_join_points = any isJoinId abs_vars + + +-- Picks a new unique, which is disjoint from +-- * the free variables of the whole joinrec +-- * any bound variables (captured) +-- * any exit join points created so far. +mkExitJoinId :: InScopeSet -> Type -> JoinArity -> ExitifyM JoinId +mkExitJoinId in_scope ty join_arity = do + fs <- get + let avoid = in_scope `extendInScopeSetList` (map fst fs) + `extendInScopeSet` exit_id_tmpl -- just cosmetics + return (uniqAway avoid exit_id_tmpl) + where + exit_id_tmpl = mkSysLocal (fsLit "exit") initExitJoinUnique ty + `asJoinId` join_arity + +addExit :: InScopeSet -> JoinArity -> CoreExpr -> ExitifyM JoinId +addExit in_scope join_arity rhs = do + -- Pick a suitable name + let ty = exprType rhs + v <- mkExitJoinId in_scope ty join_arity + fs <- get + put ((v,rhs):fs) + return v + +{- +Note [Interesting expression] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We do not want this to happen: + + joinrec go 0 x y = x + go (n-1) x y = jump go (n-1) (x+y) + in … +==> + join exit x = x + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + in … + +because the floated exit path (`x`) is simply a parameter of `go`; there are +not useful interactions exposed this way. + +Neither do we want this to happen + + joinrec go 0 x y = x+x + go (n-1) x y = jump go (n-1) (x+y) + in … +==> + join exit x = x+x + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + in … + +where the floated expression `x+x` is a bit more complicated, but still not +intersting. + +Expressions are interesting when they move an occurrence of a variable outside +the recursive `go` that can benefit from being obviously called once, for example: + * a local thunk that can then be inlined (see example in note [Exitification]) + * the parameter of a function, where the demand analyzer then can then + see that it is called at most once, and hence improve the function’s + strictness signature + +So we only hoist an exit expression out if it mentiones at least one free, +non-imported variable. + +Note [Jumps can be interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A jump to a join point can be interesting, if its arguments contain free +non-exported variables (z in the following example): + + joinrec go 0 x y = jump j (x+z) + go (n-1) x y = jump go (n-1) (x+y) + in … +==> + join exit x y = jump j (x+z) + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + + +The join point itself can be interesting, even if none if its +arguments have free variables free in the joinrec. For example + + join j p = case p of (x,y) -> x+y + joinrec go 0 x y = jump j (x,y) + go (n-1) x y = jump go (n-1) (x+y) y + in … + +Here, `j` would not be inlined because we do not inline something that looks +like an exit join point (see Note [Do not inline exit join points]). But +if we exitify the 'jump j (x,y)' we get + + join j p = case p of (x,y) -> x+y + join exit x y = jump j (x,y) + joinrec go 0 x y = jump exit x y + go (n-1) x y = jump go (n-1) (x+y) y + in … + +and now 'j' can inline, and we get rid of the pair. Here's another +example (assume `g` to be an imported function that, on its own, +does not make this interesting): + + join j y = map f y + joinrec go 0 x y = jump j (map g x) + go (n-1) x y = jump go (n-1) (x+y) + in … + +Again, `j` would not be inlined because we do not inline something that looks +like an exit join point (see Note [Do not inline exit join points]). + +But after exitification we have + + join j y = map f y + join exit x = jump j (map g x) + joinrec go 0 x y = jump j (map g x) + go (n-1) x y = jump go (n-1) (x+y) + in … + +and now we can inline `j` and this will allow `map/map` to fire. + + +Note [Idempotency] +~~~~~~~~~~~~~~~~~~ + +We do not want this to happen, where we replace the floated expression with +essentially the same expression: + + join exit x = t (x*x) + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + in … +==> + join exit x = t (x*x) + join exit' x = jump exit x + joinrec go 0 x y = jump exit' x + go (n-1) x y = jump go (n-1) (x+y) + in … + +So when the RHS is a join jump, and all of its arguments are captured variables, +then we leave it in place. + +Note that `jump exit x` in this example looks interesting, as `exit` is a free +variable. Therefore, idempotency does not simply follow from floating only +interesting expressions. + +Note [Calculating free variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We have two options where to annotate the tree with free variables: + + A) The whole tree. + B) Each individual joinrec as we come across it. + +Downside of A: We pay the price on the whole module, even outside any joinrecs. +Downside of B: We pay the price per joinrec, possibly multiple times when +joinrecs are nested. + +Further downside of A: If the exitify function returns annotated expressions, +it would have to ensure that the annotations are correct. + +We therefore choose B, and calculate the free variables in `exitify`. + + +Note [Do not inline exit join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we have + + let t = foo bar + join exit x = t (x*x) + joinrec go 0 x y = jump exit x + go (n-1) x y = jump go (n-1) (x+y) + in … + +we do not want the simplifier to simply inline `exit` back in (which it happily +would). + +To prevent this, we need to recognize exit join points, and then disable +inlining. + +Exit join points, recognizeable using `isExitJoinId` are join points with an +occurrence in a recursive group, and can be recognized (after the occurrence +analyzer ran!) using `isExitJoinId`. +This function detects joinpoints with `occ_in_lam (idOccinfo id) == True`, +because the lambdas of a non-recursive join point are not considered for +`occ_in_lam`. For example, in the following code, `j1` is /not/ marked +occ_in_lam, because `j2` is called only once. + + join j1 x = x+1 + join j2 y = join j1 (y+2) + +To prevent inlining, we check for isExitJoinId +* In `preInlineUnconditionally` directly. +* In `simplLetUnfolding` we simply give exit join points no unfolding, which + prevents inlining in `postInlineUnconditionally` and call sites. + +Note [Placement of the exitification pass] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +I (Joachim) experimented with multiple positions for the Exitification pass in +the Core2Core pipeline: + + A) Before the `simpl_phases` + B) Between the `simpl_phases` and the "main" simplifier pass + C) After demand_analyser + D) Before the final simplification phase + +Here is the table (this is without inlining join exit points in the final +simplifier run): + + Program | Allocs | Instrs + | ABCD.log A.log B.log C.log D.log | ABCD.log A.log B.log C.log D.log +----------------|---------------------------------------------------|------------------------------------------------- + fannkuch-redux | -99.9% +0.0% -99.9% -99.9% -99.9% | -3.9% +0.5% -3.0% -3.9% -3.9% + fasta | -0.0% +0.0% +0.0% -0.0% -0.0% | -8.5% +0.0% +0.0% -0.0% -8.5% + fem | 0.0% 0.0% 0.0% 0.0% +0.0% | -2.2% -0.1% -0.1% -2.1% -2.1% + fish | 0.0% 0.0% 0.0% 0.0% +0.0% | -3.1% +0.0% -1.1% -1.1% -0.0% + k-nucleotide | -91.3% -91.0% -91.0% -91.3% -91.3% | -6.3% +11.4% +11.4% -6.3% -6.2% + scs | -0.0% -0.0% -0.0% -0.0% -0.0% | -3.4% -3.0% -3.1% -3.3% -3.3% + simple | -6.0% 0.0% -6.0% -6.0% +0.0% | -3.4% +0.0% -5.2% -3.4% -0.1% + spectral-norm | -0.0% 0.0% 0.0% -0.0% +0.0% | -2.7% +0.0% -2.7% -5.4% -5.4% +----------------|---------------------------------------------------|------------------------------------------------- + Min | -95.0% -91.0% -95.0% -95.0% -95.0% | -8.5% -3.0% -5.2% -6.3% -8.5% + Max | +0.2% +0.2% +0.2% +0.2% +1.5% | +0.4% +11.4% +11.4% +0.4% +1.5% + Geometric Mean | -4.7% -2.1% -4.7% -4.7% -4.6% | -0.4% +0.1% -0.1% -0.3% -0.2% + +Position A is disqualified, as it does not get rid of the allocations in +fannkuch-redux. +Position A and B are disqualified because it increases instructions in k-nucleotide. +Positions C and D have their advantages: C decreases allocations in simpl, but D instructions in fasta. + +Assuming we have a budget of _one_ run of Exitification, then C wins (but we +could get more from running it multiple times, as seen in fish). + +Note [Picking arguments to abstract over] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When we create an exit join point, so we need to abstract over those of its +free variables that are be out-of-scope at the destination of the exit join +point. So we go through the list `captured` and pick those that are actually +free variables of the join point. + +We do not just `filter (`elemVarSet` fvs) captured`, as there might be +shadowing, and `captured` may contain multiple variables with the same Unique. I +these cases we want to abstract only over the last occurrence, hence the `foldr` +(with emphasis on the `r`). This is #15110. + +-} diff --git a/compiler/GHC/Core/Op/FloatIn.hs b/compiler/GHC/Core/Op/FloatIn.hs new file mode 100644 index 0000000000..ac4ef8088e --- /dev/null +++ b/compiler/GHC/Core/Op/FloatIn.hs @@ -0,0 +1,772 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +************************************************************************ +* * +\section[FloatIn]{Floating Inwards pass} +* * +************************************************************************ + +The main purpose of @floatInwards@ is floating into branches of a +case, so that we don't allocate things, save them on the stack, and +then discover that they aren't needed in the chosen branch. +-} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fprof-auto #-} +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module GHC.Core.Op.FloatIn ( floatInwards ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core +import GHC.Core.Make hiding ( wrapFloats ) +import GHC.Driver.Types ( ModGuts(..) ) +import GHC.Core.Utils +import GHC.Core.FVs +import GHC.Core.Op.Monad ( CoreM ) +import Id ( isOneShotBndr, idType, isJoinId, isJoinId_maybe ) +import Var +import GHC.Core.Type +import VarSet +import Util +import GHC.Driver.Session +import Outputable +-- import Data.List ( mapAccumL ) +import BasicTypes ( RecFlag(..), isRec ) + +{- +Top-level interface function, @floatInwards@. Note that we do not +actually float any bindings downwards from the top-level. +-} + +floatInwards :: ModGuts -> CoreM ModGuts +floatInwards pgm@(ModGuts { mg_binds = binds }) + = do { dflags <- getDynFlags + ; return (pgm { mg_binds = map (fi_top_bind dflags) binds }) } + where + fi_top_bind dflags (NonRec binder rhs) + = NonRec binder (fiExpr dflags [] (freeVars rhs)) + fi_top_bind dflags (Rec pairs) + = Rec [ (b, fiExpr dflags [] (freeVars rhs)) | (b, rhs) <- pairs ] + + +{- +************************************************************************ +* * +\subsection{Mail from Andr\'e [edited]} +* * +************************************************************************ + +{\em Will wrote: What??? I thought the idea was to float as far +inwards as possible, no matter what. This is dropping all bindings +every time it sees a lambda of any kind. Help! } + +You are assuming we DO DO full laziness AFTER floating inwards! We +have to [not float inside lambdas] if we don't. + +If we indeed do full laziness after the floating inwards (we could +check the compilation flags for that) then I agree we could be more +aggressive and do float inwards past lambdas. + +Actually we are not doing a proper full laziness (see below), which +was another reason for not floating inwards past a lambda. + +This can easily be fixed. The problem is that we float lets outwards, +but there are a few expressions which are not let bound, like case +scrutinees and case alternatives. After floating inwards the +simplifier could decide to inline the let and the laziness would be +lost, e.g. + +\begin{verbatim} +let a = expensive ==> \b -> case expensive of ... +in \ b -> case a of ... +\end{verbatim} +The fix is +\begin{enumerate} +\item +to let bind the algebraic case scrutinees (done, I think) and +the case alternatives (except the ones with an +unboxed type)(not done, I think). This is best done in the +GHC.Core.Op.SetLevels.hs module, which tags things with their level numbers. +\item +do the full laziness pass (floating lets outwards). +\item +simplify. The simplifier inlines the (trivial) lets that were + created but were not floated outwards. +\end{enumerate} + +With the fix I think Will's suggestion that we can gain even more from +strictness by floating inwards past lambdas makes sense. + +We still gain even without going past lambdas, as things may be +strict in the (new) context of a branch (where it was floated to) or +of a let rhs, e.g. +\begin{verbatim} +let a = something case x of +in case x of alt1 -> case something of a -> a + a + alt1 -> a + a ==> alt2 -> b + alt2 -> b + +let a = something let b = case something of a -> a + a +in let b = a + a ==> in (b,b) +in (b,b) +\end{verbatim} +Also, even if a is not found to be strict in the new context and is +still left as a let, if the branch is not taken (or b is not entered) +the closure for a is not built. + +************************************************************************ +* * +\subsection{Main floating-inwards code} +* * +************************************************************************ +-} + +type FreeVarSet = DIdSet +type BoundVarSet = DIdSet + +data FloatInBind = FB BoundVarSet FreeVarSet FloatBind + -- The FreeVarSet is the free variables of the binding. In the case + -- of recursive bindings, the set doesn't include the bound + -- variables. + +type FloatInBinds = [FloatInBind] + -- In reverse dependency order (innermost binder first) + +fiExpr :: DynFlags + -> FloatInBinds -- Binds we're trying to drop + -- as far "inwards" as possible + -> CoreExprWithFVs -- Input expr + -> CoreExpr -- Result + +fiExpr _ to_drop (_, AnnLit lit) = wrapFloats to_drop (Lit lit) + -- See Note [Dead bindings] +fiExpr _ to_drop (_, AnnType ty) = ASSERT( null to_drop ) Type ty +fiExpr _ to_drop (_, AnnVar v) = wrapFloats to_drop (Var v) +fiExpr _ to_drop (_, AnnCoercion co) = wrapFloats to_drop (Coercion co) +fiExpr dflags to_drop (_, AnnCast expr (co_ann, co)) + = wrapFloats (drop_here ++ co_drop) $ + Cast (fiExpr dflags e_drop expr) co + where + [drop_here, e_drop, co_drop] + = sepBindsByDropPoint dflags False + [freeVarsOf expr, freeVarsOfAnn co_ann] + to_drop + +{- +Applications: we do float inside applications, mainly because we +need to get at all the arguments. The next simplifier run will +pull out any silly ones. +-} + +fiExpr dflags to_drop ann_expr@(_,AnnApp {}) + = wrapFloats drop_here $ wrapFloats extra_drop $ + mkTicks ticks $ + mkApps (fiExpr dflags fun_drop ann_fun) + (zipWith (fiExpr dflags) arg_drops ann_args) + where + (ann_fun, ann_args, ticks) = collectAnnArgsTicks tickishFloatable ann_expr + fun_ty = exprType (deAnnotate ann_fun) + fun_fvs = freeVarsOf ann_fun + arg_fvs = map freeVarsOf ann_args + + (drop_here : extra_drop : fun_drop : arg_drops) + = sepBindsByDropPoint dflags False + (extra_fvs : fun_fvs : arg_fvs) + to_drop + -- Shortcut behaviour: if to_drop is empty, + -- sepBindsByDropPoint returns a suitable bunch of empty + -- lists without evaluating extra_fvs, and hence without + -- peering into each argument + + (_, extra_fvs) = foldl' add_arg (fun_ty, extra_fvs0) ann_args + extra_fvs0 = case ann_fun of + (_, AnnVar _) -> fun_fvs + _ -> emptyDVarSet + -- Don't float the binding for f into f x y z; see Note [Join points] + -- for why we *can't* do it when f is a join point. (If f isn't a + -- join point, floating it in isn't especially harmful but it's + -- useless since the simplifier will immediately float it back out.) + + add_arg :: (Type,FreeVarSet) -> CoreExprWithFVs -> (Type,FreeVarSet) + add_arg (fun_ty, extra_fvs) (_, AnnType ty) + = (piResultTy fun_ty ty, extra_fvs) + + add_arg (fun_ty, extra_fvs) (arg_fvs, arg) + | noFloatIntoArg arg arg_ty + = (res_ty, extra_fvs `unionDVarSet` arg_fvs) + | otherwise + = (res_ty, extra_fvs) + where + (arg_ty, res_ty) = splitFunTy fun_ty + +{- Note [Dead bindings] +~~~~~~~~~~~~~~~~~~~~~~~ +At a literal we won't usually have any floated bindings; the +only way that can happen is if the binding wrapped the literal +/in the original input program/. e.g. + case x of { DEFAULT -> 1# } +But, while this may be unusual it is not actually wrong, and it did +once happen (#15696). + +Note [Do not destroy the let/app invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Watch out for + f (x +# y) +We don't want to float bindings into here + f (case ... of { x -> x +# y }) +because that might destroy the let/app invariant, which requires +unlifted function arguments to be ok-for-speculation. + +Note [Join points] +~~~~~~~~~~~~~~~~~~ +Generally, we don't need to worry about join points - there are places we're +not allowed to float them, but since they can't have occurrences in those +places, we're not tempted. + +We do need to be careful about jumps, however: + + joinrec j x y z = ... in + jump j a b c + +Previous versions often floated the definition of a recursive function into its +only non-recursive occurrence. But for a join point, this is a disaster: + + (joinrec j x y z = ... in + jump j) a b c -- wrong! + +Every jump must be exact, so the jump to j must have three arguments. Hence +we're careful not to float into the target of a jump (though we can float into +the arguments just fine). + +Note [Floating in past a lambda group] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* We must be careful about floating inside a value lambda. + That risks losing laziness. + The float-out pass might rescue us, but then again it might not. + +* We must be careful about type lambdas too. At one time we did, and + there is no risk of duplicating work thereby, but we do need to be + careful. In particular, here is a bad case (it happened in the + cichelli benchmark: + let v = ... + in let f = /\t -> \a -> ... + ==> + let f = /\t -> let v = ... in \a -> ... + This is bad as now f is an updatable closure (update PAP) + and has arity 0. + +* Hack alert! We only float in through one-shot lambdas, + not (as you might guess) through lone big lambdas. + Reason: we float *out* past big lambdas (see the test in the Lam + case of FloatOut.floatExpr) and we don't want to float straight + back in again. + + It *is* important to float into one-shot lambdas, however; + see the remarks with noFloatIntoRhs. + +So we treat lambda in groups, using the following rule: + + Float in if (a) there is at least one Id, + and (b) there are no non-one-shot Ids + + Otherwise drop all the bindings outside the group. + +This is what the 'go' function in the AnnLam case is doing. + +(Join points are handled similarly: a join point is considered one-shot iff +it's non-recursive, so we float only into non-recursive join points.) + +Urk! if all are tyvars, and we don't float in, we may miss an + opportunity to float inside a nested case branch + + +Note [Floating coercions] +~~~~~~~~~~~~~~~~~~~~~~~~~ +We could, in principle, have a coercion binding like + case f x of co { DEFAULT -> e1 e2 } +It's not common to have a function that returns a coercion, but nothing +in Core prohibits it. If so, 'co' might be mentioned in e1 or e2 +/only in a type/. E.g. suppose e1 was + let (x :: Int |> co) = blah in blah2 + + +But, with coercions appearing in types, there is a complication: we +might be floating in a "strict let" -- that is, a case. Case expressions +mention their return type. We absolutely can't float a coercion binding +inward to the point that the type of the expression it's about to wrap +mentions the coercion. So we include the union of the sets of free variables +of the types of all the drop points involved. If any of the floaters +bind a coercion variable mentioned in any of the types, that binder must +be dropped right away. + +-} + +fiExpr dflags to_drop lam@(_, AnnLam _ _) + | noFloatIntoLam bndrs -- Dump it all here + -- NB: Must line up with noFloatIntoRhs (AnnLam...); see #7088 + = wrapFloats to_drop (mkLams bndrs (fiExpr dflags [] body)) + + | otherwise -- Float inside + = mkLams bndrs (fiExpr dflags to_drop body) + + where + (bndrs, body) = collectAnnBndrs lam + +{- +We don't float lets inwards past an SCC. + ToDo: keep info on current cc, and when passing + one, if it is not the same, annotate all lets in binds with current + cc, change current cc to the new one and float binds into expr. +-} + +fiExpr dflags to_drop (_, AnnTick tickish expr) + | tickish `tickishScopesLike` SoftScope + = Tick tickish (fiExpr dflags to_drop expr) + + | otherwise -- Wimp out for now - we could push values in + = wrapFloats to_drop (Tick tickish (fiExpr dflags [] expr)) + +{- +For @Lets@, the possible ``drop points'' for the \tr{to_drop} +bindings are: (a)~in the body, (b1)~in the RHS of a NonRec binding, +or~(b2), in each of the RHSs of the pairs of a @Rec@. + +Note that we do {\em weird things} with this let's binding. Consider: +\begin{verbatim} +let + w = ... +in { + let v = ... w ... + in ... v .. w ... +} +\end{verbatim} +Look at the inner \tr{let}. As \tr{w} is used in both the bind and +body of the inner let, we could panic and leave \tr{w}'s binding where +it is. But \tr{v} is floatable further into the body of the inner let, and +{\em then} \tr{w} will also be only in the body of that inner let. + +So: rather than drop \tr{w}'s binding here, we add it onto the list of +things to drop in the outer let's body, and let nature take its +course. + +Note [extra_fvs (1): avoid floating into RHS] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider let x=\y....t... in body. We do not necessarily want to float +a binding for t into the RHS, because it'll immediately be floated out +again. (It won't go inside the lambda else we risk losing work.) +In letrec, we need to be more careful still. We don't want to transform + let x# = y# +# 1# + in + letrec f = \z. ...x#...f... + in ... +into + letrec f = let x# = y# +# 1# in \z. ...x#...f... in ... +because now we can't float the let out again, because a letrec +can't have unboxed bindings. + +So we make "extra_fvs" which is the rhs_fvs of such bindings, and +arrange to dump bindings that bind extra_fvs before the entire let. + +Note [extra_fvs (2): free variables of rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let x{rule mentioning y} = rhs in body +Here y is not free in rhs or body; but we still want to dump bindings +that bind y outside the let. So we augment extra_fvs with the +idRuleAndUnfoldingVars of x. No need for type variables, hence not using +idFreeVars. +-} + +fiExpr dflags to_drop (_,AnnLet bind body) + = fiExpr dflags (after ++ new_float : before) body + -- to_drop is in reverse dependency order + where + (before, new_float, after) = fiBind dflags to_drop bind body_fvs + body_fvs = freeVarsOf body + +{- Note [Floating primops] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +We try to float-in a case expression over an unlifted type. The +motivating example was #5658: in particular, this change allows +array indexing operations, which have a single DEFAULT alternative +without any binders, to be floated inward. + +SIMD primops for unpacking SIMD vectors into an unboxed tuple of unboxed +scalars also need to be floated inward, but unpacks have a single non-DEFAULT +alternative that binds the elements of the tuple. We now therefore also support +floating in cases with a single alternative that may bind values. + +But there are wrinkles + +* Which unlifted cases do we float? See PrimOp.hs + Note [PrimOp can_fail and has_side_effects] which explains: + - We can float-in can_fail primops, but we can't float them out. + - But we can float a has_side_effects primop, but NOT inside a lambda, + so for now we don't float them at all. + Hence exprOkForSideEffects + +* Because we can float can-fail primops (array indexing, division) inwards + but not outwards, we must be careful not to transform + case a /# b of r -> f (F# r) + ===> + f (case a /# b of r -> F# r) + because that creates a new thunk that wasn't there before. And + because it can't be floated out (can_fail), the thunk will stay + there. Disaster! (This happened in nofib 'simple' and 'scs'.) + + Solution: only float cases into the branches of other cases, and + not into the arguments of an application, or the RHS of a let. This + is somewhat conservative, but it's simple. And it still hits the + cases like #5658. This is implemented in sepBindsByJoinPoint; + if is_case is False we dump all floating cases right here. + +* #14511 is another example of why we want to restrict float-in + of case-expressions. Consider + case indexArray# a n of (# r #) -> writeArray# ma i (f r) + Now, floating that indexing operation into the (f r) thunk will + not create any new thunks, but it will keep the array 'a' alive + for much longer than the programmer expected. + + So again, not floating a case into a let or argument seems like + the Right Thing + +For @Case@, the possible drop points for the 'to_drop' +bindings are: + (a) inside the scrutinee + (b) inside one of the alternatives/default (default FVs always /first/!). + +-} + +fiExpr dflags to_drop (_, AnnCase scrut case_bndr _ [(con,alt_bndrs,rhs)]) + | isUnliftedType (idType case_bndr) + , exprOkForSideEffects (deAnnotate scrut) + -- See Note [Floating primops] + = wrapFloats shared_binds $ + fiExpr dflags (case_float : rhs_binds) rhs + where + case_float = FB (mkDVarSet (case_bndr : alt_bndrs)) scrut_fvs + (FloatCase scrut' case_bndr con alt_bndrs) + scrut' = fiExpr dflags scrut_binds scrut + rhs_fvs = freeVarsOf rhs `delDVarSetList` (case_bndr : alt_bndrs) + scrut_fvs = freeVarsOf scrut + + [shared_binds, scrut_binds, rhs_binds] + = sepBindsByDropPoint dflags False + [scrut_fvs, rhs_fvs] + to_drop + +fiExpr dflags to_drop (_, AnnCase scrut case_bndr ty alts) + = wrapFloats drop_here1 $ + wrapFloats drop_here2 $ + Case (fiExpr dflags scrut_drops scrut) case_bndr ty + (zipWith fi_alt alts_drops_s alts) + where + -- Float into the scrut and alts-considered-together just like App + [drop_here1, scrut_drops, alts_drops] + = sepBindsByDropPoint dflags False + [scrut_fvs, all_alts_fvs] + to_drop + + -- Float into the alts with the is_case flag set + (drop_here2 : alts_drops_s) + | [ _ ] <- alts = [] : [alts_drops] + | otherwise = sepBindsByDropPoint dflags True alts_fvs alts_drops + + scrut_fvs = freeVarsOf scrut + alts_fvs = map alt_fvs alts + all_alts_fvs = unionDVarSets alts_fvs + alt_fvs (_con, args, rhs) + = foldl' delDVarSet (freeVarsOf rhs) (case_bndr:args) + -- Delete case_bndr and args from free vars of rhs + -- to get free vars of alt + + fi_alt to_drop (con, args, rhs) = (con, args, fiExpr dflags to_drop rhs) + +------------------ +fiBind :: DynFlags + -> FloatInBinds -- Binds we're trying to drop + -- as far "inwards" as possible + -> CoreBindWithFVs -- Input binding + -> DVarSet -- Free in scope of binding + -> ( FloatInBinds -- Land these before + , FloatInBind -- The binding itself + , FloatInBinds) -- Land these after + +fiBind dflags to_drop (AnnNonRec id ann_rhs@(rhs_fvs, rhs)) body_fvs + = ( extra_binds ++ shared_binds -- Land these before + -- See Note [extra_fvs (1,2)] + , FB (unitDVarSet id) rhs_fvs' -- The new binding itself + (FloatLet (NonRec id rhs')) + , body_binds ) -- Land these after + + where + body_fvs2 = body_fvs `delDVarSet` id + + rule_fvs = bndrRuleAndUnfoldingVarsDSet id -- See Note [extra_fvs (2): free variables of rules] + extra_fvs | noFloatIntoRhs NonRecursive id rhs + = rule_fvs `unionDVarSet` rhs_fvs + | otherwise + = rule_fvs + -- See Note [extra_fvs (1): avoid floating into RHS] + -- No point in floating in only to float straight out again + -- We *can't* float into ok-for-speculation unlifted RHSs + -- But do float into join points + + [shared_binds, extra_binds, rhs_binds, body_binds] + = sepBindsByDropPoint dflags False + [extra_fvs, rhs_fvs, body_fvs2] + to_drop + + -- Push rhs_binds into the right hand side of the binding + rhs' = fiRhs dflags rhs_binds id ann_rhs + rhs_fvs' = rhs_fvs `unionDVarSet` floatedBindsFVs rhs_binds `unionDVarSet` rule_fvs + -- Don't forget the rule_fvs; the binding mentions them! + +fiBind dflags to_drop (AnnRec bindings) body_fvs + = ( extra_binds ++ shared_binds + , FB (mkDVarSet ids) rhs_fvs' + (FloatLet (Rec (fi_bind rhss_binds bindings))) + , body_binds ) + where + (ids, rhss) = unzip bindings + rhss_fvs = map freeVarsOf rhss + + -- See Note [extra_fvs (1,2)] + rule_fvs = mapUnionDVarSet bndrRuleAndUnfoldingVarsDSet ids + extra_fvs = rule_fvs `unionDVarSet` + unionDVarSets [ rhs_fvs | (bndr, (rhs_fvs, rhs)) <- bindings + , noFloatIntoRhs Recursive bndr rhs ] + + (shared_binds:extra_binds:body_binds:rhss_binds) + = sepBindsByDropPoint dflags False + (extra_fvs:body_fvs:rhss_fvs) + to_drop + + rhs_fvs' = unionDVarSets rhss_fvs `unionDVarSet` + unionDVarSets (map floatedBindsFVs rhss_binds) `unionDVarSet` + rule_fvs -- Don't forget the rule variables! + + -- Push rhs_binds into the right hand side of the binding + fi_bind :: [FloatInBinds] -- one per "drop pt" conjured w/ fvs_of_rhss + -> [(Id, CoreExprWithFVs)] + -> [(Id, CoreExpr)] + + fi_bind to_drops pairs + = [ (binder, fiRhs dflags to_drop binder rhs) + | ((binder, rhs), to_drop) <- zipEqual "fi_bind" pairs to_drops ] + +------------------ +fiRhs :: DynFlags -> FloatInBinds -> CoreBndr -> CoreExprWithFVs -> CoreExpr +fiRhs dflags to_drop bndr rhs + | Just join_arity <- isJoinId_maybe bndr + , let (bndrs, body) = collectNAnnBndrs join_arity rhs + = mkLams bndrs (fiExpr dflags to_drop body) + | otherwise + = fiExpr dflags to_drop rhs + +------------------ +noFloatIntoLam :: [Var] -> Bool +noFloatIntoLam bndrs = any bad bndrs + where + bad b = isId b && not (isOneShotBndr b) + -- Don't float inside a non-one-shot lambda + +noFloatIntoRhs :: RecFlag -> Id -> CoreExprWithFVs' -> Bool +-- ^ True if it's a bad idea to float bindings into this RHS +noFloatIntoRhs is_rec bndr rhs + | isJoinId bndr + = isRec is_rec -- Joins are one-shot iff non-recursive + + | otherwise + = noFloatIntoArg rhs (idType bndr) + +noFloatIntoArg :: CoreExprWithFVs' -> Type -> Bool +noFloatIntoArg expr expr_ty + | isUnliftedType expr_ty + = True -- See Note [Do not destroy the let/app invariant] + + | AnnLam bndr e <- expr + , (bndrs, _) <- collectAnnBndrs e + = noFloatIntoLam (bndr:bndrs) -- Wrinkle 1 (a) + || all isTyVar (bndr:bndrs) -- Wrinkle 1 (b) + -- See Note [noFloatInto considerations] wrinkle 2 + + | otherwise -- Note [noFloatInto considerations] wrinkle 2 + = exprIsTrivial deann_expr || exprIsHNF deann_expr + where + deann_expr = deAnnotate' expr + +{- Note [noFloatInto considerations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When do we want to float bindings into + - noFloatIntoRHs: the RHS of a let-binding + - noFloatIntoArg: the argument of a function application + +Definitely don't float in if it has unlifted type; that +would destroy the let/app invariant. + +* Wrinkle 1: do not float in if + (a) any non-one-shot value lambdas + or (b) all type lambdas + In both cases we'll float straight back out again + NB: Must line up with fiExpr (AnnLam...); see #7088 + + (a) is important: we /must/ float into a one-shot lambda group + (which includes join points). This makes a big difference + for things like + f x# = let x = I# x# + in let j = \() -> ...x... + in if <condition> then normal-path else j () + If x is used only in the error case join point, j, we must float the + boxing constructor into it, else we box it every time which is very + bad news indeed. + +* Wrinkle 2: for RHSs, do not float into a HNF; we'll just float right + back out again... not tragic, but a waste of time. + + For function arguments we will still end up with this + in-then-out stuff; consider + letrec x = e in f x + Here x is not a HNF, so we'll produce + f (letrec x = e in x) + which is OK... it's not that common, and we'll end up + floating out again, in CorePrep if not earlier. + Still, we use exprIsTrivial to catch this case (sigh) + + +************************************************************************ +* * +\subsection{@sepBindsByDropPoint@} +* * +************************************************************************ + +This is the crucial function. The idea is: We have a wad of bindings +that we'd like to distribute inside a collection of {\em drop points}; +insides the alternatives of a \tr{case} would be one example of some +drop points; the RHS and body of a non-recursive \tr{let} binding +would be another (2-element) collection. + +So: We're given a list of sets-of-free-variables, one per drop point, +and a list of floating-inwards bindings. If a binding can go into +only one drop point (without suddenly making something out-of-scope), +in it goes. If a binding is used inside {\em multiple} drop points, +then it has to go in a you-must-drop-it-above-all-these-drop-points +point. + +We have to maintain the order on these drop-point-related lists. +-} + +-- pprFIB :: FloatInBinds -> SDoc +-- pprFIB fibs = text "FIB:" <+> ppr [b | FB _ _ b <- fibs] + +sepBindsByDropPoint + :: DynFlags + -> Bool -- True <=> is case expression + -> [FreeVarSet] -- One set of FVs per drop point + -- Always at least two long! + -> FloatInBinds -- Candidate floaters + -> [FloatInBinds] -- FIRST one is bindings which must not be floated + -- inside any drop point; the rest correspond + -- one-to-one with the input list of FV sets + +-- Every input floater is returned somewhere in the result; +-- none are dropped, not even ones which don't seem to be +-- free in *any* of the drop-point fvs. Why? Because, for example, +-- a binding (let x = E in B) might have a specialised version of +-- x (say x') stored inside x, but x' isn't free in E or B. + +type DropBox = (FreeVarSet, FloatInBinds) + +sepBindsByDropPoint dflags is_case drop_pts floaters + | null floaters -- Shortcut common case + = [] : [[] | _ <- drop_pts] + + | otherwise + = ASSERT( drop_pts `lengthAtLeast` 2 ) + go floaters (map (\fvs -> (fvs, [])) (emptyDVarSet : drop_pts)) + where + n_alts = length drop_pts + + go :: FloatInBinds -> [DropBox] -> [FloatInBinds] + -- The *first* one in the argument list is the drop_here set + -- The FloatInBinds in the lists are in the reverse of + -- the normal FloatInBinds order; that is, they are the right way round! + + go [] drop_boxes = map (reverse . snd) drop_boxes + + go (bind_w_fvs@(FB bndrs bind_fvs bind) : binds) drop_boxes@(here_box : fork_boxes) + = go binds new_boxes + where + -- "here" means the group of bindings dropped at the top of the fork + + (used_here : used_in_flags) = [ fvs `intersectsDVarSet` bndrs + | (fvs, _) <- drop_boxes] + + drop_here = used_here || cant_push + + n_used_alts = count id used_in_flags -- returns number of Trues in list. + + cant_push + | is_case = n_used_alts == n_alts -- Used in all, don't push + -- Remember n_alts > 1 + || (n_used_alts > 1 && not (floatIsDupable dflags bind)) + -- floatIsDupable: see Note [Duplicating floats] + + | otherwise = floatIsCase bind || n_used_alts > 1 + -- floatIsCase: see Note [Floating primops] + + new_boxes | drop_here = (insert here_box : fork_boxes) + | otherwise = (here_box : new_fork_boxes) + + new_fork_boxes = zipWithEqual "FloatIn.sepBinds" insert_maybe + fork_boxes used_in_flags + + insert :: DropBox -> DropBox + insert (fvs,drops) = (fvs `unionDVarSet` bind_fvs, bind_w_fvs:drops) + + insert_maybe box True = insert box + insert_maybe box False = box + + go _ _ = panic "sepBindsByDropPoint/go" + + +{- Note [Duplicating floats] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For case expressions we duplicate the binding if it is reasonably +small, and if it is not used in all the RHSs This is good for +situations like + let x = I# y in + case e of + C -> error x + D -> error x + E -> ...not mentioning x... + +If the thing is used in all RHSs there is nothing gained, +so we don't duplicate then. +-} + +floatedBindsFVs :: FloatInBinds -> FreeVarSet +floatedBindsFVs binds = mapUnionDVarSet fbFVs binds + +fbFVs :: FloatInBind -> DVarSet +fbFVs (FB _ fvs _) = fvs + +wrapFloats :: FloatInBinds -> CoreExpr -> CoreExpr +-- Remember FloatInBinds is in *reverse* dependency order +wrapFloats [] e = e +wrapFloats (FB _ _ fl : bs) e = wrapFloats bs (wrapFloat fl e) + +floatIsDupable :: DynFlags -> FloatBind -> Bool +floatIsDupable dflags (FloatCase scrut _ _ _) = exprIsDupable dflags scrut +floatIsDupable dflags (FloatLet (Rec prs)) = all (exprIsDupable dflags . snd) prs +floatIsDupable dflags (FloatLet (NonRec _ r)) = exprIsDupable dflags r + +floatIsCase :: FloatBind -> Bool +floatIsCase (FloatCase {}) = True +floatIsCase (FloatLet {}) = False diff --git a/compiler/GHC/Core/Op/FloatOut.hs b/compiler/GHC/Core/Op/FloatOut.hs new file mode 100644 index 0000000000..fb47b2b3ed --- /dev/null +++ b/compiler/GHC/Core/Op/FloatOut.hs @@ -0,0 +1,757 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section[FloatOut]{Float bindings outwards (towards the top level)} + +``Long-distance'' floating of bindings towards the top level. +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.FloatOut ( floatOutwards ) where + +import GhcPrelude + +import GHC.Core +import GHC.Core.Utils +import GHC.Core.Make +import GHC.Core.Arity ( etaExpand ) +import GHC.Core.Op.Monad ( FloatOutSwitches(..) ) + +import GHC.Driver.Session +import ErrUtils ( dumpIfSet_dyn, DumpFormat (..) ) +import Id ( Id, idArity, idType, isBottomingId, + isJoinId, isJoinId_maybe ) +import GHC.Core.Op.SetLevels +import UniqSupply ( UniqSupply ) +import Bag +import Util +import Maybes +import Outputable +import GHC.Core.Type +import qualified Data.IntMap as M + +import Data.List ( partition ) + +#include "HsVersions.h" + +{- + ----------------- + Overall game plan + ----------------- + +The Big Main Idea is: + + To float out sub-expressions that can thereby get outside + a non-one-shot value lambda, and hence may be shared. + + +To achieve this we may need to do two things: + + a) Let-bind the sub-expression: + + f (g x) ==> let lvl = f (g x) in lvl + + Now we can float the binding for 'lvl'. + + b) More than that, we may need to abstract wrt a type variable + + \x -> ... /\a -> let v = ...a... in .... + + Here the binding for v mentions 'a' but not 'x'. So we + abstract wrt 'a', to give this binding for 'v': + + vp = /\a -> ...a... + v = vp a + + Now the binding for vp can float out unimpeded. + I can't remember why this case seemed important enough to + deal with, but I certainly found cases where important floats + didn't happen if we did not abstract wrt tyvars. + +With this in mind we can also achieve another goal: lambda lifting. +We can make an arbitrary (function) binding float to top level by +abstracting wrt *all* local variables, not just type variables, leaving +a binding that can be floated right to top level. Whether or not this +happens is controlled by a flag. + + +Random comments +~~~~~~~~~~~~~~~ + +At the moment we never float a binding out to between two adjacent +lambdas. For example: + +@ + \x y -> let t = x+x in ... +===> + \x -> let t = x+x in \y -> ... +@ +Reason: this is less efficient in the case where the original lambda +is never partially applied. + +But there's a case I've seen where this might not be true. Consider: +@ +elEm2 x ys + = elem' x ys + where + elem' _ [] = False + elem' x (y:ys) = x==y || elem' x ys +@ +It turns out that this generates a subexpression of the form +@ + \deq x ys -> let eq = eqFromEqDict deq in ... +@ +which might usefully be separated to +@ + \deq -> let eq = eqFromEqDict deq in \xy -> ... +@ +Well, maybe. We don't do this at the moment. + +Note [Join points] +~~~~~~~~~~~~~~~~~~ +Every occurrence of a join point must be a tail call (see Note [Invariants on +join points] in GHC.Core), so we must be careful with how far we float them. The +mechanism for doing so is the *join ceiling*, detailed in Note [Join ceiling] +in GHC.Core.Op.SetLevels. For us, the significance is that a binder might be marked to be +dropped at the nearest boundary between tail calls and non-tail calls. For +example: + + (< join j = ... in + let x = < ... > in + case < ... > of + A -> ... + B -> ... + >) < ... > < ... > + +Here the join ceilings are marked with angle brackets. Either side of an +application is a join ceiling, as is the scrutinee position of a case +expression or the RHS of a let binding (but not a join point). + +Why do we *want* do float join points at all? After all, they're never +allocated, so there's no sharing to be gained by floating them. However, the +other benefit of floating is making RHSes small, and this can have a significant +impact. In particular, stream fusion has been known to produce nested loops like +this: + + joinrec j1 x1 = + joinrec j2 x2 = + joinrec j3 x3 = ... jump j1 (x3 + 1) ... jump j2 (x3 + 1) ... + in jump j3 x2 + in jump j2 x1 + in jump j1 x + +(Assume x1 and x2 do *not* occur free in j3.) + +Here j1 and j2 are wholly superfluous---each of them merely forwards its +argument to j3. Since j3 only refers to x3, we can float j2 and j3 to make +everything one big mutual recursion: + + joinrec j1 x1 = jump j2 x1 + j2 x2 = jump j3 x2 + j3 x3 = ... jump j1 (x3 + 1) ... jump j2 (x3 + 1) ... + in jump j1 x + +Now the simplifier will happily inline the trivial j1 and j2, leaving only j3. +Without floating, we're stuck with three loops instead of one. + +************************************************************************ +* * +\subsection[floatOutwards]{@floatOutwards@: let-floating interface function} +* * +************************************************************************ +-} + +floatOutwards :: FloatOutSwitches + -> DynFlags + -> UniqSupply + -> CoreProgram -> IO CoreProgram + +floatOutwards float_sws dflags us pgm + = do { + let { annotated_w_levels = setLevels float_sws pgm us ; + (fss, binds_s') = unzip (map floatTopBind annotated_w_levels) + } ; + + dumpIfSet_dyn dflags Opt_D_verbose_core2core "Levels added:" + FormatCore + (vcat (map ppr annotated_w_levels)); + + let { (tlets, ntlets, lams) = get_stats (sum_stats fss) }; + + dumpIfSet_dyn dflags Opt_D_dump_simpl_stats "FloatOut stats:" + FormatText + (hcat [ int tlets, text " Lets floated to top level; ", + int ntlets, text " Lets floated elsewhere; from ", + int lams, text " Lambda groups"]); + + return (bagToList (unionManyBags binds_s')) + } + +floatTopBind :: LevelledBind -> (FloatStats, Bag CoreBind) +floatTopBind bind + = case (floatBind bind) of { (fs, floats, bind') -> + let float_bag = flattenTopFloats floats + in case bind' of + -- bind' can't have unlifted values or join points, so can only be one + -- value bind, rec or non-rec (see comment on floatBind) + [Rec prs] -> (fs, unitBag (Rec (addTopFloatPairs float_bag prs))) + [NonRec b e] -> (fs, float_bag `snocBag` NonRec b e) + _ -> pprPanic "floatTopBind" (ppr bind') } + +{- +************************************************************************ +* * +\subsection[FloatOut-Bind]{Floating in a binding (the business end)} +* * +************************************************************************ +-} + +floatBind :: LevelledBind -> (FloatStats, FloatBinds, [CoreBind]) + -- Returns a list with either + -- * A single non-recursive binding (value or join point), or + -- * The following, in order: + -- * Zero or more non-rec unlifted bindings + -- * One or both of: + -- * A recursive group of join binds + -- * A recursive group of value binds + -- See Note [Floating out of Rec rhss] for why things get arranged this way. +floatBind (NonRec (TB var _) rhs) + = case (floatRhs var rhs) of { (fs, rhs_floats, rhs') -> + + -- A tiresome hack: + -- see Note [Bottoming floats: eta expansion] in GHC.Core.Op.SetLevels + let rhs'' | isBottomingId var = etaExpand (idArity var) rhs' + | otherwise = rhs' + + in (fs, rhs_floats, [NonRec var rhs'']) } + +floatBind (Rec pairs) + = case floatList do_pair pairs of { (fs, rhs_floats, new_pairs) -> + let (new_ul_pairss, new_other_pairss) = unzip new_pairs + (new_join_pairs, new_l_pairs) = partition (isJoinId . fst) + (concat new_other_pairss) + -- Can't put the join points and the values in the same rec group + new_rec_binds | null new_join_pairs = [ Rec new_l_pairs ] + | null new_l_pairs = [ Rec new_join_pairs ] + | otherwise = [ Rec new_l_pairs + , Rec new_join_pairs ] + new_non_rec_binds = [ NonRec b e | (b, e) <- concat new_ul_pairss ] + in + (fs, rhs_floats, new_non_rec_binds ++ new_rec_binds) } + where + do_pair :: (LevelledBndr, LevelledExpr) + -> (FloatStats, FloatBinds, + ([(Id,CoreExpr)], -- Non-recursive unlifted value bindings + [(Id,CoreExpr)])) -- Join points and lifted value bindings + do_pair (TB name spec, rhs) + | isTopLvl dest_lvl -- See Note [floatBind for top level] + = case (floatRhs name rhs) of { (fs, rhs_floats, rhs') -> + (fs, emptyFloats, ([], addTopFloatPairs (flattenTopFloats rhs_floats) + [(name, rhs')]))} + | otherwise -- Note [Floating out of Rec rhss] + = case (floatRhs name rhs) of { (fs, rhs_floats, rhs') -> + case (partitionByLevel dest_lvl rhs_floats) of { (rhs_floats', heres) -> + case (splitRecFloats heres) of { (ul_pairs, pairs, case_heres) -> + let pairs' = (name, installUnderLambdas case_heres rhs') : pairs in + (fs, rhs_floats', (ul_pairs, pairs')) }}} + where + dest_lvl = floatSpecLevel spec + +splitRecFloats :: Bag FloatBind + -> ([(Id,CoreExpr)], -- Non-recursive unlifted value bindings + [(Id,CoreExpr)], -- Join points and lifted value bindings + Bag FloatBind) -- A tail of further bindings +-- The "tail" begins with a case +-- See Note [Floating out of Rec rhss] +splitRecFloats fs + = go [] [] (bagToList fs) + where + go ul_prs prs (FloatLet (NonRec b r) : fs) | isUnliftedType (idType b) + , not (isJoinId b) + = go ((b,r):ul_prs) prs fs + | otherwise + = go ul_prs ((b,r):prs) fs + go ul_prs prs (FloatLet (Rec prs') : fs) = go ul_prs (prs' ++ prs) fs + go ul_prs prs fs = (reverse ul_prs, prs, + listToBag fs) + -- Order only matters for + -- non-rec + +installUnderLambdas :: Bag FloatBind -> CoreExpr -> CoreExpr +-- Note [Floating out of Rec rhss] +installUnderLambdas floats e + | isEmptyBag floats = e + | otherwise = go e + where + go (Lam b e) = Lam b (go e) + go e = install floats e + +--------------- +floatList :: (a -> (FloatStats, FloatBinds, b)) -> [a] -> (FloatStats, FloatBinds, [b]) +floatList _ [] = (zeroStats, emptyFloats, []) +floatList f (a:as) = case f a of { (fs_a, binds_a, b) -> + case floatList f as of { (fs_as, binds_as, bs) -> + (fs_a `add_stats` fs_as, binds_a `plusFloats` binds_as, b:bs) }} + +{- +Note [Floating out of Rec rhss] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider Rec { f<1,0> = \xy. body } +From the body we may get some floats. The ones with level <1,0> must +stay here, since they may mention f. Ideally we'd like to make them +part of the Rec block pairs -- but we can't if there are any +FloatCases involved. + +Nor is it a good idea to dump them in the rhs, but outside the lambda + f = case x of I# y -> \xy. body +because now f's arity might get worse, which is Not Good. (And if +there's an SCC around the RHS it might not get better again. +See #5342.) + +So, gruesomely, we split the floats into + * the outer FloatLets, which can join the Rec, and + * an inner batch starting in a FloatCase, which are then + pushed *inside* the lambdas. +This loses full-laziness the rare situation where there is a +FloatCase and a Rec interacting. + +If there are unlifted FloatLets (that *aren't* join points) among the floats, +we can't add them to the recursive group without angering Core Lint, but since +they must be ok-for-speculation, they can't actually be making any recursive +calls, so we can safely pull them out and keep them non-recursive. + +(Why is something getting floated to <1,0> that doesn't make a recursive call? +The case that came up in testing was that f *and* the unlifted binding were +getting floated *to the same place*: + + \x<2,0> -> + ... <3,0> + letrec { f<F<2,0>> = + ... let x'<F<2,0>> = x +# 1# in ... + } in ... + +Everything gets labeled "float to <2,0>" because it all depends on x, but this +makes f and x' look mutually recursive when they're not. + +The test was shootout/k-nucleotide, as compiled using commit 47d5dd68 on the +wip/join-points branch. + +TODO: This can probably be solved somehow in GHC.Core.Op.SetLevels. The difference between +"this *is at* level <2,0>" and "this *depends on* level <2,0>" is very +important.) + +Note [floatBind for top level] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We may have a *nested* binding whose destination level is (FloatMe tOP_LEVEL), thus + letrec { foo <0,0> = .... (let bar<0,0> = .. in ..) .... } +The binding for bar will be in the "tops" part of the floating binds, +and thus not partioned by floatBody. + +We could perhaps get rid of the 'tops' component of the floating binds, +but this case works just as well. + + +************************************************************************ + +\subsection[FloatOut-Expr]{Floating in expressions} +* * +************************************************************************ +-} + +floatBody :: Level + -> LevelledExpr + -> (FloatStats, FloatBinds, CoreExpr) + +floatBody lvl arg -- Used rec rhss, and case-alternative rhss + = case (floatExpr arg) of { (fsa, floats, arg') -> + case (partitionByLevel lvl floats) of { (floats', heres) -> + -- Dump bindings are bound here + (fsa, floats', install heres arg') }} + +----------------- + +{- Note [Floating past breakpoints] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We used to disallow floating out of breakpoint ticks (see #10052). However, I +think this is too restrictive. + +Consider the case of an expression scoped over by a breakpoint tick, + + tick<...> (let x = ... in f x) + +In this case it is completely legal to float out x, despite the fact that +breakpoint ticks are scoped, + + let x = ... in (tick<...> f x) + +The reason here is that we know that the breakpoint will still be hit when the +expression is entered since the tick still scopes over the RHS. + +-} + +floatExpr :: LevelledExpr + -> (FloatStats, FloatBinds, CoreExpr) +floatExpr (Var v) = (zeroStats, emptyFloats, Var v) +floatExpr (Type ty) = (zeroStats, emptyFloats, Type ty) +floatExpr (Coercion co) = (zeroStats, emptyFloats, Coercion co) +floatExpr (Lit lit) = (zeroStats, emptyFloats, Lit lit) + +floatExpr (App e a) + = case (atJoinCeiling $ floatExpr e) of { (fse, floats_e, e') -> + case (atJoinCeiling $ floatExpr a) of { (fsa, floats_a, a') -> + (fse `add_stats` fsa, floats_e `plusFloats` floats_a, App e' a') }} + +floatExpr lam@(Lam (TB _ lam_spec) _) + = let (bndrs_w_lvls, body) = collectBinders lam + bndrs = [b | TB b _ <- bndrs_w_lvls] + bndr_lvl = asJoinCeilLvl (floatSpecLevel lam_spec) + -- All the binders have the same level + -- See GHC.Core.Op.SetLevels.lvlLamBndrs + -- Use asJoinCeilLvl to make this the join ceiling + in + case (floatBody bndr_lvl body) of { (fs, floats, body') -> + (add_to_stats fs floats, floats, mkLams bndrs body') } + +floatExpr (Tick tickish expr) + | tickish `tickishScopesLike` SoftScope -- not scoped, can just float + = case (atJoinCeiling $ floatExpr expr) of { (fs, floating_defns, expr') -> + (fs, floating_defns, Tick tickish expr') } + + | not (tickishCounts tickish) || tickishCanSplit tickish + = case (atJoinCeiling $ floatExpr expr) of { (fs, floating_defns, expr') -> + let -- Annotate bindings floated outwards past an scc expression + -- with the cc. We mark that cc as "duplicated", though. + annotated_defns = wrapTick (mkNoCount tickish) floating_defns + in + (fs, annotated_defns, Tick tickish expr') } + + -- Note [Floating past breakpoints] + | Breakpoint{} <- tickish + = case (floatExpr expr) of { (fs, floating_defns, expr') -> + (fs, floating_defns, Tick tickish expr') } + + | otherwise + = pprPanic "floatExpr tick" (ppr tickish) + +floatExpr (Cast expr co) + = case (atJoinCeiling $ floatExpr expr) of { (fs, floating_defns, expr') -> + (fs, floating_defns, Cast expr' co) } + +floatExpr (Let bind body) + = case bind_spec of + FloatMe dest_lvl + -> case (floatBind bind) of { (fsb, bind_floats, binds') -> + case (floatExpr body) of { (fse, body_floats, body') -> + let new_bind_floats = foldr plusFloats emptyFloats + (map (unitLetFloat dest_lvl) binds') in + ( add_stats fsb fse + , bind_floats `plusFloats` new_bind_floats + `plusFloats` body_floats + , body') }} + + StayPut bind_lvl -- See Note [Avoiding unnecessary floating] + -> case (floatBind bind) of { (fsb, bind_floats, binds') -> + case (floatBody bind_lvl body) of { (fse, body_floats, body') -> + ( add_stats fsb fse + , bind_floats `plusFloats` body_floats + , foldr Let body' binds' ) }} + where + bind_spec = case bind of + NonRec (TB _ s) _ -> s + Rec ((TB _ s, _) : _) -> s + Rec [] -> panic "floatExpr:rec" + +floatExpr (Case scrut (TB case_bndr case_spec) ty alts) + = case case_spec of + FloatMe dest_lvl -- Case expression moves + | [(con@(DataAlt {}), bndrs, rhs)] <- alts + -> case atJoinCeiling $ floatExpr scrut of { (fse, fde, scrut') -> + case floatExpr rhs of { (fsb, fdb, rhs') -> + let + float = unitCaseFloat dest_lvl scrut' + case_bndr con [b | TB b _ <- bndrs] + in + (add_stats fse fsb, fde `plusFloats` float `plusFloats` fdb, rhs') }} + | otherwise + -> pprPanic "Floating multi-case" (ppr alts) + + StayPut bind_lvl -- Case expression stays put + -> case atJoinCeiling $ floatExpr scrut of { (fse, fde, scrut') -> + case floatList (float_alt bind_lvl) alts of { (fsa, fda, alts') -> + (add_stats fse fsa, fda `plusFloats` fde, Case scrut' case_bndr ty alts') + }} + where + float_alt bind_lvl (con, bs, rhs) + = case (floatBody bind_lvl rhs) of { (fs, rhs_floats, rhs') -> + (fs, rhs_floats, (con, [b | TB b _ <- bs], rhs')) } + +floatRhs :: CoreBndr + -> LevelledExpr + -> (FloatStats, FloatBinds, CoreExpr) +floatRhs bndr rhs + | Just join_arity <- isJoinId_maybe bndr + , Just (bndrs, body) <- try_collect join_arity rhs [] + = case bndrs of + [] -> floatExpr rhs + (TB _ lam_spec):_ -> + let lvl = floatSpecLevel lam_spec in + case floatBody lvl body of { (fs, floats, body') -> + (fs, floats, mkLams [b | TB b _ <- bndrs] body') } + | otherwise + = atJoinCeiling $ floatExpr rhs + where + try_collect 0 expr acc = Just (reverse acc, expr) + try_collect n (Lam b e) acc = try_collect (n-1) e (b:acc) + try_collect _ _ _ = Nothing + +{- +Note [Avoiding unnecessary floating] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general we want to avoid floating a let unnecessarily, because +it might worsen strictness: + let + x = ...(let y = e in y+y).... +Here y is demanded. If we float it outside the lazy 'x=..' then +we'd have to zap its demand info, and it may never be restored. + +So at a 'let' we leave the binding right where the are unless +the binding will escape a value lambda, e.g. + +(\x -> let y = fac 100 in y) + +That's what the partitionByMajorLevel does in the floatExpr (Let ...) +case. + +Notice, though, that we must take care to drop any bindings +from the body of the let that depend on the staying-put bindings. + +We used instead to do the partitionByMajorLevel on the RHS of an '=', +in floatRhs. But that was quite tiresome. We needed to test for +values or trivial rhss, because (in particular) we don't want to insert +new bindings between the "=" and the "\". E.g. + f = \x -> let <bind> in <body> +We do not want + f = let <bind> in \x -> <body> +(a) The simplifier will immediately float it further out, so we may + as well do so right now; in general, keeping rhss as manifest + values is good +(b) If a float-in pass follows immediately, it might add yet more + bindings just after the '='. And some of them might (correctly) + be strict even though the 'let f' is lazy, because f, being a value, + gets its demand-info zapped by the simplifier. +And even all that turned out to be very fragile, and broke +altogether when profiling got in the way. + +So now we do the partition right at the (Let..) itself. + +************************************************************************ +* * +\subsection{Utility bits for floating stats} +* * +************************************************************************ + +I didn't implement this with unboxed numbers. I don't want to be too +strict in this stuff, as it is rarely turned on. (WDP 95/09) +-} + +data FloatStats + = FlS Int -- Number of top-floats * lambda groups they've been past + Int -- Number of non-top-floats * lambda groups they've been past + Int -- Number of lambda (groups) seen + +get_stats :: FloatStats -> (Int, Int, Int) +get_stats (FlS a b c) = (a, b, c) + +zeroStats :: FloatStats +zeroStats = FlS 0 0 0 + +sum_stats :: [FloatStats] -> FloatStats +sum_stats xs = foldr add_stats zeroStats xs + +add_stats :: FloatStats -> FloatStats -> FloatStats +add_stats (FlS a1 b1 c1) (FlS a2 b2 c2) + = FlS (a1 + a2) (b1 + b2) (c1 + c2) + +add_to_stats :: FloatStats -> FloatBinds -> FloatStats +add_to_stats (FlS a b c) (FB tops ceils others) + = FlS (a + lengthBag tops) + (b + lengthBag ceils + lengthBag (flattenMajor others)) + (c + 1) + +{- +************************************************************************ +* * +\subsection{Utility bits for floating} +* * +************************************************************************ + +Note [Representation of FloatBinds] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The FloatBinds types is somewhat important. We can get very large numbers +of floating bindings, often all destined for the top level. A typical example +is x = [4,2,5,2,5, .... ] +Then we get lots of small expressions like (fromInteger 4), which all get +lifted to top level. + +The trouble is that + (a) we partition these floating bindings *at every binding site* + (b) GHC.Core.Op.SetLevels introduces a new bindings site for every float +So we had better not look at each binding at each binding site! + +That is why MajorEnv is represented as a finite map. + +We keep the bindings destined for the *top* level separate, because +we float them out even if they don't escape a *value* lambda; see +partitionByMajorLevel. +-} + +type FloatLet = CoreBind -- INVARIANT: a FloatLet is always lifted +type MajorEnv = M.IntMap MinorEnv -- Keyed by major level +type MinorEnv = M.IntMap (Bag FloatBind) -- Keyed by minor level + +data FloatBinds = FB !(Bag FloatLet) -- Destined for top level + !(Bag FloatBind) -- Destined for join ceiling + !MajorEnv -- Other levels + -- See Note [Representation of FloatBinds] + +instance Outputable FloatBinds where + ppr (FB fbs ceils defs) + = text "FB" <+> (braces $ vcat + [ text "tops =" <+> ppr fbs + , text "ceils =" <+> ppr ceils + , text "non-tops =" <+> ppr defs ]) + +flattenTopFloats :: FloatBinds -> Bag CoreBind +flattenTopFloats (FB tops ceils defs) + = ASSERT2( isEmptyBag (flattenMajor defs), ppr defs ) + ASSERT2( isEmptyBag ceils, ppr ceils ) + tops + +addTopFloatPairs :: Bag CoreBind -> [(Id,CoreExpr)] -> [(Id,CoreExpr)] +addTopFloatPairs float_bag prs + = foldr add prs float_bag + where + add (NonRec b r) prs = (b,r):prs + add (Rec prs1) prs2 = prs1 ++ prs2 + +flattenMajor :: MajorEnv -> Bag FloatBind +flattenMajor = M.foldr (unionBags . flattenMinor) emptyBag + +flattenMinor :: MinorEnv -> Bag FloatBind +flattenMinor = M.foldr unionBags emptyBag + +emptyFloats :: FloatBinds +emptyFloats = FB emptyBag emptyBag M.empty + +unitCaseFloat :: Level -> CoreExpr -> Id -> AltCon -> [Var] -> FloatBinds +unitCaseFloat (Level major minor t) e b con bs + | t == JoinCeilLvl + = FB emptyBag floats M.empty + | otherwise + = FB emptyBag emptyBag (M.singleton major (M.singleton minor floats)) + where + floats = unitBag (FloatCase e b con bs) + +unitLetFloat :: Level -> FloatLet -> FloatBinds +unitLetFloat lvl@(Level major minor t) b + | isTopLvl lvl = FB (unitBag b) emptyBag M.empty + | t == JoinCeilLvl = FB emptyBag floats M.empty + | otherwise = FB emptyBag emptyBag (M.singleton major + (M.singleton minor floats)) + where + floats = unitBag (FloatLet b) + +plusFloats :: FloatBinds -> FloatBinds -> FloatBinds +plusFloats (FB t1 c1 l1) (FB t2 c2 l2) + = FB (t1 `unionBags` t2) (c1 `unionBags` c2) (l1 `plusMajor` l2) + +plusMajor :: MajorEnv -> MajorEnv -> MajorEnv +plusMajor = M.unionWith plusMinor + +plusMinor :: MinorEnv -> MinorEnv -> MinorEnv +plusMinor = M.unionWith unionBags + +install :: Bag FloatBind -> CoreExpr -> CoreExpr +install defn_groups expr + = foldr wrapFloat expr defn_groups + +partitionByLevel + :: Level -- Partitioning level + -> FloatBinds -- Defns to be divided into 2 piles... + -> (FloatBinds, -- Defns with level strictly < partition level, + Bag FloatBind) -- The rest + +{- +-- ---- partitionByMajorLevel ---- +-- Float it if we escape a value lambda, +-- *or* if we get to the top level +-- *or* if it's a case-float and its minor level is < current +-- +-- If we can get to the top level, say "yes" anyway. This means that +-- x = f e +-- transforms to +-- lvl = e +-- x = f lvl +-- which is as it should be + +partitionByMajorLevel (Level major _) (FB tops defns) + = (FB tops outer, heres `unionBags` flattenMajor inner) + where + (outer, mb_heres, inner) = M.splitLookup major defns + heres = case mb_heres of + Nothing -> emptyBag + Just h -> flattenMinor h +-} + +partitionByLevel (Level major minor typ) (FB tops ceils defns) + = (FB tops ceils' (outer_maj `plusMajor` M.singleton major outer_min), + here_min `unionBags` here_ceil + `unionBags` flattenMinor inner_min + `unionBags` flattenMajor inner_maj) + + where + (outer_maj, mb_here_maj, inner_maj) = M.splitLookup major defns + (outer_min, mb_here_min, inner_min) = case mb_here_maj of + Nothing -> (M.empty, Nothing, M.empty) + Just min_defns -> M.splitLookup minor min_defns + here_min = mb_here_min `orElse` emptyBag + (here_ceil, ceils') | typ == JoinCeilLvl = (ceils, emptyBag) + | otherwise = (emptyBag, ceils) + +-- Like partitionByLevel, but instead split out the bindings that are marked +-- to float to the nearest join ceiling (see Note [Join points]) +partitionAtJoinCeiling :: FloatBinds -> (FloatBinds, Bag FloatBind) +partitionAtJoinCeiling (FB tops ceils defs) + = (FB tops emptyBag defs, ceils) + +-- Perform some action at a join ceiling, i.e., don't let join points float out +-- (see Note [Join points]) +atJoinCeiling :: (FloatStats, FloatBinds, CoreExpr) + -> (FloatStats, FloatBinds, CoreExpr) +atJoinCeiling (fs, floats, expr') + = (fs, floats', install ceils expr') + where + (floats', ceils) = partitionAtJoinCeiling floats + +wrapTick :: Tickish Id -> FloatBinds -> FloatBinds +wrapTick t (FB tops ceils defns) + = FB (mapBag wrap_bind tops) (wrap_defns ceils) + (M.map (M.map wrap_defns) defns) + where + wrap_defns = mapBag wrap_one + + wrap_bind (NonRec binder rhs) = NonRec binder (maybe_tick rhs) + wrap_bind (Rec pairs) = Rec (mapSnd maybe_tick pairs) + + wrap_one (FloatLet bind) = FloatLet (wrap_bind bind) + wrap_one (FloatCase e b c bs) = FloatCase (maybe_tick e) b c bs + + maybe_tick e | exprIsHNF e = tickHNFArgs t e + | otherwise = mkTick t e + -- we don't need to wrap a tick around an HNF when we float it + -- outside a tick: that is an invariant of the tick semantics + -- Conversely, inlining of HNFs inside an SCC is allowed, and + -- indeed the HNF we're floating here might well be inlined back + -- again, and we don't want to end up with duplicate ticks. diff --git a/compiler/GHC/Core/Op/LiberateCase.hs b/compiler/GHC/Core/Op/LiberateCase.hs new file mode 100644 index 0000000000..399abf4c67 --- /dev/null +++ b/compiler/GHC/Core/Op/LiberateCase.hs @@ -0,0 +1,442 @@ +{- +(c) The AQUA Project, Glasgow University, 1994-1998 + +\section[LiberateCase]{Unroll recursion to allow evals to be lifted from a loop} +-} + +{-# LANGUAGE CPP #-} +module GHC.Core.Op.LiberateCase ( liberateCase ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Driver.Session +import GHC.Core +import GHC.Core.Unfold ( couldBeSmallEnoughToInline ) +import TysWiredIn ( unitDataConId ) +import Id +import VarEnv +import Util ( notNull ) + +{- +The liberate-case transformation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This module walks over @Core@, and looks for @case@ on free variables. +The criterion is: + if there is case on a free on the route to the recursive call, + then the recursive call is replaced with an unfolding. + +Example + + f = \ t -> case v of + V a b -> a : f t + +=> the inner f is replaced. + + f = \ t -> case v of + V a b -> a : (letrec + f = \ t -> case v of + V a b -> a : f t + in f) t +(note the NEED for shadowing) + +=> Simplify + + f = \ t -> case v of + V a b -> a : (letrec + f = \ t -> a : f t + in f t) + +Better code, because 'a' is free inside the inner letrec, rather +than needing projection from v. + +Note that this deals with *free variables*. SpecConstr deals with +*arguments* that are of known form. E.g. + + last [] = error + last (x:[]) = x + last (x:xs) = last xs + + +Note [Scrutinee with cast] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + f = \ t -> case (v `cast` co) of + V a b -> a : f t + +Exactly the same optimisation (unrolling one call to f) will work here, +despite the cast. See mk_alt_env in the Case branch of libCase. + + +To think about (Apr 94) +~~~~~~~~~~~~~~ +Main worry: duplicating code excessively. At the moment we duplicate +the entire binding group once at each recursive call. But there may +be a group of recursive calls which share a common set of evaluated +free variables, in which case the duplication is a plain waste. + +Another thing we could consider adding is some unfold-threshold thing, +so that we'll only duplicate if the size of the group rhss isn't too +big. + +Data types +~~~~~~~~~~ +The ``level'' of a binder tells how many +recursive defns lexically enclose the binding +A recursive defn "encloses" its RHS, not its +scope. For example: +\begin{verbatim} + letrec f = let g = ... in ... + in + let h = ... + in ... +\end{verbatim} +Here, the level of @f@ is zero, the level of @g@ is one, +and the level of @h@ is zero (NB not one). + + +************************************************************************ +* * + Top-level code +* * +************************************************************************ +-} + +liberateCase :: DynFlags -> CoreProgram -> CoreProgram +liberateCase dflags binds = do_prog (initEnv dflags) binds + where + do_prog _ [] = [] + do_prog env (bind:binds) = bind' : do_prog env' binds + where + (env', bind') = libCaseBind env bind + +{- +************************************************************************ +* * + Main payload +* * +************************************************************************ + +Bindings +~~~~~~~~ +-} + +libCaseBind :: LibCaseEnv -> CoreBind -> (LibCaseEnv, CoreBind) + +libCaseBind env (NonRec binder rhs) + = (addBinders env [binder], NonRec binder (libCase env rhs)) + +libCaseBind env (Rec pairs) + = (env_body, Rec pairs') + where + binders = map fst pairs + + env_body = addBinders env binders + + pairs' = [(binder, libCase env_rhs rhs) | (binder,rhs) <- pairs] + + -- We extend the rec-env by binding each Id to its rhs, first + -- processing the rhs with an *un-extended* environment, so + -- that the same process doesn't occur for ever! + env_rhs | is_dupable_bind = addRecBinds env dup_pairs + | otherwise = env + + dup_pairs = [ (localiseId binder, libCase env_body rhs) + | (binder, rhs) <- pairs ] + -- localiseID : see Note [Need to localiseId in libCaseBind] + + is_dupable_bind = small_enough && all ok_pair pairs + + -- Size: we are going to duplicate dup_pairs; to find their + -- size, build a fake binding (let { dup_pairs } in (), + -- and find the size of that + -- See Note [Small enough] + small_enough = case bombOutSize env of + Nothing -> True -- Infinity + Just size -> couldBeSmallEnoughToInline (lc_dflags env) size $ + Let (Rec dup_pairs) (Var unitDataConId) + + ok_pair (id,_) + = idArity id > 0 -- Note [Only functions!] + && not (isBottomingId id) -- Note [Not bottoming ids] + +{- Note [Not bottoming Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Do not specialise error-functions (this is unusual, but I once saw it, +(actually in Data.Typable.Internal) + +Note [Only functions!] +~~~~~~~~~~~~~~~~~~~~~~ +Consider the following code + + f = g (case v of V a b -> a : t f) + +where g is expensive. If we aren't careful, liberate case will turn this into + + f = g (case v of + V a b -> a : t (letrec f = g (case v of V a b -> a : f t) + in f) + ) + +Yikes! We evaluate g twice. This leads to a O(2^n) explosion +if g calls back to the same code recursively. + +Solution: make sure that we only do the liberate-case thing on *functions* + +Note [Small enough] +~~~~~~~~~~~~~~~~~~~ +Consider + \fv. letrec + f = \x. BIG...(case fv of { (a,b) -> ...g.. })... + g = \y. SMALL...f... + +Then we *can* in principle do liberate-case on 'g' (small RHS) but not +for 'f' (too big). But doing so is not profitable, because duplicating +'g' at its call site in 'f' doesn't get rid of any cases. So we just +ask for the whole group to be small enough. + +Note [Need to localiseId in libCaseBind] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The call to localiseId is needed for two subtle reasons +(a) Reset the export flags on the binders so + that we don't get name clashes on exported things if the + local binding floats out to top level. This is most unlikely + to happen, since the whole point concerns free variables. + But resetting the export flag is right regardless. + +(b) Make the name an Internal one. External Names should never be + nested; if it were floated to the top level, we'd get a name + clash at code generation time. + +Expressions +~~~~~~~~~~~ +-} + +libCase :: LibCaseEnv + -> CoreExpr + -> CoreExpr + +libCase env (Var v) = libCaseApp env v [] +libCase _ (Lit lit) = Lit lit +libCase _ (Type ty) = Type ty +libCase _ (Coercion co) = Coercion co +libCase env e@(App {}) | let (fun, args) = collectArgs e + , Var v <- fun + = libCaseApp env v args +libCase env (App fun arg) = App (libCase env fun) (libCase env arg) +libCase env (Tick tickish body) = Tick tickish (libCase env body) +libCase env (Cast e co) = Cast (libCase env e) co + +libCase env (Lam binder body) + = Lam binder (libCase (addBinders env [binder]) body) + +libCase env (Let bind body) + = Let bind' (libCase env_body body) + where + (env_body, bind') = libCaseBind env bind + +libCase env (Case scrut bndr ty alts) + = Case (libCase env scrut) bndr ty (map (libCaseAlt env_alts) alts) + where + env_alts = addBinders (mk_alt_env scrut) [bndr] + mk_alt_env (Var scrut_var) = addScrutedVar env scrut_var + mk_alt_env (Cast scrut _) = mk_alt_env scrut -- Note [Scrutinee with cast] + mk_alt_env _ = env + +libCaseAlt :: LibCaseEnv -> (AltCon, [CoreBndr], CoreExpr) + -> (AltCon, [CoreBndr], CoreExpr) +libCaseAlt env (con,args,rhs) = (con, args, libCase (addBinders env args) rhs) + +{- +Ids +~~~ + +To unfold, we can't just wrap the id itself in its binding if it's a join point: + + jump j a b c => (joinrec j x y z = ... in jump j) a b c -- wrong!!! + +Every jump must provide all arguments, so we have to be careful to wrap the +whole jump instead: + + jump j a b c => joinrec j x y z = ... in jump j a b c -- right + +-} + +libCaseApp :: LibCaseEnv -> Id -> [CoreExpr] -> CoreExpr +libCaseApp env v args + | Just the_bind <- lookupRecId env v -- It's a use of a recursive thing + , notNull free_scruts -- with free vars scrutinised in RHS + = Let the_bind expr' + + | otherwise + = expr' + + where + rec_id_level = lookupLevel env v + free_scruts = freeScruts env rec_id_level + expr' = mkApps (Var v) (map (libCase env) args) + +freeScruts :: LibCaseEnv + -> LibCaseLevel -- Level of the recursive Id + -> [Id] -- Ids that are scrutinised between the binding + -- of the recursive Id and here +freeScruts env rec_bind_lvl + = [v | (v, scrut_bind_lvl, scrut_at_lvl) <- lc_scruts env + , scrut_bind_lvl <= rec_bind_lvl + , scrut_at_lvl > rec_bind_lvl] + -- Note [When to specialise] + -- Note [Avoiding fruitless liberate-case] + +{- +Note [When to specialise] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f = \x. letrec g = \y. case x of + True -> ... (f a) ... + False -> ... (g b) ... + +We get the following levels + f 0 + x 1 + g 1 + y 2 + +Then 'x' is being scrutinised at a deeper level than its binding, so +it's added to lc_sruts: [(x,1)] + +We do *not* want to specialise the call to 'f', because 'x' is not free +in 'f'. So here the bind-level of 'x' (=1) is not <= the bind-level of 'f' (=0). + +We *do* want to specialise the call to 'g', because 'x' is free in g. +Here the bind-level of 'x' (=1) is <= the bind-level of 'g' (=1). + +Note [Avoiding fruitless liberate-case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider also: + f = \x. case top_lvl_thing of + I# _ -> let g = \y. ... g ... + in ... + +Here, top_lvl_thing is scrutinised at a level (1) deeper than its +binding site (0). Nevertheless, we do NOT want to specialise the call +to 'g' because all the structure in its free variables is already +visible at the definition site for g. Hence, when considering specialising +an occurrence of 'g', we want to check that there's a scruted-var v st + + a) v's binding site is *outside* g + b) v's scrutinisation site is *inside* g + + +************************************************************************ +* * + Utility functions +* * +************************************************************************ +-} + +addBinders :: LibCaseEnv -> [CoreBndr] -> LibCaseEnv +addBinders env@(LibCaseEnv { lc_lvl = lvl, lc_lvl_env = lvl_env }) binders + = env { lc_lvl_env = lvl_env' } + where + lvl_env' = extendVarEnvList lvl_env (binders `zip` repeat lvl) + +addRecBinds :: LibCaseEnv -> [(Id,CoreExpr)] -> LibCaseEnv +addRecBinds env@(LibCaseEnv {lc_lvl = lvl, lc_lvl_env = lvl_env, + lc_rec_env = rec_env}) pairs + = env { lc_lvl = lvl', lc_lvl_env = lvl_env', lc_rec_env = rec_env' } + where + lvl' = lvl + 1 + lvl_env' = extendVarEnvList lvl_env [(binder,lvl) | (binder,_) <- pairs] + rec_env' = extendVarEnvList rec_env [(binder, Rec pairs) | (binder,_) <- pairs] + +addScrutedVar :: LibCaseEnv + -> Id -- This Id is being scrutinised by a case expression + -> LibCaseEnv + +addScrutedVar env@(LibCaseEnv { lc_lvl = lvl, lc_lvl_env = lvl_env, + lc_scruts = scruts }) scrut_var + | bind_lvl < lvl + = env { lc_scruts = scruts' } + -- Add to scruts iff the scrut_var is being scrutinised at + -- a deeper level than its defn + + | otherwise = env + where + scruts' = (scrut_var, bind_lvl, lvl) : scruts + bind_lvl = case lookupVarEnv lvl_env scrut_var of + Just lvl -> lvl + Nothing -> topLevel + +lookupRecId :: LibCaseEnv -> Id -> Maybe CoreBind +lookupRecId env id = lookupVarEnv (lc_rec_env env) id + +lookupLevel :: LibCaseEnv -> Id -> LibCaseLevel +lookupLevel env id + = case lookupVarEnv (lc_lvl_env env) id of + Just lvl -> lvl + Nothing -> topLevel + +{- +************************************************************************ +* * + The environment +* * +************************************************************************ +-} + +type LibCaseLevel = Int + +topLevel :: LibCaseLevel +topLevel = 0 + +data LibCaseEnv + = LibCaseEnv { + lc_dflags :: DynFlags, + + lc_lvl :: LibCaseLevel, -- Current level + -- The level is incremented when (and only when) going + -- inside the RHS of a (sufficiently small) recursive + -- function. + + lc_lvl_env :: IdEnv LibCaseLevel, + -- Binds all non-top-level in-scope Ids (top-level and + -- imported things have a level of zero) + + lc_rec_env :: IdEnv CoreBind, + -- Binds *only* recursively defined ids, to their own + -- binding group, and *only* in their own RHSs + + lc_scruts :: [(Id, LibCaseLevel, LibCaseLevel)] + -- Each of these Ids was scrutinised by an enclosing + -- case expression, at a level deeper than its binding + -- level. + -- + -- The first LibCaseLevel is the *binding level* of + -- the scrutinised Id, + -- The second is the level *at which it was scrutinised*. + -- (see Note [Avoiding fruitless liberate-case]) + -- The former is a bit redundant, since you could always + -- look it up in lc_lvl_env, but it's just cached here + -- + -- The order is insignificant; it's a bag really + -- + -- There's one element per scrutinisation; + -- in principle the same Id may appear multiple times, + -- although that'd be unusual: + -- case x of { (a,b) -> ....(case x of ...) .. } + } + +initEnv :: DynFlags -> LibCaseEnv +initEnv dflags + = LibCaseEnv { lc_dflags = dflags, + lc_lvl = 0, + lc_lvl_env = emptyVarEnv, + lc_rec_env = emptyVarEnv, + lc_scruts = [] } + +-- Bomb-out size for deciding if +-- potential liberatees are too big. +-- (passed in from cmd-line args) +bombOutSize :: LibCaseEnv -> Maybe Int +bombOutSize = liberateCaseThreshold . lc_dflags diff --git a/compiler/GHC/Core/Op/Monad.hs b/compiler/GHC/Core/Op/Monad.hs new file mode 100644 index 0000000000..a0a15bba6f --- /dev/null +++ b/compiler/GHC/Core/Op/Monad.hs @@ -0,0 +1,828 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +-} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} + +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} + +module GHC.Core.Op.Monad ( + -- * Configuration of the core-to-core passes + CoreToDo(..), runWhen, runMaybe, + SimplMode(..), + FloatOutSwitches(..), + pprPassDetails, + + -- * Plugins + CorePluginPass, bindsOnlyPass, + + -- * Counting + SimplCount, doSimplTick, doFreeSimplTick, simplCountN, + pprSimplCount, plusSimplCount, zeroSimplCount, + isZeroSimplCount, hasDetailedCounts, Tick(..), + + -- * The monad + CoreM, runCoreM, + + -- ** Reading from the monad + getHscEnv, getRuleBase, getModule, + getDynFlags, getPackageFamInstEnv, + getVisibleOrphanMods, getUniqMask, + getPrintUnqualified, getSrcSpanM, + + -- ** Writing to the monad + addSimplCount, + + -- ** Lifting into the monad + liftIO, liftIOWithCount, + + -- ** Dealing with annotations + getAnnotations, getFirstAnnotations, + + -- ** Screen output + putMsg, putMsgS, errorMsg, errorMsgS, warnMsg, + fatalErrorMsg, fatalErrorMsgS, + debugTraceMsg, debugTraceMsgS, + dumpIfSet_dyn + ) where + +import GhcPrelude hiding ( read ) + +import GHC.Core +import GHC.Driver.Types +import Module +import GHC.Driver.Session +import BasicTypes ( CompilerPhase(..) ) +import Annotations + +import IOEnv hiding ( liftIO, failM, failWithM ) +import qualified IOEnv ( liftIO ) +import Var +import Outputable +import FastString +import ErrUtils( Severity(..), DumpFormat (..), dumpOptionsFromFlag ) +import UniqSupply +import MonadUtils +import NameEnv +import SrcLoc +import Data.Bifunctor ( bimap ) +import ErrUtils (dumpAction) +import Data.List (intersperse, groupBy, sortBy) +import Data.Ord +import Data.Dynamic +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.Map.Strict as MapStrict +import Data.Word +import Control.Monad +import Control.Applicative ( Alternative(..) ) +import Panic (throwGhcException, GhcException(..)) + +{- +************************************************************************ +* * + The CoreToDo type and related types + Abstraction of core-to-core passes to run. +* * +************************************************************************ +-} + +data CoreToDo -- These are diff core-to-core passes, + -- which may be invoked in any order, + -- as many times as you like. + + = CoreDoSimplify -- The core-to-core simplifier. + Int -- Max iterations + SimplMode + | CoreDoPluginPass String CorePluginPass + | CoreDoFloatInwards + | CoreDoFloatOutwards FloatOutSwitches + | CoreLiberateCase + | CoreDoPrintCore + | CoreDoStaticArgs + | CoreDoCallArity + | CoreDoExitify + | CoreDoDemand + | CoreDoCpr + | CoreDoWorkerWrapper + | CoreDoSpecialising + | CoreDoSpecConstr + | CoreCSE + | CoreDoRuleCheck CompilerPhase String -- Check for non-application of rules + -- matching this string + | CoreDoNothing -- Useful when building up + | CoreDoPasses [CoreToDo] -- lists of these things + + | CoreDesugar -- Right after desugaring, no simple optimisation yet! + | CoreDesugarOpt -- CoreDesugarXXX: Not strictly a core-to-core pass, but produces + -- Core output, and hence useful to pass to endPass + + | CoreTidy + | CorePrep + | CoreOccurAnal + +instance Outputable CoreToDo where + ppr (CoreDoSimplify _ _) = text "Simplifier" + ppr (CoreDoPluginPass s _) = text "Core plugin: " <+> text s + ppr CoreDoFloatInwards = text "Float inwards" + ppr (CoreDoFloatOutwards f) = text "Float out" <> parens (ppr f) + ppr CoreLiberateCase = text "Liberate case" + ppr CoreDoStaticArgs = text "Static argument" + ppr CoreDoCallArity = text "Called arity analysis" + ppr CoreDoExitify = text "Exitification transformation" + ppr CoreDoDemand = text "Demand analysis" + ppr CoreDoCpr = text "Constructed Product Result analysis" + ppr CoreDoWorkerWrapper = text "Worker Wrapper binds" + ppr CoreDoSpecialising = text "Specialise" + ppr CoreDoSpecConstr = text "SpecConstr" + ppr CoreCSE = text "Common sub-expression" + ppr CoreDesugar = text "Desugar (before optimization)" + ppr CoreDesugarOpt = text "Desugar (after optimization)" + ppr CoreTidy = text "Tidy Core" + ppr CorePrep = text "CorePrep" + ppr CoreOccurAnal = text "Occurrence analysis" + ppr CoreDoPrintCore = text "Print core" + ppr (CoreDoRuleCheck {}) = text "Rule check" + ppr CoreDoNothing = text "CoreDoNothing" + ppr (CoreDoPasses passes) = text "CoreDoPasses" <+> ppr passes + +pprPassDetails :: CoreToDo -> SDoc +pprPassDetails (CoreDoSimplify n md) = vcat [ text "Max iterations =" <+> int n + , ppr md ] +pprPassDetails _ = Outputable.empty + +data SimplMode -- See comments in GHC.Core.Op.Simplify.Monad + = SimplMode + { sm_names :: [String] -- Name(s) of the phase + , sm_phase :: CompilerPhase + , sm_dflags :: DynFlags -- Just for convenient non-monadic + -- access; we don't override these + , sm_rules :: Bool -- Whether RULES are enabled + , sm_inline :: Bool -- Whether inlining is enabled + , sm_case_case :: Bool -- Whether case-of-case is enabled + , sm_eta_expand :: Bool -- Whether eta-expansion is enabled + } + +instance Outputable SimplMode where + ppr (SimplMode { sm_phase = p, sm_names = ss + , sm_rules = r, sm_inline = i + , sm_eta_expand = eta, sm_case_case = cc }) + = text "SimplMode" <+> braces ( + sep [ text "Phase =" <+> ppr p <+> + brackets (text (concat $ intersperse "," ss)) <> comma + , pp_flag i (sLit "inline") <> comma + , pp_flag r (sLit "rules") <> comma + , pp_flag eta (sLit "eta-expand") <> comma + , pp_flag cc (sLit "case-of-case") ]) + where + pp_flag f s = ppUnless f (text "no") <+> ptext s + +data FloatOutSwitches = FloatOutSwitches { + floatOutLambdas :: Maybe Int, -- ^ Just n <=> float lambdas to top level, if + -- doing so will abstract over n or fewer + -- value variables + -- Nothing <=> float all lambdas to top level, + -- regardless of how many free variables + -- Just 0 is the vanilla case: float a lambda + -- iff it has no free vars + + floatOutConstants :: Bool, -- ^ True <=> float constants to top level, + -- even if they do not escape a lambda + floatOutOverSatApps :: Bool, + -- ^ True <=> float out over-saturated applications + -- based on arity information. + -- See Note [Floating over-saturated applications] + -- in GHC.Core.Op.SetLevels + floatToTopLevelOnly :: Bool -- ^ Allow floating to the top level only. + } +instance Outputable FloatOutSwitches where + ppr = pprFloatOutSwitches + +pprFloatOutSwitches :: FloatOutSwitches -> SDoc +pprFloatOutSwitches sw + = text "FOS" <+> (braces $ + sep $ punctuate comma $ + [ text "Lam =" <+> ppr (floatOutLambdas sw) + , text "Consts =" <+> ppr (floatOutConstants sw) + , text "OverSatApps =" <+> ppr (floatOutOverSatApps sw) ]) + +-- The core-to-core pass ordering is derived from the DynFlags: +runWhen :: Bool -> CoreToDo -> CoreToDo +runWhen True do_this = do_this +runWhen False _ = CoreDoNothing + +runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo +runMaybe (Just x) f = f x +runMaybe Nothing _ = CoreDoNothing + +{- + +************************************************************************ +* * + Types for Plugins +* * +************************************************************************ +-} + +-- | A description of the plugin pass itself +type CorePluginPass = ModGuts -> CoreM ModGuts + +bindsOnlyPass :: (CoreProgram -> CoreM CoreProgram) -> ModGuts -> CoreM ModGuts +bindsOnlyPass pass guts + = do { binds' <- pass (mg_binds guts) + ; return (guts { mg_binds = binds' }) } + +{- +************************************************************************ +* * + Counting and logging +* * +************************************************************************ +-} + +getVerboseSimplStats :: (Bool -> SDoc) -> SDoc +getVerboseSimplStats = getPprDebug -- For now, anyway + +zeroSimplCount :: DynFlags -> SimplCount +isZeroSimplCount :: SimplCount -> Bool +hasDetailedCounts :: SimplCount -> Bool +pprSimplCount :: SimplCount -> SDoc +doSimplTick :: DynFlags -> Tick -> SimplCount -> SimplCount +doFreeSimplTick :: Tick -> SimplCount -> SimplCount +plusSimplCount :: SimplCount -> SimplCount -> SimplCount + +data SimplCount + = VerySimplCount !Int -- Used when don't want detailed stats + + | SimplCount { + ticks :: !Int, -- Total ticks + details :: !TickCounts, -- How many of each type + + n_log :: !Int, -- N + log1 :: [Tick], -- Last N events; <= opt_HistorySize, + -- most recent first + log2 :: [Tick] -- Last opt_HistorySize events before that + -- Having log1, log2 lets us accumulate the + -- recent history reasonably efficiently + } + +type TickCounts = Map Tick Int + +simplCountN :: SimplCount -> Int +simplCountN (VerySimplCount n) = n +simplCountN (SimplCount { ticks = n }) = n + +zeroSimplCount dflags + -- This is where we decide whether to do + -- the VerySimpl version or the full-stats version + | dopt Opt_D_dump_simpl_stats dflags + = SimplCount {ticks = 0, details = Map.empty, + n_log = 0, log1 = [], log2 = []} + | otherwise + = VerySimplCount 0 + +isZeroSimplCount (VerySimplCount n) = n==0 +isZeroSimplCount (SimplCount { ticks = n }) = n==0 + +hasDetailedCounts (VerySimplCount {}) = False +hasDetailedCounts (SimplCount {}) = True + +doFreeSimplTick tick sc@SimplCount { details = dts } + = sc { details = dts `addTick` tick } +doFreeSimplTick _ sc = sc + +doSimplTick dflags tick + sc@(SimplCount { ticks = tks, details = dts, n_log = nl, log1 = l1 }) + | nl >= historySize dflags = sc1 { n_log = 1, log1 = [tick], log2 = l1 } + | otherwise = sc1 { n_log = nl+1, log1 = tick : l1 } + where + sc1 = sc { ticks = tks+1, details = dts `addTick` tick } + +doSimplTick _ _ (VerySimplCount n) = VerySimplCount (n+1) + + +addTick :: TickCounts -> Tick -> TickCounts +addTick fm tick = MapStrict.insertWith (+) tick 1 fm + +plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 }) + sc2@(SimplCount { ticks = tks2, details = dts2 }) + = log_base { ticks = tks1 + tks2 + , details = MapStrict.unionWith (+) dts1 dts2 } + where + -- A hackish way of getting recent log info + log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2 + | null (log2 sc2) = sc2 { log2 = log1 sc1 } + | otherwise = sc2 + +plusSimplCount (VerySimplCount n) (VerySimplCount m) = VerySimplCount (n+m) +plusSimplCount lhs rhs = + throwGhcException . PprProgramError "plusSimplCount" $ vcat + [ text "lhs" + , pprSimplCount lhs + , text "rhs" + , pprSimplCount rhs + ] + -- We use one or the other consistently + +pprSimplCount (VerySimplCount n) = text "Total ticks:" <+> int n +pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 }) + = vcat [text "Total ticks: " <+> int tks, + blankLine, + pprTickCounts dts, + getVerboseSimplStats $ \dbg -> if dbg + then + vcat [blankLine, + text "Log (most recent first)", + nest 4 (vcat (map ppr l1) $$ vcat (map ppr l2))] + else Outputable.empty + ] + +{- Note [Which transformations are innocuous] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At one point (Jun 18) I wondered if some transformations (ticks) +might be "innocuous", in the sense that they do not unlock a later +transformation that does not occur in the same pass. If so, we could +refrain from bumping the overall tick-count for such innocuous +transformations, and perhaps terminate the simplifier one pass +earlier. + +But alas I found that virtually nothing was innocuous! This Note +just records what I learned, in case anyone wants to try again. + +These transformations are not innocuous: + +*** NB: I think these ones could be made innocuous + EtaExpansion + LetFloatFromLet + +LetFloatFromLet + x = K (let z = e2 in Just z) + prepareRhs transforms to + x2 = let z=e2 in Just z + x = K xs + And now more let-floating can happen in the + next pass, on x2 + +PreInlineUnconditionally + Example in spectral/cichelli/Auxil + hinsert = ...let lo = e in + let j = ...lo... in + case x of + False -> () + True -> case lo of I# lo' -> + ...j... + When we PreInlineUnconditionally j, lo's occ-info changes to once, + so it can be PreInlineUnconditionally in the next pass, and a + cascade of further things can happen. + +PostInlineUnconditionally + let x = e in + let y = ...x.. in + case .. of { A -> ...x...y... + B -> ...x...y... } + Current postinlineUnconditinaly will inline y, and then x; sigh. + + But PostInlineUnconditionally might also unlock subsequent + transformations for the same reason as PreInlineUnconditionally, + so it's probably not innocuous anyway. + +KnownBranch, BetaReduction: + May drop chunks of code, and thereby enable PreInlineUnconditionally + for some let-binding which now occurs once + +EtaExpansion: + Example in imaginary/digits-of-e1 + fail = \void. e where e :: IO () + --> etaExpandRhs + fail = \void. (\s. (e |> g) s) |> sym g where g :: IO () ~ S -> (S,()) + --> Next iteration of simplify + fail1 = \void. \s. (e |> g) s + fail = fail1 |> Void#->sym g + And now inline 'fail' + +CaseMerge: + case x of y { + DEFAULT -> case y of z { pi -> ei } + alts2 } + ---> CaseMerge + case x of { pi -> let z = y in ei + ; alts2 } + The "let z=y" case-binder-swap gets dealt with in the next pass +-} + +pprTickCounts :: Map Tick Int -> SDoc +pprTickCounts counts + = vcat (map pprTickGroup groups) + where + groups :: [[(Tick,Int)]] -- Each group shares a common tag + -- toList returns common tags adjacent + groups = groupBy same_tag (Map.toList counts) + same_tag (tick1,_) (tick2,_) = tickToTag tick1 == tickToTag tick2 + +pprTickGroup :: [(Tick, Int)] -> SDoc +pprTickGroup group@((tick1,_):_) + = hang (int (sum [n | (_,n) <- group]) <+> text (tickString tick1)) + 2 (vcat [ int n <+> pprTickCts tick + -- flip as we want largest first + | (tick,n) <- sortBy (flip (comparing snd)) group]) +pprTickGroup [] = panic "pprTickGroup" + +data Tick -- See Note [Which transformations are innocuous] + = PreInlineUnconditionally Id + | PostInlineUnconditionally Id + + | UnfoldingDone Id + | RuleFired FastString -- Rule name + + | LetFloatFromLet + | EtaExpansion Id -- LHS binder + | EtaReduction Id -- Binder on outer lambda + | BetaReduction Id -- Lambda binder + + + | CaseOfCase Id -- Bndr on *inner* case + | KnownBranch Id -- Case binder + | CaseMerge Id -- Binder on outer case + | AltMerge Id -- Case binder + | CaseElim Id -- Case binder + | CaseIdentity Id -- Case binder + | FillInCaseDefault Id -- Case binder + + | SimplifierDone -- Ticked at each iteration of the simplifier + +instance Outputable Tick where + ppr tick = text (tickString tick) <+> pprTickCts tick + +instance Eq Tick where + a == b = case a `cmpTick` b of + EQ -> True + _ -> False + +instance Ord Tick where + compare = cmpTick + +tickToTag :: Tick -> Int +tickToTag (PreInlineUnconditionally _) = 0 +tickToTag (PostInlineUnconditionally _) = 1 +tickToTag (UnfoldingDone _) = 2 +tickToTag (RuleFired _) = 3 +tickToTag LetFloatFromLet = 4 +tickToTag (EtaExpansion _) = 5 +tickToTag (EtaReduction _) = 6 +tickToTag (BetaReduction _) = 7 +tickToTag (CaseOfCase _) = 8 +tickToTag (KnownBranch _) = 9 +tickToTag (CaseMerge _) = 10 +tickToTag (CaseElim _) = 11 +tickToTag (CaseIdentity _) = 12 +tickToTag (FillInCaseDefault _) = 13 +tickToTag SimplifierDone = 16 +tickToTag (AltMerge _) = 17 + +tickString :: Tick -> String +tickString (PreInlineUnconditionally _) = "PreInlineUnconditionally" +tickString (PostInlineUnconditionally _)= "PostInlineUnconditionally" +tickString (UnfoldingDone _) = "UnfoldingDone" +tickString (RuleFired _) = "RuleFired" +tickString LetFloatFromLet = "LetFloatFromLet" +tickString (EtaExpansion _) = "EtaExpansion" +tickString (EtaReduction _) = "EtaReduction" +tickString (BetaReduction _) = "BetaReduction" +tickString (CaseOfCase _) = "CaseOfCase" +tickString (KnownBranch _) = "KnownBranch" +tickString (CaseMerge _) = "CaseMerge" +tickString (AltMerge _) = "AltMerge" +tickString (CaseElim _) = "CaseElim" +tickString (CaseIdentity _) = "CaseIdentity" +tickString (FillInCaseDefault _) = "FillInCaseDefault" +tickString SimplifierDone = "SimplifierDone" + +pprTickCts :: Tick -> SDoc +pprTickCts (PreInlineUnconditionally v) = ppr v +pprTickCts (PostInlineUnconditionally v)= ppr v +pprTickCts (UnfoldingDone v) = ppr v +pprTickCts (RuleFired v) = ppr v +pprTickCts LetFloatFromLet = Outputable.empty +pprTickCts (EtaExpansion v) = ppr v +pprTickCts (EtaReduction v) = ppr v +pprTickCts (BetaReduction v) = ppr v +pprTickCts (CaseOfCase v) = ppr v +pprTickCts (KnownBranch v) = ppr v +pprTickCts (CaseMerge v) = ppr v +pprTickCts (AltMerge v) = ppr v +pprTickCts (CaseElim v) = ppr v +pprTickCts (CaseIdentity v) = ppr v +pprTickCts (FillInCaseDefault v) = ppr v +pprTickCts _ = Outputable.empty + +cmpTick :: Tick -> Tick -> Ordering +cmpTick a b = case (tickToTag a `compare` tickToTag b) of + GT -> GT + EQ -> cmpEqTick a b + LT -> LT + +cmpEqTick :: Tick -> Tick -> Ordering +cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b +cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b +cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b +cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b +cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b +cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b +cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b +cmpEqTick (CaseOfCase a) (CaseOfCase b) = a `compare` b +cmpEqTick (KnownBranch a) (KnownBranch b) = a `compare` b +cmpEqTick (CaseMerge a) (CaseMerge b) = a `compare` b +cmpEqTick (AltMerge a) (AltMerge b) = a `compare` b +cmpEqTick (CaseElim a) (CaseElim b) = a `compare` b +cmpEqTick (CaseIdentity a) (CaseIdentity b) = a `compare` b +cmpEqTick (FillInCaseDefault a) (FillInCaseDefault b) = a `compare` b +cmpEqTick _ _ = EQ + +{- +************************************************************************ +* * + Monad and carried data structure definitions +* * +************************************************************************ +-} + +data CoreReader = CoreReader { + cr_hsc_env :: HscEnv, + cr_rule_base :: RuleBase, + cr_module :: Module, + cr_print_unqual :: PrintUnqualified, + cr_loc :: SrcSpan, -- Use this for log/error messages so they + -- are at least tagged with the right source file + cr_visible_orphan_mods :: !ModuleSet, + cr_uniq_mask :: !Char -- Mask for creating unique values +} + +-- Note: CoreWriter used to be defined with data, rather than newtype. If it +-- is defined that way again, the cw_simpl_count field, at least, must be +-- strict to avoid a space leak (#7702). +newtype CoreWriter = CoreWriter { + cw_simpl_count :: SimplCount +} + +emptyWriter :: DynFlags -> CoreWriter +emptyWriter dflags = CoreWriter { + cw_simpl_count = zeroSimplCount dflags + } + +plusWriter :: CoreWriter -> CoreWriter -> CoreWriter +plusWriter w1 w2 = CoreWriter { + cw_simpl_count = (cw_simpl_count w1) `plusSimplCount` (cw_simpl_count w2) + } + +type CoreIOEnv = IOEnv CoreReader + +-- | The monad used by Core-to-Core passes to register simplification statistics. +-- Also used to have common state (in the form of UniqueSupply) for generating Uniques. +newtype CoreM a = CoreM { unCoreM :: CoreIOEnv (a, CoreWriter) } + deriving (Functor) + +instance Monad CoreM where + mx >>= f = CoreM $ do + (x, w1) <- unCoreM mx + (y, w2) <- unCoreM (f x) + let w = w1 `plusWriter` w2 + return $ seq w (y, w) + -- forcing w before building the tuple avoids a space leak + -- (#7702) + +instance Applicative CoreM where + pure x = CoreM $ nop x + (<*>) = ap + m *> k = m >>= \_ -> k + +instance Alternative CoreM where + empty = CoreM Control.Applicative.empty + m <|> n = CoreM (unCoreM m <|> unCoreM n) + +instance MonadPlus CoreM + +instance MonadUnique CoreM where + getUniqueSupplyM = do + mask <- read cr_uniq_mask + liftIO $! mkSplitUniqSupply mask + + getUniqueM = do + mask <- read cr_uniq_mask + liftIO $! uniqFromMask mask + +runCoreM :: HscEnv + -> RuleBase + -> Char -- ^ Mask + -> Module + -> ModuleSet + -> PrintUnqualified + -> SrcSpan + -> CoreM a + -> IO (a, SimplCount) +runCoreM hsc_env rule_base mask mod orph_imps print_unqual loc m + = liftM extract $ runIOEnv reader $ unCoreM m + where + reader = CoreReader { + cr_hsc_env = hsc_env, + cr_rule_base = rule_base, + cr_module = mod, + cr_visible_orphan_mods = orph_imps, + cr_print_unqual = print_unqual, + cr_loc = loc, + cr_uniq_mask = mask + } + + extract :: (a, CoreWriter) -> (a, SimplCount) + extract (value, writer) = (value, cw_simpl_count writer) + +{- +************************************************************************ +* * + Core combinators, not exported +* * +************************************************************************ +-} + +nop :: a -> CoreIOEnv (a, CoreWriter) +nop x = do + r <- getEnv + return (x, emptyWriter $ (hsc_dflags . cr_hsc_env) r) + +read :: (CoreReader -> a) -> CoreM a +read f = CoreM $ getEnv >>= (\r -> nop (f r)) + +write :: CoreWriter -> CoreM () +write w = CoreM $ return ((), w) + +-- \subsection{Lifting IO into the monad} + +-- | Lift an 'IOEnv' operation into 'CoreM' +liftIOEnv :: CoreIOEnv a -> CoreM a +liftIOEnv mx = CoreM (mx >>= (\x -> nop x)) + +instance MonadIO CoreM where + liftIO = liftIOEnv . IOEnv.liftIO + +-- | Lift an 'IO' operation into 'CoreM' while consuming its 'SimplCount' +liftIOWithCount :: IO (SimplCount, a) -> CoreM a +liftIOWithCount what = liftIO what >>= (\(count, x) -> addSimplCount count >> return x) + +{- +************************************************************************ +* * + Reader, writer and state accessors +* * +************************************************************************ +-} + +getHscEnv :: CoreM HscEnv +getHscEnv = read cr_hsc_env + +getRuleBase :: CoreM RuleBase +getRuleBase = read cr_rule_base + +getVisibleOrphanMods :: CoreM ModuleSet +getVisibleOrphanMods = read cr_visible_orphan_mods + +getPrintUnqualified :: CoreM PrintUnqualified +getPrintUnqualified = read cr_print_unqual + +getSrcSpanM :: CoreM SrcSpan +getSrcSpanM = read cr_loc + +addSimplCount :: SimplCount -> CoreM () +addSimplCount count = write (CoreWriter { cw_simpl_count = count }) + +getUniqMask :: CoreM Char +getUniqMask = read cr_uniq_mask + +-- Convenience accessors for useful fields of HscEnv + +instance HasDynFlags CoreM where + getDynFlags = fmap hsc_dflags getHscEnv + +instance HasModule CoreM where + getModule = read cr_module + +getPackageFamInstEnv :: CoreM PackageFamInstEnv +getPackageFamInstEnv = do + hsc_env <- getHscEnv + eps <- liftIO $ hscEPS hsc_env + return $ eps_fam_inst_env eps + +{- +************************************************************************ +* * + Dealing with annotations +* * +************************************************************************ +-} + +-- | Get all annotations of a given type. This happens lazily, that is +-- no deserialization will take place until the [a] is actually demanded and +-- the [a] can also be empty (the UniqFM is not filtered). +-- +-- This should be done once at the start of a Core-to-Core pass that uses +-- annotations. +-- +-- See Note [Annotations] +getAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (ModuleEnv [a], NameEnv [a]) +getAnnotations deserialize guts = do + hsc_env <- getHscEnv + ann_env <- liftIO $ prepareAnnotations hsc_env (Just guts) + return (deserializeAnns deserialize ann_env) + +-- | Get at most one annotation of a given type per annotatable item. +getFirstAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (ModuleEnv a, NameEnv a) +getFirstAnnotations deserialize guts + = bimap mod name <$> getAnnotations deserialize guts + where + mod = mapModuleEnv head . filterModuleEnv (const $ not . null) + name = mapNameEnv head . filterNameEnv (not . null) + +{- +Note [Annotations] +~~~~~~~~~~~~~~~~~~ +A Core-to-Core pass that wants to make use of annotations calls +getAnnotations or getFirstAnnotations at the beginning to obtain a UniqFM with +annotations of a specific type. This produces all annotations from interface +files read so far. However, annotations from interface files read during the +pass will not be visible until getAnnotations is called again. This is similar +to how rules work and probably isn't too bad. + +The current implementation could be optimised a bit: when looking up +annotations for a thing from the HomePackageTable, we could search directly in +the module where the thing is defined rather than building one UniqFM which +contains all annotations we know of. This would work because annotations can +only be given to things defined in the same module. However, since we would +only want to deserialise every annotation once, we would have to build a cache +for every module in the HTP. In the end, it's probably not worth it as long as +we aren't using annotations heavily. + +************************************************************************ +* * + Direct screen output +* * +************************************************************************ +-} + +msg :: Severity -> WarnReason -> SDoc -> CoreM () +msg sev reason doc + = do { dflags <- getDynFlags + ; loc <- getSrcSpanM + ; unqual <- getPrintUnqualified + ; let sty = case sev of + SevError -> err_sty + SevWarning -> err_sty + SevDump -> dump_sty + _ -> user_sty + err_sty = mkErrStyle dflags unqual + user_sty = mkUserStyle dflags unqual AllTheWay + dump_sty = mkDumpStyle dflags unqual + ; liftIO $ putLogMsg dflags reason sev loc sty doc } + +-- | Output a String message to the screen +putMsgS :: String -> CoreM () +putMsgS = putMsg . text + +-- | Output a message to the screen +putMsg :: SDoc -> CoreM () +putMsg = msg SevInfo NoReason + +-- | Output an error to the screen. Does not cause the compiler to die. +errorMsgS :: String -> CoreM () +errorMsgS = errorMsg . text + +-- | Output an error to the screen. Does not cause the compiler to die. +errorMsg :: SDoc -> CoreM () +errorMsg = msg SevError NoReason + +warnMsg :: WarnReason -> SDoc -> CoreM () +warnMsg = msg SevWarning + +-- | Output a fatal error to the screen. Does not cause the compiler to die. +fatalErrorMsgS :: String -> CoreM () +fatalErrorMsgS = fatalErrorMsg . text + +-- | Output a fatal error to the screen. Does not cause the compiler to die. +fatalErrorMsg :: SDoc -> CoreM () +fatalErrorMsg = msg SevFatal NoReason + +-- | Output a string debugging message at verbosity level of @-v@ or higher +debugTraceMsgS :: String -> CoreM () +debugTraceMsgS = debugTraceMsg . text + +-- | Outputs a debugging message at verbosity level of @-v@ or higher +debugTraceMsg :: SDoc -> CoreM () +debugTraceMsg = msg SevDump NoReason + +-- | Show some labelled 'SDoc' if a particular flag is set or at a verbosity level of @-v -ddump-most@ or higher +dumpIfSet_dyn :: DumpFlag -> String -> DumpFormat -> SDoc -> CoreM () +dumpIfSet_dyn flag str fmt doc + = do { dflags <- getDynFlags + ; unqual <- getPrintUnqualified + ; when (dopt flag dflags) $ liftIO $ do + let sty = mkDumpStyle dflags unqual + dumpAction dflags sty (dumpOptionsFromFlag flag) str fmt doc } diff --git a/compiler/GHC/Core/Op/Monad.hs-boot b/compiler/GHC/Core/Op/Monad.hs-boot new file mode 100644 index 0000000000..4ca105a66c --- /dev/null +++ b/compiler/GHC/Core/Op/Monad.hs-boot @@ -0,0 +1,30 @@ +-- Created this hs-boot file to remove circular dependencies from the use of +-- Plugins. Plugins needs CoreToDo and CoreM types to define core-to-core +-- transformations. +-- However GHC.Core.Op.Monad does much more than defining these, and because Plugins are +-- activated in various modules, the imports become circular. To solve this I +-- extracted CoreToDo and CoreM into this file. +-- I needed to write the whole definition of these types, otherwise it created +-- a data-newtype conflict. + +module GHC.Core.Op.Monad ( CoreToDo, CoreM ) where + +import GhcPrelude + +import IOEnv ( IOEnv ) + +type CoreIOEnv = IOEnv CoreReader + +data CoreReader + +newtype CoreWriter = CoreWriter { + cw_simpl_count :: SimplCount +} + +data SimplCount + +newtype CoreM a = CoreM { unCoreM :: CoreIOEnv (a, CoreWriter) } + +instance Monad CoreM + +data CoreToDo diff --git a/compiler/GHC/Core/Op/OccurAnal.hs b/compiler/GHC/Core/Op/OccurAnal.hs new file mode 100644 index 0000000000..b676be38ae --- /dev/null +++ b/compiler/GHC/Core/Op/OccurAnal.hs @@ -0,0 +1,2898 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +************************************************************************ +* * +\section[OccurAnal]{Occurrence analysis pass} +* * +************************************************************************ + +The occurrence analyser re-typechecks a core expression, returning a new +core expression with (hopefully) improved usage information. +-} + +{-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-} + +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} + +module GHC.Core.Op.OccurAnal ( + occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core +import GHC.Core.FVs +import GHC.Core.Utils ( exprIsTrivial, isDefaultAlt, isExpandableApp, + stripTicksTopE, mkTicks ) +import GHC.Core.Arity ( joinRhsArity ) +import Id +import IdInfo +import Name( localiseName ) +import BasicTypes +import Module( Module ) +import GHC.Core.Coercion +import GHC.Core.Type + +import VarSet +import VarEnv +import Var +import Demand ( argOneShots, argsOneShots ) +import Digraph ( SCC(..), Node(..) + , stronglyConnCompFromEdgedVerticesUniq + , stronglyConnCompFromEdgedVerticesUniqR ) +import Unique +import UniqFM +import UniqSet +import Util +import Outputable +import Data.List +import Control.Arrow ( second ) + +{- +************************************************************************ +* * + occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap +* * +************************************************************************ + +Here's the externally-callable interface: +-} + +occurAnalysePgm :: Module -- Used only in debug output + -> (Id -> Bool) -- Active unfoldings + -> (Activation -> Bool) -- Active rules + -> [CoreRule] + -> CoreProgram -> CoreProgram +occurAnalysePgm this_mod active_unf active_rule imp_rules binds + | isEmptyDetails final_usage + = occ_anald_binds + + | otherwise -- See Note [Glomming] + = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon) + 2 (ppr final_usage ) ) + occ_anald_glommed_binds + where + init_env = initOccEnv { occ_rule_act = active_rule + , occ_unf_act = active_unf } + + (final_usage, occ_anald_binds) = go init_env binds + (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel + imp_rule_edges + (flattenBinds binds) + initial_uds + -- It's crucial to re-analyse the glommed-together bindings + -- so that we establish the right loop breakers. Otherwise + -- we can easily create an infinite loop (#9583 is an example) + -- + -- Also crucial to re-analyse the /original/ bindings + -- in case the first pass accidentally discarded as dead code + -- a binding that was actually needed (albeit before its + -- definition site). #17724 threw this up. + + initial_uds = addManyOccsSet emptyDetails + (rulesFreeVars imp_rules) + -- The RULES declarations keep things alive! + + -- Note [Preventing loops due to imported functions rules] + imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv + [ mapVarEnv (const maps_to) $ + getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule) + | imp_rule <- imp_rules + , not (isBuiltinRule imp_rule) -- See Note [Plugin rules] + , let maps_to = exprFreeIds (ru_rhs imp_rule) + `delVarSetList` ru_bndrs imp_rule + , arg <- ru_args imp_rule ] + + go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind]) + go _ [] + = (initial_uds, []) + go env (bind:binds) + = (final_usage, bind' ++ binds') + where + (bs_usage, binds') = go env binds + (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind + bs_usage + +occurAnalyseExpr :: CoreExpr -> CoreExpr + -- Do occurrence analysis, and discard occurrence info returned +occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap + +occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr +occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap + +occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr +occurAnalyseExpr' enable_binder_swap expr + = snd (occAnal env expr) + where + env = initOccEnv { occ_binder_swap = enable_binder_swap } + +{- Note [Plugin rules] +~~~~~~~~~~~~~~~~~~~~~~ +Conal Elliott (#11651) built a GHC plugin that added some +BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to +do some domain-specific transformations that could not be expressed +with an ordinary pattern-matching CoreRule. But then we can't extract +the dependencies (in imp_rule_edges) from ru_rhs etc, because a +BuiltinRule doesn't have any of that stuff. + +So we simply assume that BuiltinRules have no dependencies, and filter +them out from the imp_rule_edges comprehension. +-} + +{- +************************************************************************ +* * + Bindings +* * +************************************************************************ + +Note [Recursive bindings: the grand plan] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we come across a binding group + Rec { x1 = r1; ...; xn = rn } +we treat it like this (occAnalRecBind): + +1. Occurrence-analyse each right hand side, and build a + "Details" for each binding to capture the results. + + Wrap the details in a Node (details, node-id, dep-node-ids), + where node-id is just the unique of the binder, and + dep-node-ids lists all binders on which this binding depends. + We'll call these the "scope edges". + See Note [Forming the Rec groups]. + + All this is done by makeNode. + +2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or + NonRec. The key property is that every free variable of a binding + is accounted for by the scope edges, so that when we are done + everything is still in scope. + +3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we + identify suitable loop-breakers to ensure that inlining terminates. + This is done by occAnalRec. + +4. To do so we form a new set of Nodes, with the same details, but + different edges, the "loop-breaker nodes". The loop-breaker nodes + have both more and fewer dependencies than the scope edges + (see Note [Choosing loop breakers]) + + More edges: if f calls g, and g has an active rule that mentions h + then we add an edge from f -> h + + Fewer edges: we only include dependencies on active rules, on rule + RHSs (not LHSs) and if there is an INLINE pragma only + on the stable unfolding (and vice versa). The scope + edges must be much more inclusive. + +5. The "weak fvs" of a node are, by definition: + the scope fvs - the loop-breaker fvs + See Note [Weak loop breakers], and the nd_weak field of Details + +6. Having formed the loop-breaker nodes + +Note [Dead code] +~~~~~~~~~~~~~~~~ +Dropping dead code for a cyclic Strongly Connected Component is done +in a very simple way: + + the entire SCC is dropped if none of its binders are mentioned + in the body; otherwise the whole thing is kept. + +The key observation is that dead code elimination happens after +dependency analysis: so 'occAnalBind' processes SCCs instead of the +original term's binding groups. + +Thus 'occAnalBind' does indeed drop 'f' in an example like + + letrec f = ...g... + g = ...(...g...)... + in + ...g... + +when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in +'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes +'AcyclicSCC f', where 'body_usage' won't contain 'f'. + +------------------------------------------------------------ +Note [Forming Rec groups] +~~~~~~~~~~~~~~~~~~~~~~~~~ +We put bindings {f = ef; g = eg } in a Rec group if "f uses g" +and "g uses f", no matter how indirectly. We do a SCC analysis +with an edge f -> g if "f uses g". + +More precisely, "f uses g" iff g should be in scope wherever f is. +That is, g is free in: + a) the rhs 'ef' + b) or the RHS of a rule for f (Note [Rules are extra RHSs]) + c) or the LHS or a rule for f (Note [Rule dependency info]) + +These conditions apply regardless of the activation of the RULE (eg it might be +inactive in this phase but become active later). Once a Rec is broken up +it can never be put back together, so we must be conservative. + +The principle is that, regardless of rule firings, every variable is +always in scope. + + * Note [Rules are extra RHSs] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A RULE for 'f' is like an extra RHS for 'f'. That way the "parent" + keeps the specialised "children" alive. If the parent dies + (because it isn't referenced any more), then the children will die + too (unless they are already referenced directly). + + To that end, we build a Rec group for each cyclic strongly + connected component, + *treating f's rules as extra RHSs for 'f'*. + More concretely, the SCC analysis runs on a graph with an edge + from f -> g iff g is mentioned in + (a) f's rhs + (b) f's RULES + These are rec_edges. + + Under (b) we include variables free in *either* LHS *or* RHS of + the rule. The former might seems silly, but see Note [Rule + dependency info]. So in Example [eftInt], eftInt and eftIntFB + will be put in the same Rec, even though their 'main' RHSs are + both non-recursive. + + * Note [Rule dependency info] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The VarSet in a RuleInfo is used for dependency analysis in the + occurrence analyser. We must track free vars in *both* lhs and rhs. + Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind. + Why both? Consider + x = y + RULE f x = v+4 + Then if we substitute y for x, we'd better do so in the + rule's LHS too, so we'd better ensure the RULE appears to mention 'x' + as well as 'v' + + * Note [Rules are visible in their own rec group] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + We want the rules for 'f' to be visible in f's right-hand side. + And we'd like them to be visible in other functions in f's Rec + group. E.g. in Note [Specialisation rules] we want f' rule + to be visible in both f's RHS, and fs's RHS. + + This means that we must simplify the RULEs first, before looking + at any of the definitions. This is done by Simplify.simplRecBind, + when it calls addLetIdInfo. + +------------------------------------------------------------ +Note [Choosing loop breakers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Loop breaking is surprisingly subtle. First read the section 4 of +"Secrets of the GHC inliner". This describes our basic plan. +We avoid infinite inlinings by choosing loop breakers, and +ensuring that a loop breaker cuts each loop. + +See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which +deals with a closely related source of infinite loops. + +Fundamentally, we do SCC analysis on a graph. For each recursive +group we choose a loop breaker, delete all edges to that node, +re-analyse the SCC, and iterate. + +But what is the graph? NOT the same graph as was used for Note +[Forming Rec groups]! In particular, a RULE is like an equation for +'f' that is *always* inlined if it is applicable. We do *not* disable +rules for loop-breakers. It's up to whoever makes the rules to make +sure that the rules themselves always terminate. See Note [Rules for +recursive functions] in GHC.Core.Op.Simplify + +Hence, if + f's RHS (or its INLINE template if it has one) mentions g, and + g has a RULE that mentions h, and + h has a RULE that mentions f + +then we *must* choose f to be a loop breaker. Example: see Note +[Specialisation rules]. + +In general, take the free variables of f's RHS, and augment it with +all the variables reachable by RULES from those starting points. That +is the whole reason for computing rule_fv_env in occAnalBind. (Of +course we only consider free vars that are also binders in this Rec +group.) See also Note [Finding rule RHS free vars] + +Note that when we compute this rule_fv_env, we only consider variables +free in the *RHS* of the rule, in contrast to the way we build the +Rec group in the first place (Note [Rule dependency info]) + +Note that if 'g' has RHS that mentions 'w', we should add w to +g's loop-breaker edges. More concretely there is an edge from f -> g +iff + (a) g is mentioned in f's RHS `xor` f's INLINE rhs + (see Note [Inline rules]) + (b) or h is mentioned in f's RHS, and + g appears in the RHS of an active RULE of h + or a transitive sequence of active rules starting with h + +Why "active rules"? See Note [Finding rule RHS free vars] + +Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is +chosen as a loop breaker, because their RHSs don't mention each other. +And indeed both can be inlined safely. + +Note again that the edges of the graph we use for computing loop breakers +are not the same as the edges we use for computing the Rec blocks. +That's why we compute + +- rec_edges for the Rec block analysis +- loop_breaker_nodes for the loop breaker analysis + + * Note [Finding rule RHS free vars] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Consider this real example from Data Parallel Haskell + tagZero :: Array Int -> Array Tag + {-# INLINE [1] tagZeroes #-} + tagZero xs = pmap (\x -> fromBool (x==0)) xs + + {-# RULES "tagZero" [~1] forall xs n. + pmap fromBool <blah blah> = tagZero xs #-} + So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero. + However, tagZero can only be inlined in phase 1 and later, while + the RULE is only active *before* phase 1. So there's no problem. + + To make this work, we look for the RHS free vars only for + *active* rules. That's the reason for the occ_rule_act field + of the OccEnv. + + * Note [Weak loop breakers] + ~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a last nasty wrinkle. Suppose we have + + Rec { f = f_rhs + RULE f [] = g + + h = h_rhs + g = h + ...more... + } + + Remember that we simplify the RULES before any RHS (see Note + [Rules are visible in their own rec group] above). + + So we must *not* postInlineUnconditionally 'g', even though + its RHS turns out to be trivial. (I'm assuming that 'g' is + not chosen as a loop breaker.) Why not? Because then we + drop the binding for 'g', which leaves it out of scope in the + RULE! + + Here's a somewhat different example of the same thing + Rec { g = h + ; h = ...f... + ; f = f_rhs + RULE f [] = g } + Here the RULE is "below" g, but we *still* can't postInlineUnconditionally + g, because the RULE for f is active throughout. So the RHS of h + might rewrite to h = ...g... + So g must remain in scope in the output program! + + We "solve" this by: + + Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True) + iff g is a "missing free variable" of the Rec group + + A "missing free variable" x is one that is mentioned in an RHS or + INLINE or RULE of a binding in the Rec group, but where the + dependency on x may not show up in the loop_breaker_nodes (see + note [Choosing loop breakers} above). + + A normal "strong" loop breaker has IAmLoopBreaker False. So + + Inline postInlineUnconditionally + strong IAmLoopBreaker False no no + weak IAmLoopBreaker True yes no + other yes yes + + The **sole** reason for this kind of loop breaker is so that + postInlineUnconditionally does not fire. Ugh. (Typically it'll + inline via the usual callSiteInline stuff, so it'll be dead in the + next pass, so the main Ugh is the tiresome complication.) + +Note [Rules for imported functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this + f = /\a. B.g a + RULE B.g Int = 1 + f Int +Note that + * The RULE is for an imported function. + * f is non-recursive +Now we +can get + f Int --> B.g Int Inlining f + --> 1 + f Int Firing RULE +and so the simplifier goes into an infinite loop. This +would not happen if the RULE was for a local function, +because we keep track of dependencies through rules. But +that is pretty much impossible to do for imported Ids. Suppose +f's definition had been + f = /\a. C.h a +where (by some long and devious process), C.h eventually inlines to +B.g. We could only spot such loops by exhaustively following +unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE) +f. + +Note that RULES for imported functions are important in practice; they +occur a lot in the libraries. + +We regard this potential infinite loop as a *programmer* error. +It's up the programmer not to write silly rules like + RULE f x = f x +and the example above is just a more complicated version. + +Note [Preventing loops due to imported functions rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider: + import GHC.Base (foldr) + + {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-} + filter p xs = build (\c n -> foldr (filterFB c p) n xs) + filterFB c p = ... + + f = filter p xs + +Note that filter is not a loop-breaker, so what happens is: + f = filter p xs + = {inline} build (\c n -> foldr (filterFB c p) n xs) + = {inline} foldr (filterFB (:) p) [] xs + = {RULE} filter p xs + +We are in an infinite loop. + +A more elaborate example (that I actually saw in practice when I went to +mark GHC.List.filter as INLINABLE) is as follows. Say I have this module: + {-# LANGUAGE RankNTypes #-} + module GHCList where + + import Prelude hiding (filter) + import GHC.Base (build) + + {-# INLINABLE filter #-} + filter :: (a -> Bool) -> [a] -> [a] + filter p [] = [] + filter p (x:xs) = if p x then x : filter p xs else filter p xs + + {-# NOINLINE [0] filterFB #-} + filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b + filterFB c p x r | p x = x `c` r + | otherwise = r + + {-# RULES + "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr + (filterFB c p) n xs) + "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p + #-} + +Then (because RULES are applied inside INLINABLE unfoldings, but inlinings +are not), the unfolding given to "filter" in the interface file will be: + filter p [] = [] + filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs) + else build (\c n -> foldr (filterFB c p) n xs + +Note that because this unfolding does not mention "filter", filter is not +marked as a strong loop breaker. Therefore at a use site in another module: + filter p xs + = {inline} + case xs of [] -> [] + (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs) + else build (\c n -> foldr (filterFB c p) n xs) + + build (\c n -> foldr (filterFB c p) n xs) + = {inline} foldr (filterFB (:) p) [] xs + = {RULE} filter p xs + +And we are in an infinite loop again, except that this time the loop is producing an +infinitely large *term* (an unrolling of filter) and so the simplifier finally +dies with "ticks exhausted" + +Because of this problem, we make a small change in the occurrence analyser +designed to mark functions like "filter" as strong loop breakers on the basis that: + 1. The RHS of filter mentions the local function "filterFB" + 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS + +So for each RULE for an *imported* function we are going to add +dependency edges between the *local* FVS of the rule LHS and the +*local* FVS of the rule RHS. We don't do anything special for RULES on +local functions because the standard occurrence analysis stuff is +pretty good at getting loop-breakerness correct there. + +It is important to note that even with this extra hack we aren't always going to get +things right. For example, it might be that the rule LHS mentions an imported Id, +and another module has a RULE that can rewrite that imported Id to one of our local +Ids. + +Note [Specialising imported functions] (referred to from Specialise) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +BUT for *automatically-generated* rules, the programmer can't be +responsible for the "programmer error" in Note [Rules for imported +functions]. In particular, consider specialising a recursive function +defined in another module. If we specialise a recursive function B.g, +we get + g_spec = .....(B.g Int)..... + RULE B.g Int = g_spec +Here, g_spec doesn't look recursive, but when the rule fires, it +becomes so. And if B.g was mutually recursive, the loop might +not be as obvious as it is here. + +To avoid this, + * When specialising a function that is a loop breaker, + give a NOINLINE pragma to the specialised function + +Note [Glomming] +~~~~~~~~~~~~~~~ +RULES for imported Ids can make something at the top refer to something at the bottom: + f = \x -> B.g (q x) + h = \y -> 3 + + RULE: B.g (q x) = h x + +Applying this rule makes f refer to h, although f doesn't appear to +depend on h. (And, as in Note [Rules for imported functions], the +dependency might be more indirect. For example, f might mention C.t +rather than B.g, where C.t eventually inlines to B.g.) + +NOTICE that this cannot happen for rules whose head is a +locally-defined function, because we accurately track dependencies +through RULES. It only happens for rules whose head is an imported +function (B.g in the example above). + +Solution: + - When simplifying, bring all top level identifiers into + scope at the start, ignoring the Rec/NonRec structure, so + that when 'h' pops up in f's rhs, we find it in the in-scope set + (as the simplifier generally expects). This happens in simplTopBinds. + + - In the occurrence analyser, if there are any out-of-scope + occurrences that pop out of the top, which will happen after + firing the rule: f = \x -> h x + h = \y -> 3 + then just glom all the bindings into a single Rec, so that + the *next* iteration of the occurrence analyser will sort + them all out. This part happens in occurAnalysePgm. + +------------------------------------------------------------ +Note [Inline rules] +~~~~~~~~~~~~~~~~~~~ +None of the above stuff about RULES applies to Inline Rules, +stored in a CoreUnfolding. The unfolding, if any, is simplified +at the same time as the regular RHS of the function (ie *not* like +Note [Rules are visible in their own rec group]), so it should be +treated *exactly* like an extra RHS. + +Or, rather, when computing loop-breaker edges, + * If f has an INLINE pragma, and it is active, we treat the + INLINE rhs as f's rhs + * If it's inactive, we treat f as having no rhs + * If it has no INLINE pragma, we look at f's actual rhs + + +There is a danger that we'll be sub-optimal if we see this + f = ...f... + [INLINE f = ..no f...] +where f is recursive, but the INLINE is not. This can just about +happen with a sufficiently odd set of rules; eg + + foo :: Int -> Int + {-# INLINE [1] foo #-} + foo x = x+1 + + bar :: Int -> Int + {-# INLINE [1] bar #-} + bar x = foo x + 1 + + {-# RULES "foo" [~1] forall x. foo x = bar x #-} + +Here the RULE makes bar recursive; but it's INLINE pragma remains +non-recursive. It's tempting to then say that 'bar' should not be +a loop breaker, but an attempt to do so goes wrong in two ways: + a) We may get + $df = ...$cfoo... + $cfoo = ...$df.... + [INLINE $cfoo = ...no-$df...] + But we want $cfoo to depend on $df explicitly so that we + put the bindings in the right order to inline $df in $cfoo + and perhaps break the loop altogether. (Maybe this + b) + + +Example [eftInt] +~~~~~~~~~~~~~~~ +Example (from GHC.Enum): + + eftInt :: Int# -> Int# -> [Int] + eftInt x y = ...(non-recursive)... + + {-# INLINE [0] eftIntFB #-} + eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r + eftIntFB c n x y = ...(non-recursive)... + + {-# RULES + "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) + "eftIntList" [1] eftIntFB (:) [] = eftInt + #-} + +Note [Specialisation rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this group, which is typical of what SpecConstr builds: + + fs a = ....f (C a).... + f x = ....f (C a).... + {-# RULE f (C a) = fs a #-} + +So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE). + +But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop: + - the RULE is applied in f's RHS (see Note [Self-recursive rules] in GHC.Core.Op.Simplify + - fs is inlined (say it's small) + - now there's another opportunity to apply the RULE + +This showed up when compiling Control.Concurrent.Chan.getChanContents. + +------------------------------------------------------------ +Note [Finding join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's the occurrence analyser's job to find bindings that we can turn into join +points, but it doesn't perform that transformation right away. Rather, it marks +the eligible bindings as part of their occurrence data, leaving it to the +simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'. +The simplifier then eta-expands the RHS if needed and then updates the +occurrence sites. Dividing the work this way means that the occurrence analyser +still only takes one pass, yet one can always tell the difference between a +function call and a jump by looking at the occurrence (because the same pass +changes the 'IdDetails' and propagates the binders to their occurrence sites). + +To track potential join points, we use the 'occ_tail' field of OccInfo. A value +of `AlwaysTailCalled n` indicates that every occurrence of the variable is a +tail call with `n` arguments (counting both value and type arguments). Otherwise +'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the +rest of 'OccInfo' until it goes on the binder. + +Note [Rules and join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Things get fiddly with rules. Suppose we have: + + let j :: Int -> Int + j y = 2 * y + k :: Int -> Int -> Int + {-# RULES "SPEC k 0" k 0 = j #-} + k x y = x + 2 * y + in ... + +Now suppose that both j and k appear only as saturated tail calls in the body. +Thus we would like to make them both join points. The rule complicates matters, +though, as its RHS has an unapplied occurrence of j. *However*, if we were to +eta-expand the rule, all would be well: + + {-# RULES "SPEC k 0" forall a. k 0 a = j a #-} + +So conceivably we could notice that a potential join point would have an +"undersaturated" rule and account for it. This would mean we could make +something that's been specialised a join point, for instance. But local bindings +are rarely specialised, and being overly cautious about rules only +costs us anything when, for some `j`: + + * Before specialisation, `j` has non-tail calls, so it can't be a join point. + * During specialisation, `j` gets specialised and thus acquires rules. + * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say), + and so now `j` *could* become a join point. + +This appears to be very rare in practice. TODO Perhaps we should gather +statistics to be sure. + +------------------------------------------------------------ +Note [Adjusting right-hand sides] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There's a bit of a dance we need to do after analysing a lambda expression or +a right-hand side. In particular, we need to + + a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot + lambda, or a non-recursive join point; and + b) call 'markAllNonTailCalled' *unless* the binding is for a join point. + +Some examples, with how the free occurrences in e (assumed not to be a value +lambda) get marked: + + inside lam non-tail-called + ------------------------------------------------------------ + let x = e No Yes + let f = \x -> e Yes Yes + let f = \x{OneShot} -> e No Yes + \x -> e Yes Yes + join j x = e No No + joinrec j x = e Yes No + +There are a few other caveats; most importantly, if we're marking a binding as +'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so +that the effect cascades properly. Consequently, at the time the RHS is +analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must +return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once +join-point-hood has been decided. + +Thus the overall sequence taking place in 'occAnalNonRecBind' and +'occAnalRecBind' is as follows: + + 1. Call 'occAnalLamOrRhs' to find usage information for the RHS. + 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make + the binding a join point. + 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when + recursive.) + +(In the recursive case, this logic is spread between 'makeNode' and +'occAnalRec'.) +-} + +------------------------------------------------------------------ +-- occAnalBind +------------------------------------------------------------------ + +occAnalBind :: OccEnv -- The incoming OccEnv + -> TopLevelFlag + -> ImpRuleEdges + -> CoreBind + -> UsageDetails -- Usage details of scope + -> (UsageDetails, -- Of the whole let(rec) + [CoreBind]) + +occAnalBind env lvl top_env (NonRec binder rhs) body_usage + = occAnalNonRecBind env lvl top_env binder rhs body_usage +occAnalBind env lvl top_env (Rec pairs) body_usage + = occAnalRecBind env lvl top_env pairs body_usage + +----------------- +occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr + -> UsageDetails -> (UsageDetails, [CoreBind]) +occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage + | isTyVar binder -- A type let; we don't gather usage info + = (body_usage, [NonRec binder rhs]) + + | not (binder `usedIn` body_usage) -- It's not mentioned + = (body_usage, []) + + | otherwise -- It's mentioned in the body + = (body_usage' `andUDs` rhs_usage', [NonRec tagged_binder rhs']) + where + (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder + mb_join_arity = willBeJoinId_maybe tagged_binder + + (bndrs, body) = collectBinders rhs + + (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body + rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body' + -- For a /non-recursive/ join point we can mark all + -- its join-lambda as one-shot; and it's a good idea to do so + + -- Unfoldings + -- See Note [Unfoldings and join points] + rhs_usage2 = case occAnalUnfolding env NonRecursive binder of + Just unf_usage -> rhs_usage1 `andUDs` unf_usage + Nothing -> rhs_usage1 + + -- Rules + -- See Note [Rules are extra RHSs] and Note [Rule dependency info] + rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder + rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds + rhs_usage3 = foldr andUDs rhs_usage2 rule_uds + rhs_usage4 = case lookupVarEnv imp_rule_edges binder of + Nothing -> rhs_usage3 + Just vs -> addManyOccsSet rhs_usage3 vs + -- See Note [Preventing loops due to imported functions rules] + + -- Final adjustment + rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4 + +----------------- +occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)] + -> UsageDetails -> (UsageDetails, [CoreBind]) +occAnalRecBind env lvl imp_rule_edges pairs body_usage + = foldr (occAnalRec env lvl) (body_usage, []) sccs + -- For a recursive group, we + -- * occ-analyse all the RHSs + -- * compute strongly-connected components + -- * feed those components to occAnalRec + -- See Note [Recursive bindings: the grand plan] + where + sccs :: [SCC Details] + sccs = {-# SCC "occAnalBind.scc" #-} + stronglyConnCompFromEdgedVerticesUniq nodes + + nodes :: [LetrecNode] + nodes = {-# SCC "occAnalBind.assoc" #-} + map (makeNode env imp_rule_edges bndr_set) pairs + + bndr_set = mkVarSet (map fst pairs) + +{- +Note [Unfoldings and join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We assume that anything in an unfolding occurs multiple times, since unfoldings +are often copied (that's the whole point!). But we still need to track tail +calls for the purpose of finding join points. +-} + +----------------------------- +occAnalRec :: OccEnv -> TopLevelFlag + -> SCC Details + -> (UsageDetails, [CoreBind]) + -> (UsageDetails, [CoreBind]) + + -- The NonRec case is just like a Let (NonRec ...) above +occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs + , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) + (body_uds, binds) + | not (bndr `usedIn` body_uds) + = (body_uds, binds) -- See Note [Dead code] + + | otherwise -- It's mentioned in the body + = (body_uds' `andUDs` rhs_uds', + NonRec tagged_bndr rhs : binds) + where + (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr + rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive + rhs_bndrs rhs_uds + + -- The Rec case is the interesting one + -- See Note [Recursive bindings: the grand plan] + -- See Note [Loop breaking] +occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds) + | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds + = (body_uds, binds) -- See Note [Dead code] + + | otherwise -- At this point we always build a single Rec + = -- pprTrace "occAnalRec" (vcat + -- [ text "weak_fvs" <+> ppr weak_fvs + -- , text "lb nodes" <+> ppr loop_breaker_nodes]) + (final_uds, Rec pairs : binds) + + where + bndrs = map nd_bndr details_s + bndr_set = mkVarSet bndrs + + ------------------------------ + -- See Note [Choosing loop breakers] for loop_breaker_nodes + final_uds :: UsageDetails + loop_breaker_nodes :: [LetrecNode] + (final_uds, loop_breaker_nodes) + = mkLoopBreakerNodes env lvl bndr_set body_uds details_s + + ------------------------------ + weak_fvs :: VarSet + weak_fvs = mapUnionVarSet nd_weak details_s + + --------------------------- + -- Now reconstruct the cycle + pairs :: [(Id,CoreExpr)] + pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes [] + | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes [] + -- If weak_fvs is empty, the loop_breaker_nodes will include + -- all the edges in the original scope edges [remember, + -- weak_fvs is the difference between scope edges and + -- lb-edges], so a fresh SCC computation would yield a + -- single CyclicSCC result; and reOrderNodes deals with + -- exactly that case + + +------------------------------------------------------------------ +-- Loop breaking +------------------------------------------------------------------ + +type Binding = (Id,CoreExpr) + +loopBreakNodes :: Int + -> VarSet -- All binders + -> VarSet -- Binders whose dependencies may be "missing" + -- See Note [Weak loop breakers] + -> [LetrecNode] + -> [Binding] -- Append these to the end + -> [Binding] +{- +loopBreakNodes is applied to the list of nodes for a cyclic strongly +connected component (there's guaranteed to be a cycle). It returns +the same nodes, but + a) in a better order, + b) with some of the Ids having a IAmALoopBreaker pragma + +The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means +that the simplifier can guarantee not to loop provided it never records an inlining +for these no-inline guys. + +Furthermore, the order of the binds is such that if we neglect dependencies +on the no-inline Ids then the binds are topologically sorted. This means +that the simplifier will generally do a good job if it works from top bottom, +recording inlinings for any Ids which aren't marked as "no-inline" as it goes. +-} + +-- Return the bindings sorted into a plausible order, and marked with loop breakers. +loopBreakNodes depth bndr_set weak_fvs nodes binds + = -- pprTrace "loopBreakNodes" (ppr nodes) $ + go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds + where + go [] binds = binds + go (scc:sccs) binds = loop_break_scc scc (go sccs binds) + + loop_break_scc scc binds + = case scc of + AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds + CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds + +---------------------------------- +reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding] + -- Choose a loop breaker, mark it no-inline, + -- and call loopBreakNodes on the rest +reOrderNodes _ _ _ [] _ = panic "reOrderNodes" +reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds +reOrderNodes depth bndr_set weak_fvs (node : nodes) binds + = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen + -- , text "chosen" <+> ppr chosen_nodes ]) $ + loopBreakNodes new_depth bndr_set weak_fvs unchosen $ + (map mk_loop_breaker chosen_nodes ++ binds) + where + (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb + (nd_score (node_payload node)) + [node] [] nodes + + approximate_lb = depth >= 2 + new_depth | approximate_lb = 0 + | otherwise = depth+1 + -- After two iterations (d=0, d=1) give up + -- and approximate, returning to d=0 + +mk_loop_breaker :: LetrecNode -> Binding +mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs}) + = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs) + where + tail_info = tailCallInfo (idOccInfo bndr) + +mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding +-- See Note [Weak loop breakers] +mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr + , nd_rhs = rhs}) + | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs) + | otherwise = (bndr, rhs) + where + occ' = weakLoopBreaker { occ_tail = tail_info } + tail_info = tailCallInfo (idOccInfo bndr) + +---------------------------------- +chooseLoopBreaker :: Bool -- True <=> Too many iterations, + -- so approximate + -> NodeScore -- Best score so far + -> [LetrecNode] -- Nodes with this score + -> [LetrecNode] -- Nodes with higher scores + -> [LetrecNode] -- Unprocessed nodes + -> ([LetrecNode], [LetrecNode]) + -- This loop looks for the bind with the lowest score + -- to pick as the loop breaker. The rest accumulate in +chooseLoopBreaker _ _ loop_nodes acc [] + = (loop_nodes, acc) -- Done + + -- If approximate_loop_breaker is True, we pick *all* + -- nodes with lowest score, else just one + -- See Note [Complexity of loop breaking] +chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes) + | approx_lb + , rank sc == rank loop_sc + = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes + + | sc `betterLB` loop_sc -- Better score so pick this new one + = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes + + | otherwise -- Worse score so don't pick it + = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes + where + sc = nd_score (node_payload node) + +{- +Note [Complexity of loop breaking] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The loop-breaking algorithm knocks out one binder at a time, and +performs a new SCC analysis on the remaining binders. That can +behave very badly in tightly-coupled groups of bindings; in the +worst case it can be (N**2)*log N, because it does a full SCC +on N, then N-1, then N-2 and so on. + +To avoid this, we switch plans after 2 (or whatever) attempts: + Plan A: pick one binder with the lowest score, make it + a loop breaker, and try again + Plan B: pick *all* binders with the lowest score, make them + all loop breakers, and try again +Since there are only a small finite number of scores, this will +terminate in a constant number of iterations, rather than O(N) +iterations. + +You might thing that it's very unlikely, but RULES make it much +more likely. Here's a real example from #1969: + Rec { $dm = \d.\x. op d + {-# RULES forall d. $dm Int d = $s$dm1 + forall d. $dm Bool d = $s$dm2 #-} + + dInt = MkD .... opInt ... + dInt = MkD .... opBool ... + opInt = $dm dInt + opBool = $dm dBool + + $s$dm1 = \x. op dInt + $s$dm2 = \x. op dBool } +The RULES stuff means that we can't choose $dm as a loop breaker +(Note [Choosing loop breakers]), so we must choose at least (say) +opInt *and* opBool, and so on. The number of loop breakders is +linear in the number of instance declarations. + +Note [Loop breakers and INLINE/INLINABLE pragmas] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Avoid choosing a function with an INLINE pramga as the loop breaker! +If such a function is mutually-recursive with a non-INLINE thing, +then the latter should be the loop-breaker. + +It's vital to distinguish between INLINE and INLINABLE (the +Bool returned by hasStableCoreUnfolding_maybe). If we start with + Rec { {-# INLINABLE f #-} + f x = ...f... } +and then worker/wrapper it through strictness analysis, we'll get + Rec { {-# INLINABLE $wf #-} + $wf p q = let x = (p,q) in ...f... + + {-# INLINE f #-} + f x = case x of (p,q) -> $wf p q } + +Now it is vital that we choose $wf as the loop breaker, so we can +inline 'f' in '$wf'. + +Note [DFuns should not be loop breakers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's particularly bad to make a DFun into a loop breaker. See +Note [How instance declarations are translated] in TcInstDcls + +We give DFuns a higher score than ordinary CONLIKE things because +if there's a choice we want the DFun to be the non-loop breaker. Eg + +rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC) + + $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE) + {-# DFUN #-} + $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC) + } + +Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it +if we can't unravel the DFun first. + +Note [Constructor applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's really really important to inline dictionaries. Real +example (the Enum Ordering instance from GHC.Base): + + rec f = \ x -> case d of (p,q,r) -> p x + g = \ x -> case d of (p,q,r) -> q x + d = (v, f, g) + +Here, f and g occur just once; but we can't inline them into d. +On the other hand we *could* simplify those case expressions if +we didn't stupidly choose d as the loop breaker. +But we won't because constructor args are marked "Many". +Inlining dictionaries is really essential to unravelling +the loops in static numeric dictionaries, see GHC.Float. + +Note [Closure conversion] +~~~~~~~~~~~~~~~~~~~~~~~~~ +We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm. +The immediate motivation came from the result of a closure-conversion transformation +which generated code like this: + + data Clo a b = forall c. Clo (c -> a -> b) c + + ($:) :: Clo a b -> a -> b + Clo f env $: x = f env x + + rec { plus = Clo plus1 () + + ; plus1 _ n = Clo plus2 n + + ; plus2 Zero n = n + ; plus2 (Succ m) n = Succ (plus $: m $: n) } + +If we inline 'plus' and 'plus1', everything unravels nicely. But if +we choose 'plus1' as the loop breaker (which is entirely possible +otherwise), the loop does not unravel nicely. + + +@occAnalUnfolding@ deals with the question of bindings where the Id is marked +by an INLINE pragma. For these we record that anything which occurs +in its RHS occurs many times. This pessimistically assumes that this +inlined binder also occurs many times in its scope, but if it doesn't +we'll catch it next time round. At worst this costs an extra simplifier pass. +ToDo: try using the occurrence info for the inline'd binder. + +[March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC. +[June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC. + + +************************************************************************ +* * + Making nodes +* * +************************************************************************ +-} + +type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs + +noImpRuleEdges :: ImpRuleEdges +noImpRuleEdges = emptyVarEnv + +type LetrecNode = Node Unique Details -- Node comes from Digraph + -- The Unique key is gotten from the Id +data Details + = ND { nd_bndr :: Id -- Binder + , nd_rhs :: CoreExpr -- RHS, already occ-analysed + , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS + -- INVARIANT: (nd_rhs_bndrs nd, _) == + -- collectBinders (nd_rhs nd) + + , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings + -- ignoring phase (ie assuming all are active) + -- See Note [Forming Rec groups] + + , nd_inl :: IdSet -- Free variables of + -- the stable unfolding (if present and active) + -- or the RHS (if not) + -- but excluding any RULES + -- This is the IdSet that may be used if the Id is inlined + + , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds + -- but are *not* in nd_inl. These are the ones whose + -- dependencies might not be respected by loop_breaker_nodes + -- See Note [Weak loop breakers] + + , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES + + , nd_score :: NodeScore + } + +instance Outputable Details where + ppr nd = text "ND" <> braces + (sep [ text "bndr =" <+> ppr (nd_bndr nd) + , text "uds =" <+> ppr (nd_uds nd) + , text "inl =" <+> ppr (nd_inl nd) + , text "weak =" <+> ppr (nd_weak nd) + , text "rule =" <+> ppr (nd_active_rule_fvs nd) + , text "score =" <+> ppr (nd_score nd) + ]) + +-- The NodeScore is compared lexicographically; +-- e.g. lower rank wins regardless of size +type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker + , Int -- Size of rhs: higher => more likely to be picked as LB + -- Maxes out at maxExprSize; we just use it to prioritise + -- small functions + , Bool ) -- Was it a loop breaker before? + -- True => more likely to be picked + -- Note [Loop breakers, node scoring, and stability] + +rank :: NodeScore -> Int +rank (r, _, _) = r + +makeNode :: OccEnv -> ImpRuleEdges -> VarSet + -> (Var, CoreExpr) -> LetrecNode +-- See Note [Recursive bindings: the grand plan] +makeNode env imp_rule_edges bndr_set (bndr, rhs) + = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs) + -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR + -- is still deterministic with edges in nondeterministic order as + -- explained in Note [Deterministic SCC] in Digraph. + where + details = ND { nd_bndr = bndr + , nd_rhs = rhs' + , nd_rhs_bndrs = bndrs' + , nd_uds = rhs_usage3 + , nd_inl = inl_fvs + , nd_weak = node_fvs `minusVarSet` inl_fvs + , nd_active_rule_fvs = active_rule_fvs + , nd_score = pprPanic "makeNodeDetails" (ppr bndr) } + + -- Constructing the edges for the main Rec computation + -- See Note [Forming Rec groups] + (bndrs, body) = collectBinders rhs + (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body + rhs' = mkLams bndrs' body' + rhs_usage2 = foldr andUDs rhs_usage1 rule_uds + -- Note [Rules are extra RHSs] + -- Note [Rule dependency info] + rhs_usage3 = case mb_unf_uds of + Just unf_uds -> rhs_usage2 `andUDs` unf_uds + Nothing -> rhs_usage2 + node_fvs = udFreeVars bndr_set rhs_usage3 + + -- Finding the free variables of the rules + is_active = occ_rule_act env :: Activation -> Bool + + rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)] + rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr + + rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs + rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) + (lookupVarEnv imp_rule_edges bndr) + -- See Note [Preventing loops due to imported functions rules] + [ (ru_act rule, udFreeVars bndr_set rhs_uds) + | (rule, _, rhs_uds) <- rules_w_uds ] + rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds + active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs + , is_active a] + + -- Finding the usage details of the INLINE pragma (if any) + mb_unf_uds = occAnalUnfolding env Recursive bndr + + -- Find the "nd_inl" free vars; for the loop-breaker phase + inl_fvs = case mb_unf_uds of + Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS + Just unf_uds -> udFreeVars bndr_set unf_uds + -- We could check for an *active* INLINE (returning + -- emptyVarSet for an inactive one), but is_active + -- isn't the right thing (it tells about + -- RULE activation), so we'd need more plumbing + +mkLoopBreakerNodes :: OccEnv -> TopLevelFlag + -> VarSet + -> UsageDetails -- for BODY of let + -> [Details] + -> (UsageDetails, -- adjusted + [LetrecNode]) +-- Does four things +-- a) tag each binder with its occurrence info +-- b) add a NodeScore to each node +-- c) make a Node with the right dependency edges for +-- the loop-breaker SCC analysis +-- d) adjust each RHS's usage details according to +-- the binder's (new) shotness and join-point-hood +mkLoopBreakerNodes env lvl bndr_set body_uds details_s + = (final_uds, zipWith mk_lb_node details_s bndrs') + where + (final_uds, bndrs') = tagRecBinders lvl body_uds + [ ((nd_bndr nd) + ,(nd_uds nd) + ,(nd_rhs_bndrs nd)) + | nd <- details_s ] + mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr' + = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps) + -- It's OK to use nonDetKeysUniqSet here as + -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges + -- in nondeterministic order as explained in + -- Note [Deterministic SCC] in Digraph. + where + nd' = nd { nd_bndr = bndr', nd_score = score } + score = nodeScore env bndr bndr' rhs lb_deps + lb_deps = extendFvs_ rule_fv_env inl_fvs + + rule_fv_env :: IdEnv IdSet + -- Maps a variable f to the variables from this group + -- mentioned in RHS of active rules for f + -- Domain is *subset* of bound vars (others have no rule fvs) + rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs) + init_rule_fvs -- See Note [Finding rule RHS free vars] + = [ (b, trimmed_rule_fvs) + | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s + , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set + , not (isEmptyVarSet trimmed_rule_fvs) ] + + +------------------------------------------ +nodeScore :: OccEnv + -> Id -- Binder has old occ-info (just for loop-breaker-ness) + -> Id -- Binder with new occ-info + -> CoreExpr -- RHS + -> VarSet -- Loop-breaker dependencies + -> NodeScore +nodeScore env old_bndr new_bndr bind_rhs lb_deps + | not (isId old_bndr) -- A type or coercion variable is never a loop breaker + = (100, 0, False) + + | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers + = (0, 0, True) -- See Note [Self-recursion and loop breakers] + + | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has + = (0, 0, True) -- a NOINLINE pragma) makes a great loop breaker + + | exprIsTrivial rhs + = mk_score 10 -- Practically certain to be inlined + -- Used to have also: && not (isExportedId bndr) + -- But I found this sometimes cost an extra iteration when we have + -- rec { d = (a,b); a = ...df...; b = ...df...; df = d } + -- where df is the exported dictionary. Then df makes a really + -- bad choice for loop breaker + + | DFunUnfolding { df_args = args } <- id_unfolding + -- Never choose a DFun as a loop breaker + -- Note [DFuns should not be loop breakers] + = (9, length args, is_lb) + + -- Data structures are more important than INLINE pragmas + -- so that dictionary/method recursion unravels + + | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding + = mk_score 6 + + | is_con_app rhs -- Data types help with cases: + = mk_score 5 -- Note [Constructor applications] + + | isStableUnfolding id_unfolding + , can_unfold + = mk_score 3 + + | isOneOcc (idOccInfo new_bndr) + = mk_score 2 -- Likely to be inlined + + | can_unfold -- The Id has some kind of unfolding + = mk_score 1 + + | otherwise + = (0, 0, is_lb) + + where + mk_score :: Int -> NodeScore + mk_score rank = (rank, rhs_size, is_lb) + + is_lb = isStrongLoopBreaker (idOccInfo old_bndr) + rhs = case id_unfolding of + CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs } + | isStableSource src + -> unf_rhs + _ -> bind_rhs + -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding + rhs_size = case id_unfolding of + CoreUnfolding { uf_guidance = guidance } + | UnfIfGoodArgs { ug_size = size } <- guidance + -> size + _ -> cheapExprSize rhs + + can_unfold = canUnfold id_unfolding + id_unfolding = realIdUnfolding old_bndr + -- realIdUnfolding: Ignore loop-breaker-ness here because + -- that is what we are setting! + + -- Checking for a constructor application + -- Cheap and cheerful; the simplifier moves casts out of the way + -- The lambda case is important to spot x = /\a. C (f a) + -- which comes up when C is a dictionary constructor and + -- f is a default method. + -- Example: the instance for Show (ST s a) in GHC.ST + -- + -- However we *also* treat (\x. C p q) as a con-app-like thing, + -- Note [Closure conversion] + is_con_app (Var v) = isConLikeId v + is_con_app (App f _) = is_con_app f + is_con_app (Lam _ e) = is_con_app e + is_con_app (Tick _ e) = is_con_app e + is_con_app _ = False + +maxExprSize :: Int +maxExprSize = 20 -- Rather arbitrary + +cheapExprSize :: CoreExpr -> Int +-- Maxes out at maxExprSize +cheapExprSize e + = go 0 e + where + go n e | n >= maxExprSize = n + | otherwise = go1 n e + + go1 n (Var {}) = n+1 + go1 n (Lit {}) = n+1 + go1 n (Type {}) = n + go1 n (Coercion {}) = n + go1 n (Tick _ e) = go1 n e + go1 n (Cast e _) = go1 n e + go1 n (App f a) = go (go1 n f) a + go1 n (Lam b e) + | isTyVar b = go1 n e + | otherwise = go (n+1) e + go1 n (Let b e) = gos (go1 n e) (rhssOfBind b) + go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as) + + gos n [] = n + gos n (e:es) | n >= maxExprSize = n + | otherwise = gos (go1 n e) es + +betterLB :: NodeScore -> NodeScore -> Bool +-- If n1 `betterLB` n2 then choose n1 as the loop breaker +betterLB (rank1, size1, lb1) (rank2, size2, _) + | rank1 < rank2 = True + | rank1 > rank2 = False + | size1 < size2 = False -- Make the bigger n2 into the loop breaker + | size1 > size2 = True + | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it + | otherwise = False -- See Note [Loop breakers, node scoring, and stability] + +{- Note [Self-recursion and loop breakers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + rec { f = ...f...g... + ; g = .....f... } +then 'f' has to be a loop breaker anyway, so we may as well choose it +right away, so that g can inline freely. + +This is really just a cheap hack. Consider + rec { f = ...g... + ; g = ..f..h... + ; h = ...f....} +Here f or g are better loop breakers than h; but we might accidentally +choose h. Finding the minimal set of loop breakers is hard. + +Note [Loop breakers, node scoring, and stability] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +To choose a loop breaker, we give a NodeScore to each node in the SCC, +and pick the one with the best score (according to 'betterLB'). + +We need to be jolly careful (#12425, #12234) about the stability +of this choice. Suppose we have + + let rec { f = ...g...g... + ; g = ...f...f... } + in + case x of + True -> ...f.. + False -> ..f... + +In each iteration of the simplifier the occurrence analyser OccAnal +chooses a loop breaker. Suppose in iteration 1 it choose g as the loop +breaker. That means it is free to inline f. + +Suppose that GHC decides to inline f in the branches of the case, but +(for some reason; eg it is not saturated) in the rhs of g. So we get + + let rec { f = ...g...g... + ; g = ...f...f... } + in + case x of + True -> ...g...g..... + False -> ..g..g.... + +Now suppose that, for some reason, in the next iteration the occurrence +analyser chooses f as the loop breaker, so it can freely inline g. And +again for some reason the simplifier inlines g at its calls in the case +branches, but not in the RHS of f. Then we get + + let rec { f = ...g...g... + ; g = ...f...f... } + in + case x of + True -> ...(...f...f...)...(...f..f..)..... + False -> ..(...f...f...)...(..f..f...).... + +You can see where this is going! Each iteration of the simplifier +doubles the number of calls to f or g. No wonder GHC is slow! + +(In the particular example in comment:3 of #12425, f and g are the two +mutually recursive fmap instances for CondT and Result. They are both +marked INLINE which, oddly, is why they don't inline in each other's +RHS, because the call there is not saturated.) + +The root cause is that we flip-flop on our choice of loop breaker. I +always thought it didn't matter, and indeed for any single iteration +to terminate, it doesn't matter. But when we iterate, it matters a +lot!! + +So The Plan is this: + If there is a tie, choose the node that + was a loop breaker last time round + +Hence the is_lb field of NodeScore + +************************************************************************ +* * + Right hand sides +* * +************************************************************************ +-} + +occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr + -> (UsageDetails, [CoreBndr], CoreExpr) + -- Returned usage details covers only the RHS, + -- and *not* the RULE or INLINE template for the Id +occAnalRhs env Recursive _ bndrs body + = occAnalRecRhs env bndrs body +occAnalRhs env NonRecursive id bndrs body + = occAnalNonRecRhs env id bndrs body + +occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body + -> (UsageDetails, [CoreBndr], CoreExpr) + -- Returned usage details covers only the RHS, + -- and *not* the RULE or INLINE template for the Id +occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body + +occAnalNonRecRhs :: OccEnv + -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body + -- Binder is already tagged with occurrence info + -> (UsageDetails, [CoreBndr], CoreExpr) + -- Returned usage details covers only the RHS, + -- and *not* the RULE or INLINE template for the Id +occAnalNonRecRhs env bndr bndrs body + = occAnalLamOrRhs rhs_env bndrs body + where + env1 | is_join_point = env -- See Note [Join point RHSs] + | certainly_inline = env -- See Note [Cascading inlines] + | otherwise = rhsCtxt env + + -- See Note [Sources of one-shot information] + rhs_env = env1 { occ_one_shots = argOneShots dmd } + + certainly_inline -- See Note [Cascading inlines] + = case occ of + OneOcc { occ_in_lam = NotInsideLam, occ_one_br = InOneBranch } + -> active && not_stable + _ -> False + + is_join_point = isAlwaysTailCalled occ + -- Like (isJoinId bndr) but happens one step earlier + -- c.f. willBeJoinId_maybe + + occ = idOccInfo bndr + dmd = idDemandInfo bndr + active = isAlwaysActive (idInlineActivation bndr) + not_stable = not (isStableUnfolding (idUnfolding bndr)) + +occAnalUnfolding :: OccEnv + -> RecFlag + -> Id + -> Maybe UsageDetails + -- Just the analysis, not a new unfolding. The unfolding + -- got analysed when it was created and we don't need to + -- update it. +occAnalUnfolding env rec_flag id + = case realIdUnfolding id of -- ignore previous loop-breaker flag + CoreUnfolding { uf_tmpl = rhs, uf_src = src } + | not (isStableSource src) + -> Nothing + | otherwise + -> Just $ markAllMany usage + where + (bndrs, body) = collectBinders rhs + (usage, _, _) = occAnalRhs env rec_flag id bndrs body + + DFunUnfolding { df_bndrs = bndrs, df_args = args } + -> Just $ zapDetails (delDetailsList usage bndrs) + where + usage = andUDsList (map (fst . occAnal env) args) + + _ -> Nothing + +occAnalRules :: OccEnv + -> Maybe JoinArity -- If the binder is (or MAY become) a join + -- point, what its join arity is (or WOULD + -- become). See Note [Rules and join points]. + -> RecFlag + -> Id + -> [(CoreRule, -- Each (non-built-in) rule + UsageDetails, -- Usage details for LHS + UsageDetails)] -- Usage details for RHS +occAnalRules env mb_expected_join_arity rec_flag id + = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id + , let (lhs_uds, rhs_uds) = occ_anal_rule rule ] + where + occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) + = (lhs_uds, final_rhs_uds) + where + lhs_uds = addManyOccsSet emptyDetails $ + (exprsFreeVars args `delVarSetList` bndrs) + (rhs_bndrs, rhs_body) = collectBinders rhs + (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body + -- Note [Rules are extra RHSs] + -- Note [Rule dependency info] + final_rhs_uds = adjust_tail_info args $ markAllMany $ + (rhs_uds `delDetailsList` bndrs) + occ_anal_rule _ + = (emptyDetails, emptyDetails) + + adjust_tail_info args uds -- see Note [Rules and join points] + = case mb_expected_join_arity of + Just ar | args `lengthIs` ar -> uds + _ -> markAllNonTailCalled uds +{- Note [Join point RHSs] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + x = e + join j = Just x + +We want to inline x into j right away, so we don't want to give +the join point a RhsCtxt (#14137). It's not a huge deal, because +the FloatIn pass knows to float into join point RHSs; and the simplifier +does not float things out of join point RHSs. But it's a simple, cheap +thing to do. See #14137. + +Note [Cascading inlines] +~~~~~~~~~~~~~~~~~~~~~~~~ +By default we use an rhsCtxt for the RHS of a binding. This tells the +occ anal n that it's looking at an RHS, which has an effect in +occAnalApp. In particular, for constructor applications, it makes +the arguments appear to have NoOccInfo, so that we don't inline into +them. Thus x = f y + k = Just x +we do not want to inline x. + +But there's a problem. Consider + x1 = a0 : [] + x2 = a1 : x1 + x3 = a2 : x2 + g = f x3 +First time round, it looks as if x1 and x2 occur as an arg of a +let-bound constructor ==> give them a many-occurrence. +But then x3 is inlined (unconditionally as it happens) and +next time round, x2 will be, and the next time round x1 will be +Result: multiple simplifier iterations. Sigh. + +So, when analysing the RHS of x3 we notice that x3 will itself +definitely inline the next time round, and so we analyse x3's rhs in +an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff. + +Annoyingly, we have to approximate GHC.Core.Op.Simplify.Utils.preInlineUnconditionally. +If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and + (b) certainly_inline says "yes" when preInlineUnconditionally says "no" +then the simplifier iterates indefinitely: + x = f y + k = Just x -- We decide that k is 'certainly_inline' + v = ...k... -- but preInlineUnconditionally doesn't inline it +inline ==> + k = Just (f y) + v = ...k... +float ==> + x1 = f y + k = Just x1 + v = ...k... + +This is worse than the slow cascade, so we only want to say "certainly_inline" +if it really is certain. Look at the note with preInlineUnconditionally +for the various clauses. + + +************************************************************************ +* * + Expressions +* * +************************************************************************ +-} + +occAnal :: OccEnv + -> CoreExpr + -> (UsageDetails, -- Gives info only about the "interesting" Ids + CoreExpr) + +occAnal _ expr@(Type _) = (emptyDetails, expr) +occAnal _ expr@(Lit _) = (emptyDetails, expr) +occAnal env expr@(Var _) = occAnalApp env (expr, [], []) + -- At one stage, I gathered the idRuleVars for the variable here too, + -- which in a way is the right thing to do. + -- But that went wrong right after specialisation, when + -- the *occurrences* of the overloaded function didn't have any + -- rules in them, so the *specialised* versions looked as if they + -- weren't used at all. + +occAnal _ (Coercion co) + = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co) + -- See Note [Gather occurrences of coercion variables] + +{- +Note [Gather occurrences of coercion variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We need to gather info about what coercion variables appear, so that +we can sort them into the right place when doing dependency analysis. +-} + +occAnal env (Tick tickish body) + | SourceNote{} <- tickish + = (usage, Tick tickish body') + -- SourceNotes are best-effort; so we just proceed as usual. + -- If we drop a tick due to the issues described below it's + -- not the end of the world. + + | tickish `tickishScopesLike` SoftScope + = (markAllNonTailCalled usage, Tick tickish body') + + | Breakpoint _ ids <- tickish + = (usage_lam `andUDs` foldr addManyOccs emptyDetails ids, Tick tickish body') + -- never substitute for any of the Ids in a Breakpoint + + | otherwise + = (usage_lam, Tick tickish body') + where + !(usage,body') = occAnal env body + -- for a non-soft tick scope, we can inline lambdas only + usage_lam = markAllNonTailCalled (markAllInsideLam usage) + -- TODO There may be ways to make ticks and join points play + -- nicer together, but right now there are problems: + -- let j x = ... in tick<t> (j 1) + -- Making j a join point may cause the simplifier to drop t + -- (if the tick is put into the continuation). So we don't + -- count j 1 as a tail call. + -- See #14242. + +occAnal env (Cast expr co) + = case occAnal env expr of { (usage, expr') -> + let usage1 = zapDetailsIf (isRhsEnv env) usage + -- usage1: if we see let x = y `cast` co + -- then mark y as 'Many' so that we don't + -- immediately inline y again. + usage2 = addManyOccsSet usage1 (coVarsOfCo co) + -- usage2: see Note [Gather occurrences of coercion variables] + in (markAllNonTailCalled usage2, Cast expr' co) + } + +occAnal env app@(App _ _) + = occAnalApp env (collectArgsTicks tickishFloatable app) + +-- Ignore type variables altogether +-- (a) occurrences inside type lambdas only not marked as InsideLam +-- (b) type variables not in environment + +occAnal env (Lam x body) + | isTyVar x + = case occAnal env body of { (body_usage, body') -> + (markAllNonTailCalled body_usage, Lam x body') + } + +-- For value lambdas we do a special hack. Consider +-- (\x. \y. ...x...) +-- If we did nothing, x is used inside the \y, so would be marked +-- as dangerous to dup. But in the common case where the abstraction +-- is applied to two arguments this is over-pessimistic. +-- So instead, we just mark each binder with its occurrence +-- info in the *body* of the multiple lambda. +-- Then, the simplifier is careful when partially applying lambdas. + +occAnal env expr@(Lam _ _) + = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') -> + let + expr' = mkLams tagged_binders body' + usage1 = markAllNonTailCalled usage + one_shot_gp = all isOneShotBndr tagged_binders + final_usage | one_shot_gp = usage1 + | otherwise = markAllInsideLam usage1 + in + (final_usage, expr') } + where + (binders, body) = collectBinders expr + +occAnal env (Case scrut bndr ty alts) + = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> + case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') -> + let + alts_usage = foldr orUDs emptyDetails alts_usage_s + (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr + total_usage = markAllNonTailCalled scrut_usage `andUDs` alts_usage1 + -- Alts can have tail calls, but the scrutinee can't + in + total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} + where + alt_env = mkAltEnv env scrut bndr + occ_anal_alt = occAnalAlt alt_env + + occ_anal_scrut (Var v) (alt1 : other_alts) + | not (null other_alts) || not (isDefaultAlt alt1) + = (mkOneOcc env v IsInteresting 0, Var v) + -- The 'True' says that the variable occurs in an interesting + -- context; the case has at least one non-default alternative + occ_anal_scrut (Tick t e) alts + | t `tickishScopesLike` SoftScope + -- No reason to not look through all ticks here, but only + -- for soft-scoped ticks we can do so without having to + -- update returned occurrence info (see occAnal) + = second (Tick t) $ occ_anal_scrut e alts + + occ_anal_scrut scrut _alts + = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt + +occAnal env (Let bind body) + = case occAnal env body of { (body_usage, body') -> + case occAnalBind env NotTopLevel + noImpRuleEdges bind + body_usage of { (final_usage, new_binds) -> + (final_usage, mkLets new_binds body') }} + +occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr]) +occAnalArgs _ [] _ + = (emptyDetails, []) + +occAnalArgs env (arg:args) one_shots + | isTypeArg arg + = case occAnalArgs env args one_shots of { (uds, args') -> + (uds, arg:args') } + + | otherwise + = case argCtxt env one_shots of { (arg_env, one_shots') -> + case occAnal arg_env arg of { (uds1, arg') -> + case occAnalArgs env args one_shots' of { (uds2, args') -> + (uds1 `andUDs` uds2, arg':args') }}} + +{- +Applications are dealt with specially because we want +the "build hack" to work. + +Note [Arguments of let-bound constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f x = let y = expensive x in + let z = (True,y) in + (case z of {(p,q)->q}, case z of {(p,q)->q}) +We feel free to duplicate the WHNF (True,y), but that means +that y may be duplicated thereby. + +If we aren't careful we duplicate the (expensive x) call! +Constructors are rather like lambdas in this way. +-} + +occAnalApp :: OccEnv + -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id]) + -> (UsageDetails, Expr CoreBndr) +occAnalApp env (Var fun, args, ticks) + | null ticks = (uds, mkApps (Var fun) args') + | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args') + where + uds = fun_uds `andUDs` final_args_uds + + !(args_uds, args') = occAnalArgs env args one_shots + !final_args_uds + | isRhsEnv env && is_exp = markAllNonTailCalled $ + markAllInsideLam args_uds + | otherwise = markAllNonTailCalled args_uds + -- We mark the free vars of the argument of a constructor or PAP + -- as "inside-lambda", if it is the RHS of a let(rec). + -- This means that nothing gets inlined into a constructor or PAP + -- argument position, which is what we want. Typically those + -- constructor arguments are just variables, or trivial expressions. + -- We use inside-lam because it's like eta-expanding the PAP. + -- + -- This is the *whole point* of the isRhsEnv predicate + -- See Note [Arguments of let-bound constructors] + + n_val_args = valArgCount args + n_args = length args + fun_uds = mkOneOcc env fun (if n_val_args > 0 then IsInteresting else NotInteresting) n_args + is_exp = isExpandableApp fun n_val_args + -- See Note [CONLIKE pragma] in BasicTypes + -- The definition of is_exp should match that in GHC.Core.Op.Simplify.prepareRhs + + one_shots = argsOneShots (idStrictness fun) guaranteed_val_args + guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo + (occ_one_shots env)) + -- See Note [Sources of one-shot information], bullet point A'] + +occAnalApp env (fun, args, ticks) + = (markAllNonTailCalled (fun_uds `andUDs` args_uds), + mkTicks ticks $ mkApps fun' args') + where + !(fun_uds, fun') = occAnal (addAppCtxt env args) fun + -- The addAppCtxt is a bit cunning. One iteration of the simplifier + -- often leaves behind beta redexs like + -- (\x y -> e) a1 a2 + -- Here we would like to mark x,y as one-shot, and treat the whole + -- thing much like a let. We do this by pushing some True items + -- onto the context stack. + !(args_uds, args') = occAnalArgs env args [] + +zapDetailsIf :: Bool -- If this is true + -> UsageDetails -- Then do zapDetails on this + -> UsageDetails +zapDetailsIf True uds = zapDetails uds +zapDetailsIf False uds = uds + +{- +Note [Sources of one-shot information] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The occurrence analyser obtains one-shot-lambda information from two sources: + +A: Saturated applications: eg f e1 .. en + + In general, given a call (f e1 .. en) we can propagate one-shot info from + f's strictness signature into e1 .. en, but /only/ if n is enough to + saturate the strictness signature. A strictness signature like + + f :: C1(C1(L))LS + + means that *if f is applied to three arguments* then it will guarantee to + call its first argument at most once, and to call the result of that at + most once. But if f has fewer than three arguments, all bets are off; e.g. + + map (f (\x y. expensive) e2) xs + + Here the \x y abstraction may be called many times (once for each element of + xs) so we should not mark x and y as one-shot. But if it was + + map (f (\x y. expensive) 3 2) xs + + then the first argument of f will be called at most once. + + The one-shot info, derived from f's strictness signature, is + computed by 'argsOneShots', called in occAnalApp. + +A': Non-obviously saturated applications: eg build (f (\x y -> expensive)) + where f is as above. + + In this case, f is only manifestly applied to one argument, so it does not + look saturated. So by the previous point, we should not use its strictness + signature to learn about the one-shotness of \x y. But in this case we can: + build is fully applied, so we may use its strictness signature; and from + that we learn that build calls its argument with two arguments *at most once*. + + So there is really only one call to f, and it will have three arguments. In + that sense, f is saturated, and we may proceed as described above. + + Hence the computation of 'guaranteed_val_args' in occAnalApp, using + '(occ_one_shots env)'. See also #13227, comment:9 + +B: Let-bindings: eg let f = \c. let ... in \n -> blah + in (build f, build f) + + Propagate one-shot info from the demanand-info on 'f' to the + lambdas in its RHS (which may not be syntactically at the top) + + This information must have come from a previous run of the demanand + analyser. + +Previously, the demand analyser would *also* set the one-shot information, but +that code was buggy (see #11770), so doing it only in on place, namely here, is +saner. + +Note [OneShots] +~~~~~~~~~~~~~~~ +When analysing an expression, the occ_one_shots argument contains information +about how the function is being used. The length of the list indicates +how many arguments will eventually be passed to the analysed expression, +and the OneShotInfo indicates whether this application is once or multiple times. + +Example: + + Context of f occ_one_shots when analysing f + + f 1 2 [OneShot, OneShot] + map (f 1) [OneShot, NoOneShotInfo] + build f [OneShot, OneShot] + f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot] + +Note [Binders in case alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + case x of y { (a,b) -> f y } +We treat 'a', 'b' as dead, because they don't physically occur in the +case alternative. (Indeed, a variable is dead iff it doesn't occur in +its scope in the output of OccAnal.) It really helps to know when +binders are unused. See esp the call to isDeadBinder in +Simplify.mkDupableAlt + +In this example, though, the Simplifier will bring 'a' and 'b' back to +life, because it binds 'y' to (a,b) (imagine got inlined and +scrutinised y). +-} + +occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr + -> (UsageDetails, [CoreBndr], CoreExpr) +occAnalLamOrRhs env [] body + = case occAnal env body of (body_usage, body') -> (body_usage, [], body') + -- RHS of thunk or nullary join point +occAnalLamOrRhs env (bndr:bndrs) body + | isTyVar bndr + = -- Important: Keep the environment so that we don't inline into an RHS like + -- \(@ x) -> C @x (f @x) + -- (see the beginning of Note [Cascading inlines]). + case occAnalLamOrRhs env bndrs body of + (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body') +occAnalLamOrRhs env binders body + = case occAnal env_body body of { (body_usage, body') -> + let + (final_usage, tagged_binders) = tagLamBinders body_usage binders' + -- Use binders' to put one-shot info on the lambdas + in + (final_usage, tagged_binders, body') } + where + (env_body, binders') = oneShotGroup env binders + +occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr)) + -> CoreAlt + -> (UsageDetails, Alt IdWithOccInfo) +occAnalAlt (env, scrut_bind) (con, bndrs, rhs) + = case occAnal env rhs of { (rhs_usage1, rhs1) -> + let + (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs + -- See Note [Binders in case alternatives] + (alt_usg', rhs2) = wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1 + in + (alt_usg', (con, tagged_bndrs, rhs2)) } + +wrapAltRHS :: OccEnv + -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv + -> UsageDetails -- usage for entire alt (p -> rhs) + -> [Var] -- alt binders + -> CoreExpr -- alt RHS + -> (UsageDetails, CoreExpr) +wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs + | occ_binder_swap env + , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this + -- handles condition (a) in Note [Binder swap] + , not captured -- See condition (b) in Note [Binder swap] + = ( alt_usg' `andUDs` let_rhs_usg + , Let (NonRec tagged_scrut_var let_rhs') alt_rhs ) + where + captured = any (`usedIn` let_rhs_usg) bndrs -- Check condition (b) + + -- The rhs of the let may include coercion variables + -- if the scrutinee was a cast, so we must gather their + -- usage. See Note [Gather occurrences of coercion variables] + -- Moreover, the rhs of the let may mention the case-binder, and + -- we want to gather its occ-info as well + (let_rhs_usg, let_rhs') = occAnal env let_rhs + + (alt_usg', tagged_scrut_var) = tagLamBinder alt_usg scrut_var + +wrapAltRHS _ _ alt_usg _ alt_rhs + = (alt_usg, alt_rhs) + +{- +************************************************************************ +* * + OccEnv +* * +************************************************************************ +-} + +data OccEnv + = OccEnv { occ_encl :: !OccEncl -- Enclosing context information + , occ_one_shots :: !OneShots -- See Note [OneShots] + , occ_gbl_scrut :: GlobalScruts + + , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active + + , occ_rule_act :: Activation -> Bool -- Which rules are active + -- See Note [Finding rule RHS free vars] + + , occ_binder_swap :: !Bool -- enable the binder_swap + -- See CorePrep Note [Dead code in CorePrep] + } + +type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees] + +----------------------------- +-- OccEncl is used to control whether to inline into constructor arguments +-- For example: +-- x = (p,q) -- Don't inline p or q +-- y = /\a -> (p a, q a) -- Still don't inline p or q +-- z = f (p,q) -- Do inline p,q; it may make a rule fire +-- So OccEncl tells enough about the context to know what to do when +-- we encounter a constructor application or PAP. + +data OccEncl + = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda + -- Don't inline into constructor args here + | OccVanilla -- Argument of function, body of lambda, scruintee of case etc. + -- Do inline into constructor args here + +instance Outputable OccEncl where + ppr OccRhs = text "occRhs" + ppr OccVanilla = text "occVanilla" + +-- See note [OneShots] +type OneShots = [OneShotInfo] + +initOccEnv :: OccEnv +initOccEnv + = OccEnv { occ_encl = OccVanilla + , occ_one_shots = [] + , occ_gbl_scrut = emptyVarSet + -- To be conservative, we say that all + -- inlines and rules are active + , occ_unf_act = \_ -> True + , occ_rule_act = \_ -> True + , occ_binder_swap = True } + +vanillaCtxt :: OccEnv -> OccEnv +vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] } + +rhsCtxt :: OccEnv -> OccEnv +rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] } + +argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots]) +argCtxt env [] + = (env { occ_encl = OccVanilla, occ_one_shots = [] }, []) +argCtxt env (one_shots:one_shots_s) + = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s) + +isRhsEnv :: OccEnv -> Bool +isRhsEnv (OccEnv { occ_encl = OccRhs }) = True +isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False + +oneShotGroup :: OccEnv -> [CoreBndr] + -> ( OccEnv + , [CoreBndr] ) + -- The result binders have one-shot-ness set that they might not have had originally. + -- This happens in (build (\c n -> e)). Here the occurrence analyser + -- linearity context knows that c,n are one-shot, and it records that fact in + -- the binder. This is useful to guide subsequent float-in/float-out transformations + +oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs + = go ctxt bndrs [] + where + go ctxt [] rev_bndrs + = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla } + , reverse rev_bndrs ) + + go [] bndrs rev_bndrs + = ( env { occ_one_shots = [], occ_encl = OccVanilla } + , reverse rev_bndrs ++ bndrs ) + + go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs + | isId bndr = go ctxt' bndrs (bndr': rev_bndrs) + | otherwise = go ctxt bndrs (bndr : rev_bndrs) + where + bndr' = updOneShotInfo bndr one_shot + -- Use updOneShotInfo, not setOneShotInfo, as pre-existing + -- one-shot info might be better than what we can infer, e.g. + -- due to explicit use of the magic 'oneShot' function. + -- See Note [The oneShot function] + + +markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var] +-- Mark the lambdas of a non-recursive join point as one-shot. +-- This is good to prevent gratuitous float-out etc +markJoinOneShots mb_join_arity bndrs + = case mb_join_arity of + Nothing -> bndrs + Just n -> go n bndrs + where + go 0 bndrs = bndrs + go _ [] = [] -- This can legitimately happen. + -- e.g. let j = case ... in j True + -- This will become an arity-1 join point after the + -- simplifier has eta-expanded it; but it may not have + -- enough lambdas /yet/. (Lint checks that JoinIds do + -- have enough lambdas.) + go n (b:bs) = b' : go (n-1) bs + where + b' | isId b = setOneShotLambda b + | otherwise = b + +addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv +addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args + = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt } + +transClosureFV :: UniqFM VarSet -> UniqFM VarSet +-- If (f,g), (g,h) are in the input, then (f,h) is in the output +-- as well as (f,g), (g,h) +transClosureFV env + | no_change = env + | otherwise = transClosureFV (listToUFM new_fv_list) + where + (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env) + -- It's OK to use nonDetUFMToList here because we'll forget the + -- ordering by creating a new set with listToUFM + bump no_change (b,fvs) + | no_change_here = (no_change, (b,fvs)) + | otherwise = (False, (b,new_fvs)) + where + (new_fvs, no_change_here) = extendFvs env fvs + +------------- +extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet +extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag + +extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool) +-- (extendFVs env s) returns +-- (s `union` env(s), env(s) `subset` s) +extendFvs env s + | isNullUFM env + = (s, True) + | otherwise + = (s `unionVarSet` extras, extras `subVarSet` s) + where + extras :: VarSet -- env(s) + extras = nonDetFoldUFM unionVarSet emptyVarSet $ + -- It's OK to use nonDetFoldUFM here because unionVarSet commutes + intersectUFM_C (\x _ -> x) env (getUniqSet s) + +{- +************************************************************************ +* * + Binder swap +* * +************************************************************************ + +Note [Binder swap] +~~~~~~~~~~~~~~~~~~ +The "binder swap" transformation swaps occurrence of the +scrutinee of a case for occurrences of the case-binder: + + (1) case x of b { pi -> ri } + ==> + case x of b { pi -> let x=b in ri } + + (2) case (x |> co) of b { pi -> ri } + ==> + case (x |> co) of b { pi -> let x = b |> sym co in ri } + +In both cases, the trivial 'let' can be eliminated by the +immediately following simplifier pass. + +There are two reasons for making this swap: + +(A) It reduces the number of occurrences of the scrutinee, x. + That in turn might reduce its occurrences to one, so we + can inline it and save an allocation. E.g. + let x = factorial y in case x of b { I# v -> ...x... } + If we replace 'x' by 'b' in the alternative we get + let x = factorial y in case x of b { I# v -> ...b... } + and now we can inline 'x', thus + case (factorial y) of b { I# v -> ...b... } + +(B) The case-binder b has unfolding information; in the + example above we know that b = I# v. That in turn allows + nested cases to simplify. Consider + case x of b { I# v -> + ...(case x of b2 { I# v2 -> rhs })... + If we replace 'x' by 'b' in the alternative we get + case x of b { I# v -> + ...(case b of b2 { I# v2 -> rhs })... + and now it is trivial to simplify the inner case: + case x of b { I# v -> + ...(let b2 = b in rhs)... + + The same can happen even if the scrutinee is a variable + with a cast: see Note [Case of cast] + +In both cases, in a particular alternative (pi -> ri), we only +add the binding if + (a) x occurs free in (pi -> ri) + (ie it occurs in ri, but is not bound in pi) + (b) the pi does not bind b (or the free vars of co) +We need (a) and (b) for the inserted binding to be correct. + +For the alternatives where we inject the binding, we can transfer +all x's OccInfo to b. And that is the point. + +Notice that + * The deliberate shadowing of 'x'. + * That (a) rapidly becomes false, so no bindings are injected. + +The reason for doing these transformations /here in the occurrence +analyser/ is because it allows us to adjust the OccInfo for 'x' and +'b' as we go. + + * Suppose the only occurrences of 'x' are the scrutinee and in the + ri; then this transformation makes it occur just once, and hence + get inlined right away. + + * If instead we do this in the Simplifier, we don't know whether 'x' + is used in ri, so we are forced to pessimistically zap b's OccInfo + even though it is typically dead (ie neither it nor x appear in + the ri). There's nothing actually wrong with zapping it, except + that it's kind of nice to know which variables are dead. My nose + tells me to keep this information as robustly as possible. + +The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding +{x=b}; it's Nothing if the binder-swap doesn't happen. + +There is a danger though. Consider + let v = x +# y + in case (f v) of w -> ...v...v... +And suppose that (f v) expands to just v. Then we'd like to +use 'w' instead of 'v' in the alternative. But it may be too +late; we may have substituted the (cheap) x+#y for v in the +same simplifier pass that reduced (f v) to v. + +I think this is just too bad. CSE will recover some of it. + +Note [Case of cast] +~~~~~~~~~~~~~~~~~~~ +Consider case (x `cast` co) of b { I# -> + ... (case (x `cast` co) of {...}) ... +We'd like to eliminate the inner case. That is the motivation for +equation (2) in Note [Binder swap]. When we get to the inner case, we +inline x, cancel the casts, and away we go. + +Note [Binder swap on GlobalId scrutinees] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When the scrutinee is a GlobalId we must take care in two ways + + i) In order to *know* whether 'x' occurs free in the RHS, we need its + occurrence info. BUT, we don't gather occurrence info for + GlobalIds. That's the reason for the (small) occ_gbl_scrut env in + OccEnv is for: it says "gather occurrence info for these". + + ii) We must call localiseId on 'x' first, in case it's a GlobalId, or + has an External Name. See, for example, SimplEnv Note [Global Ids in + the substitution]. + +Note [Zap case binders in proxy bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +From the original + case x of cb(dead) { p -> ...x... } +we will get + case x of cb(live) { p -> let x = cb in ...x... } + +Core Lint never expects to find an *occurrence* of an Id marked +as Dead, so we must zap the OccInfo on cb before making the +binding x = cb. See #5028. + +NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier +doesn't use it. So this is only to satisfy the perhaps-over-picky Lint. + +Historical note [no-case-of-case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We *used* to suppress the binder-swap in case expressions when +-fno-case-of-case is on. Old remarks: + "This happens in the first simplifier pass, + and enhances full laziness. Here's the bad case: + f = \ y -> ...(case x of I# v -> ...(case x of ...) ... ) + If we eliminate the inner case, we trap it inside the I# v -> arm, + which might prevent some full laziness happening. I've seen this + in action in spectral/cichelli/Prog.hs: + [(m,n) | m <- [1..max], n <- [1..max]] + Hence the check for NoCaseOfCase." +However, now the full-laziness pass itself reverses the binder-swap, so this +check is no longer necessary. + +Historical note [Suppressing the case binder-swap] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This old note describes a problem that is also fixed by doing the +binder-swap in OccAnal: + + There is another situation when it might make sense to suppress the + case-expression binde-swap. If we have + + case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 } + ...other cases .... } + + We'll perform the binder-swap for the outer case, giving + + case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 } + ...other cases .... } + + But there is no point in doing it for the inner case, because w1 can't + be inlined anyway. Furthermore, doing the case-swapping involves + zapping w2's occurrence info (see paragraphs that follow), and that + forces us to bind w2 when doing case merging. So we get + + case x of w1 { A -> let w2 = w1 in e1 + B -> let w2 = w1 in e2 + ...other cases .... } + + This is plain silly in the common case where w2 is dead. + + Even so, I can't see a good way to implement this idea. I tried + not doing the binder-swap if the scrutinee was already evaluated + but that failed big-time: + + data T = MkT !Int + + case v of w { MkT x -> + case x of x1 { I# y1 -> + case x of x2 { I# y2 -> ... + + Notice that because MkT is strict, x is marked "evaluated". But to + eliminate the last case, we must either make sure that x (as well as + x1) has unfolding MkT y1. The straightforward thing to do is to do + the binder-swap. So this whole note is a no-op. + +It's fixed by doing the binder-swap in OccAnal because we can do the +binder-swap unconditionally and still get occurrence analysis +information right. +-} + +mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr)) +-- Does three things: a) makes the occ_one_shots = OccVanilla +-- b) extends the GlobalScruts if possible +-- c) returns a proxy mapping, binding the scrutinee +-- to the case binder, if possible +mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr + = case stripTicksTopE (const True) scrut of + Var v -> add_scrut v case_bndr' + Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co)) + -- See Note [Case of cast] + _ -> (env { occ_encl = OccVanilla }, Nothing) + + where + add_scrut v rhs + | isGlobalId v = (env { occ_encl = OccVanilla }, Nothing) + | otherwise = ( env { occ_encl = OccVanilla + , occ_gbl_scrut = pe `extendVarSet` v } + , Just (localise v, rhs) ) + -- ToDO: this isGlobalId stuff is a TEMPORARY FIX + -- to avoid the binder-swap for GlobalIds + -- See #16346 + + case_bndr' = Var (zapIdOccInfo case_bndr) + -- See Note [Zap case binders in proxy bindings] + + -- Localise the scrut_var before shadowing it; we're making a + -- new binding for it, and it might have an External Name, or + -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees] + -- Also we don't want any INLINE or NOINLINE pragmas! + localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) + (idType scrut_var) + +{- +************************************************************************ +* * +\subsection[OccurAnal-types]{OccEnv} +* * +************************************************************************ + +Note [UsageDetails and zapping] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On many occasions, we must modify all gathered occurrence data at once. For +instance, all occurrences underneath a (non-one-shot) lambda set the +'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but +that takes O(n) time and we will do this often---in particular, there are many +places where tail calls are not allowed, and each of these causes all variables +to get marked with 'NoTailCallInfo'. + +Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along +with the 'OccInfoEnv'. Each of these extra environments is a "zapped set" +recording which variables have been zapped in some way. Zapping all occurrence +info then simply means setting the corresponding zapped set to the whole +'OccInfoEnv', a fast O(1) operation. +-} + +type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage + -- INVARIANT: never IAmDead + -- (Deadness is signalled by not being in the map at all) + +type ZappedSet = OccInfoEnv -- Values are ignored + +data UsageDetails + = UD { ud_env :: !OccInfoEnv + , ud_z_many :: ZappedSet -- apply 'markMany' to these + , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these + , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these + -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv + +instance Outputable UsageDetails where + ppr ud = ppr (ud_env (flattenUsageDetails ud)) + +------------------- +-- UsageDetails API + +andUDs, orUDs + :: UsageDetails -> UsageDetails -> UsageDetails +andUDs = combineUsageDetailsWith addOccInfo +orUDs = combineUsageDetailsWith orOccInfo + +andUDsList :: [UsageDetails] -> UsageDetails +andUDsList = foldl' andUDs emptyDetails + +mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails +mkOneOcc env id int_cxt arity + | isLocalId id + = singleton $ OneOcc { occ_in_lam = NotInsideLam + , occ_one_br = InOneBranch + , occ_int_cxt = int_cxt + , occ_tail = AlwaysTailCalled arity } + | id `elemVarSet` occ_gbl_scrut env + = singleton noOccInfo + + | otherwise + = emptyDetails + where + singleton info = emptyDetails { ud_env = unitVarEnv id info } + +addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails +addOneOcc ud id info + = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info } + `alterZappedSets` (`delVarEnv` id) + where + plus_zapped old new = doZapping ud id old `addOccInfo` new + +addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails +addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set + -- It's OK to use nonDetFoldUFM here because addManyOccs commutes + +-- Add several occurrences, assumed not to be tail calls +addManyOccs :: Var -> UsageDetails -> UsageDetails +addManyOccs v u | isId v = addOneOcc u v noOccInfo + | otherwise = u + -- Give a non-committal binder info (i.e noOccInfo) because + -- a) Many copies of the specialised thing can appear + -- b) We don't want to substitute a BIG expression inside a RULE + -- even if that's the only occurrence of the thing + -- (Same goes for INLINE.) + +delDetails :: UsageDetails -> Id -> UsageDetails +delDetails ud bndr + = ud `alterUsageDetails` (`delVarEnv` bndr) + +delDetailsList :: UsageDetails -> [Id] -> UsageDetails +delDetailsList ud bndrs + = ud `alterUsageDetails` (`delVarEnvList` bndrs) + +emptyDetails :: UsageDetails +emptyDetails = UD { ud_env = emptyVarEnv + , ud_z_many = emptyVarEnv + , ud_z_in_lam = emptyVarEnv + , ud_z_no_tail = emptyVarEnv } + +isEmptyDetails :: UsageDetails -> Bool +isEmptyDetails = isEmptyVarEnv . ud_env + +markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails + :: UsageDetails -> UsageDetails +markAllMany ud = ud { ud_z_many = ud_env ud } +markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud } +markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud } + +zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo + +lookupDetails :: UsageDetails -> Id -> OccInfo +lookupDetails ud id + | isCoVar id -- We do not currently gather occurrence info (from types) + = noOccInfo -- for CoVars, so we must conservatively mark them as used + -- See Note [DoO not mark CoVars as dead] + | otherwise + = case lookupVarEnv (ud_env ud) id of + Just occ -> doZapping ud id occ + Nothing -> IAmDead + +usedIn :: Id -> UsageDetails -> Bool +v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud + +udFreeVars :: VarSet -> UsageDetails -> VarSet +-- Find the subset of bndrs that are mentioned in uds +udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud) + +{- Note [Do not mark CoVars as dead] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's obviously wrong to mark CoVars as dead if they are used. +Currently we don't traverse types to gather usase info for CoVars, +so we had better treat them as having noOccInfo. + +This showed up in #15696 we had something like + case eq_sel d of co -> ...(typeError @(...co...) "urk")... + +Then 'd' was substituted by a dictionary, so the expression +simpified to + case (Coercion <blah>) of co -> ...(typeError @(...co...) "urk")... + +But then the "drop the case altogether" equation of rebuildCase +thought that 'co' was dead, and discarded the entire case. Urk! + +I have no idea how we managed to avoid this pitfall for so long! +-} + +------------------- +-- Auxiliary functions for UsageDetails implementation + +combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo) + -> UsageDetails -> UsageDetails -> UsageDetails +combineUsageDetailsWith plus_occ_info ud1 ud2 + | isEmptyDetails ud1 = ud2 + | isEmptyDetails ud2 = ud1 + | otherwise + = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2) + , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2) + , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2) + , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) } + +doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo +doZapping ud var occ + = doZappingByUnique ud (varUnique var) occ + +doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo +doZappingByUnique ud uniq + = (if | in_subset ud_z_many -> markMany + | in_subset ud_z_in_lam -> markInsideLam + | otherwise -> id) . + (if | in_subset ud_z_no_tail -> markNonTailCalled + | otherwise -> id) + where + in_subset field = uniq `elemVarEnvByKey` field ud + +alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails +alterZappedSets ud f + = ud { ud_z_many = f (ud_z_many ud) + , ud_z_in_lam = f (ud_z_in_lam ud) + , ud_z_no_tail = f (ud_z_no_tail ud) } + +alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails +alterUsageDetails ud f + = ud { ud_env = f (ud_env ud) } + `alterZappedSets` f + +flattenUsageDetails :: UsageDetails -> UsageDetails +flattenUsageDetails ud + = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) } + `alterZappedSets` const emptyVarEnv + +------------------- +-- See Note [Adjusting right-hand sides] +adjustRhsUsage :: Maybe JoinArity -> RecFlag + -> [CoreBndr] -- Outer lambdas, AFTER occ anal + -> UsageDetails -> UsageDetails +adjustRhsUsage mb_join_arity rec_flag bndrs usage + = maybe_mark_lam (maybe_drop_tails usage) + where + maybe_mark_lam ud | one_shot = ud + | otherwise = markAllInsideLam ud + maybe_drop_tails ud | exact_join = ud + | otherwise = markAllNonTailCalled ud + + one_shot = case mb_join_arity of + Just join_arity + | isRec rec_flag -> False + | otherwise -> all isOneShotBndr (drop join_arity bndrs) + Nothing -> all isOneShotBndr bndrs + + exact_join = case mb_join_arity of + Just join_arity -> bndrs `lengthIs` join_arity + _ -> False + +type IdWithOccInfo = Id + +tagLamBinders :: UsageDetails -- Of scope + -> [Id] -- Binders + -> (UsageDetails, -- Details with binders removed + [IdWithOccInfo]) -- Tagged binders +tagLamBinders usage binders + = usage' `seq` (usage', bndrs') + where + (usage', bndrs') = mapAccumR tagLamBinder usage binders + +tagLamBinder :: UsageDetails -- Of scope + -> Id -- Binder + -> (UsageDetails, -- Details with binder removed + IdWithOccInfo) -- Tagged binders +-- Used for lambda and case binders +-- It copes with the fact that lambda bindings can have a +-- stable unfolding, used for join points +tagLamBinder usage bndr + = (usage2, bndr') + where + occ = lookupDetails usage bndr + bndr' = setBinderOcc (markNonTailCalled occ) bndr + -- Don't try to make an argument into a join point + usage1 = usage `delDetails` bndr + usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr) + -- This is effectively the RHS of a + -- non-join-point binding, so it's okay to use + -- addManyOccsSet, which assumes no tail calls + | otherwise = usage1 + +tagNonRecBinder :: TopLevelFlag -- At top level? + -> UsageDetails -- Of scope + -> CoreBndr -- Binder + -> (UsageDetails, -- Details with binder removed + IdWithOccInfo) -- Tagged binder + +tagNonRecBinder lvl usage binder + = let + occ = lookupDetails usage binder + will_be_join = decideJoinPointHood lvl usage [binder] + occ' | will_be_join = -- must already be marked AlwaysTailCalled + ASSERT(isAlwaysTailCalled occ) occ + | otherwise = markNonTailCalled occ + binder' = setBinderOcc occ' binder + usage' = usage `delDetails` binder + in + usage' `seq` (usage', binder') + +tagRecBinders :: TopLevelFlag -- At top level? + -> UsageDetails -- Of body of let ONLY + -> [(CoreBndr, -- Binder + UsageDetails, -- RHS usage details + [CoreBndr])] -- Lambdas in new RHS + -> (UsageDetails, -- Adjusted details for whole scope, + -- with binders removed + [IdWithOccInfo]) -- Tagged binders +-- Substantially more complicated than non-recursive case. Need to adjust RHS +-- details *before* tagging binders (because the tags depend on the RHSes). +tagRecBinders lvl body_uds triples + = let + (bndrs, rhs_udss, _) = unzip3 triples + + -- 1. Determine join-point-hood of whole group, as determined by + -- the *unadjusted* usage details + unadj_uds = foldr andUDs body_uds rhs_udss + will_be_joins = decideJoinPointHood lvl unadj_uds bndrs + + -- 2. Adjust usage details of each RHS, taking into account the + -- join-point-hood decision + rhs_udss' = map adjust triples + adjust (bndr, rhs_uds, rhs_bndrs) + = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds + where + -- Can't use willBeJoinId_maybe here because we haven't tagged the + -- binder yet (the tag depends on these adjustments!) + mb_join_arity + | will_be_joins + , let occ = lookupDetails unadj_uds bndr + , AlwaysTailCalled arity <- tailCallInfo occ + = Just arity + | otherwise + = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if + Nothing -- we are making join points! + + -- 3. Compute final usage details from adjusted RHS details + adj_uds = foldr andUDs body_uds rhs_udss' + + -- 4. Tag each binder with its adjusted details + bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr + | bndr <- bndrs ] + + -- 5. Drop the binders from the adjusted details and return + usage' = adj_uds `delDetailsList` bndrs + in + (usage', bndrs') + +setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr +setBinderOcc occ_info bndr + | isTyVar bndr = bndr + | isExportedId bndr = if isManyOccs (idOccInfo bndr) + then bndr + else setIdOccInfo bndr noOccInfo + -- Don't use local usage info for visible-elsewhere things + -- BUT *do* erase any IAmALoopBreaker annotation, because we're + -- about to re-generate it and it shouldn't be "sticky" + + | otherwise = setIdOccInfo bndr occ_info + +-- | Decide whether some bindings should be made into join points or not. +-- Returns `False` if they can't be join points. Note that it's an +-- all-or-nothing decision, as if multiple binders are given, they're +-- assumed to be mutually recursive. +-- +-- It must, however, be a final decision. If we say "True" for 'f', +-- and then subsequently decide /not/ make 'f' into a join point, then +-- the decision about another binding 'g' might be invalidated if (say) +-- 'f' tail-calls 'g'. +-- +-- See Note [Invariants on join points] in GHC.Core. +decideJoinPointHood :: TopLevelFlag -> UsageDetails + -> [CoreBndr] + -> Bool +decideJoinPointHood TopLevel _ _ + = False +decideJoinPointHood NotTopLevel usage bndrs + | isJoinId (head bndrs) + = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+> + ppr bndrs) + all_ok + | otherwise + = all_ok + where + -- See Note [Invariants on join points]; invariants cited by number below. + -- Invariant 2 is always satisfiable by the simplifier by eta expansion. + all_ok = -- Invariant 3: Either all are join points or none are + all ok bndrs + + ok bndr + | -- Invariant 1: Only tail calls, all same join arity + AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr) + + , -- Invariant 1 as applied to LHSes of rules + all (ok_rule arity) (idCoreRules bndr) + + -- Invariant 2a: stable unfoldings + -- See Note [Join points and INLINE pragmas] + , ok_unfolding arity (realIdUnfolding bndr) + + -- Invariant 4: Satisfies polymorphism rule + , isValidJoinPointType arity (idType bndr) + = True + + | otherwise + = False + + ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans + ok_rule join_arity (Rule { ru_args = args }) + = args `lengthIs` join_arity + -- Invariant 1 as applied to LHSes of rules + + -- ok_unfolding returns False if we should /not/ convert a non-join-id + -- into a join-id, even though it is AlwaysTailCalled + ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs }) + = not (isStableSource src && join_arity > joinRhsArity rhs) + ok_unfolding _ (DFunUnfolding {}) + = False + ok_unfolding _ _ + = True + +willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity +willBeJoinId_maybe bndr + = case tailCallInfo (idOccInfo bndr) of + AlwaysTailCalled arity -> Just arity + _ -> isJoinId_maybe bndr + + +{- Note [Join points and INLINE pragmas] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f x = let g = \x. not -- Arity 1 + {-# INLINE g #-} + in case x of + A -> g True True + B -> g True False + C -> blah2 + +Here 'g' is always tail-called applied to 2 args, but the stable +unfolding captured by the INLINE pragma has arity 1. If we try to +convert g to be a join point, its unfolding will still have arity 1 +(since it is stable, and we don't meddle with stable unfoldings), and +Lint will complain (see Note [Invariants on join points], (2a), in +GHC.Core. #13413. + +Moreover, since g is going to be inlined anyway, there is no benefit +from making it a join point. + +If it is recursive, and uselessly marked INLINE, this will stop us +making it a join point, which is annoying. But occasionally +(notably in class methods; see Note [Instances and loop breakers] in +TcInstDcls) we mark recursive things as INLINE but the recursion +unravels; so ignoring INLINE pragmas on recursive things isn't good +either. + +See Invariant 2a of Note [Invariants on join points] in GHC.Core + + +************************************************************************ +* * +\subsection{Operations over OccInfo} +* * +************************************************************************ +-} + +markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo + +markMany IAmDead = IAmDead +markMany occ = ManyOccs { occ_tail = occ_tail occ } + +markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = IsInsideLam } +markInsideLam occ = occ + +markNonTailCalled IAmDead = IAmDead +markNonTailCalled occ = occ { occ_tail = NoTailCallInfo } + +addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo + +addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) + ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` + tailCallInfo a2 } + -- Both branches are at least One + -- (Argument is never IAmDead) + +-- (orOccInfo orig new) is used +-- when combining occurrence info from branches of a case + +orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1 + , occ_tail = tail1 }) + (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2 + , occ_tail = tail2 }) + = OneOcc { occ_one_br = MultipleBranches -- because it occurs in both branches + , occ_in_lam = in_lam1 `mappend` in_lam2 + , occ_int_cxt = int_cxt1 `mappend` int_cxt2 + , occ_tail = tail1 `andTailCallInfo` tail2 } + +orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) + ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` + tailCallInfo a2 } + +andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo +andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2) + | arity1 == arity2 = info +andTailCallInfo _ _ = NoTailCallInfo diff --git a/compiler/GHC/Core/Op/SetLevels.hs b/compiler/GHC/Core/Op/SetLevels.hs new file mode 100644 index 0000000000..a3b1fd75b3 --- /dev/null +++ b/compiler/GHC/Core/Op/SetLevels.hs @@ -0,0 +1,1771 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section{GHC.Core.Op.SetLevels} + + *************************** + Overview + *************************** + +1. We attach binding levels to Core bindings, in preparation for floating + outwards (@FloatOut@). + +2. We also let-ify many expressions (notably case scrutinees), so they + will have a fighting chance of being floated sensible. + +3. Note [Need for cloning during float-out] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + We clone the binders of any floatable let-binding, so that when it is + floated out it will be unique. Example + (let x=2 in x) + (let x=3 in x) + we must clone before floating so we get + let x1=2 in + let x2=3 in + x1+x2 + + NOTE: this can't be done using the uniqAway idea, because the variable + must be unique in the whole program, not just its current scope, + because two variables in different scopes may float out to the + same top level place + + NOTE: Very tiresomely, we must apply this substitution to + the rules stored inside a variable too. + + We do *not* clone top-level bindings, because some of them must not change, + but we *do* clone bindings that are heading for the top level + +4. Note [Binder-swap during float-out] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In the expression + case x of wild { p -> ...wild... } + we substitute x for wild in the RHS of the case alternatives: + case x of wild { p -> ...x... } + This means that a sub-expression involving x is not "trapped" inside the RHS. + And it's not inconvenient because we already have a substitution. + + Note that this is EXACTLY BACKWARDS from the what the simplifier does. + The simplifier tries to get rid of occurrences of x, in favour of wild, + in the hope that there will only be one remaining occurrence of x, namely + the scrutinee of the case, and we can inline it. +-} + +{-# LANGUAGE CPP, MultiWayIf #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +module GHC.Core.Op.SetLevels ( + setLevels, + + Level(..), LevelType(..), tOP_LEVEL, isJoinCeilLvl, asJoinCeilLvl, + LevelledBind, LevelledExpr, LevelledBndr, + FloatSpec(..), floatSpecLevel, + + incMinorLvl, ltMajLvl, ltLvl, isTopLvl + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core +import GHC.Core.Op.Monad ( FloatOutSwitches(..) ) +import GHC.Core.Utils ( exprType, exprIsHNF + , exprOkForSpeculation + , exprIsTopLevelBindable + , isExprLevPoly + , collectMakeStaticArgs + ) +import GHC.Core.Arity ( exprBotStrictness_maybe ) +import GHC.Core.FVs -- all of it +import GHC.Core.Subst +import GHC.Core.Make ( sortQuantVars ) + +import Id +import IdInfo +import Var +import VarSet +import UniqSet ( nonDetFoldUniqSet ) +import UniqDSet ( getUniqDSet ) +import VarEnv +import Literal ( litIsTrivial ) +import Demand ( StrictSig, Demand, isStrictDmd, splitStrictSig, increaseStrictSigArity ) +import Cpr ( mkCprSig, botCpr ) +import Name ( getOccName, mkSystemVarName ) +import OccName ( occNameString ) +import GHC.Core.Type ( Type, mkLamTypes, splitTyConApp_maybe, tyCoVarsOfType + , mightBeUnliftedType, closeOverKindsDSet ) +import BasicTypes ( Arity, RecFlag(..), isRec ) +import GHC.Core.DataCon ( dataConOrigResTy ) +import TysWiredIn +import UniqSupply +import Util +import Outputable +import FastString +import UniqDFM +import FV +import Data.Maybe +import MonadUtils ( mapAccumLM ) + +{- +************************************************************************ +* * +\subsection{Level numbers} +* * +************************************************************************ +-} + +type LevelledExpr = TaggedExpr FloatSpec +type LevelledBind = TaggedBind FloatSpec +type LevelledBndr = TaggedBndr FloatSpec + +data Level = Level Int -- Level number of enclosing lambdas + Int -- Number of big-lambda and/or case expressions and/or + -- context boundaries between + -- here and the nearest enclosing lambda + LevelType -- Binder or join ceiling? +data LevelType = BndrLvl | JoinCeilLvl deriving (Eq) + +data FloatSpec + = FloatMe Level -- Float to just inside the binding + -- tagged with this level + | StayPut Level -- Stay where it is; binding is + -- tagged with this level + +floatSpecLevel :: FloatSpec -> Level +floatSpecLevel (FloatMe l) = l +floatSpecLevel (StayPut l) = l + +{- +The {\em level number} on a (type-)lambda-bound variable is the +nesting depth of the (type-)lambda which binds it. The outermost lambda +has level 1, so (Level 0 0) means that the variable is bound outside any lambda. + +On an expression, it's the maximum level number of its free +(type-)variables. On a let(rec)-bound variable, it's the level of its +RHS. On a case-bound variable, it's the number of enclosing lambdas. + +Top-level variables: level~0. Those bound on the RHS of a top-level +definition but ``before'' a lambda; e.g., the \tr{x} in (levels shown +as ``subscripts'')... +\begin{verbatim} +a_0 = let b_? = ... in + x_1 = ... b ... in ... +\end{verbatim} + +The main function @lvlExpr@ carries a ``context level'' (@le_ctxt_lvl@). +That's meant to be the level number of the enclosing binder in the +final (floated) program. If the level number of a sub-expression is +less than that of the context, then it might be worth let-binding the +sub-expression so that it will indeed float. + +If you can float to level @Level 0 0@ worth doing so because then your +allocation becomes static instead of dynamic. We always start with +context @Level 0 0@. + + +Note [FloatOut inside INLINE] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@InlineCtxt@ very similar to @Level 0 0@, but is used for one purpose: +to say "don't float anything out of here". That's exactly what we +want for the body of an INLINE, where we don't want to float anything +out at all. See notes with lvlMFE below. + +But, check this out: + +-- At one time I tried the effect of not floating anything out of an InlineMe, +-- but it sometimes works badly. For example, consider PrelArr.done. It +-- has the form __inline (\d. e) +-- where e doesn't mention d. If we float this to +-- __inline (let x = e in \d. x) +-- things are bad. The inliner doesn't even inline it because it doesn't look +-- like a head-normal form. So it seems a lesser evil to let things float. +-- In GHC.Core.Op.SetLevels we do set the context to (Level 0 0) when we get to an InlineMe +-- which discourages floating out. + +So the conclusion is: don't do any floating at all inside an InlineMe. +(In the above example, don't float the {x=e} out of the \d.) + +One particular case is that of workers: we don't want to float the +call to the worker outside the wrapper, otherwise the worker might get +inlined into the floated expression, and an importing module won't see +the worker at all. + +Note [Join ceiling] +~~~~~~~~~~~~~~~~~~~ +Join points can't float very far; too far, and they can't remain join points +So, suppose we have: + + f x = (joinrec j y = ... x ... in jump j x) + 1 + +One may be tempted to float j out to the top of f's RHS, but then the jump +would not be a tail call. Thus we keep track of a level called the *join +ceiling* past which join points are not allowed to float. + +The troublesome thing is that, unlike most levels to which something might +float, there is not necessarily an identifier to which the join ceiling is +attached. Fortunately, if something is to be floated to a join ceiling, it must +be dropped at the *nearest* join ceiling. Thus each level is marked as to +whether it is a join ceiling, so that FloatOut can tell which binders are being +floated to the nearest join ceiling and which to a particular binder (or set of +binders). +-} + +instance Outputable FloatSpec where + ppr (FloatMe l) = char 'F' <> ppr l + ppr (StayPut l) = ppr l + +tOP_LEVEL :: Level +tOP_LEVEL = Level 0 0 BndrLvl + +incMajorLvl :: Level -> Level +incMajorLvl (Level major _ _) = Level (major + 1) 0 BndrLvl + +incMinorLvl :: Level -> Level +incMinorLvl (Level major minor _) = Level major (minor+1) BndrLvl + +asJoinCeilLvl :: Level -> Level +asJoinCeilLvl (Level major minor _) = Level major minor JoinCeilLvl + +maxLvl :: Level -> Level -> Level +maxLvl l1@(Level maj1 min1 _) l2@(Level maj2 min2 _) + | (maj1 > maj2) || (maj1 == maj2 && min1 > min2) = l1 + | otherwise = l2 + +ltLvl :: Level -> Level -> Bool +ltLvl (Level maj1 min1 _) (Level maj2 min2 _) + = (maj1 < maj2) || (maj1 == maj2 && min1 < min2) + +ltMajLvl :: Level -> Level -> Bool + -- Tells if one level belongs to a difft *lambda* level to another +ltMajLvl (Level maj1 _ _) (Level maj2 _ _) = maj1 < maj2 + +isTopLvl :: Level -> Bool +isTopLvl (Level 0 0 _) = True +isTopLvl _ = False + +isJoinCeilLvl :: Level -> Bool +isJoinCeilLvl (Level _ _ t) = t == JoinCeilLvl + +instance Outputable Level where + ppr (Level maj min typ) + = hcat [ char '<', int maj, char ',', int min, char '>' + , ppWhen (typ == JoinCeilLvl) (char 'C') ] + +instance Eq Level where + (Level maj1 min1 _) == (Level maj2 min2 _) = maj1 == maj2 && min1 == min2 + +{- +************************************************************************ +* * +\subsection{Main level-setting code} +* * +************************************************************************ +-} + +setLevels :: FloatOutSwitches + -> CoreProgram + -> UniqSupply + -> [LevelledBind] + +setLevels float_lams binds us + = initLvl us (do_them init_env binds) + where + init_env = initialEnv float_lams + + do_them :: LevelEnv -> [CoreBind] -> LvlM [LevelledBind] + do_them _ [] = return [] + do_them env (b:bs) + = do { (lvld_bind, env') <- lvlTopBind env b + ; lvld_binds <- do_them env' bs + ; return (lvld_bind : lvld_binds) } + +lvlTopBind :: LevelEnv -> Bind Id -> LvlM (LevelledBind, LevelEnv) +lvlTopBind env (NonRec bndr rhs) + = do { rhs' <- lvl_top env NonRecursive bndr rhs + ; let (env', [bndr']) = substAndLvlBndrs NonRecursive env tOP_LEVEL [bndr] + ; return (NonRec bndr' rhs', env') } + +lvlTopBind env (Rec pairs) + = do { let (env', bndrs') = substAndLvlBndrs Recursive env tOP_LEVEL + (map fst pairs) + ; rhss' <- mapM (\(b,r) -> lvl_top env' Recursive b r) pairs + ; return (Rec (bndrs' `zip` rhss'), env') } + +lvl_top :: LevelEnv -> RecFlag -> Id -> CoreExpr -> LvlM LevelledExpr +lvl_top env is_rec bndr rhs + = lvlRhs env is_rec + (isBottomingId bndr) + Nothing -- Not a join point + (freeVars rhs) + +{- +************************************************************************ +* * +\subsection{Setting expression levels} +* * +************************************************************************ + +Note [Floating over-saturated applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we see (f x y), and (f x) is a redex (ie f's arity is 1), +we call (f x) an "over-saturated application" + +Should we float out an over-sat app, if can escape a value lambda? +It is sometimes very beneficial (-7% runtime -4% alloc over nofib -O2). +But we don't want to do it for class selectors, because the work saved +is minimal, and the extra local thunks allocated cost money. + +Arguably we could float even class-op applications if they were going to +top level -- but then they must be applied to a constant dictionary and +will almost certainly be optimised away anyway. +-} + +lvlExpr :: LevelEnv -- Context + -> CoreExprWithFVs -- Input expression + -> LvlM LevelledExpr -- Result expression + +{- +The @le_ctxt_lvl@ is, roughly, the level of the innermost enclosing +binder. Here's an example + + v = \x -> ...\y -> let r = case (..x..) of + ..x.. + in .. + +When looking at the rhs of @r@, @le_ctxt_lvl@ will be 1 because that's +the level of @r@, even though it's inside a level-2 @\y@. It's +important that @le_ctxt_lvl@ is 1 and not 2 in @r@'s rhs, because we +don't want @lvlExpr@ to turn the scrutinee of the @case@ into an MFE +--- because it isn't a *maximal* free expression. + +If there were another lambda in @r@'s rhs, it would get level-2 as well. +-} + +lvlExpr env (_, AnnType ty) = return (Type (GHC.Core.Subst.substTy (le_subst env) ty)) +lvlExpr env (_, AnnCoercion co) = return (Coercion (substCo (le_subst env) co)) +lvlExpr env (_, AnnVar v) = return (lookupVar env v) +lvlExpr _ (_, AnnLit lit) = return (Lit lit) + +lvlExpr env (_, AnnCast expr (_, co)) = do + expr' <- lvlNonTailExpr env expr + return (Cast expr' (substCo (le_subst env) co)) + +lvlExpr env (_, AnnTick tickish expr) = do + expr' <- lvlNonTailExpr env expr + let tickish' = substTickish (le_subst env) tickish + return (Tick tickish' expr') + +lvlExpr env expr@(_, AnnApp _ _) = lvlApp env expr (collectAnnArgs expr) + +-- We don't split adjacent lambdas. That is, given +-- \x y -> (x+1,y) +-- we don't float to give +-- \x -> let v = x+1 in \y -> (v,y) +-- Why not? Because partial applications are fairly rare, and splitting +-- lambdas makes them more expensive. + +lvlExpr env expr@(_, AnnLam {}) + = do { new_body <- lvlNonTailMFE new_env True body + ; return (mkLams new_bndrs new_body) } + where + (bndrs, body) = collectAnnBndrs expr + (env1, bndrs1) = substBndrsSL NonRecursive env bndrs + (new_env, new_bndrs) = lvlLamBndrs env1 (le_ctxt_lvl env) bndrs1 + -- At one time we called a special version of collectBinders, + -- which ignored coercions, because we don't want to split + -- a lambda like this (\x -> coerce t (\s -> ...)) + -- This used to happen quite a bit in state-transformer programs, + -- but not nearly so much now non-recursive newtypes are transparent. + -- [See GHC.Core.Op.SetLevels rev 1.50 for a version with this approach.] + +lvlExpr env (_, AnnLet bind body) + = do { (bind', new_env) <- lvlBind env bind + ; body' <- lvlExpr new_env body + -- No point in going via lvlMFE here. If the binding is alive + -- (mentioned in body), and the whole let-expression doesn't + -- float, then neither will the body + ; return (Let bind' body') } + +lvlExpr env (_, AnnCase scrut case_bndr ty alts) + = do { scrut' <- lvlNonTailMFE env True scrut + ; lvlCase env (freeVarsOf scrut) scrut' case_bndr ty alts } + +lvlNonTailExpr :: LevelEnv -- Context + -> CoreExprWithFVs -- Input expression + -> LvlM LevelledExpr -- Result expression +lvlNonTailExpr env expr + = lvlExpr (placeJoinCeiling env) expr + +------------------------------------------- +lvlApp :: LevelEnv + -> CoreExprWithFVs + -> (CoreExprWithFVs, [CoreExprWithFVs]) -- Input application + -> LvlM LevelledExpr -- Result expression +lvlApp env orig_expr ((_,AnnVar fn), args) + | floatOverSat env -- See Note [Floating over-saturated applications] + , arity > 0 + , arity < n_val_args + , Nothing <- isClassOpId_maybe fn + = do { rargs' <- mapM (lvlNonTailMFE env False) rargs + ; lapp' <- lvlNonTailMFE env False lapp + ; return (foldl' App lapp' rargs') } + + | otherwise + = do { (_, args') <- mapAccumLM lvl_arg stricts args + -- Take account of argument strictness; see + -- Note [Floating to the top] + ; return (foldl' App (lookupVar env fn) args') } + where + n_val_args = count (isValArg . deAnnotate) args + arity = idArity fn + + stricts :: [Demand] -- True for strict /value/ arguments + stricts = case splitStrictSig (idStrictness fn) of + (arg_ds, _) | arg_ds `lengthExceeds` n_val_args + -> [] + | otherwise + -> arg_ds + + -- Separate out the PAP that we are floating from the extra + -- arguments, by traversing the spine until we have collected + -- (n_val_args - arity) value arguments. + (lapp, rargs) = left (n_val_args - arity) orig_expr [] + + left 0 e rargs = (e, rargs) + left n (_, AnnApp f a) rargs + | isValArg (deAnnotate a) = left (n-1) f (a:rargs) + | otherwise = left n f (a:rargs) + left _ _ _ = panic "GHC.Core.Op.SetLevels.lvlExpr.left" + + is_val_arg :: CoreExprWithFVs -> Bool + is_val_arg (_, AnnType {}) = False + is_val_arg _ = True + + lvl_arg :: [Demand] -> CoreExprWithFVs -> LvlM ([Demand], LevelledExpr) + lvl_arg strs arg | (str1 : strs') <- strs + , is_val_arg arg + = do { arg' <- lvlMFE env (isStrictDmd str1) arg + ; return (strs', arg') } + | otherwise + = do { arg' <- lvlMFE env False arg + ; return (strs, arg') } + +lvlApp env _ (fun, args) + = -- No PAPs that we can float: just carry on with the + -- arguments and the function. + do { args' <- mapM (lvlNonTailMFE env False) args + ; fun' <- lvlNonTailExpr env fun + ; return (foldl' App fun' args') } + +------------------------------------------- +lvlCase :: LevelEnv -- Level of in-scope names/tyvars + -> DVarSet -- Free vars of input scrutinee + -> LevelledExpr -- Processed scrutinee + -> Id -> Type -- Case binder and result type + -> [CoreAltWithFVs] -- Input alternatives + -> LvlM LevelledExpr -- Result expression +lvlCase env scrut_fvs scrut' case_bndr ty alts + -- See Note [Floating single-alternative cases] + | [(con@(DataAlt {}), bs, body)] <- alts + , exprIsHNF (deTagExpr scrut') -- See Note [Check the output scrutinee for exprIsHNF] + , not (isTopLvl dest_lvl) -- Can't have top-level cases + , not (floatTopLvlOnly env) -- Can float anywhere + = -- Always float the case if possible + -- Unlike lets we don't insist that it escapes a value lambda + do { (env1, (case_bndr' : bs')) <- cloneCaseBndrs env dest_lvl (case_bndr : bs) + ; let rhs_env = extendCaseBndrEnv env1 case_bndr scrut' + ; body' <- lvlMFE rhs_env True body + ; let alt' = (con, map (stayPut dest_lvl) bs', body') + ; return (Case scrut' (TB case_bndr' (FloatMe dest_lvl)) ty' [alt']) } + + | otherwise -- Stays put + = do { let (alts_env1, [case_bndr']) = substAndLvlBndrs NonRecursive env incd_lvl [case_bndr] + alts_env = extendCaseBndrEnv alts_env1 case_bndr scrut' + ; alts' <- mapM (lvl_alt alts_env) alts + ; return (Case scrut' case_bndr' ty' alts') } + where + ty' = substTy (le_subst env) ty + + incd_lvl = incMinorLvl (le_ctxt_lvl env) + dest_lvl = maxFvLevel (const True) env scrut_fvs + -- Don't abstract over type variables, hence const True + + lvl_alt alts_env (con, bs, rhs) + = do { rhs' <- lvlMFE new_env True rhs + ; return (con, bs', rhs') } + where + (new_env, bs') = substAndLvlBndrs NonRecursive alts_env incd_lvl bs + +{- Note [Floating single-alternative cases] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + data T a = MkT !a + f :: T Int -> blah + f x vs = case x of { MkT y -> + let f vs = ...(case y of I# w -> e)...f.. + in f vs + +Here we can float the (case y ...) out, because y is sure +to be evaluated, to give + f x vs = case x of { MkT y -> + case y of I# w -> + let f vs = ...(e)...f.. + in f vs + +That saves unboxing it every time round the loop. It's important in +some DPH stuff where we really want to avoid that repeated unboxing in +the inner loop. + +Things to note: + + * The test we perform is exprIsHNF, and /not/ exprOkForSpeculation. + + - exrpIsHNF catches the key case of an evaluated variable + + - exprOkForSpeculation is /false/ of an evaluated variable; + See Note [exprOkForSpeculation and evaluated variables] in GHC.Core.Utils + So we'd actually miss the key case! + + - Nothing is gained from the extra generality of exprOkForSpeculation + since we only consider floating a case whose single alternative + is a DataAlt K a b -> rhs + + * We can't float a case to top level + + * It's worth doing this float even if we don't float + the case outside a value lambda. Example + case x of { + MkT y -> (case y of I# w2 -> ..., case y of I# w2 -> ...) + If we floated the cases out we could eliminate one of them. + + * We only do this with a single-alternative case + + +Note [Setting levels when floating single-alternative cases] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Handling level-setting when floating a single-alternative case binding +is a bit subtle, as evidenced by #16978. In particular, we must keep +in mind that we are merely moving the case and its binders, not the +body. For example, suppose 'a' is known to be evaluated and we have + + \z -> case a of + (x,_) -> <body involving x and z> + +After floating we may have: + + case a of + (x,_) -> \z -> <body involving x and z> + {- some expression involving x and z -} + +When analysing <body involving...> we want to use the /ambient/ level, +and /not/ the destination level of the 'case a of (x,-) ->' binding. + +#16978 was caused by us setting the context level to the destination +level of `x` when analysing <body>. This led us to conclude that we +needed to quantify over some of its free variables (e.g. z), resulting +in shadowing and very confusing Core Lint failures. + + +Note [Check the output scrutinee for exprIsHNF] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + case x of y { + A -> ....(case y of alts).... + } + +Because of the binder-swap, the inner case will get substituted to +(case x of ..). So when testing whether the scrutinee is in HNF we +must be careful to test the *result* scrutinee ('x' in this case), not +the *input* one 'y'. The latter *is* in HNF here (because y is +evaluated), but the former is not -- and indeed we can't float the +inner case out, at least not unless x is also evaluated at its binding +site. See #5453. + +That's why we apply exprIsHNF to scrut' and not to scrut. + +See Note [Floating single-alternative cases] for why +we use exprIsHNF in the first place. +-} + +lvlNonTailMFE :: LevelEnv -- Level of in-scope names/tyvars + -> Bool -- True <=> strict context [body of case + -- or let] + -> CoreExprWithFVs -- input expression + -> LvlM LevelledExpr -- Result expression +lvlNonTailMFE env strict_ctxt ann_expr + = lvlMFE (placeJoinCeiling env) strict_ctxt ann_expr + +lvlMFE :: LevelEnv -- Level of in-scope names/tyvars + -> Bool -- True <=> strict context [body of case or let] + -> CoreExprWithFVs -- input expression + -> LvlM LevelledExpr -- Result expression +-- lvlMFE is just like lvlExpr, except that it might let-bind +-- the expression, so that it can itself be floated. + +lvlMFE env _ (_, AnnType ty) + = return (Type (GHC.Core.Subst.substTy (le_subst env) ty)) + +-- No point in floating out an expression wrapped in a coercion or note +-- If we do we'll transform lvl = e |> co +-- to lvl' = e; lvl = lvl' |> co +-- and then inline lvl. Better just to float out the payload. +lvlMFE env strict_ctxt (_, AnnTick t e) + = do { e' <- lvlMFE env strict_ctxt e + ; let t' = substTickish (le_subst env) t + ; return (Tick t' e') } + +lvlMFE env strict_ctxt (_, AnnCast e (_, co)) + = do { e' <- lvlMFE env strict_ctxt e + ; return (Cast e' (substCo (le_subst env) co)) } + +lvlMFE env strict_ctxt e@(_, AnnCase {}) + | strict_ctxt -- Don't share cases in a strict context + = lvlExpr env e -- See Note [Case MFEs] + +lvlMFE env strict_ctxt ann_expr + | floatTopLvlOnly env && not (isTopLvl dest_lvl) + -- Only floating to the top level is allowed. + || anyDVarSet isJoinId fvs -- If there is a free join, don't float + -- See Note [Free join points] + || isExprLevPoly expr + -- We can't let-bind levity polymorphic expressions + -- See Note [Levity polymorphism invariants] in GHC.Core + || notWorthFloating expr abs_vars + || not float_me + = -- Don't float it out + lvlExpr env ann_expr + + | float_is_new_lam || exprIsTopLevelBindable expr expr_ty + -- No wrapping needed if the type is lifted, or is a literal string + -- or if we are wrapping it in one or more value lambdas + = do { expr1 <- lvlFloatRhs abs_vars dest_lvl rhs_env NonRecursive + (isJust mb_bot_str) + join_arity_maybe + ann_expr + -- Treat the expr just like a right-hand side + ; var <- newLvlVar expr1 join_arity_maybe is_mk_static + ; let var2 = annotateBotStr var float_n_lams mb_bot_str + ; return (Let (NonRec (TB var2 (FloatMe dest_lvl)) expr1) + (mkVarApps (Var var2) abs_vars)) } + + -- OK, so the float has an unlifted type (not top-level bindable) + -- and no new value lambdas (float_is_new_lam is False) + -- Try for the boxing strategy + -- See Note [Floating MFEs of unlifted type] + | escapes_value_lam + , not expr_ok_for_spec -- Boxing/unboxing isn't worth it for cheap expressions + -- See Note [Test cheapness with exprOkForSpeculation] + , Just (tc, _) <- splitTyConApp_maybe expr_ty + , Just dc <- boxingDataCon_maybe tc + , let dc_res_ty = dataConOrigResTy dc -- No free type variables + [bx_bndr, ubx_bndr] = mkTemplateLocals [dc_res_ty, expr_ty] + = do { expr1 <- lvlExpr rhs_env ann_expr + ; let l1r = incMinorLvlFrom rhs_env + float_rhs = mkLams abs_vars_w_lvls $ + Case expr1 (stayPut l1r ubx_bndr) dc_res_ty + [(DEFAULT, [], mkConApp dc [Var ubx_bndr])] + + ; var <- newLvlVar float_rhs Nothing is_mk_static + ; let l1u = incMinorLvlFrom env + use_expr = Case (mkVarApps (Var var) abs_vars) + (stayPut l1u bx_bndr) expr_ty + [(DataAlt dc, [stayPut l1u ubx_bndr], Var ubx_bndr)] + ; return (Let (NonRec (TB var (FloatMe dest_lvl)) float_rhs) + use_expr) } + + | otherwise -- e.g. do not float unboxed tuples + = lvlExpr env ann_expr + + where + expr = deAnnotate ann_expr + expr_ty = exprType expr + fvs = freeVarsOf ann_expr + fvs_ty = tyCoVarsOfType expr_ty + is_bot = isBottomThunk mb_bot_str + is_function = isFunction ann_expr + mb_bot_str = exprBotStrictness_maybe expr + -- See Note [Bottoming floats] + -- esp Bottoming floats (2) + expr_ok_for_spec = exprOkForSpeculation expr + dest_lvl = destLevel env fvs fvs_ty is_function is_bot False + abs_vars = abstractVars dest_lvl env fvs + + -- float_is_new_lam: the floated thing will be a new value lambda + -- replacing, say (g (x+4)) by (lvl x). No work is saved, nor is + -- allocation saved. The benefit is to get it to the top level + -- and hence out of the body of this function altogether, making + -- it smaller and more inlinable + float_is_new_lam = float_n_lams > 0 + float_n_lams = count isId abs_vars + + (rhs_env, abs_vars_w_lvls) = lvlLamBndrs env dest_lvl abs_vars + + join_arity_maybe = Nothing + + is_mk_static = isJust (collectMakeStaticArgs expr) + -- Yuk: See Note [Grand plan for static forms] in main/StaticPtrTable + + -- A decision to float entails let-binding this thing, and we only do + -- that if we'll escape a value lambda, or will go to the top level. + float_me = saves_work || saves_alloc || is_mk_static + + -- We can save work if we can move a redex outside a value lambda + -- But if float_is_new_lam is True, then the redex is wrapped in a + -- a new lambda, so no work is saved + saves_work = escapes_value_lam && not float_is_new_lam + + escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env) + -- See Note [Escaping a value lambda] + + -- See Note [Floating to the top] + saves_alloc = isTopLvl dest_lvl + && floatConsts env + && (not strict_ctxt || is_bot || exprIsHNF expr) + +isBottomThunk :: Maybe (Arity, s) -> Bool +-- See Note [Bottoming floats] (2) +isBottomThunk (Just (0, _)) = True -- Zero arity +isBottomThunk _ = False + +{- Note [Floating to the top] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are keen to float something to the top level, even if it does not +escape a value lambda (and hence save work), for two reasons: + + * Doing so makes the function smaller, by floating out + bottoming expressions, or integer or string literals. That in + turn makes it easier to inline, with less duplication. + + * (Minor) Doing so may turn a dynamic allocation (done by machine + instructions) into a static one. Minor because we are assuming + we are not escaping a value lambda. + +But do not so if: + - the context is a strict, and + - the expression is not a HNF, and + - the expression is not bottoming + +Exammples: + +* Bottoming + f x = case x of + 0 -> error <big thing> + _ -> x+1 + Here we want to float (error <big thing>) to top level, abstracting + over 'x', so as to make f's RHS smaller. + +* HNF + f = case y of + True -> p:q + False -> blah + We may as well float the (p:q) so it becomes a static data structure. + +* Case scrutinee + f = case g True of .... + Don't float (g True) to top level; then we have the admin of a + top-level thunk to worry about, with zero gain. + +* Case alternative + h = case y of + True -> g True + False -> False + Don't float (g True) to the top level + +* Arguments + t = f (g True) + If f is lazy, we /do/ float (g True) because then we can allocate + the thunk statically rather than dynamically. But if f is strict + we don't (see the use of idStrictness in lvlApp). It's not clear + if this test is worth the bother: it's only about CAFs! + +It's controlled by a flag (floatConsts), because doing this too +early loses opportunities for RULES which (needless to say) are +important in some nofib programs (gcd is an example). [SPJ note: +I think this is obsolete; the flag seems always on.] + +Note [Floating join point bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mostly we only float a join point if it can /stay/ a join point. But +there is one exception: if it can go to the top level (#13286). +Consider + f x = joinrec j y n = <...j y' n'...> + in jump j x 0 + +Here we may just as well produce + j y n = <....j y' n'...> + f x = j x 0 + +and now there is a chance that 'f' will be inlined at its call sites. +It shouldn't make a lot of difference, but these tests + perf/should_run/MethSharing + simplCore/should_compile/spec-inline +and one nofib program, all improve if you do float to top, because +of the resulting inlining of f. So ok, let's do it. + +Note [Free join points] +~~~~~~~~~~~~~~~~~~~~~~~ +We never float a MFE that has a free join-point variable. You might think +this can never occur. After all, consider + join j x = ... + in ....(jump j x).... +How might we ever want to float that (jump j x)? + * If it would escape a value lambda, thus + join j x = ... in (\y. ...(jump j x)... ) + then 'j' isn't a valid join point in the first place. + +But consider + join j x = .... in + joinrec j2 y = ...(jump j x)...(a+b).... + +Since j2 is recursive, it /is/ worth floating (a+b) out of the joinrec. +But it is emphatically /not/ good to float the (jump j x) out: + (a) 'j' will stop being a join point + (b) In any case, jumping to 'j' must be an exit of the j2 loop, so no + work would be saved by floating it out of the \y. + +Even if we floated 'j' to top level, (b) would still hold. + +Bottom line: never float a MFE that has a free JoinId. + +Note [Floating MFEs of unlifted type] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have + case f x of (r::Int#) -> blah +we'd like to float (f x). But it's not trivial because it has type +Int#, and we don't want to evaluate it too early. But we can instead +float a boxed version + y = case f x of r -> I# r +and replace the original (f x) with + case (case y of I# r -> r) of r -> blah + +Being able to float unboxed expressions is sometimes important; see +#12603. I'm not sure how /often/ it is important, but it's +not hard to achieve. + +We only do it for a fixed collection of types for which we have a +convenient boxing constructor (see boxingDataCon_maybe). In +particular we /don't/ do it for unboxed tuples; it's better to float +the components of the tuple individually. + +I did experiment with a form of boxing that works for any type, namely +wrapping in a function. In our example + + let y = case f x of r -> \v. f x + in case y void of r -> blah + +It works fine, but it's 50% slower (based on some crude benchmarking). +I suppose we could do it for types not covered by boxingDataCon_maybe, +but it's more code and I'll wait to see if anyone wants it. + +Note [Test cheapness with exprOkForSpeculation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't want to float very cheap expressions by boxing and unboxing. +But we use exprOkForSpeculation for the test, not exprIsCheap. +Why? Because it's important /not/ to transform + f (a /# 3) +to + f (case bx of I# a -> a /# 3) +and float bx = I# (a /# 3), because the application of f no +longer obeys the let/app invariant. But (a /# 3) is ok-for-spec +due to a special hack that says division operators can't fail +when the denominator is definitely non-zero. And yet that +same expression says False to exprIsCheap. Simplest way to +guarantee the let/app invariant is to use the same function! + +If an expression is okay for speculation, we could also float it out +*without* boxing and unboxing, since evaluating it early is okay. +However, it turned out to usually be better not to float such expressions, +since they tend to be extremely cheap things like (x +# 1#). Even the +cost of spilling the let-bound variable to the stack across a call may +exceed the cost of recomputing such an expression. (And we can't float +unlifted bindings to top-level.) + +We could try to do something smarter here, and float out expensive yet +okay-for-speculation things, such as division by non-zero constants. +But I suspect it's a narrow target. + +Note [Bottoming floats] +~~~~~~~~~~~~~~~~~~~~~~~ +If we see + f = \x. g (error "urk") +we'd like to float the call to error, to get + lvl = error "urk" + f = \x. g lvl + +But, as ever, we need to be careful: + +(1) We want to float a bottoming + expression even if it has free variables: + f = \x. g (let v = h x in error ("urk" ++ v)) + Then we'd like to abstract over 'x' can float the whole arg of g: + lvl = \x. let v = h x in error ("urk" ++ v) + f = \x. g (lvl x) + To achieve this we pass is_bot to destLevel + +(2) We do not do this for lambdas that return + bottom. Instead we treat the /body/ of such a function specially, + via point (1). For example: + f = \x. ....(\y z. if x then error y else error z).... + ===> + lvl = \x z y. if b then error y else error z + f = \x. ...(\y z. lvl x z y)... + (There is no guarantee that we'll choose the perfect argument order.) + +(3) If we have a /binding/ that returns bottom, we want to float it to top + level, even if it has free vars (point (1)), and even it has lambdas. + Example: + ... let { v = \y. error (show x ++ show y) } in ... + We want to abstract over x and float the whole thing to top: + lvl = \xy. errror (show x ++ show y) + ...let {v = lvl x} in ... + + Then of course we don't want to separately float the body (error ...) + as /another/ MFE, so we tell lvlFloatRhs not to do that, via the is_bot + argument. + +See Maessen's paper 1999 "Bottom extraction: factoring error handling out +of functional programs" (unpublished I think). + +When we do this, we set the strictness and arity of the new bottoming +Id, *immediately*, for three reasons: + + * To prevent the abstracted thing being immediately inlined back in again + via preInlineUnconditionally. The latter has a test for bottoming Ids + to stop inlining them, so we'd better make sure it *is* a bottoming Id! + + * So that it's properly exposed as such in the interface file, even if + this is all happening after strictness analysis. + + * In case we do CSE with the same expression that *is* marked bottom + lvl = error "urk" + x{str=bot) = error "urk" + Here we don't want to replace 'x' with 'lvl', else we may get Lint + errors, e.g. via a case with empty alternatives: (case x of {}) + Lint complains unless the scrutinee of such a case is clearly bottom. + + This was reported in #11290. But since the whole bottoming-float + thing is based on the cheap-and-cheerful exprIsBottom, I'm not sure + that it'll nail all such cases. + +Note [Bottoming floats: eta expansion] c.f Note [Bottoming floats] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Tiresomely, though, the simplifier has an invariant that the manifest +arity of the RHS should be the same as the arity; but we can't call +etaExpand during GHC.Core.Op.SetLevels because it works over a decorated form of +CoreExpr. So we do the eta expansion later, in GHC.Core.Op.FloatOut. + +Note [Case MFEs] +~~~~~~~~~~~~~~~~ +We don't float a case expression as an MFE from a strict context. Why not? +Because in doing so we share a tiny bit of computation (the switch) but +in exchange we build a thunk, which is bad. This case reduces allocation +by 7% in spectral/puzzle (a rather strange benchmark) and 1.2% in real/fem. +Doesn't change any other allocation at all. + +We will make a separate decision for the scrutinee and alternatives. + +However this can have a knock-on effect for fusion: consider + \v -> foldr k z (case x of I# y -> build ..y..) +Perhaps we can float the entire (case x of ...) out of the \v. Then +fusion will not happen, but we will get more sharing. But if we don't +float the case (as advocated here) we won't float the (build ...y..) +either, so fusion will happen. It can be a big effect, esp in some +artificial benchmarks (e.g. integer, queens), but there is no perfect +answer. + +-} + +annotateBotStr :: Id -> Arity -> Maybe (Arity, StrictSig) -> Id +-- See Note [Bottoming floats] for why we want to add +-- bottoming information right now +-- +-- n_extra are the number of extra value arguments added during floating +annotateBotStr id n_extra mb_str + = case mb_str of + Nothing -> id + Just (arity, sig) -> id `setIdArity` (arity + n_extra) + `setIdStrictness` (increaseStrictSigArity n_extra sig) + `setIdCprInfo` mkCprSig (arity + n_extra) botCpr + +notWorthFloating :: CoreExpr -> [Var] -> Bool +-- Returns True if the expression would be replaced by +-- something bigger than it is now. For example: +-- abs_vars = tvars only: return True if e is trivial, +-- but False for anything bigger +-- abs_vars = [x] (an Id): return True for trivial, or an application (f x) +-- but False for (f x x) +-- +-- One big goal is that floating should be idempotent. Eg if +-- we replace e with (lvl79 x y) and then run FloatOut again, don't want +-- to replace (lvl79 x y) with (lvl83 x y)! + +notWorthFloating e abs_vars + = go e (count isId abs_vars) + where + go (Var {}) n = n >= 0 + go (Lit lit) n = ASSERT( n==0 ) + litIsTrivial lit -- Note [Floating literals] + go (Tick t e) n = not (tickishIsCode t) && go e n + go (Cast e _) n = go e n + go (App e arg) n + -- See Note [Floating applications to coercions] + | Type {} <- arg = go e n + | n==0 = False + | is_triv arg = go e (n-1) + | otherwise = False + go _ _ = False + + is_triv (Lit {}) = True -- Treat all literals as trivial + is_triv (Var {}) = True -- (ie not worth floating) + is_triv (Cast e _) = is_triv e + is_triv (App e (Type {})) = is_triv e -- See Note [Floating applications to coercions] + is_triv (Tick t e) = not (tickishIsCode t) && is_triv e + is_triv _ = False + +{- +Note [Floating literals] +~~~~~~~~~~~~~~~~~~~~~~~~ +It's important to float Integer literals, so that they get shared, +rather than being allocated every time round the loop. +Hence the litIsTrivial. + +Ditto literal strings (LitString), which we'd like to float to top +level, which is now possible. + +Note [Floating applications to coercions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don’t float out variables applied only to type arguments, since the +extra binding would be pointless: type arguments are completely erased. +But *coercion* arguments aren’t (see Note [Coercion tokens] in +CoreToStg.hs and Note [Count coercion arguments in boring contexts] in +CoreUnfold.hs), so we still want to float out variables applied only to +coercion arguments. + +Note [Escaping a value lambda] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want to float even cheap expressions out of value lambdas, +because that saves allocation. Consider + f = \x. .. (\y.e) ... +Then we'd like to avoid allocating the (\y.e) every time we call f, +(assuming e does not mention x). An example where this really makes a +difference is simplrun009. + +Another reason it's good is because it makes SpecContr fire on functions. +Consider + f = \x. ....(f (\y.e)).... +After floating we get + lvl = \y.e + f = \x. ....(f lvl)... +and that is much easier for SpecConstr to generate a robust +specialisation for. + +However, if we are wrapping the thing in extra value lambdas (in +abs_vars), then nothing is saved. E.g. + f = \xyz. ...(e1[y],e2).... +If we float + lvl = \y. (e1[y],e2) + f = \xyz. ...(lvl y)... +we have saved nothing: one pair will still be allocated for each +call of 'f'. Hence the (not float_is_lam) in float_me. + + +************************************************************************ +* * +\subsection{Bindings} +* * +************************************************************************ + +The binding stuff works for top level too. +-} + +lvlBind :: LevelEnv + -> CoreBindWithFVs + -> LvlM (LevelledBind, LevelEnv) + +lvlBind env (AnnNonRec bndr rhs) + | isTyVar bndr -- Don't do anything for TyVar binders + -- (simplifier gets rid of them pronto) + || isCoVar bndr -- Difficult to fix up CoVar occurrences (see extendPolyLvlEnv) + -- so we will ignore this case for now + || not (profitableFloat env dest_lvl) + || (isTopLvl dest_lvl && not (exprIsTopLevelBindable deann_rhs bndr_ty)) + -- We can't float an unlifted binding to top level (except + -- literal strings), so we don't float it at all. It's a + -- bit brutal, but unlifted bindings aren't expensive either + + = -- No float + do { rhs' <- lvlRhs env NonRecursive is_bot mb_join_arity rhs + ; let bind_lvl = incMinorLvl (le_ctxt_lvl env) + (env', [bndr']) = substAndLvlBndrs NonRecursive env bind_lvl [bndr] + ; return (NonRec bndr' rhs', env') } + + -- Otherwise we are going to float + | null abs_vars + = do { -- No type abstraction; clone existing binder + rhs' <- lvlFloatRhs [] dest_lvl env NonRecursive + is_bot mb_join_arity rhs + ; (env', [bndr']) <- cloneLetVars NonRecursive env dest_lvl [bndr] + ; let bndr2 = annotateBotStr bndr' 0 mb_bot_str + ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') } + + | otherwise + = do { -- Yes, type abstraction; create a new binder, extend substitution, etc + rhs' <- lvlFloatRhs abs_vars dest_lvl env NonRecursive + is_bot mb_join_arity rhs + ; (env', [bndr']) <- newPolyBndrs dest_lvl env abs_vars [bndr] + ; let bndr2 = annotateBotStr bndr' n_extra mb_bot_str + ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') } + + where + bndr_ty = idType bndr + ty_fvs = tyCoVarsOfType bndr_ty + rhs_fvs = freeVarsOf rhs + bind_fvs = rhs_fvs `unionDVarSet` dIdFreeVars bndr + abs_vars = abstractVars dest_lvl env bind_fvs + dest_lvl = destLevel env bind_fvs ty_fvs (isFunction rhs) is_bot is_join + + deann_rhs = deAnnotate rhs + mb_bot_str = exprBotStrictness_maybe deann_rhs + is_bot = isJust mb_bot_str + -- NB: not isBottomThunk! See Note [Bottoming floats] point (3) + + n_extra = count isId abs_vars + mb_join_arity = isJoinId_maybe bndr + is_join = isJust mb_join_arity + +lvlBind env (AnnRec pairs) + | floatTopLvlOnly env && not (isTopLvl dest_lvl) + -- Only floating to the top level is allowed. + || not (profitableFloat env dest_lvl) + || (isTopLvl dest_lvl && any (mightBeUnliftedType . idType) bndrs) + -- This mightBeUnliftedType stuff is the same test as in the non-rec case + -- You might wonder whether we can have a recursive binding for + -- an unlifted value -- but we can if it's a /join binding/ (#16978) + -- (Ultimately I think we should not use GHC.Core.Op.SetLevels to + -- float join bindings at all, but that's another story.) + = -- No float + do { let bind_lvl = incMinorLvl (le_ctxt_lvl env) + (env', bndrs') = substAndLvlBndrs Recursive env bind_lvl bndrs + lvl_rhs (b,r) = lvlRhs env' Recursive is_bot (isJoinId_maybe b) r + ; rhss' <- mapM lvl_rhs pairs + ; return (Rec (bndrs' `zip` rhss'), env') } + + -- Otherwise we are going to float + | null abs_vars + = do { (new_env, new_bndrs) <- cloneLetVars Recursive env dest_lvl bndrs + ; new_rhss <- mapM (do_rhs new_env) pairs + ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss) + , new_env) } + +-- ToDo: when enabling the floatLambda stuff, +-- I think we want to stop doing this + | [(bndr,rhs)] <- pairs + , count isId abs_vars > 1 + = do -- Special case for self recursion where there are + -- several variables carried around: build a local loop: + -- poly_f = \abs_vars. \lam_vars . letrec f = \lam_vars. rhs in f lam_vars + -- This just makes the closures a bit smaller. If we don't do + -- this, allocation rises significantly on some programs + -- + -- We could elaborate it for the case where there are several + -- mutually recursive functions, but it's quite a bit more complicated + -- + -- This all seems a bit ad hoc -- sigh + let (rhs_env, abs_vars_w_lvls) = lvlLamBndrs env dest_lvl abs_vars + rhs_lvl = le_ctxt_lvl rhs_env + + (rhs_env', [new_bndr]) <- cloneLetVars Recursive rhs_env rhs_lvl [bndr] + let + (lam_bndrs, rhs_body) = collectAnnBndrs rhs + (body_env1, lam_bndrs1) = substBndrsSL NonRecursive rhs_env' lam_bndrs + (body_env2, lam_bndrs2) = lvlLamBndrs body_env1 rhs_lvl lam_bndrs1 + new_rhs_body <- lvlRhs body_env2 Recursive is_bot (get_join bndr) rhs_body + (poly_env, [poly_bndr]) <- newPolyBndrs dest_lvl env abs_vars [bndr] + return (Rec [(TB poly_bndr (FloatMe dest_lvl) + , mkLams abs_vars_w_lvls $ + mkLams lam_bndrs2 $ + Let (Rec [( TB new_bndr (StayPut rhs_lvl) + , mkLams lam_bndrs2 new_rhs_body)]) + (mkVarApps (Var new_bndr) lam_bndrs1))] + , poly_env) + + | otherwise -- Non-null abs_vars + = do { (new_env, new_bndrs) <- newPolyBndrs dest_lvl env abs_vars bndrs + ; new_rhss <- mapM (do_rhs new_env) pairs + ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss) + , new_env) } + + where + (bndrs,rhss) = unzip pairs + is_join = isJoinId (head bndrs) + -- bndrs is always non-empty and if one is a join they all are + -- Both are checked by Lint + is_fun = all isFunction rhss + is_bot = False -- It's odd to have an unconditionally divergent + -- function in a Rec, and we don't much care what + -- happens to it. False is simple! + + do_rhs env (bndr,rhs) = lvlFloatRhs abs_vars dest_lvl env Recursive + is_bot (get_join bndr) + rhs + + get_join bndr | need_zap = Nothing + | otherwise = isJoinId_maybe bndr + need_zap = dest_lvl `ltLvl` joinCeilingLevel env + + -- Finding the free vars of the binding group is annoying + bind_fvs = ((unionDVarSets [ freeVarsOf rhs | (_, rhs) <- pairs]) + `unionDVarSet` + (fvDVarSet $ unionsFV [ idFVs bndr + | (bndr, (_,_)) <- pairs])) + `delDVarSetList` + bndrs + + ty_fvs = foldr (unionVarSet . tyCoVarsOfType . idType) emptyVarSet bndrs + dest_lvl = destLevel env bind_fvs ty_fvs is_fun is_bot is_join + abs_vars = abstractVars dest_lvl env bind_fvs + +profitableFloat :: LevelEnv -> Level -> Bool +profitableFloat env dest_lvl + = (dest_lvl `ltMajLvl` le_ctxt_lvl env) -- Escapes a value lambda + || isTopLvl dest_lvl -- Going all the way to top level + + +---------------------------------------------------- +-- Three help functions for the type-abstraction case + +lvlRhs :: LevelEnv + -> RecFlag + -> Bool -- Is this a bottoming function + -> Maybe JoinArity + -> CoreExprWithFVs + -> LvlM LevelledExpr +lvlRhs env rec_flag is_bot mb_join_arity expr + = lvlFloatRhs [] (le_ctxt_lvl env) env + rec_flag is_bot mb_join_arity expr + +lvlFloatRhs :: [OutVar] -> Level -> LevelEnv -> RecFlag + -> Bool -- Binding is for a bottoming function + -> Maybe JoinArity + -> CoreExprWithFVs + -> LvlM (Expr LevelledBndr) +-- Ignores the le_ctxt_lvl in env; treats dest_lvl as the baseline +lvlFloatRhs abs_vars dest_lvl env rec is_bot mb_join_arity rhs + = do { body' <- if not is_bot -- See Note [Floating from a RHS] + && any isId bndrs + then lvlMFE body_env True body + else lvlExpr body_env body + ; return (mkLams bndrs' body') } + where + (bndrs, body) | Just join_arity <- mb_join_arity + = collectNAnnBndrs join_arity rhs + | otherwise + = collectAnnBndrs rhs + (env1, bndrs1) = substBndrsSL NonRecursive env bndrs + all_bndrs = abs_vars ++ bndrs1 + (body_env, bndrs') | Just _ <- mb_join_arity + = lvlJoinBndrs env1 dest_lvl rec all_bndrs + | otherwise + = case lvlLamBndrs env1 dest_lvl all_bndrs of + (env2, bndrs') -> (placeJoinCeiling env2, bndrs') + -- The important thing here is that we call lvlLamBndrs on + -- all these binders at once (abs_vars and bndrs), so they + -- all get the same major level. Otherwise we create stupid + -- let-bindings inside, joyfully thinking they can float; but + -- in the end they don't because we never float bindings in + -- between lambdas + +{- Note [Floating from a RHS] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When floating the RHS of a let-binding, we don't always want to apply +lvlMFE to the body of a lambda, as we usually do, because the entire +binding body is already going to the right place (dest_lvl). + +A particular example is the top level. Consider + concat = /\ a -> foldr ..a.. (++) [] +We don't want to float the body of the lambda to get + lvl = /\ a -> foldr ..a.. (++) [] + concat = /\ a -> lvl a +That would be stupid. + +Previously this was avoided in a much nastier way, by testing strict_ctxt +in float_me in lvlMFE. But that wasn't even right because it would fail +to float out the error sub-expression in + f = \x. case x of + True -> error ("blah" ++ show x) + False -> ... + +But we must be careful: + +* If we had + f = \x -> factorial 20 + we /would/ want to float that (factorial 20) out! Functions are treated + differently: see the use of isFunction in the calls to destLevel. If + there are only type lambdas, then destLevel will say "go to top, and + abstract over the free tyvars" and we don't want that here. + +* But if we had + f = \x -> error (...x....) + we would NOT want to float the bottoming expression out to give + lvl = \x -> error (...x...) + f = \x -> lvl x + +Conclusion: use lvlMFE if there are + * any value lambdas in the original function, and + * this is not a bottoming function (the is_bot argument) +Use lvlExpr otherwise. A little subtle, and I got it wrong at least twice +(e.g. #13369). +-} + +{- +************************************************************************ +* * +\subsection{Deciding floatability} +* * +************************************************************************ +-} + +substAndLvlBndrs :: RecFlag -> LevelEnv -> Level -> [InVar] -> (LevelEnv, [LevelledBndr]) +substAndLvlBndrs is_rec env lvl bndrs + = lvlBndrs subst_env lvl subst_bndrs + where + (subst_env, subst_bndrs) = substBndrsSL is_rec env bndrs + +substBndrsSL :: RecFlag -> LevelEnv -> [InVar] -> (LevelEnv, [OutVar]) +-- So named only to avoid the name clash with GHC.Core.Subst.substBndrs +substBndrsSL is_rec env@(LE { le_subst = subst, le_env = id_env }) bndrs + = ( env { le_subst = subst' + , le_env = foldl' add_id id_env (bndrs `zip` bndrs') } + , bndrs') + where + (subst', bndrs') = case is_rec of + NonRecursive -> substBndrs subst bndrs + Recursive -> substRecBndrs subst bndrs + +lvlLamBndrs :: LevelEnv -> Level -> [OutVar] -> (LevelEnv, [LevelledBndr]) +-- Compute the levels for the binders of a lambda group +lvlLamBndrs env lvl bndrs + = lvlBndrs env new_lvl bndrs + where + new_lvl | any is_major bndrs = incMajorLvl lvl + | otherwise = incMinorLvl lvl + + is_major bndr = isId bndr && not (isProbablyOneShotLambda bndr) + -- The "probably" part says "don't float things out of a + -- probable one-shot lambda" + -- See Note [Computing one-shot info] in Demand.hs + +lvlJoinBndrs :: LevelEnv -> Level -> RecFlag -> [OutVar] + -> (LevelEnv, [LevelledBndr]) +lvlJoinBndrs env lvl rec bndrs + = lvlBndrs env new_lvl bndrs + where + new_lvl | isRec rec = incMajorLvl lvl + | otherwise = incMinorLvl lvl + -- Non-recursive join points are one-shot; recursive ones are not + +lvlBndrs :: LevelEnv -> Level -> [CoreBndr] -> (LevelEnv, [LevelledBndr]) +-- The binders returned are exactly the same as the ones passed, +-- apart from applying the substitution, but they are now paired +-- with a (StayPut level) +-- +-- The returned envt has le_ctxt_lvl updated to the new_lvl +-- +-- All the new binders get the same level, because +-- any floating binding is either going to float past +-- all or none. We never separate binders. +lvlBndrs env@(LE { le_lvl_env = lvl_env }) new_lvl bndrs + = ( env { le_ctxt_lvl = new_lvl + , le_join_ceil = new_lvl + , le_lvl_env = addLvls new_lvl lvl_env bndrs } + , map (stayPut new_lvl) bndrs) + +stayPut :: Level -> OutVar -> LevelledBndr +stayPut new_lvl bndr = TB bndr (StayPut new_lvl) + + -- Destination level is the max Id level of the expression + -- (We'll abstract the type variables, if any.) +destLevel :: LevelEnv + -> DVarSet -- Free vars of the term + -> TyCoVarSet -- Free in the /type/ of the term + -- (a subset of the previous argument) + -> Bool -- True <=> is function + -> Bool -- True <=> is bottom + -> Bool -- True <=> is a join point + -> Level +-- INVARIANT: if is_join=True then result >= join_ceiling +destLevel env fvs fvs_ty is_function is_bot is_join + | isTopLvl max_fv_id_level -- Float even joins if they get to top level + -- See Note [Floating join point bindings] + = tOP_LEVEL + + | is_join -- Never float a join point past the join ceiling + -- See Note [Join points] in GHC.Core.Op.FloatOut + = if max_fv_id_level `ltLvl` join_ceiling + then join_ceiling + else max_fv_id_level + + | is_bot -- Send bottoming bindings to the top + = as_far_as_poss -- regardless; see Note [Bottoming floats] + -- Esp Bottoming floats (1) + + | Just n_args <- floatLams env + , n_args > 0 -- n=0 case handled uniformly by the 'otherwise' case + , is_function + , countFreeIds fvs <= n_args + = as_far_as_poss -- Send functions to top level; see + -- the comments with isFunction + + | otherwise = max_fv_id_level + where + join_ceiling = joinCeilingLevel env + max_fv_id_level = maxFvLevel isId env fvs -- Max over Ids only; the + -- tyvars will be abstracted + + as_far_as_poss = maxFvLevel' isId env fvs_ty + -- See Note [Floating and kind casts] + +{- Note [Floating and kind casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this + case x of + K (co :: * ~# k) -> let v :: Int |> co + v = e + in blah + +Then, even if we are abstracting over Ids, or if e is bottom, we can't +float v outside the 'co' binding. Reason: if we did we'd get + v' :: forall k. (Int ~# Age) => Int |> co +and now 'co' isn't in scope in that type. The underlying reason is +that 'co' is a value-level thing and we can't abstract over that in a +type (else we'd get a dependent type). So if v's /type/ mentions 'co' +we can't float it out beyond the binding site of 'co'. + +That's why we have this as_far_as_poss stuff. Usually as_far_as_poss +is just tOP_LEVEL; but occasionally a coercion variable (which is an +Id) mentioned in type prevents this. + +Example #14270 comment:15. +-} + + +isFunction :: CoreExprWithFVs -> Bool +-- The idea here is that we want to float *functions* to +-- the top level. This saves no work, but +-- (a) it can make the host function body a lot smaller, +-- and hence inlinable. +-- (b) it can also save allocation when the function is recursive: +-- h = \x -> letrec f = \y -> ...f...y...x... +-- in f x +-- becomes +-- f = \x y -> ...(f x)...y...x... +-- h = \x -> f x x +-- No allocation for f now. +-- We may only want to do this if there are sufficiently few free +-- variables. We certainly only want to do it for values, and not for +-- constructors. So the simple thing is just to look for lambdas +isFunction (_, AnnLam b e) | isId b = True + | otherwise = isFunction e +-- isFunction (_, AnnTick _ e) = isFunction e -- dubious +isFunction _ = False + +countFreeIds :: DVarSet -> Int +countFreeIds = nonDetFoldUDFM add 0 . getUniqDSet + -- It's OK to use nonDetFoldUDFM here because we're just counting things. + where + add :: Var -> Int -> Int + add v n | isId v = n+1 + | otherwise = n + +{- +************************************************************************ +* * +\subsection{Free-To-Level Monad} +* * +************************************************************************ +-} + +data LevelEnv + = LE { le_switches :: FloatOutSwitches + , le_ctxt_lvl :: Level -- The current level + , le_lvl_env :: VarEnv Level -- Domain is *post-cloned* TyVars and Ids + , le_join_ceil:: Level -- Highest level to which joins float + -- Invariant: always >= le_ctxt_lvl + + -- See Note [le_subst and le_env] + , le_subst :: Subst -- Domain is pre-cloned TyVars and Ids + -- The Id -> CoreExpr in the Subst is ignored + -- (since we want to substitute a LevelledExpr for + -- an Id via le_env) but we do use the Co/TyVar substs + , le_env :: IdEnv ([OutVar], LevelledExpr) -- Domain is pre-cloned Ids + } + +{- Note [le_subst and le_env] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We clone let- and case-bound variables so that they are still distinct +when floated out; hence the le_subst/le_env. (see point 3 of the +module overview comment). We also use these envs when making a +variable polymorphic because we want to float it out past a big +lambda. + +The le_subst and le_env always implement the same mapping, + in_x :-> out_x a b +where out_x is an OutVar, and a,b are its arguments (when +we perform abstraction at the same time as floating). + + le_subst maps to CoreExpr + le_env maps to LevelledExpr + +Since the range is always a variable or application, there is never +any difference between the two, but sadly the types differ. The +le_subst is used when substituting in a variable's IdInfo; the le_env +when we find a Var. + +In addition the le_env records a [OutVar] of variables free in the +OutExpr/LevelledExpr, just so we don't have to call freeVars +repeatedly. This list is always non-empty, and the first element is +out_x + +The domain of the both envs is *pre-cloned* Ids, though + +The domain of the le_lvl_env is the *post-cloned* Ids +-} + +initialEnv :: FloatOutSwitches -> LevelEnv +initialEnv float_lams + = LE { le_switches = float_lams + , le_ctxt_lvl = tOP_LEVEL + , le_join_ceil = panic "initialEnv" + , le_lvl_env = emptyVarEnv + , le_subst = emptySubst + , le_env = emptyVarEnv } + +addLvl :: Level -> VarEnv Level -> OutVar -> VarEnv Level +addLvl dest_lvl env v' = extendVarEnv env v' dest_lvl + +addLvls :: Level -> VarEnv Level -> [OutVar] -> VarEnv Level +addLvls dest_lvl env vs = foldl' (addLvl dest_lvl) env vs + +floatLams :: LevelEnv -> Maybe Int +floatLams le = floatOutLambdas (le_switches le) + +floatConsts :: LevelEnv -> Bool +floatConsts le = floatOutConstants (le_switches le) + +floatOverSat :: LevelEnv -> Bool +floatOverSat le = floatOutOverSatApps (le_switches le) + +floatTopLvlOnly :: LevelEnv -> Bool +floatTopLvlOnly le = floatToTopLevelOnly (le_switches le) + +incMinorLvlFrom :: LevelEnv -> Level +incMinorLvlFrom env = incMinorLvl (le_ctxt_lvl env) + +-- extendCaseBndrEnv adds the mapping case-bndr->scrut-var if it can +-- See Note [Binder-swap during float-out] +extendCaseBndrEnv :: LevelEnv + -> Id -- Pre-cloned case binder + -> Expr LevelledBndr -- Post-cloned scrutinee + -> LevelEnv +extendCaseBndrEnv le@(LE { le_subst = subst, le_env = id_env }) + case_bndr (Var scrut_var) + = le { le_subst = extendSubstWithVar subst case_bndr scrut_var + , le_env = add_id id_env (case_bndr, scrut_var) } +extendCaseBndrEnv env _ _ = env + +-- See Note [Join ceiling] +placeJoinCeiling :: LevelEnv -> LevelEnv +placeJoinCeiling le@(LE { le_ctxt_lvl = lvl }) + = le { le_ctxt_lvl = lvl', le_join_ceil = lvl' } + where + lvl' = asJoinCeilLvl (incMinorLvl lvl) + +maxFvLevel :: (Var -> Bool) -> LevelEnv -> DVarSet -> Level +maxFvLevel max_me env var_set + = foldDVarSet (maxIn max_me env) tOP_LEVEL var_set + +maxFvLevel' :: (Var -> Bool) -> LevelEnv -> TyCoVarSet -> Level +-- Same but for TyCoVarSet +maxFvLevel' max_me env var_set + = nonDetFoldUniqSet (maxIn max_me env) tOP_LEVEL var_set + +maxIn :: (Var -> Bool) -> LevelEnv -> InVar -> Level -> Level +maxIn max_me (LE { le_lvl_env = lvl_env, le_env = id_env }) in_var lvl + = case lookupVarEnv id_env in_var of + Just (abs_vars, _) -> foldr max_out lvl abs_vars + Nothing -> max_out in_var lvl + where + max_out out_var lvl + | max_me out_var = case lookupVarEnv lvl_env out_var of + Just lvl' -> maxLvl lvl' lvl + Nothing -> lvl + | otherwise = lvl -- Ignore some vars depending on max_me + +lookupVar :: LevelEnv -> Id -> LevelledExpr +lookupVar le v = case lookupVarEnv (le_env le) v of + Just (_, expr) -> expr + _ -> Var v + +-- Level to which join points are allowed to float (boundary of current tail +-- context). See Note [Join ceiling] +joinCeilingLevel :: LevelEnv -> Level +joinCeilingLevel = le_join_ceil + +abstractVars :: Level -> LevelEnv -> DVarSet -> [OutVar] + -- Find the variables in fvs, free vars of the target expression, + -- whose level is greater than the destination level + -- These are the ones we are going to abstract out + -- + -- Note that to get reproducible builds, the variables need to be + -- abstracted in deterministic order, not dependent on the values of + -- Uniques. This is achieved by using DVarSets, deterministic free + -- variable computation and deterministic sort. + -- See Note [Unique Determinism] in Unique for explanation of why + -- Uniques are not deterministic. +abstractVars dest_lvl (LE { le_subst = subst, le_lvl_env = lvl_env }) in_fvs + = -- NB: sortQuantVars might not put duplicates next to each other + map zap $ sortQuantVars $ + filter abstract_me $ + dVarSetElems $ + closeOverKindsDSet $ + substDVarSet subst in_fvs + -- NB: it's important to call abstract_me only on the OutIds the + -- come from substDVarSet (not on fv, which is an InId) + where + abstract_me v = case lookupVarEnv lvl_env v of + Just lvl -> dest_lvl `ltLvl` lvl + Nothing -> False + + -- We are going to lambda-abstract, so nuke any IdInfo, + -- and add the tyvars of the Id (if necessary) + zap v | isId v = WARN( isStableUnfolding (idUnfolding v) || + not (isEmptyRuleInfo (idSpecialisation v)), + text "absVarsOf: discarding info on" <+> ppr v ) + setIdInfo v vanillaIdInfo + | otherwise = v + +type LvlM result = UniqSM result + +initLvl :: UniqSupply -> UniqSM a -> a +initLvl = initUs_ + +newPolyBndrs :: Level -> LevelEnv -> [OutVar] -> [InId] + -> LvlM (LevelEnv, [OutId]) +-- The envt is extended to bind the new bndrs to dest_lvl, but +-- the le_ctxt_lvl is unaffected +newPolyBndrs dest_lvl + env@(LE { le_lvl_env = lvl_env, le_subst = subst, le_env = id_env }) + abs_vars bndrs + = ASSERT( all (not . isCoVar) bndrs ) -- What would we add to the CoSubst in this case. No easy answer. + do { uniqs <- getUniquesM + ; let new_bndrs = zipWith mk_poly_bndr bndrs uniqs + bndr_prs = bndrs `zip` new_bndrs + env' = env { le_lvl_env = addLvls dest_lvl lvl_env new_bndrs + , le_subst = foldl' add_subst subst bndr_prs + , le_env = foldl' add_id id_env bndr_prs } + ; return (env', new_bndrs) } + where + add_subst env (v, v') = extendIdSubst env v (mkVarApps (Var v') abs_vars) + add_id env (v, v') = extendVarEnv env v ((v':abs_vars), mkVarApps (Var v') abs_vars) + + mk_poly_bndr bndr uniq = transferPolyIdInfo bndr abs_vars $ -- Note [transferPolyIdInfo] in Id.hs + transfer_join_info bndr $ + mkSysLocal (mkFastString str) uniq poly_ty + where + str = "poly_" ++ occNameString (getOccName bndr) + poly_ty = mkLamTypes abs_vars (GHC.Core.Subst.substTy subst (idType bndr)) + + -- If we are floating a join point to top level, it stops being + -- a join point. Otherwise it continues to be a join point, + -- but we may need to adjust its arity + dest_is_top = isTopLvl dest_lvl + transfer_join_info bndr new_bndr + | Just join_arity <- isJoinId_maybe bndr + , not dest_is_top + = new_bndr `asJoinId` join_arity + length abs_vars + | otherwise + = new_bndr + +newLvlVar :: LevelledExpr -- The RHS of the new binding + -> Maybe JoinArity -- Its join arity, if it is a join point + -> Bool -- True <=> the RHS looks like (makeStatic ...) + -> LvlM Id +newLvlVar lvld_rhs join_arity_maybe is_mk_static + = do { uniq <- getUniqueM + ; return (add_join_info (mk_id uniq rhs_ty)) + } + where + add_join_info var = var `asJoinId_maybe` join_arity_maybe + de_tagged_rhs = deTagExpr lvld_rhs + rhs_ty = exprType de_tagged_rhs + + mk_id uniq rhs_ty + -- See Note [Grand plan for static forms] in StaticPtrTable. + | is_mk_static + = mkExportedVanillaId (mkSystemVarName uniq (mkFastString "static_ptr")) + rhs_ty + | otherwise + = mkSysLocal (mkFastString "lvl") uniq rhs_ty + +-- | Clone the binders bound by a single-alternative case. +cloneCaseBndrs :: LevelEnv -> Level -> [Var] -> LvlM (LevelEnv, [Var]) +cloneCaseBndrs env@(LE { le_subst = subst, le_lvl_env = lvl_env, le_env = id_env }) + new_lvl vs + = do { us <- getUniqueSupplyM + ; let (subst', vs') = cloneBndrs subst us vs + -- N.B. We are not moving the body of the case, merely its case + -- binders. Consequently we should *not* set le_ctxt_lvl and + -- le_join_ceil. See Note [Setting levels when floating + -- single-alternative cases]. + env' = env { le_lvl_env = addLvls new_lvl lvl_env vs' + , le_subst = subst' + , le_env = foldl' add_id id_env (vs `zip` vs') } + + ; return (env', vs') } + +cloneLetVars :: RecFlag -> LevelEnv -> Level -> [InVar] + -> LvlM (LevelEnv, [OutVar]) +-- See Note [Need for cloning during float-out] +-- Works for Ids bound by let(rec) +-- The dest_lvl is attributed to the binders in the new env, +-- but cloneVars doesn't affect the le_ctxt_lvl of the incoming env +cloneLetVars is_rec + env@(LE { le_subst = subst, le_lvl_env = lvl_env, le_env = id_env }) + dest_lvl vs + = do { us <- getUniqueSupplyM + ; let vs1 = map zap vs + -- See Note [Zapping the demand info] + (subst', vs2) = case is_rec of + NonRecursive -> cloneBndrs subst us vs1 + Recursive -> cloneRecIdBndrs subst us vs1 + prs = vs `zip` vs2 + env' = env { le_lvl_env = addLvls dest_lvl lvl_env vs2 + , le_subst = subst' + , le_env = foldl' add_id id_env prs } + + ; return (env', vs2) } + where + zap :: Var -> Var + zap v | isId v = zap_join (zapIdDemandInfo v) + | otherwise = v + + zap_join | isTopLvl dest_lvl = zapJoinId + | otherwise = id + +add_id :: IdEnv ([Var], LevelledExpr) -> (Var, Var) -> IdEnv ([Var], LevelledExpr) +add_id id_env (v, v1) + | isTyVar v = delVarEnv id_env v + | otherwise = extendVarEnv id_env v ([v1], ASSERT(not (isCoVar v1)) Var v1) + +{- +Note [Zapping the demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +VERY IMPORTANT: we must zap the demand info if the thing is going to +float out, because it may be less demanded than at its original +binding site. Eg + f :: Int -> Int + f x = let v = 3*4 in v+x +Here v is strict; but if we float v to top level, it isn't any more. + +Similarly, if we're floating a join point, it won't be one anymore, so we zap +join point information as well. +-} diff --git a/compiler/GHC/Core/Op/Simplify.hs b/compiler/GHC/Core/Op/Simplify.hs new file mode 100644 index 0000000000..448edd21f6 --- /dev/null +++ b/compiler/GHC/Core/Op/Simplify.hs @@ -0,0 +1,3666 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +\section[Simplify]{The main module of the simplifier} +-} + +{-# LANGUAGE CPP #-} + +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} +module GHC.Core.Op.Simplify ( simplTopBinds, simplExpr, simplRules ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Driver.Session +import GHC.Core.Op.Simplify.Monad +import GHC.Core.Type hiding ( substTy, substTyVar, extendTvSubst, extendCvSubst ) +import GHC.Core.Op.Simplify.Env +import GHC.Core.Op.Simplify.Utils +import GHC.Core.Op.OccurAnal ( occurAnalyseExpr ) +import GHC.Core.FamInstEnv ( FamInstEnv ) +import Literal ( litIsLifted ) --, mkLitInt ) -- temporalily commented out. See #8326 +import Id +import MkId ( seqId ) +import GHC.Core.Make ( FloatBind, mkImpossibleExpr, castBottomExpr ) +import qualified GHC.Core.Make +import IdInfo +import Name ( mkSystemVarName, isExternalName, getOccFS ) +import GHC.Core.Coercion hiding ( substCo, substCoVar ) +import GHC.Core.Coercion.Opt ( optCoercion ) +import GHC.Core.FamInstEnv ( topNormaliseType_maybe ) +import GHC.Core.DataCon + ( DataCon, dataConWorkId, dataConRepStrictness + , dataConRepArgTys, isUnboxedTupleCon + , StrictnessMark (..) ) +import GHC.Core.Op.Monad ( Tick(..), SimplMode(..) ) +import GHC.Core +import Demand ( StrictSig(..), dmdTypeDepth, isStrictDmd + , mkClosedStrictSig, topDmd, botDiv ) +import Cpr ( mkCprSig, botCpr ) +import GHC.Core.Ppr ( pprCoreExpr ) +import GHC.Core.Unfold +import GHC.Core.Utils +import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg + , joinPointBinding_maybe, joinPointBindings_maybe ) +import GHC.Core.Rules ( mkRuleInfo, lookupRule, getRules ) +import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, + RecFlag(..), Arity ) +import MonadUtils ( mapAccumLM, liftIO ) +import Var ( isTyCoVar ) +import Maybes ( orElse ) +import Control.Monad +import Outputable +import FastString +import Util +import ErrUtils +import Module ( moduleName, pprModuleName ) +import PrimOp ( PrimOp (SeqOp) ) + + +{- +The guts of the simplifier is in this module, but the driver loop for +the simplifier is in GHC.Core.Op.Simplify.Driver + +Note [The big picture] +~~~~~~~~~~~~~~~~~~~~~~ +The general shape of the simplifier is this: + + simplExpr :: SimplEnv -> InExpr -> SimplCont -> SimplM (SimplFloats, OutExpr) + simplBind :: SimplEnv -> InBind -> SimplM (SimplFloats, SimplEnv) + + * SimplEnv contains + - Simplifier mode (which includes DynFlags for convenience) + - Ambient substitution + - InScopeSet + + * SimplFloats contains + - Let-floats (which includes ok-for-spec case-floats) + - Join floats + - InScopeSet (including all the floats) + + * Expressions + simplExpr :: SimplEnv -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) + The result of simplifying an /expression/ is (floats, expr) + - A bunch of floats (let bindings, join bindings) + - A simplified expression. + The overall result is effectively (let floats in expr) + + * Bindings + simplBind :: SimplEnv -> InBind -> SimplM (SimplFloats, SimplEnv) + The result of simplifying a binding is + - A bunch of floats, the last of which is the simplified binding + There may be auxiliary bindings too; see prepareRhs + - An environment suitable for simplifying the scope of the binding + + The floats may also be empty, if the binding is inlined unconditionally; + in that case the returned SimplEnv will have an augmented substitution. + + The returned floats and env both have an in-scope set, and they are + guaranteed to be the same. + + +Note [Shadowing] +~~~~~~~~~~~~~~~~ +The simplifier used to guarantee that the output had no shadowing, but +it does not do so any more. (Actually, it never did!) The reason is +documented with simplifyArgs. + + +Eta expansion +~~~~~~~~~~~~~~ +For eta expansion, we want to catch things like + + case e of (a,b) -> \x -> case a of (p,q) -> \y -> r + +If the \x was on the RHS of a let, we'd eta expand to bring the two +lambdas together. And in general that's a good thing to do. Perhaps +we should eta expand wherever we find a (value) lambda? Then the eta +expansion at a let RHS can concentrate solely on the PAP case. + +Note [In-scope set as a substitution] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As per Note [Lookups in in-scope set], an in-scope set can act as +a substitution. Specifically, it acts as a substitution from variable to +variables /with the same unique/. + +Why do we need this? Well, during the course of the simplifier, we may want to +adjust inessential properties of a variable. For instance, when performing a +beta-reduction, we change + + (\x. e) u ==> let x = u in e + +We typically want to add an unfolding to `x` so that it inlines to (the +simplification of) `u`. + +We do that by adding the unfolding to the binder `x`, which is added to the +in-scope set. When simplifying occurrences of `x` (every occurrence!), they are +replaced by their “updated” version from the in-scope set, hence inherit the +unfolding. This happens in `SimplEnv.substId`. + +Another example. Consider + + case x of y { Node a b -> ...y... + ; Leaf v -> ...y... } + +In the Node branch want y's unfolding to be (Node a b); in the Leaf branch we +want y's unfolding to be (Leaf v). We achieve this by adding the appropriate +unfolding to y, and re-adding it to the in-scope set. See the calls to +`addBinderUnfolding` in `Simplify.addAltUnfoldings` and elsewhere. + +It's quite convenient. This way we don't need to manipulate the substitution all +the time: every update to a binder is automatically reflected to its bound +occurrences. + +************************************************************************ +* * +\subsection{Bindings} +* * +************************************************************************ +-} + +simplTopBinds :: SimplEnv -> [InBind] -> SimplM (SimplFloats, SimplEnv) +-- See Note [The big picture] +simplTopBinds env0 binds0 + = do { -- Put all the top-level binders into scope at the start + -- so that if a transformation rule has unexpectedly brought + -- anything into scope, then we don't get a complaint about that. + -- It's rather as if the top-level binders were imported. + -- See note [Glomming] in OccurAnal. + ; env1 <- {-#SCC "simplTopBinds-simplRecBndrs" #-} simplRecBndrs env0 (bindersOfBinds binds0) + ; (floats, env2) <- {-#SCC "simplTopBinds-simpl_binds" #-} simpl_binds env1 binds0 + ; freeTick SimplifierDone + ; return (floats, env2) } + where + -- We need to track the zapped top-level binders, because + -- they should have their fragile IdInfo zapped (notably occurrence info) + -- That's why we run down binds and bndrs' simultaneously. + -- + simpl_binds :: SimplEnv -> [InBind] -> SimplM (SimplFloats, SimplEnv) + simpl_binds env [] = return (emptyFloats env, env) + simpl_binds env (bind:binds) = do { (float, env1) <- simpl_bind env bind + ; (floats, env2) <- simpl_binds env1 binds + ; return (float `addFloats` floats, env2) } + + simpl_bind env (Rec pairs) + = simplRecBind env TopLevel Nothing pairs + simpl_bind env (NonRec b r) + = do { (env', b') <- addBndrRules env b (lookupRecBndr env b) Nothing + ; simplRecOrTopPair env' TopLevel NonRecursive Nothing b b' r } + +{- +************************************************************************ +* * + Lazy bindings +* * +************************************************************************ + +simplRecBind is used for + * recursive bindings only +-} + +simplRecBind :: SimplEnv -> TopLevelFlag -> MaybeJoinCont + -> [(InId, InExpr)] + -> SimplM (SimplFloats, SimplEnv) +simplRecBind env0 top_lvl mb_cont pairs0 + = do { (env_with_info, triples) <- mapAccumLM add_rules env0 pairs0 + ; (rec_floats, env1) <- go env_with_info triples + ; return (mkRecFloats rec_floats, env1) } + where + add_rules :: SimplEnv -> (InBndr,InExpr) -> SimplM (SimplEnv, (InBndr, OutBndr, InExpr)) + -- Add the (substituted) rules to the binder + add_rules env (bndr, rhs) + = do { (env', bndr') <- addBndrRules env bndr (lookupRecBndr env bndr) mb_cont + ; return (env', (bndr, bndr', rhs)) } + + go env [] = return (emptyFloats env, env) + + go env ((old_bndr, new_bndr, rhs) : pairs) + = do { (float, env1) <- simplRecOrTopPair env top_lvl Recursive mb_cont + old_bndr new_bndr rhs + ; (floats, env2) <- go env1 pairs + ; return (float `addFloats` floats, env2) } + +{- +simplOrTopPair is used for + * recursive bindings (whether top level or not) + * top-level non-recursive bindings + +It assumes the binder has already been simplified, but not its IdInfo. +-} + +simplRecOrTopPair :: SimplEnv + -> TopLevelFlag -> RecFlag -> MaybeJoinCont + -> InId -> OutBndr -> InExpr -- Binder and rhs + -> SimplM (SimplFloats, SimplEnv) + +simplRecOrTopPair env top_lvl is_rec mb_cont old_bndr new_bndr rhs + | Just env' <- preInlineUnconditionally env top_lvl old_bndr rhs env + = {-#SCC "simplRecOrTopPair-pre-inline-uncond" #-} + trace_bind "pre-inline-uncond" $ + do { tick (PreInlineUnconditionally old_bndr) + ; return ( emptyFloats env, env' ) } + + | Just cont <- mb_cont + = {-#SCC "simplRecOrTopPair-join" #-} + ASSERT( isNotTopLevel top_lvl && isJoinId new_bndr ) + trace_bind "join" $ + simplJoinBind env cont old_bndr new_bndr rhs env + + | otherwise + = {-#SCC "simplRecOrTopPair-normal" #-} + trace_bind "normal" $ + simplLazyBind env top_lvl is_rec old_bndr new_bndr rhs env + + where + dflags = seDynFlags env + + -- trace_bind emits a trace for each top-level binding, which + -- helps to locate the tracing for inlining and rule firing + trace_bind what thing_inside + | not (dopt Opt_D_verbose_core2core dflags) + = thing_inside + | otherwise + = traceAction dflags ("SimplBind " ++ what) + (ppr old_bndr) thing_inside + +-------------------------- +simplLazyBind :: SimplEnv + -> TopLevelFlag -> RecFlag + -> InId -> OutId -- Binder, both pre-and post simpl + -- Not a JoinId + -- The OutId has IdInfo, except arity, unfolding + -- Ids only, no TyVars + -> InExpr -> SimplEnv -- The RHS and its environment + -> SimplM (SimplFloats, SimplEnv) +-- Precondition: not a JoinId +-- Precondition: rhs obeys the let/app invariant +-- NOT used for JoinIds +simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se + = ASSERT( isId bndr ) + ASSERT2( not (isJoinId bndr), ppr bndr ) + -- pprTrace "simplLazyBind" ((ppr bndr <+> ppr bndr1) $$ ppr rhs $$ ppr (seIdSubst rhs_se)) $ + do { let rhs_env = rhs_se `setInScopeFromE` env + (tvs, body) = case collectTyAndValBinders rhs of + (tvs, [], body) + | surely_not_lam body -> (tvs, body) + _ -> ([], rhs) + + surely_not_lam (Lam {}) = False + surely_not_lam (Tick t e) + | not (tickishFloatable t) = surely_not_lam e + -- eta-reduction could float + surely_not_lam _ = True + -- Do not do the "abstract tyvar" thing if there's + -- a lambda inside, because it defeats eta-reduction + -- f = /\a. \x. g a x + -- should eta-reduce. + + + ; (body_env, tvs') <- {-#SCC "simplBinders" #-} simplBinders rhs_env tvs + -- See Note [Floating and type abstraction] in GHC.Core.Op.Simplify.Utils + + -- Simplify the RHS + ; let rhs_cont = mkRhsStop (substTy body_env (exprType body)) + ; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont + + -- Never float join-floats out of a non-join let-binding + -- So wrap the body in the join-floats right now + -- Hence: body_floats1 consists only of let-floats + ; let (body_floats1, body1) = wrapJoinFloatsX body_floats0 body0 + + -- ANF-ise a constructor or PAP rhs + -- We get at most one float per argument here + ; (let_floats, body2) <- {-#SCC "prepareRhs" #-} prepareRhs (getMode env) top_lvl + (getOccFS bndr1) (idInfo bndr1) body1 + ; let body_floats2 = body_floats1 `addLetFloats` let_floats + + ; (rhs_floats, rhs') + <- if not (doFloatFromRhs top_lvl is_rec False body_floats2 body2) + then -- No floating, revert to body1 + {-#SCC "simplLazyBind-no-floating" #-} + do { rhs' <- mkLam env tvs' (wrapFloats body_floats2 body1) rhs_cont + ; return (emptyFloats env, rhs') } + + else if null tvs then -- Simple floating + {-#SCC "simplLazyBind-simple-floating" #-} + do { tick LetFloatFromLet + ; return (body_floats2, body2) } + + else -- Do type-abstraction first + {-#SCC "simplLazyBind-type-abstraction-first" #-} + do { tick LetFloatFromLet + ; (poly_binds, body3) <- abstractFloats (seDynFlags env) top_lvl + tvs' body_floats2 body2 + ; let floats = foldl' extendFloats (emptyFloats env) poly_binds + ; rhs' <- mkLam env tvs' body3 rhs_cont + ; return (floats, rhs') } + + ; (bind_float, env2) <- completeBind (env `setInScopeFromF` rhs_floats) + top_lvl Nothing bndr bndr1 rhs' + ; return (rhs_floats `addFloats` bind_float, env2) } + +-------------------------- +simplJoinBind :: SimplEnv + -> SimplCont + -> InId -> OutId -- Binder, both pre-and post simpl + -- The OutId has IdInfo, except arity, + -- unfolding + -> InExpr -> SimplEnv -- The right hand side and its env + -> SimplM (SimplFloats, SimplEnv) +simplJoinBind env cont old_bndr new_bndr rhs rhs_se + = do { let rhs_env = rhs_se `setInScopeFromE` env + ; rhs' <- simplJoinRhs rhs_env old_bndr rhs cont + ; completeBind env NotTopLevel (Just cont) old_bndr new_bndr rhs' } + +-------------------------- +simplNonRecX :: SimplEnv + -> InId -- Old binder; not a JoinId + -> OutExpr -- Simplified RHS + -> SimplM (SimplFloats, SimplEnv) +-- A specialised variant of simplNonRec used when the RHS is already +-- simplified, notably in knownCon. It uses case-binding where necessary. +-- +-- Precondition: rhs satisfies the let/app invariant + +simplNonRecX env bndr new_rhs + | ASSERT2( not (isJoinId bndr), ppr bndr ) + isDeadBinder bndr -- Not uncommon; e.g. case (a,b) of c { (p,q) -> p } + = return (emptyFloats env, env) -- Here c is dead, and we avoid + -- creating the binding c = (a,b) + + | Coercion co <- new_rhs + = return (emptyFloats env, extendCvSubst env bndr co) + + | otherwise + = do { (env', bndr') <- simplBinder env bndr + ; completeNonRecX NotTopLevel env' (isStrictId bndr) bndr bndr' new_rhs } + -- simplNonRecX is only used for NotTopLevel things + +-------------------------- +completeNonRecX :: TopLevelFlag -> SimplEnv + -> Bool + -> InId -- Old binder; not a JoinId + -> OutId -- New binder + -> OutExpr -- Simplified RHS + -> SimplM (SimplFloats, SimplEnv) -- The new binding is in the floats +-- Precondition: rhs satisfies the let/app invariant +-- See Note [Core let/app invariant] in GHC.Core + +completeNonRecX top_lvl env is_strict old_bndr new_bndr new_rhs + = ASSERT2( not (isJoinId new_bndr), ppr new_bndr ) + do { (prepd_floats, rhs1) <- prepareRhs (getMode env) top_lvl (getOccFS new_bndr) + (idInfo new_bndr) new_rhs + ; let floats = emptyFloats env `addLetFloats` prepd_floats + ; (rhs_floats, rhs2) <- + if doFloatFromRhs NotTopLevel NonRecursive is_strict floats rhs1 + then -- Add the floats to the main env + do { tick LetFloatFromLet + ; return (floats, rhs1) } + else -- Do not float; wrap the floats around the RHS + return (emptyFloats env, wrapFloats floats rhs1) + + ; (bind_float, env2) <- completeBind (env `setInScopeFromF` rhs_floats) + NotTopLevel Nothing + old_bndr new_bndr rhs2 + ; return (rhs_floats `addFloats` bind_float, env2) } + + +{- ********************************************************************* +* * + prepareRhs, makeTrivial +* * +************************************************************************ + +Note [prepareRhs] +~~~~~~~~~~~~~~~~~ +prepareRhs takes a putative RHS, checks whether it's a PAP or +constructor application and, if so, converts it to ANF, so that the +resulting thing can be inlined more easily. Thus + x = (f a, g b) +becomes + t1 = f a + t2 = g b + x = (t1,t2) + +We also want to deal well cases like this + v = (f e1 `cast` co) e2 +Here we want to make e1,e2 trivial and get + x1 = e1; x2 = e2; v = (f x1 `cast` co) v2 +That's what the 'go' loop in prepareRhs does +-} + +prepareRhs :: SimplMode -> TopLevelFlag + -> FastString -- Base for any new variables + -> IdInfo -- IdInfo for the LHS of this binding + -> OutExpr + -> SimplM (LetFloats, OutExpr) +-- Transforms a RHS into a better RHS by adding floats +-- e.g x = Just e +-- becomes a = e +-- x = Just a +-- See Note [prepareRhs] +prepareRhs mode top_lvl occ info (Cast rhs co) -- Note [Float coercions] + | let ty1 = coercionLKind co -- Do *not* do this if rhs has an unlifted type + , not (isUnliftedType ty1) -- see Note [Float coercions (unlifted)] + = do { (floats, rhs') <- makeTrivialWithInfo mode top_lvl occ sanitised_info rhs + ; return (floats, Cast rhs' co) } + where + sanitised_info = vanillaIdInfo `setStrictnessInfo` strictnessInfo info + `setCprInfo` cprInfo info + `setDemandInfo` demandInfo info + +prepareRhs mode top_lvl occ _ rhs0 + = do { (_is_exp, floats, rhs1) <- go 0 rhs0 + ; return (floats, rhs1) } + where + go :: Int -> OutExpr -> SimplM (Bool, LetFloats, OutExpr) + go n_val_args (Cast rhs co) + = do { (is_exp, floats, rhs') <- go n_val_args rhs + ; return (is_exp, floats, Cast rhs' co) } + go n_val_args (App fun (Type ty)) + = do { (is_exp, floats, rhs') <- go n_val_args fun + ; return (is_exp, floats, App rhs' (Type ty)) } + go n_val_args (App fun arg) + = do { (is_exp, floats1, fun') <- go (n_val_args+1) fun + ; case is_exp of + False -> return (False, emptyLetFloats, App fun arg) + True -> do { (floats2, arg') <- makeTrivial mode top_lvl occ arg + ; return (True, floats1 `addLetFlts` floats2, App fun' arg') } } + go n_val_args (Var fun) + = return (is_exp, emptyLetFloats, Var fun) + where + is_exp = isExpandableApp fun n_val_args -- The fun a constructor or PAP + -- See Note [CONLIKE pragma] in BasicTypes + -- The definition of is_exp should match that in + -- OccurAnal.occAnalApp + + go n_val_args (Tick t rhs) + -- We want to be able to float bindings past this + -- tick. Non-scoping ticks don't care. + | tickishScoped t == NoScope + = do { (is_exp, floats, rhs') <- go n_val_args rhs + ; return (is_exp, floats, Tick t rhs') } + + -- On the other hand, for scoping ticks we need to be able to + -- copy them on the floats, which in turn is only allowed if + -- we can obtain non-counting ticks. + | (not (tickishCounts t) || tickishCanSplit t) + = do { (is_exp, floats, rhs') <- go n_val_args rhs + ; let tickIt (id, expr) = (id, mkTick (mkNoCount t) expr) + floats' = mapLetFloats floats tickIt + ; return (is_exp, floats', Tick t rhs') } + + go _ other + = return (False, emptyLetFloats, other) + +{- +Note [Float coercions] +~~~~~~~~~~~~~~~~~~~~~~ +When we find the binding + x = e `cast` co +we'd like to transform it to + x' = e + x = x `cast` co -- A trivial binding +There's a chance that e will be a constructor application or function, or something +like that, so moving the coercion to the usage site may well cancel the coercions +and lead to further optimisation. Example: + + data family T a :: * + data instance T Int = T Int + + foo :: Int -> Int -> Int + foo m n = ... + where + x = T m + go 0 = 0 + go n = case x of { T m -> go (n-m) } + -- This case should optimise + +Note [Preserve strictness when floating coercions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the Note [Float coercions] transformation, keep the strictness info. +Eg + f = e `cast` co -- f has strictness SSL +When we transform to + f' = e -- f' also has strictness SSL + f = f' `cast` co -- f still has strictness SSL + +Its not wrong to drop it on the floor, but better to keep it. + +Note [Float coercions (unlifted)] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +BUT don't do [Float coercions] if 'e' has an unlifted type. +This *can* happen: + + foo :: Int = (error (# Int,Int #) "urk") + `cast` CoUnsafe (# Int,Int #) Int + +If do the makeTrivial thing to the error call, we'll get + foo = case error (# Int,Int #) "urk" of v -> v `cast` ... +But 'v' isn't in scope! + +These strange casts can happen as a result of case-of-case + bar = case (case x of { T -> (# 2,3 #); F -> error "urk" }) of + (# p,q #) -> p+q +-} + +makeTrivialArg :: SimplMode -> ArgSpec -> SimplM (LetFloats, ArgSpec) +makeTrivialArg mode (ValArg e) + = do { (floats, e') <- makeTrivial mode NotTopLevel (fsLit "arg") e + ; return (floats, ValArg e') } +makeTrivialArg _ arg + = return (emptyLetFloats, arg) -- CastBy, TyArg + +makeTrivial :: SimplMode -> TopLevelFlag + -> FastString -- ^ A "friendly name" to build the new binder from + -> OutExpr -- ^ This expression satisfies the let/app invariant + -> SimplM (LetFloats, OutExpr) +-- Binds the expression to a variable, if it's not trivial, returning the variable +makeTrivial mode top_lvl context expr + = makeTrivialWithInfo mode top_lvl context vanillaIdInfo expr + +makeTrivialWithInfo :: SimplMode -> TopLevelFlag + -> FastString -- ^ a "friendly name" to build the new binder from + -> IdInfo + -> OutExpr -- ^ This expression satisfies the let/app invariant + -> SimplM (LetFloats, OutExpr) +-- Propagate strictness and demand info to the new binder +-- Note [Preserve strictness when floating coercions] +-- Returned SimplEnv has same substitution as incoming one +makeTrivialWithInfo mode top_lvl occ_fs info expr + | exprIsTrivial expr -- Already trivial + || not (bindingOk top_lvl expr expr_ty) -- Cannot trivialise + -- See Note [Cannot trivialise] + = return (emptyLetFloats, expr) + + | otherwise + = do { (floats, expr1) <- prepareRhs mode top_lvl occ_fs info expr + ; if exprIsTrivial expr1 -- See Note [Trivial after prepareRhs] + then return (floats, expr1) + else do + { uniq <- getUniqueM + ; let name = mkSystemVarName uniq occ_fs + var = mkLocalIdWithInfo name expr_ty info + + -- Now something very like completeBind, + -- but without the postInlineUnconditionally part + ; (arity, is_bot, expr2) <- tryEtaExpandRhs mode var expr1 + ; unf <- mkLetUnfolding (sm_dflags mode) top_lvl InlineRhs var expr2 + + ; let final_id = addLetBndrInfo var arity is_bot unf + bind = NonRec final_id expr2 + + ; return ( floats `addLetFlts` unitLetFloat bind, Var final_id ) }} + where + expr_ty = exprType expr + +bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool +-- True iff we can have a binding of this expression at this level +-- Precondition: the type is the type of the expression +bindingOk top_lvl expr expr_ty + | isTopLevel top_lvl = exprIsTopLevelBindable expr expr_ty + | otherwise = True + +{- Note [Trivial after prepareRhs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we call makeTrival on (e |> co), the recursive use of prepareRhs +may leave us with + { a1 = e } and (a1 |> co) +Now the latter is trivial, so we don't want to let-bind it. + +Note [Cannot trivialise] +~~~~~~~~~~~~~~~~~~~~~~~~ +Consider: + f :: Int -> Addr# + + foo :: Bar + foo = Bar (f 3) + +Then we can't ANF-ise foo, even though we'd like to, because +we can't make a top-level binding for the Addr# (f 3). And if +so we don't want to turn it into + foo = let x = f 3 in Bar x +because we'll just end up inlining x back, and that makes the +simplifier loop. Better not to ANF-ise it at all. + +Literal strings are an exception. + + foo = Ptr "blob"# + +We want to turn this into: + + foo1 = "blob"# + foo = Ptr foo1 + +See Note [Core top-level string literals] in GHC.Core. + +************************************************************************ +* * + Completing a lazy binding +* * +************************************************************************ + +completeBind + * deals only with Ids, not TyVars + * takes an already-simplified binder and RHS + * is used for both recursive and non-recursive bindings + * is used for both top-level and non-top-level bindings + +It does the following: + - tries discarding a dead binding + - tries PostInlineUnconditionally + - add unfolding [this is the only place we add an unfolding] + - add arity + +It does *not* attempt to do let-to-case. Why? Because it is used for + - top-level bindings (when let-to-case is impossible) + - many situations where the "rhs" is known to be a WHNF + (so let-to-case is inappropriate). + +Nor does it do the atomic-argument thing +-} + +completeBind :: SimplEnv + -> TopLevelFlag -- Flag stuck into unfolding + -> MaybeJoinCont -- Required only for join point + -> InId -- Old binder + -> OutId -> OutExpr -- New binder and RHS + -> SimplM (SimplFloats, SimplEnv) +-- completeBind may choose to do its work +-- * by extending the substitution (e.g. let x = y in ...) +-- * or by adding to the floats in the envt +-- +-- Binder /can/ be a JoinId +-- Precondition: rhs obeys the let/app invariant +completeBind env top_lvl mb_cont old_bndr new_bndr new_rhs + | isCoVar old_bndr + = case new_rhs of + Coercion co -> return (emptyFloats env, extendCvSubst env old_bndr co) + _ -> return (mkFloatBind env (NonRec new_bndr new_rhs)) + + | otherwise + = ASSERT( isId new_bndr ) + do { let old_info = idInfo old_bndr + old_unf = unfoldingInfo old_info + occ_info = occInfo old_info + + -- Do eta-expansion on the RHS of the binding + -- See Note [Eta-expanding at let bindings] in GHC.Core.Op.Simplify.Utils + ; (new_arity, is_bot, final_rhs) <- tryEtaExpandRhs (getMode env) + new_bndr new_rhs + + -- Simplify the unfolding + ; new_unfolding <- simplLetUnfolding env top_lvl mb_cont old_bndr + final_rhs (idType new_bndr) old_unf + + ; let final_bndr = addLetBndrInfo new_bndr new_arity is_bot new_unfolding + -- See Note [In-scope set as a substitution] + + ; if postInlineUnconditionally env top_lvl final_bndr occ_info final_rhs + + then -- Inline and discard the binding + do { tick (PostInlineUnconditionally old_bndr) + ; return ( emptyFloats env + , extendIdSubst env old_bndr $ + DoneEx final_rhs (isJoinId_maybe new_bndr)) } + -- Use the substitution to make quite, quite sure that the + -- substitution will happen, since we are going to discard the binding + + else -- Keep the binding + -- pprTrace "Binding" (ppr final_bndr <+> ppr new_unfolding) $ + return (mkFloatBind env (NonRec final_bndr final_rhs)) } + +addLetBndrInfo :: OutId -> Arity -> Bool -> Unfolding -> OutId +addLetBndrInfo new_bndr new_arity is_bot new_unf + = new_bndr `setIdInfo` info5 + where + info1 = idInfo new_bndr `setArityInfo` new_arity + + -- Unfolding info: Note [Setting the new unfolding] + info2 = info1 `setUnfoldingInfo` new_unf + + -- Demand info: Note [Setting the demand info] + -- We also have to nuke demand info if for some reason + -- eta-expansion *reduces* the arity of the binding to less + -- than that of the strictness sig. This can happen: see Note [Arity decrease]. + info3 | isEvaldUnfolding new_unf + || (case strictnessInfo info2 of + StrictSig dmd_ty -> new_arity < dmdTypeDepth dmd_ty) + = zapDemandInfo info2 `orElse` info2 + | otherwise + = info2 + + -- Bottoming bindings: see Note [Bottoming bindings] + info4 | is_bot = info3 + `setStrictnessInfo` + mkClosedStrictSig (replicate new_arity topDmd) botDiv + `setCprInfo` mkCprSig new_arity botCpr + | otherwise = info3 + + -- Zap call arity info. We have used it by now (via + -- `tryEtaExpandRhs`), and the simplifier can invalidate this + -- information, leading to broken code later (e.g. #13479) + info5 = zapCallArityInfo info4 + + +{- Note [Arity decrease] +~~~~~~~~~~~~~~~~~~~~~~~~ +Generally speaking the arity of a binding should not decrease. But it *can* +legitimately happen because of RULES. Eg + f = g Int +where g has arity 2, will have arity 2. But if there's a rewrite rule + g Int --> h +where h has arity 1, then f's arity will decrease. Here's a real-life example, +which is in the output of Specialise: + + Rec { + $dm {Arity 2} = \d.\x. op d + {-# RULES forall d. $dm Int d = $s$dm #-} + + dInt = MkD .... opInt ... + opInt {Arity 1} = $dm dInt + + $s$dm {Arity 0} = \x. op dInt } + +Here opInt has arity 1; but when we apply the rule its arity drops to 0. +That's why Specialise goes to a little trouble to pin the right arity +on specialised functions too. + +Note [Bottoming bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have + let x = error "urk" + in ...(case x of <alts>)... +or + let f = \x. error (x ++ "urk") + in ...(case f "foo" of <alts>)... + +Then we'd like to drop the dead <alts> immediately. So it's good to +propagate the info that x's RHS is bottom to x's IdInfo as rapidly as +possible. + +We use tryEtaExpandRhs on every binding, and it turns ou that the +arity computation it performs (via GHC.Core.Arity.findRhsArity) already +does a simple bottoming-expression analysis. So all we need to do +is propagate that info to the binder's IdInfo. + +This showed up in #12150; see comment:16. + +Note [Setting the demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the unfolding is a value, the demand info may +go pear-shaped, so we nuke it. Example: + let x = (a,b) in + case x of (p,q) -> h p q x +Here x is certainly demanded. But after we've nuked +the case, we'll get just + let x = (a,b) in h a b x +and now x is not demanded (I'm assuming h is lazy) +This really happens. Similarly + let f = \x -> e in ...f..f... +After inlining f at some of its call sites the original binding may +(for example) be no longer strictly demanded. +The solution here is a bit ad hoc... + + +************************************************************************ +* * +\subsection[Simplify-simplExpr]{The main function: simplExpr} +* * +************************************************************************ + +The reason for this OutExprStuff stuff is that we want to float *after* +simplifying a RHS, not before. If we do so naively we get quadratic +behaviour as things float out. + +To see why it's important to do it after, consider this (real) example: + + let t = f x + in fst t +==> + let t = let a = e1 + b = e2 + in (a,b) + in fst t +==> + let a = e1 + b = e2 + t = (a,b) + in + a -- Can't inline a this round, cos it appears twice +==> + e1 + +Each of the ==> steps is a round of simplification. We'd save a +whole round if we float first. This can cascade. Consider + + let f = g d + in \x -> ...f... +==> + let f = let d1 = ..d.. in \y -> e + in \x -> ...f... +==> + let d1 = ..d.. + in \x -> ...(\y ->e)... + +Only in this second round can the \y be applied, and it +might do the same again. +-} + +simplExpr :: SimplEnv -> CoreExpr -> SimplM CoreExpr +simplExpr env (Type ty) + = do { ty' <- simplType env ty -- See Note [Avoiding space leaks in OutType] + ; return (Type ty') } + +simplExpr env expr + = simplExprC env expr (mkBoringStop expr_out_ty) + where + expr_out_ty :: OutType + expr_out_ty = substTy env (exprType expr) + -- NB: Since 'expr' is term-valued, not (Type ty), this call + -- to exprType will succeed. exprType fails on (Type ty). + +simplExprC :: SimplEnv + -> InExpr -- A term-valued expression, never (Type ty) + -> SimplCont + -> SimplM OutExpr + -- Simplify an expression, given a continuation +simplExprC env expr cont + = -- pprTrace "simplExprC" (ppr expr $$ ppr cont {- $$ ppr (seIdSubst env) -} $$ ppr (seLetFloats env) ) $ + do { (floats, expr') <- simplExprF env expr cont + ; -- pprTrace "simplExprC ret" (ppr expr $$ ppr expr') $ + -- pprTrace "simplExprC ret3" (ppr (seInScope env')) $ + -- pprTrace "simplExprC ret4" (ppr (seLetFloats env')) $ + return (wrapFloats floats expr') } + +-------------------------------------------------- +simplExprF :: SimplEnv + -> InExpr -- A term-valued expression, never (Type ty) + -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +simplExprF env e cont + = {- pprTrace "simplExprF" (vcat + [ ppr e + , text "cont =" <+> ppr cont + , text "inscope =" <+> ppr (seInScope env) + , text "tvsubst =" <+> ppr (seTvSubst env) + , text "idsubst =" <+> ppr (seIdSubst env) + , text "cvsubst =" <+> ppr (seCvSubst env) + ]) $ -} + simplExprF1 env e cont + +simplExprF1 :: SimplEnv -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +simplExprF1 _ (Type ty) _ + = pprPanic "simplExprF: type" (ppr ty) + -- simplExprF does only with term-valued expressions + -- The (Type ty) case is handled separately by simplExpr + -- and by the other callers of simplExprF + +simplExprF1 env (Var v) cont = {-#SCC "simplIdF" #-} simplIdF env v cont +simplExprF1 env (Lit lit) cont = {-#SCC "rebuild" #-} rebuild env (Lit lit) cont +simplExprF1 env (Tick t expr) cont = {-#SCC "simplTick" #-} simplTick env t expr cont +simplExprF1 env (Cast body co) cont = {-#SCC "simplCast" #-} simplCast env body co cont +simplExprF1 env (Coercion co) cont = {-#SCC "simplCoercionF" #-} simplCoercionF env co cont + +simplExprF1 env (App fun arg) cont + = {-#SCC "simplExprF1-App" #-} case arg of + Type ty -> do { -- The argument type will (almost) certainly be used + -- in the output program, so just force it now. + -- See Note [Avoiding space leaks in OutType] + arg' <- simplType env ty + + -- But use substTy, not simplType, to avoid forcing + -- the hole type; it will likely not be needed. + -- See Note [The hole type in ApplyToTy] + ; let hole' = substTy env (exprType fun) + + ; simplExprF env fun $ + ApplyToTy { sc_arg_ty = arg' + , sc_hole_ty = hole' + , sc_cont = cont } } + _ -> simplExprF env fun $ + ApplyToVal { sc_arg = arg, sc_env = env + , sc_dup = NoDup, sc_cont = cont } + +simplExprF1 env expr@(Lam {}) cont + = {-#SCC "simplExprF1-Lam" #-} + simplLam env zapped_bndrs body cont + -- The main issue here is under-saturated lambdas + -- (\x1. \x2. e) arg1 + -- Here x1 might have "occurs-once" occ-info, because occ-info + -- is computed assuming that a group of lambdas is applied + -- all at once. If there are too few args, we must zap the + -- occ-info, UNLESS the remaining binders are one-shot + where + (bndrs, body) = collectBinders expr + zapped_bndrs | need_to_zap = map zap bndrs + | otherwise = bndrs + + need_to_zap = any zappable_bndr (drop n_args bndrs) + n_args = countArgs cont + -- NB: countArgs counts all the args (incl type args) + -- and likewise drop counts all binders (incl type lambdas) + + zappable_bndr b = isId b && not (isOneShotBndr b) + zap b | isTyVar b = b + | otherwise = zapLamIdInfo b + +simplExprF1 env (Case scrut bndr _ alts) cont + = {-#SCC "simplExprF1-Case" #-} + simplExprF env scrut (Select { sc_dup = NoDup, sc_bndr = bndr + , sc_alts = alts + , sc_env = env, sc_cont = cont }) + +simplExprF1 env (Let (Rec pairs) body) cont + | Just pairs' <- joinPointBindings_maybe pairs + = {-#SCC "simplRecJoinPoin" #-} simplRecJoinPoint env pairs' body cont + + | otherwise + = {-#SCC "simplRecE" #-} simplRecE env pairs body cont + +simplExprF1 env (Let (NonRec bndr rhs) body) cont + | Type ty <- rhs -- First deal with type lets (let a = Type ty in e) + = {-#SCC "simplExprF1-NonRecLet-Type" #-} + ASSERT( isTyVar bndr ) + do { ty' <- simplType env ty + ; simplExprF (extendTvSubst env bndr ty') body cont } + + | Just (bndr', rhs') <- joinPointBinding_maybe bndr rhs + = {-#SCC "simplNonRecJoinPoint" #-} simplNonRecJoinPoint env bndr' rhs' body cont + + | otherwise + = {-#SCC "simplNonRecE" #-} simplNonRecE env bndr (rhs, env) ([], body) cont + +{- Note [Avoiding space leaks in OutType] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since the simplifier is run for multiple iterations, we need to ensure +that any thunks in the output of one simplifier iteration are forced +by the evaluation of the next simplifier iteration. Otherwise we may +retain multiple copies of the Core program and leak a terrible amount +of memory (as in #13426). + +The simplifier is naturally strict in the entire "Expr part" of the +input Core program, because any expression may contain binders, which +we must find in order to extend the SimplEnv accordingly. But types +do not contain binders and so it is tempting to write things like + + simplExpr env (Type ty) = return (Type (substTy env ty)) -- Bad! + +This is Bad because the result includes a thunk (substTy env ty) which +retains a reference to the whole simplifier environment; and the next +simplifier iteration will not force this thunk either, because the +line above is not strict in ty. + +So instead our strategy is for the simplifier to fully evaluate +OutTypes when it emits them into the output Core program, for example + + simplExpr env (Type ty) = do { ty' <- simplType env ty -- Good + ; return (Type ty') } + +where the only difference from above is that simplType calls seqType +on the result of substTy. + +However, SimplCont can also contain OutTypes and it's not necessarily +a good idea to force types on the way in to SimplCont, because they +may end up not being used and forcing them could be a lot of wasted +work. T5631 is a good example of this. + +- For ApplyToTy's sc_arg_ty, we force the type on the way in because + the type will almost certainly appear as a type argument in the + output program. + +- For the hole types in Stop and ApplyToTy, we force the type when we + emit it into the output program, after obtaining it from + contResultType. (The hole type in ApplyToTy is only directly used + to form the result type in a new Stop continuation.) +-} + +--------------------------------- +-- Simplify a join point, adding the context. +-- Context goes *inside* the lambdas. IOW, if the join point has arity n, we do: +-- \x1 .. xn -> e => \x1 .. xn -> E[e] +-- Note that we need the arity of the join point, since e may be a lambda +-- (though this is unlikely). See Note [Join points and case-of-case]. +simplJoinRhs :: SimplEnv -> InId -> InExpr -> SimplCont + -> SimplM OutExpr +simplJoinRhs env bndr expr cont + | Just arity <- isJoinId_maybe bndr + = do { let (join_bndrs, join_body) = collectNBinders arity expr + ; (env', join_bndrs') <- simplLamBndrs env join_bndrs + ; join_body' <- simplExprC env' join_body cont + ; return $ mkLams join_bndrs' join_body' } + + | otherwise + = pprPanic "simplJoinRhs" (ppr bndr) + +--------------------------------- +simplType :: SimplEnv -> InType -> SimplM OutType + -- Kept monadic just so we can do the seqType + -- See Note [Avoiding space leaks in OutType] +simplType env ty + = -- pprTrace "simplType" (ppr ty $$ ppr (seTvSubst env)) $ + seqType new_ty `seq` return new_ty + where + new_ty = substTy env ty + +--------------------------------- +simplCoercionF :: SimplEnv -> InCoercion -> SimplCont + -> SimplM (SimplFloats, OutExpr) +simplCoercionF env co cont + = do { co' <- simplCoercion env co + ; rebuild env (Coercion co') cont } + +simplCoercion :: SimplEnv -> InCoercion -> SimplM OutCoercion +simplCoercion env co + = do { dflags <- getDynFlags + ; let opt_co = optCoercion dflags (getTCvSubst env) co + ; seqCo opt_co `seq` return opt_co } + +----------------------------------- +-- | Push a TickIt context outwards past applications and cases, as +-- long as this is a non-scoping tick, to let case and application +-- optimisations apply. + +simplTick :: SimplEnv -> Tickish Id -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) +simplTick env tickish expr cont + -- A scoped tick turns into a continuation, so that we can spot + -- (scc t (\x . e)) in simplLam and eliminate the scc. If we didn't do + -- it this way, then it would take two passes of the simplifier to + -- reduce ((scc t (\x . e)) e'). + -- NB, don't do this with counting ticks, because if the expr is + -- bottom, then rebuildCall will discard the continuation. + +-- XXX: we cannot do this, because the simplifier assumes that +-- the context can be pushed into a case with a single branch. e.g. +-- scc<f> case expensive of p -> e +-- becomes +-- case expensive of p -> scc<f> e +-- +-- So I'm disabling this for now. It just means we will do more +-- simplifier iterations that necessary in some cases. + +-- | tickishScoped tickish && not (tickishCounts tickish) +-- = simplExprF env expr (TickIt tickish cont) + + -- For unscoped or soft-scoped ticks, we are allowed to float in new + -- cost, so we simply push the continuation inside the tick. This + -- has the effect of moving the tick to the outside of a case or + -- application context, allowing the normal case and application + -- optimisations to fire. + | tickish `tickishScopesLike` SoftScope + = do { (floats, expr') <- simplExprF env expr cont + ; return (floats, mkTick tickish expr') + } + + -- Push tick inside if the context looks like this will allow us to + -- do a case-of-case - see Note [case-of-scc-of-case] + | Select {} <- cont, Just expr' <- push_tick_inside + = simplExprF env expr' cont + + -- We don't want to move the tick, but we might still want to allow + -- floats to pass through with appropriate wrapping (or not, see + -- wrap_floats below) + --- | not (tickishCounts tickish) || tickishCanSplit tickish + -- = wrap_floats + + | otherwise + = no_floating_past_tick + + where + + -- Try to push tick inside a case, see Note [case-of-scc-of-case]. + push_tick_inside = + case expr0 of + Case scrut bndr ty alts + -> Just $ Case (tickScrut scrut) bndr ty (map tickAlt alts) + _other -> Nothing + where (ticks, expr0) = stripTicksTop movable (Tick tickish expr) + movable t = not (tickishCounts t) || + t `tickishScopesLike` NoScope || + tickishCanSplit t + tickScrut e = foldr mkTick e ticks + -- Alternatives get annotated with all ticks that scope in some way, + -- but we don't want to count entries. + tickAlt (c,bs,e) = (c,bs, foldr mkTick e ts_scope) + ts_scope = map mkNoCount $ + filter (not . (`tickishScopesLike` NoScope)) ticks + + no_floating_past_tick = + do { let (inc,outc) = splitCont cont + ; (floats, expr1) <- simplExprF env expr inc + ; let expr2 = wrapFloats floats expr1 + tickish' = simplTickish env tickish + ; rebuild env (mkTick tickish' expr2) outc + } + +-- Alternative version that wraps outgoing floats with the tick. This +-- results in ticks being duplicated, as we don't make any attempt to +-- eliminate the tick if we re-inline the binding (because the tick +-- semantics allows unrestricted inlining of HNFs), so I'm not doing +-- this any more. FloatOut will catch any real opportunities for +-- floating. +-- +-- wrap_floats = +-- do { let (inc,outc) = splitCont cont +-- ; (env', expr') <- simplExprF (zapFloats env) expr inc +-- ; let tickish' = simplTickish env tickish +-- ; let wrap_float (b,rhs) = (zapIdStrictness (setIdArity b 0), +-- mkTick (mkNoCount tickish') rhs) +-- -- when wrapping a float with mkTick, we better zap the Id's +-- -- strictness info and arity, because it might be wrong now. +-- ; let env'' = addFloats env (mapFloats env' wrap_float) +-- ; rebuild env'' expr' (TickIt tickish' outc) +-- } + + + simplTickish env tickish + | Breakpoint n ids <- tickish + = Breakpoint n (map (getDoneId . substId env) ids) + | otherwise = tickish + + -- Push type application and coercion inside a tick + splitCont :: SimplCont -> (SimplCont, SimplCont) + splitCont cont@(ApplyToTy { sc_cont = tail }) = (cont { sc_cont = inc }, outc) + where (inc,outc) = splitCont tail + splitCont (CastIt co c) = (CastIt co inc, outc) + where (inc,outc) = splitCont c + splitCont other = (mkBoringStop (contHoleType other), other) + + getDoneId (DoneId id) = id + getDoneId (DoneEx e _) = getIdFromTrivialExpr e -- Note [substTickish] in GHC.Core.Subst + getDoneId other = pprPanic "getDoneId" (ppr other) + +-- Note [case-of-scc-of-case] +-- It's pretty important to be able to transform case-of-case when +-- there's an SCC in the way. For example, the following comes up +-- in nofib/real/compress/Encode.hs: +-- +-- case scctick<code_string.r1> +-- case $wcode_string_r13s wild_XC w1_s137 w2_s138 l_aje +-- of _ { (# ww1_s13f, ww2_s13g, ww3_s13h #) -> +-- (ww1_s13f, ww2_s13g, ww3_s13h) +-- } +-- of _ { (ww_s12Y, ww1_s12Z, ww2_s130) -> +-- tick<code_string.f1> +-- (ww_s12Y, +-- ww1_s12Z, +-- PTTrees.PT +-- @ GHC.Types.Char @ GHC.Types.Int wild2_Xj ww2_s130 r_ajf) +-- } +-- +-- We really want this case-of-case to fire, because then the 3-tuple +-- will go away (indeed, the CPR optimisation is relying on this +-- happening). But the scctick is in the way - we need to push it +-- inside to expose the case-of-case. So we perform this +-- transformation on the inner case: +-- +-- scctick c (case e of { p1 -> e1; ...; pn -> en }) +-- ==> +-- case (scctick c e) of { p1 -> scc c e1; ...; pn -> scc c en } +-- +-- So we've moved a constant amount of work out of the scc to expose +-- the case. We only do this when the continuation is interesting: in +-- for now, it has to be another Case (maybe generalise this later). + +{- +************************************************************************ +* * +\subsection{The main rebuilder} +* * +************************************************************************ +-} + +rebuild :: SimplEnv -> OutExpr -> SimplCont -> SimplM (SimplFloats, OutExpr) +-- At this point the substitution in the SimplEnv should be irrelevant; +-- only the in-scope set matters +rebuild env expr cont + = case cont of + Stop {} -> return (emptyFloats env, expr) + TickIt t cont -> rebuild env (mkTick t expr) cont + CastIt co cont -> rebuild env (mkCast expr co) cont + -- NB: mkCast implements the (Coercion co |> g) optimisation + + Select { sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont } + -> rebuildCase (se `setInScopeFromE` env) expr bndr alts cont + + StrictArg { sc_fun = fun, sc_cont = cont } + -> rebuildCall env (fun `addValArgTo` expr) cont + StrictBind { sc_bndr = b, sc_bndrs = bs, sc_body = body + , sc_env = se, sc_cont = cont } + -> do { (floats1, env') <- simplNonRecX (se `setInScopeFromE` env) b expr + -- expr satisfies let/app since it started life + -- in a call to simplNonRecE + ; (floats2, expr') <- simplLam env' bs body cont + ; return (floats1 `addFloats` floats2, expr') } + + ApplyToTy { sc_arg_ty = ty, sc_cont = cont} + -> rebuild env (App expr (Type ty)) cont + + ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag, sc_cont = cont} + -- See Note [Avoid redundant simplification] + -> do { (_, _, arg') <- simplArg env dup_flag se arg + ; rebuild env (App expr arg') cont } + +{- +************************************************************************ +* * +\subsection{Lambdas} +* * +************************************************************************ +-} + +{- Note [Optimising reflexivity] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's important (for compiler performance) to get rid of reflexivity as soon +as it appears. See #11735, #14737, and #15019. + +In particular, we want to behave well on + + * e |> co1 |> co2 + where the two happen to cancel out entirely. That is quite common; + e.g. a newtype wrapping and unwrapping cancel. + + + * (f |> co) @t1 @t2 ... @tn x1 .. xm + Here we wil use pushCoTyArg and pushCoValArg successively, which + build up NthCo stacks. Silly to do that if co is reflexive. + +However, we don't want to call isReflexiveCo too much, because it uses +type equality which is expensive on big types (#14737 comment:7). + +A good compromise (determined experimentally) seems to be to call +isReflexiveCo + * when composing casts, and + * at the end + +In investigating this I saw missed opportunities for on-the-fly +coercion shrinkage. See #15090. +-} + + +simplCast :: SimplEnv -> InExpr -> Coercion -> SimplCont + -> SimplM (SimplFloats, OutExpr) +simplCast env body co0 cont0 + = do { co1 <- {-#SCC "simplCast-simplCoercion" #-} simplCoercion env co0 + ; cont1 <- {-#SCC "simplCast-addCoerce" #-} + if isReflCo co1 + then return cont0 -- See Note [Optimising reflexivity] + else addCoerce co1 cont0 + ; {-#SCC "simplCast-simplExprF" #-} simplExprF env body cont1 } + where + -- If the first parameter is MRefl, then simplifying revealed a + -- reflexive coercion. Omit. + addCoerceM :: MOutCoercion -> SimplCont -> SimplM SimplCont + addCoerceM MRefl cont = return cont + addCoerceM (MCo co) cont = addCoerce co cont + + addCoerce :: OutCoercion -> SimplCont -> SimplM SimplCont + addCoerce co1 (CastIt co2 cont) -- See Note [Optimising reflexivity] + | isReflexiveCo co' = return cont + | otherwise = addCoerce co' cont + where + co' = mkTransCo co1 co2 + + addCoerce co cont@(ApplyToTy { sc_arg_ty = arg_ty, sc_cont = tail }) + | Just (arg_ty', m_co') <- pushCoTyArg co arg_ty + -- N.B. As mentioned in Note [The hole type in ApplyToTy] this is + -- only needed by `sc_hole_ty` which is often not forced. + -- Consequently it is worthwhile using a lazy pattern match here to + -- avoid unnecessary coercionKind evaluations. + , let hole_ty = coercionLKind co + = {-#SCC "addCoerce-pushCoTyArg" #-} + do { tail' <- addCoerceM m_co' tail + ; return (cont { sc_arg_ty = arg_ty' + , sc_hole_ty = hole_ty -- NB! As the cast goes past, the + -- type of the hole changes (#16312) + , sc_cont = tail' }) } + + addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se + , sc_dup = dup, sc_cont = tail }) + | Just (co1, m_co2) <- pushCoValArg co + , let new_ty = coercionRKind co1 + , not (isTypeLevPoly new_ty) -- Without this check, we get a lev-poly arg + -- See Note [Levity polymorphism invariants] in GHC.Core + -- test: typecheck/should_run/EtaExpandLevPoly + = {-#SCC "addCoerce-pushCoValArg" #-} + do { tail' <- addCoerceM m_co2 tail + ; if isReflCo co1 + then return (cont { sc_cont = tail' }) + -- Avoid simplifying if possible; + -- See Note [Avoiding exponential behaviour] + else do + { (dup', arg_se', arg') <- simplArg env dup arg_se arg + -- When we build the ApplyTo we can't mix the OutCoercion + -- 'co' with the InExpr 'arg', so we simplify + -- to make it all consistent. It's a bit messy. + -- But it isn't a common case. + -- Example of use: #995 + ; return (ApplyToVal { sc_arg = mkCast arg' co1 + , sc_env = arg_se' + , sc_dup = dup' + , sc_cont = tail' }) } } + + addCoerce co cont + | isReflexiveCo co = return cont -- Having this at the end makes a huge + -- difference in T12227, for some reason + -- See Note [Optimising reflexivity] + | otherwise = return (CastIt co cont) + +simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr + -> SimplM (DupFlag, StaticEnv, OutExpr) +simplArg env dup_flag arg_env arg + | isSimplified dup_flag + = return (dup_flag, arg_env, arg) + | otherwise + = do { arg' <- simplExpr (arg_env `setInScopeFromE` env) arg + ; return (Simplified, zapSubstEnv arg_env, arg') } + +{- +************************************************************************ +* * +\subsection{Lambdas} +* * +************************************************************************ +-} + +simplLam :: SimplEnv -> [InId] -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +simplLam env [] body cont + = simplExprF env body cont + +simplLam env (bndr:bndrs) body (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont }) + = do { tick (BetaReduction bndr) + ; simplLam (extendTvSubst env bndr arg_ty) bndrs body cont } + +simplLam env (bndr:bndrs) body (ApplyToVal { sc_arg = arg, sc_env = arg_se + , sc_cont = cont, sc_dup = dup }) + | isSimplified dup -- Don't re-simplify if we've simplified it once + -- See Note [Avoiding exponential behaviour] + = do { tick (BetaReduction bndr) + ; (floats1, env') <- simplNonRecX env zapped_bndr arg + ; (floats2, expr') <- simplLam env' bndrs body cont + ; return (floats1 `addFloats` floats2, expr') } + + | otherwise + = do { tick (BetaReduction bndr) + ; simplNonRecE env zapped_bndr (arg, arg_se) (bndrs, body) cont } + where + zapped_bndr -- See Note [Zap unfolding when beta-reducing] + | isId bndr = zapStableUnfolding bndr + | otherwise = bndr + + -- Discard a non-counting tick on a lambda. This may change the + -- cost attribution slightly (moving the allocation of the + -- lambda elsewhere), but we don't care: optimisation changes + -- cost attribution all the time. +simplLam env bndrs body (TickIt tickish cont) + | not (tickishCounts tickish) + = simplLam env bndrs body cont + + -- Not enough args, so there are real lambdas left to put in the result +simplLam env bndrs body cont + = do { (env', bndrs') <- simplLamBndrs env bndrs + ; body' <- simplExpr env' body + ; new_lam <- mkLam env bndrs' body' cont + ; rebuild env' new_lam cont } + +------------- +simplLamBndr :: SimplEnv -> InBndr -> SimplM (SimplEnv, OutBndr) +-- Used for lambda binders. These sometimes have unfoldings added by +-- the worker/wrapper pass that must be preserved, because they can't +-- be reconstructed from context. For example: +-- f x = case x of (a,b) -> fw a b x +-- fw a b x{=(a,b)} = ... +-- The "{=(a,b)}" is an unfolding we can't reconstruct otherwise. +simplLamBndr env bndr + | isId bndr && isFragileUnfolding old_unf -- Special case + = do { (env1, bndr1) <- simplBinder env bndr + ; unf' <- simplStableUnfolding env1 NotTopLevel Nothing bndr + old_unf (idType bndr1) + ; let bndr2 = bndr1 `setIdUnfolding` unf' + ; return (modifyInScope env1 bndr2, bndr2) } + + | otherwise + = simplBinder env bndr -- Normal case + where + old_unf = idUnfolding bndr + +simplLamBndrs :: SimplEnv -> [InBndr] -> SimplM (SimplEnv, [OutBndr]) +simplLamBndrs env bndrs = mapAccumLM simplLamBndr env bndrs + +------------------ +simplNonRecE :: SimplEnv + -> InId -- The binder, always an Id + -- Never a join point + -> (InExpr, SimplEnv) -- Rhs of binding (or arg of lambda) + -> ([InBndr], InExpr) -- Body of the let/lambda + -- \xs.e + -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +-- simplNonRecE is used for +-- * non-top-level non-recursive non-join-point lets in expressions +-- * beta reduction +-- +-- simplNonRec env b (rhs, rhs_se) (bs, body) k +-- = let env in +-- cont< let b = rhs_se(rhs) in \bs.body > +-- +-- It deals with strict bindings, via the StrictBind continuation, +-- which may abort the whole process +-- +-- Precondition: rhs satisfies the let/app invariant +-- Note [Core let/app invariant] in GHC.Core +-- +-- The "body" of the binding comes as a pair of ([InId],InExpr) +-- representing a lambda; so we recurse back to simplLam +-- Why? Because of the binder-occ-info-zapping done before +-- the call to simplLam in simplExprF (Lam ...) + +simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont + | ASSERT( isId bndr && not (isJoinId bndr) ) True + , Just env' <- preInlineUnconditionally env NotTopLevel bndr rhs rhs_se + = do { tick (PreInlineUnconditionally bndr) + ; -- pprTrace "preInlineUncond" (ppr bndr <+> ppr rhs) $ + simplLam env' bndrs body cont } + + -- Deal with strict bindings + | isStrictId bndr -- Includes coercions + , sm_case_case (getMode env) + = simplExprF (rhs_se `setInScopeFromE` env) rhs + (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body + , sc_env = env, sc_cont = cont, sc_dup = NoDup }) + + -- Deal with lazy bindings + | otherwise + = ASSERT( not (isTyVar bndr) ) + do { (env1, bndr1) <- simplNonRecBndr env bndr + ; (env2, bndr2) <- addBndrRules env1 bndr bndr1 Nothing + ; (floats1, env3) <- simplLazyBind env2 NotTopLevel NonRecursive bndr bndr2 rhs rhs_se + ; (floats2, expr') <- simplLam env3 bndrs body cont + ; return (floats1 `addFloats` floats2, expr') } + +------------------ +simplRecE :: SimplEnv + -> [(InId, InExpr)] + -> InExpr + -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +-- simplRecE is used for +-- * non-top-level recursive lets in expressions +simplRecE env pairs body cont + = do { let bndrs = map fst pairs + ; MASSERT(all (not . isJoinId) bndrs) + ; env1 <- simplRecBndrs env bndrs + -- NB: bndrs' don't have unfoldings or rules + -- We add them as we go down + ; (floats1, env2) <- simplRecBind env1 NotTopLevel Nothing pairs + ; (floats2, expr') <- simplExprF env2 body cont + ; return (floats1 `addFloats` floats2, expr') } + +{- Note [Avoiding exponential behaviour] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +One way in which we can get exponential behaviour is if we simplify a +big expression, and the re-simplify it -- and then this happens in a +deeply-nested way. So we must be jolly careful about re-simplifying +an expression. That is why completeNonRecX does not try +preInlineUnconditionally. + +Example: + f BIG, where f has a RULE +Then + * We simplify BIG before trying the rule; but the rule does not fire + * We inline f = \x. x True + * So if we did preInlineUnconditionally we'd re-simplify (BIG True) + +However, if BIG has /not/ already been simplified, we'd /like/ to +simplify BIG True; maybe good things happen. That is why + +* simplLam has + - a case for (isSimplified dup), which goes via simplNonRecX, and + - a case for the un-simplified case, which goes via simplNonRecE + +* We go to some efforts to avoid unnecessarily simplifying ApplyToVal, + in at least two places + - In simplCast/addCoerce, where we check for isReflCo + - In rebuildCall we avoid simplifying arguments before we have to + (see Note [Trying rewrite rules]) + + +Note [Zap unfolding when beta-reducing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Lambda-bound variables can have stable unfoldings, such as + $j = \x. \b{Unf=Just x}. e +See Note [Case binders and join points] below; the unfolding for lets +us optimise e better. However when we beta-reduce it we want to +revert to using the actual value, otherwise we can end up in the +stupid situation of + let x = blah in + let b{Unf=Just x} = y + in ...b... +Here it'd be far better to drop the unfolding and use the actual RHS. + +************************************************************************ +* * + Join points +* * +********************************************************************* -} + +{- Note [Rules and unfolding for join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have + + simplExpr (join j x = rhs ) cont + ( {- RULE j (p:ps) = blah -} ) + ( {- StableUnfolding j = blah -} ) + (in blah ) + +Then we will push 'cont' into the rhs of 'j'. But we should *also* push +'cont' into the RHS of + * Any RULEs for j, e.g. generated by SpecConstr + * Any stable unfolding for j, e.g. the result of an INLINE pragma + +Simplifying rules and stable-unfoldings happens a bit after +simplifying the right-hand side, so we remember whether or not it +is a join point, and what 'cont' is, in a value of type MaybeJoinCont + +#13900 was caused by forgetting to push 'cont' into the RHS +of a SpecConstr-generated RULE for a join point. +-} + +type MaybeJoinCont = Maybe SimplCont + -- Nothing => Not a join point + -- Just k => This is a join binding with continuation k + -- See Note [Rules and unfolding for join points] + +simplNonRecJoinPoint :: SimplEnv -> InId -> InExpr + -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) +simplNonRecJoinPoint env bndr rhs body cont + | ASSERT( isJoinId bndr ) True + , Just env' <- preInlineUnconditionally env NotTopLevel bndr rhs env + = do { tick (PreInlineUnconditionally bndr) + ; simplExprF env' body cont } + + | otherwise + = wrapJoinCont env cont $ \ env cont -> + do { -- We push join_cont into the join RHS and the body; + -- and wrap wrap_cont around the whole thing + ; let res_ty = contResultType cont + ; (env1, bndr1) <- simplNonRecJoinBndr env res_ty bndr + ; (env2, bndr2) <- addBndrRules env1 bndr bndr1 (Just cont) + ; (floats1, env3) <- simplJoinBind env2 cont bndr bndr2 rhs env + ; (floats2, body') <- simplExprF env3 body cont + ; return (floats1 `addFloats` floats2, body') } + + +------------------ +simplRecJoinPoint :: SimplEnv -> [(InId, InExpr)] + -> InExpr -> SimplCont + -> SimplM (SimplFloats, OutExpr) +simplRecJoinPoint env pairs body cont + = wrapJoinCont env cont $ \ env cont -> + do { let bndrs = map fst pairs + res_ty = contResultType cont + ; env1 <- simplRecJoinBndrs env res_ty bndrs + -- NB: bndrs' don't have unfoldings or rules + -- We add them as we go down + ; (floats1, env2) <- simplRecBind env1 NotTopLevel (Just cont) pairs + ; (floats2, body') <- simplExprF env2 body cont + ; return (floats1 `addFloats` floats2, body') } + +-------------------- +wrapJoinCont :: SimplEnv -> SimplCont + -> (SimplEnv -> SimplCont -> SimplM (SimplFloats, OutExpr)) + -> SimplM (SimplFloats, OutExpr) +-- Deal with making the continuation duplicable if necessary, +-- and with the no-case-of-case situation. +wrapJoinCont env cont thing_inside + | contIsStop cont -- Common case; no need for fancy footwork + = thing_inside env cont + + | not (sm_case_case (getMode env)) + -- See Note [Join points with -fno-case-of-case] + = do { (floats1, expr1) <- thing_inside env (mkBoringStop (contHoleType cont)) + ; let (floats2, expr2) = wrapJoinFloatsX floats1 expr1 + ; (floats3, expr3) <- rebuild (env `setInScopeFromF` floats2) expr2 cont + ; return (floats2 `addFloats` floats3, expr3) } + + | otherwise + -- Normal case; see Note [Join points and case-of-case] + = do { (floats1, cont') <- mkDupableCont env cont + ; (floats2, result) <- thing_inside (env `setInScopeFromF` floats1) cont' + ; return (floats1 `addFloats` floats2, result) } + + +-------------------- +trimJoinCont :: Id -> Maybe JoinArity -> SimplCont -> SimplCont +-- Drop outer context from join point invocation (jump) +-- See Note [Join points and case-of-case] + +trimJoinCont _ Nothing cont + = cont -- Not a jump +trimJoinCont var (Just arity) cont + = trim arity cont + where + trim 0 cont@(Stop {}) + = cont + trim 0 cont + = mkBoringStop (contResultType cont) + trim n cont@(ApplyToVal { sc_cont = k }) + = cont { sc_cont = trim (n-1) k } + trim n cont@(ApplyToTy { sc_cont = k }) + = cont { sc_cont = trim (n-1) k } -- join arity counts types! + trim _ cont + = pprPanic "completeCall" $ ppr var $$ ppr cont + + +{- Note [Join points and case-of-case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we perform the case-of-case transform (or otherwise push continuations +inward), we want to treat join points specially. Since they're always +tail-called and we want to maintain this invariant, we can do this (for any +evaluation context E): + + E[join j = e + in case ... of + A -> jump j 1 + B -> jump j 2 + C -> f 3] + + --> + + join j = E[e] + in case ... of + A -> jump j 1 + B -> jump j 2 + C -> E[f 3] + +As is evident from the example, there are two components to this behavior: + + 1. When entering the RHS of a join point, copy the context inside. + 2. When a join point is invoked, discard the outer context. + +We need to be very careful here to remain consistent---neither part is +optional! + +We need do make the continuation E duplicable (since we are duplicating it) +with mkDupableCont. + + +Note [Join points with -fno-case-of-case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Supose case-of-case is switched off, and we are simplifying + + case (join j x = <j-rhs> in + case y of + A -> j 1 + B -> j 2 + C -> e) of <outer-alts> + +Usually, we'd push the outer continuation (case . of <outer-alts>) into +both the RHS and the body of the join point j. But since we aren't doing +case-of-case we may then end up with this totally bogus result + + join x = case <j-rhs> of <outer-alts> in + case (case y of + A -> j 1 + B -> j 2 + C -> e) of <outer-alts> + +This would be OK in the language of the paper, but not in GHC: j is no longer +a join point. We can only do the "push continuation into the RHS of the +join point j" if we also push the continuation right down to the /jumps/ to +j, so that it can evaporate there. If we are doing case-of-case, we'll get to + + join x = case <j-rhs> of <outer-alts> in + case y of + A -> j 1 + B -> j 2 + C -> case e of <outer-alts> + +which is great. + +Bottom line: if case-of-case is off, we must stop pushing the continuation +inwards altogether at any join point. Instead simplify the (join ... in ...) +with a Stop continuation, and wrap the original continuation around the +outside. Surprisingly tricky! + + +************************************************************************ +* * + Variables +* * +************************************************************************ +-} + +simplVar :: SimplEnv -> InVar -> SimplM OutExpr +-- Look up an InVar in the environment +simplVar env var + | isTyVar var = return (Type (substTyVar env var)) + | isCoVar var = return (Coercion (substCoVar env var)) + | otherwise + = case substId env var of + ContEx tvs cvs ids e -> simplExpr (setSubstEnv env tvs cvs ids) e + DoneId var1 -> return (Var var1) + DoneEx e _ -> return e + +simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr) +simplIdF env var cont + = case substId env var of + ContEx tvs cvs ids e -> simplExprF (setSubstEnv env tvs cvs ids) e cont + -- Don't trim; haven't already simplified e, + -- so the cont is not embodied in e + + DoneId var1 -> completeCall env var1 (trimJoinCont var (isJoinId_maybe var1) cont) + + DoneEx e mb_join -> simplExprF (zapSubstEnv env) e (trimJoinCont var mb_join cont) + -- Note [zapSubstEnv] + -- The template is already simplified, so don't re-substitute. + -- This is VITAL. Consider + -- let x = e in + -- let y = \z -> ...x... in + -- \ x -> ...y... + -- We'll clone the inner \x, adding x->x' in the id_subst + -- Then when we inline y, we must *not* replace x by x' in + -- the inlined copy!! + +--------------------------------------------------------- +-- Dealing with a call site + +completeCall :: SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr) +completeCall env var cont + | Just expr <- callSiteInline dflags var active_unf + lone_variable arg_infos interesting_cont + -- Inline the variable's RHS + = do { checkedTick (UnfoldingDone var) + ; dump_inline expr cont + ; simplExprF (zapSubstEnv env) expr cont } + + | otherwise + -- Don't inline; instead rebuild the call + = do { rule_base <- getSimplRules + ; let info = mkArgInfo env var (getRules rule_base var) + n_val_args call_cont + ; rebuildCall env info cont } + + where + dflags = seDynFlags env + (lone_variable, arg_infos, call_cont) = contArgs cont + n_val_args = length arg_infos + interesting_cont = interestingCallContext env call_cont + active_unf = activeUnfolding (getMode env) var + + log_inlining doc + = liftIO $ dumpAction dflags + (mkUserStyle dflags alwaysQualify AllTheWay) + (dumpOptionsFromFlag Opt_D_dump_inlinings) + "" FormatText doc + + dump_inline unfolding cont + | not (dopt Opt_D_dump_inlinings dflags) = return () + | not (dopt Opt_D_verbose_core2core dflags) + = when (isExternalName (idName var)) $ + log_inlining $ + sep [text "Inlining done:", nest 4 (ppr var)] + | otherwise + = liftIO $ log_inlining $ + sep [text "Inlining done: " <> ppr var, + nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding), + text "Cont: " <+> ppr cont])] + +rebuildCall :: SimplEnv + -> ArgInfo + -> SimplCont + -> SimplM (SimplFloats, OutExpr) +-- We decided not to inline, so +-- - simplify the arguments +-- - try rewrite rules +-- - and rebuild + +---------- Bottoming applications -------------- +rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_strs = [] }) cont + -- When we run out of strictness args, it means + -- that the call is definitely bottom; see GHC.Core.Op.Simplify.Utils.mkArgInfo + -- Then we want to discard the entire strict continuation. E.g. + -- * case (error "hello") of { ... } + -- * (error "Hello") arg + -- * f (error "Hello") where f is strict + -- etc + -- Then, especially in the first of these cases, we'd like to discard + -- the continuation, leaving just the bottoming expression. But the + -- type might not be right, so we may have to add a coerce. + | not (contIsTrivial cont) -- Only do this if there is a non-trivial + -- continuation to discard, else we do it + -- again and again! + = seqType cont_ty `seq` -- See Note [Avoiding space leaks in OutType] + return (emptyFloats env, castBottomExpr res cont_ty) + where + res = argInfoExpr fun rev_args + cont_ty = contResultType cont + +---------- Try rewrite RULES -------------- +-- See Note [Trying rewrite rules] +rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args + , ai_rules = Just (nr_wanted, rules) }) cont + | nr_wanted == 0 || no_more_args + , let info' = info { ai_rules = Nothing } + = -- We've accumulated a simplified call in <fun,rev_args> + -- so try rewrite rules; see Note [RULEs apply to simplified arguments] + -- See also Note [Rules for recursive functions] + do { mb_match <- tryRules env rules fun (reverse rev_args) cont + ; case mb_match of + Just (env', rhs, cont') -> simplExprF env' rhs cont' + Nothing -> rebuildCall env info' cont } + where + no_more_args = case cont of + ApplyToTy {} -> False + ApplyToVal {} -> False + _ -> True + + +---------- Simplify applications and casts -------------- +rebuildCall env info (CastIt co cont) + = rebuildCall env (addCastTo info co) cont + +rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont }) + = rebuildCall env (addTyArgTo info arg_ty) cont + +rebuildCall env info@(ArgInfo { ai_encl = encl_rules, ai_type = fun_ty + , ai_strs = str:strs, ai_discs = disc:discs }) + (ApplyToVal { sc_arg = arg, sc_env = arg_se + , sc_dup = dup_flag, sc_cont = cont }) + | isSimplified dup_flag -- See Note [Avoid redundant simplification] + = rebuildCall env (addValArgTo info' arg) cont + + | str -- Strict argument + , sm_case_case (getMode env) + = -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $ + simplExprF (arg_se `setInScopeFromE` env) arg + (StrictArg { sc_fun = info', sc_cci = cci_strict + , sc_dup = Simplified, sc_cont = cont }) + -- Note [Shadowing] + + | otherwise -- Lazy argument + -- DO NOT float anything outside, hence simplExprC + -- There is no benefit (unlike in a let-binding), and we'd + -- have to be very careful about bogus strictness through + -- floating a demanded let. + = do { arg' <- simplExprC (arg_se `setInScopeFromE` env) arg + (mkLazyArgStop arg_ty cci_lazy) + ; rebuildCall env (addValArgTo info' arg') cont } + where + info' = info { ai_strs = strs, ai_discs = discs } + arg_ty = funArgTy fun_ty + + -- Use this for lazy arguments + cci_lazy | encl_rules = RuleArgCtxt + | disc > 0 = DiscArgCtxt -- Be keener here + | otherwise = BoringCtxt -- Nothing interesting + + -- ..and this for strict arguments + cci_strict | encl_rules = RuleArgCtxt + | disc > 0 = DiscArgCtxt + | otherwise = RhsCtxt + -- Why RhsCtxt? if we see f (g x) (h x), and f is strict, we + -- want to be a bit more eager to inline g, because it may + -- expose an eval (on x perhaps) that can be eliminated or + -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1 + -- It's worth an 18% improvement in allocation for this + -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier' + +---------- No further useful info, revert to generic rebuild ------------ +rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont + = rebuild env (argInfoExpr fun rev_args) cont + +{- Note [Trying rewrite rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider an application (f e1 e2 e3) where the e1,e2,e3 are not yet +simplified. We want to simplify enough arguments to allow the rules +to apply, but it's more efficient to avoid simplifying e2,e3 if e1 alone +is sufficient. Example: class ops + (+) dNumInt e2 e3 +If we rewrite ((+) dNumInt) to plusInt, we can take advantage of the +latter's strictness when simplifying e2, e3. Moreover, suppose we have + RULE f Int = \x. x True + +Then given (f Int e1) we rewrite to + (\x. x True) e1 +without simplifying e1. Now we can inline x into its unique call site, +and absorb the True into it all in the same pass. If we simplified +e1 first, we couldn't do that; see Note [Avoiding exponential behaviour]. + +So we try to apply rules if either + (a) no_more_args: we've run out of argument that the rules can "see" + (b) nr_wanted: none of the rules wants any more arguments + + +Note [RULES apply to simplified arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's very desirable to try RULES once the arguments have been simplified, because +doing so ensures that rule cascades work in one pass. Consider + {-# RULES g (h x) = k x + f (k x) = x #-} + ...f (g (h x))... +Then we want to rewrite (g (h x)) to (k x) and only then try f's rules. If +we match f's rules against the un-simplified RHS, it won't match. This +makes a particularly big difference when superclass selectors are involved: + op ($p1 ($p2 (df d))) +We want all this to unravel in one sweep. + +Note [Avoid redundant simplification] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Because RULES apply to simplified arguments, there's a danger of repeatedly +simplifying already-simplified arguments. An important example is that of + (>>=) d e1 e2 +Here e1, e2 are simplified before the rule is applied, but don't really +participate in the rule firing. So we mark them as Simplified to avoid +re-simplifying them. + +Note [Shadowing] +~~~~~~~~~~~~~~~~ +This part of the simplifier may break the no-shadowing invariant +Consider + f (...(\a -> e)...) (case y of (a,b) -> e') +where f is strict in its second arg +If we simplify the innermost one first we get (...(\a -> e)...) +Simplifying the second arg makes us float the case out, so we end up with + case y of (a,b) -> f (...(\a -> e)...) e' +So the output does not have the no-shadowing invariant. However, there is +no danger of getting name-capture, because when the first arg was simplified +we used an in-scope set that at least mentioned all the variables free in its +static environment, and that is enough. + +We can't just do innermost first, or we'd end up with a dual problem: + case x of (a,b) -> f e (...(\a -> e')...) + +I spent hours trying to recover the no-shadowing invariant, but I just could +not think of an elegant way to do it. The simplifier is already knee-deep in +continuations. We have to keep the right in-scope set around; AND we have +to get the effect that finding (error "foo") in a strict arg position will +discard the entire application and replace it with (error "foo"). Getting +all this at once is TOO HARD! + + +************************************************************************ +* * + Rewrite rules +* * +************************************************************************ +-} + +tryRules :: SimplEnv -> [CoreRule] + -> Id -> [ArgSpec] + -> SimplCont + -> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont)) + +tryRules env rules fn args call_cont + | null rules + = return Nothing + +{- Disabled until we fix #8326 + | fn `hasKey` tagToEnumKey -- See Note [Optimising tagToEnum#] + , [_type_arg, val_arg] <- args + , Select dup bndr ((_,[],rhs1) : rest_alts) se cont <- call_cont + , isDeadBinder bndr + = do { let enum_to_tag :: CoreAlt -> CoreAlt + -- Takes K -> e into tagK# -> e + -- where tagK# is the tag of constructor K + enum_to_tag (DataAlt con, [], rhs) + = ASSERT( isEnumerationTyCon (dataConTyCon con) ) + (LitAlt tag, [], rhs) + where + tag = mkLitInt dflags (toInteger (dataConTag con - fIRST_TAG)) + enum_to_tag alt = pprPanic "tryRules: tagToEnum" (ppr alt) + + new_alts = (DEFAULT, [], rhs1) : map enum_to_tag rest_alts + new_bndr = setIdType bndr intPrimTy + -- The binder is dead, but should have the right type + ; return (Just (val_arg, Select dup new_bndr new_alts se cont)) } +-} + + | Just (rule, rule_rhs) <- lookupRule dflags (getUnfoldingInRuleMatch env) + (activeRule (getMode env)) fn + (argInfoAppArgs args) rules + -- Fire a rule for the function + = do { checkedTick (RuleFired (ruleName rule)) + ; let cont' = pushSimplifiedArgs zapped_env + (drop (ruleArity rule) args) + call_cont + -- (ruleArity rule) says how + -- many args the rule consumed + + occ_anald_rhs = occurAnalyseExpr rule_rhs + -- See Note [Occurrence-analyse after rule firing] + ; dump rule rule_rhs + ; return (Just (zapped_env, occ_anald_rhs, cont')) } + -- The occ_anald_rhs and cont' are all Out things + -- hence zapping the environment + + | otherwise -- No rule fires + = do { nodump -- This ensures that an empty file is written + ; return Nothing } + + where + dflags = seDynFlags env + zapped_env = zapSubstEnv env -- See Note [zapSubstEnv] + + printRuleModule rule + = parens (maybe (text "BUILTIN") + (pprModuleName . moduleName) + (ruleModule rule)) + + dump rule rule_rhs + | dopt Opt_D_dump_rule_rewrites dflags + = log_rule dflags Opt_D_dump_rule_rewrites "Rule fired" $ vcat + [ text "Rule:" <+> ftext (ruleName rule) + , text "Module:" <+> printRuleModule rule + , text "Before:" <+> hang (ppr fn) 2 (sep (map ppr args)) + , text "After: " <+> pprCoreExpr rule_rhs + , text "Cont: " <+> ppr call_cont ] + + | dopt Opt_D_dump_rule_firings dflags + = log_rule dflags Opt_D_dump_rule_firings "Rule fired:" $ + ftext (ruleName rule) + <+> printRuleModule rule + + | otherwise + = return () + + nodump + | dopt Opt_D_dump_rule_rewrites dflags + = liftIO $ do + touchDumpFile dflags (dumpOptionsFromFlag Opt_D_dump_rule_rewrites) + + | dopt Opt_D_dump_rule_firings dflags + = liftIO $ do + touchDumpFile dflags (dumpOptionsFromFlag Opt_D_dump_rule_firings) + + | otherwise + = return () + + log_rule dflags flag hdr details + = liftIO $ do + let sty = mkDumpStyle dflags alwaysQualify + dumpAction dflags sty (dumpOptionsFromFlag flag) "" FormatText $ + sep [text hdr, nest 4 details] + +trySeqRules :: SimplEnv + -> OutExpr -> InExpr -- Scrutinee and RHS + -> SimplCont + -> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont)) +-- See Note [User-defined RULES for seq] +trySeqRules in_env scrut rhs cont + = do { rule_base <- getSimplRules + ; tryRules in_env (getRules rule_base seqId) seqId out_args rule_cont } + where + no_cast_scrut = drop_casts scrut + scrut_ty = exprType no_cast_scrut + seq_id_ty = idType seqId + res1_ty = piResultTy seq_id_ty rhs_rep + res2_ty = piResultTy res1_ty scrut_ty + rhs_ty = substTy in_env (exprType rhs) + rhs_rep = getRuntimeRep rhs_ty + out_args = [ TyArg { as_arg_ty = rhs_rep + , as_hole_ty = seq_id_ty } + , TyArg { as_arg_ty = scrut_ty + , as_hole_ty = res1_ty } + , TyArg { as_arg_ty = rhs_ty + , as_hole_ty = res2_ty } + , ValArg no_cast_scrut] + rule_cont = ApplyToVal { sc_dup = NoDup, sc_arg = rhs + , sc_env = in_env, sc_cont = cont } + -- Lazily evaluated, so we don't do most of this + + drop_casts (Cast e _) = drop_casts e + drop_casts e = e + +{- Note [User-defined RULES for seq] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Given + case (scrut |> co) of _ -> rhs +look for rules that match the expression + seq @t1 @t2 scrut +where scrut :: t1 + rhs :: t2 + +If you find a match, rewrite it, and apply to 'rhs'. + +Notice that we can simply drop casts on the fly here, which +makes it more likely that a rule will match. + +See Note [User-defined RULES for seq] in MkId. + +Note [Occurrence-analyse after rule firing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +After firing a rule, we occurrence-analyse the instantiated RHS before +simplifying it. Usually this doesn't make much difference, but it can +be huge. Here's an example (simplCore/should_compile/T7785) + + map f (map f (map f xs) + += -- Use build/fold form of map, twice + map f (build (\cn. foldr (mapFB c f) n + (build (\cn. foldr (mapFB c f) n xs)))) + += -- Apply fold/build rule + map f (build (\cn. (\cn. foldr (mapFB c f) n xs) (mapFB c f) n)) + += -- Beta-reduce + -- Alas we have no occurrence-analysed, so we don't know + -- that c is used exactly once + map f (build (\cn. let c1 = mapFB c f in + foldr (mapFB c1 f) n xs)) + += -- Use mapFB rule: mapFB (mapFB c f) g = mapFB c (f.g) + -- We can do this because (mapFB c n) is a PAP and hence expandable + map f (build (\cn. let c1 = mapFB c n in + foldr (mapFB c (f.f)) n x)) + +This is not too bad. But now do the same with the outer map, and +we get another use of mapFB, and t can interact with /both/ remaining +mapFB calls in the above expression. This is stupid because actually +that 'c1' binding is dead. The outer map introduces another c2. If +there is a deep stack of maps we get lots of dead bindings, and lots +of redundant work as we repeatedly simplify the result of firing rules. + +The easy thing to do is simply to occurrence analyse the result of +the rule firing. Note that this occ-anals not only the RHS of the +rule, but also the function arguments, which by now are OutExprs. +E.g. + RULE f (g x) = x+1 + +Call f (g BIG) --> (\x. x+1) BIG + +The rule binders are lambda-bound and applied to the OutExpr arguments +(here BIG) which lack all internal occurrence info. + +Is this inefficient? Not really: we are about to walk over the result +of the rule firing to simplify it, so occurrence analysis is at most +a constant factor. + +Possible improvement: occ-anal the rules when putting them in the +database; and in the simplifier just occ-anal the OutExpr arguments. +But that's more complicated and the rule RHS is usually tiny; so I'm +just doing the simple thing. + +Historical note: previously we did occ-anal the rules in Rule.hs, +but failed to occ-anal the OutExpr arguments, which led to the +nasty performance problem described above. + + +Note [Optimising tagToEnum#] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have an enumeration data type: + + data Foo = A | B | C + +Then we want to transform + + case tagToEnum# x of ==> case x of + A -> e1 DEFAULT -> e1 + B -> e2 1# -> e2 + C -> e3 2# -> e3 + +thereby getting rid of the tagToEnum# altogether. If there was a DEFAULT +alternative we retain it (remember it comes first). If not the case must +be exhaustive, and we reflect that in the transformed version by adding +a DEFAULT. Otherwise Lint complains that the new case is not exhaustive. +See #8317. + +Note [Rules for recursive functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +You might think that we shouldn't apply rules for a loop breaker: +doing so might give rise to an infinite loop, because a RULE is +rather like an extra equation for the function: + RULE: f (g x) y = x+y + Eqn: f a y = a-y + +But it's too drastic to disable rules for loop breakers. +Even the foldr/build rule would be disabled, because foldr +is recursive, and hence a loop breaker: + foldr k z (build g) = g k z +So it's up to the programmer: rules can cause divergence + + +************************************************************************ +* * + Rebuilding a case expression +* * +************************************************************************ + +Note [Case elimination] +~~~~~~~~~~~~~~~~~~~~~~~ +The case-elimination transformation discards redundant case expressions. +Start with a simple situation: + + case x# of ===> let y# = x# in e + y# -> e + +(when x#, y# are of primitive type, of course). We can't (in general) +do this for algebraic cases, because we might turn bottom into +non-bottom! + +The code in GHC.Core.Op.Simplify.Utils.prepareAlts has the effect of generalise +this idea to look for a case where we're scrutinising a variable, and we know +that only the default case can match. For example: + + case x of + 0# -> ... + DEFAULT -> ...(case x of + 0# -> ... + DEFAULT -> ...) ... + +Here the inner case is first trimmed to have only one alternative, the +DEFAULT, after which it's an instance of the previous case. This +really only shows up in eliminating error-checking code. + +Note that GHC.Core.Op.Simplify.Utils.mkCase combines identical RHSs. So + + case e of ===> case e of DEFAULT -> r + True -> r + False -> r + +Now again the case may be eliminated by the CaseElim transformation. +This includes things like (==# a# b#)::Bool so that we simplify + case ==# a# b# of { True -> x; False -> x } +to just + x +This particular example shows up in default methods for +comparison operations (e.g. in (>=) for Int.Int32) + +Note [Case to let transformation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If a case over a lifted type has a single alternative, and is being +used as a strict 'let' (all isDeadBinder bndrs), we may want to do +this transformation: + + case e of r ===> let r = e in ...r... + _ -> ...r... + +We treat the unlifted and lifted cases separately: + +* Unlifted case: 'e' satisfies exprOkForSpeculation + (ok-for-spec is needed to satisfy the let/app invariant). + This turns case a +# b of r -> ...r... + into let r = a +# b in ...r... + and thence .....(a +# b).... + + However, if we have + case indexArray# a i of r -> ...r... + we might like to do the same, and inline the (indexArray# a i). + But indexArray# is not okForSpeculation, so we don't build a let + in rebuildCase (lest it get floated *out*), so the inlining doesn't + happen either. Annoying. + +* Lifted case: we need to be sure that the expression is already + evaluated (exprIsHNF). If it's not already evaluated + - we risk losing exceptions, divergence or + user-specified thunk-forcing + - even if 'e' is guaranteed to converge, we don't want to + create a thunk (call by need) instead of evaluating it + right away (call by value) + + However, we can turn the case into a /strict/ let if the 'r' is + used strictly in the body. Then we won't lose divergence; and + we won't build a thunk because the let is strict. + See also Note [Case-to-let for strictly-used binders] + + NB: absentError satisfies exprIsHNF: see Note [aBSENT_ERROR_ID] in GHC.Core.Make. + We want to turn + case (absentError "foo") of r -> ...MkT r... + into + let r = absentError "foo" in ...MkT r... + + +Note [Case-to-let for strictly-used binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have this: + case <scrut> of r { _ -> ..r.. } + +where 'r' is used strictly in (..r..), we can safely transform to + let r = <scrut> in ...r... + +This is a Good Thing, because 'r' might be dead (if the body just +calls error), or might be used just once (in which case it can be +inlined); or we might be able to float the let-binding up or down. +E.g. #15631 has an example. + +Note that this can change the error behaviour. For example, we might +transform + case x of { _ -> error "bad" } + --> error "bad" +which is might be puzzling if 'x' currently lambda-bound, but later gets +let-bound to (error "good"). + +Nevertheless, the paper "A semantics for imprecise exceptions" allows +this transformation. If you want to fix the evaluation order, use +'pseq'. See #8900 for an example where the loss of this +transformation bit us in practice. + +See also Note [Empty case alternatives] in GHC.Core. + +Historical notes + +There have been various earlier versions of this patch: + +* By Sept 18 the code looked like this: + || scrut_is_demanded_var scrut + + scrut_is_demanded_var :: CoreExpr -> Bool + scrut_is_demanded_var (Cast s _) = scrut_is_demanded_var s + scrut_is_demanded_var (Var _) = isStrictDmd (idDemandInfo case_bndr) + scrut_is_demanded_var _ = False + + This only fired if the scrutinee was a /variable/, which seems + an unnecessary restriction. So in #15631 I relaxed it to allow + arbitrary scrutinees. Less code, less to explain -- but the change + had 0.00% effect on nofib. + +* Previously, in Jan 13 the code looked like this: + || case_bndr_evald_next rhs + + case_bndr_evald_next :: CoreExpr -> Bool + -- See Note [Case binder next] + case_bndr_evald_next (Var v) = v == case_bndr + case_bndr_evald_next (Cast e _) = case_bndr_evald_next e + case_bndr_evald_next (App e _) = case_bndr_evald_next e + case_bndr_evald_next (Case e _ _ _) = case_bndr_evald_next e + case_bndr_evald_next _ = False + + This patch was part of fixing #7542. See also + Note [Eta reduction of an eval'd function] in GHC.Core.Utils.) + + +Further notes about case elimination +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider: test :: Integer -> IO () + test = print + +Turns out that this compiles to: + Print.test + = \ eta :: Integer + eta1 :: Void# -> + case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT -> + case hPutStr stdout + (PrelNum.jtos eta ($w[] @ Char)) + eta1 + of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s }} + +Notice the strange '<' which has no effect at all. This is a funny one. +It started like this: + +f x y = if x < 0 then jtos x + else if y==0 then "" else jtos x + +At a particular call site we have (f v 1). So we inline to get + + if v < 0 then jtos x + else if 1==0 then "" else jtos x + +Now simplify the 1==0 conditional: + + if v<0 then jtos v else jtos v + +Now common-up the two branches of the case: + + case (v<0) of DEFAULT -> jtos v + +Why don't we drop the case? Because it's strict in v. It's technically +wrong to drop even unnecessary evaluations, and in practice they +may be a result of 'seq' so we *definitely* don't want to drop those. +I don't really know how to improve this situation. + + +Note [FloatBinds from constructor wrappers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have FloatBinds coming from the constructor wrapper +(as in Note [exprIsConApp_maybe on data constructors with wrappers]), +we cannot float past them. We'd need to float the FloatBind +together with the simplify floats, unfortunately the +simplifier doesn't have case-floats. The simplest thing we can +do is to wrap all the floats here. The next iteration of the +simplifier will take care of all these cases and lets. + +Given data T = MkT !Bool, this allows us to simplify +case $WMkT b of { MkT x -> f x } +to +case b of { b' -> f b' }. + +We could try and be more clever (like maybe wfloats only contain +let binders, so we could float them). But the need for the +extra complication is not clear. +-} + +--------------------------------------------------------- +-- Eliminate the case if possible + +rebuildCase, reallyRebuildCase + :: SimplEnv + -> OutExpr -- Scrutinee + -> InId -- Case binder + -> [InAlt] -- Alternatives (increasing order) + -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +-------------------------------------------------- +-- 1. Eliminate the case if there's a known constructor +-------------------------------------------------- + +rebuildCase env scrut case_bndr alts cont + | Lit lit <- scrut -- No need for same treatment as constructors + -- because literals are inlined more vigorously + , not (litIsLifted lit) + = do { tick (KnownBranch case_bndr) + ; case findAlt (LitAlt lit) alts of + Nothing -> missingAlt env case_bndr alts cont + Just (_, bs, rhs) -> simple_rhs env [] scrut bs rhs } + + | Just (in_scope', wfloats, con, ty_args, other_args) + <- exprIsConApp_maybe (getUnfoldingInRuleMatch env) scrut + -- Works when the scrutinee is a variable with a known unfolding + -- as well as when it's an explicit constructor application + , let env0 = setInScopeSet env in_scope' + = do { tick (KnownBranch case_bndr) + ; case findAlt (DataAlt con) alts of + Nothing -> missingAlt env0 case_bndr alts cont + Just (DEFAULT, bs, rhs) -> let con_app = Var (dataConWorkId con) + `mkTyApps` ty_args + `mkApps` other_args + in simple_rhs env0 wfloats con_app bs rhs + Just (_, bs, rhs) -> knownCon env0 scrut wfloats con ty_args other_args + case_bndr bs rhs cont + } + where + simple_rhs env wfloats scrut' bs rhs = + ASSERT( null bs ) + do { (floats1, env') <- simplNonRecX env case_bndr scrut' + -- scrut is a constructor application, + -- hence satisfies let/app invariant + ; (floats2, expr') <- simplExprF env' rhs cont + ; case wfloats of + [] -> return (floats1 `addFloats` floats2, expr') + _ -> return + -- See Note [FloatBinds from constructor wrappers] + ( emptyFloats env, + GHC.Core.Make.wrapFloats wfloats $ + wrapFloats (floats1 `addFloats` floats2) expr' )} + + +-------------------------------------------------- +-- 2. Eliminate the case if scrutinee is evaluated +-------------------------------------------------- + +rebuildCase env scrut case_bndr alts@[(_, bndrs, rhs)] cont + -- See if we can get rid of the case altogether + -- See Note [Case elimination] + -- mkCase made sure that if all the alternatives are equal, + -- then there is now only one (DEFAULT) rhs + + -- 2a. Dropping the case altogether, if + -- a) it binds nothing (so it's really just a 'seq') + -- b) evaluating the scrutinee has no side effects + | is_plain_seq + , exprOkForSideEffects scrut + -- The entire case is dead, so we can drop it + -- if the scrutinee converges without having imperative + -- side effects or raising a Haskell exception + -- See Note [PrimOp can_fail and has_side_effects] in PrimOp + = simplExprF env rhs cont + + -- 2b. Turn the case into a let, if + -- a) it binds only the case-binder + -- b) unlifted case: the scrutinee is ok-for-speculation + -- lifted case: the scrutinee is in HNF (or will later be demanded) + -- See Note [Case to let transformation] + | all_dead_bndrs + , doCaseToLet scrut case_bndr + = do { tick (CaseElim case_bndr) + ; (floats1, env') <- simplNonRecX env case_bndr scrut + ; (floats2, expr') <- simplExprF env' rhs cont + ; return (floats1 `addFloats` floats2, expr') } + + -- 2c. Try the seq rules if + -- a) it binds only the case binder + -- b) a rule for seq applies + -- See Note [User-defined RULES for seq] in MkId + | is_plain_seq + = do { mb_rule <- trySeqRules env scrut rhs cont + ; case mb_rule of + Just (env', rule_rhs, cont') -> simplExprF env' rule_rhs cont' + Nothing -> reallyRebuildCase env scrut case_bndr alts cont } + where + all_dead_bndrs = all isDeadBinder bndrs -- bndrs are [InId] + is_plain_seq = all_dead_bndrs && isDeadBinder case_bndr -- Evaluation *only* for effect + +rebuildCase env scrut case_bndr alts cont + = reallyRebuildCase env scrut case_bndr alts cont + + +doCaseToLet :: OutExpr -- Scrutinee + -> InId -- Case binder + -> Bool +-- The situation is case scrut of b { DEFAULT -> body } +-- Can we transform thus? let { b = scrut } in body +doCaseToLet scrut case_bndr + | isTyCoVar case_bndr -- Respect GHC.Core + = isTyCoArg scrut -- Note [Core type and coercion invariant] + + | isUnliftedType (idType case_bndr) + = exprOkForSpeculation scrut + + | otherwise -- Scrut has a lifted type + = exprIsHNF scrut + || isStrictDmd (idDemandInfo case_bndr) + -- See Note [Case-to-let for strictly-used binders] + +-------------------------------------------------- +-- 3. Catch-all case +-------------------------------------------------- + +reallyRebuildCase env scrut case_bndr alts cont + | not (sm_case_case (getMode env)) + = do { case_expr <- simplAlts env scrut case_bndr alts + (mkBoringStop (contHoleType cont)) + ; rebuild env case_expr cont } + + | otherwise + = do { (floats, cont') <- mkDupableCaseCont env alts cont + ; case_expr <- simplAlts (env `setInScopeFromF` floats) + scrut case_bndr alts cont' + ; return (floats, case_expr) } + +{- +simplCaseBinder checks whether the scrutinee is a variable, v. If so, +try to eliminate uses of v in the RHSs in favour of case_bndr; that +way, there's a chance that v will now only be used once, and hence +inlined. + +Historical note: we use to do the "case binder swap" in the Simplifier +so there were additional complications if the scrutinee was a variable. +Now the binder-swap stuff is done in the occurrence analyser; see +OccurAnal Note [Binder swap]. + +Note [knownCon occ info] +~~~~~~~~~~~~~~~~~~~~~~~~ +If the case binder is not dead, then neither are the pattern bound +variables: + case <any> of x { (a,b) -> + case x of { (p,q) -> p } } +Here (a,b) both look dead, but come alive after the inner case is eliminated. +The point is that we bring into the envt a binding + let x = (a,b) +after the outer case, and that makes (a,b) alive. At least we do unless +the case binder is guaranteed dead. + +Note [Case alternative occ info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we are simply reconstructing a case (the common case), we always +zap the occurrence info on the binders in the alternatives. Even +if the case binder is dead, the scrutinee is usually a variable, and *that* +can bring the case-alternative binders back to life. +See Note [Add unfolding for scrutinee] + +Note [Improving seq] +~~~~~~~~~~~~~~~~~~~ +Consider + type family F :: * -> * + type instance F Int = Int + +We'd like to transform + case e of (x :: F Int) { DEFAULT -> rhs } +===> + case e `cast` co of (x'::Int) + I# x# -> let x = x' `cast` sym co + in rhs + +so that 'rhs' can take advantage of the form of x'. Notice that Note +[Case of cast] (in OccurAnal) may then apply to the result. + +We'd also like to eliminate empty types (#13468). So if + + data Void + type instance F Bool = Void + +then we'd like to transform + case (x :: F Bool) of { _ -> error "urk" } +===> + case (x |> co) of (x' :: Void) of {} + +Nota Bene: we used to have a built-in rule for 'seq' that dropped +casts, so that + case (x |> co) of { _ -> blah } +dropped the cast; in order to improve the chances of trySeqRules +firing. But that works in the /opposite/ direction to Note [Improving +seq] so there's a danger of flip/flopping. Better to make trySeqRules +insensitive to the cast, which is now is. + +The need for [Improving seq] showed up in Roman's experiments. Example: + foo :: F Int -> Int -> Int + foo t n = t `seq` bar n + where + bar 0 = 0 + bar n = bar (n - case t of TI i -> i) +Here we'd like to avoid repeated evaluating t inside the loop, by +taking advantage of the `seq`. + +At one point I did transformation in LiberateCase, but it's more +robust here. (Otherwise, there's a danger that we'll simply drop the +'seq' altogether, before LiberateCase gets to see it.) +-} + +simplAlts :: SimplEnv + -> OutExpr -- Scrutinee + -> InId -- Case binder + -> [InAlt] -- Non-empty + -> SimplCont + -> SimplM OutExpr -- Returns the complete simplified case expression + +simplAlts env0 scrut case_bndr alts cont' + = do { traceSmpl "simplAlts" (vcat [ ppr case_bndr + , text "cont':" <+> ppr cont' + , text "in_scope" <+> ppr (seInScope env0) ]) + ; (env1, case_bndr1) <- simplBinder env0 case_bndr + ; let case_bndr2 = case_bndr1 `setIdUnfolding` evaldUnfolding + env2 = modifyInScope env1 case_bndr2 + -- See Note [Case binder evaluated-ness] + + ; fam_envs <- getFamEnvs + ; (alt_env', scrut', case_bndr') <- improveSeq fam_envs env2 scrut + case_bndr case_bndr2 alts + + ; (imposs_deflt_cons, in_alts) <- prepareAlts scrut' case_bndr' alts + -- NB: it's possible that the returned in_alts is empty: this is handled + -- by the caller (rebuildCase) in the missingAlt function + + ; alts' <- mapM (simplAlt alt_env' (Just scrut') imposs_deflt_cons case_bndr' cont') in_alts + ; -- pprTrace "simplAlts" (ppr case_bndr $$ ppr alts_ty $$ ppr alts_ty' $$ ppr alts $$ ppr cont') $ + + ; let alts_ty' = contResultType cont' + -- See Note [Avoiding space leaks in OutType] + ; seqType alts_ty' `seq` + mkCase (seDynFlags env0) scrut' case_bndr' alts_ty' alts' } + + +------------------------------------ +improveSeq :: (FamInstEnv, FamInstEnv) -> SimplEnv + -> OutExpr -> InId -> OutId -> [InAlt] + -> SimplM (SimplEnv, OutExpr, OutId) +-- Note [Improving seq] +improveSeq fam_envs env scrut case_bndr case_bndr1 [(DEFAULT,_,_)] + | Just (co, ty2) <- topNormaliseType_maybe fam_envs (idType case_bndr1) + = do { case_bndr2 <- newId (fsLit "nt") ty2 + ; let rhs = DoneEx (Var case_bndr2 `Cast` mkSymCo co) Nothing + env2 = extendIdSubst env case_bndr rhs + ; return (env2, scrut `Cast` co, case_bndr2) } + +improveSeq _ env scrut _ case_bndr1 _ + = return (env, scrut, case_bndr1) + + +------------------------------------ +simplAlt :: SimplEnv + -> Maybe OutExpr -- The scrutinee + -> [AltCon] -- These constructors can't be present when + -- matching the DEFAULT alternative + -> OutId -- The case binder + -> SimplCont + -> InAlt + -> SimplM OutAlt + +simplAlt env _ imposs_deflt_cons case_bndr' cont' (DEFAULT, bndrs, rhs) + = ASSERT( null bndrs ) + do { let env' = addBinderUnfolding env case_bndr' + (mkOtherCon imposs_deflt_cons) + -- Record the constructors that the case-binder *can't* be. + ; rhs' <- simplExprC env' rhs cont' + ; return (DEFAULT, [], rhs') } + +simplAlt env scrut' _ case_bndr' cont' (LitAlt lit, bndrs, rhs) + = ASSERT( null bndrs ) + do { env' <- addAltUnfoldings env scrut' case_bndr' (Lit lit) + ; rhs' <- simplExprC env' rhs cont' + ; return (LitAlt lit, [], rhs') } + +simplAlt env scrut' _ case_bndr' cont' (DataAlt con, vs, rhs) + = do { -- See Note [Adding evaluatedness info to pattern-bound variables] + let vs_with_evals = addEvals scrut' con vs + ; (env', vs') <- simplLamBndrs env vs_with_evals + + -- Bind the case-binder to (con args) + ; let inst_tys' = tyConAppArgs (idType case_bndr') + con_app :: OutExpr + con_app = mkConApp2 con inst_tys' vs' + + ; env'' <- addAltUnfoldings env' scrut' case_bndr' con_app + ; rhs' <- simplExprC env'' rhs cont' + ; return (DataAlt con, vs', rhs') } + +{- Note [Adding evaluatedness info to pattern-bound variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +addEvals records the evaluated-ness of the bound variables of +a case pattern. This is *important*. Consider + + data T = T !Int !Int + + case x of { T a b -> T (a+1) b } + +We really must record that b is already evaluated so that we don't +go and re-evaluate it when constructing the result. +See Note [Data-con worker strictness] in MkId.hs + +NB: simplLamBndrs preserves this eval info + +In addition to handling data constructor fields with !s, addEvals +also records the fact that the result of seq# is always in WHNF. +See Note [seq# magic] in GHC.Core.Op.ConstantFold. Example (#15226): + + case seq# v s of + (# s', v' #) -> E + +we want the compiler to be aware that v' is in WHNF in E. + +Open problem: we don't record that v itself is in WHNF (and we can't +do it here). The right thing is to do some kind of binder-swap; +see #15226 for discussion. +-} + +addEvals :: Maybe OutExpr -> DataCon -> [Id] -> [Id] +-- See Note [Adding evaluatedness info to pattern-bound variables] +addEvals scrut con vs + -- Deal with seq# applications + | Just scr <- scrut + , isUnboxedTupleCon con + , [s,x] <- vs + -- Use stripNArgs rather than collectArgsTicks to avoid building + -- a list of arguments only to throw it away immediately. + , Just (Var f) <- stripNArgs 4 scr + , Just SeqOp <- isPrimOpId_maybe f + , let x' = zapIdOccInfoAndSetEvald MarkedStrict x + = [s, x'] + + -- Deal with banged datacon fields +addEvals _scrut con vs = go vs the_strs + where + the_strs = dataConRepStrictness con + + go [] [] = [] + go (v:vs') strs | isTyVar v = v : go vs' strs + go (v:vs') (str:strs) = zapIdOccInfoAndSetEvald str v : go vs' strs + go _ _ = pprPanic "Simplify.addEvals" + (ppr con $$ + ppr vs $$ + ppr_with_length (map strdisp the_strs) $$ + ppr_with_length (dataConRepArgTys con) $$ + ppr_with_length (dataConRepStrictness con)) + where + ppr_with_length list + = ppr list <+> parens (text "length =" <+> ppr (length list)) + strdisp MarkedStrict = text "MarkedStrict" + strdisp NotMarkedStrict = text "NotMarkedStrict" + +zapIdOccInfoAndSetEvald :: StrictnessMark -> Id -> Id +zapIdOccInfoAndSetEvald str v = + setCaseBndrEvald str $ -- Add eval'dness info + zapIdOccInfo v -- And kill occ info; + -- see Note [Case alternative occ info] + +addAltUnfoldings :: SimplEnv -> Maybe OutExpr -> OutId -> OutExpr -> SimplM SimplEnv +addAltUnfoldings env scrut case_bndr con_app + = do { let con_app_unf = mk_simple_unf con_app + env1 = addBinderUnfolding env case_bndr con_app_unf + + -- See Note [Add unfolding for scrutinee] + env2 = case scrut of + Just (Var v) -> addBinderUnfolding env1 v con_app_unf + Just (Cast (Var v) co) -> addBinderUnfolding env1 v $ + mk_simple_unf (Cast con_app (mkSymCo co)) + _ -> env1 + + ; traceSmpl "addAltUnf" (vcat [ppr case_bndr <+> ppr scrut, ppr con_app]) + ; return env2 } + where + mk_simple_unf = mkSimpleUnfolding (seDynFlags env) + +addBinderUnfolding :: SimplEnv -> Id -> Unfolding -> SimplEnv +addBinderUnfolding env bndr unf + | debugIsOn, Just tmpl <- maybeUnfoldingTemplate unf + = WARN( not (eqType (idType bndr) (exprType tmpl)), + ppr bndr $$ ppr (idType bndr) $$ ppr tmpl $$ ppr (exprType tmpl) ) + modifyInScope env (bndr `setIdUnfolding` unf) + + | otherwise + = modifyInScope env (bndr `setIdUnfolding` unf) + +zapBndrOccInfo :: Bool -> Id -> Id +-- Consider case e of b { (a,b) -> ... } +-- Then if we bind b to (a,b) in "...", and b is not dead, +-- then we must zap the deadness info on a,b +zapBndrOccInfo keep_occ_info pat_id + | keep_occ_info = pat_id + | otherwise = zapIdOccInfo pat_id + +{- Note [Case binder evaluated-ness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We pin on a (OtherCon []) unfolding to the case-binder of a Case, +even though it'll be over-ridden in every case alternative with a more +informative unfolding. Why? Because suppose a later, less clever, pass +simply replaces all occurrences of the case binder with the binder itself; +then Lint may complain about the let/app invariant. Example + case e of b { DEFAULT -> let v = reallyUnsafePtrEq# b y in .... + ; K -> blah } + +The let/app invariant requires that y is evaluated in the call to +reallyUnsafePtrEq#, which it is. But we still want that to be true if we +propagate binders to occurrences. + +This showed up in #13027. + +Note [Add unfolding for scrutinee] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general it's unlikely that a variable scrutinee will appear +in the case alternatives case x of { ...x unlikely to appear... } +because the binder-swap in OccAnal has got rid of all such occurrences +See Note [Binder swap] in OccAnal. + +BUT it is still VERY IMPORTANT to add a suitable unfolding for a +variable scrutinee, in simplAlt. Here's why + case x of y + (a,b) -> case b of c + I# v -> ...(f y)... +There is no occurrence of 'b' in the (...(f y)...). But y gets +the unfolding (a,b), and *that* mentions b. If f has a RULE + RULE f (p, I# q) = ... +we want that rule to match, so we must extend the in-scope env with a +suitable unfolding for 'y'. It's *essential* for rule matching; but +it's also good for case-elimintation -- suppose that 'f' was inlined +and did multi-level case analysis, then we'd solve it in one +simplifier sweep instead of two. + +Exactly the same issue arises in GHC.Core.Op.SpecConstr; +see Note [Add scrutinee to ValueEnv too] in GHC.Core.Op.SpecConstr + +HOWEVER, given + case x of y { Just a -> r1; Nothing -> r2 } +we do not want to add the unfolding x -> y to 'x', which might seem cool, +since 'y' itself has different unfoldings in r1 and r2. Reason: if we +did that, we'd have to zap y's deadness info and that is a very useful +piece of information. + +So instead we add the unfolding x -> Just a, and x -> Nothing in the +respective RHSs. + + +************************************************************************ +* * +\subsection{Known constructor} +* * +************************************************************************ + +We are a bit careful with occurrence info. Here's an example + + (\x* -> case x of (a*, b) -> f a) (h v, e) + +where the * means "occurs once". This effectively becomes + case (h v, e) of (a*, b) -> f a) +and then + let a* = h v; b = e in f a +and then + f (h v) + +All this should happen in one sweep. +-} + +knownCon :: SimplEnv + -> OutExpr -- The scrutinee + -> [FloatBind] -> DataCon -> [OutType] -> [OutExpr] -- The scrutinee (in pieces) + -> InId -> [InBndr] -> InExpr -- The alternative + -> SimplCont + -> SimplM (SimplFloats, OutExpr) + +knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont + = do { (floats1, env1) <- bind_args env bs dc_args + ; (floats2, env2) <- bind_case_bndr env1 + ; (floats3, expr') <- simplExprF env2 rhs cont + ; case dc_floats of + [] -> + return (floats1 `addFloats` floats2 `addFloats` floats3, expr') + _ -> + return ( emptyFloats env + -- See Note [FloatBinds from constructor wrappers] + , GHC.Core.Make.wrapFloats dc_floats $ + wrapFloats (floats1 `addFloats` floats2 `addFloats` floats3) expr') } + where + zap_occ = zapBndrOccInfo (isDeadBinder bndr) -- bndr is an InId + + -- Ugh! + bind_args env' [] _ = return (emptyFloats env', env') + + bind_args env' (b:bs') (Type ty : args) + = ASSERT( isTyVar b ) + bind_args (extendTvSubst env' b ty) bs' args + + bind_args env' (b:bs') (Coercion co : args) + = ASSERT( isCoVar b ) + bind_args (extendCvSubst env' b co) bs' args + + bind_args env' (b:bs') (arg : args) + = ASSERT( isId b ) + do { let b' = zap_occ b + -- Note that the binder might be "dead", because it doesn't + -- occur in the RHS; and simplNonRecX may therefore discard + -- it via postInlineUnconditionally. + -- Nevertheless we must keep it if the case-binder is alive, + -- because it may be used in the con_app. See Note [knownCon occ info] + ; (floats1, env2) <- simplNonRecX env' b' arg -- arg satisfies let/app invariant + ; (floats2, env3) <- bind_args env2 bs' args + ; return (floats1 `addFloats` floats2, env3) } + + bind_args _ _ _ = + pprPanic "bind_args" $ ppr dc $$ ppr bs $$ ppr dc_args $$ + text "scrut:" <+> ppr scrut + + -- It's useful to bind bndr to scrut, rather than to a fresh + -- binding x = Con arg1 .. argn + -- because very often the scrut is a variable, so we avoid + -- creating, and then subsequently eliminating, a let-binding + -- BUT, if scrut is a not a variable, we must be careful + -- about duplicating the arg redexes; in that case, make + -- a new con-app from the args + bind_case_bndr env + | isDeadBinder bndr = return (emptyFloats env, env) + | exprIsTrivial scrut = return (emptyFloats env + , extendIdSubst env bndr (DoneEx scrut Nothing)) + | otherwise = do { dc_args <- mapM (simplVar env) bs + -- dc_ty_args are already OutTypes, + -- but bs are InBndrs + ; let con_app = Var (dataConWorkId dc) + `mkTyApps` dc_ty_args + `mkApps` dc_args + ; simplNonRecX env bndr con_app } + +------------------- +missingAlt :: SimplEnv -> Id -> [InAlt] -> SimplCont + -> SimplM (SimplFloats, OutExpr) + -- This isn't strictly an error, although it is unusual. + -- It's possible that the simplifier might "see" that + -- an inner case has no accessible alternatives before + -- it "sees" that the entire branch of an outer case is + -- inaccessible. So we simply put an error case here instead. +missingAlt env case_bndr _ cont + = WARN( True, text "missingAlt" <+> ppr case_bndr ) + -- See Note [Avoiding space leaks in OutType] + let cont_ty = contResultType cont + in seqType cont_ty `seq` + return (emptyFloats env, mkImpossibleExpr cont_ty) + +{- +************************************************************************ +* * +\subsection{Duplicating continuations} +* * +************************************************************************ + +Consider + let x* = case e of { True -> e1; False -> e2 } + in b +where x* is a strict binding. Then mkDupableCont will be given +the continuation + case [] of { True -> e1; False -> e2 } ; let x* = [] in b ; stop +and will split it into + dupable: case [] of { True -> $j1; False -> $j2 } ; stop + join floats: $j1 = e1, $j2 = e2 + non_dupable: let x* = [] in b; stop + +Putting this back together would give + let x* = let { $j1 = e1; $j2 = e2 } in + case e of { True -> $j1; False -> $j2 } + in b +(Of course we only do this if 'e' wants to duplicate that continuation.) +Note how important it is that the new join points wrap around the +inner expression, and not around the whole thing. + +In contrast, any let-bindings introduced by mkDupableCont can wrap +around the entire thing. + +Note [Bottom alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we have + case (case x of { A -> error .. ; B -> e; C -> error ..) + of alts +then we can just duplicate those alts because the A and C cases +will disappear immediately. This is more direct than creating +join points and inlining them away. See #4930. +-} + +-------------------- +mkDupableCaseCont :: SimplEnv -> [InAlt] -> SimplCont + -> SimplM (SimplFloats, SimplCont) +mkDupableCaseCont env alts cont + | altsWouldDup alts = mkDupableCont env cont + | otherwise = return (emptyFloats env, cont) + +altsWouldDup :: [InAlt] -> Bool -- True iff strictly > 1 non-bottom alternative +altsWouldDup [] = False -- See Note [Bottom alternatives] +altsWouldDup [_] = False +altsWouldDup (alt:alts) + | is_bot_alt alt = altsWouldDup alts + | otherwise = not (all is_bot_alt alts) + where + is_bot_alt (_,_,rhs) = exprIsBottom rhs + +------------------------- +mkDupableCont :: SimplEnv -> SimplCont + -> SimplM ( SimplFloats -- Incoming SimplEnv augmented with + -- extra let/join-floats and in-scope variables + , SimplCont) -- dup_cont: duplicable continuation + +mkDupableCont env cont + | contIsDupable cont + = return (emptyFloats env, cont) + +mkDupableCont _ (Stop {}) = panic "mkDupableCont" -- Handled by previous eqn + +mkDupableCont env (CastIt ty cont) + = do { (floats, cont') <- mkDupableCont env cont + ; return (floats, CastIt ty cont') } + +-- Duplicating ticks for now, not sure if this is good or not +mkDupableCont env (TickIt t cont) + = do { (floats, cont') <- mkDupableCont env cont + ; return (floats, TickIt t cont') } + +mkDupableCont env (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs + , sc_body = body, sc_env = se, sc_cont = cont}) + -- See Note [Duplicating StrictBind] + = do { let sb_env = se `setInScopeFromE` env + ; (sb_env1, bndr') <- simplBinder sb_env bndr + ; (floats1, join_inner) <- simplLam sb_env1 bndrs body cont + -- No need to use mkDupableCont before simplLam; we + -- use cont once here, and then share the result if necessary + + ; let join_body = wrapFloats floats1 join_inner + res_ty = contResultType cont + + ; (floats2, body2) + <- if exprIsDupable (seDynFlags env) join_body + then return (emptyFloats env, join_body) + else do { join_bndr <- newJoinId [bndr'] res_ty + ; let join_call = App (Var join_bndr) (Var bndr') + join_rhs = Lam (setOneShotLambda bndr') join_body + join_bind = NonRec join_bndr join_rhs + floats = emptyFloats env `extendFloats` join_bind + ; return (floats, join_call) } + ; return ( floats2 + , StrictBind { sc_bndr = bndr', sc_bndrs = [] + , sc_body = body2 + , sc_env = zapSubstEnv se `setInScopeFromF` floats2 + -- See Note [StaticEnv invariant] in GHC.Core.Op.Simplify.Utils + , sc_dup = OkToDup + , sc_cont = mkBoringStop res_ty } ) } + +mkDupableCont env (StrictArg { sc_fun = info, sc_cci = cci, sc_cont = cont }) + -- See Note [Duplicating StrictArg] + -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable + = do { (floats1, cont') <- mkDupableCont env cont + ; (floats_s, args') <- mapAndUnzipM (makeTrivialArg (getMode env)) + (ai_args info) + ; return ( foldl' addLetFloats floats1 floats_s + , StrictArg { sc_fun = info { ai_args = args' } + , sc_cci = cci + , sc_cont = cont' + , sc_dup = OkToDup} ) } + +mkDupableCont env (ApplyToTy { sc_cont = cont + , sc_arg_ty = arg_ty, sc_hole_ty = hole_ty }) + = do { (floats, cont') <- mkDupableCont env cont + ; return (floats, ApplyToTy { sc_cont = cont' + , sc_arg_ty = arg_ty, sc_hole_ty = hole_ty }) } + +mkDupableCont env (ApplyToVal { sc_arg = arg, sc_dup = dup + , sc_env = se, sc_cont = cont }) + = -- e.g. [...hole...] (...arg...) + -- ==> + -- let a = ...arg... + -- in [...hole...] a + -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable + do { (floats1, cont') <- mkDupableCont env cont + ; let env' = env `setInScopeFromF` floats1 + ; (_, se', arg') <- simplArg env' dup se arg + ; (let_floats2, arg'') <- makeTrivial (getMode env) NotTopLevel (fsLit "karg") arg' + ; let all_floats = floats1 `addLetFloats` let_floats2 + ; return ( all_floats + , ApplyToVal { sc_arg = arg'' + , sc_env = se' `setInScopeFromF` all_floats + -- Ensure that sc_env includes the free vars of + -- arg'' in its in-scope set, even if makeTrivial + -- has turned arg'' into a fresh variable + -- See Note [StaticEnv invariant] in GHC.Core.Op.Simplify.Utils + , sc_dup = OkToDup, sc_cont = cont' }) } + +mkDupableCont env (Select { sc_bndr = case_bndr, sc_alts = alts + , sc_env = se, sc_cont = cont }) + = -- e.g. (case [...hole...] of { pi -> ei }) + -- ===> + -- let ji = \xij -> ei + -- in case [...hole...] of { pi -> ji xij } + -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable + do { tick (CaseOfCase case_bndr) + ; (floats, alt_cont) <- mkDupableCaseCont env alts cont + -- NB: We call mkDupableCaseCont here to make cont duplicable + -- (if necessary, depending on the number of alts) + -- And this is important: see Note [Fusing case continuations] + + ; let alt_env = se `setInScopeFromF` floats + ; (alt_env', case_bndr') <- simplBinder alt_env case_bndr + ; alts' <- mapM (simplAlt alt_env' Nothing [] case_bndr' alt_cont) alts + -- Safe to say that there are no handled-cons for the DEFAULT case + -- NB: simplBinder does not zap deadness occ-info, so + -- a dead case_bndr' will still advertise its deadness + -- This is really important because in + -- case e of b { (# p,q #) -> ... } + -- b is always dead, and indeed we are not allowed to bind b to (# p,q #), + -- which might happen if e was an explicit unboxed pair and b wasn't marked dead. + -- In the new alts we build, we have the new case binder, so it must retain + -- its deadness. + -- NB: we don't use alt_env further; it has the substEnv for + -- the alternatives, and we don't want that + + ; (join_floats, alts'') <- mapAccumLM (mkDupableAlt (seDynFlags env) case_bndr') + emptyJoinFloats alts' + + ; let all_floats = floats `addJoinFloats` join_floats + -- Note [Duplicated env] + ; return (all_floats + , Select { sc_dup = OkToDup + , sc_bndr = case_bndr' + , sc_alts = alts'' + , sc_env = zapSubstEnv se `setInScopeFromF` all_floats + -- See Note [StaticEnv invariant] in GHC.Core.Op.Simplify.Utils + , sc_cont = mkBoringStop (contResultType cont) } ) } + +mkDupableAlt :: DynFlags -> OutId + -> JoinFloats -> OutAlt + -> SimplM (JoinFloats, OutAlt) +mkDupableAlt dflags case_bndr jfloats (con, bndrs', rhs') + | exprIsDupable dflags rhs' -- Note [Small alternative rhs] + = return (jfloats, (con, bndrs', rhs')) + + | otherwise + = do { let rhs_ty' = exprType rhs' + scrut_ty = idType case_bndr + case_bndr_w_unf + = case con of + DEFAULT -> case_bndr + DataAlt dc -> setIdUnfolding case_bndr unf + where + -- See Note [Case binders and join points] + unf = mkInlineUnfolding rhs + rhs = mkConApp2 dc (tyConAppArgs scrut_ty) bndrs' + + LitAlt {} -> WARN( True, text "mkDupableAlt" + <+> ppr case_bndr <+> ppr con ) + case_bndr + -- The case binder is alive but trivial, so why has + -- it not been substituted away? + + final_bndrs' + | isDeadBinder case_bndr = filter abstract_over bndrs' + | otherwise = bndrs' ++ [case_bndr_w_unf] + + abstract_over bndr + | isTyVar bndr = True -- Abstract over all type variables just in case + | otherwise = not (isDeadBinder bndr) + -- The deadness info on the new Ids is preserved by simplBinders + final_args = varsToCoreExprs final_bndrs' + -- Note [Join point abstraction] + + -- We make the lambdas into one-shot-lambdas. The + -- join point is sure to be applied at most once, and doing so + -- prevents the body of the join point being floated out by + -- the full laziness pass + really_final_bndrs = map one_shot final_bndrs' + one_shot v | isId v = setOneShotLambda v + | otherwise = v + join_rhs = mkLams really_final_bndrs rhs' + + ; join_bndr <- newJoinId final_bndrs' rhs_ty' + + ; let join_call = mkApps (Var join_bndr) final_args + alt' = (con, bndrs', join_call) + + ; return ( jfloats `addJoinFlts` unitJoinFloat (NonRec join_bndr join_rhs) + , alt') } + -- See Note [Duplicated env] + +{- +Note [Fusing case continuations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's important to fuse two successive case continuations when the +first has one alternative. That's why we call prepareCaseCont here. +Consider this, which arises from thunk splitting (see Note [Thunk +splitting] in GHC.Core.Op.WorkWrap): + + let + x* = case (case v of {pn -> rn}) of + I# a -> I# a + in body + +The simplifier will find + (Var v) with continuation + Select (pn -> rn) ( + Select [I# a -> I# a] ( + StrictBind body Stop + +So we'll call mkDupableCont on + Select [I# a -> I# a] (StrictBind body Stop) +There is just one alternative in the first Select, so we want to +simplify the rhs (I# a) with continuation (StrictBind body Stop) +Supposing that body is big, we end up with + let $j a = <let x = I# a in body> + in case v of { pn -> case rn of + I# a -> $j a } +This is just what we want because the rn produces a box that +the case rn cancels with. + +See #4957 a fuller example. + +Note [Case binders and join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this + case (case .. ) of c { + I# c# -> ....c.... + +If we make a join point with c but not c# we get + $j = \c -> ....c.... + +But if later inlining scrutinises the c, thus + + $j = \c -> ... case c of { I# y -> ... } ... + +we won't see that 'c' has already been scrutinised. This actually +happens in the 'tabulate' function in wave4main, and makes a significant +difference to allocation. + +An alternative plan is this: + + $j = \c# -> let c = I# c# in ...c.... + +but that is bad if 'c' is *not* later scrutinised. + +So instead we do both: we pass 'c' and 'c#' , and record in c's inlining +(a stable unfolding) that it's really I# c#, thus + + $j = \c# -> \c[=I# c#] -> ...c.... + +Absence analysis may later discard 'c'. + +NB: take great care when doing strictness analysis; + see Note [Lambda-bound unfoldings] in GHC.Core.Op.DmdAnal. + +Also note that we can still end up passing stuff that isn't used. Before +strictness analysis we have + let $j x y c{=(x,y)} = (h c, ...) + in ... +After strictness analysis we see that h is strict, we end up with + let $j x y c{=(x,y)} = ($wh x y, ...) +and c is unused. + +Note [Duplicated env] +~~~~~~~~~~~~~~~~~~~~~ +Some of the alternatives are simplified, but have not been turned into a join point +So they *must* have a zapped subst-env. So we can't use completeNonRecX to +bind the join point, because it might to do PostInlineUnconditionally, and +we'd lose that when zapping the subst-env. We could have a per-alt subst-env, +but zapping it (as we do in mkDupableCont, the Select case) is safe, and +at worst delays the join-point inlining. + +Note [Small alternative rhs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is worth checking for a small RHS because otherwise we +get extra let bindings that may cause an extra iteration of the simplifier to +inline back in place. Quite often the rhs is just a variable or constructor. +The Ord instance of Maybe in PrelMaybe.hs, for example, took several extra +iterations because the version with the let bindings looked big, and so wasn't +inlined, but after the join points had been inlined it looked smaller, and so +was inlined. + +NB: we have to check the size of rhs', not rhs. +Duplicating a small InAlt might invalidate occurrence information +However, if it *is* dupable, we return the *un* simplified alternative, +because otherwise we'd need to pair it up with an empty subst-env.... +but we only have one env shared between all the alts. +(Remember we must zap the subst-env before re-simplifying something). +Rather than do this we simply agree to re-simplify the original (small) thing later. + +Note [Funky mkLamTypes] +~~~~~~~~~~~~~~~~~~~~~~ +Notice the funky mkLamTypes. If the constructor has existentials +it's possible that the join point will be abstracted over +type variables as well as term variables. + Example: Suppose we have + data T = forall t. C [t] + Then faced with + case (case e of ...) of + C t xs::[t] -> rhs + We get the join point + let j :: forall t. [t] -> ... + j = /\t \xs::[t] -> rhs + in + case (case e of ...) of + C t xs::[t] -> j t xs + +Note [Duplicating StrictArg] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We make a StrictArg duplicable simply by making all its +stored-up arguments (in sc_fun) trivial, by let-binding +them. Thus: + f E [..hole..] + ==> let a = E + in f a [..hole..] +Now if the thing in the hole is a case expression (which is when +we'll call mkDupableCont), we'll push the function call into the +branches, which is what we want. Now RULES for f may fire, and +call-pattern specialisation. Here's an example from #3116 + go (n+1) (case l of + 1 -> bs' + _ -> Chunk p fpc (o+1) (l-1) bs') +If we can push the call for 'go' inside the case, we get +call-pattern specialisation for 'go', which is *crucial* for +this program. + +Here is the (&&) example: + && E (case x of { T -> F; F -> T }) + ==> let a = E in + case x of { T -> && a F; F -> && a T } +Much better! + +Notice that + * Arguments to f *after* the strict one are handled by + the ApplyToVal case of mkDupableCont. Eg + f [..hole..] E + + * We can only do the let-binding of E because the function + part of a StrictArg continuation is an explicit syntax + tree. In earlier versions we represented it as a function + (CoreExpr -> CoreEpxr) which we couldn't take apart. + +Historical aide: previously we did this (where E is a +big argument: + f E [..hole..] + ==> let $j = \a -> f E a + in $j [..hole..] + +But this is terrible! Here's an example: + && E (case x of { T -> F; F -> T }) +Now, && is strict so we end up simplifying the case with +an ArgOf continuation. If we let-bind it, we get + let $j = \v -> && E v + in simplExpr (case x of { T -> F; F -> T }) + (ArgOf (\r -> $j r) +And after simplifying more we get + let $j = \v -> && E v + in case x of { T -> $j F; F -> $j T } +Which is a Very Bad Thing + + +Note [Duplicating StrictBind] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We make a StrictBind duplicable in a very similar way to +that for case expressions. After all, + let x* = e in b is similar to case e of x -> b + +So we potentially make a join-point for the body, thus: + let x = [] in b ==> join j x = b + in let x = [] in j x + + +Note [Join point abstraction] Historical note +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NB: This note is now historical, describing how (in the past) we used +to add a void argument to nullary join points. But now that "join +point" is not a fuzzy concept but a formal syntactic construct (as +distinguished by the JoinId constructor of IdDetails), each of these +concerns is handled separately, with no need for a vestigial extra +argument. + +Join points always have at least one value argument, +for several reasons + +* If we try to lift a primitive-typed something out + for let-binding-purposes, we will *caseify* it (!), + with potentially-disastrous strictness results. So + instead we turn it into a function: \v -> e + where v::Void#. The value passed to this function is void, + which generates (almost) no code. + +* CPR. We used to say "&& isUnliftedType rhs_ty'" here, but now + we make the join point into a function whenever used_bndrs' + is empty. This makes the join-point more CPR friendly. + Consider: let j = if .. then I# 3 else I# 4 + in case .. of { A -> j; B -> j; C -> ... } + + Now CPR doesn't w/w j because it's a thunk, so + that means that the enclosing function can't w/w either, + which is a lose. Here's the example that happened in practice: + kgmod :: Int -> Int -> Int + kgmod x y = if x > 0 && y < 0 || x < 0 && y > 0 + then 78 + else 5 + +* Let-no-escape. We want a join point to turn into a let-no-escape + so that it is implemented as a jump, and one of the conditions + for LNE is that it's not updatable. In CoreToStg, see + Note [What is a non-escaping let] + +* Floating. Since a join point will be entered once, no sharing is + gained by floating out, but something might be lost by doing + so because it might be allocated. + +I have seen a case alternative like this: + True -> \v -> ... +It's a bit silly to add the realWorld dummy arg in this case, making + $j = \s v -> ... + True -> $j s +(the \v alone is enough to make CPR happy) but I think it's rare + +There's a slight infelicity here: we pass the overall +case_bndr to all the join points if it's used in *any* RHS, +because we don't know its usage in each RHS separately + + + +************************************************************************ +* * + Unfoldings +* * +************************************************************************ +-} + +simplLetUnfolding :: SimplEnv-> TopLevelFlag + -> MaybeJoinCont + -> InId + -> OutExpr -> OutType + -> Unfolding -> SimplM Unfolding +simplLetUnfolding env top_lvl cont_mb id new_rhs rhs_ty unf + | isStableUnfolding unf + = simplStableUnfolding env top_lvl cont_mb id unf rhs_ty + | isExitJoinId id + = return noUnfolding -- See Note [Do not inline exit join points] in GHC.Core.Op.Exitify + | otherwise + = mkLetUnfolding (seDynFlags env) top_lvl InlineRhs id new_rhs + +------------------- +mkLetUnfolding :: DynFlags -> TopLevelFlag -> UnfoldingSource + -> InId -> OutExpr -> SimplM Unfolding +mkLetUnfolding dflags top_lvl src id new_rhs + = is_bottoming `seq` -- See Note [Force bottoming field] + return (mkUnfolding dflags src is_top_lvl is_bottoming new_rhs) + -- We make an unfolding *even for loop-breakers*. + -- Reason: (a) It might be useful to know that they are WHNF + -- (b) In GHC.Iface.Tidy we currently assume that, if we want to + -- expose the unfolding then indeed we *have* an unfolding + -- to expose. (We could instead use the RHS, but currently + -- we don't.) The simple thing is always to have one. + where + is_top_lvl = isTopLevel top_lvl + is_bottoming = isBottomingId id + +------------------- +simplStableUnfolding :: SimplEnv -> TopLevelFlag + -> MaybeJoinCont -- Just k => a join point with continuation k + -> InId + -> Unfolding -> OutType -> SimplM Unfolding +-- Note [Setting the new unfolding] +simplStableUnfolding env top_lvl mb_cont id unf rhs_ty + = case unf of + NoUnfolding -> return unf + BootUnfolding -> return unf + OtherCon {} -> return unf + + DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args } + -> do { (env', bndrs') <- simplBinders unf_env bndrs + ; args' <- mapM (simplExpr env') args + ; return (mkDFunUnfolding bndrs' con args') } + + CoreUnfolding { uf_tmpl = expr, uf_src = src, uf_guidance = guide } + | isStableSource src + -> do { expr' <- case mb_cont of -- See Note [Rules and unfolding for join points] + Just cont -> simplJoinRhs unf_env id expr cont + Nothing -> simplExprC unf_env expr (mkBoringStop rhs_ty) + ; case guide of + UnfWhen { ug_arity = arity + , ug_unsat_ok = sat_ok + , ug_boring_ok = boring_ok + } + -- Happens for INLINE things + -> let guide' = + UnfWhen { ug_arity = arity + , ug_unsat_ok = sat_ok + , ug_boring_ok = + boring_ok || inlineBoringOk expr' + } + -- Refresh the boring-ok flag, in case expr' + -- has got small. This happens, notably in the inlinings + -- for dfuns for single-method classes; see + -- Note [Single-method classes] in TcInstDcls. + -- A test case is #4138 + -- But retain a previous boring_ok of True; e.g. see + -- the way it is set in calcUnfoldingGuidanceWithArity + in return (mkCoreUnfolding src is_top_lvl expr' guide') + -- See Note [Top-level flag on inline rules] in GHC.Core.Unfold + + _other -- Happens for INLINABLE things + -> mkLetUnfolding dflags top_lvl src id expr' } + -- If the guidance is UnfIfGoodArgs, this is an INLINABLE + -- unfolding, and we need to make sure the guidance is kept up + -- to date with respect to any changes in the unfolding. + + | otherwise -> return noUnfolding -- Discard unstable unfoldings + where + dflags = seDynFlags env + is_top_lvl = isTopLevel top_lvl + act = idInlineActivation id + unf_env = updMode (updModeForStableUnfoldings act) env + -- See Note [Simplifying inside stable unfoldings] in GHC.Core.Op.Simplify.Utils + +{- +Note [Force bottoming field] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We need to force bottoming, or the new unfolding holds +on to the old unfolding (which is part of the id). + +Note [Setting the new unfolding] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* If there's an INLINE pragma, we simplify the RHS gently. Maybe we + should do nothing at all, but simplifying gently might get rid of + more crap. + +* If not, we make an unfolding from the new RHS. But *only* for + non-loop-breakers. Making loop breakers not have an unfolding at all + means that we can avoid tests in exprIsConApp, for example. This is + important: if exprIsConApp says 'yes' for a recursive thing, then we + can get into an infinite loop + +If there's a stable unfolding on a loop breaker (which happens for +INLINABLE), we hang on to the inlining. It's pretty dodgy, but the +user did say 'INLINE'. May need to revisit this choice. + +************************************************************************ +* * + Rules +* * +************************************************************************ + +Note [Rules in a letrec] +~~~~~~~~~~~~~~~~~~~~~~~~ +After creating fresh binders for the binders of a letrec, we +substitute the RULES and add them back onto the binders; this is done +*before* processing any of the RHSs. This is important. Manuel found +cases where he really, really wanted a RULE for a recursive function +to apply in that function's own right-hand side. + +See Note [Forming Rec groups] in OccurAnal +-} + +addBndrRules :: SimplEnv -> InBndr -> OutBndr + -> MaybeJoinCont -- Just k for a join point binder + -- Nothing otherwise + -> SimplM (SimplEnv, OutBndr) +-- Rules are added back into the bin +addBndrRules env in_id out_id mb_cont + | null old_rules + = return (env, out_id) + | otherwise + = do { new_rules <- simplRules env (Just out_id) old_rules mb_cont + ; let final_id = out_id `setIdSpecialisation` mkRuleInfo new_rules + ; return (modifyInScope env final_id, final_id) } + where + old_rules = ruleInfoRules (idSpecialisation in_id) + +simplRules :: SimplEnv -> Maybe OutId -> [CoreRule] + -> MaybeJoinCont -> SimplM [CoreRule] +simplRules env mb_new_id rules mb_cont + = mapM simpl_rule rules + where + simpl_rule rule@(BuiltinRule {}) + = return rule + + simpl_rule rule@(Rule { ru_bndrs = bndrs, ru_args = args + , ru_fn = fn_name, ru_rhs = rhs }) + = do { (env', bndrs') <- simplBinders env bndrs + ; let rhs_ty = substTy env' (exprType rhs) + rhs_cont = case mb_cont of -- See Note [Rules and unfolding for join points] + Nothing -> mkBoringStop rhs_ty + Just cont -> ASSERT2( join_ok, bad_join_msg ) + cont + rule_env = updMode updModeForRules env' + fn_name' = case mb_new_id of + Just id -> idName id + Nothing -> fn_name + + -- join_ok is an assertion check that the join-arity of the + -- binder matches that of the rule, so that pushing the + -- continuation into the RHS makes sense + join_ok = case mb_new_id of + Just id | Just join_arity <- isJoinId_maybe id + -> length args == join_arity + _ -> False + bad_join_msg = vcat [ ppr mb_new_id, ppr rule + , ppr (fmap isJoinId_maybe mb_new_id) ] + + ; args' <- mapM (simplExpr rule_env) args + ; rhs' <- simplExprC rule_env rhs rhs_cont + ; return (rule { ru_bndrs = bndrs' + , ru_fn = fn_name' + , ru_args = args' + , ru_rhs = rhs' }) } diff --git a/compiler/GHC/Core/Op/Simplify/Driver.hs b/compiler/GHC/Core/Op/Simplify/Driver.hs new file mode 100644 index 0000000000..b6ec392599 --- /dev/null +++ b/compiler/GHC/Core/Op/Simplify/Driver.hs @@ -0,0 +1,1037 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section[SimplCore]{Driver for simplifying @Core@ programs} +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.Simplify.Driver ( core2core, simplifyExpr ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Driver.Session +import GHC.Core +import GHC.Driver.Types +import GHC.Core.Op.CSE ( cseProgram ) +import GHC.Core.Rules ( mkRuleBase, unionRuleBase, + extendRuleBaseList, ruleCheckProgram, addRuleInfo, + getRules ) +import GHC.Core.Ppr ( pprCoreBindings, pprCoreExpr ) +import GHC.Core.Op.OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) +import IdInfo +import GHC.Core.Stats ( coreBindsSize, coreBindsStats, exprSize ) +import GHC.Core.Utils ( mkTicks, stripTicksTop ) +import GHC.Core.Lint ( endPass, lintPassResult, dumpPassResult, + lintAnnots ) +import GHC.Core.Op.Simplify ( simplTopBinds, simplExpr, simplRules ) +import GHC.Core.Op.Simplify.Utils ( simplEnvForGHCi, activeRule, activeUnfolding ) +import GHC.Core.Op.Simplify.Env +import GHC.Core.Op.Simplify.Monad +import GHC.Core.Op.Monad +import qualified ErrUtils as Err +import GHC.Core.Op.FloatIn ( floatInwards ) +import GHC.Core.Op.FloatOut ( floatOutwards ) +import GHC.Core.FamInstEnv +import Id +import ErrUtils ( withTiming, withTimingD, DumpFormat (..) ) +import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma ) +import VarSet +import VarEnv +import GHC.Core.Op.LiberateCase ( liberateCase ) +import GHC.Core.Op.StaticArgs ( doStaticArgs ) +import GHC.Core.Op.Specialise ( specProgram) +import GHC.Core.Op.SpecConstr ( specConstrProgram) +import GHC.Core.Op.DmdAnal ( dmdAnalProgram ) +import GHC.Core.Op.CprAnal ( cprAnalProgram ) +import GHC.Core.Op.CallArity ( callArityAnalProgram ) +import GHC.Core.Op.Exitify ( exitifyProgram ) +import GHC.Core.Op.WorkWrap ( wwTopBinds ) +import SrcLoc +import Util +import Module +import GHC.Driver.Plugins ( withPlugins, installCoreToDos ) +import GHC.Runtime.Loader -- ( initializePlugins ) + +import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply ) +import UniqFM +import Outputable +import Control.Monad +import qualified GHC.LanguageExtensions as LangExt +{- +************************************************************************ +* * +\subsection{The driver for the simplifier} +* * +************************************************************************ +-} + +core2core :: HscEnv -> ModGuts -> IO ModGuts +core2core hsc_env guts@(ModGuts { mg_module = mod + , mg_loc = loc + , mg_deps = deps + , mg_rdr_env = rdr_env }) + = do { -- make sure all plugins are loaded + + ; let builtin_passes = getCoreToDo dflags + orph_mods = mkModuleSet (mod : dep_orphs deps) + uniq_mask = 's' + ; + ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_mask mod + orph_mods print_unqual loc $ + do { hsc_env' <- getHscEnv + ; dflags' <- liftIO $ initializePlugins hsc_env' + (hsc_dflags hsc_env') + ; all_passes <- withPlugins dflags' + installCoreToDos + builtin_passes + ; runCorePasses all_passes guts } + + ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats + "Grand total simplifier statistics" + FormatText + (pprSimplCount stats) + + ; return guts2 } + where + dflags = hsc_dflags hsc_env + home_pkg_rules = hptRules hsc_env (dep_mods deps) + hpt_rule_base = mkRuleBase home_pkg_rules + print_unqual = mkPrintUnqualified dflags rdr_env + -- mod: get the module out of the current HscEnv so we can retrieve it from the monad. + -- This is very convienent for the users of the monad (e.g. plugins do not have to + -- consume the ModGuts to find the module) but somewhat ugly because mg_module may + -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which + -- would mean our cached value would go out of date. + +{- +************************************************************************ +* * + Generating the main optimisation pipeline +* * +************************************************************************ +-} + +getCoreToDo :: DynFlags -> [CoreToDo] +getCoreToDo dflags + = flatten_todos core_todo + where + opt_level = optLevel dflags + phases = simplPhases dflags + max_iter = maxSimplIterations dflags + rule_check = ruleCheck dflags + call_arity = gopt Opt_CallArity dflags + exitification = gopt Opt_Exitification dflags + strictness = gopt Opt_Strictness dflags + full_laziness = gopt Opt_FullLaziness dflags + do_specialise = gopt Opt_Specialise dflags + do_float_in = gopt Opt_FloatIn dflags + cse = gopt Opt_CSE dflags + spec_constr = gopt Opt_SpecConstr dflags + liberate_case = gopt Opt_LiberateCase dflags + late_dmd_anal = gopt Opt_LateDmdAnal dflags + late_specialise = gopt Opt_LateSpecialise dflags + static_args = gopt Opt_StaticArgumentTransformation dflags + rules_on = gopt Opt_EnableRewriteRules dflags + eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags + ww_on = gopt Opt_WorkerWrapper dflags + static_ptrs = xopt LangExt.StaticPointers dflags + + maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase) + + maybe_strictness_before phase + = runWhen (phase `elem` strictnessBefore dflags) CoreDoDemand + + base_mode = SimplMode { sm_phase = panic "base_mode" + , sm_names = [] + , sm_dflags = dflags + , sm_rules = rules_on + , sm_eta_expand = eta_expand_on + , sm_inline = True + , sm_case_case = True } + + simpl_phase phase names iter + = CoreDoPasses + $ [ maybe_strictness_before phase + , CoreDoSimplify iter + (base_mode { sm_phase = Phase phase + , sm_names = names }) + + , maybe_rule_check (Phase phase) ] + + simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter + | phase <- [phases, phases-1 .. 1] ] + + + -- initial simplify: mk specialiser happy: minimum effort please + simpl_gently = CoreDoSimplify max_iter + (base_mode { sm_phase = InitialPhase + , sm_names = ["Gentle"] + , sm_rules = rules_on -- Note [RULEs enabled in InitialPhase] + , sm_inline = True + -- See Note [Inline in InitialPhase] + , sm_case_case = False }) + -- Don't do case-of-case transformations. + -- This makes full laziness work better + + dmd_cpr_ww = if ww_on then [CoreDoDemand,CoreDoCpr,CoreDoWorkerWrapper] + else [CoreDoDemand,CoreDoCpr] + + + demand_analyser = (CoreDoPasses ( + dmd_cpr_ww ++ + [simpl_phase 0 ["post-worker-wrapper"] max_iter] + )) + + -- Static forms are moved to the top level with the FloatOut pass. + -- See Note [Grand plan for static forms] in StaticPtrTable. + static_ptrs_float_outwards = + runWhen static_ptrs $ CoreDoPasses + [ simpl_gently -- Float Out can't handle type lets (sometimes created + -- by simpleOptPgm via mkParallelBindings) + , CoreDoFloatOutwards FloatOutSwitches + { floatOutLambdas = Just 0 + , floatOutConstants = True + , floatOutOverSatApps = False + , floatToTopLevelOnly = True + } + ] + + core_todo = + if opt_level == 0 then + [ static_ptrs_float_outwards, + CoreDoSimplify max_iter + (base_mode { sm_phase = Phase 0 + , sm_names = ["Non-opt simplification"] }) + ] + + else {- opt_level >= 1 -} [ + + -- We want to do the static argument transform before full laziness as it + -- may expose extra opportunities to float things outwards. However, to fix + -- up the output of the transformation we need at do at least one simplify + -- after this before anything else + runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]), + + -- initial simplify: mk specialiser happy: minimum effort please + simpl_gently, + + -- Specialisation is best done before full laziness + -- so that overloaded functions have all their dictionary lambdas manifest + runWhen do_specialise CoreDoSpecialising, + + if full_laziness then + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = Just 0, + floatOutConstants = True, + floatOutOverSatApps = False, + floatToTopLevelOnly = False } + -- Was: gentleFloatOutSwitches + -- + -- I have no idea why, but not floating constants to + -- top level is very bad in some cases. + -- + -- Notably: p_ident in spectral/rewrite + -- Changing from "gentle" to "constantsOnly" + -- improved rewrite's allocation by 19%, and + -- made 0.0% difference to any other nofib + -- benchmark + -- + -- Not doing floatOutOverSatApps yet, we'll do + -- that later on when we've had a chance to get more + -- accurate arity information. In fact it makes no + -- difference at all to performance if we do it here, + -- but maybe we save some unnecessary to-and-fro in + -- the simplifier. + else + -- Even with full laziness turned off, we still need to float static + -- forms to the top level. See Note [Grand plan for static forms] in + -- StaticPtrTable. + static_ptrs_float_outwards, + + simpl_phases, + + -- Phase 0: allow all Ids to be inlined now + -- This gets foldr inlined before strictness analysis + + -- At least 3 iterations because otherwise we land up with + -- huge dead expressions because of an infelicity in the + -- simplifier. + -- let k = BIG in foldr k z xs + -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs + -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs + -- Don't stop now! + simpl_phase 0 ["main"] (max max_iter 3), + + runWhen do_float_in CoreDoFloatInwards, + -- Run float-inwards immediately before the strictness analyser + -- Doing so pushes bindings nearer their use site and hence makes + -- them more likely to be strict. These bindings might only show + -- up after the inlining from simplification. Example in fulsom, + -- Csg.calc, where an arg of timesDouble thereby becomes strict. + + runWhen call_arity $ CoreDoPasses + [ CoreDoCallArity + , simpl_phase 0 ["post-call-arity"] max_iter + ], + + runWhen strictness demand_analyser, + + runWhen exitification CoreDoExitify, + -- See note [Placement of the exitification pass] + + runWhen full_laziness $ + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = floatLamArgs dflags, + floatOutConstants = True, + floatOutOverSatApps = True, + floatToTopLevelOnly = False }, + -- nofib/spectral/hartel/wang doubles in speed if you + -- do full laziness late in the day. It only happens + -- after fusion and other stuff, so the early pass doesn't + -- catch it. For the record, the redex is + -- f_el22 (f_el21 r_midblock) + + + runWhen cse CoreCSE, + -- We want CSE to follow the final full-laziness pass, because it may + -- succeed in commoning up things floated out by full laziness. + -- CSE used to rely on the no-shadowing invariant, but it doesn't any more + + runWhen do_float_in CoreDoFloatInwards, + + maybe_rule_check (Phase 0), + + -- Case-liberation for -O2. This should be after + -- strictness analysis and the simplification which follows it. + runWhen liberate_case (CoreDoPasses [ + CoreLiberateCase, + simpl_phase 0 ["post-liberate-case"] max_iter + ]), -- Run the simplifier after LiberateCase to vastly + -- reduce the possibility of shadowing + -- Reason: see Note [Shadowing] in GHC.Core.Op.SpecConstr + + runWhen spec_constr CoreDoSpecConstr, + + maybe_rule_check (Phase 0), + + runWhen late_specialise + (CoreDoPasses [ CoreDoSpecialising + , simpl_phase 0 ["post-late-spec"] max_iter]), + + -- LiberateCase can yield new CSE opportunities because it peels + -- off one layer of a recursive function (concretely, I saw this + -- in wheel-sieve1), and I'm guessing that SpecConstr can too + -- And CSE is a very cheap pass. So it seems worth doing here. + runWhen ((liberate_case || spec_constr) && cse) CoreCSE, + + -- Final clean-up simplification: + simpl_phase 0 ["final"] max_iter, + + runWhen late_dmd_anal $ CoreDoPasses ( + dmd_cpr_ww ++ + [simpl_phase 0 ["post-late-ww"] max_iter] + ), + + -- Final run of the demand_analyser, ensures that one-shot thunks are + -- really really one-shot thunks. Only needed if the demand analyser + -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Op.DmdAnal + -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution + -- can become /exponentially/ more expensive. See #11731, #12996. + runWhen (strictness || late_dmd_anal) CoreDoDemand, + + maybe_rule_check (Phase 0) + ] + + -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity. + flatten_todos [] = [] + flatten_todos (CoreDoNothing : rest) = flatten_todos rest + flatten_todos (CoreDoPasses passes : rest) = + flatten_todos passes ++ flatten_todos rest + flatten_todos (todo : rest) = todo : flatten_todos rest + +{- Note [Inline in InitialPhase] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is +confusing for users because when they say INLINE they expect the function to inline +right away. + +So now we do inlining immediately, even in the InitialPhase, assuming that the +Id's Activation allows it. + +This is a surprisingly big deal. Compiler performance improved a lot +when I made this change: + + perf/compiler/T5837.run T5837 [stat too good] (normal) + perf/compiler/parsing001.run parsing001 [stat too good] (normal) + perf/compiler/T12234.run T12234 [stat too good] (optasm) + perf/compiler/T9020.run T9020 [stat too good] (optasm) + perf/compiler/T3064.run T3064 [stat too good] (normal) + perf/compiler/T9961.run T9961 [stat too good] (normal) + perf/compiler/T13056.run T13056 [stat too good] (optasm) + perf/compiler/T9872d.run T9872d [stat too good] (normal) + perf/compiler/T783.run T783 [stat too good] (normal) + perf/compiler/T12227.run T12227 [stat too good] (normal) + perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal) + perf/compiler/T1969.run T1969 [stat too good] (normal) + perf/compiler/T9872a.run T9872a [stat too good] (normal) + perf/compiler/T9872c.run T9872c [stat too good] (normal) + perf/compiler/T9872b.run T9872b [stat too good] (normal) + perf/compiler/T9872d.run T9872d [stat too good] (normal) + +Note [RULEs enabled in InitialPhase] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RULES are enabled when doing "gentle" simplification in InitialPhase, +or with -O0. Two reasons: + + * We really want the class-op cancellation to happen: + op (df d1 d2) --> $cop3 d1 d2 + because this breaks the mutual recursion between 'op' and 'df' + + * I wanted the RULE + lift String ===> ... + to work in Template Haskell when simplifying + splices, so we get simpler code for literal strings + +But watch out: list fusion can prevent floating. So use phase control +to switch off those rules until after floating. + +************************************************************************ +* * + The CoreToDo interpreter +* * +************************************************************************ +-} + +runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts +runCorePasses passes guts + = foldM do_pass guts passes + where + do_pass guts CoreDoNothing = return guts + do_pass guts (CoreDoPasses ps) = runCorePasses ps guts + do_pass guts pass = do + withTimingD (ppr pass <+> brackets (ppr mod)) + (const ()) $ do + { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts + ; endPass pass (mg_binds guts') (mg_rules guts') + ; return guts' } + + mod = mg_module guts + +doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts +doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-} + simplifyPgm pass + +doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-} + doPass cseProgram + +doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-} + doPassD liberateCase + +doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-} + floatInwards + +doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-} + doPassDUM (floatOutwards f) + +doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-} + doPassU doStaticArgs + +doCorePass CoreDoCallArity = {-# SCC "CallArity" #-} + doPassD callArityAnalProgram + +doCorePass CoreDoExitify = {-# SCC "Exitify" #-} + doPass exitifyProgram + +doCorePass CoreDoDemand = {-# SCC "DmdAnal" #-} + doPassDFM dmdAnalProgram + +doCorePass CoreDoCpr = {-# SCC "CprAnal" #-} + doPassDFM cprAnalProgram + +doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-} + doPassDFU wwTopBinds + +doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-} + specProgram + +doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-} + specConstrProgram + +doCorePass CoreDoPrintCore = observe printCore +doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat +doCorePass CoreDoNothing = return +doCorePass (CoreDoPasses passes) = runCorePasses passes + +doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass + +doCorePass pass@CoreDesugar = pprPanic "doCorePass" (ppr pass) +doCorePass pass@CoreDesugarOpt = pprPanic "doCorePass" (ppr pass) +doCorePass pass@CoreTidy = pprPanic "doCorePass" (ppr pass) +doCorePass pass@CorePrep = pprPanic "doCorePass" (ppr pass) +doCorePass pass@CoreOccurAnal = pprPanic "doCorePass" (ppr pass) + +{- +************************************************************************ +* * +\subsection{Core pass combinators} +* * +************************************************************************ +-} + +printCore :: DynFlags -> CoreProgram -> IO () +printCore dflags binds + = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds) + +ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts +ruleCheckPass current_phase pat guts = + withTimingD (text "RuleCheck"<+>brackets (ppr $ mg_module guts)) + (const ()) $ do + { rb <- getRuleBase + ; dflags <- getDynFlags + ; vis_orphs <- getVisibleOrphanMods + ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn + ++ (mg_rules guts) + ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan + (defaultDumpStyle dflags) + (ruleCheckProgram current_phase pat + rule_fn (mg_binds guts)) + ; return guts } + +doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts +doPassDUM do_pass = doPassM $ \binds -> do + dflags <- getDynFlags + us <- getUniqueSupplyM + liftIO $ do_pass dflags us binds + +doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts +doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags)) + +doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts +doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags) + +doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts +doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us) + +doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts +doPassU do_pass = doPassDU (const do_pass) + +doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts +doPassDFM do_pass guts = do + dflags <- getDynFlags + p_fam_env <- getPackageFamInstEnv + let fam_envs = (p_fam_env, mg_fam_inst_env guts) + doPassM (liftIO . do_pass dflags fam_envs) guts + +doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts +doPassDFU do_pass guts = do + dflags <- getDynFlags + us <- getUniqueSupplyM + p_fam_env <- getPackageFamInstEnv + let fam_envs = (p_fam_env, mg_fam_inst_env guts) + doPass (do_pass dflags fam_envs us) guts + +-- Most passes return no stats and don't change rules: these combinators +-- let us lift them to the full blown ModGuts+CoreM world +doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts +doPassM bind_f guts = do + binds' <- bind_f (mg_binds guts) + return (guts { mg_binds = binds' }) + +doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts +doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) } + +-- Observer passes just peek; don't modify the bindings at all +observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts +observe do_pass = doPassM $ \binds -> do + dflags <- getDynFlags + _ <- liftIO $ do_pass dflags binds + return binds + +{- +************************************************************************ +* * + Gentle simplification +* * +************************************************************************ +-} + +simplifyExpr :: HscEnv -- includes spec of what core-to-core passes to do + -> CoreExpr + -> IO CoreExpr +-- simplifyExpr is called by the driver to simplify an +-- expression typed in at the interactive prompt +simplifyExpr hsc_env expr + = withTiming dflags (text "Simplify [expr]") (const ()) $ + do { eps <- hscEPS hsc_env ; + ; let rule_env = mkRuleEnv (eps_rule_base eps) [] + fi_env = ( eps_fam_inst_env eps + , extendFamInstEnvList emptyFamInstEnv $ + snd $ ic_instances $ hsc_IC hsc_env ) + simpl_env = simplEnvForGHCi dflags + + ; us <- mkSplitUniqSupply 's' + ; let sz = exprSize expr + + ; (expr', counts) <- initSmpl dflags rule_env fi_env us sz $ + simplExprGently simpl_env expr + + ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags) + "Simplifier statistics" (pprSimplCount counts) + + ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression" + FormatCore + (pprCoreExpr expr') + + ; return expr' + } + where + dflags = hsc_dflags hsc_env + +simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr +-- Simplifies an expression +-- does occurrence analysis, then simplification +-- and repeats (twice currently) because one pass +-- alone leaves tons of crud. +-- Used (a) for user expressions typed in at the interactive prompt +-- (b) the LHS and RHS of a RULE +-- (c) Template Haskell splices +-- +-- The name 'Gently' suggests that the SimplMode is InitialPhase, +-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't +-- enforce that; it just simplifies the expression twice + +-- It's important that simplExprGently does eta reduction; see +-- Note [Simplifying the left-hand side of a RULE] above. The +-- simplifier does indeed do eta reduction (it's in GHC.Core.Op.Simplify.completeLam) +-- but only if -O is on. + +simplExprGently env expr = do + expr1 <- simplExpr env (occurAnalyseExpr expr) + simplExpr env (occurAnalyseExpr expr1) + +{- +************************************************************************ +* * +\subsection{The driver for the simplifier} +* * +************************************************************************ +-} + +simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts +simplifyPgm pass guts + = do { hsc_env <- getHscEnv + ; us <- getUniqueSupplyM + ; rb <- getRuleBase + ; liftIOWithCount $ + simplifyPgmIO pass hsc_env us rb guts } + +simplifyPgmIO :: CoreToDo + -> HscEnv + -> UniqSupply + -> RuleBase + -> ModGuts + -> IO (SimplCount, ModGuts) -- New bindings + +simplifyPgmIO pass@(CoreDoSimplify max_iterations mode) + hsc_env us hpt_rule_base + guts@(ModGuts { mg_module = this_mod + , mg_rdr_env = rdr_env + , mg_deps = deps + , mg_binds = binds, mg_rules = rules + , mg_fam_inst_env = fam_inst_env }) + = do { (termination_msg, it_count, counts_out, guts') + <- do_iteration us 1 [] binds rules + + ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags && + dopt Opt_D_dump_simpl_stats dflags) + "Simplifier statistics for following pass" + (vcat [text termination_msg <+> text "after" <+> ppr it_count + <+> text "iterations", + blankLine, + pprSimplCount counts_out]) + + ; return (counts_out, guts') + } + where + dflags = hsc_dflags hsc_env + print_unqual = mkPrintUnqualified dflags rdr_env + simpl_env = mkSimplEnv mode + active_rule = activeRule mode + active_unf = activeUnfolding mode + + do_iteration :: UniqSupply + -> Int -- Counts iterations + -> [SimplCount] -- Counts from earlier iterations, reversed + -> CoreProgram -- Bindings in + -> [CoreRule] -- and orphan rules + -> IO (String, Int, SimplCount, ModGuts) + + do_iteration us iteration_no counts_so_far binds rules + -- iteration_no is the number of the iteration we are + -- about to begin, with '1' for the first + | iteration_no > max_iterations -- Stop if we've run out of iterations + = WARN( debugIsOn && (max_iterations > 2) + , hang (text "Simplifier bailing out after" <+> int max_iterations + <+> text "iterations" + <+> (brackets $ hsep $ punctuate comma $ + map (int . simplCountN) (reverse counts_so_far))) + 2 (text "Size =" <+> ppr (coreBindsStats binds))) + + -- Subtract 1 from iteration_no to get the + -- number of iterations we actually completed + return ( "Simplifier baled out", iteration_no - 1 + , totalise counts_so_far + , guts { mg_binds = binds, mg_rules = rules } ) + + -- Try and force thunks off the binds; significantly reduces + -- space usage, especially with -O. JRS, 000620. + | let sz = coreBindsSize binds + , () <- sz `seq` () -- Force it + = do { + -- Occurrence analysis + let { tagged_binds = {-# SCC "OccAnal" #-} + occurAnalysePgm this_mod active_unf active_rule rules + binds + } ; + Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis" + FormatCore + (pprCoreBindings tagged_binds); + + -- Get any new rules, and extend the rule base + -- See Note [Overall plumbing for rules] in GHC.Core.Rules + -- We need to do this regularly, because simplification can + -- poke on IdInfo thunks, which in turn brings in new rules + -- behind the scenes. Otherwise there's a danger we'll simply + -- miss the rules for Ids hidden inside imported inlinings + eps <- hscEPS hsc_env ; + let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps) + ; rule_base2 = extendRuleBaseList rule_base1 rules + ; fam_envs = (eps_fam_inst_env eps, fam_inst_env) + ; vis_orphs = this_mod : dep_orphs deps } ; + + -- Simplify the program + ((binds1, rules1), counts1) <- + initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $ + do { (floats, env1) <- {-# SCC "SimplTopBinds" #-} + simplTopBinds simpl_env tagged_binds + + -- Apply the substitution to rules defined in this module + -- for imported Ids. Eg RULE map my_f = blah + -- If we have a substitution my_f :-> other_f, we'd better + -- apply it to the rule to, or it'll never match + ; rules1 <- simplRules env1 Nothing rules Nothing + + ; return (getTopFloatBinds floats, rules1) } ; + + -- Stop if nothing happened; don't dump output + -- See Note [Which transformations are innocuous] in GHC.Core.Op.Monad + if isZeroSimplCount counts1 then + return ( "Simplifier reached fixed point", iteration_no + , totalise (counts1 : counts_so_far) -- Include "free" ticks + , guts { mg_binds = binds1, mg_rules = rules1 } ) + else do { + -- Short out indirections + -- We do this *after* at least one run of the simplifier + -- because indirection-shorting uses the export flag on *occurrences* + -- and that isn't guaranteed to be ok until after the first run propagates + -- stuff from the binding site to its occurrences + -- + -- ToDo: alas, this means that indirection-shorting does not happen at all + -- if the simplifier does nothing (not common, I know, but unsavoury) + let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ; + + -- Dump the result of this iteration + dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ; + lintPassResult hsc_env pass binds2 ; + + -- Loop + do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1 + } } +#if __GLASGOW_HASKELL__ <= 810 + | otherwise = panic "do_iteration" +#endif + where + (us1, us2) = splitUniqSupply us + + -- Remember the counts_so_far are reversed + totalise :: [SimplCount] -> SimplCount + totalise = foldr (\c acc -> acc `plusSimplCount` c) + (zeroSimplCount dflags) + +simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO" + +------------------- +dump_end_iteration :: DynFlags -> PrintUnqualified -> Int + -> SimplCount -> CoreProgram -> [CoreRule] -> IO () +dump_end_iteration dflags print_unqual iteration_no counts binds rules + = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules + where + mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations + | otherwise = Nothing + -- Show details if Opt_D_dump_simpl_iterations is on + + hdr = text "Simplifier iteration=" <> int iteration_no + pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr + , pprSimplCount counts + , text "---- End of simplifier counts for" <+> hdr ] + +{- +************************************************************************ +* * + Shorting out indirections +* * +************************************************************************ + +If we have this: + + x_local = <expression> + ...bindings... + x_exported = x_local + +where x_exported is exported, and x_local is not, then we replace it with this: + + x_exported = <expression> + x_local = x_exported + ...bindings... + +Without this we never get rid of the x_exported = x_local thing. This +save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and +makes strictness information propagate better. This used to happen in +the final phase, but it's tidier to do it here. + +Note [Messing up the exported Id's RULES] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must be careful about discarding (obviously) or even merging the +RULES on the exported Id. The example that went bad on me at one stage +was this one: + + iterate :: (a -> a) -> a -> [a] + [Exported] + iterate = iterateList + + iterateFB c f x = x `c` iterateFB c f (f x) + iterateList f x = x : iterateList f (f x) + [Not exported] + + {-# RULES + "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x) + "iterateFB" iterateFB (:) = iterateList + #-} + +This got shorted out to: + + iterateList :: (a -> a) -> a -> [a] + iterateList = iterate + + iterateFB c f x = x `c` iterateFB c f (f x) + iterate f x = x : iterate f (f x) + + {-# RULES + "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x) + "iterateFB" iterateFB (:) = iterate + #-} + +And now we get an infinite loop in the rule system + iterate f x -> build (\cn -> iterateFB c f x) + -> iterateFB (:) f x + -> iterate f x + +Old "solution": + use rule switching-off pragmas to get rid + of iterateList in the first place + +But in principle the user *might* want rules that only apply to the Id +he says. And inline pragmas are similar + {-# NOINLINE f #-} + f = local + local = <stuff> +Then we do not want to get rid of the NOINLINE. + +Hence hasShortableIdinfo. + + +Note [Rules and indirection-zapping] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Problem: what if x_exported has a RULE that mentions something in ...bindings...? +Then the things mentioned can be out of scope! Solution + a) Make sure that in this pass the usage-info from x_exported is + available for ...bindings... + b) If there are any such RULES, rec-ify the entire top-level. + It'll get sorted out next time round + +Other remarks +~~~~~~~~~~~~~ +If more than one exported thing is equal to a local thing (i.e., the +local thing really is shared), then we do one only: +\begin{verbatim} + x_local = .... + x_exported1 = x_local + x_exported2 = x_local +==> + x_exported1 = .... + + x_exported2 = x_exported1 +\end{verbatim} + +We rely on prior eta reduction to simplify things like +\begin{verbatim} + x_exported = /\ tyvars -> x_local tyvars +==> + x_exported = x_local +\end{verbatim} +Hence,there's a possibility of leaving unchanged something like this: +\begin{verbatim} + x_local = .... + x_exported1 = x_local Int +\end{verbatim} +By the time we've thrown away the types in STG land this +could be eliminated. But I don't think it's very common +and it's dangerous to do this fiddling in STG land +because we might eliminate a binding that's mentioned in the +unfolding for something. + +Note [Indirection zapping and ticks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Unfortunately this is another place where we need a special case for +ticks. The following happens quite regularly: + + x_local = <expression> + x_exported = tick<x> x_local + +Which we want to become: + + x_exported = tick<x> <expression> + +As it makes no sense to keep the tick and the expression on separate +bindings. Note however that that this might increase the ticks scoping +over the execution of x_local, so we can only do this for floatable +ticks. More often than not, other references will be unfoldings of +x_exported, and therefore carry the tick anyway. +-} + +type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks + +shortOutIndirections :: CoreProgram -> CoreProgram +shortOutIndirections binds + | isEmptyVarEnv ind_env = binds + | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping] + | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff + where + ind_env = makeIndEnv binds + -- These exported Ids are the subjects of the indirection-elimination + exp_ids = map fst $ nonDetEltsUFM ind_env + -- It's OK to use nonDetEltsUFM here because we forget the ordering + -- by immediately converting to a set or check if all the elements + -- satisfy a predicate. + exp_id_set = mkVarSet exp_ids + no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids + binds' = concatMap zap binds + + zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)] + zap (Rec pairs) = [Rec (concatMap zapPair pairs)] + + zapPair (bndr, rhs) + | bndr `elemVarSet` exp_id_set + = [] -- Kill the exported-id binding + + | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr + , (exp_id', lcl_id') <- transferIdInfo exp_id bndr + = -- Turn a local-id binding into two bindings + -- exp_id = rhs; lcl_id = exp_id + [ (exp_id', mkTicks ticks rhs), + (lcl_id', Var exp_id') ] + + | otherwise + = [(bndr,rhs)] + +makeIndEnv :: [CoreBind] -> IndEnv +makeIndEnv binds + = foldl' add_bind emptyVarEnv binds + where + add_bind :: IndEnv -> CoreBind -> IndEnv + add_bind env (NonRec exported_id rhs) = add_pair env (exported_id, rhs) + add_bind env (Rec pairs) = foldl' add_pair env pairs + + add_pair :: IndEnv -> (Id,CoreExpr) -> IndEnv + add_pair env (exported_id, exported) + | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported + , shortMeOut env exported_id local_id + = extendVarEnv env local_id (exported_id, ticks) + add_pair env _ = env + +----------------- +shortMeOut :: IndEnv -> Id -> Id -> Bool +shortMeOut ind_env exported_id local_id +-- The if-then-else stuff is just so I can get a pprTrace to see +-- how often I don't get shorting out because of IdInfo stuff + = if isExportedId exported_id && -- Only if this is exported + + isLocalId local_id && -- Only if this one is defined in this + -- module, so that we *can* change its + -- binding to be the exported thing! + + not (isExportedId local_id) && -- Only if this one is not itself exported, + -- since the transformation will nuke it + + not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for + then + if hasShortableIdInfo exported_id + then True -- See Note [Messing up the exported Id's IdInfo] + else WARN( True, text "Not shorting out:" <+> ppr exported_id ) + False + else + False + +----------------- +hasShortableIdInfo :: Id -> Bool +-- True if there is no user-attached IdInfo on exported_id, +-- so we can safely discard it +-- See Note [Messing up the exported Id's IdInfo] +hasShortableIdInfo id + = isEmptyRuleInfo (ruleInfo info) + && isDefaultInlinePragma (inlinePragInfo info) + && not (isStableUnfolding (unfoldingInfo info)) + where + info = idInfo id + +----------------- +{- Note [Transferring IdInfo] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + lcl_id = e; exp_id = lcl_id + +and lcl_id has useful IdInfo, we don't want to discard it by going + gbl_id = e; lcl_id = gbl_id + +Instead, transfer IdInfo from lcl_id to exp_id, specifically +* (Stable) unfolding +* Strictness +* Rules +* Inline pragma + +Overwriting, rather than merging, seems to work ok. + +We also zap the InlinePragma on the lcl_id. It might originally +have had a NOINLINE, which we have now transferred; and we really +want the lcl_id to inline now that its RHS is trivial! +-} + +transferIdInfo :: Id -> Id -> (Id, Id) +-- See Note [Transferring IdInfo] +transferIdInfo exported_id local_id + = ( modifyIdInfo transfer exported_id + , local_id `setInlinePragma` defaultInlinePragma ) + where + local_info = idInfo local_id + transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info + `setCprInfo` cprInfo local_info + `setUnfoldingInfo` unfoldingInfo local_info + `setInlinePragInfo` inlinePragInfo local_info + `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info + new_info = setRuleInfoHead (idName exported_id) + (ruleInfo local_info) + -- Remember to set the function-name field of the + -- rules as we transfer them from one function to another diff --git a/compiler/GHC/Core/Op/Simplify/Env.hs b/compiler/GHC/Core/Op/Simplify/Env.hs new file mode 100644 index 0000000000..0e94f734af --- /dev/null +++ b/compiler/GHC/Core/Op/Simplify/Env.hs @@ -0,0 +1,938 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +\section[GHC.Core.Op.Simplify.Monad]{The simplifier Monad} +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.Simplify.Env ( + -- * The simplifier mode + setMode, getMode, updMode, seDynFlags, + + -- * Environments + SimplEnv(..), pprSimplEnv, -- Temp not abstract + mkSimplEnv, extendIdSubst, + extendTvSubst, extendCvSubst, + zapSubstEnv, setSubstEnv, + getInScope, setInScopeFromE, setInScopeFromF, + setInScopeSet, modifyInScope, addNewInScopeIds, + getSimplRules, + + -- * Substitution results + SimplSR(..), mkContEx, substId, lookupRecBndr, refineFromInScope, + + -- * Simplifying 'Id' binders + simplNonRecBndr, simplNonRecJoinBndr, simplRecBndrs, simplRecJoinBndrs, + simplBinder, simplBinders, + substTy, substTyVar, getTCvSubst, + substCo, substCoVar, + + -- * Floats + SimplFloats(..), emptyFloats, mkRecFloats, + mkFloatBind, addLetFloats, addJoinFloats, addFloats, + extendFloats, wrapFloats, + doFloatFromRhs, getTopFloatBinds, + + -- * LetFloats + LetFloats, letFloatBinds, emptyLetFloats, unitLetFloat, + addLetFlts, mapLetFloats, + + -- * JoinFloats + JoinFloat, JoinFloats, emptyJoinFloats, + wrapJoinFloats, wrapJoinFloatsX, unitJoinFloat, addJoinFlts + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core.Op.Simplify.Monad +import GHC.Core.Op.Monad ( SimplMode(..) ) +import GHC.Core +import GHC.Core.Utils +import Var +import VarEnv +import VarSet +import OrdList +import Id +import GHC.Core.Make ( mkWildValBinder ) +import GHC.Driver.Session ( DynFlags ) +import TysWiredIn +import qualified GHC.Core.Type as Type +import GHC.Core.Type hiding ( substTy, substTyVar, substTyVarBndr, extendTvSubst, extendCvSubst ) +import qualified GHC.Core.Coercion as Coercion +import GHC.Core.Coercion hiding ( substCo, substCoVar, substCoVarBndr ) +import BasicTypes +import MonadUtils +import Outputable +import Util +import UniqFM ( pprUniqFM ) + +import Data.List (mapAccumL) + +{- +************************************************************************ +* * +\subsubsection{The @SimplEnv@ type} +* * +************************************************************************ +-} + +data SimplEnv + = SimplEnv { + ----------- Static part of the environment ----------- + -- Static in the sense of lexically scoped, + -- wrt the original expression + + seMode :: SimplMode + + -- The current substitution + , seTvSubst :: TvSubstEnv -- InTyVar |--> OutType + , seCvSubst :: CvSubstEnv -- InCoVar |--> OutCoercion + , seIdSubst :: SimplIdSubst -- InId |--> OutExpr + + ----------- Dynamic part of the environment ----------- + -- Dynamic in the sense of describing the setup where + -- the expression finally ends up + + -- The current set of in-scope variables + -- They are all OutVars, and all bound in this module + , seInScope :: InScopeSet -- OutVars only + } + +data SimplFloats + = SimplFloats + { -- Ordinary let bindings + sfLetFloats :: LetFloats + -- See Note [LetFloats] + + -- Join points + , sfJoinFloats :: JoinFloats + -- Handled separately; they don't go very far + -- We consider these to be /inside/ sfLetFloats + -- because join points can refer to ordinary bindings, + -- but not vice versa + + -- Includes all variables bound by sfLetFloats and + -- sfJoinFloats, plus at least whatever is in scope where + -- these bindings land up. + , sfInScope :: InScopeSet -- All OutVars + } + +instance Outputable SimplFloats where + ppr (SimplFloats { sfLetFloats = lf, sfJoinFloats = jf, sfInScope = is }) + = text "SimplFloats" + <+> braces (vcat [ text "lets: " <+> ppr lf + , text "joins:" <+> ppr jf + , text "in_scope:" <+> ppr is ]) + +emptyFloats :: SimplEnv -> SimplFloats +emptyFloats env + = SimplFloats { sfLetFloats = emptyLetFloats + , sfJoinFloats = emptyJoinFloats + , sfInScope = seInScope env } + +pprSimplEnv :: SimplEnv -> SDoc +-- Used for debugging; selective +pprSimplEnv env + = vcat [text "TvSubst:" <+> ppr (seTvSubst env), + text "CvSubst:" <+> ppr (seCvSubst env), + text "IdSubst:" <+> id_subst_doc, + text "InScope:" <+> in_scope_vars_doc + ] + where + id_subst_doc = pprUniqFM ppr (seIdSubst env) + in_scope_vars_doc = pprVarSet (getInScopeVars (seInScope env)) + (vcat . map ppr_one) + ppr_one v | isId v = ppr v <+> ppr (idUnfolding v) + | otherwise = ppr v + +type SimplIdSubst = IdEnv SimplSR -- IdId |--> OutExpr + -- See Note [Extending the Subst] in GHC.Core.Subst + +-- | A substitution result. +data SimplSR + = DoneEx OutExpr (Maybe JoinArity) + -- If x :-> DoneEx e ja is in the SimplIdSubst + -- then replace occurrences of x by e + -- and ja = Just a <=> x is a join-point of arity a + -- See Note [Join arity in SimplIdSubst] + + + | DoneId OutId + -- If x :-> DoneId v is in the SimplIdSubst + -- then replace occurrences of x by v + -- and v is a join-point of arity a + -- <=> x is a join-point of arity a + + | ContEx TvSubstEnv -- A suspended substitution + CvSubstEnv + SimplIdSubst + InExpr + -- If x :-> ContEx tv cv id e is in the SimplISubst + -- then replace occurrences of x by (subst (tv,cv,id) e) + +instance Outputable SimplSR where + ppr (DoneId v) = text "DoneId" <+> ppr v + ppr (DoneEx e mj) = text "DoneEx" <> pp_mj <+> ppr e + where + pp_mj = case mj of + Nothing -> empty + Just n -> parens (int n) + + ppr (ContEx _tv _cv _id e) = vcat [text "ContEx" <+> ppr e {-, + ppr (filter_env tv), ppr (filter_env id) -}] + -- where + -- fvs = exprFreeVars e + -- filter_env env = filterVarEnv_Directly keep env + -- keep uniq _ = uniq `elemUFM_Directly` fvs + +{- +Note [SimplEnv invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +seInScope: + The in-scope part of Subst includes *all* in-scope TyVars and Ids + The elements of the set may have better IdInfo than the + occurrences of in-scope Ids, and (more important) they will + have a correctly-substituted type. So we use a lookup in this + set to replace occurrences + + The Ids in the InScopeSet are replete with their Rules, + and as we gather info about the unfolding of an Id, we replace + it in the in-scope set. + + The in-scope set is actually a mapping OutVar -> OutVar, and + in case expressions we sometimes bind + +seIdSubst: + The substitution is *apply-once* only, because InIds and OutIds + can overlap. + For example, we generally omit mappings + a77 -> a77 + from the substitution, when we decide not to clone a77, but it's quite + legitimate to put the mapping in the substitution anyway. + + Furthermore, consider + let x = case k of I# x77 -> ... in + let y = case k of I# x77 -> ... in ... + and suppose the body is strict in both x and y. Then the simplifier + will pull the first (case k) to the top; so the second (case k) will + cancel out, mapping x77 to, well, x77! But one is an in-Id and the + other is an out-Id. + + Of course, the substitution *must* applied! Things in its domain + simply aren't necessarily bound in the result. + +* substId adds a binding (DoneId new_id) to the substitution if + the Id's unique has changed + + Note, though that the substitution isn't necessarily extended + if the type of the Id changes. Why not? Because of the next point: + +* We *always, always* finish by looking up in the in-scope set + any variable that doesn't get a DoneEx or DoneVar hit in the substitution. + Reason: so that we never finish up with a "old" Id in the result. + An old Id might point to an old unfolding and so on... which gives a space + leak. + + [The DoneEx and DoneVar hits map to "new" stuff.] + +* It follows that substExpr must not do a no-op if the substitution is empty. + substType is free to do so, however. + +* When we come to a let-binding (say) we generate new IdInfo, including an + unfolding, attach it to the binder, and add this newly adorned binder to + the in-scope set. So all subsequent occurrences of the binder will get + mapped to the full-adorned binder, which is also the one put in the + binding site. + +* The in-scope "set" usually maps x->x; we use it simply for its domain. + But sometimes we have two in-scope Ids that are synomyms, and should + map to the same target: x->x, y->x. Notably: + case y of x { ... } + That's why the "set" is actually a VarEnv Var + +Note [Join arity in SimplIdSubst] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We have to remember which incoming variables are join points: the occurrences +may not be marked correctly yet, and we're in change of propagating the change if +OccurAnal makes something a join point). + +Normally the in-scope set is where we keep the latest information, but +the in-scope set tracks only OutVars; if a binding is unconditionally +inlined (via DoneEx), it never makes it into the in-scope set, and we +need to know at the occurrence site that the variable is a join point +so that we know to drop the context. Thus we remember which join +points we're substituting. -} + +mkSimplEnv :: SimplMode -> SimplEnv +mkSimplEnv mode + = SimplEnv { seMode = mode + , seInScope = init_in_scope + , seTvSubst = emptyVarEnv + , seCvSubst = emptyVarEnv + , seIdSubst = emptyVarEnv } + -- The top level "enclosing CC" is "SUBSUMED". + +init_in_scope :: InScopeSet +init_in_scope = mkInScopeSet (unitVarSet (mkWildValBinder unitTy)) + -- See Note [WildCard binders] + +{- +Note [WildCard binders] +~~~~~~~~~~~~~~~~~~~~~~~ +The program to be simplified may have wild binders + case e of wild { p -> ... } +We want to *rename* them away, so that there are no +occurrences of 'wild-id' (with wildCardKey). The easy +way to do that is to start of with a representative +Id in the in-scope set + +There can be *occurrences* of wild-id. For example, +GHC.Core.Make.mkCoreApp transforms + e (a /# b) --> case (a /# b) of wild { DEFAULT -> e wild } +This is ok provided 'wild' isn't free in 'e', and that's the delicate +thing. Generally, you want to run the simplifier to get rid of the +wild-ids before doing much else. + +It's a very dark corner of GHC. Maybe it should be cleaned up. +-} + +getMode :: SimplEnv -> SimplMode +getMode env = seMode env + +seDynFlags :: SimplEnv -> DynFlags +seDynFlags env = sm_dflags (seMode env) + +setMode :: SimplMode -> SimplEnv -> SimplEnv +setMode mode env = env { seMode = mode } + +updMode :: (SimplMode -> SimplMode) -> SimplEnv -> SimplEnv +updMode upd env = env { seMode = upd (seMode env) } + +--------------------- +extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv +extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res + = ASSERT2( isId var && not (isCoVar var), ppr var ) + env { seIdSubst = extendVarEnv subst var res } + +extendTvSubst :: SimplEnv -> TyVar -> Type -> SimplEnv +extendTvSubst env@(SimplEnv {seTvSubst = tsubst}) var res + = ASSERT2( isTyVar var, ppr var $$ ppr res ) + env {seTvSubst = extendVarEnv tsubst var res} + +extendCvSubst :: SimplEnv -> CoVar -> Coercion -> SimplEnv +extendCvSubst env@(SimplEnv {seCvSubst = csubst}) var co + = ASSERT( isCoVar var ) + env {seCvSubst = extendVarEnv csubst var co} + +--------------------- +getInScope :: SimplEnv -> InScopeSet +getInScope env = seInScope env + +setInScopeSet :: SimplEnv -> InScopeSet -> SimplEnv +setInScopeSet env in_scope = env {seInScope = in_scope} + +setInScopeFromE :: SimplEnv -> SimplEnv -> SimplEnv +-- See Note [Setting the right in-scope set] +setInScopeFromE rhs_env here_env = rhs_env { seInScope = seInScope here_env } + +setInScopeFromF :: SimplEnv -> SimplFloats -> SimplEnv +setInScopeFromF env floats = env { seInScope = sfInScope floats } + +addNewInScopeIds :: SimplEnv -> [CoreBndr] -> SimplEnv + -- The new Ids are guaranteed to be freshly allocated +addNewInScopeIds env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) vs + = env { seInScope = in_scope `extendInScopeSetList` vs, + seIdSubst = id_subst `delVarEnvList` vs } + -- Why delete? Consider + -- let x = a*b in (x, \x -> x+3) + -- We add [x |-> a*b] to the substitution, but we must + -- _delete_ it from the substitution when going inside + -- the (\x -> ...)! + +modifyInScope :: SimplEnv -> CoreBndr -> SimplEnv +-- The variable should already be in scope, but +-- replace the existing version with this new one +-- which has more information +modifyInScope env@(SimplEnv {seInScope = in_scope}) v + = env {seInScope = extendInScopeSet in_scope v} + +{- Note [Setting the right in-scope set] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + \x. (let x = e in b) arg[x] +where the let shadows the lambda. Really this means something like + \x1. (let x2 = e in b) arg[x1] + +- When we capture the 'arg' in an ApplyToVal continuation, we capture + the environment, which says what 'x' is bound to, namely x1 + +- Then that continuation gets pushed under the let + +- Finally we simplify 'arg'. We want + - the static, lexical environment binding x :-> x1 + - the in-scopeset from "here", under the 'let' which includes + both x1 and x2 + +It's important to have the right in-scope set, else we may rename a +variable to one that is already in scope. So we must pick up the +in-scope set from "here", but otherwise use the environment we +captured along with 'arg'. This transfer of in-scope set is done by +setInScopeFromE. +-} + +--------------------- +zapSubstEnv :: SimplEnv -> SimplEnv +zapSubstEnv env = env {seTvSubst = emptyVarEnv, seCvSubst = emptyVarEnv, seIdSubst = emptyVarEnv} + +setSubstEnv :: SimplEnv -> TvSubstEnv -> CvSubstEnv -> SimplIdSubst -> SimplEnv +setSubstEnv env tvs cvs ids = env { seTvSubst = tvs, seCvSubst = cvs, seIdSubst = ids } + +mkContEx :: SimplEnv -> InExpr -> SimplSR +mkContEx (SimplEnv { seTvSubst = tvs, seCvSubst = cvs, seIdSubst = ids }) e = ContEx tvs cvs ids e + +{- +************************************************************************ +* * +\subsection{LetFloats} +* * +************************************************************************ + +Note [LetFloats] +~~~~~~~~~~~~~~~~ +The LetFloats is a bunch of bindings, classified by a FloatFlag. + +* All of them satisfy the let/app invariant + +Examples + + NonRec x (y:ys) FltLifted + Rec [(x,rhs)] FltLifted + + NonRec x* (p:q) FltOKSpec -- RHS is WHNF. Question: why not FltLifted? + NonRec x# (y +# 3) FltOkSpec -- Unboxed, but ok-for-spec'n + + NonRec x* (f y) FltCareful -- Strict binding; might fail or diverge + +Can't happen: + NonRec x# (a /# b) -- Might fail; does not satisfy let/app + NonRec x# (f y) -- Might diverge; does not satisfy let/app +-} + +data LetFloats = LetFloats (OrdList OutBind) FloatFlag + -- See Note [LetFloats] + +type JoinFloat = OutBind +type JoinFloats = OrdList JoinFloat + +data FloatFlag + = FltLifted -- All bindings are lifted and lazy *or* + -- consist of a single primitive string literal + -- Hence ok to float to top level, or recursive + + | FltOkSpec -- All bindings are FltLifted *or* + -- strict (perhaps because unlifted, + -- perhaps because of a strict binder), + -- *and* ok-for-speculation + -- Hence ok to float out of the RHS + -- of a lazy non-recursive let binding + -- (but not to top level, or into a rec group) + + | FltCareful -- At least one binding is strict (or unlifted) + -- and not guaranteed cheap + -- Do not float these bindings out of a lazy let + +instance Outputable LetFloats where + ppr (LetFloats binds ff) = ppr ff $$ ppr (fromOL binds) + +instance Outputable FloatFlag where + ppr FltLifted = text "FltLifted" + ppr FltOkSpec = text "FltOkSpec" + ppr FltCareful = text "FltCareful" + +andFF :: FloatFlag -> FloatFlag -> FloatFlag +andFF FltCareful _ = FltCareful +andFF FltOkSpec FltCareful = FltCareful +andFF FltOkSpec _ = FltOkSpec +andFF FltLifted flt = flt + +doFloatFromRhs :: TopLevelFlag -> RecFlag -> Bool -> SimplFloats -> OutExpr -> Bool +-- If you change this function look also at FloatIn.noFloatFromRhs +doFloatFromRhs lvl rec str (SimplFloats { sfLetFloats = LetFloats fs ff }) rhs + = not (isNilOL fs) && want_to_float && can_float + where + want_to_float = isTopLevel lvl || exprIsCheap rhs || exprIsExpandable rhs + -- See Note [Float when cheap or expandable] + can_float = case ff of + FltLifted -> True + FltOkSpec -> isNotTopLevel lvl && isNonRec rec + FltCareful -> isNotTopLevel lvl && isNonRec rec && str + +{- +Note [Float when cheap or expandable] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want to float a let from a let if the residual RHS is + a) cheap, such as (\x. blah) + b) expandable, such as (f b) if f is CONLIKE +But there are + - cheap things that are not expandable (eg \x. expensive) + - expandable things that are not cheap (eg (f b) where b is CONLIKE) +so we must take the 'or' of the two. +-} + +emptyLetFloats :: LetFloats +emptyLetFloats = LetFloats nilOL FltLifted + +emptyJoinFloats :: JoinFloats +emptyJoinFloats = nilOL + +unitLetFloat :: OutBind -> LetFloats +-- This key function constructs a singleton float with the right form +unitLetFloat bind = ASSERT(all (not . isJoinId) (bindersOf bind)) + LetFloats (unitOL bind) (flag bind) + where + flag (Rec {}) = FltLifted + flag (NonRec bndr rhs) + | not (isStrictId bndr) = FltLifted + | exprIsTickedString rhs = FltLifted + -- String literals can be floated freely. + -- See Note [Core top-level string literals] in GHC.Core. + | exprOkForSpeculation rhs = FltOkSpec -- Unlifted, and lifted but ok-for-spec (eg HNF) + | otherwise = ASSERT2( not (isUnliftedType (idType bndr)), ppr bndr ) + FltCareful + -- Unlifted binders can only be let-bound if exprOkForSpeculation holds + +unitJoinFloat :: OutBind -> JoinFloats +unitJoinFloat bind = ASSERT(all isJoinId (bindersOf bind)) + unitOL bind + +mkFloatBind :: SimplEnv -> OutBind -> (SimplFloats, SimplEnv) +-- Make a singleton SimplFloats, and +-- extend the incoming SimplEnv's in-scope set with its binders +-- These binders may already be in the in-scope set, +-- but may have by now been augmented with more IdInfo +mkFloatBind env bind + = (floats, env { seInScope = in_scope' }) + where + floats + | isJoinBind bind + = SimplFloats { sfLetFloats = emptyLetFloats + , sfJoinFloats = unitJoinFloat bind + , sfInScope = in_scope' } + | otherwise + = SimplFloats { sfLetFloats = unitLetFloat bind + , sfJoinFloats = emptyJoinFloats + , sfInScope = in_scope' } + + in_scope' = seInScope env `extendInScopeSetBind` bind + +extendFloats :: SimplFloats -> OutBind -> SimplFloats +-- Add this binding to the floats, and extend the in-scope env too +extendFloats (SimplFloats { sfLetFloats = floats + , sfJoinFloats = jfloats + , sfInScope = in_scope }) + bind + | isJoinBind bind + = SimplFloats { sfInScope = in_scope' + , sfLetFloats = floats + , sfJoinFloats = jfloats' } + | otherwise + = SimplFloats { sfInScope = in_scope' + , sfLetFloats = floats' + , sfJoinFloats = jfloats } + where + in_scope' = in_scope `extendInScopeSetBind` bind + floats' = floats `addLetFlts` unitLetFloat bind + jfloats' = jfloats `addJoinFlts` unitJoinFloat bind + +addLetFloats :: SimplFloats -> LetFloats -> SimplFloats +-- Add the let-floats for env2 to env1; +-- *plus* the in-scope set for env2, which is bigger +-- than that for env1 +addLetFloats floats let_floats@(LetFloats binds _) + = floats { sfLetFloats = sfLetFloats floats `addLetFlts` let_floats + , sfInScope = foldlOL extendInScopeSetBind + (sfInScope floats) binds } + +addJoinFloats :: SimplFloats -> JoinFloats -> SimplFloats +addJoinFloats floats join_floats + = floats { sfJoinFloats = sfJoinFloats floats `addJoinFlts` join_floats + , sfInScope = foldlOL extendInScopeSetBind + (sfInScope floats) join_floats } + +extendInScopeSetBind :: InScopeSet -> CoreBind -> InScopeSet +extendInScopeSetBind in_scope bind + = extendInScopeSetList in_scope (bindersOf bind) + +addFloats :: SimplFloats -> SimplFloats -> SimplFloats +-- Add both let-floats and join-floats for env2 to env1; +-- *plus* the in-scope set for env2, which is bigger +-- than that for env1 +addFloats (SimplFloats { sfLetFloats = lf1, sfJoinFloats = jf1 }) + (SimplFloats { sfLetFloats = lf2, sfJoinFloats = jf2, sfInScope = in_scope }) + = SimplFloats { sfLetFloats = lf1 `addLetFlts` lf2 + , sfJoinFloats = jf1 `addJoinFlts` jf2 + , sfInScope = in_scope } + +addLetFlts :: LetFloats -> LetFloats -> LetFloats +addLetFlts (LetFloats bs1 l1) (LetFloats bs2 l2) + = LetFloats (bs1 `appOL` bs2) (l1 `andFF` l2) + +letFloatBinds :: LetFloats -> [CoreBind] +letFloatBinds (LetFloats bs _) = fromOL bs + +addJoinFlts :: JoinFloats -> JoinFloats -> JoinFloats +addJoinFlts = appOL + +mkRecFloats :: SimplFloats -> SimplFloats +-- Flattens the floats from env2 into a single Rec group, +-- They must either all be lifted LetFloats or all JoinFloats +mkRecFloats floats@(SimplFloats { sfLetFloats = LetFloats bs ff + , sfJoinFloats = jbs + , sfInScope = in_scope }) + = ASSERT2( case ff of { FltLifted -> True; _ -> False }, ppr (fromOL bs) ) + ASSERT2( isNilOL bs || isNilOL jbs, ppr floats ) + SimplFloats { sfLetFloats = floats' + , sfJoinFloats = jfloats' + , sfInScope = in_scope } + where + floats' | isNilOL bs = emptyLetFloats + | otherwise = unitLetFloat (Rec (flattenBinds (fromOL bs))) + jfloats' | isNilOL jbs = emptyJoinFloats + | otherwise = unitJoinFloat (Rec (flattenBinds (fromOL jbs))) + +wrapFloats :: SimplFloats -> OutExpr -> OutExpr +-- Wrap the floats around the expression; they should all +-- satisfy the let/app invariant, so mkLets should do the job just fine +wrapFloats (SimplFloats { sfLetFloats = LetFloats bs _ + , sfJoinFloats = jbs }) body + = foldrOL Let (wrapJoinFloats jbs body) bs + -- Note: Always safe to put the joins on the inside + -- since the values can't refer to them + +wrapJoinFloatsX :: SimplFloats -> OutExpr -> (SimplFloats, OutExpr) +-- Wrap the sfJoinFloats of the env around the expression, +-- and take them out of the SimplEnv +wrapJoinFloatsX floats body + = ( floats { sfJoinFloats = emptyJoinFloats } + , wrapJoinFloats (sfJoinFloats floats) body ) + +wrapJoinFloats :: JoinFloats -> OutExpr -> OutExpr +-- Wrap the sfJoinFloats of the env around the expression, +-- and take them out of the SimplEnv +wrapJoinFloats join_floats body + = foldrOL Let body join_floats + +getTopFloatBinds :: SimplFloats -> [CoreBind] +getTopFloatBinds (SimplFloats { sfLetFloats = lbs + , sfJoinFloats = jbs}) + = ASSERT( isNilOL jbs ) -- Can't be any top-level join bindings + letFloatBinds lbs + +mapLetFloats :: LetFloats -> ((Id,CoreExpr) -> (Id,CoreExpr)) -> LetFloats +mapLetFloats (LetFloats fs ff) fun + = LetFloats (mapOL app fs) ff + where + app (NonRec b e) = case fun (b,e) of (b',e') -> NonRec b' e' + app (Rec bs) = Rec (map fun bs) + +{- +************************************************************************ +* * + Substitution of Vars +* * +************************************************************************ + +Note [Global Ids in the substitution] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We look up even a global (eg imported) Id in the substitution. Consider + case X.g_34 of b { (a,b) -> ... case X.g_34 of { (p,q) -> ...} ... } +The binder-swap in the occurrence analyser will add a binding +for a LocalId version of g (with the same unique though): + case X.g_34 of b { (a,b) -> let g_34 = b in + ... case X.g_34 of { (p,q) -> ...} ... } +So we want to look up the inner X.g_34 in the substitution, where we'll +find that it has been substituted by b. (Or conceivably cloned.) +-} + +substId :: SimplEnv -> InId -> SimplSR +-- Returns DoneEx only on a non-Var expression +substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v + = case lookupVarEnv ids v of -- Note [Global Ids in the substitution] + Nothing -> DoneId (refineFromInScope in_scope v) + Just (DoneId v) -> DoneId (refineFromInScope in_scope v) + Just res -> res -- DoneEx non-var, or ContEx + + -- Get the most up-to-date thing from the in-scope set + -- Even though it isn't in the substitution, it may be in + -- the in-scope set with better IdInfo. + -- + -- See also Note [In-scope set as a substitution] in GHC.Core.Op.Simplify. + +refineFromInScope :: InScopeSet -> Var -> Var +refineFromInScope in_scope v + | isLocalId v = case lookupInScope in_scope v of + Just v' -> v' + Nothing -> WARN( True, ppr v ) v -- This is an error! + | otherwise = v + +lookupRecBndr :: SimplEnv -> InId -> OutId +-- Look up an Id which has been put into the envt by simplRecBndrs, +-- but where we have not yet done its RHS +lookupRecBndr (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v + = case lookupVarEnv ids v of + Just (DoneId v) -> v + Just _ -> pprPanic "lookupRecBndr" (ppr v) + Nothing -> refineFromInScope in_scope v + +{- +************************************************************************ +* * +\section{Substituting an Id binder} +* * +************************************************************************ + + +These functions are in the monad only so that they can be made strict via seq. + +Note [Return type for join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + + (join j :: Char -> Int -> Int) 77 + ( j x = \y. y + ord x ) + (in case v of ) + ( A -> j 'x' ) + ( B -> j 'y' ) + ( C -> <blah> ) + +The simplifier pushes the "apply to 77" continuation inwards to give + + join j :: Char -> Int + j x = (\y. y + ord x) 77 + in case v of + A -> j 'x' + B -> j 'y' + C -> <blah> 77 + +Notice that the "apply to 77" continuation went into the RHS of the +join point. And that meant that the return type of the join point +changed!! + +That's why we pass res_ty into simplNonRecJoinBndr, and substIdBndr +takes a (Just res_ty) argument so that it knows to do the type-changing +thing. +-} + +simplBinders :: SimplEnv -> [InBndr] -> SimplM (SimplEnv, [OutBndr]) +simplBinders env bndrs = mapAccumLM simplBinder env bndrs + +------------- +simplBinder :: SimplEnv -> InBndr -> SimplM (SimplEnv, OutBndr) +-- Used for lambda and case-bound variables +-- Clone Id if necessary, substitute type +-- Return with IdInfo already substituted, but (fragile) occurrence info zapped +-- The substitution is extended only if the variable is cloned, because +-- we *don't* need to use it to track occurrence info. +simplBinder env bndr + | isTyVar bndr = do { let (env', tv) = substTyVarBndr env bndr + ; seqTyVar tv `seq` return (env', tv) } + | otherwise = do { let (env', id) = substIdBndr Nothing env bndr + ; seqId id `seq` return (env', id) } + +--------------- +simplNonRecBndr :: SimplEnv -> InBndr -> SimplM (SimplEnv, OutBndr) +-- A non-recursive let binder +simplNonRecBndr env id + = do { let (env1, id1) = substIdBndr Nothing env id + ; seqId id1 `seq` return (env1, id1) } + +--------------- +simplNonRecJoinBndr :: SimplEnv -> OutType -> InBndr + -> SimplM (SimplEnv, OutBndr) +-- A non-recursive let binder for a join point; +-- context being pushed inward may change the type +-- See Note [Return type for join points] +simplNonRecJoinBndr env res_ty id + = do { let (env1, id1) = substIdBndr (Just res_ty) env id + ; seqId id1 `seq` return (env1, id1) } + +--------------- +simplRecBndrs :: SimplEnv -> [InBndr] -> SimplM SimplEnv +-- Recursive let binders +simplRecBndrs env@(SimplEnv {}) ids + = ASSERT(all (not . isJoinId) ids) + do { let (env1, ids1) = mapAccumL (substIdBndr Nothing) env ids + ; seqIds ids1 `seq` return env1 } + +--------------- +simplRecJoinBndrs :: SimplEnv -> OutType -> [InBndr] -> SimplM SimplEnv +-- Recursive let binders for join points; +-- context being pushed inward may change types +-- See Note [Return type for join points] +simplRecJoinBndrs env@(SimplEnv {}) res_ty ids + = ASSERT(all isJoinId ids) + do { let (env1, ids1) = mapAccumL (substIdBndr (Just res_ty)) env ids + ; seqIds ids1 `seq` return env1 } + +--------------- +substIdBndr :: Maybe OutType -> SimplEnv -> InBndr -> (SimplEnv, OutBndr) +-- Might be a coercion variable +substIdBndr new_res_ty env bndr + | isCoVar bndr = substCoVarBndr env bndr + | otherwise = substNonCoVarIdBndr new_res_ty env bndr + +--------------- +substNonCoVarIdBndr + :: Maybe OutType -- New result type, if a join binder + -- See Note [Return type for join points] + -> SimplEnv + -> InBndr -- Env and binder to transform + -> (SimplEnv, OutBndr) +-- Clone Id if necessary, substitute its type +-- Return an Id with its +-- * Type substituted +-- * UnfoldingInfo, Rules, WorkerInfo zapped +-- * Fragile OccInfo (only) zapped: Note [Robust OccInfo] +-- * Robust info, retained especially arity and demand info, +-- so that they are available to occurrences that occur in an +-- earlier binding of a letrec +-- +-- For the robust info, see Note [Arity robustness] +-- +-- Augment the substitution if the unique changed +-- Extend the in-scope set with the new Id +-- +-- Similar to GHC.Core.Subst.substIdBndr, except that +-- the type of id_subst differs +-- all fragile info is zapped +substNonCoVarIdBndr new_res_ty + env@(SimplEnv { seInScope = in_scope + , seIdSubst = id_subst }) + old_id + = ASSERT2( not (isCoVar old_id), ppr old_id ) + (env { seInScope = in_scope `extendInScopeSet` new_id, + seIdSubst = new_subst }, new_id) + where + id1 = uniqAway in_scope old_id + id2 = substIdType env id1 + + id3 | Just res_ty <- new_res_ty + = id2 `setIdType` setJoinResTy (idJoinArity id2) res_ty (idType id2) + -- See Note [Return type for join points] + | otherwise + = id2 + + new_id = zapFragileIdInfo id3 -- Zaps rules, worker-info, unfolding + -- and fragile OccInfo + + -- Extend the substitution if the unique has changed, + -- or there's some useful occurrence information + -- See the notes with substTyVarBndr for the delSubstEnv + new_subst | new_id /= old_id + = extendVarEnv id_subst old_id (DoneId new_id) + | otherwise + = delVarEnv id_subst old_id + +------------------------------------ +seqTyVar :: TyVar -> () +seqTyVar b = b `seq` () + +seqId :: Id -> () +seqId id = seqType (idType id) `seq` + idInfo id `seq` + () + +seqIds :: [Id] -> () +seqIds [] = () +seqIds (id:ids) = seqId id `seq` seqIds ids + +{- +Note [Arity robustness] +~~~~~~~~~~~~~~~~~~~~~~~ +We *do* transfer the arity from from the in_id of a let binding to the +out_id. This is important, so that the arity of an Id is visible in +its own RHS. For example: + f = \x. ....g (\y. f y).... +We can eta-reduce the arg to g, because f is a value. But that +needs to be visible. + +This interacts with the 'state hack' too: + f :: Bool -> IO Int + f = \x. case x of + True -> f y + False -> \s -> ... +Can we eta-expand f? Only if we see that f has arity 1, and then we +take advantage of the 'state hack' on the result of +(f y) :: State# -> (State#, Int) to expand the arity one more. + +There is a disadvantage though. Making the arity visible in the RHS +allows us to eta-reduce + f = \x -> f x +to + f = f +which technically is not sound. This is very much a corner case, so +I'm not worried about it. Another idea is to ensure that f's arity +never decreases; its arity started as 1, and we should never eta-reduce +below that. + + +Note [Robust OccInfo] +~~~~~~~~~~~~~~~~~~~~~ +It's important that we *do* retain the loop-breaker OccInfo, because +that's what stops the Id getting inlined infinitely, in the body of +the letrec. +-} + + +{- +************************************************************************ +* * + Impedance matching to type substitution +* * +************************************************************************ +-} + +getTCvSubst :: SimplEnv -> TCvSubst +getTCvSubst (SimplEnv { seInScope = in_scope, seTvSubst = tv_env + , seCvSubst = cv_env }) + = mkTCvSubst in_scope (tv_env, cv_env) + +substTy :: SimplEnv -> Type -> Type +substTy env ty = Type.substTy (getTCvSubst env) ty + +substTyVar :: SimplEnv -> TyVar -> Type +substTyVar env tv = Type.substTyVar (getTCvSubst env) tv + +substTyVarBndr :: SimplEnv -> TyVar -> (SimplEnv, TyVar) +substTyVarBndr env tv + = case Type.substTyVarBndr (getTCvSubst env) tv of + (TCvSubst in_scope' tv_env' cv_env', tv') + -> (env { seInScope = in_scope', seTvSubst = tv_env', seCvSubst = cv_env' }, tv') + +substCoVar :: SimplEnv -> CoVar -> Coercion +substCoVar env tv = Coercion.substCoVar (getTCvSubst env) tv + +substCoVarBndr :: SimplEnv -> CoVar -> (SimplEnv, CoVar) +substCoVarBndr env cv + = case Coercion.substCoVarBndr (getTCvSubst env) cv of + (TCvSubst in_scope' tv_env' cv_env', cv') + -> (env { seInScope = in_scope', seTvSubst = tv_env', seCvSubst = cv_env' }, cv') + +substCo :: SimplEnv -> Coercion -> Coercion +substCo env co = Coercion.substCo (getTCvSubst env) co + +------------------ +substIdType :: SimplEnv -> Id -> Id +substIdType (SimplEnv { seInScope = in_scope, seTvSubst = tv_env, seCvSubst = cv_env }) id + | (isEmptyVarEnv tv_env && isEmptyVarEnv cv_env) + || noFreeVarsOfType old_ty + = id + | otherwise = Id.setIdType id (Type.substTy (TCvSubst in_scope tv_env cv_env) old_ty) + -- The tyCoVarsOfType is cheaper than it looks + -- because we cache the free tyvars of the type + -- in a Note in the id's type itself + where + old_ty = idType id diff --git a/compiler/GHC/Core/Op/Simplify/Monad.hs b/compiler/GHC/Core/Op/Simplify/Monad.hs new file mode 100644 index 0000000000..e6b23734c4 --- /dev/null +++ b/compiler/GHC/Core/Op/Simplify/Monad.hs @@ -0,0 +1,252 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +\section[GHC.Core.Op.Simplify.Monad]{The simplifier Monad} +-} + +{-# LANGUAGE DeriveFunctor #-} +module GHC.Core.Op.Simplify.Monad ( + -- The monad + SimplM, + initSmpl, traceSmpl, + getSimplRules, getFamEnvs, + + -- Unique supply + MonadUnique(..), newId, newJoinId, + + -- Counting + SimplCount, tick, freeTick, checkedTick, + getSimplCount, zeroSimplCount, pprSimplCount, + plusSimplCount, isZeroSimplCount + ) where + +import GhcPrelude + +import Var ( Var, isId, mkLocalVar ) +import Name ( mkSystemVarName ) +import Id ( Id, mkSysLocalOrCoVar ) +import IdInfo ( IdDetails(..), vanillaIdInfo, setArityInfo ) +import GHC.Core.Type ( Type, mkLamTypes ) +import GHC.Core.FamInstEnv ( FamInstEnv ) +import GHC.Core ( RuleEnv(..) ) +import UniqSupply +import GHC.Driver.Session +import GHC.Core.Op.Monad +import Outputable +import FastString +import MonadUtils +import ErrUtils as Err +import Util ( count ) +import Panic (throwGhcExceptionIO, GhcException (..)) +import BasicTypes ( IntWithInf, treatZeroAsInf, mkIntWithInf ) +import Control.Monad ( ap ) + +{- +************************************************************************ +* * +\subsection{Monad plumbing} +* * +************************************************************************ + +For the simplifier monad, we want to {\em thread} a unique supply and a counter. +(Command-line switches move around through the explicitly-passed SimplEnv.) +-} + +newtype SimplM result + = SM { unSM :: SimplTopEnv -- Envt that does not change much + -> UniqSupply -- We thread the unique supply because + -- constantly splitting it is rather expensive + -> SimplCount + -> IO (result, UniqSupply, SimplCount)} + -- we only need IO here for dump output + deriving (Functor) + +data SimplTopEnv + = STE { st_flags :: DynFlags + , st_max_ticks :: IntWithInf -- Max #ticks in this simplifier run + , st_rules :: RuleEnv + , st_fams :: (FamInstEnv, FamInstEnv) } + +initSmpl :: DynFlags -> RuleEnv -> (FamInstEnv, FamInstEnv) + -> UniqSupply -- No init count; set to 0 + -> Int -- Size of the bindings, used to limit + -- the number of ticks we allow + -> SimplM a + -> IO (a, SimplCount) + +initSmpl dflags rules fam_envs us size m + = do (result, _, count) <- unSM m env us (zeroSimplCount dflags) + return (result, count) + where + env = STE { st_flags = dflags, st_rules = rules + , st_max_ticks = computeMaxTicks dflags size + , st_fams = fam_envs } + +computeMaxTicks :: DynFlags -> Int -> IntWithInf +-- Compute the max simplifier ticks as +-- (base-size + pgm-size) * magic-multiplier * tick-factor/100 +-- where +-- magic-multiplier is a constant that gives reasonable results +-- base-size is a constant to deal with size-zero programs +computeMaxTicks dflags size + = treatZeroAsInf $ + fromInteger ((toInteger (size + base_size) + * toInteger (tick_factor * magic_multiplier)) + `div` 100) + where + tick_factor = simplTickFactor dflags + base_size = 100 + magic_multiplier = 40 + -- MAGIC NUMBER, multiplies the simplTickFactor + -- We can afford to be generous; this is really + -- just checking for loops, and shouldn't usually fire + -- A figure of 20 was too small: see #5539. + +{-# INLINE thenSmpl #-} +{-# INLINE thenSmpl_ #-} +{-# INLINE returnSmpl #-} + + +instance Applicative SimplM where + pure = returnSmpl + (<*>) = ap + (*>) = thenSmpl_ + +instance Monad SimplM where + (>>) = (*>) + (>>=) = thenSmpl + +returnSmpl :: a -> SimplM a +returnSmpl e = SM (\_st_env us sc -> return (e, us, sc)) + +thenSmpl :: SimplM a -> (a -> SimplM b) -> SimplM b +thenSmpl_ :: SimplM a -> SimplM b -> SimplM b + +thenSmpl m k + = SM $ \st_env us0 sc0 -> do + (m_result, us1, sc1) <- unSM m st_env us0 sc0 + unSM (k m_result) st_env us1 sc1 + +thenSmpl_ m k + = SM $ \st_env us0 sc0 -> do + (_, us1, sc1) <- unSM m st_env us0 sc0 + unSM k st_env us1 sc1 + +-- TODO: this specializing is not allowed +-- {-# SPECIALIZE mapM :: (a -> SimplM b) -> [a] -> SimplM [b] #-} +-- {-# SPECIALIZE mapAndUnzipM :: (a -> SimplM (b, c)) -> [a] -> SimplM ([b],[c]) #-} +-- {-# SPECIALIZE mapAccumLM :: (acc -> b -> SimplM (acc,c)) -> acc -> [b] -> SimplM (acc, [c]) #-} + +traceSmpl :: String -> SDoc -> SimplM () +traceSmpl herald doc + = do { dflags <- getDynFlags + ; liftIO $ Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_trace "Simpl Trace" + FormatText + (hang (text herald) 2 doc) } + +{- +************************************************************************ +* * +\subsection{The unique supply} +* * +************************************************************************ +-} + +instance MonadUnique SimplM where + getUniqueSupplyM + = SM (\_st_env us sc -> case splitUniqSupply us of + (us1, us2) -> return (us1, us2, sc)) + + getUniqueM + = SM (\_st_env us sc -> case takeUniqFromSupply us of + (u, us') -> return (u, us', sc)) + + getUniquesM + = SM (\_st_env us sc -> case splitUniqSupply us of + (us1, us2) -> return (uniqsFromSupply us1, us2, sc)) + +instance HasDynFlags SimplM where + getDynFlags = SM (\st_env us sc -> return (st_flags st_env, us, sc)) + +instance MonadIO SimplM where + liftIO m = SM $ \_ us sc -> do + x <- m + return (x, us, sc) + +getSimplRules :: SimplM RuleEnv +getSimplRules = SM (\st_env us sc -> return (st_rules st_env, us, sc)) + +getFamEnvs :: SimplM (FamInstEnv, FamInstEnv) +getFamEnvs = SM (\st_env us sc -> return (st_fams st_env, us, sc)) + +newId :: FastString -> Type -> SimplM Id +newId fs ty = do uniq <- getUniqueM + return (mkSysLocalOrCoVar fs uniq ty) + +newJoinId :: [Var] -> Type -> SimplM Id +newJoinId bndrs body_ty + = do { uniq <- getUniqueM + ; let name = mkSystemVarName uniq (fsLit "$j") + join_id_ty = mkLamTypes bndrs body_ty -- Note [Funky mkLamTypes] + arity = count isId bndrs + -- arity: See Note [Invariants on join points] invariant 2b, in GHC.Core + join_arity = length bndrs + details = JoinId join_arity + id_info = vanillaIdInfo `setArityInfo` arity +-- `setOccInfo` strongLoopBreaker + + ; return (mkLocalVar details name join_id_ty id_info) } + +{- +************************************************************************ +* * +\subsection{Counting up what we've done} +* * +************************************************************************ +-} + +getSimplCount :: SimplM SimplCount +getSimplCount = SM (\_st_env us sc -> return (sc, us, sc)) + +tick :: Tick -> SimplM () +tick t = SM (\st_env us sc -> let sc' = doSimplTick (st_flags st_env) t sc + in sc' `seq` return ((), us, sc')) + +checkedTick :: Tick -> SimplM () +-- Try to take a tick, but fail if too many +checkedTick t + = SM (\st_env us sc -> + if st_max_ticks st_env <= mkIntWithInf (simplCountN sc) + then throwGhcExceptionIO $ + PprProgramError "Simplifier ticks exhausted" (msg sc) + else let sc' = doSimplTick (st_flags st_env) t sc + in sc' `seq` return ((), us, sc')) + where + msg sc = vcat + [ text "When trying" <+> ppr t + , text "To increase the limit, use -fsimpl-tick-factor=N (default 100)." + , space + , text "If you need to increase the limit substantially, please file a" + , text "bug report and indicate the factor you needed." + , space + , text "If GHC was unable to complete compilation even" + <+> text "with a very large factor" + , text "(a thousand or more), please consult the" + <+> doubleQuotes (text "Known bugs or infelicities") + , text "section in the Users Guide before filing a report. There are a" + , text "few situations unlikely to occur in practical programs for which" + , text "simplifier non-termination has been judged acceptable." + , space + , pp_details sc + , pprSimplCount sc ] + pp_details sc + | hasDetailedCounts sc = empty + | otherwise = text "To see detailed counts use -ddump-simpl-stats" + + +freeTick :: Tick -> SimplM () +-- Record a tick, but don't add to the total tick count, which is +-- used to decide when nothing further has happened +freeTick t + = SM (\_st_env us sc -> let sc' = doFreeSimplTick t sc + in sc' `seq` return ((), us, sc')) diff --git a/compiler/GHC/Core/Op/Simplify/Utils.hs b/compiler/GHC/Core/Op/Simplify/Utils.hs new file mode 100644 index 0000000000..e62c256354 --- /dev/null +++ b/compiler/GHC/Core/Op/Simplify/Utils.hs @@ -0,0 +1,2329 @@ +{- +(c) The AQUA Project, Glasgow University, 1993-1998 + +The simplifier utilities +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.Simplify.Utils ( + -- Rebuilding + mkLam, mkCase, prepareAlts, tryEtaExpandRhs, + + -- Inlining, + preInlineUnconditionally, postInlineUnconditionally, + activeUnfolding, activeRule, + getUnfoldingInRuleMatch, + simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules, + + -- The continuation type + SimplCont(..), DupFlag(..), StaticEnv, + isSimplified, contIsStop, + contIsDupable, contResultType, contHoleType, + contIsTrivial, contArgs, + countArgs, + mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg, + interestingCallContext, + + -- ArgInfo + ArgInfo(..), ArgSpec(..), mkArgInfo, + addValArgTo, addCastTo, addTyArgTo, + argInfoExpr, argInfoAppArgs, pushSimplifiedArgs, + + abstractFloats, + + -- Utilities + isExitJoinId + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core.Op.Simplify.Env +import GHC.Core.Op.Monad ( SimplMode(..), Tick(..) ) +import GHC.Driver.Session +import GHC.Core +import qualified GHC.Core.Subst +import GHC.Core.Ppr +import GHC.Core.TyCo.Ppr ( pprParendType ) +import GHC.Core.FVs +import GHC.Core.Utils +import GHC.Core.Arity +import GHC.Core.Unfold +import Name +import Id +import IdInfo +import Var +import Demand +import GHC.Core.Op.Simplify.Monad +import GHC.Core.Type hiding( substTy ) +import GHC.Core.Coercion hiding( substCo ) +import GHC.Core.DataCon ( dataConWorkId, isNullaryRepDataCon ) +import VarSet +import BasicTypes +import Util +import OrdList ( isNilOL ) +import MonadUtils +import Outputable +import GHC.Core.Op.ConstantFold +import FastString ( fsLit ) + +import Control.Monad ( when ) +import Data.List ( sortBy ) + +{- +************************************************************************ +* * + The SimplCont and DupFlag types +* * +************************************************************************ + +A SimplCont allows the simplifier to traverse the expression in a +zipper-like fashion. The SimplCont represents the rest of the expression, +"above" the point of interest. + +You can also think of a SimplCont as an "evaluation context", using +that term in the way it is used for operational semantics. This is the +way I usually think of it, For example you'll often see a syntax for +evaluation context looking like + C ::= [] | C e | case C of alts | C `cast` co +That's the kind of thing we are doing here, and I use that syntax in +the comments. + + +Key points: + * A SimplCont describes a *strict* context (just like + evaluation contexts do). E.g. Just [] is not a SimplCont + + * A SimplCont describes a context that *does not* bind + any variables. E.g. \x. [] is not a SimplCont +-} + +data SimplCont + = Stop -- Stop[e] = e + OutType -- Type of the <hole> + CallCtxt -- Tells if there is something interesting about + -- the context, and hence the inliner + -- should be a bit keener (see interestingCallContext) + -- Specifically: + -- This is an argument of a function that has RULES + -- Inlining the call might allow the rule to fire + -- Never ValAppCxt (use ApplyToVal instead) + -- or CaseCtxt (use Select instead) + + | CastIt -- (CastIt co K)[e] = K[ e `cast` co ] + OutCoercion -- The coercion simplified + -- Invariant: never an identity coercion + SimplCont + + | ApplyToVal -- (ApplyToVal arg K)[e] = K[ e arg ] + { sc_dup :: DupFlag -- See Note [DupFlag invariants] + , sc_arg :: InExpr -- The argument, + , sc_env :: StaticEnv -- see Note [StaticEnv invariant] + , sc_cont :: SimplCont } + + | ApplyToTy -- (ApplyToTy ty K)[e] = K[ e ty ] + { sc_arg_ty :: OutType -- Argument type + , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah) + -- See Note [The hole type in ApplyToTy] + , sc_cont :: SimplCont } + + | Select -- (Select alts K)[e] = K[ case e of alts ] + { sc_dup :: DupFlag -- See Note [DupFlag invariants] + , sc_bndr :: InId -- case binder + , sc_alts :: [InAlt] -- Alternatives + , sc_env :: StaticEnv -- See Note [StaticEnv invariant] + , sc_cont :: SimplCont } + + -- The two strict forms have no DupFlag, because we never duplicate them + | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b] + -- or, equivalently, = K[ (\x xs.b) e ] + { sc_dup :: DupFlag -- See Note [DupFlag invariants] + , sc_bndr :: InId + , sc_bndrs :: [InBndr] + , sc_body :: InExpr + , sc_env :: StaticEnv -- See Note [StaticEnv invariant] + , sc_cont :: SimplCont } + + | StrictArg -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ] + { sc_dup :: DupFlag -- Always Simplified or OkToDup + , sc_fun :: ArgInfo -- Specifies f, e1..en, Whether f has rules, etc + -- plus strictness flags for *further* args + , sc_cci :: CallCtxt -- Whether *this* argument position is interesting + , sc_cont :: SimplCont } + + | TickIt -- (TickIt t K)[e] = K[ tick t e ] + (Tickish Id) -- Tick tickish <hole> + SimplCont + +type StaticEnv = SimplEnv -- Just the static part is relevant + +data DupFlag = NoDup -- Unsimplified, might be big + | Simplified -- Simplified + | OkToDup -- Simplified and small + +isSimplified :: DupFlag -> Bool +isSimplified NoDup = False +isSimplified _ = True -- Invariant: the subst-env is empty + +perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type +perhapsSubstTy dup env ty + | isSimplified dup = ty + | otherwise = substTy env ty + +{- Note [StaticEnv invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We pair up an InExpr or InAlts with a StaticEnv, which establishes the +lexical scope for that InExpr. When we simplify that InExpr/InAlts, we +use + - Its captured StaticEnv + - Overriding its InScopeSet with the larger one at the + simplification point. + +Why override the InScopeSet? Example: + (let y = ey in f) ex +By the time we simplify ex, 'y' will be in scope. + +However the InScopeSet in the StaticEnv is not irrelevant: it should +include all the free vars of applying the substitution to the InExpr. +Reason: contHoleType uses perhapsSubstTy to apply the substitution to +the expression, and that (rightly) gives ASSERT failures if the InScopeSet +isn't big enough. + +Note [DupFlag invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~ +In both (ApplyToVal dup _ env k) + and (Select dup _ _ env k) +the following invariants hold + + (a) if dup = OkToDup, then continuation k is also ok-to-dup + (b) if dup = OkToDup or Simplified, the subst-env is empty + (and and hence no need to re-simplify) +-} + +instance Outputable DupFlag where + ppr OkToDup = text "ok" + ppr NoDup = text "nodup" + ppr Simplified = text "simpl" + +instance Outputable SimplCont where + ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty + ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont + ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont + ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont }) + = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont + ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont }) + = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg) + $$ ppr cont + ppr (StrictBind { sc_bndr = b, sc_cont = cont }) + = (text "StrictBind" <+> ppr b) $$ ppr cont + ppr (StrictArg { sc_fun = ai, sc_cont = cont }) + = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont + ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont }) + = (text "Select" <+> ppr dup <+> ppr bndr) $$ + whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont + + +{- Note [The hole type in ApplyToTy] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sc_hole_ty field of ApplyToTy records the type of the "hole" in the +continuation. It is absolutely necessary to compute contHoleType, but it is +not used for anything else (and hence may not be evaluated). + +Why is it necessary for contHoleType? Consider the continuation + ApplyToType Int (Stop Int) +corresponding to + (<hole> @Int) :: Int +What is the type of <hole>? It could be (forall a. Int) or (forall a. a), +and there is no way to know which, so we must record it. + +In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType +for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably +doesn't matter because we'll never compute them all. + +************************************************************************ +* * + ArgInfo and ArgSpec +* * +************************************************************************ +-} + +data ArgInfo + = ArgInfo { + ai_fun :: OutId, -- The function + ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order) + + ai_type :: OutType, -- Type of (f a1 ... an) + + ai_rules :: FunRules, -- Rules for this function + + ai_encl :: Bool, -- Flag saying whether this function + -- or an enclosing one has rules (recursively) + -- True => be keener to inline in all args + + ai_strs :: [Bool], -- Strictness of remaining arguments + -- Usually infinite, but if it is finite it guarantees + -- that the function diverges after being given + -- that number of args + ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline + -- Always infinite + } + +data ArgSpec + = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal + | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy + , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah) + | CastBy OutCoercion -- Cast by this; c.f. CastIt + +instance Outputable ArgSpec where + ppr (ValArg e) = text "ValArg" <+> ppr e + ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty + ppr (CastBy c) = text "CastBy" <+> ppr c + +addValArgTo :: ArgInfo -> OutExpr -> ArgInfo +addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai + , ai_type = applyTypeToArg (ai_type ai) arg + , ai_rules = decRules (ai_rules ai) } + +addTyArgTo :: ArgInfo -> OutType -> ArgInfo +addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai + , ai_type = piResultTy poly_fun_ty arg_ty + , ai_rules = decRules (ai_rules ai) } + where + poly_fun_ty = ai_type ai + arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty } + +addCastTo :: ArgInfo -> OutCoercion -> ArgInfo +addCastTo ai co = ai { ai_args = CastBy co : ai_args ai + , ai_type = coercionRKind co } + +argInfoAppArgs :: [ArgSpec] -> [OutExpr] +argInfoAppArgs [] = [] +argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast +argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as +argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as + +pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont +pushSimplifiedArgs _env [] k = k +pushSimplifiedArgs env (arg : args) k + = case arg of + TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty } + -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest } + ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest } + CastBy c -> CastIt c rest + where + rest = pushSimplifiedArgs env args k + -- The env has an empty SubstEnv + +argInfoExpr :: OutId -> [ArgSpec] -> OutExpr +-- NB: the [ArgSpec] is reversed so that the first arg +-- in the list is the last one in the application +argInfoExpr fun rev_args + = go rev_args + where + go [] = Var fun + go (ValArg a : as) = go as `App` a + go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty + go (CastBy co : as) = mkCast (go as) co + + +type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function + -- Nothing => No rules + -- Just (n, rules) => some rules, requiring at least n more type/value args + +decRules :: FunRules -> FunRules +decRules (Just (n, rules)) = Just (n-1, rules) +decRules Nothing = Nothing + +mkFunRules :: [CoreRule] -> FunRules +mkFunRules [] = Nothing +mkFunRules rs = Just (n_required, rs) + where + n_required = maximum (map ruleArity rs) + +{- +************************************************************************ +* * + Functions on SimplCont +* * +************************************************************************ +-} + +mkBoringStop :: OutType -> SimplCont +mkBoringStop ty = Stop ty BoringCtxt + +mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in GHC.Core.Unfold +mkRhsStop ty = Stop ty RhsCtxt + +mkLazyArgStop :: OutType -> CallCtxt -> SimplCont +mkLazyArgStop ty cci = Stop ty cci + +------------------- +contIsRhsOrArg :: SimplCont -> Bool +contIsRhsOrArg (Stop {}) = True +contIsRhsOrArg (StrictBind {}) = True +contIsRhsOrArg (StrictArg {}) = True +contIsRhsOrArg _ = False + +contIsRhs :: SimplCont -> Bool +contIsRhs (Stop _ RhsCtxt) = True +contIsRhs _ = False + +------------------- +contIsStop :: SimplCont -> Bool +contIsStop (Stop {}) = True +contIsStop _ = False + +contIsDupable :: SimplCont -> Bool +contIsDupable (Stop {}) = True +contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k +contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants] +contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto... +contIsDupable (StrictArg { sc_dup = OkToDup }) = True -- ...ditto... +contIsDupable (CastIt _ k) = contIsDupable k +contIsDupable _ = False + +------------------- +contIsTrivial :: SimplCont -> Bool +contIsTrivial (Stop {}) = True +contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k +contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k +contIsTrivial (CastIt _ k) = contIsTrivial k +contIsTrivial _ = False + +------------------- +contResultType :: SimplCont -> OutType +contResultType (Stop ty _) = ty +contResultType (CastIt _ k) = contResultType k +contResultType (StrictBind { sc_cont = k }) = contResultType k +contResultType (StrictArg { sc_cont = k }) = contResultType k +contResultType (Select { sc_cont = k }) = contResultType k +contResultType (ApplyToTy { sc_cont = k }) = contResultType k +contResultType (ApplyToVal { sc_cont = k }) = contResultType k +contResultType (TickIt _ k) = contResultType k + +contHoleType :: SimplCont -> OutType +contHoleType (Stop ty _) = ty +contHoleType (TickIt _ k) = contHoleType k +contHoleType (CastIt co _) = coercionLKind co +contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se }) + = perhapsSubstTy dup se (idType b) +contHoleType (StrictArg { sc_fun = ai }) = funArgTy (ai_type ai) +contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy] +contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k }) + = mkVisFunTy (perhapsSubstTy dup se (exprType e)) + (contHoleType k) +contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se }) + = perhapsSubstTy d se (idType b) + +------------------- +countArgs :: SimplCont -> Int +-- Count all arguments, including types, coercions, and other values +countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont +countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont +countArgs _ = 0 + +contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont) +-- Summarises value args, discards type args and coercions +-- The returned continuation of the call is only used to +-- answer questions like "are you interesting?" +contArgs cont + | lone cont = (True, [], cont) + | otherwise = go [] cont + where + lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold + lone (ApplyToVal {}) = False + lone (CastIt {}) = False + lone _ = True + + go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k }) + = go (is_interesting arg se : args) k + go args (ApplyToTy { sc_cont = k }) = go args k + go args (CastIt _ k) = go args k + go args k = (False, reverse args, k) + + is_interesting arg se = interestingArg se arg + -- Do *not* use short-cutting substitution here + -- because we want to get as much IdInfo as possible + + +------------------- +mkArgInfo :: SimplEnv + -> Id + -> [CoreRule] -- Rules for function + -> Int -- Number of value args + -> SimplCont -- Context of the call + -> ArgInfo + +mkArgInfo env fun rules n_val_args call_cont + | n_val_args < idArity fun -- Note [Unsaturated functions] + = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty + , ai_rules = fun_rules + , ai_encl = False + , ai_strs = vanilla_stricts + , ai_discs = vanilla_discounts } + | otherwise + = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty + , ai_rules = fun_rules + , ai_encl = interestingArgContext rules call_cont + , ai_strs = arg_stricts + , ai_discs = arg_discounts } + where + fun_ty = idType fun + + fun_rules = mkFunRules rules + + vanilla_discounts, arg_discounts :: [Int] + vanilla_discounts = repeat 0 + arg_discounts = case idUnfolding fun of + CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}} + -> discounts ++ vanilla_discounts + _ -> vanilla_discounts + + vanilla_stricts, arg_stricts :: [Bool] + vanilla_stricts = repeat False + + arg_stricts + | not (sm_inline (seMode env)) + = vanilla_stricts -- See Note [Do not expose strictness if sm_inline=False] + | otherwise + = add_type_str fun_ty $ + case splitStrictSig (idStrictness fun) of + (demands, result_info) + | not (demands `lengthExceeds` n_val_args) + -> -- Enough args, use the strictness given. + -- For bottoming functions we used to pretend that the arg + -- is lazy, so that we don't treat the arg as an + -- interesting context. This avoids substituting + -- top-level bindings for (say) strings into + -- calls to error. But now we are more careful about + -- inlining lone variables, so it's ok + -- (see GHC.Core.Op.Simplify.Utils.analyseCont) + if isBotDiv result_info then + map isStrictDmd demands -- Finite => result is bottom + else + map isStrictDmd demands ++ vanilla_stricts + | otherwise + -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun) + <+> ppr n_val_args <+> ppr demands ) + vanilla_stricts -- Not enough args, or no strictness + + add_type_str :: Type -> [Bool] -> [Bool] + -- If the function arg types are strict, record that in the 'strictness bits' + -- No need to instantiate because unboxed types (which dominate the strict + -- types) can't instantiate type variables. + -- add_type_str is done repeatedly (for each call); + -- might be better once-for-all in the function + -- But beware primops/datacons with no strictness + + add_type_str _ [] = [] + add_type_str fun_ty all_strs@(str:strs) + | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty -- Add strict-type info + = (str || Just False == isLiftedType_maybe arg_ty) + : add_type_str fun_ty' strs + -- If the type is levity-polymorphic, we can't know whether it's + -- strict. isLiftedType_maybe will return Just False only when + -- we're sure the type is unlifted. + + | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty + = add_type_str fun_ty' all_strs -- Look through foralls + + | otherwise + = all_strs + +{- Note [Unsaturated functions] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (test eyeball/inline4) + x = a:as + y = f x +where f has arity 2. Then we do not want to inline 'x', because +it'll just be floated out again. Even if f has lots of discounts +on its first argument -- it must be saturated for these to kick in + +Note [Do not expose strictness if sm_inline=False] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#15163 showed a case in which we had + + {-# INLINE [1] zip #-} + zip = undefined + + {-# RULES "foo" forall as bs. stream (zip as bs) = ..blah... #-} + +If we expose zip's bottoming nature when simplifying the LHS of the +RULE we get + {-# RULES "foo" forall as bs. + stream (case zip of {}) = ..blah... #-} +discarding the arguments to zip. Usually this is fine, but on the +LHS of a rule it's not, because 'as' and 'bs' are now not bound on +the LHS. + +This is a pretty pathological example, so I'm not losing sleep over +it, but the simplest solution was to check sm_inline; if it is False, +which it is on the LHS of a rule (see updModeForRules), then don't +make use of the strictness info for the function. +-} + + +{- +************************************************************************ +* * + Interesting arguments +* * +************************************************************************ + +Note [Interesting call context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want to avoid inlining an expression where there can't possibly be +any gain, such as in an argument position. Hence, if the continuation +is interesting (eg. a case scrutinee, application etc.) then we +inline, otherwise we don't. + +Previously some_benefit used to return True only if the variable was +applied to some value arguments. This didn't work: + + let x = _coerce_ (T Int) Int (I# 3) in + case _coerce_ Int (T Int) x of + I# y -> .... + +we want to inline x, but can't see that it's a constructor in a case +scrutinee position, and some_benefit is False. + +Another example: + +dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t) + +.... case dMonadST _@_ x0 of (a,b,c) -> .... + +we'd really like to inline dMonadST here, but we *don't* want to +inline if the case expression is just + + case x of y { DEFAULT -> ... } + +since we can just eliminate this case instead (x is in WHNF). Similar +applies when x is bound to a lambda expression. Hence +contIsInteresting looks for case expressions with just a single +default case. + +Note [No case of case is boring] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we see + case f x of <alts> + +we'd usually treat the context as interesting, to encourage 'f' to +inline. But if case-of-case is off, it's really not so interesting +after all, because we are unlikely to be able to push the case +expression into the branches of any case in f's unfolding. So, to +reduce unnecessary code expansion, we just make the context look boring. +This made a small compile-time perf improvement in perf/compiler/T6048, +and it looks plausible to me. +-} + +interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt +-- See Note [Interesting call context] +interestingCallContext env cont + = interesting cont + where + interesting (Select {}) + | sm_case_case (getMode env) = CaseCtxt + | otherwise = BoringCtxt + -- See Note [No case of case is boring] + + interesting (ApplyToVal {}) = ValAppCtxt + -- Can happen if we have (f Int |> co) y + -- If f has an INLINE prag we need to give it some + -- motivation to inline. See Note [Cast then apply] + -- in GHC.Core.Unfold + + interesting (StrictArg { sc_cci = cci }) = cci + interesting (StrictBind {}) = BoringCtxt + interesting (Stop _ cci) = cci + interesting (TickIt _ k) = interesting k + interesting (ApplyToTy { sc_cont = k }) = interesting k + interesting (CastIt _ k) = interesting k + -- If this call is the arg of a strict function, the context + -- is a bit interesting. If we inline here, we may get useful + -- evaluation information to avoid repeated evals: e.g. + -- x + (y * z) + -- Here the contIsInteresting makes the '*' keener to inline, + -- which in turn exposes a constructor which makes the '+' inline. + -- Assuming that +,* aren't small enough to inline regardless. + -- + -- It's also very important to inline in a strict context for things + -- like + -- foldr k z (f x) + -- Here, the context of (f x) is strict, and if f's unfolding is + -- a build it's *great* to inline it here. So we must ensure that + -- the context for (f x) is not totally uninteresting. + +interestingArgContext :: [CoreRule] -> SimplCont -> Bool +-- If the argument has form (f x y), where x,y are boring, +-- and f is marked INLINE, then we don't want to inline f. +-- But if the context of the argument is +-- g (f x y) +-- where g has rules, then we *do* want to inline f, in case it +-- exposes a rule that might fire. Similarly, if the context is +-- h (g (f x x)) +-- where h has rules, then we do want to inline f; hence the +-- call_cont argument to interestingArgContext +-- +-- The ai-rules flag makes this happen; if it's +-- set, the inliner gets just enough keener to inline f +-- regardless of how boring f's arguments are, if it's marked INLINE +-- +-- The alternative would be to *always* inline an INLINE function, +-- regardless of how boring its context is; but that seems overkill +-- For example, it'd mean that wrapper functions were always inlined +-- +-- The call_cont passed to interestingArgContext is the context of +-- the call itself, e.g. g <hole> in the example above +interestingArgContext rules call_cont + = notNull rules || enclosing_fn_has_rules + where + enclosing_fn_has_rules = go call_cont + + go (Select {}) = False + go (ApplyToVal {}) = False -- Shouldn't really happen + go (ApplyToTy {}) = False -- Ditto + go (StrictArg { sc_cci = cci }) = interesting cci + go (StrictBind {}) = False -- ?? + go (CastIt _ c) = go c + go (Stop _ cci) = interesting cci + go (TickIt _ c) = go c + + interesting RuleArgCtxt = True + interesting _ = False + + +{- Note [Interesting arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An argument is interesting if it deserves a discount for unfoldings +with a discount in that argument position. The idea is to avoid +unfolding a function that is applied only to variables that have no +unfolding (i.e. they are probably lambda bound): f x y z There is +little point in inlining f here. + +Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But +we must look through lets, eg (let x = e in C a b), because the let will +float, exposing the value, if we inline. That makes it different to +exprIsHNF. + +Before 2009 we said it was interesting if the argument had *any* structure +at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016. + +But we don't regard (f x y) as interesting, unless f is unsaturated. +If it's saturated and f hasn't inlined, then it's probably not going +to now! + +Note [Conlike is interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f d = ...((*) d x y)... + ... f (df d')... +where df is con-like. Then we'd really like to inline 'f' so that the +rule for (*) (df d) can fire. To do this + a) we give a discount for being an argument of a class-op (eg (*) d) + b) we say that a con-like argument (eg (df d)) is interesting +-} + +interestingArg :: SimplEnv -> CoreExpr -> ArgSummary +-- See Note [Interesting arguments] +interestingArg env e = go env 0 e + where + -- n is # value args to which the expression is applied + go env n (Var v) + = case substId env v of + DoneId v' -> go_var n v' + DoneEx e _ -> go (zapSubstEnv env) n e + ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e + + go _ _ (Lit {}) = ValueArg + go _ _ (Type _) = TrivArg + go _ _ (Coercion _) = TrivArg + go env n (App fn (Type _)) = go env n fn + go env n (App fn _) = go env (n+1) fn + go env n (Tick _ a) = go env n a + go env n (Cast e _) = go env n e + go env n (Lam v e) + | isTyVar v = go env n e + | n>0 = NonTrivArg -- (\x.b) e is NonTriv + | otherwise = ValueArg + go _ _ (Case {}) = NonTrivArg + go env n (Let b e) = case go env' n e of + ValueArg -> ValueArg + _ -> NonTrivArg + where + env' = env `addNewInScopeIds` bindersOf b + + go_var n v + | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that + -- data constructors here + | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding + | n > 0 = NonTrivArg -- Saturated or unknown call + | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding + -- See Note [Conlike is interesting] + | otherwise = TrivArg -- n==0, no useful unfolding + where + conlike_unfolding = isConLikeUnfolding (idUnfolding v) + +{- +************************************************************************ +* * + SimplMode +* * +************************************************************************ + +The SimplMode controls several switches; see its definition in +GHC.Core.Op.Monad + sm_rules :: Bool -- Whether RULES are enabled + sm_inline :: Bool -- Whether inlining is enabled + sm_case_case :: Bool -- Whether case-of-case is enabled + sm_eta_expand :: Bool -- Whether eta-expansion is enabled +-} + +simplEnvForGHCi :: DynFlags -> SimplEnv +simplEnvForGHCi dflags + = mkSimplEnv $ SimplMode { sm_names = ["GHCi"] + , sm_phase = InitialPhase + , sm_dflags = dflags + , sm_rules = rules_on + , sm_inline = False + , sm_eta_expand = eta_expand_on + , sm_case_case = True } + where + rules_on = gopt Opt_EnableRewriteRules dflags + eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags + -- Do not do any inlining, in case we expose some unboxed + -- tuple stuff that confuses the bytecode interpreter + +updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode +-- See Note [Simplifying inside stable unfoldings] +updModeForStableUnfoldings inline_rule_act current_mode + = current_mode { sm_phase = phaseFromActivation inline_rule_act + , sm_inline = True + , sm_eta_expand = False } + -- sm_eta_expand: see Note [No eta expansion in stable unfoldings] + -- For sm_rules, just inherit; sm_rules might be "off" + -- because of -fno-enable-rewrite-rules + where + phaseFromActivation (ActiveAfter _ n) = Phase n + phaseFromActivation _ = InitialPhase + +updModeForRules :: SimplMode -> SimplMode +-- See Note [Simplifying rules] +updModeForRules current_mode + = current_mode { sm_phase = InitialPhase + , sm_inline = False -- See Note [Do not expose strictness if sm_inline=False] + , sm_rules = False + , sm_eta_expand = False } + +{- Note [Simplifying rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When simplifying a rule LHS, refrain from /any/ inlining or applying +of other RULES. + +Doing anything to the LHS is plain confusing, because it means that what the +rule matches is not what the user wrote. c.f. #10595, and #10528. +Moreover, inlining (or applying rules) on rule LHSs risks introducing +Ticks into the LHS, which makes matching trickier. #10665, #10745. + +Doing this to either side confounds tools like HERMIT, which seek to reason +about and apply the RULES as originally written. See #10829. + +Note [No eta expansion in stable unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have a stable unfolding + + f :: Ord a => a -> IO () + -- Unfolding template + -- = /\a \(d:Ord a) (x:a). bla + +we do not want to eta-expand to + + f :: Ord a => a -> IO () + -- Unfolding template + -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co + +because not specialisation of the overloading doesn't work properly +(see Note [Specialisation shape] in GHC.Core.Op.Specialise), #9509. + +So we disable eta-expansion in stable unfoldings. + +Note [Inlining in gentle mode] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Something is inlined if + (i) the sm_inline flag is on, AND + (ii) the thing has an INLINE pragma, AND + (iii) the thing is inlinable in the earliest phase. + +Example of why (iii) is important: + {-# INLINE [~1] g #-} + g = ... + + {-# INLINE f #-} + f x = g (g x) + +If we were to inline g into f's inlining, then an importing module would +never be able to do + f e --> g (g e) ---> RULE fires +because the stable unfolding for f has had g inlined into it. + +On the other hand, it is bad not to do ANY inlining into an +stable unfolding, because then recursive knots in instance declarations +don't get unravelled. + +However, *sometimes* SimplGently must do no call-site inlining at all +(hence sm_inline = False). Before full laziness we must be careful +not to inline wrappers, because doing so inhibits floating + e.g. ...(case f x of ...)... + ==> ...(case (case x of I# x# -> fw x#) of ...)... + ==> ...(case x of I# x# -> case fw x# of ...)... +and now the redex (f x) isn't floatable any more. + +The no-inlining thing is also important for Template Haskell. You might be +compiling in one-shot mode with -O2; but when TH compiles a splice before +running it, we don't want to use -O2. Indeed, we don't want to inline +anything, because the byte-code interpreter might get confused about +unboxed tuples and suchlike. + +Note [Simplifying inside stable unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must take care with simplification inside stable unfoldings (which come from +INLINE pragmas). + +First, consider the following example + let f = \pq -> BIG + in + let g = \y -> f y y + {-# INLINE g #-} + in ...g...g...g...g...g... +Now, if that's the ONLY occurrence of f, it might be inlined inside g, +and thence copied multiple times when g is inlined. HENCE we treat +any occurrence in a stable unfolding as a multiple occurrence, not a single +one; see OccurAnal.addRuleUsage. + +Second, we do want *do* to some modest rules/inlining stuff in stable +unfoldings, partly to eliminate senseless crap, and partly to break +the recursive knots generated by instance declarations. + +However, suppose we have + {-# INLINE <act> f #-} + f = <rhs> +meaning "inline f in phases p where activation <act>(p) holds". +Then what inlinings/rules can we apply to the copy of <rhs> captured in +f's stable unfolding? Our model is that literally <rhs> is substituted for +f when it is inlined. So our conservative plan (implemented by +updModeForStableUnfoldings) is this: + + ------------------------------------------------------------- + When simplifying the RHS of a stable unfolding, set the phase + to the phase in which the stable unfolding first becomes active + ------------------------------------------------------------- + +That ensures that + + a) Rules/inlinings that *cease* being active before p will + not apply to the stable unfolding, consistent with it being + inlined in its *original* form in phase p. + + b) Rules/inlinings that only become active *after* p will + not apply to the stable unfolding, again to be consistent with + inlining the *original* rhs in phase p. + +For example, + {-# INLINE f #-} + f x = ...g... + + {-# NOINLINE [1] g #-} + g y = ... + + {-# RULE h g = ... #-} +Here we must not inline g into f's RHS, even when we get to phase 0, +because when f is later inlined into some other module we want the +rule for h to fire. + +Similarly, consider + {-# INLINE f #-} + f x = ...g... + + g y = ... +and suppose that there are auto-generated specialisations and a strictness +wrapper for g. The specialisations get activation AlwaysActive, and the +strictness wrapper get activation (ActiveAfter 0). So the strictness +wrepper fails the test and won't be inlined into f's stable unfolding. That +means f can inline, expose the specialised call to g, so the specialisation +rules can fire. + +A note about wrappers +~~~~~~~~~~~~~~~~~~~~~ +It's also important not to inline a worker back into a wrapper. +A wrapper looks like + wraper = inline_me (\x -> ...worker... ) +Normally, the inline_me prevents the worker getting inlined into +the wrapper (initially, the worker's only call site!). But, +if the wrapper is sure to be called, the strictness analyser will +mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf +continuation. +-} + +activeUnfolding :: SimplMode -> Id -> Bool +activeUnfolding mode id + | isCompulsoryUnfolding (realIdUnfolding id) + = True -- Even sm_inline can't override compulsory unfoldings + | otherwise + = isActive (sm_phase mode) (idInlineActivation id) + && sm_inline mode + -- `or` isStableUnfolding (realIdUnfolding id) + -- Inline things when + -- (a) they are active + -- (b) sm_inline says so, except that for stable unfoldings + -- (ie pragmas) we inline anyway + +getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv +-- When matching in RULE, we want to "look through" an unfolding +-- (to see a constructor) if *rules* are on, even if *inlinings* +-- are not. A notable example is DFuns, which really we want to +-- match in rules like (op dfun) in gentle mode. Another example +-- is 'otherwise' which we want exprIsConApp_maybe to be able to +-- see very early on +getUnfoldingInRuleMatch env + = (in_scope, id_unf) + where + in_scope = seInScope env + mode = getMode env + id_unf id | unf_is_active id = idUnfolding id + | otherwise = NoUnfolding + unf_is_active id + | not (sm_rules mode) = -- active_unfolding_minimal id + isStableUnfolding (realIdUnfolding id) + -- Do we even need to test this? I think this InScopeEnv + -- is only consulted if activeRule returns True, which + -- never happens if sm_rules is False + | otherwise = isActive (sm_phase mode) (idInlineActivation id) + +---------------------- +activeRule :: SimplMode -> Activation -> Bool +-- Nothing => No rules at all +activeRule mode + | not (sm_rules mode) = \_ -> False -- Rewriting is off + | otherwise = isActive (sm_phase mode) + +{- +************************************************************************ +* * + preInlineUnconditionally +* * +************************************************************************ + +preInlineUnconditionally +~~~~~~~~~~~~~~~~~~~~~~~~ +@preInlineUnconditionally@ examines a bndr to see if it is used just +once in a completely safe way, so that it is safe to discard the +binding inline its RHS at the (unique) usage site, REGARDLESS of how +big the RHS might be. If this is the case we don't simplify the RHS +first, but just inline it un-simplified. + +This is much better than first simplifying a perhaps-huge RHS and then +inlining and re-simplifying it. Indeed, it can be at least quadratically +better. Consider + + x1 = e1 + x2 = e2[x1] + x3 = e3[x2] + ...etc... + xN = eN[xN-1] + +We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc. +This can happen with cascades of functions too: + + f1 = \x1.e1 + f2 = \xs.e2[f1] + f3 = \xs.e3[f3] + ...etc... + +THE MAIN INVARIANT is this: + + ---- preInlineUnconditionally invariant ----- + IF preInlineUnconditionally chooses to inline x = <rhs> + THEN doing the inlining should not change the occurrence + info for the free vars of <rhs> + ---------------------------------------------- + +For example, it's tempting to look at trivial binding like + x = y +and inline it unconditionally. But suppose x is used many times, +but this is the unique occurrence of y. Then inlining x would change +y's occurrence info, which breaks the invariant. It matters: y +might have a BIG rhs, which will now be dup'd at every occurrence of x. + + +Even RHSs labelled InlineMe aren't caught here, because there might be +no benefit from inlining at the call site. + +[Sept 01] Don't unconditionally inline a top-level thing, because that +can simply make a static thing into something built dynamically. E.g. + x = (a,b) + main = \s -> h x + +[Remember that we treat \s as a one-shot lambda.] No point in +inlining x unless there is something interesting about the call site. + +But watch out: if you aren't careful, some useful foldr/build fusion +can be lost (most notably in spectral/hartel/parstof) because the +foldr didn't see the build. Doing the dynamic allocation isn't a big +deal, in fact, but losing the fusion can be. But the right thing here +seems to be to do a callSiteInline based on the fact that there is +something interesting about the call site (it's strict). Hmm. That +seems a bit fragile. + +Conclusion: inline top level things gaily until Phase 0 (the last +phase), at which point don't. + +Note [pre/postInlineUnconditionally in gentle mode] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Even in gentle mode we want to do preInlineUnconditionally. The +reason is that too little clean-up happens if you don't inline +use-once things. Also a bit of inlining is *good* for full laziness; +it can expose constant sub-expressions. Example in +spectral/mandel/Mandel.hs, where the mandelset function gets a useful +let-float if you inline windowToViewport + +However, as usual for Gentle mode, do not inline things that are +inactive in the initial stages. See Note [Gentle mode]. + +Note [Stable unfoldings and preInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas! +Example + + {-# INLINE f #-} + f :: Eq a => a -> a + f x = ... + + fInt :: Int -> Int + fInt = f Int dEqInt + + ...fInt...fInt...fInt... + +Here f occurs just once, in the RHS of fInt. But if we inline it there +it might make fInt look big, and we'll lose the opportunity to inline f +at each of fInt's call sites. The INLINE pragma will only inline when +the application is saturated for exactly this reason; and we don't +want PreInlineUnconditionally to second-guess it. A live example is +#3736. + c.f. Note [Stable unfoldings and postInlineUnconditionally] + +NB: if the pragma is INLINEABLE, then we don't want to behave in +this special way -- an INLINEABLE pragma just says to GHC "inline this +if you like". But if there is a unique occurrence, we want to inline +the stable unfolding, not the RHS. + +Note [Top-level bottoming Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't inline top-level Ids that are bottoming, even if they are used just +once, because FloatOut has gone to some trouble to extract them out. +Inlining them won't make the program run faster! + +Note [Do not inline CoVars unconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Coercion variables appear inside coercions, and the RHS of a let-binding +is a term (not a coercion) so we can't necessarily inline the latter in +the former. +-} + +preInlineUnconditionally + :: SimplEnv -> TopLevelFlag -> InId + -> InExpr -> StaticEnv -- These two go together + -> Maybe SimplEnv -- Returned env has extended substitution +-- Precondition: rhs satisfies the let/app invariant +-- See Note [Core let/app invariant] in GHC.Core +-- Reason: we don't want to inline single uses, or discard dead bindings, +-- for unlifted, side-effect-ful bindings +preInlineUnconditionally env top_lvl bndr rhs rhs_env + | not pre_inline_unconditionally = Nothing + | not active = Nothing + | isTopLevel top_lvl && isBottomingId bndr = Nothing -- Note [Top-level bottoming Ids] + | isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally] + | isExitJoinId bndr = Nothing -- Note [Do not inline exit join points] + -- in module Exitify + | not (one_occ (idOccInfo bndr)) = Nothing + | not (isStableUnfolding unf) = Just (extend_subst_with rhs) + + -- Note [Stable unfoldings and preInlineUnconditionally] + | isInlinablePragma inline_prag + , Just inl <- maybeUnfoldingTemplate unf = Just (extend_subst_with inl) + | otherwise = Nothing + where + unf = idUnfolding bndr + extend_subst_with inl_rhs = extendIdSubst env bndr (mkContEx rhs_env inl_rhs) + + one_occ IAmDead = True -- Happens in ((\x.1) v) + one_occ OneOcc{ occ_one_br = InOneBranch + , occ_in_lam = NotInsideLam } = isNotTopLevel top_lvl || early_phase + one_occ OneOcc{ occ_one_br = InOneBranch + , occ_in_lam = IsInsideLam + , occ_int_cxt = IsInteresting } = canInlineInLam rhs + one_occ _ = False + + pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env) + mode = getMode env + active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag) + -- See Note [pre/postInlineUnconditionally in gentle mode] + inline_prag = idInlinePragma bndr + +-- Be very careful before inlining inside a lambda, because (a) we must not +-- invalidate occurrence information, and (b) we want to avoid pushing a +-- single allocation (here) into multiple allocations (inside lambda). +-- Inlining a *function* with a single *saturated* call would be ok, mind you. +-- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok) +-- where +-- is_cheap = exprIsCheap rhs +-- ok = is_cheap && int_cxt + + -- int_cxt The context isn't totally boring + -- E.g. let f = \ab.BIG in \y. map f xs + -- Don't want to substitute for f, because then we allocate + -- its closure every time the \y is called + -- But: let f = \ab.BIG in \y. map (f y) xs + -- Now we do want to substitute for f, even though it's not + -- saturated, because we're going to allocate a closure for + -- (f y) every time round the loop anyhow. + + -- canInlineInLam => free vars of rhs are (Once in_lam) or Many, + -- so substituting rhs inside a lambda doesn't change the occ info. + -- Sadly, not quite the same as exprIsHNF. + canInlineInLam (Lit _) = True + canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e + canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e + canInlineInLam _ = False + -- not ticks. Counting ticks cannot be duplicated, and non-counting + -- ticks around a Lam will disappear anyway. + + early_phase = case sm_phase mode of + Phase 0 -> False + _ -> True +-- If we don't have this early_phase test, consider +-- x = length [1,2,3] +-- The full laziness pass carefully floats all the cons cells to +-- top level, and preInlineUnconditionally floats them all back in. +-- Result is (a) static allocation replaced by dynamic allocation +-- (b) many simplifier iterations because this tickles +-- a related problem; only one inlining per pass +-- +-- On the other hand, I have seen cases where top-level fusion is +-- lost if we don't inline top level thing (e.g. string constants) +-- Hence the test for phase zero (which is the phase for all the final +-- simplifications). Until phase zero we take no special notice of +-- top level things, but then we become more leery about inlining +-- them. + +{- +************************************************************************ +* * + postInlineUnconditionally +* * +************************************************************************ + +postInlineUnconditionally +~~~~~~~~~~~~~~~~~~~~~~~~~ +@postInlineUnconditionally@ decides whether to unconditionally inline +a thing based on the form of its RHS; in particular if it has a +trivial RHS. If so, we can inline and discard the binding altogether. + +NB: a loop breaker has must_keep_binding = True and non-loop-breakers +only have *forward* references. Hence, it's safe to discard the binding + +NOTE: This isn't our last opportunity to inline. We're at the binding +site right now, and we'll get another opportunity when we get to the +occurrence(s) + +Note that we do this unconditional inlining only for trivial RHSs. +Don't inline even WHNFs inside lambdas; doing so may simply increase +allocation when the function is called. This isn't the last chance; see +NOTE above. + +NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why? +Because we don't even want to inline them into the RHS of constructor +arguments. See NOTE above + +NB: At one time even NOINLINE was ignored here: if the rhs is trivial +it's best to inline it anyway. We often get a=E; b=a from desugaring, +with both a and b marked NOINLINE. But that seems incompatible with +our new view that inlining is like a RULE, so I'm sticking to the 'active' +story for now. +-} + +postInlineUnconditionally + :: SimplEnv -> TopLevelFlag + -> OutId -- The binder (*not* a CoVar), including its unfolding + -> OccInfo -- From the InId + -> OutExpr + -> Bool +-- Precondition: rhs satisfies the let/app invariant +-- See Note [Core let/app invariant] in GHC.Core +-- Reason: we don't want to inline single uses, or discard dead bindings, +-- for unlifted, side-effect-ful bindings +postInlineUnconditionally env top_lvl bndr occ_info rhs + | not active = False + | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline + -- because it might be referred to "earlier" + | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally] + | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally] + | exprIsTrivial rhs = True + | otherwise + = case occ_info of + -- The point of examining occ_info here is that for *non-values* + -- that occur outside a lambda, the call-site inliner won't have + -- a chance (because it doesn't know that the thing + -- only occurs once). The pre-inliner won't have gotten + -- it either, if the thing occurs in more than one branch + -- So the main target is things like + -- let x = f y in + -- case v of + -- True -> case x of ... + -- False -> case x of ... + -- This is very important in practice; e.g. wheel-seive1 doubles + -- in allocation if you miss this out + OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt } + -- OneOcc => no code-duplication issue + -> smallEnoughToInline dflags unfolding -- Small enough to dup + -- ToDo: consider discount on smallEnoughToInline if int_cxt is true + -- + -- NB: Do NOT inline arbitrarily big things, even if one_br is True + -- Reason: doing so risks exponential behaviour. We simplify a big + -- expression, inline it, and simplify it again. But if the + -- very same thing happens in the big expression, we get + -- exponential cost! + -- PRINCIPLE: when we've already simplified an expression once, + -- make sure that we only inline it if it's reasonably small. + + && (in_lam == NotInsideLam || + -- Outside a lambda, we want to be reasonably aggressive + -- about inlining into multiple branches of case + -- e.g. let x = <non-value> + -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } + -- Inlining can be a big win if C3 is the hot-spot, even if + -- the uses in C1, C2 are not 'interesting' + -- An example that gets worse if you add int_cxt here is 'clausify' + + (isCheapUnfolding unfolding && int_cxt == IsInteresting)) + -- isCheap => acceptable work duplication; in_lam may be true + -- int_cxt to prevent us inlining inside a lambda without some + -- good reason. See the notes on int_cxt in preInlineUnconditionally + + IAmDead -> True -- This happens; for example, the case_bndr during case of + -- known constructor: case (a,b) of x { (p,q) -> ... } + -- Here x isn't mentioned in the RHS, so we don't want to + -- create the (dead) let-binding let x = (a,b) in ... + + _ -> False + +-- Here's an example that we don't handle well: +-- let f = if b then Left (\x.BIG) else Right (\y.BIG) +-- in \y. ....case f of {...} .... +-- Here f is used just once, and duplicating the case work is fine (exprIsCheap). +-- But +-- - We can't preInlineUnconditionally because that would invalidate +-- the occ info for b. +-- - We can't postInlineUnconditionally because the RHS is big, and +-- that risks exponential behaviour +-- - We can't call-site inline, because the rhs is big +-- Alas! + + where + unfolding = idUnfolding bndr + dflags = seDynFlags env + active = isActive (sm_phase (getMode env)) (idInlineActivation bndr) + -- See Note [pre/postInlineUnconditionally in gentle mode] + +{- +Note [Top level and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't do postInlineUnconditionally for top-level things (even for +ones that are trivial): + + * Doing so will inline top-level error expressions that have been + carefully floated out by FloatOut. More generally, it might + replace static allocation with dynamic. + + * Even for trivial expressions there's a problem. Consider + {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-} + blah xs = reverse xs + ruggle = sort + In one simplifier pass we might fire the rule, getting + blah xs = ruggle xs + but in *that* simplifier pass we must not do postInlineUnconditionally + on 'ruggle' because then we'll have an unbound occurrence of 'ruggle' + + If the rhs is trivial it'll be inlined by callSiteInline, and then + the binding will be dead and discarded by the next use of OccurAnal + + * There is less point, because the main goal is to get rid of local + bindings used in multiple case branches. + + * The inliner should inline trivial things at call sites anyway. + + * The Id might be exported. We could check for that separately, + but since we aren't going to postInlineUnconditionally /any/ + top-level bindings, we don't need to test. + +Note [Stable unfoldings and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Do not do postInlineUnconditionally if the Id has a stable unfolding, +otherwise we lose the unfolding. Example + + -- f has stable unfolding with rhs (e |> co) + -- where 'e' is big + f = e |> co + +Then there's a danger we'll optimise to + + f' = e + f = f' |> co + +and now postInlineUnconditionally, losing the stable unfolding on f. Now f' +won't inline because 'e' is too big. + + c.f. Note [Stable unfoldings and preInlineUnconditionally] + + +************************************************************************ +* * + Rebuilding a lambda +* * +************************************************************************ +-} + +mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr +-- mkLam tries three things +-- a) eta reduction, if that gives a trivial expression +-- b) eta expansion [only if there are some value lambdas] + +mkLam _env [] body _cont + = return body +mkLam env bndrs body cont + = do { dflags <- getDynFlags + ; mkLam' dflags bndrs body } + where + mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr + mkLam' dflags bndrs (Cast body co) + | not (any bad bndrs) + -- Note [Casts and lambdas] + = do { lam <- mkLam' dflags bndrs body + ; return (mkCast lam (mkPiCos Representational bndrs co)) } + where + co_vars = tyCoVarsOfCo co + bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars + + mkLam' dflags bndrs body@(Lam {}) + = mkLam' dflags (bndrs ++ bndrs1) body1 + where + (bndrs1, body1) = collectBinders body + + mkLam' dflags bndrs (Tick t expr) + | tickishFloatable t + = mkTick t <$> mkLam' dflags bndrs expr + + mkLam' dflags bndrs body + | gopt Opt_DoEtaReduction dflags + , Just etad_lam <- tryEtaReduce bndrs body + = do { tick (EtaReduction (head bndrs)) + ; return etad_lam } + + | not (contIsRhs cont) -- See Note [Eta-expanding lambdas] + , sm_eta_expand (getMode env) + , any isRuntimeVar bndrs + , let body_arity = exprEtaExpandArity dflags body + , body_arity > 0 + = do { tick (EtaExpansion (head bndrs)) + ; let res = mkLams bndrs (etaExpand body_arity body) + ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body) + , text "after" <+> ppr res]) + ; return res } + + | otherwise + = return (mkLams bndrs body) + +{- +Note [Eta expanding lambdas] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general we *do* want to eta-expand lambdas. Consider + f (\x -> case x of (a,b) -> \s -> blah) +where 's' is a state token, and hence can be eta expanded. This +showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather +important function! + +The eta-expansion will never happen unless we do it now. (Well, it's +possible that CorePrep will do it, but CorePrep only has a half-baked +eta-expander that can't deal with casts. So it's much better to do it +here.) + +However, when the lambda is let-bound, as the RHS of a let, we have a +better eta-expander (in the form of tryEtaExpandRhs), so we don't +bother to try expansion in mkLam in that case; hence the contIsRhs +guard. + +NB: We check the SimplEnv (sm_eta_expand), not DynFlags. + See Note [No eta expansion in stable unfoldings] + +Note [Casts and lambdas] +~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + (\x. (\y. e) `cast` g1) `cast` g2 +There is a danger here that the two lambdas look separated, and the +full laziness pass might float an expression to between the two. + +So this equation in mkLam' floats the g1 out, thus: + (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1) +where x:tx. + +In general, this floats casts outside lambdas, where (I hope) they +might meet and cancel with some other cast: + \x. e `cast` co ===> (\x. e) `cast` (tx -> co) + /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co) + /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co) + (if not (g `in` co)) + +Notice that it works regardless of 'e'. Originally it worked only +if 'e' was itself a lambda, but in some cases that resulted in +fruitless iteration in the simplifier. A good example was when +compiling Text.ParserCombinators.ReadPrec, where we had a definition +like (\x. Get `cast` g) +where Get is a constructor with nonzero arity. Then mkLam eta-expanded +the Get, and the next iteration eta-reduced it, and then eta-expanded +it again. + +Note also the side condition for the case of coercion binders. +It does not make sense to transform + /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g) +because the latter is not well-kinded. + +************************************************************************ +* * + Eta expansion +* * +************************************************************************ +-} + +tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr + -> SimplM (Arity, Bool, OutExpr) +-- See Note [Eta-expanding at let bindings] +-- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then +-- (a) rhs' has manifest arity n +-- (b) if is_bot is True then rhs' applied to n args is guaranteed bottom +tryEtaExpandRhs mode bndr rhs + | Just join_arity <- isJoinId_maybe bndr + = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs + ; return (count isId join_bndrs, exprIsBottom join_body, rhs) } + -- Note [Do not eta-expand join points] + -- But do return the correct arity and bottom-ness, because + -- these are used to set the bndr's IdInfo (#15517) + -- Note [Invariants on join points] invariant 2b, in GHC.Core + + | otherwise + = do { (new_arity, is_bot, new_rhs) <- try_expand + + ; WARN( new_arity < old_id_arity, + (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity + <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) ) + -- Note [Arity decrease] in GHC.Core.Op.Simplify + return (new_arity, is_bot, new_rhs) } + where + try_expand + | exprIsTrivial rhs + = return (exprArity rhs, False, rhs) + + | sm_eta_expand mode -- Provided eta-expansion is on + , new_arity > old_arity -- And the current manifest arity isn't enough + = do { tick (EtaExpansion bndr) + ; return (new_arity, is_bot, etaExpand new_arity rhs) } + + | otherwise + = return (old_arity, is_bot && new_arity == old_arity, rhs) + + dflags = sm_dflags mode + old_arity = exprArity rhs -- See Note [Do not expand eta-expand PAPs] + old_id_arity = idArity bndr + + (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity + new_arity2 = idCallArity bndr + new_arity = max new_arity1 new_arity2 + +{- +Note [Eta-expanding at let bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We now eta expand at let-bindings, which is where the payoff comes. +The most significant thing is that we can do a simple arity analysis +(in GHC.Core.Arity.findRhsArity), which we can't do for free-floating lambdas + +One useful consequence of not eta-expanding lambdas is this example: + genMap :: C a => ... + {-# INLINE genMap #-} + genMap f xs = ... + + myMap :: D a => ... + {-# INLINE myMap #-} + myMap = genMap + +Notice that 'genMap' should only inline if applied to two arguments. +In the stable unfolding for myMap we'll have the unfolding + (\d -> genMap Int (..d..)) +We do not want to eta-expand to + (\d f xs -> genMap Int (..d..) f xs) +because then 'genMap' will inline, and it really shouldn't: at least +as far as the programmer is concerned, it's not applied to two +arguments! + +Note [Do not eta-expand join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similarly to CPR (see Note [Don't w/w join points for CPR] in +GHC.Core.Op.WorkWrap), a join point stands well to gain from its outer binding's +eta-expansion, and eta-expanding a join point is fraught with issues like how to +deal with a cast: + + let join $j1 :: IO () + $j1 = ... + $j2 :: Int -> IO () + $j2 n = if n > 0 then $j1 + else ... + + => + + let join $j1 :: IO () + $j1 = (\eta -> ...) + `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ()) + ~ IO () + $j2 :: Int -> IO () + $j2 n = (\eta -> if n > 0 then $j1 + else ...) + `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ()) + ~ IO () + +The cast here can't be pushed inside the lambda (since it's not casting to a +function type), so the lambda has to stay, but it can't because it contains a +reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather +than try and detect this situation (and whatever other situations crop up!), we +don't bother; again, any surrounding eta-expansion will improve these join +points anyway, since an outer cast can *always* be pushed inside. By the time +CorePrep comes around, the code is very likely to look more like this: + + let join $j1 :: State# RealWorld -> (# State# RealWorld, ()) + $j1 = (...) eta + $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ()) + $j2 = if n > 0 then $j1 + else (...) eta + +Note [Do not eta-expand PAPs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used to have old_arity = manifestArity rhs, which meant that we +would eta-expand even PAPs. But this gives no particular advantage, +and can lead to a massive blow-up in code size, exhibited by #9020. +Suppose we have a PAP + foo :: IO () + foo = returnIO () +Then we can eta-expand do + foo = (\eta. (returnIO () |> sym g) eta) |> g +where + g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #) + +But there is really no point in doing this, and it generates masses of +coercions and whatnot that eventually disappear again. For T9020, GHC +allocated 6.6G before, and 0.8G afterwards; and residency dropped from +1.8G to 45M. + +But note that this won't eta-expand, say + f = \g -> map g +Does it matter not eta-expanding such functions? I'm not sure. Perhaps +strictness analysis will have less to bite on? + + +************************************************************************ +* * +\subsection{Floating lets out of big lambdas} +* * +************************************************************************ + +Note [Floating and type abstraction] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + x = /\a. C e1 e2 +We'd like to float this to + y1 = /\a. e1 + y2 = /\a. e2 + x = /\a. C (y1 a) (y2 a) +for the usual reasons: we want to inline x rather vigorously. + +You may think that this kind of thing is rare. But in some programs it is +common. For example, if you do closure conversion you might get: + + data a :-> b = forall e. (e -> a -> b) :$ e + + f_cc :: forall a. a :-> a + f_cc = /\a. (\e. id a) :$ () + +Now we really want to inline that f_cc thing so that the +construction of the closure goes away. + +So I have elaborated simplLazyBind to understand right-hand sides that look +like + /\ a1..an. body + +and treat them specially. The real work is done in +GHC.Core.Op.Simplify.Utils.abstractFloats, but there is quite a bit of plumbing +in simplLazyBind as well. + +The same transformation is good when there are lets in the body: + + /\abc -> let(rec) x = e in b + ==> + let(rec) x' = /\abc -> let x = x' a b c in e + in + /\abc -> let x = x' a b c in b + +This is good because it can turn things like: + + let f = /\a -> letrec g = ... g ... in g +into + letrec g' = /\a -> ... g' a ... + in + let f = /\ a -> g' a + +which is better. In effect, it means that big lambdas don't impede +let-floating. + +This optimisation is CRUCIAL in eliminating the junk introduced by +desugaring mutually recursive definitions. Don't eliminate it lightly! + +[May 1999] If we do this transformation *regardless* then we can +end up with some pretty silly stuff. For example, + + let + st = /\ s -> let { x1=r1 ; x2=r2 } in ... + in .. +becomes + let y1 = /\s -> r1 + y2 = /\s -> r2 + st = /\s -> ...[y1 s/x1, y2 s/x2] + in .. + +Unless the "..." is a WHNF there is really no point in doing this. +Indeed it can make things worse. Suppose x1 is used strictly, +and is of the form + + x1* = case f y of { (a,b) -> e } + +If we abstract this wrt the tyvar we then can't do the case inline +as we would normally do. + +That's why the whole transformation is part of the same process that +floats let-bindings and constructor arguments out of RHSs. In particular, +it is guarded by the doFloatFromRhs call in simplLazyBind. + +Note [Which type variables to abstract over] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Abstract only over the type variables free in the rhs wrt which the +new binding is abstracted. Note that + + * The naive approach of abstracting wrt the + tyvars free in the Id's /type/ fails. Consider: + /\ a b -> let t :: (a,b) = (e1, e2) + x :: a = fst t + in ... + Here, b isn't free in x's type, but we must nevertheless + abstract wrt b as well, because t's type mentions b. + Since t is floated too, we'd end up with the bogus: + poly_t = /\ a b -> (e1, e2) + poly_x = /\ a -> fst (poly_t a *b*) + + * We must do closeOverKinds. Example (#10934): + f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ... + Here we want to float 't', but we must remember to abstract over + 'k' as well, even though it is not explicitly mentioned in the RHS, + otherwise we get + t = /\ (f:k->*) (a:k). AccFailure @ (f a) + which is obviously bogus. +-} + +abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats + -> OutExpr -> SimplM ([OutBind], OutExpr) +abstractFloats dflags top_lvl main_tvs floats body + = ASSERT( notNull body_floats ) + ASSERT( isNilOL (sfJoinFloats floats) ) + do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats + ; return (float_binds, GHC.Core.Subst.substExpr (text "abstract_floats1") subst body) } + where + is_top_lvl = isTopLevel top_lvl + main_tv_set = mkVarSet main_tvs + body_floats = letFloatBinds (sfLetFloats floats) + empty_subst = GHC.Core.Subst.mkEmptySubst (sfInScope floats) + + abstract :: GHC.Core.Subst.Subst -> OutBind -> SimplM (GHC.Core.Subst.Subst, OutBind) + abstract subst (NonRec id rhs) + = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id + ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs' + subst' = GHC.Core.Subst.extendIdSubst subst id poly_app + ; return (subst', NonRec poly_id2 poly_rhs) } + where + rhs' = GHC.Core.Subst.substExpr (text "abstract_floats2") subst rhs + + -- tvs_here: see Note [Which type variables to abstract over] + tvs_here = scopedSort $ + filter (`elemVarSet` main_tv_set) $ + closeOverKindsList $ + exprSomeFreeVarsList isTyVar rhs' + + abstract subst (Rec prs) + = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids + ; let subst' = GHC.Core.Subst.extendSubstList subst (ids `zip` poly_apps) + poly_pairs = [ mk_poly2 poly_id tvs_here rhs' + | (poly_id, rhs) <- poly_ids `zip` rhss + , let rhs' = GHC.Core.Subst.substExpr (text "abstract_floats") + subst' rhs ] + ; return (subst', Rec poly_pairs) } + where + (ids,rhss) = unzip prs + -- For a recursive group, it's a bit of a pain to work out the minimal + -- set of tyvars over which to abstract: + -- /\ a b c. let x = ...a... in + -- letrec { p = ...x...q... + -- q = .....p...b... } in + -- ... + -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted + -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'. + -- Since it's a pain, we just use the whole set, which is always safe + -- + -- If you ever want to be more selective, remember this bizarre case too: + -- x::a = x + -- Here, we must abstract 'x' over 'a'. + tvs_here = scopedSort main_tvs + + mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr) + mk_poly1 tvs_here var + = do { uniq <- getUniqueM + ; let poly_name = setNameUnique (idName var) uniq -- Keep same name + poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course + poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs + mkLocalId poly_name poly_ty + ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) } + -- In the olden days, it was crucial to copy the occInfo of the original var, + -- because we were looking at occurrence-analysed but as yet unsimplified code! + -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking + -- at already simplified code, so it doesn't matter + -- + -- It's even right to retain single-occurrence or dead-var info: + -- Suppose we started with /\a -> let x = E in B + -- where x occurs once in B. Then we transform to: + -- let x' = /\a -> E in /\a -> let x* = x' a in B + -- where x* has an INLINE prag on it. Now, once x* is inlined, + -- the occurrences of x' will be just the occurrences originally + -- pinned on x. + + mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr) + mk_poly2 poly_id tvs_here rhs + = (poly_id `setIdUnfolding` unf, poly_rhs) + where + poly_rhs = mkLams tvs_here rhs + unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs + + -- We want the unfolding. Consider + -- let + -- x = /\a. let y = ... in Just y + -- in body + -- Then we float the y-binding out (via abstractFloats and addPolyBind) + -- but 'x' may well then be inlined in 'body' in which case we'd like the + -- opportunity to inline 'y' too. + +{- +Note [Abstract over coercions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the +type variable a. Rather than sort this mess out, we simply bale out and abstract +wrt all the type variables if any of them are coercion variables. + + +Historical note: if you use let-bindings instead of a substitution, beware of this: + + -- Suppose we start with: + -- + -- x = /\ a -> let g = G in E + -- + -- Then we'll float to get + -- + -- x = let poly_g = /\ a -> G + -- in /\ a -> let g = poly_g a in E + -- + -- But now the occurrence analyser will see just one occurrence + -- of poly_g, not inside a lambda, so the simplifier will + -- PreInlineUnconditionally poly_g back into g! Badk to square 1! + -- (I used to think that the "don't inline lone occurrences" stuff + -- would stop this happening, but since it's the *only* occurrence, + -- PreInlineUnconditionally kicks in first!) + -- + -- Solution: put an INLINE note on g's RHS, so that poly_g seems + -- to appear many times. (NB: mkInlineMe eliminates + -- such notes on trivial RHSs, so do it manually.) + +************************************************************************ +* * + prepareAlts +* * +************************************************************************ + +prepareAlts tries these things: + +1. Eliminate alternatives that cannot match, including the + DEFAULT alternative. + +2. If the DEFAULT alternative can match only one possible constructor, + then make that constructor explicit. + e.g. + case e of x { DEFAULT -> rhs } + ===> + case e of x { (a,b) -> rhs } + where the type is a single constructor type. This gives better code + when rhs also scrutinises x or e. + +3. Returns a list of the constructors that cannot holds in the + DEFAULT alternative (if there is one) + +Here "cannot match" includes knowledge from GADTs + +It's a good idea to do this stuff before simplifying the alternatives, to +avoid simplifying alternatives we know can't happen, and to come up with +the list of constructors that are handled, to put into the IdInfo of the +case binder, for use when simplifying the alternatives. + +Eliminating the default alternative in (1) isn't so obvious, but it can +happen: + +data Colour = Red | Green | Blue + +f x = case x of + Red -> .. + Green -> .. + DEFAULT -> h x + +h y = case y of + Blue -> .. + DEFAULT -> [ case y of ... ] + +If we inline h into f, the default case of the inlined h can't happen. +If we don't notice this, we may end up filtering out *all* the cases +of the inner case y, which give us nowhere to go! +-} + +prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt]) +-- The returned alternatives can be empty, none are possible +prepareAlts scrut case_bndr' alts + | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr') + -- Case binder is needed just for its type. Note that as an + -- OutId, it has maximum information; this is important. + -- Test simpl013 is an example + = do { us <- getUniquesM + ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts + (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1 + (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2 + -- "idcs" stands for "impossible default data constructors" + -- i.e. the constructors that can't match the default case + ; when yes2 $ tick (FillInCaseDefault case_bndr') + ; when yes3 $ tick (AltMerge case_bndr') + ; return (idcs3, alts3) } + + | otherwise -- Not a data type, so nothing interesting happens + = return ([], alts) + where + imposs_cons = case scrut of + Var v -> otherCons (idUnfolding v) + _ -> [] + + +{- +************************************************************************ +* * + mkCase +* * +************************************************************************ + +mkCase tries these things + +* Note [Nerge nested cases] +* Note [Eliminate identity case] +* Note [Scrutinee constant folding] + +Note [Merge Nested Cases] +~~~~~~~~~~~~~~~~~~~~~~~~~ + case e of b { ==> case e of b { + p1 -> rhs1 p1 -> rhs1 + ... ... + pm -> rhsm pm -> rhsm + _ -> case b of b' { pn -> let b'=b in rhsn + pn -> rhsn ... + ... po -> let b'=b in rhso + po -> rhso _ -> let b'=b in rhsd + _ -> rhsd + } + +which merges two cases in one case when -- the default alternative of +the outer case scrutises the same variable as the outer case. This +transformation is called Case Merging. It avoids that the same +variable is scrutinised multiple times. + +Note [Eliminate Identity Case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + case e of ===> e + True -> True; + False -> False + +and similar friends. + +Note [Scrutinee Constant Folding] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + case x op# k# of _ { ===> case x of _ { + a1# -> e1 (a1# inv_op# k#) -> e1 + a2# -> e2 (a2# inv_op# k#) -> e2 + ... ... + DEFAULT -> ed DEFAULT -> ed + + where (x op# k#) inv_op# k# == x + +And similarly for commuted arguments and for some unary operations. + +The purpose of this transformation is not only to avoid an arithmetic +operation at runtime but to allow other transformations to apply in cascade. + +Example with the "Merge Nested Cases" optimization (from #12877): + + main = case t of t0 + 0## -> ... + DEFAULT -> case t0 `minusWord#` 1## of t1 + 0## -> ... + DEFAULT -> case t1 `minusWord#` 1## of t2 + 0## -> ... + DEFAULT -> case t2 `minusWord#` 1## of _ + 0## -> ... + DEFAULT -> ... + + becomes: + + main = case t of _ + 0## -> ... + 1## -> ... + 2## -> ... + 3## -> ... + DEFAULT -> ... + +There are some wrinkles + +* Do not apply caseRules if there is just a single DEFAULT alternative + case e +# 3# of b { DEFAULT -> rhs } + If we applied the transformation here we would (stupidly) get + case a of b' { DEFAULT -> let b = e +# 3# in rhs } + and now the process may repeat, because that let will really + be a case. + +* The type of the scrutinee might change. E.g. + case tagToEnum (x :: Int#) of (b::Bool) + False -> e1 + True -> e2 + ==> + case x of (b'::Int#) + DEFAULT -> e1 + 1# -> e2 + +* The case binder may be used in the right hand sides, so we need + to make a local binding for it, if it is alive. e.g. + case e +# 10# of b + DEFAULT -> blah...b... + 44# -> blah2...b... + ===> + case e of b' + DEFAULT -> let b = b' +# 10# in blah...b... + 34# -> let b = 44# in blah2...b... + + Note that in the non-DEFAULT cases we know what to bind 'b' to, + whereas in the DEFAULT case we must reconstruct the original value. + But NB: we use b'; we do not duplicate 'e'. + +* In dataToTag we might need to make up some fake binders; + see Note [caseRules for dataToTag] in GHC.Core.Op.ConstantFold +-} + +mkCase, mkCase1, mkCase2, mkCase3 + :: DynFlags + -> OutExpr -> OutId + -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order + -> SimplM OutExpr + +-------------------------------------------------- +-- 1. Merge Nested Cases +-------------------------------------------------- + +mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts) + | gopt Opt_CaseMerge dflags + , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts) + <- stripTicksTop tickishFloatable deflt_rhs + , inner_scrut_var == outer_bndr + = do { tick (CaseMerge outer_bndr) + + ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args ) + (con, args, wrap_rhs rhs) + -- Simplifier's no-shadowing invariant should ensure + -- that outer_bndr is not shadowed by the inner patterns + wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs + -- The let is OK even for unboxed binders, + + wrapped_alts | isDeadBinder inner_bndr = inner_alts + | otherwise = map wrap_alt inner_alts + + merged_alts = mergeAlts outer_alts wrapped_alts + -- NB: mergeAlts gives priority to the left + -- case x of + -- A -> e1 + -- DEFAULT -> case x of + -- A -> e2 + -- B -> e3 + -- When we merge, we must ensure that e1 takes + -- precedence over e2 as the value for A! + + ; fmap (mkTicks ticks) $ + mkCase1 dflags scrut outer_bndr alts_ty merged_alts + } + -- Warning: don't call mkCase recursively! + -- Firstly, there's no point, because inner alts have already had + -- mkCase applied to them, so they won't have a case in their default + -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr + -- in munge_rhs may put a case into the DEFAULT branch! + +mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts + +-------------------------------------------------- +-- 2. Eliminate Identity Case +-------------------------------------------------- + +mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case + | all identity_alt alts + = do { tick (CaseIdentity case_bndr) + ; return (mkTicks ticks $ re_cast scrut rhs1) } + where + ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts) + identity_alt (con, args, rhs) = check_eq rhs con args + + check_eq (Cast rhs co) con args -- See Note [RHS casts] + = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args + check_eq (Tick t e) alt args + = tickishFloatable t && check_eq e alt args + + check_eq (Lit lit) (LitAlt lit') _ = lit == lit' + check_eq (Var v) _ _ | v == case_bndr = True + check_eq (Var v) (DataAlt con) args + | null arg_tys, null args = v == dataConWorkId con + -- Optimisation only + check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $ + mkConApp2 con arg_tys args + check_eq _ _ _ = False + + arg_tys = tyConAppArgs (idType case_bndr) + + -- Note [RHS casts] + -- ~~~~~~~~~~~~~~~~ + -- We've seen this: + -- case e of x { _ -> x `cast` c } + -- And we definitely want to eliminate this case, to give + -- e `cast` c + -- So we throw away the cast from the RHS, and reconstruct + -- it at the other end. All the RHS casts must be the same + -- if (all identity_alt alts) holds. + -- + -- Don't worry about nested casts, because the simplifier combines them + + re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co + re_cast scrut _ = scrut + +mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts + +-------------------------------------------------- +-- 2. Scrutinee Constant Folding +-------------------------------------------------- + +mkCase2 dflags scrut bndr alts_ty alts + | -- See Note [Scrutinee Constant Folding] + case alts of -- Not if there is just a DEFAULT alternative + [(DEFAULT,_,_)] -> False + _ -> True + , gopt Opt_CaseFolding dflags + , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut + = do { bndr' <- newId (fsLit "lwild") (exprType scrut') + + ; alts' <- mapMaybeM (tx_alt tx_con mk_orig bndr') alts + -- mapMaybeM: discard unreachable alternatives + -- See Note [Unreachable caseRules alternatives] + -- in GHC.Core.Op.ConstantFold + + ; mkCase3 dflags scrut' bndr' alts_ty $ + add_default (re_sort alts') + } + + | otherwise + = mkCase3 dflags scrut bndr alts_ty alts + where + -- We need to keep the correct association between the scrutinee and its + -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with + -- "let bndr = ... in": + -- + -- case v + 10 of y =====> case v of y + -- 20 -> e1 10 -> let y = 20 in e1 + -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2 + -- + -- Other transformations give: =====> case v of y' + -- 10 -> let y = 20 in e1 + -- DEFAULT -> let y = y' + 10 in e2 + -- + -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules, + -- to construct an expression equivalent to the original one, for use + -- in the DEFAULT case + + tx_alt :: (AltCon -> Maybe AltCon) -> (Id -> CoreExpr) -> Id + -> CoreAlt -> SimplM (Maybe CoreAlt) + tx_alt tx_con mk_orig new_bndr (con, bs, rhs) + = case tx_con con of + Nothing -> return Nothing + Just con' -> do { bs' <- mk_new_bndrs new_bndr con' + ; return (Just (con', bs', rhs')) } + where + rhs' | isDeadBinder bndr = rhs + | otherwise = bindNonRec bndr orig_val rhs + + orig_val = case con of + DEFAULT -> mk_orig new_bndr + LitAlt l -> Lit l + DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs + + mk_new_bndrs new_bndr (DataAlt dc) + | not (isNullaryRepDataCon dc) + = -- For non-nullary data cons we must invent some fake binders + -- See Note [caseRules for dataToTag] in GHC.Core.Op.ConstantFold + do { us <- getUniquesM + ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc + (tyConAppArgs (idType new_bndr)) + ; return (ex_tvs ++ arg_ids) } + mk_new_bndrs _ _ = return [] + + re_sort :: [CoreAlt] -> [CoreAlt] + -- Sort the alternatives to re-establish + -- GHC.Core Note [Case expression invariants] + re_sort alts = sortBy cmpAlt alts + + add_default :: [CoreAlt] -> [CoreAlt] + -- See Note [Literal cases] + add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts + add_default alts = alts + +{- Note [Literal cases] +~~~~~~~~~~~~~~~~~~~~~~~ +If we have + case tagToEnum (a ># b) of + False -> e1 + True -> e2 + +then caseRules for TagToEnum will turn it into + case tagToEnum (a ># b) of + 0# -> e1 + 1# -> e2 + +Since the case is exhaustive (all cases are) we can convert it to + case tagToEnum (a ># b) of + DEFAULT -> e1 + 1# -> e2 + +This may generate sligthtly better code (although it should not, since +all cases are exhaustive) and/or optimise better. I'm not certain that +it's necessary, but currently we do make this change. We do it here, +NOT in the TagToEnum rules (see "Beware" in Note [caseRules for tagToEnum] +in GHC.Core.Op.ConstantFold) +-} + +-------------------------------------------------- +-- Catch-all +-------------------------------------------------- +mkCase3 _dflags scrut bndr alts_ty alts + = return (Case scrut bndr alts_ty alts) + +-- See Note [Exitification] and Note [Do not inline exit join points] in +-- GHC.Core.Op.Exitify +-- This lives here (and not in Id) because occurrence info is only valid on +-- InIds, so it's crucial that isExitJoinId is only called on freshly +-- occ-analysed code. It's not a generic function you can call anywhere. +isExitJoinId :: Var -> Bool +isExitJoinId id + = isJoinId id + && isOneOcc (idOccInfo id) + && occ_in_lam (idOccInfo id) == IsInsideLam + +{- +Note [Dead binders] +~~~~~~~~~~~~~~~~~~~~ +Note that dead-ness is maintained by the simplifier, so that it is +accurate after simplification as well as before. + + +Note [Cascading case merge] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Case merging should cascade in one sweep, because it +happens bottom-up + + case e of a { + DEFAULT -> case a of b + DEFAULT -> case b of c { + DEFAULT -> e + A -> ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> case a of b + DEFAULT -> let c = b in e + A -> let c = b in ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> let b = a in let c = b in e + A -> let b = a in let c = b in ea + B -> let b = a in eb + C -> ec + + +However here's a tricky case that we still don't catch, and I don't +see how to catch it in one pass: + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +After occurrence analysis (and its binder-swap) we get this + + case x of c1 { I# a1 -> + let x = c1 in -- Binder-swap addition + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +When we simplify the inner case x, we'll see that +x=c1=I# a1. So we'll bind a2 to a1, and get + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case a1 of ... + +This is correct, but we can't do a case merge in this sweep +because c2 /= a1. Reason: the binding c1=I# a1 went inwards +without getting changed to c1=I# c2. + +I don't think this is worth fixing, even if I knew how. It'll +all come out in the next pass anyway. +-} diff --git a/compiler/GHC/Core/Op/SpecConstr.hs b/compiler/GHC/Core/Op/SpecConstr.hs new file mode 100644 index 0000000000..4522e2d23c --- /dev/null +++ b/compiler/GHC/Core/Op/SpecConstr.hs @@ -0,0 +1,2360 @@ +{- +ToDo [Oct 2013] +~~~~~~~~~~~~~~~ +1. Nuke ForceSpecConstr for good (it is subsumed by GHC.Types.SPEC in ghc-prim) +2. Nuke NoSpecConstr + + +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section[SpecConstr]{Specialise over constructors} +-} + +{-# LANGUAGE CPP #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module GHC.Core.Op.SpecConstr( + specConstrProgram, + SpecConstrAnnotation(..) + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core +import GHC.Core.Subst +import GHC.Core.Utils +import GHC.Core.Unfold ( couldBeSmallEnoughToInline ) +import GHC.Core.FVs ( exprsFreeVarsList ) +import GHC.Core.Op.Monad +import Literal ( litIsLifted ) +import GHC.Driver.Types ( ModGuts(..) ) +import GHC.Core.Op.WorkWrap.Lib ( isWorkerSmallEnough, mkWorkerArgs ) +import GHC.Core.DataCon +import GHC.Core.Coercion hiding( substCo ) +import GHC.Core.Rules +import GHC.Core.Type hiding ( substTy ) +import GHC.Core.TyCon ( tyConName ) +import Id +import GHC.Core.Ppr ( pprParendExpr ) +import GHC.Core.Make ( mkImpossibleExpr ) +import VarEnv +import VarSet +import Name +import BasicTypes +import GHC.Driver.Session ( DynFlags(..), GeneralFlag( Opt_SpecConstrKeen ) + , gopt, hasPprDebug ) +import Maybes ( orElse, catMaybes, isJust, isNothing ) +import Demand +import Cpr +import GHC.Serialized ( deserializeWithData ) +import Util +import Pair +import UniqSupply +import Outputable +import FastString +import UniqFM +import MonadUtils +import Control.Monad ( zipWithM ) +import Data.List +import PrelNames ( specTyConName ) +import Module +import GHC.Core.TyCon ( TyCon ) +import GHC.Exts( SpecConstrAnnotation(..) ) +import Data.Ord( comparing ) + +{- +----------------------------------------------------- + Game plan +----------------------------------------------------- + +Consider + drop n [] = [] + drop 0 xs = [] + drop n (x:xs) = drop (n-1) xs + +After the first time round, we could pass n unboxed. This happens in +numerical code too. Here's what it looks like in Core: + + drop n xs = case xs of + [] -> [] + (y:ys) -> case n of + I# n# -> case n# of + 0 -> [] + _ -> drop (I# (n# -# 1#)) xs + +Notice that the recursive call has an explicit constructor as argument. +Noticing this, we can make a specialised version of drop + + RULE: drop (I# n#) xs ==> drop' n# xs + + drop' n# xs = let n = I# n# in ...orig RHS... + +Now the simplifier will apply the specialisation in the rhs of drop', giving + + drop' n# xs = case xs of + [] -> [] + (y:ys) -> case n# of + 0 -> [] + _ -> drop' (n# -# 1#) xs + +Much better! + +We'd also like to catch cases where a parameter is carried along unchanged, +but evaluated each time round the loop: + + f i n = if i>0 || i>n then i else f (i*2) n + +Here f isn't strict in n, but we'd like to avoid evaluating it each iteration. +In Core, by the time we've w/wd (f is strict in i) we get + + f i# n = case i# ># 0 of + False -> I# i# + True -> case n of { I# n# -> + case i# ># n# of + False -> I# i# + True -> f (i# *# 2#) n + +At the call to f, we see that the argument, n is known to be (I# n#), +and n is evaluated elsewhere in the body of f, so we can play the same +trick as above. + + +Note [Reboxing] +~~~~~~~~~~~~~~~ +We must be careful not to allocate the same constructor twice. Consider + f p = (...(case p of (a,b) -> e)...p..., + ...let t = (r,s) in ...t...(f t)...) +At the recursive call to f, we can see that t is a pair. But we do NOT want +to make a specialised copy: + f' a b = let p = (a,b) in (..., ...) +because now t is allocated by the caller, then r and s are passed to the +recursive call, which allocates the (r,s) pair again. + +This happens if + (a) the argument p is used in other than a case-scrutinisation way. + (b) the argument to the call is not a 'fresh' tuple; you have to + look into its unfolding to see that it's a tuple + +Hence the "OR" part of Note [Good arguments] below. + +ALTERNATIVE 2: pass both boxed and unboxed versions. This no longer saves +allocation, but does perhaps save evals. In the RULE we'd have +something like + + f (I# x#) = f' (I# x#) x# + +If at the call site the (I# x) was an unfolding, then we'd have to +rely on CSE to eliminate the duplicate allocation.... This alternative +doesn't look attractive enough to pursue. + +ALTERNATIVE 3: ignore the reboxing problem. The trouble is that +the conservative reboxing story prevents many useful functions from being +specialised. Example: + foo :: Maybe Int -> Int -> Int + foo (Just m) 0 = 0 + foo x@(Just m) n = foo x (n-m) +Here the use of 'x' will clearly not require boxing in the specialised function. + +The strictness analyser has the same problem, in fact. Example: + f p@(a,b) = ... +If we pass just 'a' and 'b' to the worker, it might need to rebox the +pair to create (a,b). A more sophisticated analysis might figure out +precisely the cases in which this could happen, but the strictness +analyser does no such analysis; it just passes 'a' and 'b', and hopes +for the best. + +So my current choice is to make SpecConstr similarly aggressive, and +ignore the bad potential of reboxing. + + +Note [Good arguments] +~~~~~~~~~~~~~~~~~~~~~ +So we look for + +* A self-recursive function. Ignore mutual recursion for now, + because it's less common, and the code is simpler for self-recursion. + +* EITHER + + a) At a recursive call, one or more parameters is an explicit + constructor application + AND + That same parameter is scrutinised by a case somewhere in + the RHS of the function + + OR + + b) At a recursive call, one or more parameters has an unfolding + that is an explicit constructor application + AND + That same parameter is scrutinised by a case somewhere in + the RHS of the function + AND + Those are the only uses of the parameter (see Note [Reboxing]) + + +What to abstract over +~~~~~~~~~~~~~~~~~~~~~ +There's a bit of a complication with type arguments. If the call +site looks like + + f p = ...f ((:) [a] x xs)... + +then our specialised function look like + + f_spec x xs = let p = (:) [a] x xs in ....as before.... + +This only makes sense if either + a) the type variable 'a' is in scope at the top of f, or + b) the type variable 'a' is an argument to f (and hence fs) + +Actually, (a) may hold for value arguments too, in which case +we may not want to pass them. Suppose 'x' is in scope at f's +defn, but xs is not. Then we'd like + + f_spec xs = let p = (:) [a] x xs in ....as before.... + +Similarly (b) may hold too. If x is already an argument at the +call, no need to pass it again. + +Finally, if 'a' is not in scope at the call site, we could abstract +it as we do the term variables: + + f_spec a x xs = let p = (:) [a] x xs in ...as before... + +So the grand plan is: + + * abstract the call site to a constructor-only pattern + e.g. C x (D (f p) (g q)) ==> C s1 (D s2 s3) + + * Find the free variables of the abstracted pattern + + * Pass these variables, less any that are in scope at + the fn defn. But see Note [Shadowing] below. + + +NOTICE that we only abstract over variables that are not in scope, +so we're in no danger of shadowing variables used in "higher up" +in f_spec's RHS. + + +Note [Shadowing] +~~~~~~~~~~~~~~~~ +In this pass we gather up usage information that may mention variables +that are bound between the usage site and the definition site; or (more +seriously) may be bound to something different at the definition site. +For example: + + f x = letrec g y v = let x = ... + in ...(g (a,b) x)... + +Since 'x' is in scope at the call site, we may make a rewrite rule that +looks like + RULE forall a,b. g (a,b) x = ... +But this rule will never match, because it's really a different 'x' at +the call site -- and that difference will be manifest by the time the +simplifier gets to it. [A worry: the simplifier doesn't *guarantee* +no-shadowing, so perhaps it may not be distinct?] + +Anyway, the rule isn't actually wrong, it's just not useful. One possibility +is to run deShadowBinds before running SpecConstr, but instead we run the +simplifier. That gives the simplest possible program for SpecConstr to +chew on; and it virtually guarantees no shadowing. + +Note [Specialising for constant parameters] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This one is about specialising on a *constant* (but not necessarily +constructor) argument + + foo :: Int -> (Int -> Int) -> Int + foo 0 f = 0 + foo m f = foo (f m) (+1) + +It produces + + lvl_rmV :: GHC.Base.Int -> GHC.Base.Int + lvl_rmV = + \ (ds_dlk :: GHC.Base.Int) -> + case ds_dlk of wild_alH { GHC.Base.I# x_alG -> + GHC.Base.I# (GHC.Prim.+# x_alG 1) + + T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> + GHC.Prim.Int# + T.$wfoo = + \ (ww_sme :: GHC.Prim.Int#) (w_smg :: GHC.Base.Int -> GHC.Base.Int) -> + case ww_sme of ds_Xlw { + __DEFAULT -> + case w_smg (GHC.Base.I# ds_Xlw) of w1_Xmo { GHC.Base.I# ww1_Xmz -> + T.$wfoo ww1_Xmz lvl_rmV + }; + 0 -> 0 + } + +The recursive call has lvl_rmV as its argument, so we could create a specialised copy +with that argument baked in; that is, not passed at all. Now it can perhaps be inlined. + +When is this worth it? Call the constant 'lvl' +- If 'lvl' has an unfolding that is a constructor, see if the corresponding + parameter is scrutinised anywhere in the body. + +- If 'lvl' has an unfolding that is a inlinable function, see if the corresponding + parameter is applied (...to enough arguments...?) + + Also do this is if the function has RULES? + +Also + +Note [Specialising for lambda parameters] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + foo :: Int -> (Int -> Int) -> Int + foo 0 f = 0 + foo m f = foo (f m) (\n -> n-m) + +This is subtly different from the previous one in that we get an +explicit lambda as the argument: + + T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> + GHC.Prim.Int# + T.$wfoo = + \ (ww_sm8 :: GHC.Prim.Int#) (w_sma :: GHC.Base.Int -> GHC.Base.Int) -> + case ww_sm8 of ds_Xlr { + __DEFAULT -> + case w_sma (GHC.Base.I# ds_Xlr) of w1_Xmf { GHC.Base.I# ww1_Xmq -> + T.$wfoo + ww1_Xmq + (\ (n_ad3 :: GHC.Base.Int) -> + case n_ad3 of wild_alB { GHC.Base.I# x_alA -> + GHC.Base.I# (GHC.Prim.-# x_alA ds_Xlr) + }) + }; + 0 -> 0 + } + +I wonder if SpecConstr couldn't be extended to handle this? After all, +lambda is a sort of constructor for functions and perhaps it already +has most of the necessary machinery? + +Furthermore, there's an immediate win, because you don't need to allocate the lambda +at the call site; and if perchance it's called in the recursive call, then you +may avoid allocating it altogether. Just like for constructors. + +Looks cool, but probably rare...but it might be easy to implement. + + +Note [SpecConstr for casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + data family T a :: * + data instance T Int = T Int + + foo n = ... + where + go (T 0) = 0 + go (T n) = go (T (n-1)) + +The recursive call ends up looking like + go (T (I# ...) `cast` g) +So we want to spot the constructor application inside the cast. +That's why we have the Cast case in argToPat + +Note [Local recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a *local* recursive group, we can see all the calls to the +function, so we seed the specialisation loop from the calls in the +body, not from the calls in the RHS. Consider: + + bar m n = foo n (n,n) (n,n) (n,n) (n,n) + where + foo n p q r s + | n == 0 = m + | n > 3000 = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s } + | n > 2000 = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s } + | n > 1000 = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s } + | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) } + +If we start with the RHSs of 'foo', we get lots and lots of specialisations, +most of which are not needed. But if we start with the (single) call +in the rhs of 'bar' we get exactly one fully-specialised copy, and all +the recursive calls go to this fully-specialised copy. Indeed, the original +function is later collected as dead code. This is very important in +specialising the loops arising from stream fusion, for example in NDP where +we were getting literally hundreds of (mostly unused) specialisations of +a local function. + +In a case like the above we end up never calling the original un-specialised +function. (Although we still leave its code around just in case.) + +However, if we find any boring calls in the body, including *unsaturated* +ones, such as + letrec foo x y = ....foo... + in map foo xs +then we will end up calling the un-specialised function, so then we *should* +use the calls in the un-specialised RHS as seeds. We call these +"boring call patterns", and callsToPats reports if it finds any of these. + +Note [Seeding top-level recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This seeding is done in the binding for seed_calls in specRec. + +1. If all the bindings in a top-level recursive group are local (not + exported), then all the calls are in the rest of the top-level + bindings. This means we can specialise with those call patterns + ONLY, and NOT with the RHSs of the recursive group (exactly like + Note [Local recursive groups]) + +2. But if any of the bindings are exported, the function may be called + with any old arguments, so (for lack of anything better) we specialise + based on + (a) the call patterns in the RHS + (b) the call patterns in the rest of the top-level bindings + NB: before Apr 15 we used (a) only, but Dimitrios had an example + where (b) was crucial, so I added that. + Adding (b) also improved nofib allocation results: + multiplier: 4% better + minimax: 2.8% better + +Actually in case (2), instead of using the calls from the RHS, it +would be better to specialise in the importing module. We'd need to +add an INLINABLE pragma to the function, and then it can be +specialised in the importing scope, just as is done for type classes +in GHC.Core.Op.Specialise.specImports. This remains to be done (#10346). + +Note [Top-level recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +To get the call usage information from "the rest of the top level +bindings" (c.f. Note [Seeding top-level recursive groups]), we work +backwards through the top-level bindings so we see the usage before we +get to the binding of the function. Before we can collect the usage +though, we go through all the bindings and add them to the +environment. This is necessary because usage is only tracked for +functions in the environment. These two passes are called + 'go' and 'goEnv' +in specConstrProgram. (Looks a bit revolting to me.) + +Note [Do not specialise diverging functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Specialising a function that just diverges is a waste of code. +Furthermore, it broke GHC (simpl014) thus: + {-# STR Sb #-} + f = \x. case x of (a,b) -> f x +If we specialise f we get + f = \x. case x of (a,b) -> fspec a b +But fspec doesn't have decent strictness info. As it happened, +(f x) :: IO t, so the state hack applied and we eta expanded fspec, +and hence f. But now f's strictness is less than its arity, which +breaks an invariant. + + +Note [Forcing specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +With stream fusion and in other similar cases, we want to fully +specialise some (but not necessarily all!) loops regardless of their +size and the number of specialisations. + +We allow a library to do this, in one of two ways (one which is +deprecated): + + 1) Add a parameter of type GHC.Types.SPEC (from ghc-prim) to the loop body. + + 2) (Deprecated) Annotate a type with ForceSpecConstr from GHC.Exts, + and then add *that* type as a parameter to the loop body + +The reason #2 is deprecated is because it requires GHCi, which isn't +available for things like a cross compiler using stage1. + +Here's a (simplified) example from the `vector` package. You may bring +the special 'force specialization' type into scope by saying: + + import GHC.Types (SPEC(..)) + +or by defining your own type (again, deprecated): + + data SPEC = SPEC | SPEC2 + {-# ANN type SPEC ForceSpecConstr #-} + +(Note this is the exact same definition of GHC.Types.SPEC, just +without the annotation.) + +After that, you say: + + foldl :: (a -> b -> a) -> a -> Stream b -> a + {-# INLINE foldl #-} + foldl f z (Stream step s _) = foldl_loop SPEC z s + where + foldl_loop !sPEC z s = case step s of + Yield x s' -> foldl_loop sPEC (f z x) s' + Skip -> foldl_loop sPEC z s' + Done -> z + +SpecConstr will spot the SPEC parameter and always fully specialise +foldl_loop. Note that + + * We have to prevent the SPEC argument from being removed by + w/w which is why (a) SPEC is a sum type, and (b) we have to seq on + the SPEC argument. + + * And lastly, the SPEC argument is ultimately eliminated by + SpecConstr itself so there is no runtime overhead. + +This is all quite ugly; we ought to come up with a better design. + +ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set +sc_force to True when calling specLoop. This flag does four things: + + * Ignore specConstrThreshold, to specialise functions of arbitrary size + (see scTopBind) + * Ignore specConstrCount, to make arbitrary numbers of specialisations + (see specialise) + * Specialise even for arguments that are not scrutinised in the loop + (see argToPat; #4448) + * Only specialise on recursive types a finite number of times + (see is_too_recursive; #5550; Note [Limit recursive specialisation]) + +The flag holds only for specialising a single binding group, and NOT +for nested bindings. (So really it should be passed around explicitly +and not stored in ScEnv.) #14379 turned out to be caused by + f SPEC x = let g1 x = ... + in ... +We force-specialise f (because of the SPEC), but that generates a specialised +copy of g1 (as well as the original). Alas g1 has a nested binding g2; and +in each copy of g1 we get an unspecialised and specialised copy of g2; and so +on. Result, exponential. So the force-spec flag now only applies to one +level of bindings at a time. + +Mechanism for this one-level-only thing: + + - Switch it on at the call to specRec, in scExpr and scTopBinds + - Switch it off when doing the RHSs; + this can be done very conveniently in decreaseSpecCount + +What alternatives did I consider? + +* Annotating the loop itself doesn't work because (a) it is local and + (b) it will be w/w'ed and having w/w propagating annotations somehow + doesn't seem like a good idea. The types of the loop arguments + really seem to be the most persistent thing. + +* Annotating the types that make up the loop state doesn't work, + either, because (a) it would prevent us from using types like Either + or tuples here, (b) we don't want to restrict the set of types that + can be used in Stream states and (c) some types are fixed by the + user (e.g., the accumulator here) but we still want to specialise as + much as possible. + +Alternatives to ForceSpecConstr +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Instead of giving the loop an extra argument of type SPEC, we +also considered *wrapping* arguments in SPEC, thus + data SPEC a = SPEC a | SPEC2 + + loop = \arg -> case arg of + SPEC state -> + case state of (x,y) -> ... loop (SPEC (x',y')) ... + S2 -> error ... +The idea is that a SPEC argument says "specialise this argument +regardless of whether the function case-analyses it". But this +doesn't work well: + * SPEC must still be a sum type, else the strictness analyser + eliminates it + * But that means that 'loop' won't be strict in its real payload +This loss of strictness in turn screws up specialisation, because +we may end up with calls like + loop (SPEC (case z of (p,q) -> (q,p))) +Without the SPEC, if 'loop' were strict, the case would move out +and we'd see loop applied to a pair. But if 'loop' isn't strict +this doesn't look like a specialisable call. + +Note [Limit recursive specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is possible for ForceSpecConstr to cause an infinite loop of specialisation. +Because there is no limit on the number of specialisations, a recursive call with +a recursive constructor as an argument (for example, list cons) will generate +a specialisation for that constructor. If the resulting specialisation also +contains a recursive call with the constructor, this could proceed indefinitely. + +For example, if ForceSpecConstr is on: + loop :: [Int] -> [Int] -> [Int] + loop z [] = z + loop z (x:xs) = loop (x:z) xs +this example will create a specialisation for the pattern + loop (a:b) c = loop' a b c + + loop' a b [] = (a:b) + loop' a b (x:xs) = loop (x:(a:b)) xs +and a new pattern is found: + loop (a:(b:c)) d = loop'' a b c d +which can continue indefinitely. + +Roman's suggestion to fix this was to stop after a couple of times on recursive types, +but still specialising on non-recursive types as much as possible. + +To implement this, we count the number of times we have gone round the +"specialise recursively" loop ('go' in 'specRec'). Once have gone round +more than N times (controlled by -fspec-constr-recursive=N) we check + + - If sc_force is off, and sc_count is (Just max) then we don't + need to do anything: trim_pats will limit the number of specs + + - Otherwise check if any function has now got more than (sc_count env) + specialisations. If sc_count is "no limit" then we arbitrarily + choose 10 as the limit (ugh). + +See #5550. Also #13623, where this test had become over-aggressive, +and we lost a wonderful specialisation that we really wanted! + +Note [NoSpecConstr] +~~~~~~~~~~~~~~~~~~~ +The ignoreDataCon stuff allows you to say + {-# ANN type T NoSpecConstr #-} +to mean "don't specialise on arguments of this type". It was added +before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised +regardless of size; and then we needed a way to turn that *off*. Now +that we have ForceSpecConstr, this NoSpecConstr is probably redundant. +(Used only for PArray, TODO: remove?) + +----------------------------------------------------- + Stuff not yet handled +----------------------------------------------------- + +Here are notes arising from Roman's work that I don't want to lose. + +Example 1 +~~~~~~~~~ + data T a = T !a + + foo :: Int -> T Int -> Int + foo 0 t = 0 + foo x t | even x = case t of { T n -> foo (x-n) t } + | otherwise = foo (x-1) t + +SpecConstr does no specialisation, because the second recursive call +looks like a boxed use of the argument. A pity. + + $wfoo_sFw :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int# + $wfoo_sFw = + \ (ww_sFo [Just L] :: GHC.Prim.Int#) (w_sFq [Just L] :: T.T GHC.Base.Int) -> + case ww_sFo of ds_Xw6 [Just L] { + __DEFAULT -> + case GHC.Prim.remInt# ds_Xw6 2 of wild1_aEF [Dead Just A] { + __DEFAULT -> $wfoo_sFw (GHC.Prim.-# ds_Xw6 1) w_sFq; + 0 -> + case w_sFq of wild_Xy [Just L] { T.T n_ad5 [Just U(L)] -> + case n_ad5 of wild1_aET [Just A] { GHC.Base.I# y_aES [Just L] -> + $wfoo_sFw (GHC.Prim.-# ds_Xw6 y_aES) wild_Xy + } } }; + 0 -> 0 + +Example 2 +~~~~~~~~~ + data a :*: b = !a :*: !b + data T a = T !a + + foo :: (Int :*: T Int) -> Int + foo (0 :*: t) = 0 + foo (x :*: t) | even x = case t of { T n -> foo ((x-n) :*: t) } + | otherwise = foo ((x-1) :*: t) + +Very similar to the previous one, except that the parameters are now in +a strict tuple. Before SpecConstr, we have + + $wfoo_sG3 :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int# + $wfoo_sG3 = + \ (ww_sFU [Just L] :: GHC.Prim.Int#) (ww_sFW [Just L] :: T.T + GHC.Base.Int) -> + case ww_sFU of ds_Xws [Just L] { + __DEFAULT -> + case GHC.Prim.remInt# ds_Xws 2 of wild1_aEZ [Dead Just A] { + __DEFAULT -> + case ww_sFW of tpl_B2 [Just L] { T.T a_sFo [Just A] -> + $wfoo_sG3 (GHC.Prim.-# ds_Xws 1) tpl_B2 -- $wfoo1 + }; + 0 -> + case ww_sFW of wild_XB [Just A] { T.T n_ad7 [Just S(L)] -> + case n_ad7 of wild1_aFd [Just L] { GHC.Base.I# y_aFc [Just L] -> + $wfoo_sG3 (GHC.Prim.-# ds_Xws y_aFc) wild_XB -- $wfoo2 + } } }; + 0 -> 0 } + +We get two specialisations: +"SC:$wfoo1" [0] __forall {a_sFB :: GHC.Base.Int sc_sGC :: GHC.Prim.Int#} + Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int a_sFB) + = Foo.$s$wfoo1 a_sFB sc_sGC ; +"SC:$wfoo2" [0] __forall {y_aFp :: GHC.Prim.Int# sc_sGC :: GHC.Prim.Int#} + Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int (GHC.Base.I# y_aFp)) + = Foo.$s$wfoo y_aFp sc_sGC ; + +But perhaps the first one isn't good. After all, we know that tpl_B2 is +a T (I# x) really, because T is strict and Int has one constructor. (We can't +unbox the strict fields, because T is polymorphic!) + +************************************************************************ +* * +\subsection{Top level wrapper stuff} +* * +************************************************************************ +-} + +specConstrProgram :: ModGuts -> CoreM ModGuts +specConstrProgram guts + = do + dflags <- getDynFlags + us <- getUniqueSupplyM + (_, annos) <- getFirstAnnotations deserializeWithData guts + this_mod <- getModule + let binds' = reverse $ fst $ initUs us $ do + -- Note [Top-level recursive groups] + (env, binds) <- goEnv (initScEnv dflags this_mod annos) + (mg_binds guts) + -- binds is identical to (mg_binds guts), except that the + -- binders on the LHS have been replaced by extendBndr + -- (SPJ this seems like overkill; I don't think the binders + -- will change at all; and we don't substitute in the RHSs anyway!!) + go env nullUsage (reverse binds) + + return (guts { mg_binds = binds' }) + where + -- See Note [Top-level recursive groups] + goEnv env [] = return (env, []) + goEnv env (bind:binds) = do (env', bind') <- scTopBindEnv env bind + (env'', binds') <- goEnv env' binds + return (env'', bind' : binds') + + -- Arg list of bindings is in reverse order + go _ _ [] = return [] + go env usg (bind:binds) = do (usg', bind') <- scTopBind env usg bind + binds' <- go env usg' binds + return (bind' : binds') + +{- +************************************************************************ +* * +\subsection{Environment: goes downwards} +* * +************************************************************************ + +Note [Work-free values only in environment] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sc_vals field keeps track of in-scope value bindings, so +that if we come across (case x of Just y ->...) we can reduce the +case from knowing that x is bound to a pair. + +But only *work-free* values are ok here. For example if the envt had + x -> Just (expensive v) +then we do NOT want to expand to + let y = expensive v in ... +because the x-binding still exists and we've now duplicated (expensive v). + +This seldom happens because let-bound constructor applications are +ANF-ised, but it can happen as a result of on-the-fly transformations in +SpecConstr itself. Here is #7865: + + let { + a'_shr = + case xs_af8 of _ { + [] -> acc_af6; + : ds_dgt [Dmd=<L,A>] ds_dgu [Dmd=<L,A>] -> + (expensive x_af7, x_af7 + } } in + let { + ds_sht = + case a'_shr of _ { (p'_afd, q'_afe) -> + TSpecConstr_DoubleInline.recursive + (GHC.Types.: @ GHC.Types.Int x_af7 wild_X6) (q'_afe, p'_afd) + } } in + +When processed knowing that xs_af8 was bound to a cons, we simplify to + a'_shr = (expensive x_af7, x_af7) +and we do NOT want to inline that at the occurrence of a'_shr in ds_sht. +(There are other occurrences of a'_shr.) No no no. + +It would be possible to do some on-the-fly ANF-ising, so that a'_shr turned +into a work-free value again, thus + a1 = expensive x_af7 + a'_shr = (a1, x_af7) +but that's more work, so until its shown to be important I'm going to +leave it for now. + +Note [Making SpecConstr keener] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this, in (perf/should_run/T9339) + last (filter odd [1..1000]) + +After optimisation, including SpecConstr, we get: + f :: Int# -> Int -> Int + f x y = case case remInt# x 2# of + __DEFAULT -> case x of + __DEFAULT -> f (+# wild_Xp 1#) (I# x) + 1000000# -> ... + 0# -> case x of + __DEFAULT -> f (+# wild_Xp 1#) y + 1000000# -> y + +Not good! We build an (I# x) box every time around the loop. +SpecConstr (as described in the paper) does not specialise f, despite +the call (f ... (I# x)) because 'y' is not scrutinised in the body. +But it is much better to specialise f for the case where the argument +is of form (I# x); then we build the box only when returning y, which +is on the cold path. + +Another example: + + f x = ...(g x).... + +Here 'x' is not scrutinised in f's body; but if we did specialise 'f' +then the call (g x) might allow 'g' to be specialised in turn. + +So sc_keen controls whether or not we take account of whether argument is +scrutinised in the body. True <=> ignore that, and specialise whenever +the function is applied to a data constructor. +-} + +data ScEnv = SCE { sc_dflags :: DynFlags, + sc_module :: !Module, + sc_size :: Maybe Int, -- Size threshold + -- Nothing => no limit + + sc_count :: Maybe Int, -- Max # of specialisations for any one fn + -- Nothing => no limit + -- See Note [Avoiding exponential blowup] + + sc_recursive :: Int, -- Max # of specialisations over recursive type. + -- Stops ForceSpecConstr from diverging. + + sc_keen :: Bool, -- Specialise on arguments that are known + -- constructors, even if they are not + -- scrutinised in the body. See + -- Note [Making SpecConstr keener] + + sc_force :: Bool, -- Force specialisation? + -- See Note [Forcing specialisation] + + sc_subst :: Subst, -- Current substitution + -- Maps InIds to OutExprs + + sc_how_bound :: HowBoundEnv, + -- Binds interesting non-top-level variables + -- Domain is OutVars (*after* applying the substitution) + + sc_vals :: ValueEnv, + -- Domain is OutIds (*after* applying the substitution) + -- Used even for top-level bindings (but not imported ones) + -- The range of the ValueEnv is *work-free* values + -- such as (\x. blah), or (Just v) + -- but NOT (Just (expensive v)) + -- See Note [Work-free values only in environment] + + sc_annotations :: UniqFM SpecConstrAnnotation + } + +--------------------- +type HowBoundEnv = VarEnv HowBound -- Domain is OutVars + +--------------------- +type ValueEnv = IdEnv Value -- Domain is OutIds +data Value = ConVal AltCon [CoreArg] -- _Saturated_ constructors + -- The AltCon is never DEFAULT + | LambdaVal -- Inlinable lambdas or PAPs + +instance Outputable Value where + ppr (ConVal con args) = ppr con <+> interpp'SP args + ppr LambdaVal = text "<Lambda>" + +--------------------- +initScEnv :: DynFlags -> Module -> UniqFM SpecConstrAnnotation -> ScEnv +initScEnv dflags this_mod anns + = SCE { sc_dflags = dflags, + sc_module = this_mod, + sc_size = specConstrThreshold dflags, + sc_count = specConstrCount dflags, + sc_recursive = specConstrRecursive dflags, + sc_keen = gopt Opt_SpecConstrKeen dflags, + sc_force = False, + sc_subst = emptySubst, + sc_how_bound = emptyVarEnv, + sc_vals = emptyVarEnv, + sc_annotations = anns } + +data HowBound = RecFun -- These are the recursive functions for which + -- we seek interesting call patterns + + | RecArg -- These are those functions' arguments, or their sub-components; + -- we gather occurrence information for these + +instance Outputable HowBound where + ppr RecFun = text "RecFun" + ppr RecArg = text "RecArg" + +scForce :: ScEnv -> Bool -> ScEnv +scForce env b = env { sc_force = b } + +lookupHowBound :: ScEnv -> Id -> Maybe HowBound +lookupHowBound env id = lookupVarEnv (sc_how_bound env) id + +scSubstId :: ScEnv -> Id -> CoreExpr +scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v + +scSubstTy :: ScEnv -> Type -> Type +scSubstTy env ty = substTy (sc_subst env) ty + +scSubstCo :: ScEnv -> Coercion -> Coercion +scSubstCo env co = substCo (sc_subst env) co + +zapScSubst :: ScEnv -> ScEnv +zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) } + +extendScInScope :: ScEnv -> [Var] -> ScEnv + -- Bring the quantified variables into scope +extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars } + + -- Extend the substitution +extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv +extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr } + +extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv +extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs } + +extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv +extendHowBound env bndrs how_bound + = env { sc_how_bound = extendVarEnvList (sc_how_bound env) + [(bndr,how_bound) | bndr <- bndrs] } + +extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var]) +extendBndrsWith how_bound env bndrs + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs') + where + (subst', bndrs') = substBndrs (sc_subst env) bndrs + hb_env' = sc_how_bound env `extendVarEnvList` + [(bndr,how_bound) | bndr <- bndrs'] + +extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var) +extendBndrWith how_bound env bndr + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr') + where + (subst', bndr') = substBndr (sc_subst env) bndr + hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound + +extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var]) +extendRecBndrs env bndrs = (env { sc_subst = subst' }, bndrs') + where + (subst', bndrs') = substRecBndrs (sc_subst env) bndrs + +extendBndr :: ScEnv -> Var -> (ScEnv, Var) +extendBndr env bndr = (env { sc_subst = subst' }, bndr') + where + (subst', bndr') = substBndr (sc_subst env) bndr + +extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv +extendValEnv env _ Nothing = env +extendValEnv env id (Just cv) + | valueIsWorkFree cv -- Don't duplicate work!! #7865 + = env { sc_vals = extendVarEnv (sc_vals env) id cv } +extendValEnv env _ _ = env + +extendCaseBndrs :: ScEnv -> OutExpr -> OutId -> AltCon -> [Var] -> (ScEnv, [Var]) +-- When we encounter +-- case scrut of b +-- C x y -> ... +-- we want to bind b, to (C x y) +-- NB1: Extends only the sc_vals part of the envt +-- NB2: Kill the dead-ness info on the pattern binders x,y, since +-- they are potentially made alive by the [b -> C x y] binding +extendCaseBndrs env scrut case_bndr con alt_bndrs + = (env2, alt_bndrs') + where + live_case_bndr = not (isDeadBinder case_bndr) + env1 | Var v <- stripTicksTopE (const True) scrut + = extendValEnv env v cval + | otherwise = env -- See Note [Add scrutinee to ValueEnv too] + env2 | live_case_bndr = extendValEnv env1 case_bndr cval + | otherwise = env1 + + alt_bndrs' | case scrut of { Var {} -> True; _ -> live_case_bndr } + = map zap alt_bndrs + | otherwise + = alt_bndrs + + cval = case con of + DEFAULT -> Nothing + LitAlt {} -> Just (ConVal con []) + DataAlt {} -> Just (ConVal con vanilla_args) + where + vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++ + varsToCoreExprs alt_bndrs + + zap v | isTyVar v = v -- See NB2 above + | otherwise = zapIdOccInfo v + + +decreaseSpecCount :: ScEnv -> Int -> ScEnv +-- See Note [Avoiding exponential blowup] +decreaseSpecCount env n_specs + = env { sc_force = False -- See Note [Forcing specialisation] + , sc_count = case sc_count env of + Nothing -> Nothing + Just n -> Just (n `div` (n_specs + 1)) } + -- The "+1" takes account of the original function; + -- See Note [Avoiding exponential blowup] + +--------------------------------------------------- +-- See Note [Forcing specialisation] +ignoreType :: ScEnv -> Type -> Bool +ignoreDataCon :: ScEnv -> DataCon -> Bool +forceSpecBndr :: ScEnv -> Var -> Bool + +ignoreDataCon env dc = ignoreTyCon env (dataConTyCon dc) + +ignoreType env ty + = case tyConAppTyCon_maybe ty of + Just tycon -> ignoreTyCon env tycon + _ -> False + +ignoreTyCon :: ScEnv -> TyCon -> Bool +ignoreTyCon env tycon + = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr + +forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var + +forceSpecFunTy :: ScEnv -> Type -> Bool +forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys + +forceSpecArgTy :: ScEnv -> Type -> Bool +forceSpecArgTy env ty + | Just ty' <- coreView ty = forceSpecArgTy env ty' + +forceSpecArgTy env ty + | Just (tycon, tys) <- splitTyConApp_maybe ty + , tycon /= funTyCon + = tyConName tycon == specTyConName + || lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr + || any (forceSpecArgTy env) tys + +forceSpecArgTy _ _ = False + +{- +Note [Add scrutinee to ValueEnv too] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + case x of y + (a,b) -> case b of c + I# v -> ...(f y)... +By the time we get to the call (f y), the ValueEnv +will have a binding for y, and for c + y -> (a,b) + c -> I# v +BUT that's not enough! Looking at the call (f y) we +see that y is pair (a,b), but we also need to know what 'b' is. +So in extendCaseBndrs we must *also* add the binding + b -> I# v +else we lose a useful specialisation for f. This is necessary even +though the simplifier has systematically replaced uses of 'x' with 'y' +and 'b' with 'c' in the code. The use of 'b' in the ValueEnv came +from outside the case. See #4908 for the live example. + +Note [Avoiding exponential blowup] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sc_count field of the ScEnv says how many times we are prepared to +duplicate a single function. But we must take care with recursive +specialisations. Consider + + let $j1 = let $j2 = let $j3 = ... + in + ...$j3... + in + ...$j2... + in + ...$j1... + +If we specialise $j1 then in each specialisation (as well as the original) +we can specialise $j2, and similarly $j3. Even if we make just *one* +specialisation of each, because we also have the original we'll get 2^n +copies of $j3, which is not good. + +So when recursively specialising we divide the sc_count by the number of +copies we are making at this level, including the original. + + +************************************************************************ +* * +\subsection{Usage information: flows upwards} +* * +************************************************************************ +-} + +data ScUsage + = SCU { + scu_calls :: CallEnv, -- Calls + -- The functions are a subset of the + -- RecFuns in the ScEnv + + scu_occs :: !(IdEnv ArgOcc) -- Information on argument occurrences + } -- The domain is OutIds + +type CallEnv = IdEnv [Call] +data Call = Call Id [CoreArg] ValueEnv + -- The arguments of the call, together with the + -- env giving the constructor bindings at the call site + -- We keep the function mainly for debug output + +instance Outputable ScUsage where + ppr (SCU { scu_calls = calls, scu_occs = occs }) + = text "SCU" <+> braces (sep [ ptext (sLit "calls =") <+> ppr calls + , text "occs =" <+> ppr occs ]) + +instance Outputable Call where + ppr (Call fn args _) = ppr fn <+> fsep (map pprParendExpr args) + +nullUsage :: ScUsage +nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv } + +combineCalls :: CallEnv -> CallEnv -> CallEnv +combineCalls = plusVarEnv_C (++) + where +-- plus cs ds | length res > 1 +-- = pprTrace "combineCalls" (vcat [ text "cs:" <+> ppr cs +-- , text "ds:" <+> ppr ds]) +-- res +-- | otherwise = res +-- where +-- res = cs ++ ds + +combineUsage :: ScUsage -> ScUsage -> ScUsage +combineUsage u1 u2 = SCU { scu_calls = combineCalls (scu_calls u1) (scu_calls u2), + scu_occs = plusVarEnv_C combineOcc (scu_occs u1) (scu_occs u2) } + +combineUsages :: [ScUsage] -> ScUsage +combineUsages [] = nullUsage +combineUsages us = foldr1 combineUsage us + +lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc]) +lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs + = (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs}, + [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs]) + +data ArgOcc = NoOcc -- Doesn't occur at all; or a type argument + | UnkOcc -- Used in some unknown way + + | ScrutOcc -- See Note [ScrutOcc] + (DataConEnv [ArgOcc]) -- How the sub-components are used + +type DataConEnv a = UniqFM a -- Keyed by DataCon + +{- Note [ScrutOcc] +~~~~~~~~~~~~~~~~~~~ +An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing, +is *only* taken apart or applied. + + Functions, literal: ScrutOcc emptyUFM + Data constructors: ScrutOcc subs, + +where (subs :: UniqFM [ArgOcc]) gives usage of the *pattern-bound* components, +The domain of the UniqFM is the Unique of the data constructor + +The [ArgOcc] is the occurrences of the *pattern-bound* components +of the data structure. E.g. + data T a = forall b. MkT a b (b->a) +A pattern binds b, x::a, y::b, z::b->a, but not 'a'! + +-} + +instance Outputable ArgOcc where + ppr (ScrutOcc xs) = text "scrut-occ" <> ppr xs + ppr UnkOcc = text "unk-occ" + ppr NoOcc = text "no-occ" + +evalScrutOcc :: ArgOcc +evalScrutOcc = ScrutOcc emptyUFM + +-- Experimentally, this version of combineOcc makes ScrutOcc "win", so +-- that if the thing is scrutinised anywhere then we get to see that +-- in the overall result, even if it's also used in a boxed way +-- This might be too aggressive; see Note [Reboxing] Alternative 3 +combineOcc :: ArgOcc -> ArgOcc -> ArgOcc +combineOcc NoOcc occ = occ +combineOcc occ NoOcc = occ +combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys) +combineOcc UnkOcc (ScrutOcc ys) = ScrutOcc ys +combineOcc (ScrutOcc xs) UnkOcc = ScrutOcc xs +combineOcc UnkOcc UnkOcc = UnkOcc + +combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc] +combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys + +setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage +-- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee +-- is a variable, and an interesting variable +setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Tick _ e) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Var v) occ + | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ } + | otherwise = usg +setScrutOcc _env usg _other _occ -- Catch-all + = usg + +{- +************************************************************************ +* * +\subsection{The main recursive function} +* * +************************************************************************ + +The main recursive function gathers up usage information, and +creates specialised versions of functions. +-} + +scExpr, scExpr' :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr) + -- The unique supply is needed when we invent + -- a new name for the specialised function and its args + +scExpr env e = scExpr' env e + +scExpr' env (Var v) = case scSubstId env v of + Var v' -> return (mkVarUsage env v' [], Var v') + e' -> scExpr (zapScSubst env) e' + +scExpr' env (Type t) = return (nullUsage, Type (scSubstTy env t)) +scExpr' env (Coercion c) = return (nullUsage, Coercion (scSubstCo env c)) +scExpr' _ e@(Lit {}) = return (nullUsage, e) +scExpr' env (Tick t e) = do (usg, e') <- scExpr env e + return (usg, Tick t e') +scExpr' env (Cast e co) = do (usg, e') <- scExpr env e + return (usg, mkCast e' (scSubstCo env co)) + -- Important to use mkCast here + -- See Note [SpecConstr call patterns] +scExpr' env e@(App _ _) = scApp env (collectArgs e) +scExpr' env (Lam b e) = do let (env', b') = extendBndr env b + (usg, e') <- scExpr env' e + return (usg, Lam b' e') + +scExpr' env (Case scrut b ty alts) + = do { (scrut_usg, scrut') <- scExpr env scrut + ; case isValue (sc_vals env) scrut' of + Just (ConVal con args) -> sc_con_app con args scrut' + _other -> sc_vanilla scrut_usg scrut' + } + where + sc_con_app con args scrut' -- Known constructor; simplify + = do { let (_, bs, rhs) = findAlt con alts + `orElse` (DEFAULT, [], mkImpossibleExpr ty) + alt_env' = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args) + ; scExpr alt_env' rhs } + + sc_vanilla scrut_usg scrut' -- Normal case + = do { let (alt_env,b') = extendBndrWith RecArg env b + -- Record RecArg for the components + + ; (alt_usgs, alt_occs, alts') + <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts + + ; let scrut_occ = foldr combineOcc NoOcc alt_occs + scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ + -- The combined usage of the scrutinee is given + -- by scrut_occ, which is passed to scScrut, which + -- in turn treats a bare-variable scrutinee specially + + ; return (foldr combineUsage scrut_usg' alt_usgs, + Case scrut' b' (scSubstTy env ty) alts') } + + sc_alt env scrut' b' (con,bs,rhs) + = do { let (env1, bs1) = extendBndrsWith RecArg env bs + (env2, bs2) = extendCaseBndrs env1 scrut' b' con bs1 + ; (usg, rhs') <- scExpr env2 rhs + ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2) + scrut_occ = case con of + DataAlt dc -> ScrutOcc (unitUFM dc arg_occs) + _ -> ScrutOcc emptyUFM + ; return (usg', b_occ `combineOcc` scrut_occ, (con, bs2, rhs')) } + +scExpr' env (Let (NonRec bndr rhs) body) + | isTyVar bndr -- Type-lets may be created by doBeta + = scExpr' (extendScSubst env bndr rhs) body + + | otherwise + = do { let (body_env, bndr') = extendBndr env bndr + ; rhs_info <- scRecRhs env (bndr',rhs) + + ; let body_env2 = extendHowBound body_env [bndr'] RecFun + -- Note [Local let bindings] + rhs' = ri_new_rhs rhs_info + body_env3 = extendValEnv body_env2 bndr' (isValue (sc_vals env) rhs') + + ; (body_usg, body') <- scExpr body_env3 body + + -- NB: For non-recursive bindings we inherit sc_force flag from + -- the parent function (see Note [Forcing specialisation]) + ; (spec_usg, specs) <- specNonRec env body_usg rhs_info + + ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' } + `combineUsage` spec_usg, -- Note [spec_usg includes rhs_usg] + mkLets [NonRec b r | (b,r) <- ruleInfoBinds rhs_info specs] body') + } + + +-- A *local* recursive group: see Note [Local recursive groups] +scExpr' env (Let (Rec prs) body) + = do { let (bndrs,rhss) = unzip prs + (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun + force_spec = any (forceSpecBndr env) bndrs' + -- Note [Forcing specialisation] + + ; rhs_infos <- mapM (scRecRhs rhs_env2) (bndrs' `zip` rhss) + ; (body_usg, body') <- scExpr rhs_env2 body + + -- NB: start specLoop from body_usg + ; (spec_usg, specs) <- specRec NotTopLevel (scForce rhs_env2 force_spec) + body_usg rhs_infos + -- Do not unconditionally generate specialisations from rhs_usgs + -- Instead use them only if we find an unspecialised call + -- See Note [Local recursive groups] + + ; let all_usg = spec_usg `combineUsage` body_usg -- Note [spec_usg includes rhs_usg] + bind' = Rec (concat (zipWith ruleInfoBinds rhs_infos specs)) + + ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' }, + Let bind' body') } + +{- +Note [Local let bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~ +It is not uncommon to find this + + let $j = \x. <blah> in ...$j True...$j True... + +Here $j is an arbitrary let-bound function, but it often comes up for +join points. We might like to specialise $j for its call patterns. +Notice the difference from a letrec, where we look for call patterns +in the *RHS* of the function. Here we look for call patterns in the +*body* of the let. + +At one point I predicated this on the RHS mentioning the outer +recursive function, but that's not essential and might even be +harmful. I'm not sure. +-} + +scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr) + +scApp env (Var fn, args) -- Function is a variable + = ASSERT( not (null args) ) + do { args_w_usgs <- mapM (scExpr env) args + ; let (arg_usgs, args') = unzip args_w_usgs + arg_usg = combineUsages arg_usgs + ; case scSubstId env fn of + fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args') + -- Do beta-reduction and try again + + Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args', + mkApps (Var fn') args') + + other_fn' -> return (arg_usg, mkApps other_fn' args') } + -- NB: doing this ignores any usage info from the substituted + -- function, but I don't think that matters. If it does + -- we can fix it. + where + doBeta :: OutExpr -> [OutExpr] -> OutExpr + -- ToDo: adjust for System IF + doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args) + doBeta fn args = mkApps fn args + +-- The function is almost always a variable, but not always. +-- In particular, if this pass follows float-in, +-- which it may, we can get +-- (let f = ...f... in f) arg1 arg2 +scApp env (other_fn, args) + = do { (fn_usg, fn') <- scExpr env other_fn + ; (arg_usgs, args') <- mapAndUnzipM (scExpr env) args + ; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') } + +---------------------- +mkVarUsage :: ScEnv -> Id -> [CoreExpr] -> ScUsage +mkVarUsage env fn args + = case lookupHowBound env fn of + Just RecFun -> SCU { scu_calls = unitVarEnv fn [Call fn args (sc_vals env)] + , scu_occs = emptyVarEnv } + Just RecArg -> SCU { scu_calls = emptyVarEnv + , scu_occs = unitVarEnv fn arg_occ } + Nothing -> nullUsage + where + -- I rather think we could use UnkOcc all the time + arg_occ | null args = UnkOcc + | otherwise = evalScrutOcc + +---------------------- +scTopBindEnv :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind) +scTopBindEnv env (Rec prs) + = do { let (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs RecFun + + prs' = zip bndrs' rhss + ; return (rhs_env2, Rec prs') } + where + (bndrs,rhss) = unzip prs + +scTopBindEnv env (NonRec bndr rhs) + = do { let (env1, bndr') = extendBndr env bndr + env2 = extendValEnv env1 bndr' (isValue (sc_vals env) rhs) + ; return (env2, NonRec bndr' rhs) } + +---------------------- +scTopBind :: ScEnv -> ScUsage -> CoreBind -> UniqSM (ScUsage, CoreBind) + +{- +scTopBind _ usage _ + | pprTrace "scTopBind_usage" (ppr (scu_calls usage)) False + = error "false" +-} + +scTopBind env body_usage (Rec prs) + | Just threshold <- sc_size env + , not force_spec + , not (all (couldBeSmallEnoughToInline (sc_dflags env) threshold) rhss) + -- No specialisation + = -- pprTrace "scTopBind: nospec" (ppr bndrs) $ + do { (rhs_usgs, rhss') <- mapAndUnzipM (scExpr env) rhss + ; return (body_usage `combineUsage` combineUsages rhs_usgs, Rec (bndrs `zip` rhss')) } + + | otherwise -- Do specialisation + = do { rhs_infos <- mapM (scRecRhs env) prs + + ; (spec_usage, specs) <- specRec TopLevel (scForce env force_spec) + body_usage rhs_infos + + ; return (body_usage `combineUsage` spec_usage, + Rec (concat (zipWith ruleInfoBinds rhs_infos specs))) } + where + (bndrs,rhss) = unzip prs + force_spec = any (forceSpecBndr env) bndrs + -- Note [Forcing specialisation] + +scTopBind env usage (NonRec bndr rhs) -- Oddly, we don't seem to specialise top-level non-rec functions + = do { (rhs_usg', rhs') <- scExpr env rhs + ; return (usage `combineUsage` rhs_usg', NonRec bndr rhs') } + +---------------------- +scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM RhsInfo +scRecRhs env (bndr,rhs) + = do { let (arg_bndrs,body) = collectBinders rhs + (body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs + ; (body_usg, body') <- scExpr body_env body + ; let (rhs_usg, arg_occs) = lookupOccs body_usg arg_bndrs' + ; return (RI { ri_rhs_usg = rhs_usg + , ri_fn = bndr, ri_new_rhs = mkLams arg_bndrs' body' + , ri_lam_bndrs = arg_bndrs, ri_lam_body = body + , ri_arg_occs = arg_occs }) } + -- The arg_occs says how the visible, + -- lambda-bound binders of the RHS are used + -- (including the TyVar binders) + -- Two pats are the same if they match both ways + +---------------------- +ruleInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)] +ruleInfoBinds (RI { ri_fn = fn, ri_new_rhs = new_rhs }) + (SI { si_specs = specs }) + = [(id,rhs) | OS { os_id = id, os_rhs = rhs } <- specs] ++ + -- First the specialised bindings + + [(fn `addIdSpecialisations` rules, new_rhs)] + -- And now the original binding + where + rules = [r | OS { os_rule = r } <- specs] + +{- +************************************************************************ +* * + The specialiser itself +* * +************************************************************************ +-} + +data RhsInfo + = RI { ri_fn :: OutId -- The binder + , ri_new_rhs :: OutExpr -- The specialised RHS (in current envt) + , ri_rhs_usg :: ScUsage -- Usage info from specialising RHS + + , ri_lam_bndrs :: [InVar] -- The *original* RHS (\xs.body) + , ri_lam_body :: InExpr -- Note [Specialise original body] + , ri_arg_occs :: [ArgOcc] -- Info on how the xs occur in body + } + +data SpecInfo -- Info about specialisations for a particular Id + = SI { si_specs :: [OneSpec] -- The specialisations we have generated + + , si_n_specs :: Int -- Length of si_specs; used for numbering them + + , si_mb_unspec :: Maybe ScUsage -- Just cs => we have not yet used calls in the + } -- from calls in the *original* RHS as + -- seeds for new specialisations; + -- if you decide to do so, here is the + -- RHS usage (which has not yet been + -- unleashed) + -- Nothing => we have + -- See Note [Local recursive groups] + -- See Note [spec_usg includes rhs_usg] + + -- One specialisation: Rule plus definition +data OneSpec = + OS { os_pat :: CallPat -- Call pattern that generated this specialisation + , os_rule :: CoreRule -- Rule connecting original id with the specialisation + , os_id :: OutId -- Spec id + , os_rhs :: OutExpr } -- Spec rhs + +noSpecInfo :: SpecInfo +noSpecInfo = SI { si_specs = [], si_n_specs = 0, si_mb_unspec = Nothing } + +---------------------- +specNonRec :: ScEnv + -> ScUsage -- Body usage + -> RhsInfo -- Structure info usage info for un-specialised RHS + -> UniqSM (ScUsage, SpecInfo) -- Usage from RHSs (specialised and not) + -- plus details of specialisations + +specNonRec env body_usg rhs_info + = specialise env (scu_calls body_usg) rhs_info + (noSpecInfo { si_mb_unspec = Just (ri_rhs_usg rhs_info) }) + +---------------------- +specRec :: TopLevelFlag -> ScEnv + -> ScUsage -- Body usage + -> [RhsInfo] -- Structure info and usage info for un-specialised RHSs + -> UniqSM (ScUsage, [SpecInfo]) -- Usage from all RHSs (specialised and not) + -- plus details of specialisations + +specRec top_lvl env body_usg rhs_infos + = go 1 seed_calls nullUsage init_spec_infos + where + (seed_calls, init_spec_infos) -- Note [Seeding top-level recursive groups] + | isTopLevel top_lvl + , any (isExportedId . ri_fn) rhs_infos -- Seed from body and RHSs + = (all_calls, [noSpecInfo | _ <- rhs_infos]) + | otherwise -- Seed from body only + = (calls_in_body, [noSpecInfo { si_mb_unspec = Just (ri_rhs_usg ri) } + | ri <- rhs_infos]) + + calls_in_body = scu_calls body_usg + calls_in_rhss = foldr (combineCalls . scu_calls . ri_rhs_usg) emptyVarEnv rhs_infos + all_calls = calls_in_rhss `combineCalls` calls_in_body + + -- Loop, specialising, until you get no new specialisations + go :: Int -- Which iteration of the "until no new specialisations" + -- loop we are on; first iteration is 1 + -> CallEnv -- Seed calls + -- Two accumulating parameters: + -> ScUsage -- Usage from earlier specialisations + -> [SpecInfo] -- Details of specialisations so far + -> UniqSM (ScUsage, [SpecInfo]) + go n_iter seed_calls usg_so_far spec_infos + | isEmptyVarEnv seed_calls + = -- pprTrace "specRec1" (vcat [ ppr (map ri_fn rhs_infos) + -- , ppr seed_calls + -- , ppr body_usg ]) $ + return (usg_so_far, spec_infos) + + -- Limit recursive specialisation + -- See Note [Limit recursive specialisation] + | n_iter > sc_recursive env -- Too many iterations of the 'go' loop + , sc_force env || isNothing (sc_count env) + -- If both of these are false, the sc_count + -- threshold will prevent non-termination + , any ((> the_limit) . si_n_specs) spec_infos + = -- pprTrace "specRec2" (ppr (map (map os_pat . si_specs) spec_infos)) $ + return (usg_so_far, spec_infos) + + | otherwise + = -- pprTrace "specRec3" (vcat [ text "bndrs" <+> ppr (map ri_fn rhs_infos) + -- , text "iteration" <+> int n_iter + -- , text "spec_infos" <+> ppr (map (map os_pat . si_specs) spec_infos) + -- ]) $ + do { specs_w_usg <- zipWithM (specialise env seed_calls) rhs_infos spec_infos + ; let (extra_usg_s, new_spec_infos) = unzip specs_w_usg + extra_usg = combineUsages extra_usg_s + all_usg = usg_so_far `combineUsage` extra_usg + ; go (n_iter + 1) (scu_calls extra_usg) all_usg new_spec_infos } + + -- See Note [Limit recursive specialisation] + the_limit = case sc_count env of + Nothing -> 10 -- Ugh! + Just max -> max + + +---------------------- +specialise + :: ScEnv + -> CallEnv -- Info on newly-discovered calls to this function + -> RhsInfo + -> SpecInfo -- Original RHS plus patterns dealt with + -> UniqSM (ScUsage, SpecInfo) -- New specialised versions and their usage + +-- See Note [spec_usg includes rhs_usg] + +-- Note: this only generates *specialised* bindings +-- The original binding is added by ruleInfoBinds +-- +-- Note: the rhs here is the optimised version of the original rhs +-- So when we make a specialised copy of the RHS, we're starting +-- from an RHS whose nested functions have been optimised already. + +specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs + , ri_lam_body = body, ri_arg_occs = arg_occs }) + spec_info@(SI { si_specs = specs, si_n_specs = spec_count + , si_mb_unspec = mb_unspec }) + | isBottomingId fn -- Note [Do not specialise diverging functions] + -- and do not generate specialisation seeds from its RHS + = -- pprTrace "specialise bot" (ppr fn) $ + return (nullUsage, spec_info) + + | isNeverActive (idInlineActivation fn) -- See Note [Transfer activation] + || null arg_bndrs -- Only specialise functions + = -- pprTrace "specialise inactive" (ppr fn) $ + case mb_unspec of -- Behave as if there was a single, boring call + Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing }) + -- See Note [spec_usg includes rhs_usg] + Nothing -> return (nullUsage, spec_info) + + | Just all_calls <- lookupVarEnv bind_calls fn + = -- pprTrace "specialise entry {" (ppr fn <+> ppr all_calls) $ + do { (boring_call, new_pats) <- callsToNewPats env fn spec_info arg_occs all_calls + + ; let n_pats = length new_pats +-- ; if (not (null new_pats) || isJust mb_unspec) then +-- pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int n_pats <+> text "good patterns" +-- , text "mb_unspec" <+> ppr (isJust mb_unspec) +-- , text "arg_occs" <+> ppr arg_occs +-- , text "good pats" <+> ppr new_pats]) $ +-- return () +-- else return () + + ; let spec_env = decreaseSpecCount env n_pats + ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body) + (new_pats `zip` [spec_count..]) + -- See Note [Specialise original body] + + ; let spec_usg = combineUsages spec_usgs + + -- If there were any boring calls among the seeds (= all_calls), then those + -- calls will call the un-specialised function. So we should use the seeds + -- from the _unspecialised_ function's RHS, which are in mb_unspec, by returning + -- then in new_usg. + (new_usg, mb_unspec') + = case mb_unspec of + Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing) + _ -> (spec_usg, mb_unspec) + +-- ; pprTrace "specialise return }" +-- (vcat [ ppr fn +-- , text "boring_call:" <+> ppr boring_call +-- , text "new calls:" <+> ppr (scu_calls new_usg)]) $ +-- return () + + ; return (new_usg, SI { si_specs = new_specs ++ specs + , si_n_specs = spec_count + n_pats + , si_mb_unspec = mb_unspec' }) } + + | otherwise -- No new seeds, so return nullUsage + = return (nullUsage, spec_info) + + + + +--------------------- +spec_one :: ScEnv + -> OutId -- Function + -> [InVar] -- Lambda-binders of RHS; should match patterns + -> InExpr -- Body of the original function + -> (CallPat, Int) + -> UniqSM (ScUsage, OneSpec) -- Rule and binding + +-- spec_one creates a specialised copy of the function, together +-- with a rule for using it. I'm very proud of how short this +-- function is, considering what it does :-). + +{- + Example + + In-scope: a, x::a + f = /\b \y::[(a,b)] -> ....f (b,c) ((:) (a,(b,c)) (x,v) (h w))... + [c::*, v::(b,c) are presumably bound by the (...) part] + ==> + f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] -> + (...entire body of f...) [b -> (b,c), + y -> ((:) (a,(b,c)) (x,v) hw)] + + RULE: forall b::* c::*, -- Note, *not* forall a, x + v::(b,c), + hw::[(a,(b,c))] . + + f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw +-} + +spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) + = do { spec_uniq <- getUniqueM + ; let spec_env = extendScSubstList (extendScInScope env qvars) + (arg_bndrs `zip` pats) + fn_name = idName fn + fn_loc = nameSrcSpan fn_name + fn_occ = nameOccName fn_name + spec_occ = mkSpecOcc fn_occ + -- We use fn_occ rather than fn in the rule_name string + -- as we don't want the uniq to end up in the rule, and + -- hence in the ABI, as that can cause spurious ABI + -- changes (#4012). + rule_name = mkFastString ("SC:" ++ occNameString fn_occ ++ show rule_number) + spec_name = mkInternalName spec_uniq spec_occ fn_loc +-- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn +-- <+> ppr pats <+> text "-->" <+> ppr spec_name) $ +-- return () + + -- Specialise the body + ; (spec_usg, spec_body) <- scExpr spec_env body + +-- ; pprTrace "done spec_one}" (ppr fn) $ +-- return () + + -- And build the results + ; let (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env) + qvars body_ty + -- Usual w/w hack to avoid generating + -- a spec_rhs of unlifted type and no args + + spec_lam_args_str = handOutStrictnessInformation (fst (splitStrictSig spec_str)) spec_lam_args + -- Annotate the variables with the strictness information from + -- the function (see Note [Strictness information in worker binders]) + + spec_join_arity | isJoinId fn = Just (length spec_lam_args) + | otherwise = Nothing + spec_id = mkLocalId spec_name + (mkLamTypes spec_lam_args body_ty) + -- See Note [Transfer strictness] + `setIdStrictness` spec_str + `setIdCprInfo` topCprSig + `setIdArity` count isId spec_lam_args + `asJoinId_maybe` spec_join_arity + spec_str = calcSpecStrictness fn spec_lam_args pats + + + -- Conditionally use result of new worker-wrapper transform + spec_rhs = mkLams spec_lam_args_str spec_body + body_ty = exprType spec_body + rule_rhs = mkVarApps (Var spec_id) spec_call_args + inline_act = idInlineActivation fn + this_mod = sc_module spec_env + rule = mkRule this_mod True {- Auto -} True {- Local -} + rule_name inline_act fn_name qvars pats rule_rhs + -- See Note [Transfer activation] + ; return (spec_usg, OS { os_pat = call_pat, os_rule = rule + , os_id = spec_id + , os_rhs = spec_rhs }) } + + +-- See Note [Strictness information in worker binders] +handOutStrictnessInformation :: [Demand] -> [Var] -> [Var] +handOutStrictnessInformation = go + where + go _ [] = [] + go [] vs = vs + go (d:dmds) (v:vs) | isId v = setIdDemandInfo v d : go dmds vs + go dmds (v:vs) = v : go dmds vs + +calcSpecStrictness :: Id -- The original function + -> [Var] -> [CoreExpr] -- Call pattern + -> StrictSig -- Strictness of specialised thing +-- See Note [Transfer strictness] +calcSpecStrictness fn qvars pats + = mkClosedStrictSig spec_dmds topDiv + where + spec_dmds = [ lookupVarEnv dmd_env qv `orElse` topDmd | qv <- qvars, isId qv ] + StrictSig (DmdType _ dmds _) = idStrictness fn + + dmd_env = go emptyVarEnv dmds pats + + go :: DmdEnv -> [Demand] -> [CoreExpr] -> DmdEnv + go env ds (Type {} : pats) = go env ds pats + go env ds (Coercion {} : pats) = go env ds pats + go env (d:ds) (pat : pats) = go (go_one env d pat) ds pats + go env _ _ = env + + go_one :: DmdEnv -> Demand -> CoreExpr -> DmdEnv + go_one env d (Var v) = extendVarEnv_C bothDmd env v d + go_one env d e + | Just ds <- splitProdDmd_maybe d -- NB: d does not have to be strict + , (Var _, args) <- collectArgs e = go env ds args + go_one env _ _ = env + +{- +Note [spec_usg includes rhs_usg] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In calls to 'specialise', the returned ScUsage must include the rhs_usg in +the passed-in SpecInfo, unless there are no calls at all to the function. + +The caller can, indeed must, assume this. He should not combine in rhs_usg +himself, or he'll get rhs_usg twice -- and that can lead to an exponential +blowup of duplicates in the CallEnv. This is what gave rise to the massive +performance loss in #8852. + +Note [Specialise original body] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The RhsInfo for a binding keeps the *original* body of the binding. We +must specialise that, *not* the result of applying specExpr to the RHS +(which is also kept in RhsInfo). Otherwise we end up specialising a +specialised RHS, and that can lead directly to exponential behaviour. + +Note [Transfer activation] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + This note is for SpecConstr, but exactly the same thing + happens in the overloading specialiser; see + Note [Auto-specialisation and RULES] in GHC.Core.Op.Specialise. + +In which phase should the specialise-constructor rules be active? +Originally I made them always-active, but Manuel found that this +defeated some clever user-written rules. Then I made them active only +in Phase 0; after all, currently, the specConstr transformation is +only run after the simplifier has reached Phase 0, but that meant +that specialisations didn't fire inside wrappers; see test +simplCore/should_compile/spec-inline. + +So now I just use the inline-activation of the parent Id, as the +activation for the specialisation RULE, just like the main specialiser; + +This in turn means there is no point in specialising NOINLINE things, +so we test for that. + +Note [Transfer strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must transfer strictness information from the original function to +the specialised one. Suppose, for example + + f has strictness SS + and a RULE f (a:as) b = f_spec a as b + +Now we want f_spec to have strictness LLS, otherwise we'll use call-by-need +when calling f_spec instead of call-by-value. And that can result in +unbounded worsening in space (cf the classic foldl vs foldl') + +See #3437 for a good example. + +The function calcSpecStrictness performs the calculation. + +Note [Strictness information in worker binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After having calculated the strictness annotation for the worker (see Note +[Transfer strictness] above), we also want to have this information attached to +the worker’s arguments, for the benefit of later passes. The function +handOutStrictnessInformation decomposes the strictness annotation calculated by +calcSpecStrictness and attaches them to the variables. + +************************************************************************ +* * +\subsection{Argument analysis} +* * +************************************************************************ + +This code deals with analysing call-site arguments to see whether +they are constructor applications. + +Note [Free type variables of the qvar types] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a call (f @a x True), that we want to specialise, what variables should +we quantify over. Clearly over 'a' and 'x', but what about any type variables +free in x's type? In fact we don't need to worry about them because (f @a) +can only be a well-typed application if its type is compatible with x, so any +variables free in x's type must be free in (f @a), and hence either be gathered +via 'a' itself, or be in scope at f's defn. Hence we just take + (exprsFreeVars pats). + +BUT phantom type synonyms can mess this reasoning up, + eg x::T b with type T b = Int +So we apply expandTypeSynonyms to the bound Ids. +See # 5458. Yuk. + +Note [SpecConstr call patterns] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A "call patterns" that we collect is going to become the LHS of a RULE. +It's important that it doesn't have + e |> Refl +or + e |> g1 |> g2 +because both of these will be optimised by Simplify.simplRule. In the +former case such optimisation benign, because the rule will match more +terms; but in the latter we may lose a binding of 'g1' or 'g2', and +end up with a rule LHS that doesn't bind the template variables +(#10602). + +The simplifier eliminates such things, but SpecConstr itself constructs +new terms by substituting. So the 'mkCast' in the Cast case of scExpr +is very important! + +Note [Choosing patterns] +~~~~~~~~~~~~~~~~~~~~~~~~ +If we get lots of patterns we may not want to make a specialisation +for each of them (code bloat), so we choose as follows, implemented +by trim_pats. + +* The flag -fspec-constr-count-N sets the sc_count field + of the ScEnv to (Just n). This limits the total number + of specialisations for a given function to N. + +* -fno-spec-constr-count sets the sc_count field to Nothing, + which switches of the limit. + +* The ghastly ForceSpecConstr trick also switches of the limit + for a particular function + +* Otherwise we sort the patterns to choose the most general + ones first; more general => more widely applicable. + +Note [SpecConstr and casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (#14270) a call like + + let f = e + in ... f (K @(a |> co)) ... + +where 'co' is a coercion variable not in scope at f's definition site. +If we aren't caereful we'll get + + let $sf a co = e (K @(a |> co)) + RULE "SC:f" forall a co. f (K @(a |> co)) = $sf a co + f = e + in ... + +But alas, when we match the call we won't bind 'co', because type-matching +(for good reasons) discards casts). + +I don't know how to solve this, so for now I'm just discarding any +call patterns that + * Mentions a coercion variable in a type argument + * That is not in scope at the binding of the function + +I think this is very rare. + +It is important (e.g. #14936) that this /only/ applies to +coercions mentioned in casts. We don't want to be discombobulated +by casts in terms! For example, consider + f ((e1,e2) |> sym co) +where, say, + f :: Foo -> blah + co :: Foo ~R (Int,Int) + +Here we definitely do want to specialise for that pair! We do not +match on the structure of the coercion; instead we just match on a +coercion variable, so the RULE looks like + + forall (x::Int, y::Int, co :: (Int,Int) ~R Foo) + f ((x,y) |> co) = $sf x y co + +Often the body of f looks like + f arg = ...(case arg |> co' of + (x,y) -> blah)... + +so that the specialised f will turn into + $sf x y co = let arg = (x,y) |> co + in ...(case arg>| co' of + (x,y) -> blah).... + +which will simplify to not use 'co' at all. But we can't guarantee +that co will end up unused, so we still pass it. Absence analysis +may remove it later. + +Note that this /also/ discards the call pattern if we have a cast in a +/term/, although in fact Rules.match does make a very flaky and +fragile attempt to match coercions. e.g. a call like + f (Maybe Age) (Nothing |> co) blah + where co :: Maybe Int ~ Maybe Age +will be discarded. It's extremely fragile to match on the form of a +coercion, so I think it's better just not to try. A more complicated +alternative would be to discard calls that mention coercion variables +only in kind-casts, but I'm doing the simple thing for now. +-} + +type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments + -- See Note [SpecConstr call patterns] + +callsToNewPats :: ScEnv -> Id + -> SpecInfo + -> [ArgOcc] -> [Call] + -> UniqSM (Bool, [CallPat]) + -- Result has no duplicate patterns, + -- nor ones mentioned in done_pats + -- Bool indicates that there was at least one boring pattern +callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls + = do { mb_pats <- mapM (callToPats env bndr_occs) calls + + ; let have_boring_call = any isNothing mb_pats + + good_pats :: [CallPat] + good_pats = catMaybes mb_pats + + -- Remove patterns we have already done + new_pats = filterOut is_done good_pats + is_done p = any (samePat p . os_pat) done_specs + + -- Remove duplicates + non_dups = nubBy samePat new_pats + + -- Remove ones that have too many worker variables + small_pats = filterOut too_big non_dups + too_big (vars,_) = not (isWorkerSmallEnough (sc_dflags env) vars) + -- We are about to construct w/w pair in 'spec_one'. + -- Omit specialisation leading to high arity workers. + -- See Note [Limit w/w arity] in GHC.Core.Op.WorkWrap.Lib + + -- Discard specialisations if there are too many of them + trimmed_pats = trim_pats env fn spec_info small_pats + +-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls +-- , text "done_specs:" <+> ppr (map os_pat done_specs) +-- , text "good_pats:" <+> ppr good_pats ]) $ +-- return () + + ; return (have_boring_call, trimmed_pats) } + + +trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> [CallPat] +-- See Note [Choosing patterns] +trim_pats env fn (SI { si_n_specs = done_spec_count }) pats + | sc_force env + || isNothing mb_scc + || n_remaining >= n_pats + = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats) + pats -- No need to trim + + | otherwise + = emit_trace $ -- Need to trim, so keep the best ones + take n_remaining sorted_pats + + where + n_pats = length pats + spec_count' = n_pats + done_spec_count + n_remaining = max_specs - done_spec_count + mb_scc = sc_count env + Just max_specs = mb_scc + + sorted_pats = map fst $ + sortBy (comparing snd) $ + [(pat, pat_cons pat) | pat <- pats] + -- Sort in order of increasing number of constructors + -- (i.e. decreasing generality) and pick the initial + -- segment of this list + + pat_cons :: CallPat -> Int + -- How many data constructors of literals are in + -- the pattern. More data-cons => less general + pat_cons (qs, ps) = foldr ((+) . n_cons) 0 ps + where + q_set = mkVarSet qs + n_cons (Var v) | v `elemVarSet` q_set = 0 + | otherwise = 1 + n_cons (Cast e _) = n_cons e + n_cons (App e1 e2) = n_cons e1 + n_cons e2 + n_cons (Lit {}) = 1 + n_cons _ = 0 + + emit_trace result + | debugIsOn || hasPprDebug (sc_dflags env) + -- Suppress this scary message for ordinary users! #5125 + = pprTrace "SpecConstr" msg result + | otherwise + = result + msg = vcat [ sep [ text "Function" <+> quotes (ppr fn) + , nest 2 (text "has" <+> + speakNOf spec_count' (text "call pattern") <> comma <+> + text "but the limit is" <+> int max_specs) ] + , text "Use -fspec-constr-count=n to set the bound" + , text "done_spec_count =" <+> int done_spec_count + , text "Keeping " <+> int n_remaining <> text ", out of" <+> int n_pats + , text "Discarding:" <+> ppr (drop n_remaining sorted_pats) ] + + +callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat) + -- The [Var] is the variables to quantify over in the rule + -- Type variables come first, since they may scope + -- over the following term variables + -- The [CoreExpr] are the argument patterns for the rule +callToPats env bndr_occs call@(Call _ args con_env) + | args `ltLength` bndr_occs -- Check saturated + = return Nothing + | otherwise + = do { let in_scope = substInScope (sc_subst env) + ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs + ; let pat_fvs = exprsFreeVarsList pats + -- To get determinism we need the list of free variables in + -- deterministic order. Otherwise we end up creating + -- lambdas with different argument orders. See + -- determinism/simplCore/should_compile/spec-inline-determ.hs + -- for an example. For explanation of determinism + -- considerations See Note [Unique Determinism] in Unique. + + in_scope_vars = getInScopeVars in_scope + is_in_scope v = v `elemVarSet` in_scope_vars + qvars = filterOut is_in_scope pat_fvs + -- Quantify over variables that are not in scope + -- at the call site + -- See Note [Free type variables of the qvar types] + -- See Note [Shadowing] at the top + + (ktvs, ids) = partition isTyVar qvars + qvars' = scopedSort ktvs ++ map sanitise ids + -- Order into kind variables, type variables, term variables + -- The kind of a type variable may mention a kind variable + -- and the type of a term variable may mention a type variable + + sanitise id = id `setIdType` expandTypeSynonyms (idType id) + -- See Note [Free type variables of the qvar types] + + -- Bad coercion variables: see Note [SpecConstr and casts] + bad_covars :: CoVarSet + bad_covars = mapUnionVarSet get_bad_covars pats + get_bad_covars :: CoreArg -> CoVarSet + get_bad_covars (Type ty) + = filterVarSet (\v -> isId v && not (is_in_scope v)) $ + tyCoVarsOfType ty + get_bad_covars _ + = emptyVarSet + + ; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $ + WARN( not (isEmptyVarSet bad_covars) + , text "SpecConstr: bad covars:" <+> ppr bad_covars + $$ ppr call ) + if interesting && isEmptyVarSet bad_covars + then return (Just (qvars', pats)) + else return Nothing } + + -- argToPat takes an actual argument, and returns an abstracted + -- version, consisting of just the "constructor skeleton" of the + -- argument, with non-constructor sub-expression replaced by new + -- placeholder variables. For example: + -- C a (D (f x) (g y)) ==> C p1 (D p2 p3) + +argToPat :: ScEnv + -> InScopeSet -- What's in scope at the fn defn site + -> ValueEnv -- ValueEnv at the call site + -> CoreArg -- A call arg (or component thereof) + -> ArgOcc + -> UniqSM (Bool, CoreArg) + +-- Returns (interesting, pat), +-- where pat is the pattern derived from the argument +-- interesting=True if the pattern is non-trivial (not a variable or type) +-- E.g. x:xs --> (True, x:xs) +-- f xs --> (False, w) where w is a fresh wildcard +-- (f xs, 'c') --> (True, (w, 'c')) where w is a fresh wildcard +-- \x. x+y --> (True, \x. x+y) +-- lvl7 --> (True, lvl7) if lvl7 is bound +-- somewhere further out + +argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ + = return (False, arg) + +argToPat env in_scope val_env (Tick _ arg) arg_occ + = argToPat env in_scope val_env arg arg_occ + -- Note [Notes in call patterns] + -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -- Ignore Notes. In particular, we want to ignore any InlineMe notes + -- Perhaps we should not ignore profiling notes, but I'm going to + -- ride roughshod over them all for now. + --- See Note [Notes in RULE matching] in GHC.Core.Rules + +argToPat env in_scope val_env (Let _ arg) arg_occ + = argToPat env in_scope val_env arg arg_occ + -- See Note [Matching lets] in Rule.hs + -- Look through let expressions + -- e.g. f (let v = rhs in (v,w)) + -- Here we can specialise for f (v,w) + -- because the rule-matcher will look through the let. + +{- Disabled; see Note [Matching cases] in Rule.hs +argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ + | exprOkForSpeculation scrut -- See Note [Matching cases] in Rule.hhs + = argToPat env in_scope val_env rhs arg_occ +-} + +argToPat env in_scope val_env (Cast arg co) arg_occ + | not (ignoreType env ty2) + = do { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ + ; if not interesting then + wildCardPat ty2 + else do + { -- Make a wild-card pattern for the coercion + uniq <- getUniqueM + ; let co_name = mkSysTvName uniq (fsLit "sg") + co_var = mkCoVar co_name (mkCoercionType Representational ty1 ty2) + ; return (interesting, Cast arg' (mkCoVarCo co_var)) } } + where + Pair ty1 ty2 = coercionKind co + + + +{- Disabling lambda specialisation for now + It's fragile, and the spec_loop can be infinite +argToPat in_scope val_env arg arg_occ + | is_value_lam arg + = return (True, arg) + where + is_value_lam (Lam v e) -- Spot a value lambda, even if + | isId v = True -- it is inside a type lambda + | otherwise = is_value_lam e + is_value_lam other = False +-} + + -- Check for a constructor application + -- NB: this *precedes* the Var case, so that we catch nullary constrs +argToPat env in_scope val_env arg arg_occ + | Just (ConVal (DataAlt dc) args) <- isValue val_env arg + , not (ignoreDataCon env dc) -- See Note [NoSpecConstr] + , Just arg_occs <- mb_scrut dc + = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args + ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs + ; return (True, + mkConApp dc (ty_args ++ args')) } + where + mb_scrut dc = case arg_occ of + ScrutOcc bs | Just occs <- lookupUFM bs dc + -> Just (occs) -- See Note [Reboxing] + _other | sc_force env || sc_keen env + -> Just (repeat UnkOcc) + | otherwise + -> Nothing + + -- Check if the argument is a variable that + -- (a) is used in an interesting way in the function body + -- (b) we know what its value is + -- In that case it counts as "interesting" +argToPat env in_scope val_env (Var v) arg_occ + | sc_force env || case arg_occ of { UnkOcc -> False; _other -> True }, -- (a) + is_value, -- (b) + -- Ignoring sc_keen here to avoid gratuitously incurring Note [Reboxing] + -- So sc_keen focused just on f (I# x), where we have freshly-allocated + -- box that we can eliminate in the caller + not (ignoreType env (varType v)) + = return (True, Var v) + where + is_value + | isLocalId v = v `elemInScopeSet` in_scope + && isJust (lookupVarEnv val_env v) + -- Local variables have values in val_env + | otherwise = isValueUnfolding (idUnfolding v) + -- Imports have unfoldings + +-- I'm really not sure what this comment means +-- And by not wild-carding we tend to get forall'd +-- variables that are in scope, which in turn can +-- expose the weakness in let-matching +-- See Note [Matching lets] in GHC.Core.Rules + + -- Check for a variable bound inside the function. + -- Don't make a wild-card, because we may usefully share + -- e.g. f a = let x = ... in f (x,x) + -- NB: this case follows the lambda and con-app cases!! +-- argToPat _in_scope _val_env (Var v) _arg_occ +-- = return (False, Var v) + -- SLPJ : disabling this to avoid proliferation of versions + -- also works badly when thinking about seeding the loop + -- from the body of the let + -- f x y = letrec g z = ... in g (x,y) + -- We don't want to specialise for that *particular* x,y + + -- The default case: make a wild-card + -- We use this for coercions too +argToPat _env _in_scope _val_env arg _arg_occ + = wildCardPat (exprType arg) + +wildCardPat :: Type -> UniqSM (Bool, CoreArg) +wildCardPat ty + = do { uniq <- getUniqueM + ; let id = mkSysLocalOrCoVar (fsLit "sc") uniq ty + ; return (False, varToCoreExpr id) } + +argsToPats :: ScEnv -> InScopeSet -> ValueEnv + -> [CoreArg] -> [ArgOcc] -- Should be same length + -> UniqSM (Bool, [CoreArg]) +argsToPats env in_scope val_env args occs + = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs + ; let (interesting_s, args') = unzip stuff + ; return (or interesting_s, args') } + +isValue :: ValueEnv -> CoreExpr -> Maybe Value +isValue _env (Lit lit) + | litIsLifted lit = Nothing + | otherwise = Just (ConVal (LitAlt lit) []) + +isValue env (Var v) + | Just cval <- lookupVarEnv env v + = Just cval -- You might think we could look in the idUnfolding here + -- but that doesn't take account of which branch of a + -- case we are in, which is the whole point + + | not (isLocalId v) && isCheapUnfolding unf + = isValue env (unfoldingTemplate unf) + where + unf = idUnfolding v + -- However we do want to consult the unfolding + -- as well, for let-bound constructors! + +isValue env (Lam b e) + | isTyVar b = case isValue env e of + Just _ -> Just LambdaVal + Nothing -> Nothing + | otherwise = Just LambdaVal + +isValue env (Tick t e) + | not (tickishIsCode t) + = isValue env e + +isValue _env expr -- Maybe it's a constructor application + | (Var fun, args, _) <- collectArgsTicks (not . tickishIsCode) expr + = case isDataConWorkId_maybe fun of + + Just con | args `lengthAtLeast` dataConRepArity con + -- Check saturated; might be > because the + -- arity excludes type args + -> Just (ConVal (DataAlt con) args) + + _other | valArgCount args < idArity fun + -- Under-applied function + -> Just LambdaVal -- Partial application + + _other -> Nothing + +isValue _env _expr = Nothing + +valueIsWorkFree :: Value -> Bool +valueIsWorkFree LambdaVal = True +valueIsWorkFree (ConVal _ args) = all exprIsWorkFree args + +samePat :: CallPat -> CallPat -> Bool +samePat (vs1, as1) (vs2, as2) + = all2 same as1 as2 + where + same (Var v1) (Var v2) + | v1 `elem` vs1 = v2 `elem` vs2 + | v2 `elem` vs2 = False + | otherwise = v1 == v2 + + same (Lit l1) (Lit l2) = l1==l2 + same (App f1 a1) (App f2 a2) = same f1 f2 && same a1 a2 + + same (Type {}) (Type {}) = True -- Note [Ignore type differences] + same (Coercion {}) (Coercion {}) = True + same (Tick _ e1) e2 = same e1 e2 -- Ignore casts and notes + same (Cast e1 _) e2 = same e1 e2 + same e1 (Tick _ e2) = same e1 e2 + same e1 (Cast e2 _) = same e1 e2 + + same e1 e2 = WARN( bad e1 || bad e2, ppr e1 $$ ppr e2) + False -- Let, lambda, case should not occur + bad (Case {}) = True + bad (Let {}) = True + bad (Lam {}) = True + bad _other = False + +{- +Note [Ignore type differences] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We do not want to generate specialisations where the call patterns +differ only in their type arguments! Not only is it utterly useless, +but it also means that (with polymorphic recursion) we can generate +an infinite number of specialisations. Example is Data.Sequence.adjustTree, +I think. +-} diff --git a/compiler/GHC/Core/Op/Specialise.hs b/compiler/GHC/Core/Op/Specialise.hs new file mode 100644 index 0000000000..250a0f7313 --- /dev/null +++ b/compiler/GHC/Core/Op/Specialise.hs @@ -0,0 +1,2720 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + +\section[Specialise]{Stamping out overloading, and (optionally) polymorphism} +-} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE ViewPatterns #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +module GHC.Core.Op.Specialise ( specProgram, specUnfolding ) where + +#include "HsVersions.h" + +import GhcPrelude + +import Id +import TcType hiding( substTy ) +import GHC.Core.Type hiding( substTy, extendTvSubstList ) +import GHC.Core.Predicate +import Module( Module, HasModule(..) ) +import GHC.Core.Coercion( Coercion ) +import GHC.Core.Op.Monad +import qualified GHC.Core.Subst +import GHC.Core.Unfold +import Var ( isLocalVar ) +import VarSet +import VarEnv +import GHC.Core +import GHC.Core.Rules +import GHC.Core.SimpleOpt ( collectBindersPushingCo ) +import GHC.Core.Utils ( exprIsTrivial, mkCast, exprType ) +import GHC.Core.FVs +import GHC.Core.Arity ( etaExpandToJoinPointRule ) +import UniqSupply +import Name +import MkId ( voidArgId, voidPrimId ) +import Maybes ( mapMaybe, isJust ) +import MonadUtils ( foldlM ) +import BasicTypes +import GHC.Driver.Types +import Bag +import GHC.Driver.Session +import Util +import Outputable +import FastString +import State +import UniqDFM +import GHC.Core.TyCo.Rep (TyCoBinder (..)) + +import Control.Monad +import qualified Control.Monad.Fail as MonadFail + +{- +************************************************************************ +* * +\subsection[notes-Specialise]{Implementation notes [SLPJ, Aug 18 1993]} +* * +************************************************************************ + +These notes describe how we implement specialisation to eliminate +overloading. + +The specialisation pass works on Core +syntax, complete with all the explicit dictionary application, +abstraction and construction as added by the type checker. The +existing type checker remains largely as it is. + +One important thought: the {\em types} passed to an overloaded +function, and the {\em dictionaries} passed are mutually redundant. +If the same function is applied to the same type(s) then it is sure to +be applied to the same dictionary(s)---or rather to the same {\em +values}. (The arguments might look different but they will evaluate +to the same value.) + +Second important thought: we know that we can make progress by +treating dictionary arguments as static and worth specialising on. So +we can do without binding-time analysis, and instead specialise on +dictionary arguments and no others. + +The basic idea +~~~~~~~~~~~~~~ +Suppose we have + + let f = <f_rhs> + in <body> + +and suppose f is overloaded. + +STEP 1: CALL-INSTANCE COLLECTION + +We traverse <body>, accumulating all applications of f to types and +dictionaries. + +(Might there be partial applications, to just some of its types and +dictionaries? In principle yes, but in practice the type checker only +builds applications of f to all its types and dictionaries, so partial +applications could only arise as a result of transformation, and even +then I think it's unlikely. In any case, we simply don't accumulate such +partial applications.) + + +STEP 2: EQUIVALENCES + +So now we have a collection of calls to f: + f t1 t2 d1 d2 + f t3 t4 d3 d4 + ... +Notice that f may take several type arguments. To avoid ambiguity, we +say that f is called at type t1/t2 and t3/t4. + +We take equivalence classes using equality of the *types* (ignoring +the dictionary args, which as mentioned previously are redundant). + +STEP 3: SPECIALISATION + +For each equivalence class, choose a representative (f t1 t2 d1 d2), +and create a local instance of f, defined thus: + + f@t1/t2 = <f_rhs> t1 t2 d1 d2 + +f_rhs presumably has some big lambdas and dictionary lambdas, so lots +of simplification will now result. However we don't actually *do* that +simplification. Rather, we leave it for the simplifier to do. If we +*did* do it, though, we'd get more call instances from the specialised +RHS. We can work out what they are by instantiating the call-instance +set from f's RHS with the types t1, t2. + +Add this new id to f's IdInfo, to record that f has a specialised version. + +Before doing any of this, check that f's IdInfo doesn't already +tell us about an existing instance of f at the required type/s. +(This might happen if specialisation was applied more than once, or +it might arise from user SPECIALIZE pragmas.) + +Recursion +~~~~~~~~~ +Wait a minute! What if f is recursive? Then we can't just plug in +its right-hand side, can we? + +But it's ok. The type checker *always* creates non-recursive definitions +for overloaded recursive functions. For example: + + f x = f (x+x) -- Yes I know its silly + +becomes + + f a (d::Num a) = let p = +.sel a d + in + letrec fl (y::a) = fl (p y y) + in + fl + +We still have recursion for non-overloaded functions which we +specialise, but the recursive call should get specialised to the +same recursive version. + + +Polymorphism 1 +~~~~~~~~~~~~~~ + +All this is crystal clear when the function is applied to *constant +types*; that is, types which have no type variables inside. But what if +it is applied to non-constant types? Suppose we find a call of f at type +t1/t2. There are two possibilities: + +(a) The free type variables of t1, t2 are in scope at the definition point +of f. In this case there's no problem, we proceed just as before. A common +example is as follows. Here's the Haskell: + + g y = let f x = x+x + in f y + f y + +After typechecking we have + + g a (d::Num a) (y::a) = let f b (d'::Num b) (x::b) = +.sel b d' x x + in +.sel a d (f a d y) (f a d y) + +Notice that the call to f is at type type "a"; a non-constant type. +Both calls to f are at the same type, so we can specialise to give: + + g a (d::Num a) (y::a) = let f@a (x::a) = +.sel a d x x + in +.sel a d (f@a y) (f@a y) + + +(b) The other case is when the type variables in the instance types +are *not* in scope at the definition point of f. The example we are +working with above is a good case. There are two instances of (+.sel a d), +but "a" is not in scope at the definition of +.sel. Can we do anything? +Yes, we can "common them up", a sort of limited common sub-expression deal. +This would give: + + g a (d::Num a) (y::a) = let +.sel@a = +.sel a d + f@a (x::a) = +.sel@a x x + in +.sel@a (f@a y) (f@a y) + +This can save work, and can't be spotted by the type checker, because +the two instances of +.sel weren't originally at the same type. + +Further notes on (b) + +* There are quite a few variations here. For example, the defn of + +.sel could be floated outside the \y, to attempt to gain laziness. + It certainly mustn't be floated outside the \d because the d has to + be in scope too. + +* We don't want to inline f_rhs in this case, because +that will duplicate code. Just commoning up the call is the point. + +* Nothing gets added to +.sel's IdInfo. + +* Don't bother unless the equivalence class has more than one item! + +Not clear whether this is all worth it. It is of course OK to +simply discard call-instances when passing a big lambda. + +Polymorphism 2 -- Overloading +~~~~~~~~~~~~~~ +Consider a function whose most general type is + + f :: forall a b. Ord a => [a] -> b -> b + +There is really no point in making a version of g at Int/Int and another +at Int/Bool, because it's only instantiating the type variable "a" which +buys us any efficiency. Since g is completely polymorphic in b there +ain't much point in making separate versions of g for the different +b types. + +That suggests that we should identify which of g's type variables +are constrained (like "a") and which are unconstrained (like "b"). +Then when taking equivalence classes in STEP 2, we ignore the type args +corresponding to unconstrained type variable. In STEP 3 we make +polymorphic versions. Thus: + + f@t1/ = /\b -> <f_rhs> t1 b d1 d2 + +We do this. + + +Dictionary floating +~~~~~~~~~~~~~~~~~~~ +Consider this + + f a (d::Num a) = let g = ... + in + ...(let d1::Ord a = Num.Ord.sel a d in g a d1)... + +Here, g is only called at one type, but the dictionary isn't in scope at the +definition point for g. Usually the type checker would build a +definition for d1 which enclosed g, but the transformation system +might have moved d1's defn inward. Solution: float dictionary bindings +outwards along with call instances. + +Consider + + f x = let g p q = p==q + h r s = (r+s, g r s) + in + h x x + + +Before specialisation, leaving out type abstractions we have + + f df x = let g :: Eq a => a -> a -> Bool + g dg p q = == dg p q + h :: Num a => a -> a -> (a, Bool) + h dh r s = let deq = eqFromNum dh + in (+ dh r s, g deq r s) + in + h df x x + +After specialising h we get a specialised version of h, like this: + + h' r s = let deq = eqFromNum df + in (+ df r s, g deq r s) + +But we can't naively make an instance for g from this, because deq is not in scope +at the defn of g. Instead, we have to float out the (new) defn of deq +to widen its scope. Notice that this floating can't be done in advance -- it only +shows up when specialisation is done. + +User SPECIALIZE pragmas +~~~~~~~~~~~~~~~~~~~~~~~ +Specialisation pragmas can be digested by the type checker, and implemented +by adding extra definitions along with that of f, in the same way as before + + f@t1/t2 = <f_rhs> t1 t2 d1 d2 + +Indeed the pragmas *have* to be dealt with by the type checker, because +only it knows how to build the dictionaries d1 and d2! For example + + g :: Ord a => [a] -> [a] + {-# SPECIALIZE f :: [Tree Int] -> [Tree Int] #-} + +Here, the specialised version of g is an application of g's rhs to the +Ord dictionary for (Tree Int), which only the type checker can conjure +up. There might not even *be* one, if (Tree Int) is not an instance of +Ord! (All the other specialision has suitable dictionaries to hand +from actual calls.) + +Problem. The type checker doesn't have to hand a convenient <f_rhs>, because +it is buried in a complex (as-yet-un-desugared) binding group. +Maybe we should say + + f@t1/t2 = f* t1 t2 d1 d2 + +where f* is the Id f with an IdInfo which says "inline me regardless!". +Indeed all the specialisation could be done in this way. +That in turn means that the simplifier has to be prepared to inline absolutely +any in-scope let-bound thing. + + +Again, the pragma should permit polymorphism in unconstrained variables: + + h :: Ord a => [a] -> b -> b + {-# SPECIALIZE h :: [Int] -> b -> b #-} + +We *insist* that all overloaded type variables are specialised to ground types, +(and hence there can be no context inside a SPECIALIZE pragma). +We *permit* unconstrained type variables to be specialised to + - a ground type + - or left as a polymorphic type variable +but nothing in between. So + + {-# SPECIALIZE h :: [Int] -> [c] -> [c] #-} + +is *illegal*. (It can be handled, but it adds complication, and gains the +programmer nothing.) + + +SPECIALISING INSTANCE DECLARATIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + + instance Foo a => Foo [a] where + ... + {-# SPECIALIZE instance Foo [Int] #-} + +The original instance decl creates a dictionary-function +definition: + + dfun.Foo.List :: forall a. Foo a -> Foo [a] + +The SPECIALIZE pragma just makes a specialised copy, just as for +ordinary function definitions: + + dfun.Foo.List@Int :: Foo [Int] + dfun.Foo.List@Int = dfun.Foo.List Int dFooInt + +The information about what instance of the dfun exist gets added to +the dfun's IdInfo in the same way as a user-defined function too. + + +Automatic instance decl specialisation? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Can instance decls be specialised automatically? It's tricky. +We could collect call-instance information for each dfun, but +then when we specialised their bodies we'd get new call-instances +for ordinary functions; and when we specialised their bodies, we might get +new call-instances of the dfuns, and so on. This all arises because of +the unrestricted mutual recursion between instance decls and value decls. + +Still, there's no actual problem; it just means that we may not do all +the specialisation we could theoretically do. + +Furthermore, instance decls are usually exported and used non-locally, +so we'll want to compile enough to get those specialisations done. + +Lastly, there's no such thing as a local instance decl, so we can +survive solely by spitting out *usage* information, and then reading that +back in as a pragma when next compiling the file. So for now, +we only specialise instance decls in response to pragmas. + + +SPITTING OUT USAGE INFORMATION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To spit out usage information we need to traverse the code collecting +call-instance information for all imported (non-prelude?) functions +and data types. Then we equivalence-class it and spit it out. + +This is done at the top-level when all the call instances which escape +must be for imported functions and data types. + +*** Not currently done *** + + +Partial specialisation by pragmas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +What about partial specialisation: + + k :: (Ord a, Eq b) => [a] -> b -> b -> [a] + {-# SPECIALIZE k :: Eq b => [Int] -> b -> b -> [a] #-} + +or even + + {-# SPECIALIZE k :: Eq b => [Int] -> [b] -> [b] -> [a] #-} + +Seems quite reasonable. Similar things could be done with instance decls: + + instance (Foo a, Foo b) => Foo (a,b) where + ... + {-# SPECIALIZE instance Foo a => Foo (a,Int) #-} + {-# SPECIALIZE instance Foo b => Foo (Int,b) #-} + +Ho hum. Things are complex enough without this. I pass. + + +Requirements for the simplifier +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The simplifier has to be able to take advantage of the specialisation. + +* When the simplifier finds an application of a polymorphic f, it looks in +f's IdInfo in case there is a suitable instance to call instead. This converts + + f t1 t2 d1 d2 ===> f_t1_t2 + +Note that the dictionaries get eaten up too! + +* Dictionary selection operations on constant dictionaries must be + short-circuited: + + +.sel Int d ===> +Int + +The obvious way to do this is in the same way as other specialised +calls: +.sel has inside it some IdInfo which tells that if it's applied +to the type Int then it should eat a dictionary and transform to +Int. + +In short, dictionary selectors need IdInfo inside them for constant +methods. + +* Exactly the same applies if a superclass dictionary is being + extracted: + + Eq.sel Int d ===> dEqInt + +* Something similar applies to dictionary construction too. Suppose +dfun.Eq.List is the function taking a dictionary for (Eq a) to +one for (Eq [a]). Then we want + + dfun.Eq.List Int d ===> dEq.List_Int + +Where does the Eq [Int] dictionary come from? It is built in +response to a SPECIALIZE pragma on the Eq [a] instance decl. + +In short, dfun Ids need IdInfo with a specialisation for each +constant instance of their instance declaration. + +All this uses a single mechanism: the SpecEnv inside an Id + + +What does the specialisation IdInfo look like? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The SpecEnv of an Id maps a list of types (the template) to an expression + + [Type] |-> Expr + +For example, if f has this RuleInfo: + + [Int, a] -> \d:Ord Int. f' a + +it means that we can replace the call + + f Int t ===> (\d. f' t) + +This chucks one dictionary away and proceeds with the +specialised version of f, namely f'. + + +What can't be done this way? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There is no way, post-typechecker, to get a dictionary for (say) +Eq a from a dictionary for Eq [a]. So if we find + + ==.sel [t] d + +we can't transform to + + eqList (==.sel t d') + +where + eqList :: (a->a->Bool) -> [a] -> [a] -> Bool + +Of course, we currently have no way to automatically derive +eqList, nor to connect it to the Eq [a] instance decl, but you +can imagine that it might somehow be possible. Taking advantage +of this is permanently ruled out. + +Still, this is no great hardship, because we intend to eliminate +overloading altogether anyway! + +A note about non-tyvar dictionaries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some Ids have types like + + forall a,b,c. Eq a -> Ord [a] -> tau + +This seems curious at first, because we usually only have dictionary +args whose types are of the form (C a) where a is a type variable. +But this doesn't hold for the functions arising from instance decls, +which sometimes get arguments with types of form (C (T a)) for some +type constructor T. + +Should we specialise wrt this compound-type dictionary? We used to say +"no", saying: + "This is a heuristic judgement, as indeed is the fact that we + specialise wrt only dictionaries. We choose *not* to specialise + wrt compound dictionaries because at the moment the only place + they show up is in instance decls, where they are simply plugged + into a returned dictionary. So nothing is gained by specialising + wrt them." + +But it is simpler and more uniform to specialise wrt these dicts too; +and in future GHC is likely to support full fledged type signatures +like + f :: Eq [(a,b)] => ... + + +************************************************************************ +* * +\subsubsection{The new specialiser} +* * +************************************************************************ + +Our basic game plan is this. For let(rec) bound function + f :: (C a, D c) => (a,b,c,d) -> Bool + +* Find any specialised calls of f, (f ts ds), where + ts are the type arguments t1 .. t4, and + ds are the dictionary arguments d1 .. d2. + +* Add a new definition for f1 (say): + + f1 = /\ b d -> (..body of f..) t1 b t3 d d1 d2 + + Note that we abstract over the unconstrained type arguments. + +* Add the mapping + + [t1,b,t3,d] |-> \d1 d2 -> f1 b d + + to the specialisations of f. This will be used by the + simplifier to replace calls + (f t1 t2 t3 t4) da db + by + (\d1 d1 -> f1 t2 t4) da db + + All the stuff about how many dictionaries to discard, and what types + to apply the specialised function to, are handled by the fact that the + SpecEnv contains a template for the result of the specialisation. + +We don't build *partial* specialisations for f. For example: + + f :: Eq a => a -> a -> Bool + {-# SPECIALISE f :: (Eq b, Eq c) => (b,c) -> (b,c) -> Bool #-} + +Here, little is gained by making a specialised copy of f. +There's a distinct danger that the specialised version would +first build a dictionary for (Eq b, Eq c), and then select the (==) +method from it! Even if it didn't, not a great deal is saved. + +We do, however, generate polymorphic, but not overloaded, specialisations: + + f :: Eq a => [a] -> b -> b -> b + ... SPECIALISE f :: [Int] -> b -> b -> b ... + +Hence, the invariant is this: + + *** no specialised version is overloaded *** + + +************************************************************************ +* * +\subsubsection{The exported function} +* * +************************************************************************ +-} + +-- | Specialise calls to type-class overloaded functions occurring in a program. +specProgram :: ModGuts -> CoreM ModGuts +specProgram guts@(ModGuts { mg_module = this_mod + , mg_rules = local_rules + , mg_binds = binds }) + = do { dflags <- getDynFlags + + -- Specialise the bindings of this module + ; (binds', uds) <- runSpecM dflags this_mod (go binds) + + -- Specialise imported functions + ; hpt_rules <- getRuleBase + ; let rule_base = extendRuleBaseList hpt_rules local_rules + ; (new_rules, spec_binds) <- specImports dflags this_mod top_env emptyVarSet + [] rule_base uds + + ; let final_binds + | null spec_binds = binds' + | otherwise = Rec (flattenBinds spec_binds) : binds' + -- Note [Glom the bindings if imported functions are specialised] + + ; return (guts { mg_binds = final_binds + , mg_rules = new_rules ++ local_rules }) } + where + -- We need to start with a Subst that knows all the things + -- that are in scope, so that the substitution engine doesn't + -- accidentally re-use a unique that's already in use + -- Easiest thing is to do it all at once, as if all the top-level + -- decls were mutually recursive + top_env = SE { se_subst = GHC.Core.Subst.mkEmptySubst $ mkInScopeSet $ mkVarSet $ + bindersOfBinds binds + , se_interesting = emptyVarSet } + + go [] = return ([], emptyUDs) + go (bind:binds) = do (binds', uds) <- go binds + (bind', uds') <- specBind top_env bind uds + return (bind' ++ binds', uds') + +{- +Note [Wrap bindings returned by specImports] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +'specImports' returns a set of specialized bindings. However, these are lacking +necessary floated dictionary bindings, which are returned by +UsageDetails(ud_binds). These dictionaries need to be brought into scope with +'wrapDictBinds' before the bindings returned by 'specImports' can be used. See, +for instance, the 'specImports' call in 'specProgram'. + + +Note [Disabling cross-module specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since GHC 7.10 we have performed specialisation of INLINABLE bindings living +in modules outside of the current module. This can sometimes uncover user code +which explodes in size when aggressively optimized. The +-fno-cross-module-specialise option was introduced to allow users to being +bitten by such instances to revert to the pre-7.10 behavior. + +See #10491 +-} + +-- | An argument that we might want to specialise. +-- See Note [Specialising Calls] for the nitty gritty details. +data SpecArg + = + -- | Type arguments that should be specialised, due to appearing + -- free in the type of a 'SpecDict'. + SpecType Type + -- | Type arguments that should remain polymorphic. + | UnspecType + -- | Dictionaries that should be specialised. + | SpecDict DictExpr + -- | Value arguments that should not be specialised. + | UnspecArg + +instance Outputable SpecArg where + ppr (SpecType t) = text "SpecType" <+> ppr t + ppr UnspecType = text "UnspecType" + ppr (SpecDict d) = text "SpecDict" <+> ppr d + ppr UnspecArg = text "UnspecArg" + +getSpecDicts :: [SpecArg] -> [DictExpr] +getSpecDicts = mapMaybe go + where + go (SpecDict d) = Just d + go _ = Nothing + +getSpecTypes :: [SpecArg] -> [Type] +getSpecTypes = mapMaybe go + where + go (SpecType t) = Just t + go _ = Nothing + +isUnspecArg :: SpecArg -> Bool +isUnspecArg UnspecArg = True +isUnspecArg UnspecType = True +isUnspecArg _ = False + +isValueArg :: SpecArg -> Bool +isValueArg UnspecArg = True +isValueArg (SpecDict _) = True +isValueArg _ = False + +-- | Given binders from an original function 'f', and the 'SpecArg's +-- corresponding to its usage, compute everything necessary to build +-- a specialisation. +-- +-- We will use a running example. Consider the function +-- +-- foo :: forall a b. Eq a => Int -> blah +-- foo @a @b dEqA i = blah +-- +-- which is called with the 'CallInfo' +-- +-- [SpecType T1, UnspecType, SpecDict dEqT1, UnspecArg] +-- +-- We'd eventually like to build the RULE +-- +-- RULE "SPEC foo @T1 _" +-- forall @a @b (dEqA' :: Eq a). +-- foo @T1 @b dEqA' = $sfoo @b +-- +-- and the specialisation '$sfoo' +-- +-- $sfoo :: forall b. Int -> blah +-- $sfoo @b = \i -> SUBST[a->T1, dEqA->dEqA'] blah +-- +-- The cases for 'specHeader' below are presented in the same order as this +-- running example. The result of 'specHeader' for this example is as follows: +-- +-- ( -- Returned arguments +-- env + [a -> T1, deqA -> dEqA'] +-- , [] +-- +-- -- RULE helpers +-- , [b, dx', i] +-- , [T1, b, dx', i] +-- +-- -- Specialised function helpers +-- , [b, i] +-- , [dx] +-- , [T1, b, dx_spec, i] +-- ) +specHeader + :: SpecEnv + -> [CoreBndr] -- The binders from the original function 'f' + -> [SpecArg] -- From the CallInfo + -> SpecM ( -- Returned arguments + SpecEnv -- Substitution to apply to the body of 'f' + , [CoreBndr] -- All the remaining unspecialised args from the original function 'f' + + -- RULE helpers + , [CoreBndr] -- Binders for the RULE + , [CoreArg] -- Args for the LHS of the rule + + -- Specialised function helpers + , [CoreBndr] -- Binders for $sf + , [DictBind] -- Auxiliary dictionary bindings + , [CoreExpr] -- Specialised arguments for unfolding + ) + +-- We want to specialise on type 'T1', and so we must construct a substitution +-- 'a->T1', as well as a LHS argument for the resulting RULE and unfolding +-- details. +specHeader env (bndr : bndrs) (SpecType t : args) + = do { let env' = extendTvSubstList env [(bndr, t)] + ; (env'', unused_bndrs, rule_bs, rule_es, bs', dx, spec_args) + <- specHeader env' bndrs args + ; pure ( env'' + , unused_bndrs + , rule_bs + , Type t : rule_es + , bs' + , dx + , Type t : spec_args + ) + } + +-- Next we have a type that we don't want to specialise. We need to perform +-- a substitution on it (in case the type refers to 'a'). Additionally, we need +-- to produce a binder, LHS argument and RHS argument for the resulting rule, +-- /and/ a binder for the specialised body. +specHeader env (bndr : bndrs) (UnspecType : args) + = do { let (env', bndr') = substBndr env bndr + ; (env'', unused_bndrs, rule_bs, rule_es, bs', dx, spec_args) + <- specHeader env' bndrs args + ; pure ( env'' + , unused_bndrs + , bndr' : rule_bs + , varToCoreExpr bndr' : rule_es + , bndr' : bs' + , dx + , varToCoreExpr bndr' : spec_args + ) + } + +-- Next we want to specialise the 'Eq a' dict away. We need to construct +-- a wildcard binder to match the dictionary (See Note [Specialising Calls] for +-- the nitty-gritty), as a LHS rule and unfolding details. +specHeader env (bndr : bndrs) (SpecDict d : args) + = do { inst_dict_id <- newDictBndr env bndr + ; let (rhs_env2, dx_binds, spec_dict_args') + = bindAuxiliaryDicts env [bndr] [d] [inst_dict_id] + ; (env', unused_bndrs, rule_bs, rule_es, bs', dx, spec_args) + <- specHeader rhs_env2 bndrs args + ; pure ( env' + , unused_bndrs + -- See Note [Evidence foralls] + , exprFreeIdsList (varToCoreExpr inst_dict_id) ++ rule_bs + , varToCoreExpr inst_dict_id : rule_es + , bs' + , dx_binds ++ dx + , spec_dict_args' ++ spec_args + ) + } + +-- Finally, we have the unspecialised argument 'i'. We need to produce +-- a binder, LHS and RHS argument for the RULE, and a binder for the +-- specialised body. +-- +-- NB: Calls to 'specHeader' will trim off any trailing 'UnspecArg's, which is +-- why 'i' doesn't appear in our RULE above. But we have no guarantee that +-- there aren't 'UnspecArg's which come /before/ all of the dictionaries, so +-- this case must be here. +specHeader env (bndr : bndrs) (UnspecArg : args) + = do { let (env', bndr') = substBndr env bndr + ; (env'', unused_bndrs, rule_bs, rule_es, bs', dx, spec_args) + <- specHeader env' bndrs args + ; pure ( env'' + , unused_bndrs + , bndr' : rule_bs + , varToCoreExpr bndr' : rule_es + , bndr' : bs' + , dx + , varToCoreExpr bndr' : spec_args + ) + } + +-- Return all remaining binders from the original function. These have the +-- invariant that they should all correspond to unspecialised arguments, so +-- it's safe to stop processing at this point. +specHeader env bndrs [] = pure (env, bndrs, [], [], [], [], []) +specHeader env [] _ = pure (env, [], [], [], [], [], []) + + +-- | Specialise a set of calls to imported bindings +specImports :: DynFlags + -> Module + -> SpecEnv -- Passed in so that all top-level Ids are in scope + -> VarSet -- Don't specialise these ones + -- See Note [Avoiding recursive specialisation] + -> [Id] -- Stack of imported functions being specialised + -> RuleBase -- Rules from this module and the home package + -- (but not external packages, which can change) + -> UsageDetails -- Calls for imported things, and floating bindings + -> CoreM ( [CoreRule] -- New rules + , [CoreBind] ) -- Specialised bindings + -- See Note [Wrapping bindings returned by specImports] +specImports dflags this_mod top_env done callers rule_base + (MkUD { ud_binds = dict_binds, ud_calls = calls }) + -- See Note [Disabling cross-module specialisation] + | not $ gopt Opt_CrossModuleSpecialise dflags + = return ([], []) + + | otherwise + = do { let import_calls = dVarEnvElts calls + ; (rules, spec_binds) <- go rule_base import_calls + + -- Don't forget to wrap the specialized bindings with + -- bindings for the needed dictionaries. + -- See Note [Wrap bindings returned by specImports] + ; let spec_binds' = wrapDictBinds dict_binds spec_binds + + ; return (rules, spec_binds') } + where + go :: RuleBase -> [CallInfoSet] -> CoreM ([CoreRule], [CoreBind]) + go _ [] = return ([], []) + go rb (cis@(CIS fn _) : other_calls) + = do { let ok_calls = filterCalls cis dict_binds + -- Drop calls that (directly or indirectly) refer to fn + -- See Note [Avoiding loops] +-- ; debugTraceMsg (text "specImport" <+> vcat [ ppr fn +-- , text "calls" <+> ppr cis +-- , text "ud_binds =" <+> ppr dict_binds +-- , text "dump set =" <+> ppr dump_set +-- , text "filtered calls =" <+> ppr ok_calls ]) + ; (rules1, spec_binds1) <- specImport dflags this_mod top_env + done callers rb fn ok_calls + + ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls + ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) } + +specImport :: DynFlags + -> Module + -> SpecEnv -- Passed in so that all top-level Ids are in scope + -> VarSet -- Don't specialise these + -- See Note [Avoiding recursive specialisation] + -> [Id] -- Stack of imported functions being specialised + -> RuleBase -- Rules from this module + -> Id -> [CallInfo] -- Imported function and calls for it + -> CoreM ( [CoreRule] -- New rules + , [CoreBind] ) -- Specialised bindings +specImport dflags this_mod top_env done callers rb fn calls_for_fn + | fn `elemVarSet` done + = return ([], []) -- No warning. This actually happens all the time + -- when specialising a recursive function, because + -- the RHS of the specialised function contains a recursive + -- call to the original function + + | null calls_for_fn -- We filtered out all the calls in deleteCallsMentioning + = return ([], []) + + | wantSpecImport dflags unfolding + , Just rhs <- maybeUnfoldingTemplate unfolding + = do { -- Get rules from the external package state + -- We keep doing this in case we "page-fault in" + -- more rules as we go along + ; hsc_env <- getHscEnv + ; eps <- liftIO $ hscEPS hsc_env + ; vis_orphs <- getVisibleOrphanMods + ; let full_rb = unionRuleBase rb (eps_rule_base eps) + rules_for_fn = getRules (RuleEnv full_rb vis_orphs) fn + + ; (rules1, spec_pairs, uds) + <- -- pprTrace "specImport1" (vcat [ppr fn, ppr calls_for_fn, ppr rhs]) $ + runSpecM dflags this_mod $ + specCalls (Just this_mod) top_env rules_for_fn calls_for_fn fn rhs + ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs] + -- After the rules kick in we may get recursion, but + -- we rely on a global GlomBinds to sort that out later + -- See Note [Glom the bindings if imported functions are specialised] + + -- Now specialise any cascaded calls + ; (rules2, spec_binds2) <- -- pprTrace "specImport 2" (ppr fn $$ ppr rules1 $$ ppr spec_binds1) $ + specImports dflags this_mod top_env + (extendVarSet done fn) + (fn:callers) + (extendRuleBaseList rb rules1) + uds + + ; let final_binds = spec_binds2 ++ spec_binds1 + + ; return (rules2 ++ rules1, final_binds) } + + | otherwise = do { tryWarnMissingSpecs dflags callers fn calls_for_fn + ; return ([], [])} + + where + unfolding = realIdUnfolding fn -- We want to see the unfolding even for loop breakers + +-- | Returns whether or not to show a missed-spec warning. +-- If -Wall-missed-specializations is on, show the warning. +-- Otherwise, if -Wmissed-specializations is on, only show a warning +-- if there is at least one imported function being specialized, +-- and if all imported functions are marked with an inline pragma +-- Use the most specific warning as the reason. +tryWarnMissingSpecs :: DynFlags -> [Id] -> Id -> [CallInfo] -> CoreM () +-- See Note [Warning about missed specialisations] +tryWarnMissingSpecs dflags callers fn calls_for_fn + | wopt Opt_WarnMissedSpecs dflags + && not (null callers) + && allCallersInlined = doWarn $ Reason Opt_WarnMissedSpecs + | wopt Opt_WarnAllMissedSpecs dflags = doWarn $ Reason Opt_WarnAllMissedSpecs + | otherwise = return () + where + allCallersInlined = all (isAnyInlinePragma . idInlinePragma) callers + doWarn reason = + warnMsg reason + (vcat [ hang (text ("Could not specialise imported function") <+> quotes (ppr fn)) + 2 (vcat [ text "when specialising" <+> quotes (ppr caller) + | caller <- callers]) + , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn)) + , text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ]) + +wantSpecImport :: DynFlags -> Unfolding -> Bool +-- See Note [Specialise imported INLINABLE things] +wantSpecImport dflags unf + = case unf of + NoUnfolding -> False + BootUnfolding -> False + OtherCon {} -> False + DFunUnfolding {} -> True + CoreUnfolding { uf_src = src, uf_guidance = _guidance } + | gopt Opt_SpecialiseAggressively dflags -> True + | isStableSource src -> True + -- Specialise even INLINE things; it hasn't inlined yet, + -- so perhaps it never will. Moreover it may have calls + -- inside it that we want to specialise + | otherwise -> False -- Stable, not INLINE, hence INLINABLE + +{- Note [Warning about missed specialisations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose + * In module Lib, you carefully mark a function 'foo' INLINABLE + * Import Lib(foo) into another module M + * Call 'foo' at some specialised type in M +Then you jolly well expect it to be specialised in M. But what if +'foo' calls another function 'Lib.bar'. Then you'd like 'bar' to be +specialised too. But if 'bar' is not marked INLINABLE it may well +not be specialised. The warning Opt_WarnMissedSpecs warns about this. + +It's more noisy to warning about a missed specialisation opportunity +for /every/ overloaded imported function, but sometimes useful. That +is what Opt_WarnAllMissedSpecs does. + +ToDo: warn about missed opportunities for local functions. + +Note [Specialise imported INLINABLE things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +What imported functions do we specialise? The basic set is + * DFuns and things with INLINABLE pragmas. +but with -fspecialise-aggressively we add + * Anything with an unfolding template + +#8874 has a good example of why we want to auto-specialise DFuns. + +We have the -fspecialise-aggressively flag (usually off), because we +risk lots of orphan modules from over-vigorous specialisation. +However it's not a big deal: anything non-recursive with an +unfolding-template will probably have been inlined already. + +Note [Glom the bindings if imported functions are specialised] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have an imported, *recursive*, INLINABLE function + f :: Eq a => a -> a + f = /\a \d x. ...(f a d)... +In the module being compiled we have + g x = f (x::Int) +Now we'll make a specialised function + f_spec :: Int -> Int + f_spec = \x -> ...(f Int dInt)... + {-# RULE f Int _ = f_spec #-} + g = \x. f Int dInt x +Note that f_spec doesn't look recursive +After rewriting with the RULE, we get + f_spec = \x -> ...(f_spec)... +BUT since f_spec was non-recursive before it'll *stay* non-recursive. +The occurrence analyser never turns a NonRec into a Rec. So we must +make sure that f_spec is recursive. Easiest thing is to make all +the specialisations for imported bindings recursive. + + +Note [Avoiding recursive specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we specialise 'f' we may find new overloaded calls to 'g', 'h' in +'f's RHS. So we want to specialise g,h. But we don't want to +specialise f any more! It's possible that f's RHS might have a +recursive yet-more-specialised call, so we'd diverge in that case. +And if the call is to the same type, one specialisation is enough. +Avoiding this recursive specialisation loop is the reason for the +'done' VarSet passed to specImports and specImport. + +************************************************************************ +* * +\subsubsection{@specExpr@: the main function} +* * +************************************************************************ +-} + +data SpecEnv + = SE { se_subst :: GHC.Core.Subst.Subst + -- We carry a substitution down: + -- a) we must clone any binding that might float outwards, + -- to avoid name clashes + -- b) we carry a type substitution to use when analysing + -- the RHS of specialised bindings (no type-let!) + + + , se_interesting :: VarSet + -- Dict Ids that we know something about + -- and hence may be worth specialising against + -- See Note [Interesting dictionary arguments] + } + +specVar :: SpecEnv -> Id -> CoreExpr +specVar env v = GHC.Core.Subst.lookupIdSubst (text "specVar") (se_subst env) v + +specExpr :: SpecEnv -> CoreExpr -> SpecM (CoreExpr, UsageDetails) + +---------------- First the easy cases -------------------- +specExpr env (Type ty) = return (Type (substTy env ty), emptyUDs) +specExpr env (Coercion co) = return (Coercion (substCo env co), emptyUDs) +specExpr env (Var v) = return (specVar env v, emptyUDs) +specExpr _ (Lit lit) = return (Lit lit, emptyUDs) +specExpr env (Cast e co) + = do { (e', uds) <- specExpr env e + ; return ((mkCast e' (substCo env co)), uds) } +specExpr env (Tick tickish body) + = do { (body', uds) <- specExpr env body + ; return (Tick (specTickish env tickish) body', uds) } + +---------------- Applications might generate a call instance -------------------- +specExpr env expr@(App {}) + = go expr [] + where + go (App fun arg) args = do (arg', uds_arg) <- specExpr env arg + (fun', uds_app) <- go fun (arg':args) + return (App fun' arg', uds_arg `plusUDs` uds_app) + + go (Var f) args = case specVar env f of + Var f' -> return (Var f', mkCallUDs env f' args) + e' -> return (e', emptyUDs) -- I don't expect this! + go other _ = specExpr env other + +---------------- Lambda/case require dumping of usage details -------------------- +specExpr env e@(Lam _ _) = do + (body', uds) <- specExpr env' body + let (free_uds, dumped_dbs) = dumpUDs bndrs' uds + return (mkLams bndrs' (wrapDictBindsE dumped_dbs body'), free_uds) + where + (bndrs, body) = collectBinders e + (env', bndrs') = substBndrs env bndrs + -- More efficient to collect a group of binders together all at once + -- and we don't want to split a lambda group with dumped bindings + +specExpr env (Case scrut case_bndr ty alts) + = do { (scrut', scrut_uds) <- specExpr env scrut + ; (scrut'', case_bndr', alts', alts_uds) + <- specCase env scrut' case_bndr alts + ; return (Case scrut'' case_bndr' (substTy env ty) alts' + , scrut_uds `plusUDs` alts_uds) } + +---------------- Finally, let is the interesting case -------------------- +specExpr env (Let bind body) + = do { -- Clone binders + (rhs_env, body_env, bind') <- cloneBindSM env bind + + -- Deal with the body + ; (body', body_uds) <- specExpr body_env body + + -- Deal with the bindings + ; (binds', uds) <- specBind rhs_env bind' body_uds + + -- All done + ; return (foldr Let body' binds', uds) } + +specTickish :: SpecEnv -> Tickish Id -> Tickish Id +specTickish env (Breakpoint ix ids) + = Breakpoint ix [ id' | id <- ids, Var id' <- [specVar env id]] + -- drop vars from the list if they have a non-variable substitution. + -- should never happen, but it's harmless to drop them anyway. +specTickish _ other_tickish = other_tickish + +specCase :: SpecEnv + -> CoreExpr -- Scrutinee, already done + -> Id -> [CoreAlt] + -> SpecM ( CoreExpr -- New scrutinee + , Id + , [CoreAlt] + , UsageDetails) +specCase env scrut' case_bndr [(con, args, rhs)] + | isDictId case_bndr -- See Note [Floating dictionaries out of cases] + , interestingDict env scrut' + , not (isDeadBinder case_bndr && null sc_args') + = do { (case_bndr_flt : sc_args_flt) <- mapM clone_me (case_bndr' : sc_args') + + ; let sc_rhss = [ Case (Var case_bndr_flt) case_bndr' (idType sc_arg') + [(con, args', Var sc_arg')] + | sc_arg' <- sc_args' ] + + -- Extend the substitution for RHS to map the *original* binders + -- to their floated versions. + mb_sc_flts :: [Maybe DictId] + mb_sc_flts = map (lookupVarEnv clone_env) args' + clone_env = zipVarEnv sc_args' sc_args_flt + subst_prs = (case_bndr, Var case_bndr_flt) + : [ (arg, Var sc_flt) + | (arg, Just sc_flt) <- args `zip` mb_sc_flts ] + env_rhs' = env_rhs { se_subst = GHC.Core.Subst.extendIdSubstList (se_subst env_rhs) subst_prs + , se_interesting = se_interesting env_rhs `extendVarSetList` + (case_bndr_flt : sc_args_flt) } + + ; (rhs', rhs_uds) <- specExpr env_rhs' rhs + ; let scrut_bind = mkDB (NonRec case_bndr_flt scrut') + case_bndr_set = unitVarSet case_bndr_flt + sc_binds = [(NonRec sc_arg_flt sc_rhs, case_bndr_set) + | (sc_arg_flt, sc_rhs) <- sc_args_flt `zip` sc_rhss ] + flt_binds = scrut_bind : sc_binds + (free_uds, dumped_dbs) = dumpUDs (case_bndr':args') rhs_uds + all_uds = flt_binds `addDictBinds` free_uds + alt' = (con, args', wrapDictBindsE dumped_dbs rhs') + ; return (Var case_bndr_flt, case_bndr', [alt'], all_uds) } + where + (env_rhs, (case_bndr':args')) = substBndrs env (case_bndr:args) + sc_args' = filter is_flt_sc_arg args' + + clone_me bndr = do { uniq <- getUniqueM + ; return (mkUserLocalOrCoVar occ uniq ty loc) } + where + name = idName bndr + ty = idType bndr + occ = nameOccName name + loc = getSrcSpan name + + arg_set = mkVarSet args' + is_flt_sc_arg var = isId var + && not (isDeadBinder var) + && isDictTy var_ty + && not (tyCoVarsOfType var_ty `intersectsVarSet` arg_set) + where + var_ty = idType var + + +specCase env scrut case_bndr alts + = do { (alts', uds_alts) <- mapAndCombineSM spec_alt alts + ; return (scrut, case_bndr', alts', uds_alts) } + where + (env_alt, case_bndr') = substBndr env case_bndr + spec_alt (con, args, rhs) = do + (rhs', uds) <- specExpr env_rhs rhs + let (free_uds, dumped_dbs) = dumpUDs (case_bndr' : args') uds + return ((con, args', wrapDictBindsE dumped_dbs rhs'), free_uds) + where + (env_rhs, args') = substBndrs env_alt args + +{- +Note [Floating dictionaries out of cases] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + g = \d. case d of { MkD sc ... -> ...(f sc)... } +Naively we can't float d2's binding out of the case expression, +because 'sc' is bound by the case, and that in turn means we can't +specialise f, which seems a pity. + +So we invert the case, by floating out a binding +for 'sc_flt' thus: + sc_flt = case d of { MkD sc ... -> sc } +Now we can float the call instance for 'f'. Indeed this is just +what'll happen if 'sc' was originally bound with a let binding, +but case is more efficient, and necessary with equalities. So it's +good to work with both. + +You might think that this won't make any difference, because the +call instance will only get nuked by the \d. BUT if 'g' itself is +specialised, then transitively we should be able to specialise f. + +In general, given + case e of cb { MkD sc ... -> ...(f sc)... } +we transform to + let cb_flt = e + sc_flt = case cb_flt of { MkD sc ... -> sc } + in + case cb_flt of bg { MkD sc ... -> ....(f sc_flt)... } + +The "_flt" things are the floated binds; we use the current substitution +to substitute sc -> sc_flt in the RHS + +************************************************************************ +* * + Dealing with a binding +* * +************************************************************************ +-} + +specBind :: SpecEnv -- Use this for RHSs + -> CoreBind -- Binders are already cloned by cloneBindSM, + -- but RHSs are un-processed + -> UsageDetails -- Info on how the scope of the binding + -> SpecM ([CoreBind], -- New bindings + UsageDetails) -- And info to pass upstream + +-- Returned UsageDetails: +-- No calls for binders of this bind +specBind rhs_env (NonRec fn rhs) body_uds + = do { (rhs', rhs_uds) <- specExpr rhs_env rhs + ; (fn', spec_defns, body_uds1) <- specDefn rhs_env body_uds fn rhs + + ; let pairs = spec_defns ++ [(fn', rhs')] + -- fn' mentions the spec_defns in its rules, + -- so put the latter first + + combined_uds = body_uds1 `plusUDs` rhs_uds + + (free_uds, dump_dbs, float_all) = dumpBindUDs [fn] combined_uds + + final_binds :: [DictBind] + -- See Note [From non-recursive to recursive] + final_binds + | not (isEmptyBag dump_dbs) + , not (null spec_defns) + = [recWithDumpedDicts pairs dump_dbs] + | otherwise + = [mkDB $ NonRec b r | (b,r) <- pairs] + ++ bagToList dump_dbs + + ; if float_all then + -- Rather than discard the calls mentioning the bound variables + -- we float this (dictionary) binding along with the others + return ([], free_uds `snocDictBinds` final_binds) + else + -- No call in final_uds mentions bound variables, + -- so we can just leave the binding here + return (map fst final_binds, free_uds) } + + +specBind rhs_env (Rec pairs) body_uds + -- Note [Specialising a recursive group] + = do { let (bndrs,rhss) = unzip pairs + ; (rhss', rhs_uds) <- mapAndCombineSM (specExpr rhs_env) rhss + ; let scope_uds = body_uds `plusUDs` rhs_uds + -- Includes binds and calls arising from rhss + + ; (bndrs1, spec_defns1, uds1) <- specDefns rhs_env scope_uds pairs + + ; (bndrs3, spec_defns3, uds3) + <- if null spec_defns1 -- Common case: no specialisation + then return (bndrs1, [], uds1) + else do { -- Specialisation occurred; do it again + (bndrs2, spec_defns2, uds2) + <- specDefns rhs_env uds1 (bndrs1 `zip` rhss) + ; return (bndrs2, spec_defns2 ++ spec_defns1, uds2) } + + ; let (final_uds, dumped_dbs, float_all) = dumpBindUDs bndrs uds3 + final_bind = recWithDumpedDicts (spec_defns3 ++ zip bndrs3 rhss') + dumped_dbs + + ; if float_all then + return ([], final_uds `snocDictBind` final_bind) + else + return ([fst final_bind], final_uds) } + + +--------------------------- +specDefns :: SpecEnv + -> UsageDetails -- Info on how it is used in its scope + -> [(OutId,InExpr)] -- The things being bound and their un-processed RHS + -> SpecM ([OutId], -- Original Ids with RULES added + [(OutId,OutExpr)], -- Extra, specialised bindings + UsageDetails) -- Stuff to fling upwards from the specialised versions + +-- Specialise a list of bindings (the contents of a Rec), but flowing usages +-- upwards binding by binding. Example: { f = ...g ...; g = ...f .... } +-- Then if the input CallDetails has a specialised call for 'g', whose specialisation +-- in turn generates a specialised call for 'f', we catch that in this one sweep. +-- But not vice versa (it's a fixpoint problem). + +specDefns _env uds [] + = return ([], [], uds) +specDefns env uds ((bndr,rhs):pairs) + = do { (bndrs1, spec_defns1, uds1) <- specDefns env uds pairs + ; (bndr1, spec_defns2, uds2) <- specDefn env uds1 bndr rhs + ; return (bndr1 : bndrs1, spec_defns1 ++ spec_defns2, uds2) } + +--------------------------- +specDefn :: SpecEnv + -> UsageDetails -- Info on how it is used in its scope + -> OutId -> InExpr -- The thing being bound and its un-processed RHS + -> SpecM (Id, -- Original Id with added RULES + [(Id,CoreExpr)], -- Extra, specialised bindings + UsageDetails) -- Stuff to fling upwards from the specialised versions + +specDefn env body_uds fn rhs + = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds + rules_for_me = idCoreRules fn + ; (rules, spec_defns, spec_uds) <- specCalls Nothing env rules_for_me + calls_for_me fn rhs + ; return ( fn `addIdSpecialisations` rules + , spec_defns + , body_uds_without_me `plusUDs` spec_uds) } + -- It's important that the `plusUDs` is this way + -- round, because body_uds_without_me may bind + -- dictionaries that are used in calls_for_me passed + -- to specDefn. So the dictionary bindings in + -- spec_uds may mention dictionaries bound in + -- body_uds_without_me + +--------------------------- +specCalls :: Maybe Module -- Just this_mod => specialising imported fn + -- Nothing => specialising local fn + -> SpecEnv + -> [CoreRule] -- Existing RULES for the fn + -> [CallInfo] + -> OutId -> InExpr + -> SpecM SpecInfo -- New rules, specialised bindings, and usage details + +-- This function checks existing rules, and does not create +-- duplicate ones. So the caller does not need to do this filtering. +-- See 'already_covered' + +type SpecInfo = ( [CoreRule] -- Specialisation rules + , [(Id,CoreExpr)] -- Specialised definition + , UsageDetails ) -- Usage details from specialised RHSs + +specCalls mb_mod env existing_rules calls_for_me fn rhs + -- The first case is the interesting one + | callSpecArity pis <= fn_arity -- See Note [Specialisation Must Preserve Sharing] + && notNull calls_for_me -- And there are some calls to specialise + && not (isNeverActive (idInlineActivation fn)) + -- Don't specialise NOINLINE things + -- See Note [Auto-specialisation and RULES] + +-- && not (certainlyWillInline (idUnfolding fn)) -- And it's not small +-- See Note [Inline specialisation] for why we do not +-- switch off specialisation for inline functions + + = -- pprTrace "specDefn: some" (ppr fn $$ ppr calls_for_me $$ ppr existing_rules) $ + foldlM spec_call ([], [], emptyUDs) calls_for_me + + | otherwise -- No calls or RHS doesn't fit our preconceptions + = WARN( not (exprIsTrivial rhs) && notNull calls_for_me, + text "Missed specialisation opportunity for" + <+> ppr fn $$ _trace_doc ) + -- Note [Specialisation shape] + -- pprTrace "specDefn: none" (ppr fn <+> ppr calls_for_me) $ + return ([], [], emptyUDs) + where + _trace_doc = sep [ ppr rhs_tyvars, ppr rhs_bndrs + , ppr (idInlineActivation fn) ] + + fn_type = idType fn + fn_arity = idArity fn + fn_unf = realIdUnfolding fn -- Ignore loop-breaker-ness here + pis = fst $ splitPiTys fn_type + theta = getTheta pis + n_dicts = length theta + inl_prag = idInlinePragma fn + inl_act = inlinePragmaActivation inl_prag + is_local = isLocalId fn + + -- Figure out whether the function has an INLINE pragma + -- See Note [Inline specialisations] + + (rhs_bndrs, rhs_body) = collectBindersPushingCo rhs + -- See Note [Account for casts in binding] + rhs_tyvars = filter isTyVar rhs_bndrs + + in_scope = GHC.Core.Subst.substInScope (se_subst env) + + already_covered :: DynFlags -> [CoreRule] -> [CoreExpr] -> Bool + already_covered dflags new_rules args -- Note [Specialisations already covered] + = isJust (lookupRule dflags (in_scope, realIdUnfolding) + (const True) fn args + (new_rules ++ existing_rules)) + -- NB: we look both in the new_rules (generated by this invocation + -- of specCalls), and in existing_rules (passed in to specCalls) + + ---------------------------------------------------------- + -- Specialise to one particular call pattern + spec_call :: SpecInfo -- Accumulating parameter + -> CallInfo -- Call instance + -> SpecM SpecInfo + spec_call spec_acc@(rules_acc, pairs_acc, uds_acc) + (CI { ci_key = call_args, ci_arity = call_arity }) + = ASSERT(call_arity <= fn_arity) + + -- See Note [Specialising Calls] + do { (rhs_env2, unused_bndrs, rule_bndrs, rule_args, unspec_bndrs, dx_binds, spec_args) + <- specHeader env rhs_bndrs $ dropWhileEndLE isUnspecArg call_args + ; let rhs_body' = mkLams unused_bndrs rhs_body + ; dflags <- getDynFlags + ; if already_covered dflags rules_acc rule_args + then return spec_acc + else -- pprTrace "spec_call" (vcat [ ppr _call_info, ppr fn, ppr rhs_dict_ids + -- , text "rhs_env2" <+> ppr (se_subst rhs_env2) + -- , ppr dx_binds ]) $ + do + { -- Figure out the type of the specialised function + let body = mkLams unspec_bndrs rhs_body' + body_ty = substTy rhs_env2 $ exprType body + (lam_extra_args, app_args) -- See Note [Specialisations Must Be Lifted] + | isUnliftedType body_ty -- C.f. GHC.Core.Op.WorkWrap.Lib.mkWorkerArgs + , not (isJoinId fn) + = ([voidArgId], voidPrimId : unspec_bndrs) + | otherwise = ([], unspec_bndrs) + join_arity_change = length app_args - length rule_args + spec_join_arity | Just orig_join_arity <- isJoinId_maybe fn + = Just (orig_join_arity + join_arity_change) + | otherwise + = Nothing + + ; (spec_rhs, rhs_uds) <- specExpr rhs_env2 (mkLams lam_extra_args body) + ; let spec_id_ty = exprType spec_rhs + ; spec_f <- newSpecIdSM fn spec_id_ty spec_join_arity + ; this_mod <- getModule + ; let + -- The rule to put in the function's specialisation is: + -- forall x @b d1' d2'. + -- f x @T1 @b @T2 d1' d2' = f1 x @b + -- See Note [Specialising Calls] + herald = case mb_mod of + Nothing -- Specialising local fn + -> text "SPEC" + Just this_mod -- Specialising imported fn + -> text "SPEC/" <> ppr this_mod + + rule_name = mkFastString $ showSDoc dflags $ + herald <+> ftext (occNameFS (getOccName fn)) + <+> hsep (mapMaybe ppr_call_key_ty call_args) + -- This name ends up in interface files, so use occNameString. + -- Otherwise uniques end up there, making builds + -- less deterministic (See #4012 comment:61 ff) + + rule_wout_eta = mkRule + this_mod + True {- Auto generated -} + is_local + rule_name + inl_act -- Note [Auto-specialisation and RULES] + (idName fn) + rule_bndrs + rule_args + (mkVarApps (Var spec_f) app_args) + + spec_rule + = case isJoinId_maybe fn of + Just join_arity -> etaExpandToJoinPointRule join_arity + rule_wout_eta + Nothing -> rule_wout_eta + + -- Add the { d1' = dx1; d2' = dx2 } usage stuff + -- See Note [Specialising Calls] + spec_uds = foldr consDictBind rhs_uds dx_binds + + -------------------------------------- + -- Add a suitable unfolding if the spec_inl_prag says so + -- See Note [Inline specialisations] + (spec_inl_prag, spec_unf) + | not is_local && isStrongLoopBreaker (idOccInfo fn) + = (neverInlinePragma, noUnfolding) + -- See Note [Specialising imported functions] in OccurAnal + + | InlinePragma { inl_inline = Inlinable } <- inl_prag + = (inl_prag { inl_inline = NoUserInline }, noUnfolding) + + | otherwise + = (inl_prag, specUnfolding dflags unspec_bndrs spec_app n_dicts fn_unf) + + spec_app e = e `mkApps` spec_args + + -------------------------------------- + -- Adding arity information just propagates it a bit faster + -- See Note [Arity decrease] in GHC.Core.Op.Simplify + -- Copy InlinePragma information from the parent Id. + -- So if f has INLINE[1] so does spec_f + spec_f_w_arity = spec_f `setIdArity` max 0 (fn_arity - n_dicts) + `setInlinePragma` spec_inl_prag + `setIdUnfolding` spec_unf + `asJoinId_maybe` spec_join_arity + + _rule_trace_doc = vcat [ ppr spec_f, ppr fn_type, ppr spec_id_ty + , ppr rhs_bndrs, ppr call_args + , ppr spec_rule + ] + + ; -- pprTrace "spec_call: rule" _rule_trace_doc + return ( spec_rule : rules_acc + , (spec_f_w_arity, spec_rhs) : pairs_acc + , spec_uds `plusUDs` uds_acc + ) } } + +{- Note [Specialisation Must Preserve Sharing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider a function: + + f :: forall a. Eq a => a -> blah + f = + if expensive + then f1 + else f2 + +As written, all calls to 'f' will share 'expensive'. But if we specialise 'f' +at 'Int', eg: + + $sfInt = SUBST[a->Int,dict->dEqInt] (if expensive then f1 else f2) + + RULE "SPEC f" + forall (d :: Eq Int). + f Int _ = $sfIntf + +We've now lost sharing between 'f' and '$sfInt' for 'expensive'. Yikes! + +To avoid this, we only generate specialisations for functions whose arity is +enough to bind all of the arguments we need to specialise. This ensures our +specialised functions don't do any work before receiving all of their dicts, +and thus avoids the 'f' case above. + +Note [Specialisations Must Be Lifted] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider a function 'f': + + f = forall a. Eq a => Array# a + +used like + + case x of + True -> ...f @Int dEqInt... + False -> 0 + +Naively, we might generate an (expensive) specialisation + + $sfInt :: Array# Int + +even in the case that @x = False@! Instead, we add a dummy 'Void#' argument to +the specialisation '$sfInt' ($sfInt :: Void# -> Array# Int) in order to +preserve laziness. + +Note [Specialising Calls] +~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have a function: + + f :: Int -> forall a b c. (Foo a, Foo c) => Bar -> Qux + f = \x -> /\ a b c -> \d1 d2 bar -> rhs + +and suppose it is called at: + + f 7 @T1 @T2 @T3 dFooT1 dFooT3 bar + +This call is described as a 'CallInfo' whose 'ci_key' is + + [ UnspecArg, SpecType T1, UnspecType, SpecType T3, SpecDict dFooT1 + , SpecDict dFooT3, UnspecArg ] + +Why are 'a' and 'c' identified as 'SpecType', while 'b' is 'UnspecType'? +Because we must specialise the function on type variables that appear +free in its *dictionary* arguments; but not on type variables that do not +appear in any dictionaries, i.e. are fully polymorphic. + +Because this call has dictionaries applied, we'd like to specialise +the call on any type argument that appears free in those dictionaries. +In this case, those are (a ~ T1, c ~ T3). + +As a result, we'd like to generate a function: + + $sf :: Int -> forall b. Bar -> Qux + $sf = SUBST[a->T1, c->T3, d1->d1', d2->d2'] (\x -> /\ b -> \bar -> rhs) + +Note that the substitution is applied to the whole thing. This is +convenient, but just slightly fragile. Notably: + * There had better be no name clashes in a/b/c + +We must construct a rewrite rule: + + RULE "SPEC f @T1 _ @T3" + forall (x :: Int) (@b :: Type) (d1' :: Foo T1) (d2' :: Foo T3). + f x @T1 @b @T3 d1' d2' = $sf x @b + +In the rule, d1' and d2' are just wildcards, not used in the RHS. Note +additionally that 'bar' isn't captured by this rule --- we bind only +enough etas in order to capture all of the *specialised* arguments. + +Finally, we must also construct the usage-details + + { d1' = dx1; d2' = dx2 } + +where d1', d2' are cloned versions of d1,d2, with the type substitution +applied. These auxiliary bindings just avoid duplication of dx1, dx2. + +Note [Account for casts in binding] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f :: Eq a => a -> IO () + {-# INLINABLE f + StableUnf = (/\a \(d:Eq a) (x:a). blah) |> g + #-} + f = ... + +In f's stable unfolding we have done some modest simplification which +has pushed the cast to the outside. (I wonder if this is the Right +Thing, but it's what happens now; see GHC.Core.Op.Simplify.Utils Note [Casts and +lambdas].) Now that stable unfolding must be specialised, so we want +to push the cast back inside. It would be terrible if the cast +defeated specialisation! Hence the use of collectBindersPushingCo. + +Note [Evidence foralls] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose (#12212) that we are specialising + f :: forall a b. (Num a, F a ~ F b) => blah +with a=b=Int. Then the RULE will be something like + RULE forall (d:Num Int) (g :: F Int ~ F Int). + f Int Int d g = f_spec +But both varToCoreExpr (when constructing the LHS args), and the +simplifier (when simplifying the LHS args), will transform to + RULE forall (d:Num Int) (g :: F Int ~ F Int). + f Int Int d <F Int> = f_spec +by replacing g with Refl. So now 'g' is unbound, which results in a later +crash. So we use Refl right off the bat, and do not forall-quantify 'g': + * varToCoreExpr generates a Refl + * exprsFreeIdsList returns the Ids bound by the args, + which won't include g + +You might wonder if this will match as often, but the simplifier replaces +complicated Refl coercions with Refl pretty aggressively. + +Note [Orphans and auto-generated rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we specialise an INLINABLE function, or when we have +-fspecialise-aggressively, we auto-generate RULES that are orphans. +We don't want to warn about these, or we'd generate a lot of warnings. +Thus, we only warn about user-specified orphan rules. + +Indeed, we don't even treat the module as an orphan module if it has +auto-generated *rule* orphans. Orphan modules are read every time we +compile, so they are pretty obtrusive and slow down every compilation, +even non-optimised ones. (Reason: for type class instances it's a +type correctness issue.) But specialisation rules are strictly for +*optimisation* only so it's fine not to read the interface. + +What this means is that a SPEC rules from auto-specialisation in +module M will be used in other modules only if M.hi has been read for +some other reason, which is actually pretty likely. +-} + +bindAuxiliaryDicts + :: SpecEnv + -> [DictId] -> [CoreExpr] -- Original dict bndrs, and the witnessing expressions + -> [DictId] -- A cloned dict-id for each dict arg + -> (SpecEnv, -- Substitute for all orig_dicts + [DictBind], -- Auxiliary dict bindings + [CoreExpr]) -- Witnessing expressions (all trivial) +-- Bind any dictionary arguments to fresh names, to preserve sharing +bindAuxiliaryDicts env@(SE { se_subst = subst, se_interesting = interesting }) + orig_dict_ids call_ds inst_dict_ids + = (env', dx_binds, spec_dict_args) + where + (dx_binds, spec_dict_args) = go call_ds inst_dict_ids + env' = env { se_subst = subst `GHC.Core.Subst.extendSubstList` + (orig_dict_ids `zip` spec_dict_args) + `GHC.Core.Subst.extendInScopeList` dx_ids + , se_interesting = interesting `unionVarSet` interesting_dicts } + + dx_ids = [dx_id | (NonRec dx_id _, _) <- dx_binds] + interesting_dicts = mkVarSet [ dx_id | (NonRec dx_id dx, _) <- dx_binds + , interestingDict env dx ] + -- See Note [Make the new dictionaries interesting] + + go :: [CoreExpr] -> [CoreBndr] -> ([DictBind], [CoreExpr]) + go [] _ = ([], []) + go (dx:dxs) (dx_id:dx_ids) + | exprIsTrivial dx = (dx_binds, dx : args) + | otherwise = (mkDB (NonRec dx_id dx) : dx_binds, Var dx_id : args) + where + (dx_binds, args) = go dxs dx_ids + -- In the first case extend the substitution but not bindings; + -- in the latter extend the bindings but not the substitution. + -- For the former, note that we bind the *original* dict in the substitution, + -- overriding any d->dx_id binding put there by substBndrs + go _ _ = pprPanic "bindAuxiliaryDicts" (ppr orig_dict_ids $$ ppr call_ds $$ ppr inst_dict_ids) + +{- +Note [Make the new dictionaries interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Important! We're going to substitute dx_id1 for d +and we want it to look "interesting", else we won't gather *any* +consequential calls. E.g. + f d = ...g d.... +If we specialise f for a call (f (dfun dNumInt)), we'll get +a consequent call (g d') with an auxiliary definition + d' = df dNumInt +We want that consequent call to look interesting + + +Note [From non-recursive to recursive] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Even in the non-recursive case, if any dict-binds depend on 'fn' we might +have built a recursive knot + + f a d x = <blah> + MkUD { ud_binds = NonRec d7 (MkD ..f..) + , ud_calls = ...(f T d7)... } + +The we generate + + Rec { fs x = <blah>[T/a, d7/d] + f a d x = <blah> + RULE f T _ = fs + d7 = ...f... } + +Here the recursion is only through the RULE. + +However we definitely should /not/ make the Rec in this wildly common +case: + d = ... + MkUD { ud_binds = NonRec d7 (...d...) + , ud_calls = ...(f T d7)... } + +Here we want simply to add d to the floats, giving + MkUD { ud_binds = NonRec d (...) + NonRec d7 (...d...) + , ud_calls = ...(f T d7)... } + +In general, we need only make this Rec if + - there are some specialisations (spec_binds non-empty) + - there are some dict_binds that depend on f (dump_dbs non-empty) + +Note [Avoiding loops] +~~~~~~~~~~~~~~~~~~~~~ +When specialising /dictionary functions/ we must be very careful to +avoid building loops. Here is an example that bit us badly: #3591 + + class Eq a => C a + instance Eq [a] => C [a] + +This translates to + dfun :: Eq [a] -> C [a] + dfun a d = MkD a d (meth d) + + d4 :: Eq [T] = <blah> + d2 :: C [T] = dfun T d4 + d1 :: Eq [T] = $p1 d2 + d3 :: C [T] = dfun T d1 + +None of these definitions is recursive. What happened was that we +generated a specialisation: + + RULE forall d. dfun T d = dT :: C [T] + dT = (MkD a d (meth d)) [T/a, d1/d] + = MkD T d1 (meth d1) + +But now we use the RULE on the RHS of d2, to get + + d2 = dT = MkD d1 (meth d1) + d1 = $p1 d2 + +and now d1 is bottom! The problem is that when specialising 'dfun' we +should first dump "below" the binding all floated dictionary bindings +that mention 'dfun' itself. So d2 and d3 (and hence d1) must be +placed below 'dfun', and thus unavailable to it when specialising +'dfun'. That in turn means that the call (dfun T d1) must be +discarded. On the other hand, the call (dfun T d4) is fine, assuming +d4 doesn't mention dfun. + +Solution: + Discard all calls that mention dictionaries that depend + (directly or indirectly) on the dfun we are specialising. + This is done by 'filterCalls' + +-------------- +Here's another example, this time for an imported dfun, so the call +to filterCalls is in specImports (#13429). Suppose we have + class Monoid v => C v a where ... + +We start with a call + f @ [Integer] @ Integer $fC[]Integer + +Specialising call to 'f' gives dict bindings + $dMonoid_1 :: Monoid [Integer] + $dMonoid_1 = M.$p1C @ [Integer] $fC[]Integer + + $dC_1 :: C [Integer] (Node [Integer] Integer) + $dC_1 = M.$fCvNode @ [Integer] $dMonoid_1 + +...plus a recursive call to + f @ [Integer] @ (Node [Integer] Integer) $dC_1 + +Specialising that call gives + $dMonoid_2 :: Monoid [Integer] + $dMonoid_2 = M.$p1C @ [Integer] $dC_1 + + $dC_2 :: C [Integer] (Node [Integer] Integer) + $dC_2 = M.$fCvNode @ [Integer] $dMonoid_2 + +Now we have two calls to the imported function + M.$fCvNode :: Monoid v => C v a + M.$fCvNode @v @a m = C m some_fun + +But we must /not/ use the call (M.$fCvNode @ [Integer] $dMonoid_2) +for specialisation, else we get: + + $dC_1 = M.$fCvNode @ [Integer] $dMonoid_1 + $dMonoid_2 = M.$p1C @ [Integer] $dC_1 + $s$fCvNode = C $dMonoid_2 ... + RULE M.$fCvNode [Integer] _ _ = $s$fCvNode + +Now use the rule to rewrite the call in the RHS of $dC_1 +and we get a loop! + +-------------- +Here's yet another example + + class C a where { foo,bar :: [a] -> [a] } + + instance C Int where + foo x = r_bar x + bar xs = reverse xs + + r_bar :: C a => [a] -> [a] + r_bar xs = bar (xs ++ xs) + +That translates to: + + r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs) + + Rec { $fCInt :: C Int = MkC foo_help reverse + foo_help (xs::[Int]) = r_bar Int $fCInt xs } + +The call (r_bar $fCInt) mentions $fCInt, + which mentions foo_help, + which mentions r_bar +But we DO want to specialise r_bar at Int: + + Rec { $fCInt :: C Int = MkC foo_help reverse + foo_help (xs::[Int]) = r_bar Int $fCInt xs + + r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs) + RULE r_bar Int _ = r_bar_Int + + r_bar_Int xs = bar Int $fCInt (xs ++ xs) + } + +Note that, because of its RULE, r_bar joins the recursive +group. (In this case it'll unravel a short moment later.) + + +Note [Specialising a recursive group] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let rec { f x = ...g x'... + ; g y = ...f y'.... } + in f 'a' +Here we specialise 'f' at Char; but that is very likely to lead to +a specialisation of 'g' at Char. We must do the latter, else the +whole point of specialisation is lost. + +But we do not want to keep iterating to a fixpoint, because in the +presence of polymorphic recursion we might generate an infinite number +of specialisations. + +So we use the following heuristic: + * Arrange the rec block in dependency order, so far as possible + (the occurrence analyser already does this) + + * Specialise it much like a sequence of lets + + * Then go through the block a second time, feeding call-info from + the RHSs back in the bottom, as it were + +In effect, the ordering maxmimises the effectiveness of each sweep, +and we do just two sweeps. This should catch almost every case of +monomorphic recursion -- the exception could be a very knotted-up +recursion with multiple cycles tied up together. + +This plan is implemented in the Rec case of specBindItself. + +Note [Specialisations already covered] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We obviously don't want to generate two specialisations for the same +argument pattern. There are two wrinkles + +1. We do the already-covered test in specDefn, not when we generate +the CallInfo in mkCallUDs. We used to test in the latter place, but +we now iterate the specialiser somewhat, and the Id at the call site +might therefore not have all the RULES that we can see in specDefn + +2. What about two specialisations where the second is an *instance* +of the first? If the more specific one shows up first, we'll generate +specialisations for both. If the *less* specific one shows up first, +we *don't* currently generate a specialisation for the more specific +one. (See the call to lookupRule in already_covered.) Reasons: + (a) lookupRule doesn't say which matches are exact (bad reason) + (b) if the earlier specialisation is user-provided, it's + far from clear that we should auto-specialise further + +Note [Auto-specialisation and RULES] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider: + g :: Num a => a -> a + g = ... + + f :: (Int -> Int) -> Int + f w = ... + {-# RULE f g = 0 #-} + +Suppose that auto-specialisation makes a specialised version of +g::Int->Int That version won't appear in the LHS of the RULE for f. +So if the specialisation rule fires too early, the rule for f may +never fire. + +It might be possible to add new rules, to "complete" the rewrite system. +Thus when adding + RULE forall d. g Int d = g_spec +also add + RULE f g_spec = 0 + +But that's a bit complicated. For now we ask the programmer's help, +by *copying the INLINE activation pragma* to the auto-specialised +rule. So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule +will also not be active until phase 2. And that's what programmers +should jolly well do anyway, even aside from specialisation, to ensure +that g doesn't inline too early. + +This in turn means that the RULE would never fire for a NOINLINE +thing so not much point in generating a specialisation at all. + +Note [Specialisation shape] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We only specialise a function if it has visible top-level lambdas +corresponding to its overloading. E.g. if + f :: forall a. Eq a => .... +then its body must look like + f = /\a. \d. ... + +Reason: when specialising the body for a call (f ty dexp), we want to +substitute dexp for d, and pick up specialised calls in the body of f. + +This doesn't always work. One example I came across was this: + newtype Gen a = MkGen{ unGen :: Int -> a } + + choose :: Eq a => a -> Gen a + choose n = MkGen (\r -> n) + + oneof = choose (1::Int) + +It's a silly example, but we get + choose = /\a. g `cast` co +where choose doesn't have any dict arguments. Thus far I have not +tried to fix this (wait till there's a real example). + +Mind you, then 'choose' will be inlined (since RHS is trivial) so +it doesn't matter. This comes up with single-method classes + + class C a where { op :: a -> a } + instance C a => C [a] where .... +==> + $fCList :: C a => C [a] + $fCList = $copList |> (...coercion>...) + ....(uses of $fCList at particular types)... + +So we suppress the WARN if the rhs is trivial. + +Note [Inline specialisations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here is what we do with the InlinePragma of the original function + * Activation/RuleMatchInfo: both transferred to the + specialised function + * InlineSpec: + (a) An INLINE pragma is transferred + (b) An INLINABLE pragma is *not* transferred + +Why (a): transfer INLINE pragmas? The point of INLINE was precisely to +specialise the function at its call site, and arguably that's not so +important for the specialised copies. BUT *pragma-directed* +specialisation now takes place in the typechecker/desugarer, with +manually specified INLINEs. The specialisation here is automatic. +It'd be very odd if a function marked INLINE was specialised (because +of some local use), and then forever after (including importing +modules) the specialised version wasn't INLINEd. After all, the +programmer said INLINE! + +You might wonder why we specialise INLINE functions at all. After +all they should be inlined, right? Two reasons: + + * Even INLINE functions are sometimes not inlined, when they aren't + applied to interesting arguments. But perhaps the type arguments + alone are enough to specialise (even though the args are too boring + to trigger inlining), and it's certainly better to call the + specialised version. + + * The RHS of an INLINE function might call another overloaded function, + and we'd like to generate a specialised version of that function too. + This actually happens a lot. Consider + replicateM_ :: (Monad m) => Int -> m a -> m () + {-# INLINABLE replicateM_ #-} + replicateM_ d x ma = ... + The strictness analyser may transform to + replicateM_ :: (Monad m) => Int -> m a -> m () + {-# INLINE replicateM_ #-} + replicateM_ d x ma = case x of I# x' -> $wreplicateM_ d x' ma + + $wreplicateM_ :: (Monad m) => Int# -> m a -> m () + {-# INLINABLE $wreplicateM_ #-} + $wreplicateM_ = ... + Now an importing module has a specialised call to replicateM_, say + (replicateM_ dMonadIO). We certainly want to specialise $wreplicateM_! + This particular example had a huge effect on the call to replicateM_ + in nofib/shootout/n-body. + +Why (b): discard INLINABLE pragmas? See #4874 for persuasive examples. +Suppose we have + {-# INLINABLE f #-} + f :: Ord a => [a] -> Int + f xs = letrec f' = ...f'... in f' +Then, when f is specialised and optimised we might get + wgo :: [Int] -> Int# + wgo = ...wgo... + f_spec :: [Int] -> Int + f_spec xs = case wgo xs of { r -> I# r } +and we clearly want to inline f_spec at call sites. But if we still +have the big, un-optimised of f (albeit specialised) captured in an +INLINABLE pragma for f_spec, we won't get that optimisation. + +So we simply drop INLINABLE pragmas when specialising. It's not really +a complete solution; ignoring specialisation for now, INLINABLE functions +don't get properly strictness analysed, for example. But it works well +for examples involving specialisation, which is the dominant use of +INLINABLE. See #4874. + + +************************************************************************ +* * +\subsubsection{UsageDetails and suchlike} +* * +************************************************************************ +-} + +data UsageDetails + = MkUD { + ud_binds :: !(Bag DictBind), + -- See Note [Floated dictionary bindings] + -- The order is important; + -- in ds1 `union` ds2, bindings in ds2 can depend on those in ds1 + -- (Remember, Bags preserve order in GHC.) + + ud_calls :: !CallDetails + + -- INVARIANT: suppose bs = bindersOf ud_binds + -- Then 'calls' may *mention* 'bs', + -- but there should be no calls *for* bs + } + +-- | A 'DictBind' is a binding along with a cached set containing its free +-- variables (both type variables and dictionaries) +type DictBind = (CoreBind, VarSet) + +{- Note [Floated dictionary bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We float out dictionary bindings for the reasons described under +"Dictionary floating" above. But not /just/ dictionary bindings. +Consider + + f :: Eq a => blah + f a d = rhs + + $c== :: T -> T -> Bool + $c== x y = ... + + $df :: Eq T + $df = Eq $c== ... + + gurgle = ...(f @T $df)... + +We gather the call info for (f @T $df), and we don't want to drop it +when we come across the binding for $df. So we add $df to the floats +and continue. But then we have to add $c== to the floats, and so on. +These all float above the binding for 'f', and now we can +successfully specialise 'f'. + +So the DictBinds in (ud_binds :: Bag DictBind) may contain +non-dictionary bindings too. +-} + +instance Outputable UsageDetails where + ppr (MkUD { ud_binds = dbs, ud_calls = calls }) + = text "MkUD" <+> braces (sep (punctuate comma + [text "binds" <+> equals <+> ppr dbs, + text "calls" <+> equals <+> ppr calls])) + +emptyUDs :: UsageDetails +emptyUDs = MkUD { ud_binds = emptyBag, ud_calls = emptyDVarEnv } + +------------------------------------------------------------ +type CallDetails = DIdEnv CallInfoSet + -- The order of specialized binds and rules depends on how we linearize + -- CallDetails, so to get determinism we must use a deterministic set here. + -- See Note [Deterministic UniqFM] in UniqDFM + +data CallInfoSet = CIS Id (Bag CallInfo) + -- The list of types and dictionaries is guaranteed to + -- match the type of f + -- The Bag may contain duplicate calls (i.e. f @T and another f @T) + -- These dups are eliminated by already_covered in specCalls + +data CallInfo + = CI { ci_key :: [SpecArg] -- All arguments + , ci_arity :: Int -- The number of variables necessary to bind + -- all of the specialised arguments + , ci_fvs :: VarSet -- Free vars of the ci_key + -- call (including tyvars) + -- [*not* include the main id itself, of course] + } + +type DictExpr = CoreExpr + +ciSetFilter :: (CallInfo -> Bool) -> CallInfoSet -> CallInfoSet +ciSetFilter p (CIS id a) = CIS id (filterBag p a) + +instance Outputable CallInfoSet where + ppr (CIS fn map) = hang (text "CIS" <+> ppr fn) + 2 (ppr map) + +pprCallInfo :: Id -> CallInfo -> SDoc +pprCallInfo fn (CI { ci_key = key }) + = ppr fn <+> ppr key + +ppr_call_key_ty :: SpecArg -> Maybe SDoc +ppr_call_key_ty (SpecType ty) = Just $ char '@' <> pprParendType ty +ppr_call_key_ty UnspecType = Just $ char '_' +ppr_call_key_ty (SpecDict _) = Nothing +ppr_call_key_ty UnspecArg = Nothing + +instance Outputable CallInfo where + ppr (CI { ci_key = key, ci_fvs = fvs }) + = text "CI" <> braces (hsep [ fsep (mapMaybe ppr_call_key_ty key), ppr fvs ]) + +unionCalls :: CallDetails -> CallDetails -> CallDetails +unionCalls c1 c2 = plusDVarEnv_C unionCallInfoSet c1 c2 + +unionCallInfoSet :: CallInfoSet -> CallInfoSet -> CallInfoSet +unionCallInfoSet (CIS f calls1) (CIS _ calls2) = + CIS f (calls1 `unionBags` calls2) + +callDetailsFVs :: CallDetails -> VarSet +callDetailsFVs calls = + nonDetFoldUDFM (unionVarSet . callInfoFVs) emptyVarSet calls + -- It's OK to use nonDetFoldUDFM here because we forget the ordering + -- immediately by converting to a nondeterministic set. + +callInfoFVs :: CallInfoSet -> VarSet +callInfoFVs (CIS _ call_info) = + foldr (\(CI { ci_fvs = fv }) vs -> unionVarSet fv vs) emptyVarSet call_info + +computeArity :: [SpecArg] -> Int +computeArity = length . filter isValueArg . dropWhileEndLE isUnspecArg + +callSpecArity :: [TyCoBinder] -> Int +callSpecArity = length . filter (not . isNamedBinder) . dropWhileEndLE isVisibleBinder + +getTheta :: [TyCoBinder] -> [PredType] +getTheta = fmap tyBinderType . filter isInvisibleBinder . filter (not . isNamedBinder) + + +------------------------------------------------------------ +singleCall :: Id -> [SpecArg] -> UsageDetails +singleCall id args + = MkUD {ud_binds = emptyBag, + ud_calls = unitDVarEnv id $ CIS id $ + unitBag (CI { ci_key = args -- used to be tys + , ci_arity = computeArity args + , ci_fvs = call_fvs }) } + where + tys = getSpecTypes args + dicts = getSpecDicts args + call_fvs = exprsFreeVars dicts `unionVarSet` tys_fvs + tys_fvs = tyCoVarsOfTypes tys + -- The type args (tys) are guaranteed to be part of the dictionary + -- types, because they are just the constrained types, + -- and the dictionary is therefore sure to be bound + -- inside the binding for any type variables free in the type; + -- hence it's safe to neglect tyvars free in tys when making + -- the free-var set for this call + -- BUT I don't trust this reasoning; play safe and include tys_fvs + -- + -- We don't include the 'id' itself. + +mkCallUDs, mkCallUDs' :: SpecEnv -> Id -> [CoreExpr] -> UsageDetails +mkCallUDs env f args + = -- pprTrace "mkCallUDs" (vcat [ ppr f, ppr args, ppr res ]) + res + where + res = mkCallUDs' env f args + +mkCallUDs' env f args + | not (want_calls_for f) -- Imported from elsewhere + || null theta -- Not overloaded + = emptyUDs + + | not (all type_determines_value theta) + || not (computeArity ci_key <= idArity f) + || not (length dicts == length theta) + || not (any (interestingDict env) dicts) -- Note [Interesting dictionary arguments] + -- See also Note [Specialisations already covered] + = -- pprTrace "mkCallUDs: discarding" _trace_doc + emptyUDs -- Not overloaded, or no specialisation wanted + + | otherwise + = -- pprTrace "mkCallUDs: keeping" _trace_doc + singleCall f ci_key + where + _trace_doc = vcat [ppr f, ppr args, ppr (map (interestingDict env) dicts)] + pis = fst $ splitPiTys $ idType f + theta = getTheta pis + constrained_tyvars = tyCoVarsOfTypes theta + + ci_key :: [SpecArg] + ci_key = fmap (\(t, a) -> + case t of + Named (binderVar -> tyVar) + | tyVar `elemVarSet` constrained_tyvars + -> case a of + Type ty -> SpecType ty + _ -> pprPanic "ci_key" $ ppr a + | otherwise + -> UnspecType + Anon InvisArg _ -> SpecDict a + Anon VisArg _ -> UnspecArg + ) $ zip pis args + + dicts = getSpecDicts ci_key + + want_calls_for f = isLocalId f || isJust (maybeUnfoldingTemplate (realIdUnfolding f)) + -- For imported things, we gather call instances if + -- there is an unfolding that we could in principle specialise + -- We might still decide not to use it (consulting dflags) + -- in specImports + -- Use 'realIdUnfolding' to ignore the loop-breaker flag! + + type_determines_value pred -- See Note [Type determines value] + = case classifyPredType pred of + ClassPred cls _ -> not (isIPClass cls) -- Superclasses can't be IPs + EqPred {} -> True + IrredPred {} -> True -- Things like (D []) where D is a + -- Constraint-ranged family; #7785 + ForAllPred {} -> True + +{- +Note [Type determines value] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Only specialise if all overloading is on non-IP *class* params, +because these are the ones whose *type* determines their *value*. In +parrticular, with implicit params, the type args *don't* say what the +value of the implicit param is! See #7101 + +However, consider + type family D (v::*->*) :: Constraint + type instance D [] = () + f :: D v => v Char -> Int +If we see a call (f "foo"), we'll pass a "dictionary" + () |> (g :: () ~ D []) +and it's good to specialise f at this dictionary. + +So the question is: can an implicit parameter "hide inside" a +type-family constraint like (D a). Well, no. We don't allow + type instance D Maybe = ?x:Int +Hence the IrredPred case in type_determines_value. +See #7785. + +Note [Interesting dictionary arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this + \a.\d:Eq a. let f = ... in ...(f d)... +There really is not much point in specialising f wrt the dictionary d, +because the code for the specialised f is not improved at all, because +d is lambda-bound. We simply get junk specialisations. + +What is "interesting"? Just that it has *some* structure. But what about +variables? + + * A variable might be imported, in which case its unfolding + will tell us whether it has useful structure + + * Local variables are cloned on the way down (to avoid clashes when + we float dictionaries), and cloning drops the unfolding + (cloneIdBndr). Moreover, we make up some new bindings, and it's a + nuisance to give them unfoldings. So we keep track of the + "interesting" dictionaries as a VarSet in SpecEnv. + We have to take care to put any new interesting dictionary + bindings in the set. + +We accidentally lost accurate tracking of local variables for a long +time, because cloned variables don't have unfoldings. But makes a +massive difference in a few cases, eg #5113. For nofib as a +whole it's only a small win: 2.2% improvement in allocation for ansi, +1.2% for bspt, but mostly 0.0! Average 0.1% increase in binary size. +-} + +interestingDict :: SpecEnv -> CoreExpr -> Bool +-- A dictionary argument is interesting if it has *some* structure +-- NB: "dictionary" arguments include constraints of all sorts, +-- including equality constraints; hence the Coercion case +interestingDict env (Var v) = hasSomeUnfolding (idUnfolding v) + || isDataConWorkId v + || v `elemVarSet` se_interesting env +interestingDict _ (Type _) = False +interestingDict _ (Coercion _) = False +interestingDict env (App fn (Type _)) = interestingDict env fn +interestingDict env (App fn (Coercion _)) = interestingDict env fn +interestingDict env (Tick _ a) = interestingDict env a +interestingDict env (Cast e _) = interestingDict env e +interestingDict _ _ = True + +plusUDs :: UsageDetails -> UsageDetails -> UsageDetails +plusUDs (MkUD {ud_binds = db1, ud_calls = calls1}) + (MkUD {ud_binds = db2, ud_calls = calls2}) + = MkUD { ud_binds = db1 `unionBags` db2 + , ud_calls = calls1 `unionCalls` calls2 } + +----------------------------- +_dictBindBndrs :: Bag DictBind -> [Id] +_dictBindBndrs dbs = foldr ((++) . bindersOf . fst) [] dbs + +-- | Construct a 'DictBind' from a 'CoreBind' +mkDB :: CoreBind -> DictBind +mkDB bind = (bind, bind_fvs bind) + +-- | Identify the free variables of a 'CoreBind' +bind_fvs :: CoreBind -> VarSet +bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs) +bind_fvs (Rec prs) = foldl' delVarSet rhs_fvs bndrs + where + bndrs = map fst prs + rhs_fvs = unionVarSets (map pair_fvs prs) + +pair_fvs :: (Id, CoreExpr) -> VarSet +pair_fvs (bndr, rhs) = exprSomeFreeVars interesting rhs + `unionVarSet` idFreeVars bndr + -- idFreeVars: don't forget variables mentioned in + -- the rules of the bndr. C.f. OccAnal.addRuleUsage + -- Also tyvars mentioned in its type; they may not appear + -- in the RHS + -- type T a = Int + -- x :: T a = 3 + where + interesting :: InterestingVarFun + interesting v = isLocalVar v || (isId v && isDFunId v) + -- Very important: include DFunIds /even/ if it is imported + -- Reason: See Note [Avoiding loops], the second example + -- involving an imported dfun. We must know whether + -- a dictionary binding depends on an imported dfun, + -- in case we try to specialise that imported dfun + -- #13429 illustrates + +-- | Flatten a set of "dumped" 'DictBind's, and some other binding +-- pairs, into a single recursive binding. +recWithDumpedDicts :: [(Id,CoreExpr)] -> Bag DictBind ->DictBind +recWithDumpedDicts pairs dbs + = (Rec bindings, fvs) + where + (bindings, fvs) = foldr add + ([], emptyVarSet) + (dbs `snocBag` mkDB (Rec pairs)) + add (NonRec b r, fvs') (pairs, fvs) = + ((b,r) : pairs, fvs `unionVarSet` fvs') + add (Rec prs1, fvs') (pairs, fvs) = + (prs1 ++ pairs, fvs `unionVarSet` fvs') + +snocDictBinds :: UsageDetails -> [DictBind] -> UsageDetails +-- Add ud_binds to the tail end of the bindings in uds +snocDictBinds uds dbs + = uds { ud_binds = ud_binds uds `unionBags` listToBag dbs } + +consDictBind :: DictBind -> UsageDetails -> UsageDetails +consDictBind bind uds = uds { ud_binds = bind `consBag` ud_binds uds } + +addDictBinds :: [DictBind] -> UsageDetails -> UsageDetails +addDictBinds binds uds = uds { ud_binds = listToBag binds `unionBags` ud_binds uds } + +snocDictBind :: UsageDetails -> DictBind -> UsageDetails +snocDictBind uds bind = uds { ud_binds = ud_binds uds `snocBag` bind } + +wrapDictBinds :: Bag DictBind -> [CoreBind] -> [CoreBind] +wrapDictBinds dbs binds + = foldr add binds dbs + where + add (bind,_) binds = bind : binds + +wrapDictBindsE :: Bag DictBind -> CoreExpr -> CoreExpr +wrapDictBindsE dbs expr + = foldr add expr dbs + where + add (bind,_) expr = Let bind expr + +---------------------- +dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind) +-- Used at a lambda or case binder; just dump anything mentioning the binder +dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + | null bndrs = (uds, emptyBag) -- Common in case alternatives + | otherwise = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $ + (free_uds, dump_dbs) + where + free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls } + bndr_set = mkVarSet bndrs + (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set + free_calls = deleteCallsMentioning dump_set $ -- Drop calls mentioning bndr_set on the floor + deleteCallsFor bndrs orig_calls -- Discard calls for bndr_set; there should be + -- no calls for any of the dicts in dump_dbs + +dumpBindUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind, Bool) +-- Used at a let(rec) binding. +-- We return a boolean indicating whether the binding itself is mentioned, +-- directly or indirectly, by any of the ud_calls; in that case we want to +-- float the binding itself; +-- See Note [Floated dictionary bindings] +dumpBindUDs bndrs (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + = -- pprTrace "dumpBindUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $ + (free_uds, dump_dbs, float_all) + where + free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls } + bndr_set = mkVarSet bndrs + (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set + free_calls = deleteCallsFor bndrs orig_calls + float_all = dump_set `intersectsVarSet` callDetailsFVs free_calls + +callsForMe :: Id -> UsageDetails -> (UsageDetails, [CallInfo]) +callsForMe fn (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + = -- pprTrace ("callsForMe") + -- (vcat [ppr fn, + -- text "Orig dbs =" <+> ppr (_dictBindBndrs orig_dbs), + -- text "Orig calls =" <+> ppr orig_calls, + -- text "Dep set =" <+> ppr dep_set, + -- text "Calls for me =" <+> ppr calls_for_me]) $ + (uds_without_me, calls_for_me) + where + uds_without_me = MkUD { ud_binds = orig_dbs + , ud_calls = delDVarEnv orig_calls fn } + calls_for_me = case lookupDVarEnv orig_calls fn of + Nothing -> [] + Just cis -> filterCalls cis orig_dbs + -- filterCalls: drop calls that (directly or indirectly) + -- refer to fn. See Note [Avoiding loops] + +---------------------- +filterCalls :: CallInfoSet -> Bag DictBind -> [CallInfo] +-- See Note [Avoiding loops] +filterCalls (CIS fn call_bag) dbs + = filter ok_call (bagToList call_bag) + where + dump_set = foldl' go (unitVarSet fn) dbs + -- This dump-set could also be computed by splitDictBinds + -- (_,_,dump_set) = splitDictBinds dbs {fn} + -- But this variant is shorter + + go so_far (db,fvs) | fvs `intersectsVarSet` so_far + = extendVarSetList so_far (bindersOf db) + | otherwise = so_far + + ok_call (CI { ci_fvs = fvs }) = not (fvs `intersectsVarSet` dump_set) + +---------------------- +splitDictBinds :: Bag DictBind -> IdSet -> (Bag DictBind, Bag DictBind, IdSet) +-- splitDictBinds dbs bndrs returns +-- (free_dbs, dump_dbs, dump_set) +-- where +-- * dump_dbs depends, transitively on bndrs +-- * free_dbs does not depend on bndrs +-- * dump_set = bndrs `union` bndrs(dump_dbs) +splitDictBinds dbs bndr_set + = foldl' split_db (emptyBag, emptyBag, bndr_set) dbs + -- Important that it's foldl' not foldr; + -- we're accumulating the set of dumped ids in dump_set + where + split_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs) + | dump_idset `intersectsVarSet` fvs -- Dump it + = (free_dbs, dump_dbs `snocBag` db, + extendVarSetList dump_idset (bindersOf bind)) + + | otherwise -- Don't dump it + = (free_dbs `snocBag` db, dump_dbs, dump_idset) + + +---------------------- +deleteCallsMentioning :: VarSet -> CallDetails -> CallDetails +-- Remove calls *mentioning* bs in any way +deleteCallsMentioning bs calls + = mapDVarEnv (ciSetFilter keep_call) calls + where + keep_call (CI { ci_fvs = fvs }) = not (fvs `intersectsVarSet` bs) + +deleteCallsFor :: [Id] -> CallDetails -> CallDetails +-- Remove calls *for* bs +deleteCallsFor bs calls = delDVarEnvList calls bs + +{- +************************************************************************ +* * +\subsubsection{Boring helper functions} +* * +************************************************************************ +-} + +newtype SpecM a = SpecM (State SpecState a) deriving (Functor) + +data SpecState = SpecState { + spec_uniq_supply :: UniqSupply, + spec_module :: Module, + spec_dflags :: DynFlags + } + +instance Applicative SpecM where + pure x = SpecM $ return x + (<*>) = ap + +instance Monad SpecM where + SpecM x >>= f = SpecM $ do y <- x + case f y of + SpecM z -> + z +#if !MIN_VERSION_base(4,13,0) + fail = MonadFail.fail +#endif + +instance MonadFail.MonadFail SpecM where + fail str = SpecM $ error str + +instance MonadUnique SpecM where + getUniqueSupplyM + = SpecM $ do st <- get + let (us1, us2) = splitUniqSupply $ spec_uniq_supply st + put $ st { spec_uniq_supply = us2 } + return us1 + + getUniqueM + = SpecM $ do st <- get + let (u,us') = takeUniqFromSupply $ spec_uniq_supply st + put $ st { spec_uniq_supply = us' } + return u + +instance HasDynFlags SpecM where + getDynFlags = SpecM $ liftM spec_dflags get + +instance HasModule SpecM where + getModule = SpecM $ liftM spec_module get + +runSpecM :: DynFlags -> Module -> SpecM a -> CoreM a +runSpecM dflags this_mod (SpecM spec) + = do us <- getUniqueSupplyM + let initialState = SpecState { + spec_uniq_supply = us, + spec_module = this_mod, + spec_dflags = dflags + } + return $ evalState spec initialState + +mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails) +mapAndCombineSM _ [] = return ([], emptyUDs) +mapAndCombineSM f (x:xs) = do (y, uds1) <- f x + (ys, uds2) <- mapAndCombineSM f xs + return (y:ys, uds1 `plusUDs` uds2) + +extendTvSubstList :: SpecEnv -> [(TyVar,Type)] -> SpecEnv +extendTvSubstList env tv_binds + = env { se_subst = GHC.Core.Subst.extendTvSubstList (se_subst env) tv_binds } + +substTy :: SpecEnv -> Type -> Type +substTy env ty = GHC.Core.Subst.substTy (se_subst env) ty + +substCo :: SpecEnv -> Coercion -> Coercion +substCo env co = GHC.Core.Subst.substCo (se_subst env) co + +substBndr :: SpecEnv -> CoreBndr -> (SpecEnv, CoreBndr) +substBndr env bs = case GHC.Core.Subst.substBndr (se_subst env) bs of + (subst', bs') -> (env { se_subst = subst' }, bs') + +substBndrs :: SpecEnv -> [CoreBndr] -> (SpecEnv, [CoreBndr]) +substBndrs env bs = case GHC.Core.Subst.substBndrs (se_subst env) bs of + (subst', bs') -> (env { se_subst = subst' }, bs') + +cloneBindSM :: SpecEnv -> CoreBind -> SpecM (SpecEnv, SpecEnv, CoreBind) +-- Clone the binders of the bind; return new bind with the cloned binders +-- Return the substitution to use for RHSs, and the one to use for the body +cloneBindSM env@(SE { se_subst = subst, se_interesting = interesting }) (NonRec bndr rhs) + = do { us <- getUniqueSupplyM + ; let (subst', bndr') = GHC.Core.Subst.cloneIdBndr subst us bndr + interesting' | interestingDict env rhs + = interesting `extendVarSet` bndr' + | otherwise = interesting + ; return (env, env { se_subst = subst', se_interesting = interesting' } + , NonRec bndr' rhs) } + +cloneBindSM env@(SE { se_subst = subst, se_interesting = interesting }) (Rec pairs) + = do { us <- getUniqueSupplyM + ; let (subst', bndrs') = GHC.Core.Subst.cloneRecIdBndrs subst us (map fst pairs) + env' = env { se_subst = subst' + , se_interesting = interesting `extendVarSetList` + [ v | (v,r) <- pairs, interestingDict env r ] } + ; return (env', env', Rec (bndrs' `zip` map snd pairs)) } + +newDictBndr :: SpecEnv -> CoreBndr -> SpecM CoreBndr +-- Make up completely fresh binders for the dictionaries +-- Their bindings are going to float outwards +newDictBndr env b = do { uniq <- getUniqueM + ; let n = idName b + ty' = substTy env (idType b) + ; return (mkUserLocal (nameOccName n) uniq ty' (getSrcSpan n)) } + +newSpecIdSM :: Id -> Type -> Maybe JoinArity -> SpecM Id + -- Give the new Id a similar occurrence name to the old one +newSpecIdSM old_id new_ty join_arity_maybe + = do { uniq <- getUniqueM + ; let name = idName old_id + new_occ = mkSpecOcc (nameOccName name) + new_id = mkUserLocal new_occ uniq new_ty (getSrcSpan name) + `asJoinId_maybe` join_arity_maybe + ; return new_id } + +{- + Old (but interesting) stuff about unboxed bindings + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +What should we do when a value is specialised to a *strict* unboxed value? + + map_*_* f (x:xs) = let h = f x + t = map f xs + in h:t + +Could convert let to case: + + map_*_Int# f (x:xs) = case f x of h# -> + let t = map f xs + in h#:t + +This may be undesirable since it forces evaluation here, but the value +may not be used in all branches of the body. In the general case this +transformation is impossible since the mutual recursion in a letrec +cannot be expressed as a case. + +There is also a problem with top-level unboxed values, since our +implementation cannot handle unboxed values at the top level. + +Solution: Lift the binding of the unboxed value and extract it when it +is used: + + map_*_Int# f (x:xs) = let h = case (f x) of h# -> _Lift h# + t = map f xs + in case h of + _Lift h# -> h#:t + +Now give it to the simplifier and the _Lifting will be optimised away. + +The benefit is that we have given the specialised "unboxed" values a +very simple lifted semantics and then leave it up to the simplifier to +optimise it --- knowing that the overheads will be removed in nearly +all cases. + +In particular, the value will only be evaluated in the branches of the +program which use it, rather than being forced at the point where the +value is bound. For example: + + filtermap_*_* p f (x:xs) + = let h = f x + t = ... + in case p x of + True -> h:t + False -> t + ==> + filtermap_*_Int# p f (x:xs) + = let h = case (f x) of h# -> _Lift h# + t = ... + in case p x of + True -> case h of _Lift h# + -> h#:t + False -> t + +The binding for h can still be inlined in the one branch and the +_Lifting eliminated. + + +Question: When won't the _Lifting be eliminated? + +Answer: When they at the top-level (where it is necessary) or when +inlining would duplicate work (or possibly code depending on +options). However, the _Lifting will still be eliminated if the +strictness analyser deems the lifted binding strict. +-} diff --git a/compiler/GHC/Core/Op/StaticArgs.hs b/compiler/GHC/Core/Op/StaticArgs.hs new file mode 100644 index 0000000000..e550fabfd9 --- /dev/null +++ b/compiler/GHC/Core/Op/StaticArgs.hs @@ -0,0 +1,433 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + + +************************************************************************ + + Static Argument Transformation pass + +************************************************************************ + +May be seen as removing invariants from loops: +Arguments of recursive functions that do not change in recursive +calls are removed from the recursion, which is done locally +and only passes the arguments which effectively change. + +Example: +map = /\ ab -> \f -> \xs -> case xs of + [] -> [] + (a:b) -> f a : map f b + +as map is recursively called with the same argument f (unmodified) +we transform it to + +map = /\ ab -> \f -> \xs -> let map' ys = case ys of + [] -> [] + (a:b) -> f a : map' b + in map' xs + +Notice that for a compiler that uses lambda lifting this is +useless as map' will be transformed back to what map was. + +We could possibly do the same for big lambdas, but we don't as +they will eventually be removed in later stages of the compiler, +therefore there is no penalty in keeping them. + +We only apply the SAT when the number of static args is > 2. This +produces few bad cases. See + should_transform +in saTransform. + +Here are the headline nofib results: + Size Allocs Runtime +Min +0.0% -13.7% -21.4% +Max +0.1% +0.0% +5.4% +Geometric Mean +0.0% -0.2% -6.9% + +The previous patch, to fix polymorphic floatout demand signatures, is +essential to make this work well! +-} + +{-# LANGUAGE CPP #-} +module GHC.Core.Op.StaticArgs ( doStaticArgs ) where + +import GhcPrelude + +import Var +import GHC.Core +import GHC.Core.Utils +import GHC.Core.Type +import GHC.Core.Coercion +import Id +import Name +import VarEnv +import UniqSupply +import Util +import UniqFM +import VarSet +import Unique +import UniqSet +import Outputable + +import Data.List (mapAccumL) +import FastString + +#include "HsVersions.h" + +doStaticArgs :: UniqSupply -> CoreProgram -> CoreProgram +doStaticArgs us binds = snd $ mapAccumL sat_bind_threaded_us us binds + where + sat_bind_threaded_us us bind = + let (us1, us2) = splitUniqSupply us + in (us1, fst $ runSAT us2 (satBind bind emptyUniqSet)) + +-- We don't bother to SAT recursive groups since it can lead +-- to massive code expansion: see Andre Santos' thesis for details. +-- This means we only apply the actual SAT to Rec groups of one element, +-- but we want to recurse into the others anyway to discover other binds +satBind :: CoreBind -> IdSet -> SatM (CoreBind, IdSATInfo) +satBind (NonRec binder expr) interesting_ids = do + (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids + return (NonRec binder expr', finalizeApp expr_app sat_info_expr) +satBind (Rec [(binder, rhs)]) interesting_ids = do + let interesting_ids' = interesting_ids `addOneToUniqSet` binder + (rhs_binders, rhs_body) = collectBinders rhs + (rhs_body', sat_info_rhs_body) <- satTopLevelExpr rhs_body interesting_ids' + let sat_info_rhs_from_args = unitVarEnv binder (bindersToSATInfo rhs_binders) + sat_info_rhs' = mergeIdSATInfo sat_info_rhs_from_args sat_info_rhs_body + + shadowing = binder `elementOfUniqSet` interesting_ids + sat_info_rhs'' = if shadowing + then sat_info_rhs' `delFromUFM` binder -- For safety + else sat_info_rhs' + + bind' <- saTransformMaybe binder (lookupUFM sat_info_rhs' binder) + rhs_binders rhs_body' + return (bind', sat_info_rhs'') +satBind (Rec pairs) interesting_ids = do + let (binders, rhss) = unzip pairs + rhss_SATed <- mapM (\e -> satTopLevelExpr e interesting_ids) rhss + let (rhss', sat_info_rhss') = unzip rhss_SATed + return (Rec (zipEqual "satBind" binders rhss'), mergeIdSATInfos sat_info_rhss') + +data App = VarApp Id | TypeApp Type | CoApp Coercion +data Staticness a = Static a | NotStatic + +type IdAppInfo = (Id, SATInfo) + +type SATInfo = [Staticness App] +type IdSATInfo = IdEnv SATInfo +emptyIdSATInfo :: IdSATInfo +emptyIdSATInfo = emptyUFM + +{- +pprIdSATInfo id_sat_info = vcat (map pprIdAndSATInfo (Map.toList id_sat_info)) + where pprIdAndSATInfo (v, sat_info) = hang (ppr v <> colon) 4 (pprSATInfo sat_info) +-} + +pprSATInfo :: SATInfo -> SDoc +pprSATInfo staticness = hcat $ map pprStaticness staticness + +pprStaticness :: Staticness App -> SDoc +pprStaticness (Static (VarApp _)) = text "SV" +pprStaticness (Static (TypeApp _)) = text "ST" +pprStaticness (Static (CoApp _)) = text "SC" +pprStaticness NotStatic = text "NS" + + +mergeSATInfo :: SATInfo -> SATInfo -> SATInfo +mergeSATInfo l r = zipWith mergeSA l r + where + mergeSA NotStatic _ = NotStatic + mergeSA _ NotStatic = NotStatic + mergeSA (Static (VarApp v)) (Static (VarApp v')) + | v == v' = Static (VarApp v) + | otherwise = NotStatic + mergeSA (Static (TypeApp t)) (Static (TypeApp t')) + | t `eqType` t' = Static (TypeApp t) + | otherwise = NotStatic + mergeSA (Static (CoApp c)) (Static (CoApp c')) + | c `eqCoercion` c' = Static (CoApp c) + | otherwise = NotStatic + mergeSA _ _ = pprPanic "mergeSATInfo" $ + text "Left:" + <> pprSATInfo l <> text ", " + <> text "Right:" + <> pprSATInfo r + +mergeIdSATInfo :: IdSATInfo -> IdSATInfo -> IdSATInfo +mergeIdSATInfo = plusUFM_C mergeSATInfo + +mergeIdSATInfos :: [IdSATInfo] -> IdSATInfo +mergeIdSATInfos = foldl' mergeIdSATInfo emptyIdSATInfo + +bindersToSATInfo :: [Id] -> SATInfo +bindersToSATInfo vs = map (Static . binderToApp) vs + where binderToApp v | isId v = VarApp v + | isTyVar v = TypeApp $ mkTyVarTy v + | otherwise = CoApp $ mkCoVarCo v + +finalizeApp :: Maybe IdAppInfo -> IdSATInfo -> IdSATInfo +finalizeApp Nothing id_sat_info = id_sat_info +finalizeApp (Just (v, sat_info')) id_sat_info = + let sat_info'' = case lookupUFM id_sat_info v of + Nothing -> sat_info' + Just sat_info -> mergeSATInfo sat_info sat_info' + in extendVarEnv id_sat_info v sat_info'' + +satTopLevelExpr :: CoreExpr -> IdSet -> SatM (CoreExpr, IdSATInfo) +satTopLevelExpr expr interesting_ids = do + (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids + return (expr', finalizeApp expr_app sat_info_expr) + +satExpr :: CoreExpr -> IdSet -> SatM (CoreExpr, IdSATInfo, Maybe IdAppInfo) +satExpr var@(Var v) interesting_ids = do + let app_info = if v `elementOfUniqSet` interesting_ids + then Just (v, []) + else Nothing + return (var, emptyIdSATInfo, app_info) + +satExpr lit@(Lit _) _ = do + return (lit, emptyIdSATInfo, Nothing) + +satExpr (Lam binders body) interesting_ids = do + (body', sat_info, this_app) <- satExpr body interesting_ids + return (Lam binders body', finalizeApp this_app sat_info, Nothing) + +satExpr (App fn arg) interesting_ids = do + (fn', sat_info_fn, fn_app) <- satExpr fn interesting_ids + let satRemainder = boring fn' sat_info_fn + case fn_app of + Nothing -> satRemainder Nothing + Just (fn_id, fn_app_info) -> + -- TODO: remove this use of append somehow (use a data structure with O(1) append but a left-to-right kind of interface) + let satRemainderWithStaticness arg_staticness = satRemainder $ Just (fn_id, fn_app_info ++ [arg_staticness]) + in case arg of + Type t -> satRemainderWithStaticness $ Static (TypeApp t) + Coercion c -> satRemainderWithStaticness $ Static (CoApp c) + Var v -> satRemainderWithStaticness $ Static (VarApp v) + _ -> satRemainderWithStaticness $ NotStatic + where + boring :: CoreExpr -> IdSATInfo -> Maybe IdAppInfo -> SatM (CoreExpr, IdSATInfo, Maybe IdAppInfo) + boring fn' sat_info_fn app_info = + do (arg', sat_info_arg, arg_app) <- satExpr arg interesting_ids + let sat_info_arg' = finalizeApp arg_app sat_info_arg + sat_info = mergeIdSATInfo sat_info_fn sat_info_arg' + return (App fn' arg', sat_info, app_info) + +satExpr (Case expr bndr ty alts) interesting_ids = do + (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids + let sat_info_expr' = finalizeApp expr_app sat_info_expr + + zipped_alts' <- mapM satAlt alts + let (alts', sat_infos_alts) = unzip zipped_alts' + return (Case expr' bndr ty alts', mergeIdSATInfo sat_info_expr' (mergeIdSATInfos sat_infos_alts), Nothing) + where + satAlt (con, bndrs, expr) = do + (expr', sat_info_expr) <- satTopLevelExpr expr interesting_ids + return ((con, bndrs, expr'), sat_info_expr) + +satExpr (Let bind body) interesting_ids = do + (body', sat_info_body, body_app) <- satExpr body interesting_ids + (bind', sat_info_bind) <- satBind bind interesting_ids + return (Let bind' body', mergeIdSATInfo sat_info_body sat_info_bind, body_app) + +satExpr (Tick tickish expr) interesting_ids = do + (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids + return (Tick tickish expr', sat_info_expr, expr_app) + +satExpr ty@(Type _) _ = do + return (ty, emptyIdSATInfo, Nothing) + +satExpr co@(Coercion _) _ = do + return (co, emptyIdSATInfo, Nothing) + +satExpr (Cast expr coercion) interesting_ids = do + (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids + return (Cast expr' coercion, sat_info_expr, expr_app) + +{- +************************************************************************ + + Static Argument Transformation Monad + +************************************************************************ +-} + +type SatM result = UniqSM result + +runSAT :: UniqSupply -> SatM a -> a +runSAT = initUs_ + +newUnique :: SatM Unique +newUnique = getUniqueM + +{- +************************************************************************ + + Static Argument Transformation Monad + +************************************************************************ + +To do the transformation, the game plan is to: + +1. Create a small nonrecursive RHS that takes the + original arguments to the function but discards + the ones that are static and makes a call to the + SATed version with the remainder. We intend that + this will be inlined later, removing the overhead + +2. Bind this nonrecursive RHS over the original body + WITH THE SAME UNIQUE as the original body so that + any recursive calls to the original now go via + the small wrapper + +3. Rebind the original function to a new one which contains + our SATed function and just makes a call to it: + we call the thing making this call the local body + +Example: transform this + + map :: forall a b. (a->b) -> [a] -> [b] + map = /\ab. \(f:a->b) (as:[a]) -> body[map] +to + map :: forall a b. (a->b) -> [a] -> [b] + map = /\ab. \(f:a->b) (as:[a]) -> + letrec map' :: [a] -> [b] + -- The "worker function + map' = \(as:[a]) -> + let map :: forall a' b'. (a -> b) -> [a] -> [b] + -- The "shadow function + map = /\a'b'. \(f':(a->b) (as:[a]). + map' as + in body[map] + in map' as + +Note [Shadow binding] +~~~~~~~~~~~~~~~~~~~~~ +The calls to the inner map inside body[map] should get inlined +by the local re-binding of 'map'. We call this the "shadow binding". + +But we can't use the original binder 'map' unchanged, because +it might be exported, in which case the shadow binding won't be +discarded as dead code after it is inlined. + +So we use a hack: we make a new SysLocal binder with the *same* unique +as binder. (Another alternative would be to reset the export flag.) + +Note [Binder type capture] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Notice that in the inner map (the "shadow function"), the static arguments +are discarded -- it's as if they were underscores. Instead, mentions +of these arguments (notably in the types of dynamic arguments) are bound +by the *outer* lambdas of the main function. So we must make up fresh +names for the static arguments so that they do not capture variables +mentioned in the types of dynamic args. + +In the map example, the shadow function must clone the static type +argument a,b, giving a',b', to ensure that in the \(as:[a]), the 'a' +is bound by the outer forall. We clone f' too for consistency, but +that doesn't matter either way because static Id arguments aren't +mentioned in the shadow binding at all. + +If we don't we get something like this: + +[Exported] +[Arity 3] +GHC.Base.until = + \ (@ a_aiK) + (p_a6T :: a_aiK -> GHC.Types.Bool) + (f_a6V :: a_aiK -> a_aiK) + (x_a6X :: a_aiK) -> + letrec { + sat_worker_s1aU :: a_aiK -> a_aiK + [] + sat_worker_s1aU = + \ (x_a6X :: a_aiK) -> + let { + sat_shadow_r17 :: forall a_a3O. + (a_a3O -> GHC.Types.Bool) -> (a_a3O -> a_a3O) -> a_a3O -> a_a3O + [] + sat_shadow_r17 = + \ (@ a_aiK) + (p_a6T :: a_aiK -> GHC.Types.Bool) + (f_a6V :: a_aiK -> a_aiK) + (x_a6X :: a_aiK) -> + sat_worker_s1aU x_a6X } in + case p_a6T x_a6X of wild_X3y [ALWAYS Dead Nothing] { + GHC.Types.False -> GHC.Base.until @ a_aiK p_a6T f_a6V (f_a6V x_a6X); + GHC.Types.True -> x_a6X + }; } in + sat_worker_s1aU x_a6X + +Where sat_shadow has captured the type variables of x_a6X etc as it has a a_aiK +type argument. This is bad because it means the application sat_worker_s1aU x_a6X +is not well typed. +-} + +saTransformMaybe :: Id -> Maybe SATInfo -> [Id] -> CoreExpr -> SatM CoreBind +saTransformMaybe binder maybe_arg_staticness rhs_binders rhs_body + | Just arg_staticness <- maybe_arg_staticness + , should_transform arg_staticness + = saTransform binder arg_staticness rhs_binders rhs_body + | otherwise + = return (Rec [(binder, mkLams rhs_binders rhs_body)]) + where + should_transform staticness = n_static_args > 1 -- THIS IS THE DECISION POINT + where + n_static_args = count isStaticValue staticness + +saTransform :: Id -> SATInfo -> [Id] -> CoreExpr -> SatM CoreBind +saTransform binder arg_staticness rhs_binders rhs_body + = do { shadow_lam_bndrs <- mapM clone binders_w_staticness + ; uniq <- newUnique + ; return (NonRec binder (mk_new_rhs uniq shadow_lam_bndrs)) } + where + -- Running example: foldr + -- foldr \alpha \beta c n xs = e, for some e + -- arg_staticness = [Static TypeApp, Static TypeApp, Static VarApp, Static VarApp, NonStatic] + -- rhs_binders = [\alpha, \beta, c, n, xs] + -- rhs_body = e + + binders_w_staticness = rhs_binders `zip` (arg_staticness ++ repeat NotStatic) + -- Any extra args are assumed NotStatic + + non_static_args :: [Var] + -- non_static_args = [xs] + -- rhs_binders_without_type_capture = [\alpha', \beta', c, n, xs] + non_static_args = [v | (v, NotStatic) <- binders_w_staticness] + + clone (bndr, NotStatic) = return bndr + clone (bndr, _ ) = do { uniq <- newUnique + ; return (setVarUnique bndr uniq) } + + -- new_rhs = \alpha beta c n xs -> + -- let sat_worker = \xs -> let sat_shadow = \alpha' beta' c n xs -> + -- sat_worker xs + -- in e + -- in sat_worker xs + mk_new_rhs uniq shadow_lam_bndrs + = mkLams rhs_binders $ + Let (Rec [(rec_body_bndr, rec_body)]) + local_body + where + local_body = mkVarApps (Var rec_body_bndr) non_static_args + + rec_body = mkLams non_static_args $ + Let (NonRec shadow_bndr shadow_rhs) rhs_body + + -- See Note [Binder type capture] + shadow_rhs = mkLams shadow_lam_bndrs local_body + -- nonrec_rhs = \alpha' beta' c n xs -> sat_worker xs + + rec_body_bndr = mkSysLocal (fsLit "sat_worker") uniq (exprType rec_body) + -- rec_body_bndr = sat_worker + + -- See Note [Shadow binding]; make a SysLocal + shadow_bndr = mkSysLocal (occNameFS (getOccName binder)) + (idUnique binder) + (exprType shadow_rhs) + +isStaticValue :: Staticness App -> Bool +isStaticValue (Static (VarApp _)) = True +isStaticValue _ = False diff --git a/compiler/GHC/Core/Op/Tidy.hs b/compiler/GHC/Core/Op/Tidy.hs index 60db2c8fea..758c1daf6c 100644 --- a/compiler/GHC/Core/Op/Tidy.hs +++ b/compiler/GHC/Core/Op/Tidy.hs @@ -191,7 +191,7 @@ tidyLetBndr rec_tidy_env env@(tidy_env, var_env) id -- Similarly for the demand info - on a let binder, this tells -- CorePrep to turn the let into a case. -- But: Remove the usage demand here - -- (See Note [Zapping DmdEnv after Demand Analyzer] in WorkWrap) + -- (See Note [Zapping DmdEnv after Demand Analyzer] in GHC.Core.Op.WorkWrap) -- -- Similarly arity info for eta expansion in CorePrep -- Don't attempt to recompute arity here; this is just tidying! diff --git a/compiler/GHC/Core/Op/WorkWrap.hs b/compiler/GHC/Core/Op/WorkWrap.hs new file mode 100644 index 0000000000..241a295899 --- /dev/null +++ b/compiler/GHC/Core/Op/WorkWrap.hs @@ -0,0 +1,776 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + +\section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser} +-} + +{-# LANGUAGE CPP #-} +module GHC.Core.Op.WorkWrap ( wwTopBinds ) where + +import GhcPrelude + +import GHC.Core.Arity ( manifestArity ) +import GHC.Core +import GHC.Core.Unfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding ) +import GHC.Core.Utils ( exprType, exprIsHNF ) +import GHC.Core.FVs ( exprFreeVars ) +import Var +import Id +import IdInfo +import GHC.Core.Type +import UniqSupply +import BasicTypes +import GHC.Driver.Session +import Demand +import Cpr +import GHC.Core.Op.WorkWrap.Lib +import Util +import Outputable +import GHC.Core.FamInstEnv +import MonadUtils + +#include "HsVersions.h" + +{- +We take Core bindings whose binders have: + +\begin{enumerate} + +\item Strictness attached (by the front-end of the strictness +analyser), and / or + +\item Constructed Product Result information attached by the CPR +analysis pass. + +\end{enumerate} + +and we return some ``plain'' bindings which have been +worker/wrapper-ified, meaning: + +\begin{enumerate} + +\item Functions have been split into workers and wrappers where +appropriate. If a function has both strictness and CPR properties +then only one worker/wrapper doing both transformations is produced; + +\item Binders' @IdInfos@ have been updated to reflect the existence of +these workers/wrappers (this is where we get STRICTNESS and CPR pragma +info for exported values). +\end{enumerate} +-} + +wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram + +wwTopBinds dflags fam_envs us top_binds + = initUs_ us $ do + top_binds' <- mapM (wwBind dflags fam_envs) top_binds + return (concat top_binds') + +{- +************************************************************************ +* * +\subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@} +* * +************************************************************************ + +@wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in +turn. Non-recursive case first, then recursive... +-} + +wwBind :: DynFlags + -> FamInstEnvs + -> CoreBind + -> UniqSM [CoreBind] -- returns a WwBinding intermediate form; + -- the caller will convert to Expr/Binding, + -- as appropriate. + +wwBind dflags fam_envs (NonRec binder rhs) = do + new_rhs <- wwExpr dflags fam_envs rhs + new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs + return [NonRec b e | (b,e) <- new_pairs] + -- Generated bindings must be non-recursive + -- because the original binding was. + +wwBind dflags fam_envs (Rec pairs) + = return . Rec <$> concatMapM do_one pairs + where + do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs + tryWW dflags fam_envs Recursive binder new_rhs + +{- +@wwExpr@ basically just walks the tree, looking for appropriate +annotations that can be used. Remember it is @wwBind@ that does the +matching by looking for strict arguments of the correct type. +@wwExpr@ is a version that just returns the ``Plain'' Tree. +-} + +wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr + +wwExpr _ _ e@(Type {}) = return e +wwExpr _ _ e@(Coercion {}) = return e +wwExpr _ _ e@(Lit {}) = return e +wwExpr _ _ e@(Var {}) = return e + +wwExpr dflags fam_envs (Lam binder expr) + = Lam new_binder <$> wwExpr dflags fam_envs expr + where new_binder | isId binder = zapIdUsedOnceInfo binder + | otherwise = binder + -- See Note [Zapping Used Once info in WorkWrap] + +wwExpr dflags fam_envs (App f a) + = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a + +wwExpr dflags fam_envs (Tick note expr) + = Tick note <$> wwExpr dflags fam_envs expr + +wwExpr dflags fam_envs (Cast expr co) = do + new_expr <- wwExpr dflags fam_envs expr + return (Cast new_expr co) + +wwExpr dflags fam_envs (Let bind expr) + = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr + +wwExpr dflags fam_envs (Case expr binder ty alts) = do + new_expr <- wwExpr dflags fam_envs expr + new_alts <- mapM ww_alt alts + let new_binder = zapIdUsedOnceInfo binder + -- See Note [Zapping Used Once info in WorkWrap] + return (Case new_expr new_binder ty new_alts) + where + ww_alt (con, binders, rhs) = do + new_rhs <- wwExpr dflags fam_envs rhs + let new_binders = [ if isId b then zapIdUsedOnceInfo b else b + | b <- binders ] + -- See Note [Zapping Used Once info in WorkWrap] + return (con, new_binders, new_rhs) + +{- +************************************************************************ +* * +\subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair} +* * +************************************************************************ + +@tryWW@ just accumulates arguments, converts strictness info from the +front-end into the proper form, then calls @mkWwBodies@ to do +the business. + +The only reason this is monadised is for the unique supply. + +Note [Don't w/w INLINE things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's very important to refrain from w/w-ing an INLINE function (ie one +with a stable unfolding) because the wrapper will then overwrite the +old stable unfolding with the wrapper code. + +Furthermore, if the programmer has marked something as INLINE, +we may lose by w/w'ing it. + +If the strictness analyser is run twice, this test also prevents +wrappers (which are INLINEd) from being re-done. (You can end up with +several liked-named Ids bouncing around at the same time---absolute +mischief.) + +Notice that we refrain from w/w'ing an INLINE function even if it is +in a recursive group. It might not be the loop breaker. (We could +test for loop-breaker-hood, but I'm not sure that ever matters.) + +Note [Worker-wrapper for INLINABLE functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + {-# INLINABLE f #-} + f :: Ord a => [a] -> Int -> a + f x y = ....f.... + +where f is strict in y, we might get a more efficient loop by w/w'ing +f. But that would make a new unfolding which would overwrite the old +one! So the function would no longer be INLNABLE, and in particular +will not be specialised at call sites in other modules. + +This comes in practice (#6056). + +Solution: do the w/w for strictness analysis, but transfer the Stable +unfolding to the *worker*. So we will get something like this: + + {-# INLINE[0] f #-} + f :: Ord a => [a] -> Int -> a + f d x y = case y of I# y' -> fw d x y' + + {-# INLINABLE[0] fw #-} + fw :: Ord a => [a] -> Int# -> a + fw d x y' = let y = I# y' in ...f... + +How do we "transfer the unfolding"? Easy: by using the old one, wrapped +in work_fn! See GHC.Core.Unfold.mkWorkerUnfolding. + +Note [Worker-wrapper for NOINLINE functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used to disable worker/wrapper for NOINLINE things, but it turns out +this can cause unnecessary reboxing of values. Consider + + {-# NOINLINE f #-} + f :: Int -> a + f x = error (show x) + + g :: Bool -> Bool -> Int -> Int + g True True p = f p + g False True p = p + 1 + g b False p = g b True p + +the strictness analysis will discover f and g are strict, but because f +has no wrapper, the worker for g will rebox p. So we get + + $wg x y p# = + let p = I# p# in -- Yikes! Reboxing! + case x of + False -> + case y of + False -> $wg False True p# + True -> +# p# 1# + True -> + case y of + False -> $wg True True p# + True -> case f p of { } + + g x y p = case p of (I# p#) -> $wg x y p# + +Now, in this case the reboxing will float into the True branch, and so +the allocation will only happen on the error path. But it won't float +inwards if there are multiple branches that call (f p), so the reboxing +will happen on every call of g. Disaster. + +Solution: do worker/wrapper even on NOINLINE things; but move the +NOINLINE pragma to the worker. + +(See #13143 for a real-world example.) + +It is crucial that we do this for *all* NOINLINE functions. #10069 +demonstrates what happens when we promise to w/w a (NOINLINE) leaf function, but +fail to deliver: + + data C = C Int# Int# + + {-# NOINLINE c1 #-} + c1 :: C -> Int# + c1 (C _ n) = n + + {-# NOINLINE fc #-} + fc :: C -> Int# + fc c = 2 *# c1 c + +Failing to w/w `c1`, but still w/wing `fc` leads to the following code: + + c1 :: C -> Int# + c1 (C _ n) = n + + $wfc :: Int# -> Int# + $wfc n = let c = C 0# n in 2 #* c1 c + + fc :: C -> Int# + fc (C _ n) = $wfc n + +Yikes! The reboxed `C` in `$wfc` can't cancel out, so we are in a bad place. +This generalises to any function that derives its strictness signature from +its callees, so we have to make sure that when a function announces particular +strictness properties, we have to w/w them accordingly, even if it means +splitting a NOINLINE function. + +Note [Worker activation] +~~~~~~~~~~~~~~~~~~~~~~~~ +Follows on from Note [Worker-wrapper for INLINABLE functions] + +It is *vital* that if the worker gets an INLINABLE pragma (from the +original function), then the worker has the same phase activation as +the wrapper (or later). That is necessary to allow the wrapper to +inline into the worker's unfolding: see GHC.Core.Op.Simplify.Utils +Note [Simplifying inside stable unfoldings]. + +If the original is NOINLINE, it's important that the work inherit the +original activation. Consider + + {-# NOINLINE expensive #-} + expensive x = x + 1 + + f y = let z = expensive y in ... + +If expensive's worker inherits the wrapper's activation, +we'll get this (because of the compromise in point (2) of +Note [Wrapper activation]) + + {-# NOINLINE[0] $wexpensive #-} + $wexpensive x = x + 1 + {-# INLINE[0] expensive #-} + expensive x = $wexpensive x + + f y = let z = expensive y in ... + +and $wexpensive will be immediately inlined into expensive, followed by +expensive into f. This effectively removes the original NOINLINE! + +Otherwise, nothing is lost by giving the worker the same activation as the +wrapper, because the worker won't have any chance of inlining until the +wrapper does; there's no point in giving it an earlier activation. + +Note [Don't w/w inline small non-loop-breaker things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general, we refrain from w/w-ing *small* functions, which are not +loop breakers, because they'll inline anyway. But we must take care: +it may look small now, but get to be big later after other inlining +has happened. So we take the precaution of adding an INLINE pragma to +any such functions. + +I made this change when I observed a big function at the end of +compilation with a useful strictness signature but no w-w. (It was +small during demand analysis, we refrained from w/w, and then got big +when something was inlined in its rhs.) When I measured it on nofib, +it didn't make much difference; just a few percent improved allocation +on one benchmark (bspt/Euclid.space). But nothing got worse. + +There is an infelicity though. We may get something like + f = g val +==> + g x = case gw x of r -> I# r + + f {- InlineStable, Template = g val -} + f = case gw x of r -> I# r + +The code for f duplicates that for g, without any real benefit. It +won't really be executed, because calls to f will go via the inlining. + +Note [Don't w/w join points for CPR] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There's no point in exploiting CPR info on a join point. If the whole function +is getting CPR'd, then the case expression around the worker function will get +pushed into the join point by the simplifier, which will have the same effect +that w/w'ing for CPR would have - the result will be returned in an unboxed +tuple. + + f z = let join j x y = (x+1, y+1) + in case z of A -> j 1 2 + B -> j 2 3 + + => + + f z = case $wf z of (# a, b #) -> (a, b) + $wf z = case (let join j x y = (x+1, y+1) + in case z of A -> j 1 2 + B -> j 2 3) of (a, b) -> (# a, b #) + + => + + f z = case $wf z of (# a, b #) -> (a, b) + $wf z = let join j x y = (# x+1, y+1 #) + in case z of A -> j 1 2 + B -> j 2 3 + +Note that we still want to give @j@ the CPR property, so that @f@ has it. So +CPR *analyse* join points as regular functions, but don't *transform* them. + +Doing W/W for returned products on a join point would be tricky anyway, as the +worker could not be a join point because it would not be tail-called. However, +doing the *argument* part of W/W still works for join points, since the wrapper +body will make a tail call: + + f z = let join j x y = x + y + in ... + + => + + f z = let join $wj x# y# = x# +# y# + j x y = case x of I# x# -> + case y of I# y# -> + $wj x# y# + in ... + +Note [Wrapper activation] +~~~~~~~~~~~~~~~~~~~~~~~~~ +When should the wrapper inlining be active? + +1. It must not be active earlier than the current Activation of the + Id + +2. It should be active at some point, despite (1) because of + Note [Worker-wrapper for NOINLINE functions] + +3. For ordinary functions with no pragmas we want to inline the + wrapper as early as possible (#15056). Suppose another module + defines f x = g x x + and suppose there is some RULE for (g True True). Then if we have + a call (f True), we'd expect to inline 'f' and the RULE will fire. + But if f is w/w'd (which it might be), we want the inlining to + occur just as if it hadn't been. + + (This only matters if f's RHS is big enough to w/w, but small + enough to inline given the call site, but that can happen.) + +4. We do not want to inline the wrapper before specialisation. + module Foo where + f :: Num a => a -> Int -> a + f n 0 = n -- Strict in the Int, hence wrapper + f n x = f (n+n) (x-1) + + g :: Int -> Int + g x = f x x -- Provokes a specialisation for f + + module Bar where + import Foo + + h :: Int -> Int + h x = f 3 x + + In module Bar we want to give specialisations a chance to fire + before inlining f's wrapper. + +Reminder: Note [Don't w/w INLINE things], so we don't need to worry + about INLINE things here. + +Conclusion: + - If the user said NOINLINE[n], respect that + - If the user said NOINLINE, inline the wrapper as late as + poss (phase 0). This is a compromise driven by (2) above + - Otherwise inline wrapper in phase 2. That allows the + 'gentle' simplification pass to apply specialisation rules + +Historical note: At one stage I tried making the wrapper inlining +always-active, and that had a very bad effect on nofib/imaginary/x2n1; +a wrapper was inlined before the specialisation fired. + +Note [Wrapper NoUserInline] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The use an inl_inline of NoUserInline on the wrapper distinguishes +this pragma from one that was given by the user. In particular, CSE +will not happen if there is a user-specified pragma, but should happen +for w/w’ed things (#14186). +-} + +tryWW :: DynFlags + -> FamInstEnvs + -> RecFlag + -> Id -- The fn binder + -> CoreExpr -- The bound rhs; its innards + -- are already ww'd + -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs; + -- if one, then no worker (only + -- the orig "wrapper" lives on); + -- if two, then a worker and a + -- wrapper. +tryWW dflags fam_envs is_rec fn_id rhs + -- See Note [Worker-wrapper for NOINLINE functions] + + | Just stable_unf <- certainlyWillInline dflags fn_info + = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ] + -- See Note [Don't w/w INLINE things] + -- See Note [Don't w/w inline small non-loop-breaker things] + + | is_fun && is_eta_exp + = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds div cpr rhs + + | is_thunk -- See Note [Thunk splitting] + = splitThunk dflags fam_envs is_rec new_fn_id rhs + + | otherwise + = return [ (new_fn_id, rhs) ] + + where + fn_info = idInfo fn_id + (wrap_dmds, div) = splitStrictSig (strictnessInfo fn_info) + + cpr_ty = getCprSig (cprInfo fn_info) + -- Arity of the CPR sig should match idArity when it's not a join point. + -- See Note [Arity trimming for CPR signatures] in GHC.Core.Op.CprAnal + cpr = ASSERT2( isJoinId fn_id || cpr_ty == topCprType || ct_arty cpr_ty == arityInfo fn_info + , ppr fn_id <> colon <+> text "ct_arty:" <+> int (ct_arty cpr_ty) <+> text "arityInfo:" <+> ppr (arityInfo fn_info)) + ct_cpr cpr_ty + + new_fn_id = zapIdUsedOnceInfo (zapIdUsageEnvInfo fn_id) + -- See Note [Zapping DmdEnv after Demand Analyzer] and + -- See Note [Zapping Used Once info WorkWrap] + + is_fun = notNull wrap_dmds || isJoinId fn_id + -- See Note [Don't eta expand in w/w] + is_eta_exp = length wrap_dmds == manifestArity rhs + is_thunk = not is_fun && not (exprIsHNF rhs) && not (isJoinId fn_id) + && not (isUnliftedType (idType fn_id)) + +{- +Note [Zapping DmdEnv after Demand Analyzer] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the worker-wrapper pass we zap the DmdEnv. Why? + (a) it is never used again + (b) it wastes space + (c) it becomes incorrect as things are cloned, because + we don't push the substitution into it + +Why here? + * Because we don’t want to do it in the Demand Analyzer, as we never know + there when we are doing the last pass. + * We want them to be still there at the end of DmdAnal, so that + -ddump-str-anal contains them. + * We don’t want a second pass just for that. + * WorkWrap looks at all bindings anyway. + +We also need to do it in TidyCore.tidyLetBndr to clean up after the +final, worker/wrapper-less run of the demand analyser (see +Note [Final Demand Analyser run] in GHC.Core.Op.DmdAnal). + +Note [Zapping Used Once info in WorkWrap] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the worker-wrapper pass we zap the used once info in demands and in +strictness signatures. + +Why? + * The simplifier may happen to transform code in a way that invalidates the + data (see #11731 for an example). + * It is not used in later passes, up to code generation. + +So as the data is useless and possibly wrong, we want to remove it. The most +convenient place to do that is the worker wrapper phase, as it runs after every +run of the demand analyser besides the very last one (which is the one where we +want to _keep_ the info for the code generator). + +We do not do it in the demand analyser for the same reasons outlined in +Note [Zapping DmdEnv after Demand Analyzer] above. + +Note [Don't eta expand in w/w] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A binding where the manifestArity of the RHS is less than idArity of the binder +means GHC.Core.Arity didn't eta expand that binding. When this happens, it does so +for a reason (see Note [exprArity invariant] in GHC.Core.Arity) and we probably have +a PAP, cast or trivial expression as RHS. + +Performing the worker/wrapper split will implicitly eta-expand the binding to +idArity, overriding GHC.Core.Arity's decision. Other than playing fast and loose with +divergence, it's also broken for newtypes: + + f = (\xy.blah) |> co + where + co :: (Int -> Int -> Char) ~ T + +Then idArity is 2 (despite the type T), and it can have a StrictSig based on a +threshold of 2. But we can't w/w it without a type error. + +The situation is less grave for PAPs, but the implicit eta expansion caused a +compiler allocation regression in T15164, where huge recursive instance method +groups, mostly consisting of PAPs, got w/w'd. This caused great churn in the +simplifier, when simply waiting for the PAPs to inline arrived at the same +output program. + +Note there is the worry here that such PAPs and trivial RHSs might not *always* +be inlined. That would lead to reboxing, because the analysis tacitly assumes +that we W/W'd for idArity and will propagate analysis information under that +assumption. So far, this doesn't seem to matter in practice. +See https://gitlab.haskell.org/ghc/ghc/merge_requests/312#note_192064. +-} + + +--------------------- +splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> Divergence -> CprResult -> CoreExpr + -> UniqSM [(Id, CoreExpr)] +splitFun dflags fam_envs fn_id fn_info wrap_dmds div cpr rhs + = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr cpr) ) do + -- The arity should match the signature + stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_cpr_info + case stuff of + Just (work_demands, join_arity, wrap_fn, work_fn) -> do + work_uniq <- getUniqueM + let work_rhs = work_fn rhs + work_act = case fn_inline_spec of -- See Note [Worker activation] + NoInline -> fn_act + _ -> wrap_act + + work_prag = InlinePragma { inl_src = SourceText "{-# INLINE" + , inl_inline = fn_inline_spec + , inl_sat = Nothing + , inl_act = work_act + , inl_rule = FunLike } + -- inl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions] + -- inl_act: see Note [Worker activation] + -- inl_rule: it does not make sense for workers to be constructorlike. + + work_join_arity | isJoinId fn_id = Just join_arity + | otherwise = Nothing + -- worker is join point iff wrapper is join point + -- (see Note [Don't w/w join points for CPR]) + + work_id = mkWorkerId work_uniq fn_id (exprType work_rhs) + `setIdOccInfo` occInfo fn_info + -- Copy over occurrence info from parent + -- Notably whether it's a loop breaker + -- Doesn't matter much, since we will simplify next, but + -- seems right-er to do so + + `setInlinePragma` work_prag + + `setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding + -- See Note [Worker-wrapper for INLINABLE functions] + + `setIdStrictness` mkClosedStrictSig work_demands div + -- Even though we may not be at top level, + -- it's ok to give it an empty DmdEnv + + `setIdCprInfo` mkCprSig work_arity work_cpr_info + + `setIdDemandInfo` worker_demand + + `setIdArity` work_arity + -- Set the arity so that the Core Lint check that the + -- arity is consistent with the demand type goes + -- through + `asJoinId_maybe` work_join_arity + + work_arity = length work_demands + + -- See Note [Demand on the Worker] + single_call = saturatedByOneShots arity (demandInfo fn_info) + worker_demand | single_call = mkWorkerDemand work_arity + | otherwise = topDmd + + wrap_rhs = wrap_fn work_id + wrap_act = case fn_act of -- See Note [Wrapper activation] + ActiveAfter {} -> fn_act + NeverActive -> activeDuringFinal + _ -> activeAfterInitial + wrap_prag = InlinePragma { inl_src = SourceText "{-# INLINE" + , inl_inline = NoUserInline + , inl_sat = Nothing + , inl_act = wrap_act + , inl_rule = rule_match_info } + -- inl_act: see Note [Wrapper activation] + -- inl_inline: see Note [Wrapper NoUserInline] + -- inl_rule: RuleMatchInfo is (and must be) unaffected + + wrap_id = fn_id `setIdUnfolding` mkWwInlineRule dflags wrap_rhs arity + `setInlinePragma` wrap_prag + `setIdOccInfo` noOccInfo + -- Zap any loop-breaker-ness, to avoid bleating from Lint + -- about a loop breaker with an INLINE rule + + + + return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)] + -- Worker first, because wrapper mentions it + + Nothing -> return [(fn_id, rhs)] + where + rhs_fvs = exprFreeVars rhs + fn_inl_prag = inlinePragInfo fn_info + fn_inline_spec = inl_inline fn_inl_prag + fn_act = inl_act fn_inl_prag + rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag + fn_unfolding = unfoldingInfo fn_info + arity = arityInfo fn_info + -- The arity is set by the simplifier using exprEtaExpandArity + -- So it may be more than the number of top-level-visible lambdas + + -- use_cpr_info is the CPR we w/w for. Note that we kill it for join points, + -- see Note [Don't w/w join points for CPR]. + use_cpr_info | isJoinId fn_id = topCpr + | otherwise = cpr + -- Even if we don't w/w join points for CPR, we might still do so for + -- strictness. In which case a join point worker keeps its original CPR + -- property; see Note [Don't w/w join points for CPR]. Otherwise, the worker + -- doesn't have the CPR property anymore. + work_cpr_info | isJoinId fn_id = cpr + | otherwise = topCpr + + +{- +Note [Demand on the worker] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the original function is called once, according to its demand info, then +so is the worker. This is important so that the occurrence analyser can +attach OneShot annotations to the worker’s lambda binders. + + +Example: + + -- Original function + f [Demand=<L,1*C1(U)>] :: (a,a) -> a + f = \p -> ... + + -- Wrapper + f [Demand=<L,1*C1(U)>] :: a -> a -> a + f = \p -> case p of (a,b) -> $wf a b + + -- Worker + $wf [Demand=<L,1*C1(C1(U))>] :: Int -> Int + $wf = \a b -> ... + +We need to check whether the original function is called once, with +sufficiently many arguments. This is done using saturatedByOneShots, which +takes the arity of the original function (resp. the wrapper) and the demand on +the original function. + +The demand on the worker is then calculated using mkWorkerDemand, and always of +the form [Demand=<L,1*(C1(...(C1(U))))>] + + +Note [Do not split void functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this rather common form of binding: + $j = \x:Void# -> ...no use of x... + +Since x is not used it'll be marked as absent. But there is no point +in w/w-ing because we'll simply add (\y:Void#), see GHC.Core.Op.WorkWrap.Lib.mkWorerArgs. + +If x has a more interesting type (eg Int, or Int#), there *is* a point +in w/w so that we don't pass the argument at all. + +Note [Thunk splitting] +~~~~~~~~~~~~~~~~~~~~~~ +Suppose x is used strictly (never mind whether it has the CPR +property). + + let + x* = x-rhs + in body + +splitThunk transforms like this: + + let + x* = case x-rhs of { I# a -> I# a } + in body + +Now simplifier will transform to + + case x-rhs of + I# a -> let x* = I# a + in body + +which is what we want. Now suppose x-rhs is itself a case: + + x-rhs = case e of { T -> I# a; F -> I# b } + +The join point will abstract over a, rather than over (which is +what would have happened before) which is fine. + +Notice that x certainly has the CPR property now! + +In fact, splitThunk uses the function argument w/w splitting +function, so that if x's demand is deeper (say U(U(L,L),L)) +then the splitting will go deeper too. +-} + +-- See Note [Thunk splitting] +-- splitThunk converts the *non-recursive* binding +-- x = e +-- into +-- x = let x = e +-- in case x of +-- I# y -> let x = I# y in x } +-- See comments above. Is it not beautifully short? +-- Moreover, it works just as well when there are +-- several binders, and if the binders are lifted +-- E.g. x = e +-- --> x = let x = e in +-- case x of (a,b) -> let x = (a,b) in x + +splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] +splitThunk dflags fam_envs is_rec fn_id rhs + = ASSERT(not (isJoinId fn_id)) + do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id] + ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] + ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive + return res + else return [(fn_id, rhs)] } diff --git a/compiler/GHC/Core/Op/WorkWrap/Lib.hs b/compiler/GHC/Core/Op/WorkWrap/Lib.hs new file mode 100644 index 0000000000..3ce454e7a2 --- /dev/null +++ b/compiler/GHC/Core/Op/WorkWrap/Lib.hs @@ -0,0 +1,1209 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + +A library for the ``worker\/wrapper'' back-end to the strictness analyser +-} + +{-# LANGUAGE CPP #-} + +module GHC.Core.Op.WorkWrap.Lib + ( mkWwBodies, mkWWstr, mkWorkerArgs + , deepSplitProductType_maybe, findTypeShape + , isWorkerSmallEnough + ) +where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Core +import GHC.Core.Utils ( exprType, mkCast, mkDefaultCase, mkSingleAltCase ) +import Id +import IdInfo ( JoinArity ) +import GHC.Core.DataCon +import Demand +import Cpr +import GHC.Core.Make ( mkAbsentErrorApp, mkCoreUbxTup + , mkCoreApp, mkCoreLet ) +import MkId ( voidArgId, voidPrimId ) +import TysWiredIn ( tupleDataCon ) +import TysPrim ( voidPrimTy ) +import Literal ( absentLiteralOf, rubbishLit ) +import VarEnv ( mkInScopeSet ) +import VarSet ( VarSet ) +import GHC.Core.Type +import GHC.Core.Predicate ( isClassPred ) +import GHC.Types.RepType ( isVoidTy, typePrimRep ) +import GHC.Core.Coercion +import GHC.Core.FamInstEnv +import BasicTypes ( Boxity(..) ) +import GHC.Core.TyCon +import UniqSupply +import Unique +import Maybes +import Util +import Outputable +import GHC.Driver.Session +import FastString +import ListSetOps + +{- +************************************************************************ +* * +\subsection[mkWrapperAndWorker]{@mkWrapperAndWorker@} +* * +************************************************************************ + +Here's an example. The original function is: + +\begin{verbatim} +g :: forall a . Int -> [a] -> a + +g = \/\ a -> \ x ys -> + case x of + 0 -> head ys + _ -> head (tail ys) +\end{verbatim} + +From this, we want to produce: +\begin{verbatim} +-- wrapper (an unfolding) +g :: forall a . Int -> [a] -> a + +g = \/\ a -> \ x ys -> + case x of + I# x# -> $wg a x# ys + -- call the worker; don't forget the type args! + +-- worker +$wg :: forall a . Int# -> [a] -> a + +$wg = \/\ a -> \ x# ys -> + let + x = I# x# + in + case x of -- note: body of g moved intact + 0 -> head ys + _ -> head (tail ys) +\end{verbatim} + +Something we have to be careful about: Here's an example: + +\begin{verbatim} +-- "f" strictness: U(P)U(P) +f (I# a) (I# b) = a +# b + +g = f -- "g" strictness same as "f" +\end{verbatim} + +\tr{f} will get a worker all nice and friendly-like; that's good. +{\em But we don't want a worker for \tr{g}}, even though it has the +same strictness as \tr{f}. Doing so could break laziness, at best. + +Consequently, we insist that the number of strictness-info items is +exactly the same as the number of lambda-bound arguments. (This is +probably slightly paranoid, but OK in practice.) If it isn't the +same, we ``revise'' the strictness info, so that we won't propagate +the unusable strictness-info into the interfaces. + + +************************************************************************ +* * +\subsection{The worker wrapper core} +* * +************************************************************************ + +@mkWwBodies@ is called when doing the worker\/wrapper split inside a module. +-} + +type WwResult + = ([Demand], -- Demands for worker (value) args + JoinArity, -- Number of worker (type OR value) args + Id -> CoreExpr, -- Wrapper body, lacking only the worker Id + CoreExpr -> CoreExpr) -- Worker body, lacking the original function rhs + +mkWwBodies :: DynFlags + -> FamInstEnvs + -> VarSet -- Free vars of RHS + -- See Note [Freshen WW arguments] + -> Id -- The original function + -> [Demand] -- Strictness of original function + -> CprResult -- Info about function result + -> UniqSM (Maybe WwResult) + +-- wrap_fn_args E = \x y -> E +-- work_fn_args E = E x y + +-- wrap_fn_str E = case x of { (a,b) -> +-- case a of { (a1,a2) -> +-- E a1 a2 b y }} +-- work_fn_str E = \a1 a2 b y -> +-- let a = (a1,a2) in +-- let x = (a,b) in +-- E + +mkWwBodies dflags fam_envs rhs_fvs fun_id demands cpr_info + = do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs) + -- See Note [Freshen WW arguments] + + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs empty_subst fun_ty demands + ; (useful1, work_args, wrap_fn_str, work_fn_str) + <- mkWWstr dflags fam_envs has_inlineable_prag wrap_args + + -- Do CPR w/w. See Note [Always do CPR w/w] + ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) + <- mkWWcpr (gopt Opt_CprAnal dflags) fam_envs res_ty cpr_info + + ; let (work_lam_args, work_call_args) = mkWorkerArgs dflags work_args cpr_res_ty + worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] + wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var + worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args + + ; if isWorkerSmallEnough dflags work_args + && not (too_many_args_for_join_point wrap_args) + && ((useful1 && not only_one_void_argument) || useful2) + then return (Just (worker_args_dmds, length work_call_args, + wrapper_body, worker_body)) + else return Nothing + } + -- We use an INLINE unconditionally, even if the wrapper turns out to be + -- something trivial like + -- fw = ... + -- f = __inline__ (coerce T fw) + -- The point is to propagate the coerce to f's call sites, so even though + -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent + -- fw from being inlined into f's RHS + where + fun_ty = idType fun_id + mb_join_arity = isJoinId_maybe fun_id + has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id) + -- See Note [Do not unpack class dictionaries] + + -- Note [Do not split void functions] + only_one_void_argument + | [d] <- demands + , Just (arg_ty1, _) <- splitFunTy_maybe fun_ty + , isAbsDmd d && isVoidTy arg_ty1 + = True + | otherwise + = False + + -- Note [Join points returning functions] + too_many_args_for_join_point wrap_args + | Just join_arity <- mb_join_arity + , wrap_args `lengthExceeds` join_arity + = WARN(True, text "Unable to worker/wrapper join point with arity " <+> + int join_arity <+> text "but" <+> + int (length wrap_args) <+> text "args") + True + | otherwise + = False + +-- See Note [Limit w/w arity] +isWorkerSmallEnough :: DynFlags -> [Var] -> Bool +isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags + -- We count only Free variables (isId) to skip Type, Kind + -- variables which have no runtime representation. + +{- +Note [Always do CPR w/w] +~~~~~~~~~~~~~~~~~~~~~~~~ +At one time we refrained from doing CPR w/w for thunks, on the grounds that +we might duplicate work. But that is already handled by the demand analyser, +which doesn't give the CPR property if w/w might waste work: see +Note [CPR for thunks] in GHC.Core.Op.DmdAnal. + +And if something *has* been given the CPR property and we don't w/w, it's +a disaster, because then the enclosing function might say it has the CPR +property, but now doesn't and there a cascade of disaster. A good example +is #5920. + +Note [Limit w/w arity] +~~~~~~~~~~~~~~~~~~~~~~~~ +Guard against high worker arity as it generates a lot of stack traffic. +A simplified example is #11565#comment:6 + +Current strategy is very simple: don't perform w/w transformation at all +if the result produces a wrapper with arity higher than -fmax-worker-args=. + +It is a bit all or nothing, consider + + f (x,y) (a,b,c,d,e ... , z) = rhs + +Currently we will remove all w/w ness entirely. But actually we could +w/w on the (x,y) pair... it's the huge product that is the problem. + +Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd +solve f. But we can get a lot of args from deeply-nested products: + + g (a, (b, (c, (d, ...)))) = rhs + +This is harder to spot on an arg-by-arg basis. Previously mkWwStr was +given some "fuel" saying how many arguments it could add; when we ran +out of fuel it would stop w/wing. +Still not very clever because it had a left-right bias. + +************************************************************************ +* * +\subsection{Making wrapper args} +* * +************************************************************************ + +During worker-wrapper stuff we may end up with an unlifted thing +which we want to let-bind without losing laziness. So we +add a void argument. E.g. + + f = /\a -> \x y z -> E::Int# -- E does not mention x,y,z +==> + fw = /\ a -> \void -> E + f = /\ a -> \x y z -> fw realworld + +We use the state-token type which generates no code. +-} + +mkWorkerArgs :: DynFlags -> [Var] + -> Type -- Type of body + -> ([Var], -- Lambda bound args + [Var]) -- Args at call site +mkWorkerArgs dflags args res_ty + | any isId args || not needsAValueLambda + = (args, args) + | otherwise + = (args ++ [voidArgId], args ++ [voidPrimId]) + where + -- See "Making wrapper args" section above + needsAValueLambda = + lifted + -- We may encounter a levity-polymorphic result, in which case we + -- conservatively assume that we have laziness that needs preservation. + -- See #15186. + || not (gopt Opt_FunToThunk dflags) + -- see Note [Protecting the last value argument] + + -- Might the result be lifted? + lifted = + case isLiftedType_maybe res_ty of + Just lifted -> lifted + Nothing -> True + +{- +Note [Protecting the last value argument] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the user writes (\_ -> E), they might be intentionally disallowing +the sharing of E. Since absence analysis and worker-wrapper are keen +to remove such unused arguments, we add in a void argument to prevent +the function from becoming a thunk. + +The user can avoid adding the void argument with the -ffun-to-thunk +flag. However, this can create sharing, which may be bad in two ways. 1) It can +create a space leak. 2) It can prevent inlining *under a lambda*. If w/w +removes the last argument from a function f, then f now looks like a thunk, and +so f can't be inlined *under a lambda*. + +Note [Join points and beta-redexes] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Originally, the worker would invoke the original function by calling it with +arguments, thus producing a beta-redex for the simplifier to munch away: + + \x y z -> e => (\x y z -> e) wx wy wz + +Now that we have special rules about join points, however, this is Not Good if +the original function is itself a join point, as then it may contain invocations +of other join points: + + join j1 x = ... + join j2 y = if y == 0 then 0 else j1 y + + => + + join j1 x = ... + join $wj2 y# = let wy = I# y# in (\y -> if y == 0 then 0 else jump j1 y) wy + join j2 y = case y of I# y# -> jump $wj2 y# + +There can't be an intervening lambda between a join point's declaration and its +occurrences, so $wj2 here is wrong. But of course, this is easy enough to fix: + + ... + let join $wj2 y# = let wy = I# y# in let y = wy in if y == 0 then 0 else j1 y + ... + +Hence we simply do the beta-reduction here. (This would be harder if we had to +worry about hygiene, but luckily wy is freshly generated.) + +Note [Join points returning functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is crucial that the arity of a join point depends on its *callers,* not its +own syntax. What this means is that a join point can have "extra lambdas": + +f :: Int -> Int -> (Int, Int) -> Int +f x y = join j (z, w) = \(u, v) -> ... + in jump j (x, y) + +Typically this happens with functions that are seen as computing functions, +rather than being curried. (The real-life example was GraphOps.addConflicts.) + +When we create the wrapper, it *must* be in "eta-contracted" form so that the +jump has the right number of arguments: + +f x y = join $wj z' w' = \u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... + j (z, w) = jump $wj z w + +(See Note [Join points and beta-redexes] for where the lets come from.) If j +were a function, we would instead say + +f x y = let $wj = \z' w' u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... + j (z, w) (u, v) = $wj z w u v + +Notice that the worker ends up with the same lambdas; it's only the wrapper we +have to be concerned about. + +FIXME Currently the functionality to produce "eta-contracted" wrappers is +unimplemented; we simply give up. + +************************************************************************ +* * +\subsection{Coercion stuff} +* * +************************************************************************ + +We really want to "look through" coerces. +Reason: I've seen this situation: + + let f = coerce T (\s -> E) + in \x -> case x of + p -> coerce T' f + q -> \s -> E2 + r -> coerce T' f + +If only we w/w'd f, we'd get + let f = coerce T (\s -> fw s) + fw = \s -> E + in ... + +Now we'll inline f to get + + let fw = \s -> E + in \x -> case x of + p -> fw + q -> \s -> E2 + r -> fw + +Now we'll see that fw has arity 1, and will arity expand +the \x to get what we want. +-} + +-- mkWWargs just does eta expansion +-- is driven off the function type and arity. +-- It chomps bites off foralls, arrows, newtypes +-- and keeps repeating that until it's satisfied the supplied arity + +mkWWargs :: TCvSubst -- Freshening substitution to apply to the type + -- See Note [Freshen WW arguments] + -> Type -- The type of the function + -> [Demand] -- Demands and one-shot info for value arguments + -> UniqSM ([Var], -- Wrapper args + CoreExpr -> CoreExpr, -- Wrapper fn + CoreExpr -> CoreExpr, -- Worker fn + Type) -- Type of wrapper body + +mkWWargs subst fun_ty demands + | null demands + = return ([], id, id, substTy subst fun_ty) + + | (dmd:demands') <- demands + , Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty + = do { uniq <- getUniqueM + ; let arg_ty' = substTy subst arg_ty + id = mk_wrap_arg uniq arg_ty' dmd + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst fun_ty' demands' + ; return (id : wrap_args, + Lam id . wrap_fn_args, + apply_or_bind_then work_fn_args (varToCoreExpr id), + res_ty) } + + | Just (tv, fun_ty') <- splitForAllTy_maybe fun_ty + = do { uniq <- getUniqueM + ; let (subst', tv') = cloneTyVarBndr subst tv uniq + -- See Note [Freshen WW arguments] + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst' fun_ty' demands + ; return (tv' : wrap_args, + Lam tv' . wrap_fn_args, + apply_or_bind_then work_fn_args (mkTyArg (mkTyVarTy tv')), + res_ty) } + + | Just (co, rep_ty) <- topNormaliseNewType_maybe fun_ty + -- The newtype case is for when the function has + -- a newtype after the arrow (rare) + -- + -- It's also important when we have a function returning (say) a pair + -- wrapped in a newtype, at least if CPR analysis can look + -- through such newtypes, which it probably can since they are + -- simply coerces. + + = do { (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst rep_ty demands + ; let co' = substCo subst co + ; return (wrap_args, + \e -> Cast (wrap_fn_args e) (mkSymCo co'), + \e -> work_fn_args (Cast e co'), + res_ty) } + + | otherwise + = WARN( True, ppr fun_ty ) -- Should not happen: if there is a demand + return ([], id, id, substTy subst fun_ty) -- then there should be a function arrow + where + -- See Note [Join points and beta-redexes] + apply_or_bind_then k arg (Lam bndr body) + = mkCoreLet (NonRec bndr arg) (k body) -- Important that arg is fresh! + apply_or_bind_then k arg fun + = k $ mkCoreApp (text "mkWWargs") fun arg +applyToVars :: [Var] -> CoreExpr -> CoreExpr +applyToVars vars fn = mkVarApps fn vars + +mk_wrap_arg :: Unique -> Type -> Demand -> Id +mk_wrap_arg uniq ty dmd + = mkSysLocalOrCoVar (fsLit "w") uniq ty + `setIdDemandInfo` dmd + +{- Note [Freshen WW arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Wen we do a worker/wrapper split, we must not in-scope names as the arguments +of the worker, else we'll get name capture. E.g. + + -- y1 is in scope from further out + f x = ..y1.. + +If we accidentally choose y1 as a worker argument disaster results: + + fww y1 y2 = let x = (y1,y2) in ...y1... + +To avoid this: + + * We use a fresh unique for both type-variable and term-variable binders + Originally we lacked this freshness for type variables, and that led + to the very obscure #12562. (A type variable in the worker shadowed + an outer term-variable binding.) + + * Because of this cloning we have to substitute in the type/kind of the + new binders. That's why we carry the TCvSubst through mkWWargs. + + So we need a decent in-scope set, just in case that type/kind + itself has foralls. We get this from the free vars of the RHS of the + function since those are the only variables that might be captured. + It's a lazy thunk, which will only be poked if the type/kind has a forall. + + Another tricky case was when f :: forall a. a -> forall a. a->a + (i.e. with shadowing), and then the worker used the same 'a' twice. + +************************************************************************ +* * +\subsection{Strictness stuff} +* * +************************************************************************ +-} + +mkWWstr :: DynFlags + -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragma on this function defn + -- See Note [Do not unpack class dictionaries] + -> [Var] -- Wrapper args; have their demand info on them + -- *Includes type variables* + -> UniqSM (Bool, -- Is this useful + [Var], -- Worker args + CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call + -- and without its lambdas + -- This fn adds the unboxing + + CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, + -- and lacking its lambdas. + -- This fn does the reboxing +mkWWstr dflags fam_envs has_inlineable_prag args + = go args + where + go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg + + go [] = return (False, [], nop_fn, nop_fn) + go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg + ; (useful2, args2, wrap_fn2, work_fn2) <- go args + ; return ( useful1 || useful2 + , args1 ++ args2 + , wrap_fn1 . wrap_fn2 + , work_fn1 . work_fn2) } + +{- +Note [Unpacking arguments with product and polymorphic demands] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The argument is unpacked in a case if it has a product type and has a +strict *and* used demand put on it. I.e., arguments, with demands such +as the following ones: + + <S,U(U, L)> + <S(L,S),U> + +will be unpacked, but + + <S,U> or <B,U> + +will not, because the pieces aren't used. This is quite important otherwise +we end up unpacking massive tuples passed to the bottoming function. Example: + + f :: ((Int,Int) -> String) -> (Int,Int) -> a + f g pr = error (g pr) + + main = print (f fst (1, error "no")) + +Does 'main' print "error 1" or "error no"? We don't really want 'f' +to unbox its second argument. This actually happened in GHC's onwn +source code, in Packages.applyPackageFlag, which ended up un-boxing +the enormous DynFlags tuple, and being strict in the +as-yet-un-filled-in pkgState files. +-} + +---------------------- +-- mkWWstr_one wrap_arg = (useful, work_args, wrap_fn, work_fn) +-- * wrap_fn assumes wrap_arg is in scope, +-- brings into scope work_args (via cases) +-- * work_fn assumes work_args are in scope, a +-- brings into scope wrap_arg (via lets) +-- See Note [How to do the worker/wrapper split] +mkWWstr_one :: DynFlags -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragma on this function defn + -- See Note [Do not unpack class dictionaries] + -> Var + -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) +mkWWstr_one dflags fam_envs has_inlineable_prag arg + | isTyVar arg + = return (False, [arg], nop_fn, nop_fn) + + | isAbsDmd dmd + , Just work_fn <- mk_absent_let dflags fam_envs arg + -- Absent case. We can't always handle absence for arbitrary + -- unlifted types, so we need to choose just the cases we can + -- (that's what mk_absent_let does) + = return (True, [], nop_fn, work_fn) + + | isStrictDmd dmd + , Just cs <- splitProdDmd_maybe dmd + -- See Note [Unpacking arguments with product and polymorphic demands] + , not (has_inlineable_prag && isClassPred arg_ty) + -- See Note [Do not unpack class dictionaries] + , Just stuff@(_, _, inst_con_arg_tys, _) <- deepSplitProductType_maybe fam_envs arg_ty + , cs `equalLength` inst_con_arg_tys + -- See Note [mkWWstr and unsafeCoerce] + = unbox_one dflags fam_envs arg cs stuff + + | isSeqDmd dmd -- For seqDmd, splitProdDmd_maybe will return Nothing, but + -- it should behave like <S, U(AAAA)>, for some suitable arity + , Just stuff@(_, _, inst_con_arg_tys, _) <- deepSplitProductType_maybe fam_envs arg_ty + , let abs_dmds = map (const absDmd) inst_con_arg_tys + = unbox_one dflags fam_envs arg abs_dmds stuff + + | otherwise -- Other cases + = return (False, [arg], nop_fn, nop_fn) + + where + arg_ty = idType arg + dmd = idDemandInfo arg + +unbox_one :: DynFlags -> FamInstEnvs -> Var + -> [Demand] + -> (DataCon, [Type], [(Type, StrictnessMark)], Coercion) + -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) +unbox_one dflags fam_envs arg cs + (data_con, inst_tys, inst_con_arg_tys, co) + = do { (uniq1:uniqs) <- getUniquesM + ; let -- See Note [Add demands for strict constructors] + cs' = addDataConStrictness data_con cs + unpk_args = zipWith3 mk_ww_arg uniqs inst_con_arg_tys cs' + unbox_fn = mkUnpackCase (Var arg) co uniq1 + data_con unpk_args + arg_no_unf = zapStableUnfolding arg + -- See Note [Zap unfolding when beta-reducing] + -- in GHC.Core.Op.Simplify; and see #13890 + rebox_fn = Let (NonRec arg_no_unf con_app) + con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co + ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args + ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } + -- Don't pass the arg, rebox instead + where + mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd + +---------------------- +nop_fn :: CoreExpr -> CoreExpr +nop_fn body = body + +addDataConStrictness :: DataCon -> [Demand] -> [Demand] +-- See Note [Add demands for strict constructors] +addDataConStrictness con ds + = ASSERT2( equalLength strs ds, ppr con $$ ppr strs $$ ppr ds ) + zipWith add ds strs + where + strs = dataConRepStrictness con + add dmd str | isMarkedStrict str = strictifyDmd dmd + | otherwise = dmd + +{- Note [How to do the worker/wrapper split] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The worker-wrapper transformation, mkWWstr_one, takes into account +several possibilities to decide if the function is worthy for +splitting: + +1. If an argument is absent, it would be silly to pass it to + the worker. Hence the isAbsDmd case. This case must come + first because a demand like <S,A> or <B,A> is possible. + E.g. <B,A> comes from a function like + f x = error "urk" + and <S,A> can come from Note [Add demands for strict constructors] + +2. If the argument is evaluated strictly, and we can split the + product demand (splitProdDmd_maybe), then unbox it and w/w its + pieces. For example + + f :: (Int, Int) -> Int + f p = (case p of (a,b) -> a) + 1 + is split to + f :: (Int, Int) -> Int + f p = case p of (a,b) -> $wf a + + $wf :: Int -> Int + $wf a = a + 1 + + and + g :: Bool -> (Int, Int) -> Int + g c p = case p of (a,b) -> + if c then a else b + is split to + g c p = case p of (a,b) -> $gw c a b + $gw c a b = if c then a else b + +2a But do /not/ split if the components are not used; that is, the + usage is just 'Used' rather than 'UProd'. In this case + splitProdDmd_maybe returns Nothing. Otherwise we risk decomposing + a massive tuple which is barely used. Example: + + f :: ((Int,Int) -> String) -> (Int,Int) -> a + f g pr = error (g pr) + + main = print (f fst (1, error "no")) + + Here, f does not take 'pr' apart, and it's stupid to do so. + Imagine that it had millions of fields. This actually happened + in GHC itself where the tuple was DynFlags + +3. A plain 'seqDmd', which is head-strict with usage UHead, can't + be split by splitProdDmd_maybe. But we want it to behave just + like U(AAAA) for suitable number of absent demands. So we have + a special case for it, with arity coming from the data constructor. + +Note [Worker-wrapper for bottoming functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used not to split if the result is bottom. +[Justification: there's no efficiency to be gained.] + +But it's sometimes bad not to make a wrapper. Consider + fw = \x# -> let x = I# x# in case e of + p1 -> error_fn x + p2 -> error_fn x + p3 -> the real stuff +The re-boxing code won't go away unless error_fn gets a wrapper too. +[We don't do reboxing now, but in general it's better to pass an +unboxed thing to f, and have it reboxed in the error cases....] + +Note [Add demands for strict constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this program (due to Roman): + + data X a = X !a + + foo :: X Int -> Int -> Int + foo (X a) n = go 0 + where + go i | i < n = a + go (i+1) + | otherwise = 0 + +We want the worker for 'foo' too look like this: + + $wfoo :: Int# -> Int# -> Int# + +with the first argument unboxed, so that it is not eval'd each time +around the 'go' loop (which would otherwise happen, since 'foo' is not +strict in 'a'). It is sound for the wrapper to pass an unboxed arg +because X is strict, so its argument must be evaluated. And if we +*don't* pass an unboxed argument, we can't even repair it by adding a +`seq` thus: + + foo (X a) n = a `seq` go 0 + +because the seq is discarded (very early) since X is strict! + +So here's what we do + +* We leave the demand-analysis alone. The demand on 'a' in the + definition of 'foo' is <L, U(U)>; the strictness info is Lazy + because foo's body may or may not evaluate 'a'; but the usage info + says that 'a' is unpacked and its content is used. + +* During worker/wrapper, if we unpack a strict constructor (as we do + for 'foo'), we use 'addDataConStrictness' to bump up the strictness on + the strict arguments of the data constructor. + +* That in turn means that, if the usage info supports doing so + (i.e. splitProdDmd_maybe returns Just), we will unpack that argument + -- even though the original demand (e.g. on 'a') was lazy. + +* What does "bump up the strictness" mean? Just add a head-strict + demand to the strictness! Even for a demand like <L,A> we can + safely turn it into <S,A>; remember case (1) of + Note [How to do the worker/wrapper split]. + +The net effect is that the w/w transformation is more aggressive about +unpacking the strict arguments of a data constructor, when that +eagerness is supported by the usage info. + +There is the usual danger of reboxing, which as usual we ignore. But +if X is monomorphic, and has an UNPACK pragma, then this optimisation +is even more important. We don't want the wrapper to rebox an unboxed +argument, and pass an Int to $wfoo! + +This works in nested situations like + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = case f of BarPair x y -> + case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +The extra eagerness lets us produce a worker of type: + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated. + +--------- Historical note ------------ +We used to add data-con strictness demands when demand analysing case +expression. However, it was noticed in #15696 that this misses some cases. For +instance, consider the program (from T10482) + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = + case f of + BarPair x y -> case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +We really should be able to assume that `p` is already evaluated since it came +from a strict field of BarPair. This strictness would allow us to produce a +worker of type: + + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated + +Indeed before we fixed #15696 this would happen since we would float the inner +`case x` through the `case burble` to get: + + foo f k = + case f of + BarPair x y -> case x of + BarPair p q -> case burble of + True -> ... + False -> ... + +However, after fixing #15696 this could no longer happen (for the reasons +discussed in ticket:15696#comment:76). This means that the demand placed on `f` +would then be significantly weaker (since the False branch of the case on +`burble` is not strict in `p` or `q`). + +Consequently, we now instead account for data-con strictness in mkWWstr_one, +applying the strictness demands to the final result of DmdAnal. The result is +that we get the strict demand signature we wanted even if we can't float +the case on `x` up through the case on `burble`. + + +Note [mkWWstr and unsafeCoerce] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By using unsafeCoerce, it is possible to make the number of demands fail to +match the number of constructor arguments; this happened in #8037. +If so, the worker/wrapper split doesn't work right and we get a Core Lint +bug. The fix here is simply to decline to do w/w if that happens. + +Note [Record evaluated-ness in worker/wrapper] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have + + data T = MkT !Int Int + + f :: T -> T + f x = e + +and f's is strict, and has the CPR property. The we are going to generate +this w/w split + + f x = case x of + MkT x1 x2 -> case $wf x1 x2 of + (# r1, r2 #) -> MkT r1 r2 + + $wfw x1 x2 = let x = MkT x1 x2 in + case e of + MkT r1 r2 -> (# r1, r2 #) + +Note that + +* In the worker $wf, inside 'e' we can be sure that x1 will be + evaluated (it came from unpacking the argument MkT. But that's no + immediately apparent in $wf + +* In the wrapper 'f', which we'll inline at call sites, we can be sure + that 'r1' has been evaluated (because it came from unpacking the result + MkT. But that is not immediately apparent from the wrapper code. + +Missing these facts isn't unsound, but it loses possible future +opportunities for optimisation. + +Solution: use setCaseBndrEvald when creating + (A) The arg binders x1,x2 in mkWstr_one + See #13077, test T13077 + (B) The result binders r1,r2 in mkWWcpr_help + See Trace #13077, test T13077a + And #13027 comment:20, item (4) +to record that the relevant binder is evaluated. + + +************************************************************************ +* * + Type scrutiny that is specific to demand analysis +* * +************************************************************************ + +Note [Do not unpack class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + f :: Ord a => [a] -> Int -> a + {-# INLINABLE f #-} +and we worker/wrapper f, we'll get a worker with an INLINABLE pragma +(see Note [Worker-wrapper for INLINABLE functions] in GHC.Core.Op.WorkWrap), which +can still be specialised by the type-class specialiser, something like + fw :: Ord a => [a] -> Int# -> a + +BUT if f is strict in the Ord dictionary, we might unpack it, to get + fw :: (a->a->Bool) -> [a] -> Int# -> a +and the type-class specialiser can't specialise that. An example is +#6056. + +But in any other situation a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and switch +off the unpacking in mkWWstr_one (see the isClassPred test). + +Historical note: #14955 describes how I got this fix wrong +the first time. +-} + +deepSplitProductType_maybe + :: FamInstEnvs -> Type + -> Maybe (DataCon, [Type], [(Type, StrictnessMark)], Coercion) +-- If deepSplitProductType_maybe ty = Just (dc, tys, arg_tys, co) +-- then dc @ tys (args::arg_tys) :: rep_ty +-- co :: ty ~ rep_ty +-- Why do we return the strictness of the data-con arguments? +-- Answer: see Note [Record evaluated-ness in worker/wrapper] +deepSplitProductType_maybe fam_envs ty + | let (co, ty1) = topNormaliseType_maybe fam_envs ty + `orElse` (mkRepReflCo ty, ty) + , Just (tc, tc_args) <- splitTyConApp_maybe ty1 + , Just con <- isDataProductTyCon_maybe tc + , let arg_tys = dataConInstArgTys con tc_args + strict_marks = dataConRepStrictness con + = Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co) +deepSplitProductType_maybe _ _ = Nothing + +deepSplitCprType_maybe + :: FamInstEnvs -> ConTag -> Type + -> Maybe (DataCon, [Type], [(Type, StrictnessMark)], Coercion) +-- If deepSplitCprType_maybe n ty = Just (dc, tys, arg_tys, co) +-- then dc @ tys (args::arg_tys) :: rep_ty +-- co :: ty ~ rep_ty +-- Why do we return the strictness of the data-con arguments? +-- Answer: see Note [Record evaluated-ness in worker/wrapper] +deepSplitCprType_maybe fam_envs con_tag ty + | let (co, ty1) = topNormaliseType_maybe fam_envs ty + `orElse` (mkRepReflCo ty, ty) + , Just (tc, tc_args) <- splitTyConApp_maybe ty1 + , isDataTyCon tc + , let cons = tyConDataCons tc + , cons `lengthAtLeast` con_tag -- This might not be true if we import the + -- type constructor via a .hs-bool file (#8743) + , let con = cons `getNth` (con_tag - fIRST_TAG) + arg_tys = dataConInstArgTys con tc_args + strict_marks = dataConRepStrictness con + = Just (con, tc_args, zipEqual "dsct" arg_tys strict_marks, co) +deepSplitCprType_maybe _ _ _ = Nothing + +findTypeShape :: FamInstEnvs -> Type -> TypeShape +-- Uncover the arrow and product shape of a type +-- The data type TypeShape is defined in Demand +-- See Note [Trimming a demand to a type] in Demand +findTypeShape fam_envs ty + | Just (tc, tc_args) <- splitTyConApp_maybe ty + , Just con <- isDataProductTyCon_maybe tc + = TsProd (map (findTypeShape fam_envs) $ dataConInstArgTys con tc_args) + + | Just (_, res) <- splitFunTy_maybe ty + = TsFun (findTypeShape fam_envs res) + + | Just (_, ty') <- splitForAllTy_maybe ty + = findTypeShape fam_envs ty' + + | Just (_, ty') <- topNormaliseType_maybe fam_envs ty + = findTypeShape fam_envs ty' + + | otherwise + = TsUnk + +{- +************************************************************************ +* * +\subsection{CPR stuff} +* * +************************************************************************ + + +@mkWWcpr@ takes the worker/wrapper pair produced from the strictness +info and adds in the CPR transformation. The worker returns an +unboxed tuple containing non-CPR components. The wrapper takes this +tuple and re-produces the correct structured output. + +The non-CPR results appear ordered in the unboxed tuple as if by a +left-to-right traversal of the result structure. +-} + +mkWWcpr :: Bool + -> FamInstEnvs + -> Type -- function body type + -> CprResult -- CPR analysis results + -> UniqSM (Bool, -- Is w/w'ing useful? + CoreExpr -> CoreExpr, -- New wrapper + CoreExpr -> CoreExpr, -- New worker + Type) -- Type of worker's body + +mkWWcpr opt_CprAnal fam_envs body_ty cpr + -- CPR explicitly turned off (or in -O0) + | not opt_CprAnal = return (False, id, id, body_ty) + -- CPR is turned on by default for -O and O2 + | otherwise + = case asConCpr cpr of + Nothing -> return (False, id, id, body_ty) -- No CPR info + Just con_tag | Just stuff <- deepSplitCprType_maybe fam_envs con_tag body_ty + -> mkWWcpr_help stuff + | otherwise + -- See Note [non-algebraic or open body type warning] + -> WARN( True, text "mkWWcpr: non-algebraic or open body type" <+> ppr body_ty ) + return (False, id, id, body_ty) + +mkWWcpr_help :: (DataCon, [Type], [(Type,StrictnessMark)], Coercion) + -> UniqSM (Bool, CoreExpr -> CoreExpr, CoreExpr -> CoreExpr, Type) + +mkWWcpr_help (data_con, inst_tys, arg_tys, co) + | [arg1@(arg_ty1, _)] <- arg_tys + , isUnliftedType arg_ty1 + -- Special case when there is a single result of unlifted type + -- + -- Wrapper: case (..call worker..) of x -> C x + -- Worker: case ( ..body.. ) of C x -> x + = do { (work_uniq : arg_uniq : _) <- getUniquesM + ; let arg = mk_ww_local arg_uniq arg1 + con_app = mkConApp2 data_con inst_tys [arg] `mkCast` mkSymCo co + + ; return ( True + , \ wkr_call -> mkDefaultCase wkr_call arg con_app + , \ body -> mkUnpackCase body co work_uniq data_con [arg] (varToCoreExpr arg) + -- varToCoreExpr important here: arg can be a coercion + -- Lacking this caused #10658 + , arg_ty1 ) } + + | otherwise -- The general case + -- Wrapper: case (..call worker..) of (# a, b #) -> C a b + -- Worker: case ( ...body... ) of C a b -> (# a, b #) + = do { (work_uniq : wild_uniq : uniqs) <- getUniquesM + ; let wrap_wild = mk_ww_local wild_uniq (ubx_tup_ty,MarkedStrict) + args = zipWith mk_ww_local uniqs arg_tys + ubx_tup_ty = exprType ubx_tup_app + ubx_tup_app = mkCoreUbxTup (map fst arg_tys) (map varToCoreExpr args) + con_app = mkConApp2 data_con inst_tys args `mkCast` mkSymCo co + tup_con = tupleDataCon Unboxed (length arg_tys) + + ; return (True + , \ wkr_call -> mkSingleAltCase wkr_call wrap_wild + (DataAlt tup_con) args con_app + , \ body -> mkUnpackCase body co work_uniq data_con args ubx_tup_app + , ubx_tup_ty ) } + +mkUnpackCase :: CoreExpr -> Coercion -> Unique -> DataCon -> [Id] -> CoreExpr -> CoreExpr +-- (mkUnpackCase e co uniq Con args body) +-- returns +-- case e |> co of bndr { Con args -> body } + +mkUnpackCase (Tick tickish e) co uniq con args body -- See Note [Profiling and unpacking] + = Tick tickish (mkUnpackCase e co uniq con args body) +mkUnpackCase scrut co uniq boxing_con unpk_args body + = mkSingleAltCase casted_scrut bndr + (DataAlt boxing_con) unpk_args body + where + casted_scrut = scrut `mkCast` co + bndr = mk_ww_local uniq (exprType casted_scrut, MarkedStrict) + +{- +Note [non-algebraic or open body type warning] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are a few cases where the W/W transformation is told that something +returns a constructor, but the type at hand doesn't really match this. One +real-world example involves unsafeCoerce: + foo = IO a + foo = unsafeCoerce c_exit + foreign import ccall "c_exit" c_exit :: IO () +Here CPR will tell you that `foo` returns a () constructor for sure, but trying +to create a worker/wrapper for type `a` obviously fails. +(This was a real example until ee8e792 in libraries/base.) + +It does not seem feasible to avoid all such cases already in the analyser (and +after all, the analysis is not really wrong), so we simply do nothing here in +mkWWcpr. But we still want to emit warning with -DDEBUG, to hopefully catch +other cases where something went avoidably wrong. + +This warning also triggers for the stream fusion library within `text`. +We can'easily W/W constructed results like `Stream` because we have no simple +way to express existential types in the worker's type signature. + +Note [Profiling and unpacking] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the original function looked like + f = \ x -> {-# SCC "foo" #-} E + +then we want the CPR'd worker to look like + \ x -> {-# SCC "foo" #-} (case E of I# x -> x) +and definitely not + \ x -> case ({-# SCC "foo" #-} E) of I# x -> x) + +This transform doesn't move work or allocation +from one cost centre to another. + +Later [SDM]: presumably this is because we want the simplifier to +eliminate the case, and the scc would get in the way? I'm ok with +including the case itself in the cost centre, since it is morally +part of the function (post transformation) anyway. + + +************************************************************************ +* * +\subsection{Utilities} +* * +************************************************************************ + +Note [Absent errors] +~~~~~~~~~~~~~~~~~~~~ +We make a new binding for Ids that are marked absent, thus + let x = absentError "x :: Int" +The idea is that this binding will never be used; but if it +buggily is used we'll get a runtime error message. + +Coping with absence for *unlifted* types is important; see, for +example, #4306 and #15627. In the UnliftedRep case, we can +use LitRubbish, which we need to apply to the required type. +For the unlifted types of singleton kind like Float#, Addr#, etc. we +also find a suitable literal, using Literal.absentLiteralOf. We don't +have literals for every primitive type, so the function is partial. + +Note: I did try the experiment of using an error thunk for unlifted +things too, relying on the simplifier to drop it as dead code. +But this is fragile + + - It fails when profiling is on, which disables various optimisations + + - It fails when reboxing happens. E.g. + data T = MkT Int Int# + f p@(MkT a _) = ...g p.... + where g is /lazy/ in 'p', but only uses the first component. Then + 'f' is /strict/ in 'p', and only uses the first component. So we only + pass that component to the worker for 'f', which reconstructs 'p' to + pass it to 'g'. Alas we can't say + ...f (MkT a (absentError Int# "blah"))... + bacause `MkT` is strict in its Int# argument, so we get an absentError + exception when we shouldn't. Very annoying! + +So absentError is only used for lifted types. +-} + +-- | Tries to find a suitable dummy RHS to bind the given absent identifier to. +-- +-- If @mk_absent_let _ id == Just wrap@, then @wrap e@ will wrap a let binding +-- for @id@ with that RHS around @e@. Otherwise, there could no suitable RHS be +-- found (currently only happens for bindings of 'VecRep' representation). +mk_absent_let :: DynFlags -> FamInstEnvs -> Id -> Maybe (CoreExpr -> CoreExpr) +mk_absent_let dflags fam_envs arg + -- The lifted case: Bind 'absentError' + -- See Note [Absent errors] + | not (isUnliftedType arg_ty) + = Just (Let (NonRec lifted_arg abs_rhs)) + -- The 'UnliftedRep' (because polymorphic) case: Bind @__RUBBISH \@arg_ty@ + -- See Note [Absent errors] + | [UnliftedRep] <- typePrimRep arg_ty + = Just (Let (NonRec arg unlifted_rhs)) + -- The monomorphic unlifted cases: Bind to some literal, if possible + -- See Note [Absent errors] + | Just tc <- tyConAppTyCon_maybe nty + , Just lit <- absentLiteralOf tc + = Just (Let (NonRec arg (Lit lit `mkCast` mkSymCo co))) + | nty `eqType` voidPrimTy + = Just (Let (NonRec arg (Var voidPrimId `mkCast` mkSymCo co))) + | otherwise + = WARN( True, text "No absent value for" <+> ppr arg_ty ) + Nothing -- Can happen for 'State#' and things of 'VecRep' + where + lifted_arg = arg `setIdStrictness` botSig `setIdCprInfo` mkCprSig 0 botCpr + -- Note in strictness signature that this is bottoming + -- (for the sake of the "empty case scrutinee not known to + -- diverge for sure lint" warning) + arg_ty = idType arg + + -- Normalise the type to have best chance of finding an absent literal + -- e.g. (#17852) data unlifted N = MkN Int# + -- f :: N -> a -> a + -- f _ x = x + (co, nty) = topNormaliseType_maybe fam_envs arg_ty + `orElse` (mkRepReflCo arg_ty, arg_ty) + + abs_rhs = mkAbsentErrorApp arg_ty msg + msg = showSDoc (gopt_set dflags Opt_SuppressUniques) + (ppr arg <+> ppr (idType arg)) + -- We need to suppress uniques here because otherwise they'd + -- end up in the generated code as strings. This is bad for + -- determinism, because with different uniques the strings + -- will have different lengths and hence different costs for + -- the inliner leading to different inlining. + -- See also Note [Unique Determinism] in Unique + unlifted_rhs = mkTyApps (Lit rubbishLit) [arg_ty] + +mk_ww_local :: Unique -> (Type, StrictnessMark) -> Id +-- The StrictnessMark comes form the data constructor and says +-- whether this field is strict +-- See Note [Record evaluated-ness in worker/wrapper] +mk_ww_local uniq (ty,str) + = setCaseBndrEvald str $ + mkSysLocalOrCoVar (fsLit "ww") uniq ty diff --git a/compiler/GHC/Core/Op/simplifier.tib b/compiler/GHC/Core/Op/simplifier.tib new file mode 100644 index 0000000000..e0f9dc91f2 --- /dev/null +++ b/compiler/GHC/Core/Op/simplifier.tib @@ -0,0 +1,771 @@ +% Andre: +% +% - I'd like the transformation rules to appear clearly-identified in +% a box of some kind, so they can be distinguished from the examples. +% + + + +\documentstyle[slpj,11pt]{article} + +\renewcommand{\textfraction}{0.2} +\renewcommand{\floatpagefraction}{0.7} + +\begin{document} + +\title{How to simplify matters} + +\author{Simon Peyton Jones and Andre Santos\\ +Department of Computing Science, University of Glasgow, G12 8QQ \\ + @simonpj@@dcs.gla.ac.uk@ +} + +\maketitle + + +\section{Motivation} + +Quite a few compilers use the {\em compilation by transformation} idiom. +The idea is that as much of possible of the compilation process is +expressed as correctness-preserving transformations, each of which +transforms a program into a semantically-equivalent +program that (hopefully) executes more quickly or in less space. +Functional languages are particularly amenable to this approach because +they have a particularly rich family of possible transformations. +Examples of transformation-based compilers +include the Orbit compiler,[.kranz orbit thesis.] +Kelsey's compilers,[.kelsey thesis, hudak kelsey principles 1989.] +the New Jersey SML compiler,[.appel compiling with continuations.] +and the Glasgow Haskell compiler.[.ghc JFIT.] Of course many, perhaps most, +other compilers also use transformation to some degree. + +Compilation by transformation uses automatic transformations; that is, those +which can safely be applied automatically by a compiler. There +is also a whole approach to programming, which we might call {\em programming by transformation}, +in which the programmer manually transforms an inefficient specification into +an efficient program. This development process might be supported by +a programming environment in which does the book keeping, but the key steps +are guided by the programmer. We focus exclusively on automatic transformations +in this paper. + +Automatic program transformations seem to fall into two broad categories: +\begin{itemize} +\item {\bf Glamorous transformations} are global, sophisticated, +intellectually satisfying transformations, sometimes guided by some +interesting kind of analysis. +Examples include: +lambda lifting,[.johnsson lambda lifting.] +full laziness,[.hughes thesis, lester spe.] +closure conversion,[.appel jim 1989.] +deforestation,[.wadler 1990 deforestation, marlow wadler deforestation Glasgow92, chin phd 1990 march, gill launchbury.] +transformations based on strictness analysis,[.peyton launchbury unboxed.] +and so on. It is easy to write papers about these sorts of transformations. + +\item {\bf Humble transformations} are small, simple, local transformations, +which individually look pretty trivial. Here are two simple examples\footnote{ +The notation @E[]@ stands for an arbitrary expression with zero or more holes. +The notation @E[e]@ denotes @E[]@ with the holes filled in by the expression @e@. +We implicitly assume that no name-capture happens --- it's just +a short-hand, not an algorithm. +}: +@ + let x = y in E[x] ===> E[y] + + case (x:xs) of ===> E1[x,xs] + (y:ys) -> E1[y,ys] + [] -> E2 +@ +Transformations of this kind are almost embarrassingly simple. How could +anyone write a paper about them? +\end{itemize} +This paper is about humble transformations, and how to implement them. +Although each individual +transformation is simple enough, there is a scaling issue: +there are a large number of candidate transformations to consider, and +there are a very large number of opportunities to apply them. + +In the Glasgow Haskell compiler, all humble transformations +are performed by the so-called {\em simplifier}. +Our goal in this paper is to give an overview of how the simplifier works, what +transformations it applies, and what issues arose in its design. + +\section{The language} + +Mutter mutter. Important points: +\begin{itemize} +\item Second order lambda calculus. +\item Arguments are variables. +\item Unboxed data types, and unboxed cases. +\end{itemize} +Less important points: +\begin{itemize} +\item Constructors and primitives are saturated. +\item if-then-else desugared to @case@ +\end{itemize} + +Give data type. + +\section{Transformations} + +This section lists all the transformations implemented by the simplifier. +Because it is a complete list, it is a long one. +We content ourselves with a brief statement of each transformation, +augmented with forward references to Section~\ref{sect:composing} +which gives examples of the ways in which the transformations can compose together. + +\subsection{Beta reduction} + +If a lambda abstraction is applied to an argument, we can simply +beta-reduce. This applies equally to ordinary lambda abstractions and +type abstractions: +@ + (\x -> E[x]) arg ===> E[arg] + (/\a -> E[a]) ty ===> E[ty] +@ +There is no danger of duplicating work because the argument is +guaranteed to be a simple variable or literal. + +\subsubsection{Floating applications inward} + +Applications can be floated inside a @let(rec)@ or @case@ expression. +This is a good idea, because they might find a lambda abstraction inside +to beta-reduce with: +@ + (let(rec) Bind in E) arg ===> let(rec) Bind in (E arg) + + (case E of {P1 -> E1;...; Pn -> En}) arg + ===> + case E of {P1 -> E1 arg; ...; Pn -> En arg} +@ + + + +\subsection{Transformations concerning @let(rec)@} + +\subsubsection{Floating @let@ out of @let@} + +It is sometimes useful to float a @let(rec)@ out of a @let(rec)@ right-hand +side: +@ + let x = let(rec) Bind in B1 ===> let(rec) Bind in + in B2 let x = B1 + in B2 + + + letrec x = let(rec) Bind in B1 ===> let(rec) Bind + in B2 x = B1 + in B2 +@ + +\subsubsection{Floating @case@ out of @let@} + + +\subsubsection{@let@ to @case@} + + +\subsection{Transformations concerning @case@} + +\subsubsection{Case of known constructor} + +If a @case@ expression scrutinises a constructor, +the @case@ can be eliminated. This transformation is a real +win: it eliminates a whole @case@ expression. +@ + case (C a1 .. an) of ===> E[a1..an] + ... + C b1 .. bn -> E[b1..bn] + ... +@ +If none of the constructors in the alternatives match, then +the default is taken: +@ + case (C a1 .. an) of ===> let y = C a1 .. an + ...[no alt matches C]... in E + y -> E +@ +There is an important variant of this transformation when +the @case@ expression scrutinises a {\em variable} +which is known to be bound to a constructor. +This situation can +arise for two reasons: +\begin{itemize} +\item An enclosing @let(rec)@ binding binds the variable to a constructor. +For example: +@ + let x = C p q in ... (case x of ...) ... +@ +\item An enclosing @case@ expression scrutinises the same variable. +For example: +@ + case x of + ... + C p q -> ... (case x of ...) ... + ... +@ +This situation is particularly common, as we discuss in Section~\ref{sect:repeated-evals}. +\end{itemize} +In each of these examples, @x@ is known to be bound to @C p q@ +at the inner @case@. The general rules are: +@ + case x of {...; C b1 .. bn -> E[b1..bn]; ...} +===> {x bound to C a1 .. an} + E[a1..an] + + case x of {...[no alts match C]...; y -> E[y]} +===> {x bound to C a1 .. an} + E[x] +@ + +\subsubsection{Dead alternative elimination} +@ + case x of + C a .. z -> E + ...[other alts]... +===> x *not* bound to C + case x of + ...[other alts]... +@ +We might know that @x@ is not bound to a particular constructor +because of an enclosing case: +@ + case x of + C a .. z -> E1 + other -> E2 +@ +Inside @E1@ we know that @x@ is bound to @C@. +However, if the type has more than two constructors, +inside @E2@ all we know is that @x@ is {\em not} bound to @C@. + +This applies to unboxed cases also, in the obvious way. + +\subsubsection{Case elimination} + +If we can prove that @x@ is not bottom, then this rule applies. +@ + case x of ===> E[x] + y -> E[y] +@ +We might know that @x@ is non-bottom because: +\begin{itemize} +\item @x@ has an unboxed type. +\item There's an enclosing case which scrutinises @x@. +\item It is bound to an expression which provably terminates. +\end{itemize} +Since this transformation can only improve termination, even if we apply it +when @x@ is not provably non-bottom, we provide a compiler flag to +enable it all the time. + +\subsubsection{Case of error} + +@ + case (error ty E) of Alts ===> error ty' E + where + ty' is type of whole case expression +@ + +Mutter about types. Mutter about variables bound to error. +Mutter about disguised forms of error. + +\subsubsection{Floating @let(rec)@ out of @case@} + +A @let(rec)@ binding can be floated out of a @case@ scrutinee: +@ + case (let(rec) Bind in E) of Alts ===> let(rec) Bind in + case E of Alts +@ +This increases the likelihood of a case-of-known-constructor transformation, +because @E@ is not hidden from the @case@ by the @let(rec)@. + +\subsubsection{Floating @case@ out of @case@} + +Analogous to floating a @let(rec)@ from a @case@ scrutinee is +floating a @case@ from a @case@ scrutinee. We have to be +careful, though, about code size. If there's only one alternative +in the inner case, things are easy: +@ + case (case E of {P -> R}) of ===> case E of {P -> case R of + Q1 -> S1 Q1 -> S1 + ... ... + Qm -> Sm Qm -> Sm} +@ +If there's more than one alternative there's a danger +that we'll duplicate @S1@...@Sm@, which might be a lot of code. +Our solution is to create a new local definition for each +alternative: +@ + case (case E of {P1 -> R1; ...; Pn -> Rn}) of + Q1 -> S1 + ... + Qm -> Sm +===> + let s1 = \x1 ... z1 -> S1 + ... + sm = \xm ... zm -> Sm + in + case E of + P1 -> case R1 of {Q1 -> s1 x1 ... z1; ...; Qm -> sm xm ... zm} + ... + Pn -> case Rn of {Q1 -> s1 x1 ... z1; ...; Qm -> sm xm ... zm} +@ +Here, @x1 ... z1@ are that subset of +variables bound by the pattern @Q1@ which are free in @S1@, and +similarly for the other @si@. + +Is this transformation a win? After all, we have introduced @m@ new +functions! Section~\ref{sect:join-points} discusses this point. + +\subsubsection{Case merging} + +@ + case x of + ...[some alts]... + other -> case x of + ...[more alts]... +===> + case x of + ...[some alts]... + ...[more alts]... +@ +Any alternatives in @[more alts]@ which are already covered by @[some alts]@ +should first be eliminated by the dead-alternative transformation. + + +\subsection{Constructor reuse} + + +\subsection{Inlining} + +The inlining transformation is simple enough: +@ + let x = R in B[x] ===> B[R] +@ +Inlining is more conventionally used to describe the instantiation of a function +body at its call site, with arguments substituted for formal parameters. We treat +this as a two-stage process: inlining followed by beta reduction. Since we are +working with a higher-order language, not all the arguments may be available at every +call site, so separating inlining from beta reduction allows us to concentrate on +one problem at a time. + +The choice of exactly {\em which} bindings to inline has a major impact on efficiency. +Specifically, we need to consider the following factors: +\begin{itemize} +\item +Inlining a function at its call site, followed by some beta reduction, +very often exposes opportunities for further transformations. +We inline many simple arithmetic and boolean operators for this reason. +\item +Inlining can increase code size. +\item +Inlining can duplicate work, for example if a redex is inlined at more than one site. +Duplicating a single expensive redex can ruin a program's efficiency. +\end{itemize} + + +Our inlining strategy depends on the form of @R@: + +Mutter mutter. + + +\subsubsection{Dead code removal} + +If a @let@-bound variable is not used the binding can be dropped: +@ + let x = E in B ===> B + x not free in B +@ +A similar transformation applies for @letrec@-bound variables. +Programmers seldom write dead code, of course, but bindings often become dead when they +are inlined. + + + + +\section{Composing transformations} +\label{sect:composing} + +The really interesting thing about humble transformations is the way in which +they compose together to carry out substantial and useful transformations. +This section gives a collection of motivating examples, all of which have +shown up in real application programs. + +\subsection{Repeated evals} +\label{sect:repeated-evals} + +Example: x+x, as in unboxed paper. + + +\subsection{Lazy pattern matching} + +Lazy pattern matching is pretty inefficient. Consider: +@ + let (x,y) = E in B +@ +which desugars to: +@ + let t = E + x = case t of (x,y) -> x + y = case t of (x,y) -> y + in B +@ +This code allocates three thunks! However, if @B@ is strict in {\em either} +@x@ {\em or} @y@, then the strictness analyser will easily spot that +the binding for @t@ is strict, so we can do a @let@-to-@case@ transformation: +@ + case E of + (x,y) -> let t = (x,y) in + let x = case t of (x,y) -> x + y = case t of (x,y) -> y + in B +@ +whereupon the case-of-known-constructor transformation +eliminates the @case@ expressions in the right-hand side of @x@ and @y@, +and @t@ is then spotted as being dead, so we get +@ + case E of + (x,y) -> B +@ + +\subsection{Join points} +\label{sect:join-points} + +One motivating example is this: +@ + if (not x) then E1 else E2 +@ +After desugaring the conditional, and inlining the definition of +@not@, we get +@ + case (case x of True -> False; False -> True}) of + True -> E1 + False -> E2 +@ +Now, if we apply our case-of-case transformation we get: +@ + let e1 = E1 + e2 = E2 + in + case x of + True -> case False of {True -> e1; False -> e2} + False -> case True of {True -> e1; False -> e2} +@ +Now the case-of-known constructor transformation applies: +@ + let e1 = E1 + e2 = E2 + in + case x of + True -> e2 + False -> e1 +@ +Since there is now only one occurrence of @e1@ and @e2@ we can +inline them, giving just what we hoped for: +@ + case x of {True -> E2; False -> E1} +@ +The point is that the local definitions will often disappear again. + +\subsubsection{How join points occur} + +But what if they don't disappear? Then the definitions @s1@ ... @sm@ +play the role of ``join points''; they represent the places where +execution joins up again, having forked at the @case x@. The +``calls'' to the @si@ should really be just jumps. To see this more clearly +consider the expression +@ + if (x || y) then E1 else E2 +@ +A C compiler will ``short-circuit'' the +evaluation of the condition if @x@ turns out to be true +generate code, something like this: +@ + if (x) goto l1; + if (y) {...code for E2...} + l1: ...code for E1... +@ +In our setting, here's what will happen. First we desugar the +conditional, and inline the definition of @||@: +@ + case (case x of {True -> True; False -> y}) of + True -> E1 + False -> E2 +@ +Now apply the case-of-case transformation: +@ + let e1 = E1 + e2 = E2 + in + case x of + True -> case True of {True -> e1; False -> e2} + False -> case y of {True -> e1; False -> e2} +@ +Unlike the @not@ example, only one of the two inner case +simplifies, and we can therefore only inline @e2@, because +@e1@ is still mentioned twice\footnote{Unless the +inlining strategy decides that @E1@ is small enough to duplicate; +it is used in separate @case@ branches so there's no concern about duplicating +work. Here's another example of the way in which we make one part of the +simplifier (the inlining strategy) help with the work of another (@case@-expression +simplification.} +@ + let e1 = E1 + in + case x of + True -> e1 + False -> case y of {True -> e1; False -> e2} +@ +The code generator produces essentially the same code as +the C code given above. The binding for @e1@ turns into +just a label, which is jumped to from the two occurrences of @e1@. + +\subsubsection{Case of @error@} + +The case-of-error transformation is often exposed by the case-of-case +transformation. Consider +@ + case (hd xs) of + True -> E1 + False -> E2 +@ +After inlining @hd@, we get +@ + case (case xs of [] -> error "hd"; (x:_) -> x) of + True -> E1 + False -> E2 +@ +(I've omitted the type argument of @error@ to save clutter.) +Now doing case-of-case gives +@ + let e1 = E1 + e2 = E2 + in + case xs of + [] -> case (error "hd") of { True -> e1; False -> e2 } + (x:_) -> case x of { True -> e1; False -> e2 } +@ +Now the case-of-error transformation springs to life, after which +we can inline @e1@ and @e2@: +@ + case xs of + [] -> error "hd" + (x:_) -> case x of {True -> E1; False -> E2} +@ + +\subsection{Nested conditionals combined} + +Sometimes programmers write something which should be done +by a single @case@ as a sequence of tests: +@ + if x==0::Int then E0 else + if x==1 then E1 else + E2 +@ +After eliminating some redundant evals and doing the case-of-case +transformation we get +@ + case x of I# x# -> + case x# of + 0# -> E0 + other -> case x# of + 1# -> E1 + other -> E2 +@ +The case-merging transformation puts these together to get +@ + case x of I# x# -> + case x# of + 0# -> E0 + 1# -> E1 + other -> E2 +@ +Sometimes the sequence of tests cannot be eliminated from the source +code because of overloading: +@ + f :: Num a => a -> Bool + f 0 = True + f 3 = True + f n = False +@ +If we specialise @f@ to @Int@ we'll get the previous example again. + +\subsection{Error tests eliminated} + +The elimination of redundant alternatives, and then of redundant cases, +arises when we inline functions which do error checking. A typical +example is this: +@ + if (x `rem` y) == 0 then (x `div` y) else y +@ +Here, both @rem@ and @div@ do an error-check for @y@ being zero. +The second check is eliminated by the transformations. +After transformation the code becomes: +@ + case x of I# x# -> + case y of I# y# -> + case y of + 0# -> error "rem: zero divisor" + _ -> case x# rem# y# of + 0# -> case x# div# y# of + r# -> I# r# + _ -> y +@ + +\subsection{Atomic arguments} + +At this point it is possible to appreciate the usefulness of +the Core-language syntax requirement that arguments are atomic. +For example, suppose that arguments could be arbitrary expressions. +Here is a possible transformation: +@ + f (case x of (p,q) -> p) +===> f strict in its second argument + case x of (p,q) -> f (p,p) +@ +Doing this transformation would be useful, because now the +argument to @f@ is a simple variable rather than a thunk. +However, if arguments are atomic, this transformation becomes +just a special case of floating a @case@ out of a strict @let@: +@ + let a = case x of (p,q) -> p + in f a +===> (f a) strict in a + case x of (p,q) -> let a=p in f a +===> + case x of (p,q) -> f p +@ +There are many examples of this kind. For almost any transformation +involving @let@ there is a corresponding one involving a function +argument. The same effect is achieved with much less complexity +by restricting function arguments to be atomic. + +\section{Design} + +Dependency analysis +Occurrence analysis + +\subsection{Renaming and cloning} + +Every program-transformation system has to worry about name capture. +For example, here is an erroneous transformation: +@ + let y = E + in + (\x -> \y -> x + y) (y+3) +===> WRONG! + let y = E + in + (\y -> (y+3) + y) +@ +The transformation fails because the originally free-occurrence +of @y@ in the argument @y+3@ has been ``captured'' by the @\y@-abstraction. +There are various sophisticated solutions to this difficulty, but +we adopted a very simple one: we uniquely rename every locally-bound identifier +on every pass of the simplifier. +Since we are in any case producing an entirely new program (rather than side-effecting +an existing one) it costs very little extra to rename the identifiers as we go. + +So our example would become +@ + let y = E + in + (\x -> \y -> x + y) (y+3) +===> WRONG! + let y1 = E + in + (\y2 -> (y1+3) + y2) +@ +The simplifier accepts as input a program which has arbitrary bound +variable names, including ``shadowing'' (where a binding hides an +outer binding for the same identifier), but it produces a program in +which every bound identifier has a distinct name. + +Both the ``old'' and ``new'' identifiers have type @Id@, but when writing +type signatures for functions in the simplifier we use the types @InId@, for +identifiers from the input program, and @OutId@ for identifiers from the output program: +@ + type InId = Id + type OutId = Id +@ +This nomenclature extends naturally to expressions: a value of type @InExpr@ is an +expression whose identifiers are from the input-program name-space, and similarly +@OutExpr@. + + +\section{The simplifier} + +The basic algorithm followed by the simplifier is: +\begin{enumerate} +\item Analyse: perform occurrence analysis and dependency analysis. +\item Simplify: apply as many transformations as possible. +\item Iterate: perform the above two steps repeatedly until no further transformations are possible. +(A compiler flag allows the programmer to bound the maximum number of iterations.) +\end{enumerate} +We make a effort to apply as many transformations as possible in Step +2. To see why this is a good idea, just consider a sequence of +transformations in which each transformation enables the next. If +each iteration of Step 2 only performs one transformation, then the +entire program will to be re-analysed by Step 1, and re-traversed by +Step 2, for each transformation of the sequence. Sometimes this is +unavoidable, but it is often possible to perform a sequence of +transformtions in a single pass. + +The key function, which simplifies expressions, has the following type: +@ + simplExpr :: SimplEnv + -> InExpr -> [OutArg] + -> SmplM OutExpr +@ +The monad, @SmplM@ can quickly be disposed of. It has only two purposes: +\begin{itemize} +\item It plumbs around a supply of unique names, so that the simplifier can +easily invent new names. +\item It gathers together counts of how many of each kind of transformation +has been applied, for statistical purposes. These counts are also used +in Step 3 to decide when the simplification process has terminated. +\end{itemize} + +The signature can be understood like this: +\begin{itemize} +\item The environment, of type @SimplEnv@, provides information about +identifiers bound by the enclosing context. +\item The second and third arguments together specify the expression to be simplified. +\item The result is the simplified expression, wrapped up by the monad. +\end{itemize} +The simplifier's invariant is this: +$$ +@simplExpr@~env~expr~[a_1,\ldots,a_n] = expr[env]~a_1~\ldots~a_n +$$ +That is, the expression returned by $@simplExpr@~env~expr~[a_1,\ldots,a_n]$ +is semantically equal (although hopefully more efficient than) +$expr$, with the renamings in $env$ applied to it, applied to the arguments +$a_1,\ldots,a_n$. + +\subsection{Application and beta reduction} + +The arguments are carried ``inwards'' by @simplExpr@, as an accumulating parameter. +This is a convenient way of implementing the transformations which float +arguments inside a @let@ and @case@. This list of pending arguments +requires a new data type, @CoreArg@, along with its ``in'' and ``out'' synonyms, +because an argument might be a type or an atom: +@ +data CoreArg bindee = TypeArg UniType + | ValArg (CoreAtom bindee) + +type InArg = CoreArg InId +type OutArg = CoreArg OutId +@ +The equations for applications simply apply +the environment to the argument (to handle renaming) and put the result +on the argument stack, tagged to say whether it is a type argument or value argument: +@ + simplExpr env (CoApp fun arg) args + = simplExpr env fun (ValArg (simplAtom env arg) : args) + simplExpr env (CoTyApp fun ty) args + = simplExpr env fun (TypeArg (simplTy env ty) : args) +@ + + + + + + +\end{document} diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs index 31b27b03e6..bb58d25927 100644 --- a/compiler/GHC/Core/Rules.hs +++ b/compiler/GHC/Core/Rules.hs @@ -114,7 +114,7 @@ Note [Overall plumbing for rules] (d) Rules in the ExternalPackageTable. These can grow in response to lazy demand-loading of interfaces. -* At the moment (c) is carried in a reader-monad way by the CoreMonad. +* At the moment (c) is carried in a reader-monad way by the GHC.Core.Op.Monad. The HomePackageTable doesn't have a single RuleBase because technically we should only be able to "see" rules "below" this module; so we generate a RuleBase for (c) by combing rules from all the modules @@ -127,7 +127,7 @@ Note [Overall plumbing for rules] * So in the outer simplifier loop, we combine (b-d) into a single RuleBase, reading (b) from the ModGuts, - (c) from the CoreMonad, and + (c) from the GHC.Core.Op.Monad, and (d) from its mutable variable [Of course this means that we won't see new EPS rules that come in during a single simplifier iteration, but that probably does not @@ -330,7 +330,7 @@ but that isn't quite right: - PrimOps and ClassOps are born with a bunch of rules inside the Id, even when they are imported - - The rules in PrelRules.builtinRules should be active even + - The rules in GHC.Core.Op.ConstantFold.builtinRules should be active even in the module defining the Id (when it's a LocalId), but the rules are kept in the global RuleBase @@ -1023,7 +1023,7 @@ these cases. On the other hand, where we are allowed to insert new cost into the tick scope, we can float them upwards to the rule application site. -cf Note [Notes in call patterns] in SpecConstr +cf Note [Notes in call patterns] in GHC.Core.Op.SpecConstr Note [Matching lets] ~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs index 829e746498..3510fcc3ae 100644 --- a/compiler/GHC/Core/SimpleOpt.hs +++ b/compiler/GHC/Core/SimpleOpt.hs @@ -31,7 +31,7 @@ import GHC.Core.FVs import {-# SOURCE #-} GHC.Core.Unfold( mkUnfolding ) import GHC.Core.Make ( FloatBind(..) ) import GHC.Core.Ppr ( pprCoreBindings, pprRules ) -import OccurAnal( occurAnalyseExpr, occurAnalysePgm ) +import GHC.Core.Op.OccurAnal( occurAnalyseExpr, occurAnalysePgm ) import Literal ( Literal(LitString) ) import Id import IdInfo ( unfoldingInfo, setUnfoldingInfo, setRuleInfo, IdInfo (..) ) @@ -469,7 +469,7 @@ simple_out_bind_pair env in_bndr mb_out_bndr out_rhs post_inline_unconditionally | isExportedId in_bndr = False -- Note [Exported Ids and trivial RHSs] | stable_unf = False -- Note [Stable unfoldings and postInlineUnconditionally] - | not active = False -- in SimplUtils + | not active = False -- in GHC.Core.Op.Simplify.Utils | is_loop_breaker = False -- If it's a loop-breaker of any kind, don't inline -- because it might be referred to "earlier" | exprIsTrivial out_rhs = True @@ -489,7 +489,7 @@ simple_out_bind_pair env in_bndr mb_out_bndr out_rhs {- Note [Exported Ids and trivial RHSs] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We obviously do not want to unconditionally inline an Id that is exported. -In SimplUtils, Note [Top level and postInlineUnconditionally], we +In GHC.Core.Op.Simplify.Utils, Note [Top level and postInlineUnconditionally], we explain why we don't inline /any/ top-level things unconditionally, even trivial ones. But we do here! Why? In the simple optimiser @@ -1247,7 +1247,7 @@ pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion) -- We have (fun |> co) arg, and we want to transform it to -- (fun arg) |> co -- This may fail, e.g. if (fun :: N) where N is a newtype --- C.f. simplCast in Simplify.hs +-- C.f. simplCast in GHC.Core.Op.Simplify -- 'co' is always Representational -- If the returned coercion is Nothing, then it would have been reflexive pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs index b6e507a7b0..49006c66b6 100644 --- a/compiler/GHC/Core/Unfold.hs +++ b/compiler/GHC/Core/Unfold.hs @@ -48,7 +48,7 @@ import GhcPrelude import GHC.Driver.Session import GHC.Core -import OccurAnal ( occurAnalyseExpr_NoBinderSwap ) +import GHC.Core.Op.OccurAnal ( occurAnalyseExpr_NoBinderSwap ) import GHC.Core.SimpleOpt import GHC.Core.Arity ( manifestArity ) import GHC.Core.Utils @@ -121,7 +121,7 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be un , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk }) mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding --- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap +-- See Note [Worker-wrapper for INLINABLE functions] in GHC.Core.Op.WorkWrap mkWorkerUnfolding dflags work_fn (CoreUnfolding { uf_src = src, uf_tmpl = tmpl , uf_is_top = top_lvl }) @@ -537,7 +537,7 @@ result of #4978. Note [Do not inline top-level bottoming functions] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The FloatOut pass has gone to some trouble to float out calls to 'error' -and similar friends. See Note [Bottoming floats] in SetLevels. +and similar friends. See Note [Bottoming floats] in GHC.Core.Op.SetLevels. Do not re-inline them! But we *do* still inline if they are very small (the uncondInline stuff). @@ -590,7 +590,7 @@ Things to note: unconditional-inline thing for *trivial* expressions. NB: you might think that PostInlineUnconditionally would do this - but it doesn't fire for top-level things; see SimplUtils + but it doesn't fire for top-level things; see GHC.Core.Op.Simplify.Utils Note [Top level and postInlineUnconditionally] Note [Count coercion arguments in boring contexts] diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs index d84bcdd774..fb22885f47 100644 --- a/compiler/GHC/Core/Utils.hs +++ b/compiler/GHC/Core/Utils.hs @@ -162,7 +162,7 @@ isExprLevPoly = go go e@(Cast {}) = check_type e go (Tick _ e) = go e go e@(Type {}) = pprPanic "isExprLevPoly ty" (ppr e) - go (Coercion {}) = False -- this case can happen in SetLevels + go (Coercion {}) = False -- this case can happen in GHC.Core.Op.SetLevels check_type = isTypeLevPoly . exprType -- slow approach @@ -625,7 +625,7 @@ for 'Red'. That's because 'lvl' is unreachable. So rather than crashing we generate (error "Inaccessible alternative"). Similar things can happen (augmented by GADTs) when the Simplifier -filters down the matching alternatives in Simplify.rebuildCase. +filters down the matching alternatives in GHC.Core.Op.Simplify.rebuildCase. -} --------------------------------- @@ -817,9 +817,9 @@ case e of C2 -> e0 ``` -It isn't obvious that refineDefaultAlt does this but if you look at its one -call site in SimplUtils then the `imposs_deflt_cons` argument is populated with -constructors which are matched elsewhere. +It isn't obvious that refineDefaultAlt does this but if you look at its one call +site in GHC.Core.Op.Simplify.Utils then the `imposs_deflt_cons` argument is +populated with constructors which are matched elsewhere. -} @@ -874,7 +874,7 @@ Note [Combine identical alternatives: wrinkles] isDeadBinder (see #7360). You can see this in the call to combineIdenticalAlts in - SimplUtils.prepareAlts. Here the alternatives have type InAlt + GHC.Core.Op.Simplify.Utils.prepareAlts. Here the alternatives have type InAlt (the "In" meaning input) rather than OutAlt. * combineIdenticalAlts does not work well for nullary constructors @@ -882,9 +882,10 @@ Note [Combine identical alternatives: wrinkles] [] -> f [] (_:_) -> f y Here we won't see that [] and y are the same. Sigh! This problem - is solved in CSE, in CSE.combineAlts, which does a better version of - combineIdenticalAlts. But sadly it doesn't have the occurrence info - we have here. See Note [Combine case alts: awkward corner] in CSE). + is solved in CSE, in GHC.Core.Op.CSE.combineAlts, which does a better version + of combineIdenticalAlts. But sadly it doesn't have the occurrence info we have + here. + See Note [Combine case alts: awkward corner] in GHC.Core.Op.CSE). Note [Care with impossible-constructors when combining alternatives] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1675,10 +1676,10 @@ isDivOp _ = False ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ exprOkForSpeculation accepts very special case expressions. Reason: (a ==# b) is ok-for-speculation, but the litEq rules -in PrelRules convert it (a ==# 3#) to +in GHC.Core.Op.ConstantFold convert it (a ==# 3#) to case a of { DEFAULT -> 0#; 3# -> 1# } for excellent reasons described in - PrelRules Note [The litEq rule: converting equality to case]. + GHC.Core.Op.ConstantFold Note [The litEq rule: converting equality to case]. So, annoyingly, we want that case expression to be ok-for-speculation too. Bother. @@ -1692,12 +1693,12 @@ But we restrict it sharply: Does the RHS of v satisfy the let/app invariant? Previously we said yes, on the grounds that y is evaluated. But the binder-swap done - by SetLevels would transform the inner alternative to + by GHC.Core.Op.SetLevels would transform the inner alternative to DEFAULT -> ... (let v::Int# = case x of { ... } in ...) .... which does /not/ satisfy the let/app invariant, because x is not evaluated. See Note [Binder-swap during float-out] - in SetLevels. To avoid this awkwardness it seems simpler + in GHC.Core.Op.SetLevels. To avoid this awkwardness it seems simpler to stick to unlifted scrutinees where the issue does not arise. @@ -1718,7 +1719,7 @@ But we restrict it sharply: ----- Historical note: #15696: -------- - Previously SetLevels used exprOkForSpeculation to guide + Previously GHC.Core.Op.SetLevels used exprOkForSpeculation to guide floating of single-alternative cases; it now uses exprIsHNF Note [Floating single-alternative cases]. @@ -1728,8 +1729,8 @@ But we restrict it sharply: A -> ... _ -> ...(case (case x of { B -> p; C -> p }) of I# r -> blah)... - If SetLevels considers the inner nested case as - ok-for-speculation it can do case-floating (in SetLevels). + If GHC.Core.Op.SetLevels considers the inner nested case as + ok-for-speculation it can do case-floating (in GHC.Core.Op.SetLevels). So we'd float to: case e of x { DEAFULT -> case (case x of { B -> p; C -> p }) of I# r -> @@ -2063,7 +2064,7 @@ we don't want Lint to complain. The 'y' is evaluated, so the case in the RHS of the binding for 'v' is fine. But only if we *know* that 'y' is evaluated. -c.f. add_evals in Simplify.simplAlt +c.f. add_evals in GHC.Core.Op.Simplify.simplAlt ************************************************************************ * * @@ -2332,7 +2333,7 @@ There are some particularly delicate points here: * Note [Arity care]: we need to be careful if we just look at f's arity. Currently (Dec07), f's arity is visible in its own RHS (see - Note [Arity robustness] in SimplEnv) so we must *not* trust the + Note [Arity robustness] in GHC.Core.Op.Simplify.Env) so we must *not* trust the arity when checking that 'f' is a value. Otherwise we will eta-reduce f = \x. f x diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs index 3ec35595e0..39f137d657 100644 --- a/compiler/GHC/CoreToStg/Prep.hs +++ b/compiler/GHC/CoreToStg/Prep.hs @@ -19,7 +19,7 @@ module GHC.CoreToStg.Prep ( import GhcPrelude -import OccurAnal +import GHC.Core.Op.OccurAnal import GHC.Driver.Types import PrelNames @@ -27,7 +27,7 @@ import MkId ( realWorldPrimId ) import GHC.Core.Utils import GHC.Core.Arity import GHC.Core.FVs -import CoreMonad ( CoreToDo(..) ) +import GHC.Core.Op.Monad ( CoreToDo(..) ) import GHC.Core.Lint ( endPassIO ) import GHC.Core import GHC.Core.Make hiding( FloatBind(..) ) -- We use our own FloatBind here @@ -523,7 +523,7 @@ it seems good for CorePrep to be robust. cpeJoinPair :: CorePrepEnv -> JoinId -> CoreExpr -> UniqSM (JoinId, CpeRhs) -- Used for all join bindings --- No eta-expansion: see Note [Do not eta-expand join points] in SimplUtils +-- No eta-expansion: see Note [Do not eta-expand join points] in GHC.Core.Op.Simplify.Utils cpeJoinPair env bndr rhs = ASSERT(isJoinId bndr) do { let Just join_arity = isJoinId_maybe bndr diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index ec3de7a14a..57c06ae7a3 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -183,7 +183,7 @@ data GeneralFlag | Opt_CmmElimCommonBlocks | Opt_AsmShortcutting | Opt_OmitYields - | Opt_FunToThunk -- allow WwLib.mkWorkerArgs to remove all value lambdas + | Opt_FunToThunk -- allow GHC.Core.Op.WorkWrap.Lib.mkWorkerArgs to remove all value lambdas | Opt_DictsStrict -- be strict in argument dictionaries | Opt_DmdTxDictSel -- use a special demand transformer for dictionary selectors | Opt_Loopification -- See Note [Self-recursive tail calls] diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs index ed47655982..083bfd279a 100644 --- a/compiler/GHC/Driver/Main.hs +++ b/compiler/GHC/Driver/Main.hs @@ -117,7 +117,7 @@ import TcRnMonad import TcHsSyn ( ZonkFlexi (DefaultFlexi) ) import NameCache ( initNameCache ) import PrelInfo -import SimplCore +import GHC.Core.Op.Simplify.Driver import GHC.HsToCore import GHC.Iface.Load ( ifaceStats, initExternalPackageState, writeIface ) import GHC.Iface.Make diff --git a/compiler/GHC/Driver/Plugins.hs b/compiler/GHC/Driver/Plugins.hs index baa27a0b36..437e68af71 100644 --- a/compiler/GHC/Driver/Plugins.hs +++ b/compiler/GHC/Driver/Plugins.hs @@ -49,7 +49,7 @@ module GHC.Driver.Plugins ( import GhcPrelude -import {-# SOURCE #-} CoreMonad ( CoreToDo, CoreM ) +import {-# SOURCE #-} GHC.Core.Op.Monad ( CoreToDo, CoreM ) import qualified TcRnTypes import TcRnTypes ( TcGblEnv, IfM, TcM, tcg_rn_decls, tcg_rn_exports ) import TcHoleFitTypes ( HoleFitPluginR ) diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index b0db896439..5b56e381ed 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -507,7 +507,7 @@ data DynFlags = DynFlags { -- by the assembler code generator (0 to disable) liberateCaseThreshold :: Maybe Int, -- ^ Threshold for LiberateCase floatLamArgs :: Maybe Int, -- ^ Arg count for lambda floating - -- See CoreMonad.FloatOutSwitches + -- See GHC.Core.Op.Monad.FloatOutSwitches liftLamsRecArgs :: Maybe Int, -- ^ Maximum number of arguments after lambda lifting a -- recursive function. diff --git a/compiler/GHC/HsToCore.hs b/compiler/GHC/HsToCore.hs index e006b20de6..145b7ade55 100644 --- a/compiler/GHC/HsToCore.hs +++ b/compiler/GHC/HsToCore.hs @@ -54,8 +54,8 @@ import NameSet import NameEnv import GHC.Core.Rules import BasicTypes -import CoreMonad ( CoreToDo(..) ) -import GHC.Core.Lint ( endPassIO ) +import GHC.Core.Op.Monad ( CoreToDo(..) ) +import GHC.Core.Lint ( endPassIO ) import VarSet import FastString import ErrUtils diff --git a/compiler/GHC/HsToCore/Binds.hs b/compiler/GHC/HsToCore/Binds.hs index 950cabed37..3becf64ca4 100644 --- a/compiler/GHC/HsToCore/Binds.hs +++ b/compiler/GHC/HsToCore/Binds.hs @@ -37,8 +37,8 @@ import GHC.HsToCore.PmCheck ( needToRunPmCheck, addTyCsDs, checkGuardMatches ) import GHC.Hs -- lots of things import GHC.Core -- lots of things -import GHC.Core.SimpleOpt ( simpleOptExpr ) -import OccurAnal ( occurAnalyseExpr ) +import GHC.Core.SimpleOpt ( simpleOptExpr ) +import GHC.Core.Op.OccurAnal ( occurAnalyseExpr ) import GHC.Core.Make import GHC.Core.Utils import GHC.Core.Arity ( etaExpand ) diff --git a/compiler/GHC/HsToCore/Match.hs b/compiler/GHC/HsToCore/Match.hs index f456323ccb..48cfe6fa7a 100644 --- a/compiler/GHC/HsToCore/Match.hs +++ b/compiler/GHC/HsToCore/Match.hs @@ -337,7 +337,7 @@ We do *not* desugar simply to error "empty case" or some such, because 'x' might be bound to (error "hello"), in which case we want to see that "hello" exception, not (error "empty case"). -See also Note [Case elimination: lifted case] in Simplify. +See also Note [Case elimination: lifted case] in GHC.Core.Op.Simplify. ************************************************************************ diff --git a/compiler/GHC/HsToCore/Match/Literal.hs b/compiler/GHC/HsToCore/Match/Literal.hs index c25785d064..090227bf32 100644 --- a/compiler/GHC/HsToCore/Match/Literal.hs +++ b/compiler/GHC/HsToCore/Match/Literal.hs @@ -369,7 +369,7 @@ tidyNPat (OverLit (OverLitTc False ty) val _) mb_neg _eq outer_ty -- NB: do /not/ convert Float or Double literals to F# 3.8 or D# 5.3 -- If we do convert to the constructor form, we'll generate a case -- expression on a Float# or Double# and that's not allowed in Core; see - -- #9238 and Note [Rules for floating-point comparisons] in PrelRules + -- #9238 and Note [Rules for floating-point comparisons] in GHC.Core.Op.ConstantFold where -- Sometimes (like in test case -- overloadedlists/should_run/overloadedlistsrun04), the SyntaxExprs include diff --git a/compiler/GHC/HsToCore/Utils.hs b/compiler/GHC/HsToCore/Utils.hs index eccd37b719..0b80959f09 100644 --- a/compiler/GHC/HsToCore/Utils.hs +++ b/compiler/GHC/HsToCore/Utils.hs @@ -863,7 +863,7 @@ Note [Failure thunks and CPR] (This note predates join points as formal entities (hence the quotation marks). We can't use actual join points here (see above); if we did, this would also solve the CPR problem, since join points don't get CPR'd. See Note [Don't CPR -join points] in WorkWrap.) +join points] in GHC.Core.Op.WorkWrap.) When we make a failure point we ensure that it does not look like a thunk. Example: diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs index 0472dee50b..b3fd56c4d2 100644 --- a/compiler/GHC/Iface/Tidy.hs +++ b/compiler/GHC/Iface/Tidy.hs @@ -22,7 +22,7 @@ import GHC.Core import GHC.Core.Unfold import GHC.Core.FVs import GHC.Core.Op.Tidy -import CoreMonad +import GHC.Core.Op.Monad import GHC.Core.Stats (coreBindsStats, CoreStats(..)) import GHC.Core.Seq (seqBinds) import GHC.Core.Lint @@ -896,7 +896,7 @@ Now that RULE *might* be useful to an importing module, but that is purely speculative, and meanwhile the code is taking up space and codegen time. I found that binary sizes jumped by 6-10% when I started to specialise INLINE functions (again, Note [Inline -specialisations] in Specialise). +specialisations] in GHC.Core.Op.Specialise). So it seems better to drop the binding for f_spec, and the rule itself, if the auto-generated rule is the *only* reason that it is @@ -904,8 +904,8 @@ being kept alive. (The RULE still might have been useful in the past; that is, it was the right thing to have generated it in the first place. See Note -[Inline specialisations] in Specialise. But now it has served its -purpose, and can be discarded.) +[Inline specialisations] in GHC.Core.Op.Specialise. But now it has +served its purpose, and can be discarded.) So findExternalRules does this: * Remove all bindings that are kept alive *only* by isAutoRule rules @@ -1253,7 +1253,8 @@ tidyTopIdInfo dflags rhs_tidy_env name orig_rhs tidy_rhs idinfo show_unfold -- the function returns bottom -- In this case, show_unfold will be false (we don't expose unfoldings -- for bottoming functions), but we might still have a worker/wrapper - -- split (see Note [Worker-wrapper for bottoming functions] in WorkWrap.hs + -- split (see Note [Worker-wrapper for bottoming functions] in + -- GHC.Core.Op.WorkWrap) --------- Arity ------------ diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs index ebf3aa588d..df2457cd62 100644 --- a/compiler/GHC/IfaceToCore.hs +++ b/compiler/GHC/IfaceToCore.hs @@ -62,7 +62,7 @@ import VarSet import Name import NameEnv import NameSet -import OccurAnal ( occurAnalyseExpr ) +import GHC.Core.Op.OccurAnal ( occurAnalyseExpr ) import Demand import Module import UniqFM diff --git a/compiler/GHC/Plugins.hs b/compiler/GHC/Plugins.hs index ee6a946cbb..05278f7da1 100644 --- a/compiler/GHC/Plugins.hs +++ b/compiler/GHC/Plugins.hs @@ -6,11 +6,11 @@ -- with saying "import GHC.Plugins". -- -- Particularly interesting modules for plugin writers include --- "GHC.Core" and "CoreMonad". +-- "GHC.Core" and "GHC.Core.Op.Monad". module GHC.Plugins( module GHC.Driver.Plugins, module RdrName, module OccName, module Name, module Var, module Id, module IdInfo, - module CoreMonad, module GHC.Core, module Literal, module GHC.Core.DataCon, + module GHC.Core.Op.Monad, module GHC.Core, module Literal, module GHC.Core.DataCon, module GHC.Core.Utils, module GHC.Core.Make, module GHC.Core.FVs, module GHC.Core.Subst, module GHC.Core.Rules, module Annotations, module GHC.Driver.Session, module GHC.Driver.Packages, @@ -37,7 +37,7 @@ import Id hiding ( lazySetIdInfo, setIdExported, setIdNotExported {- all import IdInfo -- Core -import CoreMonad +import GHC.Core.Op.Monad import GHC.Core import Literal import GHC.Core.DataCon @@ -95,8 +95,8 @@ import TcEnv ( lookupGlobal ) import qualified Language.Haskell.TH as TH -{- This instance is defined outside CoreMonad.hs so that - CoreMonad does not depend on TcEnv -} +{- This instance is defined outside GHC.Core.Op.Monad.hs so that + GHC.Core.Op.Monad does not depend on TcEnv -} instance MonadThings CoreM where lookupThing name = do { hsc_env <- getHscEnv ; liftIO $ lookupGlobal hsc_env name } diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index f8e2cbd73d..7f62c6dec1 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -64,7 +64,7 @@ cgExpr :: CgStgExpr -> FCode ReturnKind cgExpr (StgApp fun args) = cgIdApp fun args -- seq# a s ==> a --- See Note [seq# magic] in PrelRules +-- See Note [seq# magic] in GHC.Core.Op.ConstantFold cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = cgIdApp a [] @@ -404,7 +404,7 @@ cgCase scrut@(StgApp v []) _ (PrimAlt _) _ {- Note [Handle seq#] ~~~~~~~~~~~~~~~~~~~~~ -See Note [seq# magic] in PrelRules. +See Note [seq# magic] in GHC.Core.Op.ConstantFold. The special case for seq# in cgCase does this: case seq# a s of v @@ -419,7 +419,7 @@ is the same as the return convention for just 'a') cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts = -- Note [Handle seq#] - -- And see Note [seq# magic] in PrelRules + -- And see Note [seq# magic] in GHC.Core.Op.ConstantFold -- Use the same return convention as vanilla 'a'. cgCase (StgApp a []) bndr alt_type alts |