Diffstat (limited to 'compiler/GHC/Core/Opt')
-rw-r--r-- | compiler/GHC/Core/Opt/Arity.hs | 1273
-rw-r--r-- | compiler/GHC/Core/Opt/CallArity.hs | 9
-rw-r--r-- | compiler/GHC/Core/Opt/FloatIn.hs | 3
-rw-r--r-- | compiler/GHC/Core/Opt/OccurAnal.hs | 4
-rw-r--r-- | compiler/GHC/Core/Opt/SetLevels.hs | 10
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 120
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Env.hs | 14
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 306
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 9
9 files changed, 1272 insertions, 476 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs index 811beb6c0a..5858ff91e0 100644 --- a/compiler/GHC/Core/Opt/Arity.hs +++ b/compiler/GHC/Core/Opt/Arity.hs @@ -11,16 +11,29 @@ -- | Arity and eta expansion module GHC.Core.Opt.Arity - ( manifestArity, joinRhsArity, exprArity - , typeArity, typeOneShots - , exprEtaExpandArity, findRhsArity - , etaExpand, etaExpandAT - , exprBotStrictness_maybe + ( -- Finding arity + manifestArity, joinRhsArity, exprArity + , findRhsArity, exprBotStrictness_maybe , ArityOpts(..) + -- ** Eta expansion + , exprEtaExpandArity, etaExpand, etaExpandAT + + -- ** Eta reduction + , tryEtaReduce + -- ** ArityType - , ArityType(..), mkBotArityType, mkTopArityType, expandableArityType - , arityTypeArity, maxWithArity, minWithArity, idArityType + , ArityType, mkBotArityType, mkManifestArityType + , arityTypeArity, idArityType, getBotArity + + -- ** typeArity and the state hack + , typeArity, typeOneShots, typeOneShot + , isOneShotBndr + , isStateHackType + + -- * Lambdas + , zapLamBndrs + -- ** Join points , etaExpandToJoinPoint, etaExpandToJoinPointRule @@ -39,7 +52,7 @@ import GHC.Core.Utils import GHC.Core.DataCon import GHC.Core.TyCon ( tyConArity ) import GHC.Core.TyCon.RecWalk ( initRecTc, checkRecTc ) -import GHC.Core.Predicate ( isDictTy, isCallStackPredTy ) +import GHC.Core.Predicate ( isDictTy, isEvVar, isCallStackPredTy ) import GHC.Core.Multiplicity -- We have two sorts of substitution: @@ -50,17 +63,19 @@ import GHC.Core.Type as Type import GHC.Core.Coercion as Type import GHC.Types.Demand -import GHC.Types.Var -import GHC.Types.Var.Env import GHC.Types.Id +import GHC.Types.Var.Env import GHC.Types.Var.Set import GHC.Types.Basic import GHC.Types.Tickish +import GHC.Builtin.Types.Prim import GHC.Builtin.Uniques + import GHC.Data.FastString import GHC.Data.Pair +import GHC.Utils.GlobalVars( unsafeHasNoStateHack ) import GHC.Utils.Constants (debugIsOn) import GHC.Utils.Outputable import GHC.Utils.Panic @@ -123,7 +138,8 @@ exprArity :: CoreExpr -> Arity -- We do /not/ guarantee that exprArity e <= typeArity e -- You may need to do arity trimming after calling exprArity -- See Note [Arity trimming] --- (If we do arity trimming here we have to do it at every cast. +-- Reason: if we do arity trimming here we have take exprType +-- and that can be expensive if there is a large cast exprArity e = go e where go (Var v) = idArity v @@ -139,13 +155,50 @@ exprArity e = go e go _ = 0 --------------- +exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) +-- A cheap and cheerful function that identifies bottoming functions +-- and gives them a suitable strictness signatures. It's used during +-- float-out +exprBotStrictness_maybe e + = case getBotArity (arityType botStrictnessArityEnv e) of + Nothing -> Nothing + Just ar -> Just (ar, mkVanillaDmdSig ar botDiv) + +{- Note [exprArity for applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we come to an application we check that the arg is trivial. + eg f (fac x) does not have arity 2, + even if f has arity 3! + +* We require that is trivial rather merely cheap. Suppose f has arity 2. + Then f (Just y) + has arity 0, because if we gave it arity 1 and then inlined f we'd get + let v = Just y in \w. <f-body> + which has arity 0. And we try to maintain the invariant that we don't + have arity decreases. + +* The `max 0` is important! 
(\x y -> f x) has arity 2, even if f is + unknown, hence arity 0 + + +************************************************************************ +* * + typeArity and the "state hack" +* * +********************************************************************* -} + + typeArity :: Type -> Arity +-- ^ (typeArity ty) says how many arrows GHC can expose in 'ty', after +-- looking through newtypes. More generally, (typeOneShots ty) returns +-- ty's [OneShotInfo], based only on the type itself, using typeOneShot +-- on the argument type to access the "state hack". typeArity = length . typeOneShots typeOneShots :: Type -> [OneShotInfo] -- How many value arrows are visible in the type? -- We look through foralls, and newtypes --- See Note [typeArity invariants] +-- See Note [Arity invariants for bindings] typeOneShots ty = go initRecTc ty where @@ -174,64 +227,121 @@ typeOneShots ty | otherwise = [] ---------------- -exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, DmdSig) --- A cheap and cheerful function that identifies bottoming functions --- and gives them a suitable strictness signatures. It's used during --- float-out -exprBotStrictness_maybe e - = case getBotArity (arityType botStrictnessArityEnv e) of - Nothing -> Nothing - Just ar -> Just (ar, sig ar) - where - sig ar = mkClosedDmdSig (replicate ar topDmd) botDiv +typeOneShot :: Type -> OneShotInfo +typeOneShot ty + | isStateHackType ty = OneShotLam + | otherwise = NoOneShotInfo + +-- | Like 'idOneShotInfo', but taking the Horrible State Hack in to account +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +idStateHackOneShotInfo :: Id -> OneShotInfo +idStateHackOneShotInfo id + | isStateHackType (idType id) = OneShotLam + | otherwise = idOneShotInfo id + +-- | Returns whether the lambda associated with the 'Id' is +-- certainly applied at most once +-- This one is the "business end", called externally. +-- It works on type variables as well as Ids, returning True +-- Its main purpose is to encapsulate the Horrible State Hack +-- See Note [The state-transformer hack] in "GHC.Core.Opt.Arity" +isOneShotBndr :: Var -> Bool +isOneShotBndr var + | isTyVar var = True + | OneShotLam <- idStateHackOneShotInfo var = True + | otherwise = False + +isStateHackType :: Type -> Bool +isStateHackType ty + | unsafeHasNoStateHack -- Switch off with -fno-state-hack + = False + | otherwise + = case tyConAppTyCon_maybe ty of + Just tycon -> tycon == statePrimTyCon + _ -> False + -- This is a gross hack. It claims that + -- every function over realWorldStatePrimTy is a one-shot + -- function. This is pretty true in practice, and makes a big + -- difference. For example, consider + -- a `thenST` \ r -> ...E... + -- The early full laziness pass, if it doesn't know that r is one-shot + -- will pull out E (let's say it doesn't mention r) to give + -- let lvl = E in a `thenST` \ r -> ...lvl... + -- When `thenST` gets inlined, we end up with + -- let lvl = E in \s -> case a s of (r, s') -> ...lvl... + -- and we don't re-inline E. + -- + -- It would be better to spot that r was one-shot to start with, but + -- I don't want to rely on that. + -- + -- Another good example is in fill_in in PrelPack.hs. We should be able to + -- spot that fill_in has arity 2 (and when Keith is done, we will) but we can't yet. 
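As a concrete illustration of the state hack (a minimal standalone sketch, not code from this patch; `echoTwice` and `banner` are made-up names): the binder `s` below has type `State# RealWorld`, exactly the shape `isStateHackType` recognises, so the state hack treats the lambda as one-shot and full laziness will not float `banner` out of it for sharing.

  {-# LANGUAGE UnboxedTuples #-}
  import GHC.IO (IO(..), unIO)

  -- 's' :: State# RealWorld, so the state hack marks this lambda one-shot;
  -- 'banner' (which does not mention 's') is then left inside the lambda
  -- rather than being floated out to be shared across calls.
  echoTwice :: String -> IO ()
  echoTwice msg = IO (\s ->
    let banner = "msg: " ++ msg
    in case unIO (putStrLn banner) s of
         (# s1, _ #) -> unIO (putStrLn banner) s1)

  main :: IO ()
  main = echoTwice "hello"

Under -fno-state-hack the lambda gets NoOneShotInfo instead, and (as the comment above explains for the thenST example) the early full-laziness pass may pull such bindings out of the lambda.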
-{- -Note [typeArity invariants] -~~~~~~~~~~~~~~~~~~~~~~~~~~ -We have the following invariants around typeArity - (1) In any binding x = e, - idArity f <= typeArity (idType f) +{- Note [Arity invariants for bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We have the following invariants for let-bindings + + (1) In any binding f = e, + idArity f <= typeArity (idType f) + We enforce this with trimArityType, called in findRhsArity; + see Note [Arity trimming]. + + Note that we enforce this only for /bindings/. We do /not/ insist that + arityTypeArity (arityType e) <= typeArity (exprType e) + because that is quite a bit more expensive to guaranteed; it would + mean checking at every Cast in the recursive arityType, for example. (2) If typeArity (exprType e) = n, then manifestArity (etaExpand e n) = n That is, etaExpand can always expand as much as typeArity says - So the case analysis in etaExpand and in typeArity must match + (or less, of course). So the case analysis in etaExpand and in + typeArity must match. -Why is this important? Because + Consequence: because of (1), if we eta-expand to (idArity f), we will + end up with n manifest lambdas. - - In GHC.Iface.Tidy we use exprArity to fix the *final arity* of - each top-level Id, and in + (3) In any binding f = e, + idArity f <= arityTypeArity (safeArityType (arityType e)) + That is, we call safeArityType before attributing e's arityType to f. + See Note [SafeArityType]. - - In CorePrep we use etaExpand on each rhs, so that the visible lambdas - actually match that arity, which in turn means - that the StgRhs has the right number of lambdas + So we call safeArityType in findRhsArity. Suppose we have f :: Int -> Int -> Int f x y = x+y -- Arity 2 g :: F Int - g = case x of { True -> f |> co1 - ; False -> g |> co2 } + g = case <cond> of { True -> f |> co1 + ; False -> g |> co2 } -Now, we can't eta-expand g to have arity 2, because etaExpand, which works -off the /type/ of the expression, doesn't know how to make an eta-expanded -binding +where F is a type family. Now, we can't eta-expand g to have arity 2, +because etaExpand, which works off the /type/ of the expression +(albeit looking through newtypes), doesn't know how to make an +eta-expanded binding g = (\a b. case x of ...) |> co -because can't make up `co` or the types of `a` and `b`. +because it can't make up `co` or the types of `a` and `b`. So invariant (1) ensures that every binding has an arity that is no greater than the typeArity of the RHS; and invariant (2) ensures that etaExpand and handle what typeArity says. +Why is this important? Because + + - In GHC.Iface.Tidy we use exprArity/manifestArity to fix the *final + arity* of each top-level Id, and in + + - In CorePrep we use etaExpand on each rhs, so that the visible + lambdas actually match that arity, which in turn means that the + StgRhs has a number of lambdas that precisely matches the arity. + Note [Arity trimming] ~~~~~~~~~~~~~~~~~~~~~ -Arity trimming, implemented by minWithArity, directly implements -invariant (1) of Note [typeArity invariants]. Failing to do so, and -hence breaking invariant (1) led to #5441. +Invariant (1) of Note [Arity invariants for bindings] is upheld by findRhsArity, +which calls trimArityType to trim the ArityType to match the Arity of the +binding. Failing to do so, and hence breaking invariant (1) led to #5441. How to trim? If we end in topDiv, it's easy. But we must take great care with dead ends (i.e. botDiv). Suppose the expression was (\x y. 
error "urk"), @@ -293,26 +403,34 @@ trying to *make* it hold, but it's tricky and I gave up. The test simplCore/should_compile/T3722 is an excellent example. -------- End of old out of date comments, just for interest ----------- +-} +{- ******************************************************************** +* * + Zapping lambda binders +* * +********************************************************************* -} -Note [exprArity for applications] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When we come to an application we check that the arg is trivial. - eg f (fac x) does not have arity 2, - even if f has arity 3! - -* We require that is trivial rather merely cheap. Suppose f has arity 2. - Then f (Just y) - has arity 0, because if we gave it arity 1 and then inlined f we'd get - let v = Just y in \w. <f-body> - which has arity 0. And we try to maintain the invariant that we don't - have arity decreases. +zapLamBndrs :: FullArgCount -> [Var] -> [Var] +-- If (\xyz. t) appears under-applied to only two arguments, +-- we must zap the occ-info on x,y, because they appear (in 't') under the \z. +-- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal +-- +-- NB: both `arg_count` and `bndrs` include both type and value args/bndrs +zapLamBndrs arg_count bndrs + | no_need_to_zap = bndrs + | otherwise = zap_em arg_count bndrs + where + no_need_to_zap = all isOneShotBndr (drop arg_count bndrs) -* The `max 0` is important! (\x y -> f x) has arity 2, even if f is - unknown, hence arity 0 + zap_em :: FullArgCount -> [Var] -> [Var] + zap_em 0 bs = bs + zap_em _ [] = [] + zap_em n (b:bs) | isTyVar b = b : zap_em (n-1) bs + | otherwise = zapLamIdInfo b : zap_em (n-1) bs -************************************************************************ +{- ********************************************************************* * * Computing the "arity" of an expression * * @@ -490,34 +608,72 @@ but not to introduce a new lambda. Note [ArityType] ~~~~~~~~~~~~~~~~ +ArityType can be thought of as an abstraction of an expression. +The ArityType + AT [ (IsCheap, NoOneShotInfo) + , (IsExpensive, OneShotLam) + , (IsCheap, OneShotLam) ] Dunno) + +abstracts an expression like + \x. let <expensive> in + \y{os}. + \z{os}. blah + +In general we have (AT lams div). Then +* In lams :: [(Cost,OneShotInfo)] + * The Cost flag describes the part of the expression down + to the first (value) lambda. + * The OneShotInfo flag gives the one-shot info on that lambda. + +* If 'div' is dead-ending ('isDeadEndDiv'), then application to + 'length lams' arguments will surely diverge, similar to the situation + with 'DmdType'. + ArityType is the result of a compositional analysis on expressions, from which we can decide the real arity of the expression (extracted with function exprEtaExpandArity). We use the following notation: - at ::= \o1..on.div + at ::= \p1..pn.div div ::= T | x | ⊥ - o ::= ? | 1 -And omit the \. if n = 0. Examples: - \?11.T stands for @AT [NoOneShotInfo,OneShotLam,OneShotLam] topDiv@ - ⊥ stands for @AT [] botDiv@ + p ::= (c o) + c ::= X | C -- Expensive or Cheap + o ::= ? | 1 -- NotOneShot or OneShotLam +We may omit the \. if n = 0. +And ⊥ stands for `AT [] botDiv` + +Here is an example demonstrating the notation: + \(C?)(X1)(C1).T +stands for + AT [ (IsCheap,NoOneShotInfo) + , (IsExpensive,OneShotLam) + , (IsCheap,OneShotLam) ] + topDiv + See the 'Outputable' instance for more information. It's pretty simple. +How can we use ArityType? Example: + f = \x\y. let v = <expensive> in + \s(one-shot) \t(one-shot). 
blah + 'f' has arity type \(C?)(C?)(X1)(C1).T + The one-shot-ness means we can, in effect, push that + 'let' inside the \st, and expand to arity 4 + +Suppose f = \xy. x+y +Then f :: \(C?)(C?).T + f v :: \(C?).T + f <expensive> :: \(X?).T + Here is what the fields mean. If an arbitrary expression 'f' has ArityType 'at', then * If @at = AT [o1,..,on] botDiv@ (notation: \o1..on.⊥), then @f x1..xn@ definitely diverges. Partial applications to fewer than n args may *or - may not* diverge. + may not* diverge. Ditto exnDiv. - We allow ourselves to eta-expand bottoming functions, even - if doing so may lose some `seq` sharing, - let x = <expensive> in \y. error (g x y) - ==> \y. let x = <expensive> in error (g x y) - - * If @at = AT [o1,..,on] topDiv@ (notation: \o1..on.T), then expanding 'f' - to @\x1..xn. f x1..xn@ loses no sharing, assuming the calls of f respect - the one-shot-ness o1..on of its definition. + * If `f` has ArityType `at` we can eta-expand `f` to have (aritTypeOneShots at) + arguments without losing sharing. This function checks that the either + there are no expensive expressions, or the lambdas are one-shots. NB 'f' is an arbitrary expression, eg @f = g e1 e2@. This 'f' can have arity type @AT oss _@, with @length oss > 0@, only if e1 e2 are themselves @@ -530,20 +686,45 @@ ArityType 'at', then So eta expansion is dynamically ok; see Note [State hack and bottoming functions], the part about catch# -Example: - f = \x\y. let v = <expensive> in - \s(one-shot) \t(one-shot). blah - 'f' has arity type \??11.T - The one-shot-ness means we can, in effect, push that - 'let' inside the \st. +Wrinkles +* Wrinkle [Bottoming functions]: see function 'arityLam'. + We treat bottoming functions as one-shot, because there is no point + in floating work outside the lambda, and it's fine to float it inside. -Suppose f = \xy. x+y -Then f :: \??.T - f v :: \?.T - f <expensive> :: T --} + For example, this is fine (see test stranal/sigs/BottomFromInnerLambda) + let x = <expensive> in \y. error (g x y) + ==> \y. let x = <expensive> in error (g x y) + Idea: perhaps we could enforce this invariant with + data Arity Type = TopAT [(Cost, OneShotInfo)] | DivAT [Cost] + + +Note [SafeArityType] +~~~~~~~~~~~~~~~~~~~~ +The function safeArityType trims an ArityType to return a "safe" ArityType, +for which we use a type synonym SafeArityType. It is "safe" in the sense +that (arityTypeArity at) really reflects the arity of the expression, whereas +a regular ArityType might have more lambdas in its [ATLamInfo] that the +(cost-free) arity of the expression. + +For example + \x.\y.let v = expensive in \z. blah +has + arityType = AT [C?, C?, X?, C?] Top +But the expression actually has arity 2, not 4, because of the X. +So safeArityType will trim it to (AT [C?, C?] Top), whose [ATLamInfo] +now reflects the (cost-free) arity of the expression + +Why do we ever need an "unsafe" ArityType, such as the example above? +Because its (cost-free) arity may increased by combineWithDemandOneShots +in findRhsArity. See Note [Combining arity type with demand info]. + +Thus the function `arityType` returns a regular "unsafe" ArityType, that +goes deeply into the lambdas (including under IsExpensive). But that is +very local; most ArityTypes are indeed "safe". We use the type synonym +SafeArityType to indicate where we believe the ArityType is safe. +-} -- | The analysis lattice of arity analysis. 
It is isomorphic to -- @@ -574,22 +755,33 @@ Then f :: \??.T -- -- We rely on this lattice structure for fixed-point iteration in -- 'findRhsArity'. For the semantics of 'ArityType', see Note [ArityType]. -data ArityType - = AT ![OneShotInfo] !Divergence - -- ^ @AT oss div@ means this value can safely be eta-expanded @length oss@ - -- times, provided use sites respect the 'OneShotInfo's in @oss@. - -- A 'OneShotLam' annotation can come from two sources: - -- * The user annotated a lambda as one-shot with 'GHC.Exts.oneShot' - -- * It's from a lambda binder of a type affected by `-fstate-hack`. - -- See 'idStateHackOneShotInfo'. - -- In both cases, 'OneShotLam' should win over 'NoOneShotInfo', see - -- Note [Combining case branches]. - -- - -- If @div@ is dead-ending ('isDeadEndDiv'), then application to - -- @length os@ arguments will surely diverge, similar to the situation - -- with 'DmdType'. +data ArityType -- See Note [ArityType] + = AT ![ATLamInfo] !Divergence + -- ^ `AT oss div` is an abstraction of the expression, which describes + -- its lambdas, and how much work appears where. + -- See Note [ArityType] for more information + -- + -- If `div` is dead-ending ('isDeadEndDiv'), then application to + -- `length os` arguments will surely diverge, similar to the situation + -- with 'DmdType'. deriving Eq +type ATLamInfo = (Cost,OneShotInfo) + -- ^ Info about one lambda in an ArityType + -- See Note [ArityType] + +type SafeArityType = ArityType -- See Note [SafeArityType] + +data Cost = IsCheap | IsExpensive + deriving( Eq ) + +allCosts :: (a -> Cost) -> [a] -> Cost +allCosts f xs = foldr (addCost . f) IsCheap xs + +addCost :: Cost -> Cost -> Cost +addCost IsCheap IsCheap = IsCheap +addCost _ _ = IsExpensive + -- | This is the BNF of the generated output: -- -- @ @@ -608,57 +800,56 @@ instance Outputable ArityType where pp_div Diverges = char '⊥' pp_div ExnOrDiv = char 'x' pp_div Dunno = char 'T' - pp_os OneShotLam = char '1' - pp_os NoOneShotInfo = char '?' + pp_os (IsCheap, OneShotLam) = text "(C1)" + pp_os (IsExpensive, OneShotLam) = text "(X1)" + pp_os (IsCheap, NoOneShotInfo) = text "(C?)" + pp_os (IsExpensive, NoOneShotInfo) = text "(X?)" mkBotArityType :: [OneShotInfo] -> ArityType -mkBotArityType oss = AT oss botDiv +mkBotArityType oss = AT [(IsCheap,os) | os <- oss] botDiv botArityType :: ArityType botArityType = mkBotArityType [] -mkTopArityType :: [OneShotInfo] -> ArityType -mkTopArityType oss = AT oss topDiv +mkManifestArityType :: [OneShotInfo] -> ArityType +mkManifestArityType oss = AT [(IsCheap,os) | os <- oss] topDiv topArityType :: ArityType -topArityType = mkTopArityType [] +topArityType = AT [] topDiv -- | The number of value args for the arity type -arityTypeArity :: ArityType -> Arity -arityTypeArity (AT oss _) = length oss - --- | True <=> eta-expansion will add at least one lambda -expandableArityType :: ArityType -> Bool -expandableArityType at = arityTypeArity at > 0 - --- | See Note [Dead ends] in "GHC.Types.Demand". --- Bottom implies a dead end. -isDeadEndArityType :: ArityType -> Bool -isDeadEndArityType (AT _ div) = isDeadEndDiv div - ------------------------ -infixl 2 `maxWithArity`, `minWithArity` - --- | Expand a non-bottoming arity type so that it has at least the given arity. -maxWithArity :: ArityType -> Arity -> ArityType -maxWithArity at@(AT oss div) !ar - | isDeadEndArityType at = at - | oss `lengthAtLeast` ar = at - | otherwise = AT (take ar $ oss ++ repeat NoOneShotInfo) div - --- | Trim an arity type so that it has at most the given arity. 
--- Any excess 'OneShotInfo's are truncated to 'topDiv', even if they end in --- 'ABot'. See Note [Arity trimming] -minWithArity :: ArityType -> Arity -> ArityType -minWithArity at@(AT oss _) ar - | oss `lengthAtMost` ar = at - | otherwise = AT (take ar oss) topDiv - ----------------------- -takeWhileOneShot :: ArityType -> ArityType -takeWhileOneShot (AT oss div) - | isDeadEndDiv div = AT (takeWhile isOneShotInfo oss) topDiv - | otherwise = AT (takeWhile isOneShotInfo oss) div +arityTypeArity :: SafeArityType -> Arity +arityTypeArity (AT lams _) = length lams + +arityTypeOneShots :: SafeArityType -> [OneShotInfo] +-- Returns a list only as long as the arity should be +arityTypeOneShots (AT lams _) = map snd lams + +safeArityType :: ArityType -> SafeArityType +-- ^ Assuming this ArityType is all we know, find the arity of +-- the function, and trim the argument info (and Divergenge) +-- to match that arity. See Note [SafeArityType] +safeArityType at@(AT lams _) + = case go 0 IsCheap lams of + Nothing -> at -- No trimming needed + Just ar -> AT (take ar lams) topDiv + where + go :: Arity -> Cost -> [(Cost,OneShotInfo)] -> Maybe Arity + go _ _ [] = Nothing + go ar ch1 ((ch2,os):lams) + = case (ch1 `addCost` ch2, os) of + (IsExpensive, NoOneShotInfo) -> Just ar + (ch, _) -> go (ar+1) ch lams + +infixl 2 `trimArityType` + +trimArityType :: Arity -> ArityType -> ArityType +-- ^ Trim an arity type so that it has at most the given arity. +-- Any excess 'OneShotInfo's are truncated to 'topDiv', even if +-- they end in 'ABot'. See Note [Arity trimming] +trimArityType max_arity at@(AT lams _) + | lams `lengthAtMost` max_arity = at + | otherwise = AT (take max_arity lams) topDiv data ArityOpts = ArityOpts { ao_ped_bot :: !Bool -- See Note [Dealing with bottom] @@ -667,10 +858,17 @@ data ArityOpts = ArityOpts -- | The Arity returned is the number of value args the -- expression can be applied to without doing much work -exprEtaExpandArity :: ArityOpts -> CoreExpr -> ArityType +exprEtaExpandArity :: ArityOpts -> CoreExpr -> Maybe SafeArityType -- exprEtaExpandArity is used when eta expanding -- e ==> \xy -> e x y -exprEtaExpandArity opts e = arityType (etaExpandArityEnv opts) e +-- Nothing if the expression has arity 0 +exprEtaExpandArity opts e + | AT [] _ <- arity_type + = Nothing + | otherwise + = Just arity_type + where + arity_type = safeArityType (arityType (etaExpandArityEnv opts) e) getBotArity :: ArityType -> Maybe Arity -- Arity of a divergent function @@ -678,29 +876,54 @@ getBotArity (AT oss div) | isDeadEndDiv div = Just $ length oss | otherwise = Nothing ----------------------- -findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> ArityType + +{- ********************************************************************* +* * + findRhsArity +* * +********************************************************************* -} + +findRhsArity :: ArityOpts -> RecFlag -> Id -> CoreExpr -> Arity -> SafeArityType -- This implements the fixpoint loop for arity analysis -- See Note [Arity analysis] -- If findRhsArity e = (n, is_bot) then -- (a) any application of e to <n arguments will not do much work, -- so it is safe to expand e ==> (\x1..xn. e x1 .. 
xn) -- (b) if is_bot=True, then e applied to n args is guaranteed bottom - -findRhsArity opts NonRecursive _ rhs _ - = arityType (findRhsArityEnv opts) rhs - -findRhsArity opts Recursive bndr rhs old_arity - = go 0 botArityType - -- We always do one step, but usually that produces a result equal to - -- old_arity, and then we stop right away, because old_arity is assumed - -- to be sound. In other words, arities should never decrease. - -- Result: the common case is that there is just one iteration +-- +-- Returns an ArityType that is guaranteed trimmed to typeArity of 'bndr' +-- See Note [Arity trimming] +findRhsArity opts is_rec bndr rhs old_arity + = case is_rec of + Recursive -> go 0 botArityType + NonRecursive -> step init_env where - go :: Int -> ArityType -> ArityType - go !n cur_at@(AT oss div) + init_env :: ArityEnv + init_env = findRhsArityEnv opts + + ty_arity = typeArity (idType bndr) + id_one_shots = idDemandOneShots bndr + + step :: ArityEnv -> SafeArityType + step env = trimArityType ty_arity $ + safeArityType $ -- See Note [Arity invariants for bindings], item (3) + arityType env rhs `combineWithDemandOneShots` id_one_shots + -- trimArityType: see Note [Trim arity inside the loop] + -- combineWithDemandOneShots: take account of the demand on the + -- binder. Perhaps it is always called with 2 args + -- let f = \x. blah in (f 3 4, f 1 9) + -- f's demand-info says how many args it is called with + + -- The fixpoint iteration (go), done for recursive bindings. We + -- always do one step, but usually that produces a result equal + -- to old_arity, and then we stop right away, because old_arity + -- is assumed to be sound. In other words, arities should never + -- decrease. Result: the common case is that there is just one + -- iteration + go :: Int -> SafeArityType -> SafeArityType + go !n cur_at@(AT lams div) | not (isDeadEndDiv div) -- the "stop right away" case - , length oss <= old_arity = cur_at -- from above + , length lams <= old_arity = cur_at -- from above | next_at == cur_at = cur_at | otherwise = -- Warn if more than 2 iterations. Why 2? 
See Note [Exciting arity] @@ -709,20 +932,49 @@ findRhsArity opts Recursive bndr rhs old_arity (nest 2 (ppr bndr <+> ppr cur_at <+> ppr next_at $$ ppr rhs)) $ go (n+1) next_at where - next_at = step cur_at - - step :: ArityType -> ArityType - step at = -- pprTrace "step" (vcat [ ppr bndr <+> ppr at <+> ppr (arityType env rhs) - -- , ppr (idType bndr) - -- , ppr (typeArity (idType bndr)) ]) $ - arityType env rhs - where - env = extendSigEnv (findRhsArityEnv opts) bndr at + next_at = step (extendSigEnv init_env bndr cur_at) +infixl 2 `combineWithDemandOneShots` -{- -Note [Arity analysis] -~~~~~~~~~~~~~~~~~~~~~ +combineWithDemandOneShots :: ArityType -> [OneShotInfo] -> ArityType +-- See Note [Combining arity type with demand info] +combineWithDemandOneShots at@(AT lams div) oss + | null lams = at + | otherwise = AT (zip_lams lams oss) div + where + zip_lams :: [ATLamInfo] -> [OneShotInfo] -> [ATLamInfo] + zip_lams lams [] = lams + zip_lams [] oss = [ (IsExpensive,OneShotLam) + | _ <- takeWhile isOneShotInfo oss] + zip_lams ((ch,os1):lams) (os2:oss) + = (ch, os1 `bestOneShot` os2) : zip_lams lams oss + +idDemandOneShots :: Id -> [OneShotInfo] +idDemandOneShots bndr + = call_arity_one_shots `zip_lams` dmd_one_shots + where + call_arity_one_shots :: [OneShotInfo] + call_arity_one_shots + | call_arity == 0 = [] + | otherwise = NoOneShotInfo : replicate (call_arity-1) OneShotLam + -- Call Arity analysis says the function is always called + -- applied to this many arguments. The first NoOneShotInfo is because + -- if Call Arity says "always applied to 3 args" then the one-shot info + -- we get is [NoOneShotInfo, OneShotLam, OneShotLam] + call_arity = idCallArity bndr + + dmd_one_shots :: [OneShotInfo] + -- If the demand info is Cx(C1(C1(.))) then we know that an + -- application to one arg is also an application to three + dmd_one_shots = argOneShots (idDemandInfo bndr) + + -- Take the *longer* list + zip_lams (lam1:lams1) (lam2:lams2) = (lam1 `bestOneShot` lam2) : zip_lams lams1 lams2 + zip_lams [] lams2 = lams2 + zip_lams lams1 [] = lams1 + +{- Note [Arity analysis] +~~~~~~~~~~~~~~~~~~~~~~~~ The motivating example for arity analysis is this: f = \x. let g = f (x+1) @@ -784,57 +1036,118 @@ to floatIn the non-cheap let-binding. Which is all perfectly benign, but means we do two iterations (well, actually 3 'step's to detect we are stable) and don't want to emit the warning. -Note [Eta expanding through dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the experimental -fdicts-cheap flag is on, we eta-expand through -dictionary bindings. This improves arities. Thereby, it also -means that full laziness is less prone to floating out the -application of a function to its dictionary arguments, which -can thereby lose opportunities for fusion. Example: - foo :: Ord a => a -> ... - foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). .... - -- So foo has arity 1 +Note [Trim arity inside the loop] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here's an example (from gadt/nbe.hs) which caused trouble. + data Exp g t where + Lam :: Ty a -> Exp (g,a) b -> Exp g (a->b) - f = \x. foo dInt $ bar x + eval :: Exp g t -> g -> t + eval (Lam _ e) g = \a -> eval e (g,a) -The (foo DInt) is floated out, and makes ineffective a RULE - foo (bar x) = ... +The danger is that we get arity 3 from analysing this; and the +next time arity 4, and so on for ever. Solution: use trimArityType +on each iteration. -One could go further and make exprIsCheap reply True to any -dictionary-typed expression, but that's more work. 
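To see the shape of the fixpoint loop from Note [Arity analysis] in isolation, here is a toy standalone model (not code from this patch; `stepArity` is a made-up stand-in for one `step` over the Note's example `f = \x. let g = f (x+1) in \y. x+y`):

  -- One abstract step: given the arity currently assumed for f, what arity
  -- do we compute for the RHS  \x. let g = f (x+1) in \y. x+y ?
  -- The let-binding is cheap (a partial application) only if f's assumed
  -- arity is at least 2.
  stepArity :: Int -> Int
  stepArity assumed
    | assumed >= 2 = 2      -- g = f (x+1) is cheap; both lambdas count
    | otherwise    = 1      -- expensive let between \x and \y; stop at \x

  -- Iterate from an optimistic start (playing the role of botArityType)
  -- until the answer is stable; here that takes a single real iteration.
  fixArity :: (Int -> Int) -> Int
  fixArity step = go (step maxBound)
    where go a | step a == a = a
               | otherwise   = go (step a)

  main :: IO ()
  main = print (fixArity stepArity)   -- prints 2

As in the real implementation, most bindings stabilise after one iteration; the warning mentioned in Note [Exciting arity] fires only when more are needed.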
+Note [Combining arity type with demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let f = \x. let y = <expensive> in \p \q{os}. blah + in ...(f a b)...(f c d)... + +* From the RHS we get an ArityType like + AT [ (IsCheap,?), (IsExpensive,?), (IsCheap,OneShotLam) ] Dunno + where "?" means NoOneShotInfo + +* From the body, the demand analyser (or Call Arity) will tell us + that the function is always applied to at least two arguments. + +Combining these two pieces of info, we can get the final ArityType + AT [ (IsCheap,?), (IsExpensive,OneShotLam), (IsCheap,OneShotLam) ] Dunno +result: arity=3, which is better than we could do from either +source alone. + +The "combining" part is done by combineWithDemandOneShots. It +uses info from both Call Arity and demand analysis. + +We may have /more/ call demands from the calls than we have lambdas +in the binding. E.g. + let f1 = \x. g x x in ...(f1 p q r)... + -- Demand on f1 is Cx(C1(C1(L))) + + let f2 = \y. error y in ...(f2 p q r)... + -- Demand on f2 is Cx(C1(C1(L))) + +In both these cases we can eta expand f1 and f2 to arity 3. +But /only/ for called-once demands. Suppose we had + let f1 = \y. g x x in ...let h = f1 p q in ...(h r1)...(h r2)... + +Now we don't want to eta-expand f1 to have 3 args; only two. +Nor, in the case of f2, do we want to push that error call under +a lambda. Hence the takeWhile in combineWithDemandDoneShots. -} + +{- ********************************************************************* +* * + arityType +* * +********************************************************************* -} + arityLam :: Id -> ArityType -> ArityType -arityLam id (AT oss div) = AT (idStateHackOneShotInfo id : oss) div +arityLam id (AT oss div) + = AT ((IsCheap, one_shot) : oss) div + where + one_shot | isDeadEndDiv div = OneShotLam + | otherwise = idStateHackOneShotInfo id + -- If the body diverges, treat it as one-shot: no point + -- in floating out, and no penalty for floating in + -- See Wrinkle [Bottoming functions] in Note [ArityType] -floatIn :: Bool -> ArityType -> ArityType +floatIn :: Cost -> ArityType -> ArityType -- We have something like (let x = E in b), -- where b has the given arity type. -floatIn cheap at - | isDeadEndArityType at || cheap = at - -- If E is not cheap, keep arity only for one-shots - | otherwise = takeWhileOneShot at +floatIn IsCheap at = at +floatIn IsExpensive at = addWork at + +addWork :: ArityType -> ArityType +addWork at@(AT lams div) + = case lams of + [] -> at + lam:lams' -> AT (add_work lam : lams') div + where + add_work :: ATLamInfo -> ATLamInfo + add_work (_,os) = (IsExpensive,os) -arityApp :: ArityType -> Bool -> ArityType +arityApp :: ArityType -> Cost -> ArityType -- Processing (fun arg) where at is the ArityType of fun, -- Knock off an argument and behave like 'let' -arityApp (AT (_:oss) div) cheap = floatIn cheap (AT oss div) -arityApp at _ = at +arityApp (AT ((ch1,_):oss) div) ch2 = floatIn (ch1 `addCost` ch2) (AT oss div) +arityApp at _ = at -- | Least upper bound in the 'ArityType' lattice. -- See the haddocks on 'ArityType' for the lattice. -- -- Used for branches of a @case@. 
andArityType :: ArityType -> ArityType -> ArityType -andArityType (AT (os1:oss1) div1) (AT (os2:oss2) div2) - | AT oss' div' <- andArityType (AT oss1 div1) (AT oss2 div2) - = AT ((os1 `bestOneShot` os2) : oss') div' -- See Note [Combining case branches] -andArityType (AT [] div1) at2 - | isDeadEndDiv div1 = at2 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at2 -- See Note [Combining case branches] -andArityType at1 (AT [] div2) - | isDeadEndDiv div2 = at1 -- Note [ABot branches: max arity wins] - | otherwise = takeWhileOneShot at1 -- See Note [Combining case branches] +andArityType (AT (lam1:lams1) div1) (AT (lam2:lams2) div2) + | AT lams' div' <- andArityType (AT lams1 div1) (AT lams2 div2) + = AT ((lam1 `and_lam` lam2) : lams') div' -- See Note [Combining case branches] + where + (ch1,os1) `and_lam` (ch2,os2) + = ( ch1 `addCost` ch2, os1 `bestOneShot` os2) + +andArityType (AT [] div1) at2 = andWithTail div1 at2 +andArityType at1 (AT [] div2) = andWithTail div2 at1 + +andWithTail :: Divergence -> ArityType -> ArityType +andWithTail div1 at2@(AT oss2 _) + | isDeadEndDiv div1 -- case x of { T -> error; F -> \y.e } + = at2 + | otherwise -- case x of { T -> plusInt <expensive>; F -> \y.e } + = addWork (AT oss2 topDiv) -- We know div1 = topDiv + -- Note [ABot branches: max arity wins] + -- See Note [Combining case branches] {- Note [ABot branches: max arity wins] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -866,29 +1179,6 @@ basis that if we know one branch is one-shot, then they all must be. Surprisingly, this means that the one-shot arity type is effectively the top element of the lattice. -Note [Arity trimming] -~~~~~~~~~~~~~~~~~~~~~ -Consider ((\x y. blah) |> co), where co :: (Int->Int->Int) ~ (Int -> F a) , and -F is some type family. - -Because of Note [exprArity invariant], item (2), we must return with arity at -most 1, because typeArity (Int -> F a) = 1. So we have to trim the result of -calling arityType on (\x y. blah). Failing to do so, and hence breaking the -exprArity invariant, led to #5441. - -How to trim? If we end in topDiv, it's easy. But we must take great care with -dead ends (i.e. botDiv). Suppose the expression was (\x y. error "urk"), -we'll get \??.⊥. We absolutely must not trim that to \?.⊥, because that -claims that ((\x y. error "urk") |> co) diverges when given one argument, -which it absolutely does not. And Bad Things happen if we think something -returns bottom when it doesn't (#16066). - -So, if we need to trim a dead-ending arity type, switch (conservatively) to -topDiv. - -Historical note: long ago, we unconditionally switched to topDiv when we -encountered a cast, but that is far too conservative: see #5475 - Note [Eta expanding through CallStacks] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Just as it's good to eta-expand through dictionaries, so it is good to @@ -899,6 +1189,25 @@ do so through CallStacks. #20103 is a case in point, where we got We really want to eta-expand this! #20103 is quite convincing! We do this regardless of -fdicts-cheap; it's not really a dictionary. + +Note [Eta expanding through dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the experimental -fdicts-cheap flag is on, we eta-expand through +dictionary bindings. This improves arities. Thereby, it also +means that full laziness is less prone to floating out the +application of a function to its dictionary arguments, which +can thereby lose opportunities for fusion. Example: + foo :: Ord a => a -> ... + foo = /\a \(d:Ord a). let d' = ...d... 
in \(x:a). .... + -- So foo has arity 1 + + f = \x. foo dInt $ bar x + +The (foo DInt) is floated out, and makes ineffective a RULE + foo (bar x) = ... + +One could go further and make exprIsCheap reply True to any +dictionary-typed expression, but that's more work. -} --------------------------- @@ -921,14 +1230,18 @@ We do this regardless of -fdicts-cheap; it's not really a dictionary. data AnalysisMode = BotStrictness -- ^ Used during 'exprBotStrictness_maybe'. + | EtaExpandArity { am_opts :: !ArityOpts } - -- ^ Used for finding an expression's eta-expanding arity quickly, without - -- fixed-point iteration ('exprEtaExpandArity'). - | FindRhsArity { am_opts :: !ArityOpts - , am_sigs :: !(IdEnv ArityType) } + -- ^ Used for finding an expression's eta-expanding arity quickly, + -- without fixed-point iteration ('exprEtaExpandArity'). + + | FindRhsArity { am_opts :: !ArityOpts + , am_sigs :: !(IdEnv SafeArityType) } -- ^ Used for regular, fixed-point arity analysis ('findRhsArity'). -- See Note [Arity analysis] for details about fixed-point iteration. - -- INVARIANT: Disjoint with 'ae_joins'. + -- am_dicts_cheap: see Note [Eta expanding through dictionaries] + -- am_sigs: note `SafeArityType` so we can use this in myIsCheapApp + -- INVARIANT: am_sigs is disjoint with 'ae_joins'. data ArityEnv = AE @@ -991,9 +1304,11 @@ extendJoinEnv env@(AE { ae_joins = joins }) join_ids = del_sig_env_list join_ids $ env { ae_joins = joins `extendVarSetList` join_ids } -extendSigEnv :: ArityEnv -> Id -> ArityType -> ArityEnv +extendSigEnv :: ArityEnv -> Id -> SafeArityType -> ArityEnv extendSigEnv env id ar_ty - = del_join_env id (modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) env) + = del_join_env id $ + modifySigEnv (\sigs -> extendVarEnv sigs id ar_ty) $ + env delInScope :: ArityEnv -> Id -> ArityEnv delInScope env id = del_join_env id $ del_sig_env id env @@ -1001,7 +1316,7 @@ delInScope env id = del_join_env id $ del_sig_env id env delInScopeList :: ArityEnv -> [Id] -> ArityEnv delInScopeList env ids = del_join_env_list ids $ del_sig_env_list ids env -lookupSigEnv :: ArityEnv -> Id -> Maybe ArityType +lookupSigEnv :: ArityEnv -> Id -> Maybe SafeArityType lookupSigEnv AE{ ae_mode = mode } id = case mode of BotStrictness -> Nothing EtaExpandArity{} -> Nothing @@ -1015,6 +1330,11 @@ pedanticBottoms AE{ ae_mode = mode } = case mode of EtaExpandArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot FindRhsArity{ am_opts = ArityOpts{ ao_ped_bot = ped_bot } } -> ped_bot +exprCost :: ArityEnv -> CoreExpr -> Maybe Type -> Cost +exprCost env e mb_ty + | myExprIsCheap env e mb_ty = IsCheap + | otherwise = IsExpensive + -- | A version of 'exprIsCheap' that considers results from arity analysis -- and optionally the expression's type. -- Under 'exprBotStrictness_maybe', no expressions are cheap. @@ -1040,17 +1360,20 @@ myExprIsCheap AE{ae_mode = mode} e mb_ty = case mode of -- | A version of 'isCheapApp' that considers results from arity analysis. -- See Note [Arity analysis] for what's in the signature environment and why -- it's important. -myIsCheapApp :: IdEnv ArityType -> CheapAppFun +myIsCheapApp :: IdEnv SafeArityType -> CheapAppFun myIsCheapApp sigs fn n_val_args = case lookupVarEnv sigs fn of + -- Nothing means not a local function, fall back to regular -- 'GHC.Core.Utils.isCheapApp' - Nothing -> isCheapApp fn n_val_args - -- @Just at@ means local function with @at@ as current ArityType. 
+ Nothing -> isCheapApp fn n_val_args + + -- `Just at` means local function with `at` as current SafeArityType. -- Roughly approximate what 'isCheapApp' is doing. - Just (AT oss div) + Just (AT lams div) | isDeadEndDiv div -> True -- See Note [isCheapApp: bottoming functions] in GHC.Core.Utils - | n_val_args < length oss -> True -- Essentially isWorkFreeApp - | otherwise -> False + | n_val_args == 0 -> True -- Essentially + | n_val_args < length lams -> True -- isWorkFreeApp + | otherwise -> False ---------------- arityType :: ArityEnv -> CoreExpr -> ArityType @@ -1077,7 +1400,10 @@ arityType env (Lam x e) arityType env (App fun (Type _)) = arityType env fun arityType env (App fun arg ) - = arityApp (arityType env fun) (myExprIsCheap env arg Nothing) + = arityApp fun_at arg_cost + where + fun_at = arityType env fun + arg_cost = exprCost env arg Nothing -- Case/Let; keep arity if either the expression is cheap -- or it's a 1-shot lambda @@ -1098,9 +1424,8 @@ arityType env (Case scrut bndr _ alts) | exprOkForSpeculation scrut = alts_type - | otherwise -- In the remaining cases we may not push - = takeWhileOneShot alts_type -- evaluation of the scrutinee in - + | otherwise -- In the remaining cases we may not push + = addWork alts_type -- evaluation of the scrutinee in where env' = delInScope env bndr arity_type_alt (Alt _con bndrs rhs) = arityType (delInScopeList env' bndrs) rhs @@ -1128,17 +1453,17 @@ arityType env (Let (Rec pairs) body) | otherwise = pprPanic "arityType:joinrec" (ppr pairs) -arityType env (Let (NonRec b r) e) - = floatIn cheap_rhs (arityType env' e) +arityType env (Let (NonRec b rhs) e) + = floatIn rhs_cost (arityType env' e) where - cheap_rhs = myExprIsCheap env r (Just (idType b)) - env' = extendSigEnv env b (arityType env r) + rhs_cost = exprCost env rhs (Just (idType b)) + env' = extendSigEnv env b (safeArityType (arityType env rhs)) arityType env (Let (Rec prs) e) - = floatIn (all is_cheap prs) (arityType env' e) + = floatIn (allCosts bind_cost prs) (arityType env' e) where - env' = delInScopeList env (map fst prs) - is_cheap (b,e) = myExprIsCheap env' e (Just (idType b)) + env' = delInScopeList env (map fst prs) + bind_cost (b,e) = exprCost env' e (Just (idType b)) arityType env (Tick t e) | not (tickishIsCode t) = arityType env e @@ -1201,7 +1526,7 @@ environment mapping let-bound Ids to their ArityType. idArityType :: Id -> ArityType idArityType v | strict_sig <- idDmdSig v - , not $ isTopSig strict_sig + , not $ isNopSig strict_sig , (ds, div) <- splitDmdSig strict_sig , let arity = length ds -- Every strictness signature admits an arity signature! @@ -1209,8 +1534,8 @@ idArityType v | otherwise = AT (take (idArity v) one_shots) topDiv where - one_shots :: [OneShotInfo] -- One-shot-ness derived from the type - one_shots = typeOneShots (idType v) + one_shots :: [(Cost,OneShotInfo)] -- One-shot-ness derived from the type + one_shots = repeat IsCheap `zip` typeOneShots (idType v) {- %************************************************************************ @@ -1319,7 +1644,7 @@ Consider We'll get an ArityType for foo of \?1.T. Then we want to eta-expand to - foo = (\x. \eta{os}. (case x of ...as before...) eta)) |> some_co + foo = (\x. \eta{os}. (case x of ...as before...) eta) |> some_co That 'eta' binder is fresh, and we really want it to have the one-shot flag from the inner \s{os}. 
By expanding with the @@ -1347,14 +1672,14 @@ etaExpand n orig_expr in_scope = {-#SCC "eta_expand:in-scopeX" #-} mkInScopeSet (exprFreeVars orig_expr) -etaExpandAT :: InScopeSet -> ArityType -> CoreExpr -> CoreExpr +etaExpandAT :: InScopeSet -> SafeArityType -> CoreExpr -> CoreExpr -- See Note [Eta expansion with ArityType] -- -- We pass in the InScopeSet from the simplifier to avoid recomputing -- it here, which can be jolly expensive if the casts are big -- In #18223 it took 10% of compile time just to do the exprFreeVars! -etaExpandAT in_scope (AT oss _) orig_expr - = eta_expand in_scope oss orig_expr +etaExpandAT in_scope at orig_expr + = eta_expand in_scope (arityTypeOneShots at) orig_expr -- etaExpand arity e = res -- Then 'res' has at least 'arity' lambdas at the top @@ -1369,7 +1694,11 @@ etaExpandAT in_scope (AT oss _) orig_expr eta_expand :: InScopeSet -> [OneShotInfo] -> CoreExpr -> CoreExpr eta_expand in_scope one_shots (Cast expr co) - = Cast (eta_expand in_scope one_shots expr) co + = mkCast (eta_expand in_scope one_shots expr) co + -- This mkCast is important, because eta_expand might return an + -- expression with a cast at the outside; and tryCastWorkerWrapper + -- asssumes that we don't have nested casts. Makes a difference + -- in compile-time for T18223 eta_expand in_scope one_shots orig_expr = go in_scope one_shots [] orig_expr @@ -1440,7 +1769,7 @@ casts complicate the question. If we have and e :: N (N Int) then the eta-expansion should look like - (\(x::S) (y::S) -> e |> co x y) |> sym co + (\(x::S) (y::S) -> (e |> co) x y) |> sym co where co :: N (N Int) ~ S -> S -> Int co = axN @(N Int) ; (S -> axN @Int) @@ -1619,11 +1948,11 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- (go [o1,..,on] subst ty) = (in_scope, EI [b1,..,bn] co) -- co :: subst(ty) ~ b1_ty -> ... -> bn_ty -> tr - go _ [] subst _ -- See Note [exprArity invariant] + go _ [] subst _ ----------- Done! No more expansion needed = (getTCvInScope subst, EI [] MRefl) - go n oss@(one_shot:oss1) subst ty -- See Note [exprArity invariant] + go n oss@(one_shot:oss1) subst ty ----------- Forall types (forall a. ty) | Just (tcv,ty') <- splitForAllTyCoVar_maybe ty , (subst', tcv') <- Type.substVarBndr subst tcv @@ -1676,6 +2005,428 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- with an explicit lambda having a non-function type +{- +************************************************************************ +* * + Eta reduction +* * +************************************************************************ + +Note [Eta reduction makes sense] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +We discuss when this is /sound/ in Note [Eta reduction soundness]. +But even assuming it is sound, when is it /desirable/. That +is what we discuss here. + +This test is made by `ok_fun` in tryEtaReduce. + +1. We want to eta-reduce only if we get all the way to a trivial + expression; we don't want to remove extra lambdas unless we are + going to avoid allocating this thing altogether. + + Trivial means *including* casts and type lambdas: + * `\x. f x |> co --> f |> (ty(x) -> co)` (provided `co` doesn't mention `x`) + * `/\a. \x. f @(Maybe a) x --> /\a. f @(Maybe a)` + See Note [Do not eta reduce PAPs] for why we insist on a trivial head. + +2. Type and dictionary abstraction. 
Regardless of whether 'f' is a value, it + is always sound to reduce /type lambdas/, thus: + (/\a -> f a) --> f + Moreover, we always want to, because it makes RULEs apply more often: + This RULE: `forall g. foldr (build (/\a -> g a))` + should match `foldr (build (/\b -> ...something complex...))` + and the simplest way to do so is eta-reduce `/\a -> g a` in the RULE to `g`. + + The type checker can insert these eta-expanded versions, + with both type and dictionary lambdas; hence the slightly + ad-hoc (all ok_lam bndrs) + +3. (See fun_arity in tryEtaReduce.) We have to hide `f`'s `idArity` in + its own RHS, lest we suffer from the last point of Note [Arity + robustness] in GHC.Core.Opt.Simplify.Env. There we have `f = \x. f x` + and we should not eta-reduce to `f=f`. Which might change a + terminating program (think @f `seq` e@) to a non-terminating one. + So we check for being a loop breaker first. However for GlobalIds + we can look at the arity; and for primops we must, since they have + no unfolding. [SG: Perhaps this is rather a soundness subtlety?] + +Of course, eta reduction is not always sound. See Note [Eta reduction soundness] +for when it is. + +When there are multiple arguments, we might get multiple eta-redexes. Example: + \x y. e x y + ==> { reduce \y. (e x) y in context \x._ } + \x. e x + ==> { reduce \x. e x in context _ } + e +And (1) implies that we never want to stop with `\x. e x`, because that is not a +trivial expression. So in practice, the implementation works by considering a +whole group of leading lambdas to reduce. + +These delicacies are why we don't simply use 'exprIsTrivial' and 'exprIsHNF' +in 'tryEtaReduce'. Alas. + +Note [Eta reduction soundness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +GHC's eta reduction transforms + \x y. <fun> x y ---> <fun> +For soundness, we obviously require that `x` and `y` +to not occur free. But what /other/ restrictions are there for +eta reduction to be sound? + +We discuss separately what it means for eta reduction to be +/desirable/, in Note [Eta reduction makes sense]. + +Eta reduction is *not* a sound transformation in general, because it +may change termination behavior if *value* lambdas are involved: + `bot` /= `\x. bot x` (as can be observed by a simple `seq`) +The past has shown that oversight of this fact can not only lead to endless +loops or exceptions, but also straight out *segfaults*. + +Nevertheless, we can give the following criteria for when it is sound to +perform eta reduction on an expression with n leading lambdas `\xs. e xs` +(checked in 'is_eta_reduction_sound' in 'tryEtaReduce', which focuses on the +case where `e` is trivial): + + A. It is sound to eta-reduce n arguments as long as n does not exceed the + `exprArity` of `e`. (Needs Arity analysis.) + This criterion exploits information about how `e` is *defined*. + + Example: If `e = \x. bot` then we know it won't diverge until it is called + with one argument. Hence it is safe to eta-reduce `\x. e x` to `e`. + By contrast, it would be *unsound* to eta-reduce 2 args, `\x y. e x y` to `e`: + `e 42` diverges when `(\x y. e x y) 42` does not. + + S. It is sound to eta-reduce n arguments in an evaluation context in which all + calls happen with at least n arguments. (Needs Strictness analysis.) + NB: This treats evaluations like a call with 0 args. + NB: This criterion exploits information about how `e` is *used*. + + Example: Given a function `g` like + `g c = Just (c 1 2 + c 2 3)` + it is safe to eta-reduce the arg in `g (\x y. 
e x y)` to `g e` without + knowing *anything* about `e` (perhaps it's a parameter occ itself), simply + because `g` always calls its parameter with 2 arguments. + It is also safe to eta-reduce just one arg, e.g., `g (\x. e x)` to `g e`. + By contrast, it would *unsound* to eta-reduce 3 args in a call site + like `g (\x y z. e x y z)` to `g e`, because that diverges when + `e = \x y. bot`. + + Could we relax to "*At least one call in the same trace* is with n args"? + (NB: Strictness analysis can only answer this relaxed question, not the + original formulation.) + Consider what happens for + ``g2 c = c True `seq` c False 42`` + Here, `g2` will call `c` with 2 arguments (if there is a call at all). + But it is unsound to eta-reduce the arg in `g2 (\x y. e x y)` to `g2 e` + when `e = \x. if x then bot else id`, because the latter will diverge when + the former would not. + + On the other hand, with `-fno-pendantic-bottoms` , we will have eta-expanded + the definition of `e` and then eta-reduction is sound + (see Note [Dealing with bottom]). + Consequence: We have to check that `-fpedantic-bottoms` is off; otherwise + eta-reduction based on demands is in fact unsound. + + See Note [Eta reduction based on evaluation context] for the implementation + details. This criterion is tested extensively in T21261. + + E. (See fun_arity in tryEtaReduce.) As a perhaps special case on the + boundary of (A) and (S), when we know that a fun binder `f` is in + WHNF, we simply assume it has arity 1 and apply (A). Example: + g f = f `seq` \x. f x + Here it's sound eta-reduce `\x. f x` to `f`, because `f` can't be bottom + after the `seq`. This turned up in #7542. + +And here are a few more technical criteria for when it is *not* sound to +eta-reduce that are specific to Core and GHC: + + L. With linear types, eta-reduction can break type-checking: + f :: A ⊸ B + g :: A -> B + g = \x. f x + The above is correct, but eta-reducing g would yield g=f, the linter will + complain that g and f don't have the same type. NB: Not unsound in the + dynamic semantics, but unsound according to the static semantics of Core. + + J. We may not undersaturate join points. + See Note [Invariants on join points] in GHC.Core, and #20599. + + B. We may not undersaturate functions with no binding. + See Note [Eta expanding primops]. + + W. We may not undersaturate StrictWorkerIds. + See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + +Here is a list of historic accidents surrounding unsound eta-reduction: + +* Consider + f = \x.f x + h y = case (case y of { True -> f `seq` True; False -> False }) of + True -> ...; False -> ... + If we (unsoundly) eta-reduce f to get f=f, the strictness analyser + says f=bottom, and replaces the (f `seq` True) with just + (f `cast` unsafe-co). + [SG in 2022: I don't think worker/wrapper would do this today.] + BUT, as things stand, 'f' got arity 1, and it *keeps* arity 1 (perhaps also + wrongly). So CorePrep eta-expands the definition again, so that it does not + terminate after all. + Result: seg-fault because the boolean case actually gets a function value. + See #1947. + +* Never *reduce* arity. For example + f = \xy. g x y + Then if h has arity 1 we don't want to eta-reduce because then + f's arity would decrease, and that is bad + [SG in 2022: I don't understand this point. There is no `h`, perhaps that + should have been `g`. Even then, this proposed eta-reduction is invalid by + criterion (A), which might actually be the point this anecdote is trying to + make. 
Perhaps the "no arity decrease" idea is also related to + Note [Arity robustness]?] + +Note [Do not eta reduce PAPs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +I considered eta-reducing if the result is a PAP: + \x. f e1 e2 x ==> f e1 e2 + +This reduces clutter, sometimes a lot. See Note [Do not eta-expand PAPs] +in GHC.Core.Opt.Simplify.Utils, where we are careful not to eta-expand +a PAP. If eta-expanding is bad, then eta-reducing is good! + +Also the code generator likes eta-reduced PAPs; see GHC.CoreToStg.Prep +Note [No eta reduction needed in rhsToBody]. + +But note that we don't want to eta-reduce + \x y. f <expensive> x y +to + f <expensive> +The former has arity 2, and repeats <expensive> for every call of the +function; the latter has arity 0, and shares <expensive>. We don't want +to change behaviour. Hence the call to exprIsCheap in ok_fun. + +I noticed this when examining #18993 and, although it is delicate, +eta-reducing to a PAP happens to fix the regression in #18993. + +HOWEVER, if we transform + \x. f y x ==> f y +that might mean that f isn't saturated any more, and does not inline. +This led to some other regressions. + +TL;DR currrently we do /not/ eta reduce if the result is a PAP. + +Note [Eta reduction with casted arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + (\(x:t3). f (x |> g)) :: t3 -> t2 + where + f :: t1 -> t2 + g :: t3 ~ t1 +This should be eta-reduced to + + f |> (sym g -> t2) + +So we need to accumulate a coercion, pushing it inward (past +variable arguments only) thus: + f (x |> co_arg) |> co --> (f |> (sym co_arg -> co)) x + f (x:t) |> co --> (f |> (t -> co)) x + f @ a |> co --> (f |> (forall a.co)) @ a + f @ (g:t1~t2) |> co --> (f |> (t1~t2 => co)) @ (g:t1~t2) +These are the equations for ok_arg. + +Note [Eta reduction with casted function] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since we are pushing a coercion inwards, it is easy to accommodate + (\xy. (f x |> g) y) + (\xy. (f x y) |> g) + +See the `(Cast e co)` equation for `go` in `tryEtaReduce`. The +eta-expander pushes those casts outwards, so you might think we won't +ever see a cast here, but if we have + \xy. (f x y |> g) +we will call tryEtaReduce [x,y] (f x y |> g), and we'd like that to +work. This happens in GHC.Core.Opt.Simplify.Utils.mkLam, where +eta-expansion may be turned off (by sm_eta_expand). + +Note [Eta reduction based on evaluation context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Eta reduction soundness], criterion (S) allows us to eta-reduce +`g (\x y. e x y)` to `g e` when we know that `g` always calls its parameter with +at least 2 arguments. So how do we read that off `g`'s demand signature? + +Let's take the simple example of #21261, where `g` (actually, `f`) is defined as + g c = c 1 2 + c 3 4 +Then this is how the pieces are put together: + + * Demand analysis infers `<SCS(C1(L))>` for `g`'s demand signature + + * When the Simplifier next simplifies the argument in `g (\x y. e x y)`, it + looks up the *evaluation context* of the argument in the form of the + sub-demand `CS(C1(L))` and stores it in the 'SimplCont'. + (Why does it drop the outer evaluation cardinality of the demand, `S`? + Because it's irrelevant! When we simplify an expression, we do so under the + assumption that it is currently under evaluation.) + This sub-demand literally says "Whenever this expression is evaluated, it + is also called with two arguments, potentially multiple times". 
+
+ * Then the simplifier takes apart the lambda and simplifies the lambda group
+ and then calls 'tryEtaReduce' when rebuilding the lambda, passing the
+ evaluation context `CS(C1(L))` along. Then we simply peel off 2 call
+ sub-demands `Cn` and see whether all of the n's (here: `S=C_1N` and
+ `1=C_11`) were strict. And strict they are! Thus, it will eta-reduce
+ `\x y. e x y` to `e`.
+-}
+
+-- | `tryEtaReduce [x,y,z] e sd` returns `Just e'` if `\x y z -> e` is evaluated
+-- according to `sd` and can soundly and gainfully be eta-reduced to `e'`.
+-- See Note [Eta reduction soundness]
+-- and Note [Eta reduction makes sense] for when that is the case.
+tryEtaReduce :: [Var] -> CoreExpr -> SubDemand -> Maybe CoreExpr
+-- Return an expression equal to (\bndrs. body)
+tryEtaReduce bndrs body eval_sd
+ = go (reverse bndrs) body (mkRepReflCo (exprType body))
+ where
+ incoming_arity = count isId bndrs -- See Note [Eta reduction makes sense], point (2)
+
+ go :: [Var] -- Binders, innermost first, types [a3,a2,a1]
+ -> CoreExpr -- Of type tr
+ -> Coercion -- Of type tr ~ ts
+ -> Maybe CoreExpr -- Of type a1 -> a2 -> a3 -> ts
+ -- See Note [Eta reduction with casted arguments]
+ -- for why we have an accumulating coercion
+ --
+ -- Invariant: (go bs body co) returns an expression
+ -- equivalent to (\(reverse bs). body |> co)
+
+ -- See Note [Eta reduction with casted function]
+ go bs (Cast e co1) co2
+ = go bs e (co1 `mkTransCo` co2)
+
+ go bs (Tick t e) co
+ | tickishFloatable t
+ = fmap (Tick t) $ go bs e co
+ -- Float app ticks: \x -> Tick t (e x) ==> Tick t e
+
+ go (b : bs) (App fun arg) co
+ | Just (co', ticks) <- ok_arg b arg co (exprType fun)
+ = fmap (flip (foldr mkTick) ticks) $ go bs fun co'
+ -- Float arg ticks: \x -> e (Tick t x) ==> Tick t e
+
+ go remaining_bndrs fun co
+ | all isTyVar remaining_bndrs
+ -- If all the remaining_bndrs are tyvars, then the etad_expr
+ -- will be trivial, which is what we want.
+ -- e.g. We might have /\a \b. f [a] b, and we want to
+ -- eta-reduce to /\a. f [a]
+ -- We don't want to give up on this one: see #20040
+ -- See Note [Eta reduction makes sense], point (1)
+ , remaining_bndrs `ltLength` bndrs
+ -- Only reply Just if /something/ has happened
+ , ok_fun fun
+ , let etad_expr = mkLams (reverse remaining_bndrs) (mkCast fun co)
+ used_vars = exprFreeVars etad_expr
+ reduced_bndrs = mkVarSet (dropList remaining_bndrs bndrs)
+ , used_vars `disjointVarSet` reduced_bndrs
+ -- Check for any of the binders free in the result,
+ -- including the accumulated coercion
+ -- See Note [Eta reduction makes sense], intro and point (1)
+ = Just etad_expr
+
+ go _remaining_bndrs _fun _ = -- pprTrace "tER fail" (ppr _fun $$ ppr _remaining_bndrs) $
+ Nothing
+
+ ---------------
+ -- See Note [Eta reduction makes sense], point (1)
+ ok_fun (App fun (Type {})) = ok_fun fun
+ ok_fun (Cast fun _) = ok_fun fun
+ ok_fun (Tick _ expr) = ok_fun expr
+ ok_fun (Var fun_id) = is_eta_reduction_sound fun_id || all ok_lam bndrs
+ ok_fun _fun = False
+
+ ---------------
+ -- See Note [Eta reduction soundness], this is THE place to check soundness!
+ is_eta_reduction_sound fun =
+ -- Check that eta-reduction won't make the program stricter...
+ (fun_arity fun >= incoming_arity -- criteria (A) and (E)
+ || all_calls_with_arity incoming_arity) -- criterion (S)
+ -- ... 
and that the function can be eta reduced to arity 0 + -- without violating invariants of Core and GHC + && canEtaReduceToArity fun 0 0 -- criteria (L), (J), (W), (B) + all_calls_with_arity n = isStrict (peelManyCalls n eval_sd) + -- See Note [Eta reduction based on evaluation context] + + --------------- + fun_arity fun + | isLocalId fun + , isStrongLoopBreaker (idOccInfo fun) = 0 + -- See Note [Eta reduction makes sense], point (3) + | arity > 0 = arity + | isEvaldUnfolding (idUnfolding fun) = 1 + -- See Note [Eta reduction soundness], criterion (E) + | otherwise = 0 + where + arity = idArity fun + + --------------- + ok_lam v = isTyVar v || isEvVar v + -- See Note [Eta reduction makes sense], point (2) + + --------------- + ok_arg :: Var -- Of type bndr_t + -> CoreExpr -- Of type arg_t + -> Coercion -- Of kind (t1~t2) + -> Type -- Type (arg_t -> t1) of the function + -- to which the argument is supplied + -> Maybe (Coercion -- Of type (arg_t -> t1 ~ bndr_t -> t2) + -- (and similarly for tyvars, coercion args) + , [CoreTickish]) + -- See Note [Eta reduction with casted arguments] + ok_arg bndr (Type ty) co _ + | Just tv <- getTyVar_maybe ty + , bndr == tv = Just (mkHomoForAllCos [tv] co, []) + ok_arg bndr (Var v) co fun_ty + | bndr == v + , let mult = idMult bndr + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , mult `eqType` fun_mult -- There is no change in multiplicity, otherwise we must abort + = Just (mkFunResCo Representational (idScaledType bndr) co, []) + ok_arg bndr (Cast e co_arg) co fun_ty + | (ticks, Var v) <- stripTicksTop tickishFloatable e + , Just (fun_mult, _, _) <- splitFunTy_maybe fun_ty + , bndr == v + , fun_mult `eqType` idMult bndr + = Just (mkFunCo Representational (multToCo fun_mult) (mkSymCo co_arg) co, ticks) + -- The simplifier combines multiple casts into one, + -- so we can have a simple-minded pattern match here + ok_arg bndr (Tick t arg) co fun_ty + | tickishFloatable t, Just (co', ticks) <- ok_arg bndr arg co fun_ty + = Just (co', t:ticks) + + ok_arg _ _ _ _ = Nothing + +-- | Can we eta-reduce the given function to the specified arity? +-- See Note [Eta reduction soundness], criteria (B), (J), (W) and (L). +canEtaReduceToArity :: Id -> JoinArity -> Arity -> Bool +canEtaReduceToArity fun dest_join_arity dest_arity = + not $ + hasNoBinding fun -- (B) + -- Don't undersaturate functions with no binding. + + || ( isJoinId fun && dest_join_arity < idJoinArity fun ) -- (J) + -- Don't undersaturate join points. + -- See Note [Invariants on join points] in GHC.Core, and #20599 + + || ( dest_arity < idCbvMarkArity fun ) -- (W) + -- Don't undersaturate StrictWorkerIds. + -- See Note [Strict Worker Ids] in GHC.CoreToStg.Prep. + + || isLinearType (idType fun) -- (L) + -- Don't perform eta reduction on linear types. + -- If `f :: A %1-> B` and `g :: A -> B`, + -- then `g x = f x` is OK but `g = f` is not. 
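As a concrete illustration of criterion (S) and of Note [Eta reduction based
on evaluation context], here is a small source-level Haskell sketch (an
illustration only, not code from this patch; the names g, e, before and after
are made up) of the #21261 shape that tryEtaReduce above handles:

  -- 'g' always calls its functional argument with two arguments, so
  -- demand analysis gives it a signature of the shape <SCS(C1(L))>.
  g :: (Int -> Int -> Int) -> Int
  g c = c 1 2 + c 3 4

  e :: Int -> Int -> Int
  e x y = x * y + 1

  -- Before: the argument is an explicit eta-expansion of 'e'.
  before :: Int
  before = g (\x y -> e x y)

  -- After: tryEtaReduce may rewrite the argument to plain 'e'; this is
  -- sound because 'g' calls its argument with both arguments whenever it
  -- calls it at all (criterion (S)), provided -fpedantic-bottoms is off.
  after :: Int
  after = g e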
+ + {- ********************************************************************* * * The "push rules" diff --git a/compiler/GHC/Core/Opt/CallArity.hs b/compiler/GHC/Core/Opt/CallArity.hs index 67b9a88875..306b3bd446 100644 --- a/compiler/GHC/Core/Opt/CallArity.hs +++ b/compiler/GHC/Core/Opt/CallArity.hs @@ -17,7 +17,7 @@ import GHC.Types.Var.Env import GHC.Types.Basic import GHC.Core import GHC.Types.Id -import GHC.Core.Opt.Arity ( typeArity, typeOneShots ) +import GHC.Core.Opt.Arity ( typeArity ) import GHC.Core.Utils ( exprIsCheap, exprIsTrivial ) import GHC.Data.Graph.UnVar import GHC.Types.Demand @@ -377,15 +377,14 @@ a body representing “all external calls”, which returns a pessimistic CallArityRes (the co-call graph is the complete graph, all arityies 0). Note [Trimming arity] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - +~~~~~~~~~~~~~~~~~~~~~ In the Call Arity papers, we are working on an untyped lambda calculus with no other id annotations, where eta-expansion is always possible. But this is not the case for Core! 1. We need to ensure the invariant callArity e <= typeArity (exprType e) for the same reasons that exprArity needs this invariant (see Note - [exprArity invariant] in GHC.Core.Opt.Arity). + [typeArity invariants] in GHC.Core.Opt.Arity). If we are not doing that, a too-high arity annotation will be stored with the id, confusing the simplifier later on. @@ -544,7 +543,7 @@ callArityAnal arity int (Let bind e) -- Which bindings should we look at? -- See Note [Which variables are interesting] isInteresting :: Var -> Bool -isInteresting v = not $ null $ typeOneShots $ idType v +isInteresting v = typeArity (idType v) > 0 interestingBinds :: CoreBind -> [Var] interestingBinds = filter isInteresting . bindersOf diff --git a/compiler/GHC/Core/Opt/FloatIn.hs b/compiler/GHC/Core/Opt/FloatIn.hs index 88411a7add..cf3ca726e4 100644 --- a/compiler/GHC/Core/Opt/FloatIn.hs +++ b/compiler/GHC/Core/Opt/FloatIn.hs @@ -22,13 +22,14 @@ import GHC.Prelude import GHC.Platform import GHC.Core +import GHC.Core.Opt.Arity( isOneShotBndr ) import GHC.Core.Make hiding ( wrapFloats ) import GHC.Core.Utils import GHC.Core.FVs import GHC.Core.Type import GHC.Types.Basic ( RecFlag(..), isRec, Levity(Unlifted) ) -import GHC.Types.Id ( isOneShotBndr, idType, isJoinId, isJoinId_maybe ) +import GHC.Types.Id ( idType, isJoinId, isJoinId_maybe ) import GHC.Types.Tickish import GHC.Types.Var import GHC.Types.Var.Set diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs index 304ed12c2d..6e0fa12543 100644 --- a/compiler/GHC/Core/Opt/OccurAnal.hs +++ b/compiler/GHC/Core/Opt/OccurAnal.hs @@ -28,7 +28,7 @@ import GHC.Core import GHC.Core.FVs import GHC.Core.Utils ( exprIsTrivial, isDefaultAlt, isExpandableApp, stripTicksTopE, mkTicks ) -import GHC.Core.Opt.Arity ( joinRhsArity ) +import GHC.Core.Opt.Arity ( joinRhsArity, isOneShotBndr ) import GHC.Core.Coercion import GHC.Core.Type import GHC.Core.TyCo.FVs( tyCoVarsOfMCo ) @@ -1755,7 +1755,7 @@ lambda and casts, e.g. * Why do we take care to account for intervening casts? Answer: currently we don't do eta-expansion and cast-swizzling in a stable - unfolding (see Note [Eta-expansion in stable unfoldings]). + unfolding (see Historical-note [Eta-expansion in stable unfoldings]). So we can get f = \x. ((\y. ...x...y...) 
|> co) Now, since the lambdas aren't together, the occurrence analyser will diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs index 9e2376da45..a8a99ba42f 100644 --- a/compiler/GHC/Core/Opt/SetLevels.hs +++ b/compiler/GHC/Core/Opt/SetLevels.hs @@ -85,7 +85,7 @@ import GHC.Core.Utils ( exprType, exprIsHNF , collectMakeStaticArgs , mkLamTypes ) -import GHC.Core.Opt.Arity ( exprBotStrictness_maybe ) +import GHC.Core.Opt.Arity ( exprBotStrictness_maybe, isOneShotBndr ) import GHC.Core.FVs -- all of it import GHC.Core.Subst import GHC.Core.Make ( sortQuantVars ) @@ -1384,9 +1384,11 @@ lvlLamBndrs env lvl bndrs new_lvl | any is_major bndrs = incMajorLvl lvl | otherwise = incMinorLvl lvl - is_major bndr = isId bndr && not (isProbablyOneShotLambda bndr) - -- The "probably" part says "don't float things out of a - -- probable one-shot lambda" + is_major bndr = not (isOneShotBndr bndr) + -- Only non-one-shot lambdas bump a major level, which in + -- turn triggers floating. NB: isOneShotBndr is always + -- true of a type variable -- there is no point in floating + -- out of a big lambda. -- See Note [Computing one-shot info] in GHC.Types.Demand lvlJoinBndrs :: LevelEnv -> Level -> RecFlag -> [OutVar] diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index 445fabe682..f87a28f440 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -38,9 +38,9 @@ import GHC.Core.Ppr ( pprCoreExpr ) import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Core.Utils -import GHC.Core.Opt.Arity ( ArityType(..), typeArity +import GHC.Core.Opt.Arity ( ArityType, exprArity, getBotArity , pushCoTyArg, pushCoValArg - , etaExpandAT ) + , typeArity, arityTypeArity, etaExpandAT ) import GHC.Core.SimpleOpt ( exprIsConApp_maybe, joinPointBinding_maybe, joinPointBindings_maybe ) import GHC.Core.FVs ( mkRuleInfo ) import GHC.Core.Rules ( lookupRule, getRules, initRuleOpts ) @@ -352,7 +352,8 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se -- See Note [Floating and type abstraction] in GHC.Core.Opt.Simplify.Utils -- Simplify the RHS - ; let rhs_cont = mkRhsStop (substTy body_env (exprType body)) (idDemandInfo bndr) + ; let rhs_cont = mkRhsStop (substTy body_env (exprType body)) + is_rec (idDemandInfo bndr) ; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont -- ANF-ise a constructor or PAP rhs @@ -375,11 +376,11 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se {-#SCC "simplLazyBind-type-abstraction-first" #-} do { (poly_binds, body3) <- abstractFloats (seUnfoldingOpts env) top_lvl tvs' body_floats2 body2 - ; let floats = foldl' extendFloats (emptyFloats env) poly_binds - ; return (floats, body3) } + ; let poly_floats = foldl' extendFloats (emptyFloats env) poly_binds + ; return (poly_floats, body3) } ; let env' = env `setInScopeFromF` rhs_floats - ; rhs' <- mkLam env' tvs' body3 rhs_cont + ; rhs' <- rebuildLam env' tvs' body3 rhs_cont ; (bind_float, env2) <- completeBind env' (BC_Let top_lvl is_rec) bndr bndr1 rhs' ; return (rhs_floats `addFloats` bind_float, env2) } @@ -598,7 +599,7 @@ tryCastWorkerWrapper env bind_cxt old_bndr occ_info bndr (Cast rhs co) -- a DFunUnfolding in mk_worker_unfolding , not (exprIsTrivial rhs) -- Not x = y |> co; Wrinkle 1 , not (hasInlineUnfolding info) -- Not INLINE things: Wrinkle 4 - , isConcrete (typeKind rhs_ty) -- Don't peel off a cast if doing so would + , isConcrete (typeKind work_ty) -- Don't peel off a cast if doing so would -- lose the 
underlying runtime representation. -- See Note [Preserve RuntimeRep info in cast w/w] , not (isOpaquePragma (idInlinePragma old_bndr)) -- Not for OPAQUE bindings @@ -661,7 +662,9 @@ tryCastWorkerWrapper env bind_cxt old_bndr occ_info bndr (Cast rhs co) _ -> mkLetUnfolding (sm_uf_opts mode) top_lvl InlineRhs work_id work_rhs tryCastWorkerWrapper env _ _ _ bndr rhs -- All other bindings - = return (mkFloatBind env (NonRec bndr rhs)) + = do { traceSmpl "tcww:no" (vcat [ text "bndr:" <+> ppr bndr + , text "rhs:" <+> ppr rhs ]) + ; return (mkFloatBind env (NonRec bndr rhs)) } mkCastWrapperInlinePrag :: InlinePragma -> InlinePragma -- See Note [Cast worker/wrapper] @@ -699,6 +702,7 @@ prepareBinding :: SimplEnv -> TopLevelFlag -> RecFlag -> Bool -- bndr = K a a tmp -- That's what prepareBinding does -- Precondition: binder is not a JoinId +-- Postcondition: the returned SimplFloats contains only let-floats prepareBinding env top_lvl is_rec strict_bind bndr rhs_floats rhs = do { -- Never float join-floats out of a non-join let-binding (which this is) -- So wrap the body in the join-floats right now @@ -822,30 +826,15 @@ makeTrivial env top_lvl dmd occ_fs expr = do { (floats, triv_expr) <- makeTrivial env top_lvl dmd occ_fs expr' ; return (floats, Cast triv_expr co) } - | otherwise - = do { (floats, new_id) <- makeTrivialBinding env top_lvl occ_fs - id_info expr expr_ty - ; return (floats, Var new_id) } - where - id_info = vanillaIdInfo `setDemandInfo` dmd - expr_ty = exprType expr - -makeTrivialBinding :: HasDebugCallStack - => SimplEnv -> TopLevelFlag - -> FastString -- ^ a "friendly name" to build the new binder from - -> IdInfo - -> OutExpr - -> OutType -- Type of the expression - -> SimplM (LetFloats, OutId) -makeTrivialBinding env top_lvl occ_fs info expr expr_ty + | otherwise -- 'expr' is not of form (Cast e co) = do { (floats, expr1) <- prepareRhs env top_lvl occ_fs expr ; uniq <- getUniqueM ; let name = mkSystemVarName uniq occ_fs - var = mkLocalIdWithInfo name Many expr_ty info + var = mkLocalIdWithInfo name Many expr_ty id_info -- Now something very like completeBind, -- but without the postInlineUnconditionally part - ; (arity_type, expr2) <- tryEtaExpandRhs env NonRecursive var expr1 + ; (arity_type, expr2) <- tryEtaExpandRhs env (BC_Let top_lvl NonRecursive) var expr1 -- Technically we should extend the in-scope set in 'env' with -- the 'floats' from prepareRHS; but they are all fresh, so there is -- no danger of introducing name shadowig in eta expansion @@ -855,9 +844,12 @@ makeTrivialBinding env top_lvl occ_fs info expr expr_ty ; let final_id = addLetBndrInfo var arity_type unf bind = NonRec final_id expr2 - ; return ( floats `addLetFlts` unitLetFloat bind, final_id ) } + ; traceSmpl "makeTrivial" (vcat [text "final_id" <+> ppr final_id, text "rhs" <+> ppr expr2 ]) + ; return ( floats `addLetFlts` unitLetFloat bind, Var final_id ) } where - mode = getMode env + id_info = vanillaIdInfo `setDemandInfo` dmd + expr_ty = exprType expr + mode = getMode env bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool -- True iff we can have a binding of this expression at this level @@ -945,7 +937,7 @@ completeBind env bind_cxt old_bndr new_bndr new_rhs -- Do eta-expansion on the RHS of the binding -- See Note [Eta-expanding at let bindings] in GHC.Core.Opt.Simplify.Utils - ; (new_arity, eta_rhs) <- tryEtaExpandRhs env is_rec new_bndr new_rhs + ; (new_arity, eta_rhs) <- tryEtaExpandRhs env bind_cxt new_bndr new_rhs -- Simplify the unfolding ; new_unfolding <- simplLetUnfolding env bind_cxt 
old_bndr @@ -975,9 +967,7 @@ addLetBndrInfo :: OutId -> ArityType -> Unfolding -> OutId addLetBndrInfo new_bndr new_arity_type new_unf = new_bndr `setIdInfo` info5 where - AT oss div = new_arity_type - new_arity = length oss - + new_arity = arityTypeArity new_arity_type info1 = idInfo new_bndr `setArityInfo` new_arity -- Unfolding info: Note [Setting the new unfolding] @@ -990,12 +980,11 @@ addLetBndrInfo new_bndr new_arity_type new_unf = info2 -- Bottoming bindings: see Note [Bottoming bindings] - info4 | isDeadEndDiv div = info3 `setDmdSigInfo` bot_sig - `setCprSigInfo` bot_cpr - | otherwise = info3 - - bot_sig = mkClosedDmdSig (replicate new_arity topDmd) div - bot_cpr = mkCprSig new_arity botCpr + info4 = case getBotArity new_arity_type of + Nothing -> info3 + Just ar -> assert (ar == new_arity) $ + info3 `setDmdSigInfo` mkVanillaDmdSig new_arity botDiv + `setCprSigInfo` mkCprSig new_arity botCpr -- Zap call arity info. We have used it by now (via -- `tryEtaExpandRhs`), and the simplifier can invalidate this @@ -1009,12 +998,12 @@ Suppose we have let x = error "urk" in ...(case x of <alts>)... or - let f = \x. error (x ++ "urk") + let f = \y. error (y ++ "urk") in ...(case f "foo" of <alts>)... Then we'd like to drop the dead <alts> immediately. So it's good to -propagate the info that x's RHS is bottom to x's IdInfo as rapidly as -possible. +propagate the info that x's (or f's) RHS is bottom to x's (or f's) +IdInfo as rapidly as possible. We use tryEtaExpandRhs on every binding, and it turns out that the arity computation it performs (via GHC.Core.Opt.Arity.findRhsArity) already @@ -1023,6 +1012,21 @@ is propagate that info to the binder's IdInfo. This showed up in #12150; see comment:16. +There is a second reason for settting the strictness signature. Consider + let -- f :: <[S]b> + f = \x. error "urk" + in ...(f a b c)... +Then, in GHC.Core.Opt.Arity.findRhsArity we'll use the demand-info on `f` +to eta-expand to + let f = \x y z. error "urk" + in ...(f a b c)... + +But now f's strictness signature has too short an arity; see +GHC.Core.Lint Note [Check arity on bottoming functions]. +Fortuitously, the same strictness-signature-fixup code gives the +function a new strictness signature with the right number of +arguments. Example in stranal/should_compile/EtaExpansion. + Note [Setting the demand info] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the unfolding is a value, the demand info may @@ -1689,7 +1693,7 @@ simpl_lam env bndr body cont = do { let (inner_bndrs, inner_body) = collectBinders body ; (env', bndrs') <- simplLamBndrs env (bndr:inner_bndrs) ; body' <- simplExpr env' inner_body - ; new_lam <- mkLam env' bndrs' body' cont + ; new_lam <- rebuildLam env' bndrs' body' cont ; rebuild env' new_lam cont } ------------- @@ -3552,8 +3556,8 @@ mkDupableContWithDmds env dmds -- let a = ...arg... -- in [...hole...] 
a -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable - do { let (dmd:_) = dmds -- Never fails - ; (floats1, cont') <- mkDupableContWithDmds env dmds cont + do { let (dmd:cont_dmds) = dmds -- Never fails + ; (floats1, cont') <- mkDupableContWithDmds env cont_dmds cont ; let env' = env `setInScopeFromF` floats1 ; (_, se', arg') <- simplArg env' dup se arg ; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg' @@ -4086,12 +4090,14 @@ simplStableUnfolding env bind_cxt id rhs_ty id_arity unf CoreUnfolding { uf_tmpl = expr, uf_src = src, uf_guidance = guide } | isStableSource src -> do { expr' <- case bind_cxt of - BC_Join cont -> -- Binder is a join point - -- See Note [Rules and unfolding for join points] - simplJoinRhs unf_env id expr cont - BC_Let {} -> -- Binder is not a join point - do { expr' <- simplExprC unf_env expr (mkBoringStop rhs_ty) - ; return (eta_expand expr') } + BC_Join cont -> -- Binder is a join point + -- See Note [Rules and unfolding for join points] + simplJoinRhs unf_env id expr cont + BC_Let _ is_rec -> -- Binder is not a join point + do { let cont = mkRhsStop rhs_ty is_rec topDmd + -- mkRhsStop: switch off eta-expansion at the top level + ; expr' <- simplExprC unf_env expr cont + ; return (eta_expand expr') } ; case guide of UnfWhen { ug_arity = arity , ug_unsat_ok = sat_ok @@ -4138,11 +4144,13 @@ simplStableUnfolding env bind_cxt id rhs_ty id_arity unf -- See Note [Simplifying inside stable unfoldings] in GHC.Core.Opt.Simplify.Utils -- See Note [Eta-expand stable unfoldings] - eta_expand expr - | not eta_on = expr - | exprIsTrivial expr = expr - | otherwise = etaExpandAT (getInScope env) id_arity expr - eta_on = sm_eta_expand (getMode env) + -- Use the arity from the main Id (in id_arity), rather than computing it from rhs + eta_expand expr | sm_eta_expand (getMode env) + , exprArity expr < arityTypeArity id_arity + , wantEtaExpansion expr + = etaExpandAT (getInScope env) id_arity expr + | otherwise + = expr {- Note [Eta-expand stable unfoldings] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -4166,7 +4174,7 @@ eta-expand the stable unfolding to arity N too. Simple and consistent. Wrinkles -* See Note [Eta-expansion in stable unfoldings] in +* See Historical-note [Eta-expansion in stable unfoldings] in GHC.Core.Opt.Simplify.Utils * Don't eta-expand a trivial expr, else each pass will eta-reduce it, diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs index fa6599b6bc..5defa782e0 100644 --- a/compiler/GHC/Core/Opt/Simplify/Env.hs +++ b/compiler/GHC/Core/Opt/Simplify/Env.hs @@ -20,7 +20,7 @@ module GHC.Core.Opt.Simplify.Env ( getSimplRules, -- * Substitution results - SimplSR(..), mkContEx, substId, lookupRecBndr, refineFromInScope, + SimplSR(..), mkContEx, substId, lookupRecBndr, -- * Simplifying 'Id' binders simplNonRecBndr, simplNonRecJoinBndr, simplRecBndrs, simplRecJoinBndrs, @@ -32,6 +32,7 @@ module GHC.Core.Opt.Simplify.Env ( SimplFloats(..), emptyFloats, isEmptyFloats, mkRecFloats, mkFloatBind, addLetFloats, addJoinFloats, addFloats, extendFloats, wrapFloats, + isEmptyJoinFloats, isEmptyLetFloats, doFloatFromRhs, getTopFloatBinds, -- * LetFloats @@ -519,10 +520,16 @@ so we must take the 'or' of the two. 
emptyLetFloats :: LetFloats emptyLetFloats = LetFloats nilOL FltLifted +isEmptyLetFloats :: LetFloats -> Bool +isEmptyLetFloats (LetFloats fs _) = isNilOL fs + emptyJoinFloats :: JoinFloats emptyJoinFloats = nilOL -unitLetFloat :: HasDebugCallStack => OutBind -> LetFloats +isEmptyJoinFloats :: JoinFloats -> Bool +isEmptyJoinFloats = isNilOL + +unitLetFloat :: OutBind -> LetFloats -- This key function constructs a singleton float with the right form unitLetFloat bind = assert (all (not . isJoinId) (bindersOf bind)) $ LetFloats (unitOL bind) (flag bind) @@ -801,7 +808,6 @@ simplRecBndrs env@(SimplEnv {}) ids do { let (!env1, ids1) = mapAccumL substIdBndr env ids ; seqIds ids1 `seq` return env1 } - --------------- substIdBndr :: SimplEnv -> InBndr -> (SimplEnv, OutBndr) -- Might be a coercion variable @@ -1028,7 +1034,7 @@ getTCvSubst (SimplEnv { seInScope = in_scope, seTvSubst = tv_env , seCvSubst = cv_env }) = mkTCvSubst in_scope (tv_env, cv_env) -substTy :: SimplEnv -> Type -> Type +substTy :: HasDebugCallStack => SimplEnv -> Type -> Type substTy env ty = Type.substTy (getTCvSubst env) ty substTyVar :: SimplEnv -> TyVar -> Type diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index 8afaef82ce..d0a7abb84f 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -8,7 +8,8 @@ The simplifier utilities module GHC.Core.Opt.Simplify.Utils ( -- Rebuilding - mkLam, mkCase, prepareAlts, tryEtaExpandRhs, + rebuildLam, mkCase, prepareAlts, + tryEtaExpandRhs, wantEtaExpansion, -- Inlining, preInlineUnconditionally, postInlineUnconditionally, @@ -23,9 +24,9 @@ module GHC.Core.Opt.Simplify.Utils ( SimplCont(..), DupFlag(..), StaticEnv, isSimplified, contIsStop, contIsDupable, contResultType, contHoleType, contHoleScaling, - contIsTrivial, contArgs, + contIsTrivial, contArgs, contIsRhs, countArgs, - mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg, + mkBoringStop, mkRhsStop, mkLazyArgStop, interestingCallContext, -- ArgInfo @@ -335,7 +336,7 @@ instance Outputable ArgInfo where ppr (ArgInfo { ai_fun = fun, ai_args = args, ai_dmds = dmds }) = text "ArgInfo" <+> braces (sep [ text "fun =" <+> ppr fun - , text "dmds =" <+> ppr dmds + , text "dmds(first 10) =" <+> ppr (take 10 dmds) , text "args =" <+> ppr args ]) instance Outputable ArgSpec where @@ -428,8 +429,9 @@ mkFunRules rs = Just (n_required, rs) mkBoringStop :: OutType -> SimplCont mkBoringStop ty = Stop ty BoringCtxt topSubDmd -mkRhsStop :: OutType -> Demand -> SimplCont -- See Note [RHS of lets] in GHC.Core.Unfold -mkRhsStop ty bndr_dmd = Stop ty RhsCtxt (subDemandIfEvaluated bndr_dmd) +mkRhsStop :: OutType -> RecFlag -> Demand -> SimplCont +-- See Note [RHS of lets] in GHC.Core.Unfold +mkRhsStop ty is_rec bndr_dmd = Stop ty (RhsCtxt is_rec) (subDemandIfEvaluated bndr_dmd) mkLazyArgStop :: OutType -> ArgInfo -> SimplCont mkLazyArgStop ty fun_info = Stop ty (lazyArgContext fun_info) arg_sd @@ -437,16 +439,10 @@ mkLazyArgStop ty fun_info = Stop ty (lazyArgContext fun_info) arg_sd arg_sd = subDemandIfEvaluated (head (ai_dmds fun_info)) ------------------- -contIsRhsOrArg :: SimplCont -> Bool -contIsRhsOrArg (Stop {}) = True -contIsRhsOrArg (StrictBind {}) = True -contIsRhsOrArg (StrictArg {}) = True -contIsRhsOrArg _ = False - -contIsRhs :: SimplCont -> Bool -contIsRhs (Stop _ RhsCtxt _) = True -contIsRhs (CastIt _ k) = contIsRhs k -- For f = e |> co, treat e as Rhs context -contIsRhs _ = False +contIsRhs :: SimplCont -> Maybe RecFlag +contIsRhs (Stop _ 
(RhsCtxt is_rec) _) = Just is_rec +contIsRhs (CastIt _ k) = contIsRhs k -- For f = e |> co, treat e as Rhs context +contIsRhs _ = Nothing ------------------- contIsStop :: SimplCont -> Bool @@ -767,13 +763,16 @@ strictArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs }) -- Use this for strict arguments | encl_rules = RuleArgCtxt | disc:_ <- discs, disc > 0 = DiscArgCtxt -- Be keener here - | otherwise = RhsCtxt - -- Why RhsCtxt? if we see f (g x) (h x), and f is strict, we + | otherwise = RhsCtxt NonRecursive + -- Why RhsCtxt? if we see f (g x), and f is strict, we -- want to be a bit more eager to inline g, because it may -- expose an eval (on x perhaps) that can be eliminated or -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1 -- It's worth an 18% improvement in allocation for this -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier' + -- + -- Why NonRecursive? Becuase it's a bit like + -- let a = g x in f a interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt -- See Note [Interesting call context] @@ -962,12 +961,10 @@ simplEnvForGHCi logger dflags updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode updModeForStableUnfoldings unf_act current_mode = current_mode { sm_phase = phaseFromActivation unf_act - , sm_eta_expand = False , sm_inline = True } - -- sm_phase: see Note [Simplifying inside stable unfoldings] - -- sm_eta_expand: see Note [Eta-expansion in stable unfoldings] - -- sm_rules: just inherit; sm_rules might be "off" - -- because of -fno-enable-rewrite-rules + -- sm_eta_expand: see Historical-note [No eta expansion in stable unfoldings] + -- sm_rules: just inherit; sm_rules might be "off" + -- because of -fno-enable-rewrite-rules where phaseFromActivation (ActiveAfter _ n) = Phase n phaseFromActivation _ = InitialPhase @@ -986,15 +983,23 @@ updModeForRules current_mode {- Note [Simplifying rules] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When simplifying a rule LHS, refrain from /any/ inlining or applying -of other RULES. +of other RULES. Doing anything to the LHS is plain confusing, because +it means that what the rule matches is not what the user +wrote. c.f. #10595, and #10528. + +* sm_inline, sm_rules: inlining (or applying rules) on rule LHSs risks + introducing Ticks into the LHS, which makes matching + trickier. #10665, #10745. + + Doing this to either side confounds tools like HERMIT, which seek to reason + about and apply the RULES as originally written. See #10829. -Doing anything to the LHS is plain confusing, because it means that what the -rule matches is not what the user wrote. c.f. #10595, and #10528. -Moreover, inlining (or applying rules) on rule LHSs risks introducing -Ticks into the LHS, which makes matching trickier. #10665, #10745. + See also Note [Do not expose strictness if sm_inline=False] -Doing this to either side confounds tools like HERMIT, which seek to reason -about and apply the RULES as originally written. See #10829. +* sm_eta_expand: the template (LHS) of a rule must only mention coercion + /variables/ not arbitrary coercions. See Note [Casts in the template] in + GHC.Core.Rules. Eta expansion can create new coercions; so we switch + it off. There is, however, one case where we are pretty much /forced/ to transform the LHS of a rule: postInlineUnconditionally. For instance, in the case of @@ -1021,29 +1026,25 @@ we don't want to swizzle this to (\x. blah) |> (Refl xty `FunCo` CoVar cv) So we switch off cast swizzling in updModeForRules. 
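To illustrate the `RhsCtxt NonRecursive` analogy drawn in the strictArgContext
comment above (a hand-written Haskell sketch, not code from this patch; f, g
and call are invented names): a strict argument position behaves like the
right-hand side of a non-recursive let, so the simplifier is keener to inline
the argument's head there.

  f :: Int -> Int
  f a = a `seq` a + 1    -- f is strict in its argument

  g :: Int -> Int
  g x = x * 2

  call :: Int -> Int
  call x = f (g x)       -- much like: let a = g x in f a
                         -- inlining g here exposes an eval of x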
-Note [Eta-expansion in stable unfoldings] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We don't do eta-expansion inside stable unfoldings. It's extra work, -and can be expensive (the bizarre T18223 is a case in point). - -See Note [Occurrence analysis for lambda binders] in GHC.Core.Opt.OccurAnal. - -Historical note. There was /previously/ another reason not to do eta -expansion in stable unfoldings. If we have a stable unfolding - - f :: Ord a => a -> IO () - -- Unfolding template - -- = /\a \(d:Ord a) (x:a). bla - -we previously did not want to eta-expand to - - f :: Ord a => a -> IO () - -- Unfolding template - -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co - -because not specialisation of the overloading didn't work properly (#9509). -But now it does: see Note [Account for casts in binding] in GHC.Core.Opt.Specialise - +Historical-note [No eta expansion in stable unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This Note is no longer relevant because the specialiser has improved. +See Note [Account for casts in binding] in GHC.Core.Opt.Specialise. +So we do not override sm_eta_expand in updModeForStableUnfoldings. + + Old note: If we have a stable unfolding + f :: Ord a => a -> IO () + -- Unfolding template + -- = /\a \(d:Ord a) (x:a). bla + we do not want to eta-expand to + f :: Ord a => a -> IO () + -- Unfolding template + -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co + because not specialisation of the overloading doesn't work properly + (see Note [Specialisation shape] in GHC.Core.Opt.Specialise), #9509. + So we disable eta-expansion in stable unfoldings. + + End of Historical Note Note [Inlining in gentle mode] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1639,73 +1640,88 @@ won't inline because 'e' is too big. ************************************************************************ -} -mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr --- mkLam tries three things +rebuildLam :: SimplEnv + -> [OutBndr] -> OutExpr + -> SimplCont + -> SimplM OutExpr +-- (rebuildLam env bndrs body cont) +-- returns expr which means the same as \bndrs. body +-- +-- But it tries -- a) eta reduction, if that gives a trivial expression -- b) eta expansion [only if there are some value lambdas] -- -- NB: the SimplEnv already includes the [OutBndr] in its in-scope set -mkLam _env [] body _cont + +rebuildLam _env [] body _cont = return body -mkLam env bndrs body cont - = {-#SCC "mkLam" #-} --- pprTrace "mkLam" (ppr bndrs $$ ppr body $$ ppr cont) $ + +rebuildLam env bndrs body cont + = {-# SCC "rebuildLam" #-} do { dflags <- getDynFlags - ; mkLam' dflags bndrs body } + ; try_eta dflags bndrs body } where - mode = getMode env + mode = getMode env + in_scope = getInScope env -- Includes 'bndrs' + mb_rhs = contIsRhs cont -- See Note [Eta reduction based on evaluation context] - -- NB: cont is never ApplyToVal, otherwise contEvalContext panics - eval_sd dflags | gopt Opt_PedanticBottoms dflags = topSubDmd - -- See Note [Eta reduction soundness], criterion (S) - -- the bit about -fpedantic-bottoms - | otherwise = contEvalContext cont - - mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr - mkLam' dflags bndrs body@(Lam {}) - = mkLam' dflags (bndrs ++ bndrs1) body1 + eval_sd dflags + | gopt Opt_PedanticBottoms dflags = topSubDmd + -- See Note [Eta reduction soundness], criterion (S) + -- the bit about -fpedantic-bottoms + | otherwise = contEvalContext cont + -- NB: cont is never ApplyToVal, because beta-reduction would + -- have happened. 
So contEvalContext can panic on ApplyToVal. + + try_eta :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr + try_eta dflags bndrs body + | -- Try eta reduction + gopt Opt_DoEtaReduction dflags + , Just etad_lam <- tryEtaReduce bndrs body (eval_sd dflags) + = do { tick (EtaReduction (head bndrs)) + ; return etad_lam } + + | -- Try eta expansion + Nothing <- mb_rhs -- See Note [Eta expanding lambdas] + , sm_eta_expand mode + , any isRuntimeVar bndrs -- Only when there is at least one value lambda already + , Just body_arity <- exprEtaExpandArity (initArityOpts dflags) body + = do { tick (EtaExpansion (head bndrs)) + ; let body' = etaExpandAT in_scope body_arity body + ; traceSmpl "eta expand" (vcat [text "before" <+> ppr body + , text "after" <+> ppr body']) + -- NB: body' might have an outer Cast, but if so + -- mk_lams will pull it further out, past 'bndrs' to the top + ; mk_lams dflags bndrs body' } + + | otherwise + = mk_lams dflags bndrs body + + mk_lams :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr + -- mk_lams pulls casts and ticks to the top + mk_lams dflags bndrs body@(Lam {}) + = mk_lams dflags (bndrs ++ bndrs1) body1 where (bndrs1, body1) = collectBinders body - mkLam' dflags bndrs (Tick t expr) + mk_lams dflags bndrs (Tick t expr) | tickishFloatable t - = mkTick t <$> mkLam' dflags bndrs expr + = do { expr' <- mk_lams dflags bndrs expr + ; return (mkTick t expr') } - mkLam' dflags bndrs (Cast body co) + mk_lams dflags bndrs (Cast body co) | -- Note [Casts and lambdas] sm_cast_swizzle mode , not (any bad bndrs) - = do { lam <- mkLam' dflags bndrs body + = do { lam <- mk_lams dflags bndrs body ; return (mkCast lam (mkPiCos Representational bndrs co)) } where co_vars = tyCoVarsOfCo co bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars - mkLam' dflags bndrs body - | gopt Opt_DoEtaReduction dflags - -- , pprTrace "try eta" (ppr bndrs $$ ppr body $$ ppr cont $$ ppr (eval_sd dflags)) True - , Just etad_lam <- {-# SCC "tryee" #-} tryEtaReduce bndrs body (eval_sd dflags) - = do { tick (EtaReduction (head bndrs)) - ; return etad_lam } - - | not (contIsRhs cont) -- See Note [Eta expanding lambdas] - , sm_eta_expand mode - , any isRuntimeVar bndrs - , let body_arity = {-# SCC "eta" #-} exprEtaExpandArity (initArityOpts dflags) body - , expandableArityType body_arity - = do { tick (EtaExpansion (head bndrs)) - ; let res = {-# SCC "eta3" #-} - mkLams bndrs $ - etaExpandAT in_scope body_arity body - ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body) - , text "after" <+> ppr res]) - ; return res } - - | otherwise + mk_lams _ bndrs body = return (mkLams bndrs body) - where - in_scope = getInScope env -- Includes 'bndrs' {- Note [Eta expanding lambdas] @@ -1727,21 +1743,40 @@ bother to try expansion in mkLam in that case; hence the contIsRhs guard. NB: We check the SimplEnv (sm_eta_expand), not DynFlags. - See Note [Eta-expansion in stable unfoldings] + See Historical-note [Eta-expansion in stable unfoldings] Note [Casts and lambdas] ~~~~~~~~~~~~~~~~~~~~~~~~ Consider - (\x. (\y. e) `cast` g1) `cast` g2 -There is a danger here that the two lambdas look separated, and the -full laziness pass might float an expression to between the two. + (\(x:tx). (\(y:ty). e) `cast` co) -So this equation in mkLam' floats the g1 out, thus: - (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1) -where x:tx. +We float the cast out, thus + (\(x:tx) (y:ty). 
e) `cast` (tx -> co) -In general, this floats casts outside lambdas, where (I hope) they -might meet and cancel with some other cast: +We do this for at least three reasons: + +1. There is a danger here that the two lambdas look separated, and the + full laziness pass might float an expression to between the two. + +2. The occurrence analyser will mark x as InsideLam if the Lam nodes + are separated (see the Lam case of occAnal). By floating the cast + out we put the two Lams together, so x can get a vanilla Once + annotation. If this lambda is the RHS of a let, which we inline, + we can do preInlineUnconditionally on that x=arg binding. With the + InsideLam OccInfo, we can't do that, which results in an extra + iteration of the Simplifier. + +3. It may cancel with another cast. E.g + (\x. e |> co1) |> co2 + If we float out co1 it might cancel with co2. Similarly + let f = (\x. e |> co1) in ... + If we float out co1, and then do cast worker/wrapper, we get + let f1 = \x.e; f = f1 |> co1 in ... + and now we can inline f, hoping that co1 may cancel at a call site. + +TL;DR: put the lambdas together if at all possible. + +In general, here's the transformation: \x. e `cast` co ===> (\x. e) `cast` (tx -> co) /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co) /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co) @@ -1774,62 +1809,55 @@ Wrinkles ************************************************************************ -} -tryEtaExpandRhs :: SimplEnv -> RecFlag -> OutId -> OutExpr +tryEtaExpandRhs :: SimplEnv -> BindContext -> OutId -> OutExpr -> SimplM (ArityType, OutExpr) -- See Note [Eta-expanding at let bindings] -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then -- (a) rhs' has manifest arity n -- (b) if is_bot is True then rhs' applied to n args is guaranteed bottom -tryEtaExpandRhs env is_rec bndr rhs +tryEtaExpandRhs _env (BC_Join {}) bndr rhs | Just join_arity <- isJoinId_maybe bndr = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs oss = [idOneShotInfo id | id <- join_bndrs, isId id] arity_type | exprIsDeadEnd join_body = mkBotArityType oss - | otherwise = mkTopArityType oss + | otherwise = mkManifestArityType oss ; return (arity_type, rhs) } -- Note [Do not eta-expand join points] -- But do return the correct arity and bottom-ness, because -- these are used to set the bndr's IdInfo (#15517) -- Note [Invariants on join points] invariant 2b, in GHC.Core + | otherwise + = pprPanic "tryEtaExpandRhs" (ppr bndr) + +tryEtaExpandRhs env (BC_Let _ is_rec) bndr rhs | sm_eta_expand mode -- Provided eta-expansion is on , new_arity > old_arity -- And the current manifest arity isn't enough - , want_eta rhs + , wantEtaExpansion rhs = do { tick (EtaExpansion bndr) ; return (arity_type, etaExpandAT in_scope arity_type rhs) } | otherwise = return (arity_type, rhs) - where - mode = getMode env - in_scope = getInScope env - dflags = sm_dflags mode - arityOpts = initArityOpts dflags - old_arity = exprArity rhs - ty_arity = typeArity (idType bndr) - - arity_type = findRhsArity arityOpts is_rec bndr rhs old_arity - `maxWithArity` idCallArity bndr - `minWithArity` ty_arity - -- minWithArity: see Note [Arity trimming] in GHC.Core.Opt.Arity - - new_arity = arityTypeArity arity_type - - -- See Note [Which RHSs do we eta-expand?] 
- want_eta (Cast e _) = want_eta e
- want_eta (Tick _ e) = want_eta e
- want_eta (Lam b e) | isTyVar b = want_eta e
- want_eta (App e a) | exprIsTrivial a = want_eta e
- want_eta (Var {}) = False
- want_eta (Lit {}) = False
- want_eta _ = True
-{-
- want_eta _ = case arity_type of
- ATop (os:_) -> isOneShotInfo os
- ATop [] -> False
- ABot {} -> True
--}
+ mode = getMode env
+ in_scope = getInScope env
+ dflags = sm_dflags mode
+ arity_opts = initArityOpts dflags
+ old_arity = exprArity rhs
+ arity_type = findRhsArity arity_opts is_rec bndr rhs old_arity
+ new_arity = arityTypeArity arity_type
+
+wantEtaExpansion :: CoreExpr -> Bool
+-- Mostly True; but False of PAPs which will immediately eta-reduce again
+-- See Note [Which RHSs do we eta-expand?]
+wantEtaExpansion (Cast e _) = wantEtaExpansion e
+wantEtaExpansion (Tick _ e) = wantEtaExpansion e
+wantEtaExpansion (Lam b e) | isTyVar b = wantEtaExpansion e
+wantEtaExpansion (App e _) = wantEtaExpansion e
+wantEtaExpansion (Var {}) = False
+wantEtaExpansion (Lit {}) = False
+wantEtaExpansion _ = True

 {- Note [Eta-expanding at let bindings]
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 35d818d814..1c7a728d12 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -679,10 +679,11 @@ is there only to generate used-once info for single-entry thunks.

 Note [Don't eta expand in w/w]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A binding where the manifestArity of the RHS is less than idArity of the binder
-means GHC.Core.Opt.Arity didn't eta expand that binding. When this happens, it does so
-for a reason (see Note [exprArity invariant] in GHC.Core.Opt.Arity) and we probably have
-a PAP, cast or trivial expression as RHS.
+A binding where the manifestArity of the RHS is less than idArity of
+the binder means GHC.Core.Opt.Arity didn't eta expand that binding.
+When this happens, it does so for a reason (see Note [Arity invariants for bindings]
+in GHC.Core.Opt.Arity) and we probably have a PAP, cast or trivial expression
+as RHS.

 Below is a historical account of what happened when w/w still did eta expansion.
 Nowadays, it doesn't do that, but will simply w/w for the wrong arity, unleashing