Separate core inlining logic from `Unfolding` type.

This seems like a good idea either way, but is mostly motivated by a patch where this avoids a module loop.
author: Andreas Klebinger <klebinger.andreas@gmx.at> 2022-10-02 15:16:33 +0200
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2023-01-16 20:48:19 -0500
commit: 7a9a10423324864a2ce5d9c7e714a1045afd5153 (patch)
tree: 0db8134b9cc8ec055d50b8b5df0dd889187e8375
parent: 496607fdb77baf12e2fe263104ba5d0d700eee3b (diff)
download: haskell-7a9a10423324864a2ce5d9c7e714a1045afd5153.tar.gz
9 files changed, 604 insertions, 572 deletions
diff --git a/compiler/GHC/Core/Opt/LiberateCase.hs b/compiler/GHC/Core/Opt/LiberateCase.hs
index 0249b23aaf..a1f1d6e45d 100644
--- a/compiler/GHC/Core/Opt/LiberateCase.hs
+++ b/compiler/GHC/Core/Opt/LiberateCase.hs
@@ -14,6 +14,7 @@ import GHC.Prelude
 
 import GHC.Core
 import GHC.Core.Unfold
+import GHC.Core.Opt.Simplify.Inline
 import GHC.Builtin.Types ( unitDataConId )
 import GHC.Types.Id
 import GHC.Types.Var.Env
diff --git a/compiler/GHC/Core/Opt/Simplify/Inline.hs b/compiler/GHC/Core/Opt/Simplify/Inline.hs
new file mode 100644
index 0000000000..f91319e754
--- /dev/null
+++ b/compiler/GHC/Core/Opt/Simplify/Inline.hs
@@ -0,0 +1,555 @@
+{-
+(c) The University of Glasgow 2006
+(c) The AQUA Project, Glasgow University, 1994-1998
+
+This module contains inlining logic used by the simplifier.
+-}
+
+
+{-# LANGUAGE BangPatterns #-}
+
+module GHC.Core.Opt.Simplify.Inline (
+        -- * Cheap and cheerful inlining checks.
+        couldBeSmallEnoughToInline,
+        smallEnoughToInline,
+
+        -- * The smart inlining decisions are made by callSiteInline
+        callSiteInline, CallCtxt(..),
+    ) where
+
+import GHC.Prelude
+
+import GHC.Driver.Flags
+
+import GHC.Core
+import GHC.Core.Unfold
+import GHC.Types.Id
+import GHC.Types.Basic  ( Arity, RecFlag(..) )
+import GHC.Utils.Logger
+import GHC.Utils.Misc
+import GHC.Utils.Outputable
+import GHC.Types.Name
+
+import Data.List (isPrefixOf)
+
+{-
+************************************************************************
+*                                                                      *
+\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
+*                                                                      *
+************************************************************************
+
+We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
+we ``couldn't possibly use'' on the other side.  Can be overridden w/
+flaggery.  Just the same as smallEnoughToInline, except that it has no
+actual arguments.
+-}
+
+couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline opts threshold rhs
+  = case sizeExpr opts threshold [] body of
+       TooBig -> False
+       _      -> True
+  where
+    (_, body) = collectBinders rhs
+
+----------------
+smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
+smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance})
+  = case guidance of
+       UnfIfGoodArgs {ug_size = size} -> size <= unfoldingUseThreshold opts
+       UnfWhen {} -> True
+       UnfNever   -> False
+smallEnoughToInline _ _
+  = False
+
+{-
+************************************************************************
+*                                                                      *
+\subsection{callSiteInline}
+*                                                                      *
+************************************************************************
+
+This is the key function.  It decides whether to inline a variable at a call site
+
+callSiteInline is used at call sites, so it is a bit more generous.
+It's a very important function that embodies lots of heuristics.
+A non-WHNF can be inlined if it doesn't occur inside a lambda,
+and occurs exactly once or
+    occurs once in each branch of a case and is small
+
+If the thing is in WHNF, there's no danger of duplicating work,
+so we can inline if it occurs once, or is small
+
+NOTE: we don't want to inline top-level functions that always diverge.
+It just makes the code bigger.  Tt turns out that the convenient way to prevent
+them inlining is to give them a NOINLINE pragma, which we do in
+StrictAnal.addStrictnessInfoToTopId
+-}
+
+callSiteInline :: Logger
+               -> UnfoldingOpts
+               -> Int                   -- Case depth
+               -> Id                    -- The Id
+               -> Bool                  -- True <=> unfolding is active
+               -> Bool                  -- True if there are no arguments at all (incl type args)
+               -> [ArgSummary]          -- One for each value arg; True if it is interesting
+               -> CallCtxt              -- True <=> continuation is interesting
+               -> Maybe CoreExpr        -- Unfolding, if any
+callSiteInline logger opts !case_depth id active_unfolding lone_variable arg_infos cont_info
+  = case idUnfolding id of
+      -- idUnfolding checks for loop-breakers, returning NoUnfolding
+      -- Things with an INLINE pragma may have an unfolding *and*
+      -- be a loop breaker  (maybe the knot is not yet untied)
+        CoreUnfolding { uf_tmpl = unf_template
+                      , uf_cache = unf_cache
+                      , uf_guidance = guidance }
+          | active_unfolding -> tryUnfolding logger opts case_depth id lone_variable
+                                    arg_infos cont_info unf_template
+                                    unf_cache guidance
+          | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
+        NoUnfolding      -> Nothing
+        BootUnfolding    -> Nothing
+        OtherCon {}      -> Nothing
+        DFunUnfolding {} -> Nothing     -- Never unfold a DFun
+
+-- | Report the inlining of an identifier's RHS to the user, if requested.
+traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
+traceInline logger opts inline_id str doc result
+  -- We take care to ensure that doc is used in only one branch, ensuring that
+  -- the simplifier can push its allocation into the branch. See Note [INLINE
+  -- conditional tracing utilities].
+  | enable    = logTraceMsg logger str doc result
+  | otherwise = result
+  where
+    enable
+      | logHasDumpFlag logger Opt_D_dump_verbose_inlinings
+      = True
+      | Just prefix <- unfoldingReportPrefix opts
+      = prefix `isPrefixOf` occNameString (getOccName inline_id)
+      | otherwise
+      = False
+{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
+
+{- Note [Avoid inlining into deeply nested cases]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a function f like this:
+
+  f arg1 arg2 =
+    case ...
+      ... -> g arg1
+      ... -> g arg2
+
+This function is small. So should be safe to inline.
+However sometimes this doesn't quite work out like that.
+Consider this code:
+
+f1 arg1 arg2 ... = ...
+    case _foo of
+      alt1 -> ... f2 arg1 ...
+      alt2 -> ... f2 arg2 ...
+
+f2 arg1 arg2 ... = ...
+    case _foo of
+      alt1 -> ... f3 arg1 ...
+      alt2 -> ... f3 arg2 ...
+
+f3 arg1 arg2 ... = ...
+
+... repeats up to n times. And then f1 is
+applied to some arguments:
+
+foo = ... f1 <interestingArgs> ...
+
+Initially f2..fn are not interesting to inline so we don't.
+However we see that f1 is applied to interesting args.
+So it's an obvious choice to inline those:
+
+foo =
+    ...
+      case _foo of
+        alt1 -> ... f2 <interestingArg> ...
+        alt2 -> ... f2 <interestingArg> ...
+
+As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting
+arguments and f2 is small:
+
+foo =
+    ...
+      case _foo of
+        alt1 -> ... case _foo of
+            alt1 -> ... f3 <interestingArg> ...
+            alt2 -> ... f3 <interestingArg> ...
+
+        alt2 -> ... case _foo of
+            alt1 -> ... f3 <interestingArg> ...
+            alt2 -> ... f3 <interestingArg> ...
+
+The same thing happens for each binding up to f_n, duplicating the amount of inlining
+done in each step. Until at some point we are either done or run out of simplifier
+ticks/RAM. This pattern happened #18730.
+
+To combat this we introduce one more heuristic when weighing inlining decision.
+We keep track of a "case-depth". Which increases each time we look inside a case
+expression with more than one alternative.
+
+We then apply a penalty to inlinings based on the case-depth at which they would
+be inlined. Bounding the number of inlinings in such a scenario.
+
+The heuristic can be tuned in two ways:
+
+* We can ignore the first n levels of case nestings for inlining decisions using
+  -funfolding-case-threshold.
+* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling.
+  Scaling can be set with -funfolding-case-scaling.
+
+Some guidance on setting these defaults:
+
+* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of
+  control. We picked 2 for no particular reason.
+* Scaling the penalty by any more than 30 means the reproducer from
+  T18730 won't compile even with reasonably small values of n. Instead
+  it will run out of runs/ticks. This means to positively affect the reproducer
+  a scaling <= 30 is required.
+* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
+  (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
+* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
+  (small) regression for compiler perf benchmarks.
+
+The end result is that we are settling for a scaling of 30, with a threshold of 2.
+This gives us minimal compiler perf regressions. No nofib runtime regressions and
+will still avoid this pattern sometimes. This is a "safe" default, where we err on
+the side of compiler blowup instead of risking runtime regressions.
+
+For cases where the default falls short the flag can be changed to allow more/less inlining as
+needed on a per-module basis.
+
+-}
+
+tryUnfolding :: Logger -> UnfoldingOpts -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
+             -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance
+             -> Maybe CoreExpr
+tryUnfolding logger opts !case_depth id lone_variable arg_infos
+             cont_info unf_template unf_cache guidance
+ = case guidance of
+     UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
+
+     UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
+        | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
+                -- See Note [INLINE for small functions] (3)
+        -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
+        | otherwise
+        -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
+        where
+          some_benefit = calc_some_benefit uf_arity
+          enough_args  = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
+
+     UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+        | unfoldingVeryAggressive opts
+        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        | is_wf && some_benefit && small_enough
+        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        | otherwise
+        -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
+        where
+          some_benefit = calc_some_benefit (length arg_discounts)
+          -- See Note [Avoid inlining into deeply nested cases]
+          depth_treshold = unfoldingCaseThreshold opts
+          depth_scaling = unfoldingCaseScaling opts
+          depth_penalty | case_depth <= depth_treshold = 0
+                        | otherwise       = (size * (case_depth - depth_treshold)) `div` depth_scaling
+          adjusted_size = size + depth_penalty - discount
+          small_enough = adjusted_size <= unfoldingUseThreshold opts
+          discount = computeDiscount arg_discounts res_discount arg_infos cont_info
+
+          extra_doc = vcat [ text "case depth =" <+> int case_depth
+                           , text "depth based penalty =" <+> int depth_penalty
+                           , text "discounted size =" <+> int adjusted_size ]
+  where
+    -- Unpack the UnfoldingCache lazily because it may not be needed, and all
+    -- its fields are strict; so evaluating unf_cache at all forces all the
+    -- isWorkFree etc computations to take place.  That risks wasting effort for
+    -- Ids that are never going to inline anyway.
+    -- See Note [UnfoldingCache] in GHC.Core
+    UnfoldingCache{ uf_is_work_free = is_wf, uf_expandable = is_exp } = unf_cache
+
+    mk_doc some_benefit extra_doc yes_or_no
+      = vcat [ text "arg infos" <+> ppr arg_infos
+             , text "interesting continuation" <+> ppr cont_info
+             , text "some_benefit" <+> ppr some_benefit
+             , text "is exp:" <+> ppr is_exp
+             , text "is work-free:" <+> ppr is_wf
+             , text "guidance" <+> ppr guidance
+             , extra_doc
+             , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
+
+    ctx = log_default_dump_context (logFlags logger)
+    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
+    n_val_args = length arg_infos
+
+           -- some_benefit is used when the RHS is small enough
+           -- and the call has enough (or too many) value
+           -- arguments (ie n_val_args >= arity). But there must
+           -- be *something* interesting about some argument, or the
+           -- result context, to make it worth inlining
+    calc_some_benefit :: Arity -> Bool   -- The Arity is the number of args
+                                         -- expected by the unfolding
+    calc_some_benefit uf_arity
+       | not saturated = interesting_args       -- Under-saturated
+                                        -- Note [Unsaturated applications]
+       | otherwise = interesting_args   -- Saturated or over-saturated
+                  || interesting_call
+      where
+        saturated      = n_val_args >= uf_arity
+        over_saturated = n_val_args > uf_arity
+        interesting_args = any nonTriv arg_infos
+                -- NB: (any nonTriv arg_infos) looks at the
+                -- over-saturated args too which is "wrong";
+                -- but if over-saturated we inline anyway.
+
+        interesting_call
+          | over_saturated
+          = True
+          | otherwise
+          = case cont_info of
+              CaseCtxt   -> not (lone_variable && is_exp)  -- Note [Lone variables]
+              ValAppCtxt -> True                           -- Note [Cast then apply]
+              RuleArgCtxt -> uf_arity > 0  -- See Note [RHS of lets]
+              DiscArgCtxt -> uf_arity > 0  -- Note [Inlining in ArgCtxt]
+              RhsCtxt NonRecursive
+                          -> uf_arity > 0  -- See Note [RHS of lets]
+              _other      -> False         -- See Note [Nested functions]
+
+
+{- Note [RHS of lets]
+~~~~~~~~~~~~~~~~~~~~~
+When the call is the argument of a function with a RULE, or the RHS of a let,
+we are a little bit keener to inline (in tryUnfolding).  For example
+     f y = (y,y,y)
+     g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
+We'd inline 'f' if the call was in a case context, and it kind-of-is,
+only we can't see it.  Also
+     x = f v
+could be expensive whereas
+     x = case v of (a,b) -> a
+is patently cheap and may allow more eta expansion.
+
+So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a
+/non-recursive/ let as not-totally-boring.  A /recursive/ let isn't
+going be inlined so there is much less point.  Hence the (only reason
+for the) RecFlag in RhsCtxt
+
+Note [Unsaturated applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When a call is not saturated, we *still* inline if one of the
+arguments has interesting structure.  That's sometimes very important.
+A good example is the Ord instance for Bool in Base:
+
+ Rec {
+    $fOrdBool =GHC.Classes.D:Ord
+                 @ Bool
+                 ...
+                 $cmin_ajX
+
+    $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
+    $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
+  }
+
+But the defn of GHC.Classes.$dmmin is:
+
+  $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
+    {- Arity: 3, HasNoCafRefs, Strictness: SLL,
+       Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
+                   case @ a GHC.Classes.<= @ a $dOrd x y of wild {
+                     GHC.Types.False -> y GHC.Types.True -> x }) -}
+
+We *really* want to inline $dmmin, even though it has arity 3, in
+order to unravel the recursion.
+
+
+Note [Things to watch]
+~~~~~~~~~~~~~~~~~~~~~~
+*   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
+    Assume x is exported, so not inlined unconditionally.
+    Then we want x to inline unconditionally; no reason for it
+    not to, and doing so avoids an indirection.
+
+*   { x = I# 3; ....f x.... }
+    Make sure that x does not inline unconditionally!
+    Lest we get extra allocation.
+
+Note [Nested functions]
+~~~~~~~~~~~~~~~~~~~~~~~
+At one time we treated a call of a non-top-level function as
+"interesting" (regardless of how boring the context) in the hope
+that inlining it would eliminate the binding, and its allocation.
+Specifically, in the default case of interesting_call we had
+   _other -> not is_top && uf_arity > 0
+
+But actually postInlineUnconditionally does some of this and overall
+it makes virtually no difference to nofib.  So I simplified away this
+special case
+
+Note [Cast then apply]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider
+   myIndex = __inline_me ( (/\a. <blah>) |> co )
+   co :: (forall a. a -> a) ~ (forall a. T a)
+     ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
+
+We need to inline myIndex to unravel this; but the actual call (myIndex a) has
+no value arguments.  The ValAppCtxt gives it enough incentive to inline.
+
+Note [Inlining in ArgCtxt]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The condition (arity > 0) here is very important, because otherwise
+we end up inlining top-level stuff into useless places; eg
+   x = I# 3#
+   f = \y.  g x
+This can make a very big difference: it adds 16% to nofib 'integer' allocs,
+and 20% to 'power'.
+
+At one stage I replaced this condition by 'True' (leading to the above
+slow-down).  The motivation was test eyeball/inline1.hs; but that seems
+to work ok now.
+
+NOTE: arguably, we should inline in ArgCtxt only if the result of the
+call is at least CONLIKE.  At least for the cases where we use ArgCtxt
+for the RHS of a 'let', we only profit from the inlining if we get a
+CONLIKE thing (modulo lets).
+
+Note [Lone variables]
+~~~~~~~~~~~~~~~~~~~~~
+See also Note [Interaction of exprIsWorkFree and lone variables]
+which appears below
+
+The "lone-variable" case is important.  I spent ages messing about
+with unsatisfactory variants, but this is nice.  The idea is that if a
+variable appears all alone
+
+        as an arg of lazy fn, or rhs    BoringCtxt
+        as scrutinee of a case          CaseCtxt
+        as arg of a fn                  ArgCtxt
+AND
+        it is bound to a cheap expression
+
+then we should not inline it (unless there is some other reason,
+e.g. it is the sole occurrence).  That is what is happening at
+the use of 'lone_variable' in 'interesting_call'.
+
+Why?  At least in the case-scrutinee situation, turning
+        let x = (a,b) in case x of y -> ...
+into
+        let x = (a,b) in case (a,b) of y -> ...
+and thence to
+        let x = (a,b) in let y = (a,b) in ...
+is bad if the binding for x will remain.
+
+Another example: I discovered that strings
+were getting inlined straight back into applications of 'error'
+because the latter is strict.
+        s = "foo"
+        f = \x -> ...(error s)...
+
+Fundamentally such contexts should not encourage inlining because, provided
+the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the
+context can ``see'' the unfolding of the variable (e.g. case or a
+RULE) so there's no gain.
+
+However, watch out:
+
+ * Consider this:
+        foo = \n. [n])  {-# INLINE foo #-}
+        bar = foo 20    {-# INLINE bar #-}
+        baz = \n. case bar of { (m:_) -> m + n }
+   Here we really want to inline 'bar' so that we can inline 'foo'
+   and the whole thing unravels as it should obviously do.  This is
+   important: in the NDP project, 'bar' generates a closure data
+   structure rather than a list.
+
+   So the non-inlining of lone_variables should only apply if the
+   unfolding is regarded as expandable; because that is when
+   exprIsConApp_maybe looks through the unfolding.  Hence the "&&
+   is_exp" in the CaseCtxt branch of interesting_call
+
+ * Even a type application or coercion isn't a lone variable.
+   Consider
+        case $fMonadST @ RealWorld of { :DMonad a b c -> c }
+   We had better inline that sucker!  The case won't see through it.
+
+   For now, I'm treating treating a variable applied to types
+   in a *lazy* context "lone". The motivating example was
+        f = /\a. \x. BIG
+        g = /\a. \y.  h (f a)
+   There's no advantage in inlining f here, and perhaps
+   a significant disadvantage.  Hence some_val_args in the Stop case
+
+Note [Interaction of exprIsWorkFree and lone variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The lone-variable test says "don't inline if a case expression
+scrutinises a lone variable whose unfolding is cheap".  It's very
+important that, under these circumstances, exprIsConApp_maybe
+can spot a constructor application. So, for example, we don't
+consider
+        let x = e in (x,x)
+to be cheap, and that's good because exprIsConApp_maybe doesn't
+think that expression is a constructor application.
+
+In the 'not (lone_variable && is_wf)' test, I used to test is_value
+rather than is_wf, which was utterly wrong, because the above
+expression responds True to exprIsHNF, which is what sets is_value.
+
+This kind of thing can occur if you have
+
+        {-# INLINE foo #-}
+        foo = let x = e in (x,x)
+
+which Roman did.
+
+
+-}
+
+computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
+                -> Int
+computeDiscount arg_discounts res_discount arg_infos cont_info
+
+  = 10          -- Discount of 10 because the result replaces the call
+                -- so we count 10 for the function itself
+
+    + 10 * length actual_arg_discounts
+               -- Discount of 10 for each arg supplied,
+               -- because the result replaces the call
+
+    + total_arg_discount + res_discount'
+  where
+    actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos
+    total_arg_discount   = sum actual_arg_discounts
+
+    mk_arg_discount _        TrivArg    = 0
+    mk_arg_discount _        NonTrivArg = 10
+    mk_arg_discount discount ValueArg   = discount
+
+    res_discount'
+      | LT <- arg_discounts `compareLength` arg_infos
+      = res_discount   -- Over-saturated
+      | otherwise
+      = case cont_info of
+           BoringCtxt  -> 0
+           CaseCtxt    -> res_discount  -- Presumably a constructor
+           ValAppCtxt  -> res_discount  -- Presumably a function
+           _           -> 40 `min` res_discount
+                -- ToDo: this 40 `min` res_discount doesn't seem right
+                --   for DiscArgCtxt it shouldn't matter because the function will
+                --       get the arg discount for any non-triv arg
+                --   for RuleArgCtxt we do want to be keener to inline; but not only
+                --       constructor results
+                --   for RhsCtxt I suppose that exposing a data con is good in general
+                --   And 40 seems very arbitrary
+                --
+                -- res_discount can be very large when a function returns
+                -- constructors; but we only want to invoke that large discount
+                -- when there's a case continuation.
+                -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
+                -- But we want to avoid inlining large functions that return
+                -- constructors into contexts that are simply "interesting"
diff --git a/compiler/GHC/Core/Opt/Simplify/Iteration.hs b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
index d8b95e7358..e29581a2f0 100644
--- a/compiler/GHC/Core/Opt/Simplify/Iteration.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
@@ -22,6 +22,7 @@ import GHC.Core.Opt.Simplify.Monad
 import GHC.Core.Type hiding ( substTy, substTyVar, extendTvSubst, extendCvSubst )
 import GHC.Core.TyCo.Compare( eqType )
 import GHC.Core.Opt.Simplify.Env
+import GHC.Core.Opt.Simplify.Inline
 import GHC.Core.Opt.Simplify.Utils
 import GHC.Core.Opt.OccurAnal ( occurAnalyseExpr, zapLambdaBndrs, scrutBinderSwap_maybe )
 import GHC.Core.Make       ( FloatBind, mkImpossibleExpr, castBottomExpr )
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 25a7779274..28b1ebc221 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -47,6 +47,7 @@ import GHC.Prelude hiding (head, init, last, tail)
 import GHC.Core
 import GHC.Types.Literal ( isLitRubbish )
 import GHC.Core.Opt.Simplify.Env
+import GHC.Core.Opt.Simplify.Inline
 import GHC.Core.Opt.Stats ( Tick(..) )
 import qualified GHC.Core.Subst
 import GHC.Core.Ppr
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index fbdf0269e1..6a45129f06 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -32,6 +32,7 @@ import GHC.Core
 import GHC.Core.Subst
 import GHC.Core.Utils
 import GHC.Core.Unfold
+import GHC.Core.Opt.Simplify.Inline
 import GHC.Core.FVs     ( exprsFreeVarsList, exprFreeVars )
 import GHC.Core.Opt.Monad
 import GHC.Core.Opt.WorkWrap.Utils
diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs
index 7446a2e983..48a7e5e82f 100644
--- a/compiler/GHC/Core/Unfold.hs
+++ b/compiler/GHC/Core/Unfold.hs
@@ -21,25 +21,22 @@ find, unsurprisingly, a Core expression.
 module GHC.Core.Unfold (
         Unfolding, UnfoldingGuidance,   -- Abstract types
 
+        ExprSize(..), sizeExpr,
+
+        ArgSummary(..), nonTriv,
+        CallCtxt(..),
+
         UnfoldingOpts (..), defaultUnfoldingOpts,
         updateCreationThreshold, updateUseThreshold,
         updateFunAppDiscount, updateDictDiscount,
         updateVeryAggressive, updateCaseScaling,
         updateCaseThreshold, updateReportPrefix,
 
-        ArgSummary(..),
-
-        couldBeSmallEnoughToInline, inlineBoringOk,
-        smallEnoughToInline,
-
-        callSiteInline, CallCtxt(..),
-        calcUnfoldingGuidance
+        inlineBoringOk, calcUnfoldingGuidance
     ) where
 
 import GHC.Prelude
 
-import GHC.Driver.Flags
-
 import GHC.Core
 import GHC.Core.Utils
 import GHC.Types.Id
@@ -48,20 +45,16 @@ import GHC.Types.Literal
 import GHC.Builtin.PrimOps
 import GHC.Types.Id.Info
 import GHC.Types.RepType ( isZeroBitTy )
-import GHC.Types.Basic  ( Arity, RecFlag(..) )
+import GHC.Types.Basic  ( Arity, RecFlag )
 import GHC.Core.Type
 import GHC.Builtin.Names
 import GHC.Data.Bag
-import GHC.Utils.Logger
 import GHC.Utils.Misc
 import GHC.Utils.Outputable
 import GHC.Types.ForeignCall
-import GHC.Types.Name
 import GHC.Types.Tickish
 
 import qualified Data.ByteString as BS
-import Data.List (isPrefixOf)
-
 
 -- | Unfolding options
 data UnfoldingOpts = UnfoldingOpts
@@ -151,6 +144,41 @@ updateCaseScaling n opts = opts { unfoldingCaseScaling = n }
 updateReportPrefix :: Maybe String -> UnfoldingOpts -> UnfoldingOpts
 updateReportPrefix n opts = opts { unfoldingReportPrefix = n }
 
+data ArgSummary = TrivArg       -- Nothing interesting
+                | NonTrivArg    -- Arg has structure
+                | ValueArg      -- Arg is a con-app or PAP
+                                -- ..or con-like. Note [Conlike is interesting]
+
+instance Outputable ArgSummary where
+  ppr TrivArg    = text "TrivArg"
+  ppr NonTrivArg = text "NonTrivArg"
+  ppr ValueArg   = text "ValueArg"
+
+nonTriv ::  ArgSummary -> Bool
+nonTriv TrivArg = False
+nonTriv _       = True
+
+data CallCtxt
+  = BoringCtxt
+  | RhsCtxt RecFlag     -- Rhs of a let-binding; see Note [RHS of lets]
+  | DiscArgCtxt         -- Argument of a function with non-zero arg discount
+  | RuleArgCtxt         -- We are somewhere in the argument of a function with rules
+
+  | ValAppCtxt          -- We're applied to at least one value arg
+                        -- This arises when we have ((f x |> co) y)
+                        -- Then the (f x) has argument 'x' but in a ValAppCtxt
+
+  | CaseCtxt            -- We're the scrutinee of a case
+                        -- that decomposes its scrutinee
+
+instance Outputable CallCtxt where
+  ppr CaseCtxt    = text "CaseCtxt"
+  ppr ValAppCtxt  = text "ValAppCtxt"
+  ppr BoringCtxt  = text "BoringCtxt"
+  ppr (RhsCtxt ir)= text "RhsCtxt" <> parens (ppr ir)
+  ppr DiscArgCtxt = text "DiscArgCtxt"
+  ppr RuleArgCtxt = text "RuleArgCtxt"
+
 {-
 Note [Occurrence analysis of unfoldings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -930,561 +958,3 @@ sizeN :: Int -> ExprSize
 
 sizeZero = SizeIs 0 emptyBag 0
 sizeN n  = SizeIs n emptyBag 0
-
-{-
-************************************************************************
-*                                                                      *
-\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
-*                                                                      *
-************************************************************************
-
-We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
-we ``couldn't possibly use'' on the other side.  Can be overridden w/
-flaggery.  Just the same as smallEnoughToInline, except that it has no
-actual arguments.
--}
-
-couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
-couldBeSmallEnoughToInline opts threshold rhs
-  = case sizeExpr opts threshold [] body of
-       TooBig -> False
-       _      -> True
-  where
-    (_, body) = collectBinders rhs
-
-----------------
-smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
-smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance})
-  = case guidance of
-       UnfIfGoodArgs {ug_size = size} -> size <= unfoldingUseThreshold opts
-       UnfWhen {} -> True
-       UnfNever   -> False
-smallEnoughToInline _ _
-  = False
-
-{-
-************************************************************************
-*                                                                      *
-\subsection{callSiteInline}
-*                                                                      *
-************************************************************************
-
-This is the key function.  It decides whether to inline a variable at a call site
-
-callSiteInline is used at call sites, so it is a bit more generous.
-It's a very important function that embodies lots of heuristics.
-A non-WHNF can be inlined if it doesn't occur inside a lambda,
-and occurs exactly once or
-    occurs once in each branch of a case and is small
-
-If the thing is in WHNF, there's no danger of duplicating work,
-so we can inline if it occurs once, or is small
-
-NOTE: we don't want to inline top-level functions that always diverge.
-It just makes the code bigger.  Tt turns out that the convenient way to prevent
-them inlining is to give them a NOINLINE pragma, which we do in
-StrictAnal.addStrictnessInfoToTopId
--}
-
-data ArgSummary = TrivArg       -- Nothing interesting
-                | NonTrivArg    -- Arg has structure
-                | ValueArg      -- Arg is a con-app or PAP
-                                -- ..or con-like. Note [Conlike is interesting]
-
-instance Outputable ArgSummary where
-  ppr TrivArg    = text "TrivArg"
-  ppr NonTrivArg = text "NonTrivArg"
-  ppr ValueArg   = text "ValueArg"
-
-nonTriv ::  ArgSummary -> Bool
-nonTriv TrivArg = False
-nonTriv _       = True
-
-data CallCtxt
-  = BoringCtxt
-  | RhsCtxt RecFlag     -- Rhs of a let-binding; see Note [RHS of lets]
-  | DiscArgCtxt         -- Argument of a function with non-zero arg discount
-  | RuleArgCtxt         -- We are somewhere in the argument of a function with rules
-
-  | ValAppCtxt          -- We're applied to at least one value arg
-                        -- This arises when we have ((f x |> co) y)
-                        -- Then the (f x) has argument 'x' but in a ValAppCtxt
-
-  | CaseCtxt            -- We're the scrutinee of a case
-                        -- that decomposes its scrutinee
-
-instance Outputable CallCtxt where
-  ppr CaseCtxt    = text "CaseCtxt"
-  ppr ValAppCtxt  = text "ValAppCtxt"
-  ppr BoringCtxt  = text "BoringCtxt"
-  ppr (RhsCtxt ir)= text "RhsCtxt" <> parens (ppr ir)
-  ppr DiscArgCtxt = text "DiscArgCtxt"
-  ppr RuleArgCtxt = text "RuleArgCtxt"
-
-callSiteInline :: Logger
-               -> UnfoldingOpts
-               -> Int                   -- Case depth
-               -> Id                    -- The Id
-               -> Bool                  -- True <=> unfolding is active
-               -> Bool                  -- True if there are no arguments at all (incl type args)
-               -> [ArgSummary]          -- One for each value arg; True if it is interesting
-               -> CallCtxt              -- True <=> continuation is interesting
-               -> Maybe CoreExpr        -- Unfolding, if any
-callSiteInline logger opts !case_depth id active_unfolding lone_variable arg_infos cont_info
-  = case idUnfolding id of
-      -- idUnfolding checks for loop-breakers, returning NoUnfolding
-      -- Things with an INLINE pragma may have an unfolding *and*
-      -- be a loop breaker  (maybe the knot is not yet untied)
-        CoreUnfolding { uf_tmpl = unf_template
-                      , uf_cache = unf_cache
-                      , uf_guidance = guidance }
-          | active_unfolding -> tryUnfolding logger opts case_depth id lone_variable
-                                    arg_infos cont_info unf_template
-                                    unf_cache guidance
-          | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
-        NoUnfolding      -> Nothing
-        BootUnfolding    -> Nothing
-        OtherCon {}      -> Nothing
-        DFunUnfolding {} -> Nothing     -- Never unfold a DFun
-
--- | Report the inlining of an identifier's RHS to the user, if requested.
-traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
-traceInline logger opts inline_id str doc result
-  -- We take care to ensure that doc is used in only one branch, ensuring that
-  -- the simplifier can push its allocation into the branch. See Note [INLINE
-  -- conditional tracing utilities].
-  | enable    = logTraceMsg logger str doc result
-  | otherwise = result
-  where
-    enable
-      | logHasDumpFlag logger Opt_D_dump_verbose_inlinings
-      = True
-      | Just prefix <- unfoldingReportPrefix opts
-      = prefix `isPrefixOf` occNameString (getOccName inline_id)
-      | otherwise
-      = False
-{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
-
-{- Note [Avoid inlining into deeply nested cases]
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Consider a function f like this:
-
-  f arg1 arg2 =
-    case ...
-      ... -> g arg1
-      ... -> g arg2
-
-This function is small. So should be safe to inline.
-However sometimes this doesn't quite work out like that.
-Consider this code:
-
-f1 arg1 arg2 ... = ...
-    case _foo of
-      alt1 -> ... f2 arg1 ...
-      alt2 -> ... f2 arg2 ...
-
-f2 arg1 arg2 ... = ...
-    case _foo of
-      alt1 -> ... f3 arg1 ...
-      alt2 -> ... f3 arg2 ...
-
-f3 arg1 arg2 ... = ...
-
-... repeats up to n times. And then f1 is
-applied to some arguments:
-
-foo = ... f1 <interestingArgs> ...
-
-Initially f2..fn are not interesting to inline so we don't.
-However we see that f1 is applied to interesting args.
-So it's an obvious choice to inline those:
-
-foo =
-    ...
-      case _foo of
-        alt1 -> ... f2 <interestingArg> ...
-        alt2 -> ... f2 <interestingArg> ...
-
-As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting
-arguments and f2 is small:
-
-foo =
-    ...
-      case _foo of
-        alt1 -> ... case _foo of
-            alt1 -> ... f3 <interestingArg> ...
-            alt2 -> ... f3 <interestingArg> ...
-
-        alt2 -> ... case _foo of
-            alt1 -> ... f3 <interestingArg> ...
-            alt2 -> ... f3 <interestingArg> ...
-
-The same thing happens for each binding up to f_n, duplicating the amount of inlining
-done in each step. Until at some point we are either done or run out of simplifier
-ticks/RAM. This pattern happened #18730.
-
-To combat this we introduce one more heuristic when weighing inlining decision.
-We keep track of a "case-depth". Which increases each time we look inside a case
-expression with more than one alternative.
-
-We then apply a penalty to inlinings based on the case-depth at which they would
-be inlined. Bounding the number of inlinings in such a scenario.
-
-The heuristic can be tuned in two ways:
-
-* We can ignore the first n levels of case nestings for inlining decisions using
-  -funfolding-case-threshold.
-* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling.
-  Scaling can be set with -funfolding-case-scaling.
-
-Some guidance on setting these defaults:
-
-* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of
-  control. We picked 2 for no particular reason.
-* Scaling the penalty by any more than 30 means the reproducer from
-  T18730 won't compile even with reasonably small values of n. Instead
-  it will run out of runs/ticks. This means to positively affect the reproducer
-  a scaling <= 30 is required.
-* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
-  (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
-* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
-  (small) regression for compiler perf benchmarks.
-
-The end result is that we are settling for a scaling of 30, with a threshold of 2.
-This gives us minimal compiler perf regressions. No nofib runtime regressions and
-will still avoid this pattern sometimes. This is a "safe" default, where we err on
-the side of compiler blowup instead of risking runtime regressions.
-
-For cases where the default falls short the flag can be changed to allow more/less inlining as
-needed on a per-module basis.
-
--}
-
-tryUnfolding :: Logger -> UnfoldingOpts -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
-             -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance
-             -> Maybe CoreExpr
-tryUnfolding logger opts !case_depth id lone_variable arg_infos
-             cont_info unf_template unf_cache guidance
- = case guidance of
-     UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
-
-     UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
-        | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
-                -- See Note [INLINE for small functions] (3)
-        -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
-        | otherwise
-        -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
-        where
-          some_benefit = calc_some_benefit uf_arity
-          enough_args  = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
-
-     UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
-        | unfoldingVeryAggressive opts
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
-        | is_wf && some_benefit && small_enough
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
-        | otherwise
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
-        where
-          some_benefit = calc_some_benefit (length arg_discounts)
-          -- See Note [Avoid inlining into deeply nested cases]
-          depth_treshold = unfoldingCaseThreshold opts
-          depth_scaling = unfoldingCaseScaling opts
-          depth_penalty | case_depth <= depth_treshold = 0
-                        | otherwise       = (size * (case_depth - depth_treshold)) `div` depth_scaling
-          adjusted_size = size + depth_penalty - discount
-          small_enough = adjusted_size <= unfoldingUseThreshold opts
-          discount = computeDiscount arg_discounts res_discount arg_infos cont_info
-
-          extra_doc = vcat [ text "case depth =" <+> int case_depth
-                           , text "depth based penalty =" <+> int depth_penalty
-                           , text "discounted size =" <+> int adjusted_size ]
-
-  where
-    -- Unpack the UnfoldingCache lazily because it may not be needed, and all
-    -- its fields are strict; so evaluating unf_cache at all forces all the
-    -- isWorkFree etc computations to take place.  That risks wasting effort for
-    -- Ids that are never going to inline anyway.
-    -- See Note [UnfoldingCache] in GHC.Core
-    UnfoldingCache{ uf_is_work_free = is_wf, uf_expandable = is_exp } = unf_cache
-
-    mk_doc some_benefit extra_doc yes_or_no
-      = vcat [ text "arg infos" <+> ppr arg_infos
-             , text "interesting continuation" <+> ppr cont_info
-             , text "some_benefit" <+> ppr some_benefit
-             , text "is exp:" <+> ppr is_exp
-             , text "is work-free:" <+> ppr is_wf
-             , text "guidance" <+> ppr guidance
-             , extra_doc
-             , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
-
-    ctx = log_default_dump_context (logFlags logger)
-    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
-    n_val_args = length arg_infos
-
-           -- some_benefit is used when the RHS is small enough
-           -- and the call has enough (or too many) value
-           -- arguments (ie n_val_args >= arity). But there must
-           -- be *something* interesting about some argument, or the
-           -- result context, to make it worth inlining
-    calc_some_benefit :: Arity -> Bool   -- The Arity is the number of args
-                                         -- expected by the unfolding
-    calc_some_benefit uf_arity
-       | not saturated = interesting_args       -- Under-saturated
-                                        -- Note [Unsaturated applications]
-       | otherwise = interesting_args   -- Saturated or over-saturated
-                  || interesting_call
-      where
-        saturated      = n_val_args >= uf_arity
-        over_saturated = n_val_args > uf_arity
-        interesting_args = any nonTriv arg_infos
-                -- NB: (any nonTriv arg_infos) looks at the
-                -- over-saturated args too which is "wrong";
-                -- but if over-saturated we inline anyway.
-
-        interesting_call
-          | over_saturated
-          = True
-          | otherwise
-          = case cont_info of
-              CaseCtxt   -> not (lone_variable && is_exp)  -- Note [Lone variables]
-              ValAppCtxt -> True                           -- Note [Cast then apply]
-              RuleArgCtxt -> uf_arity > 0  -- See Note [RHS of lets]
-              DiscArgCtxt -> uf_arity > 0  -- Note [Inlining in ArgCtxt]
-              RhsCtxt NonRecursive
-                          -> uf_arity > 0  -- See Note [RHS of lets]
-              _other      -> False         -- See Note [Nested functions]
-
-
-{- Note [RHS of lets]
-~~~~~~~~~~~~~~~~~~~~~
-When the call is the argument of a function with a RULE, or the RHS of a let,
-we are a little bit keener to inline (in tryUnfolding).  For example
-     f y = (y,y,y)
-     g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
-We'd inline 'f' if the call was in a case context, and it kind-of-is,
-only we can't see it.  Also
-     x = f v
-could be expensive whereas
-     x = case v of (a,b) -> a
-is patently cheap and may allow more eta expansion.
-
-So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a
-/non-recursive/ let as not-totally-boring.  A /recursive/ let isn't
-going be inlined so there is much less point.  Hence the (only reason
-for the) RecFlag in RhsCtxt
-
-Note [Unsaturated applications]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When a call is not saturated, we *still* inline if one of the
-arguments has interesting structure.  That's sometimes very important.
-A good example is the Ord instance for Bool in Base:
-
- Rec {
-    $fOrdBool =GHC.Classes.D:Ord
-                 @ Bool
-                 ...
-                 $cmin_ajX
-
-    $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
-    $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
-  }
-
-But the defn of GHC.Classes.$dmmin is:
-
-  $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
-    {- Arity: 3, HasNoCafRefs, Strictness: SLL,
-       Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
-                   case @ a GHC.Classes.<= @ a $dOrd x y of wild {
-                     GHC.Types.False -> y GHC.Types.True -> x }) -}
-
-We *really* want to inline $dmmin, even though it has arity 3, in
-order to unravel the recursion.
-
-
-Note [Things to watch]
-~~~~~~~~~~~~~~~~~~~~~~
-*   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
-    Assume x is exported, so not inlined unconditionally.
-    Then we want x to inline unconditionally; no reason for it
-    not to, and doing so avoids an indirection.
-
-*   { x = I# 3; ....f x.... }
-    Make sure that x does not inline unconditionally!
-    Lest we get extra allocation.
-
-Note [Nested functions]
-~~~~~~~~~~~~~~~~~~~~~~~
-At one time we treated a call of a non-top-level function as
-"interesting" (regardless of how boring the context) in the hope
-that inlining it would eliminate the binding, and its allocation.
-Specifically, in the default case of interesting_call we had
-   _other -> not is_top && uf_arity > 0
-
-But actually postInlineUnconditionally does some of this and overall
-it makes virtually no difference to nofib.  So I simplified away this
-special case
-
-Note [Cast then apply]
-~~~~~~~~~~~~~~~~~~~~~~
-Consider
-   myIndex = __inline_me ( (/\a. <blah>) |> co )
-   co :: (forall a. a -> a) ~ (forall a. T a)
-     ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
-
-We need to inline myIndex to unravel this; but the actual call (myIndex a) has
-no value arguments.  The ValAppCtxt gives it enough incentive to inline.
-
-Note [Inlining in ArgCtxt]
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-The condition (arity > 0) here is very important, because otherwise
-we end up inlining top-level stuff into useless places; eg
-   x = I# 3#
-   f = \y.  g x
-This can make a very big difference: it adds 16% to nofib 'integer' allocs,
-and 20% to 'power'.
-
-At one stage I replaced this condition by 'True' (leading to the above
-slow-down).  The motivation was test eyeball/inline1.hs; but that seems
-to work ok now.
-
-NOTE: arguably, we should inline in ArgCtxt only if the result of the
-call is at least CONLIKE.  At least for the cases where we use ArgCtxt
-for the RHS of a 'let', we only profit from the inlining if we get a
-CONLIKE thing (modulo lets).
-
-Note [Lone variables]
-~~~~~~~~~~~~~~~~~~~~~
-See also Note [Interaction of exprIsWorkFree and lone variables]
-which appears below
-
-The "lone-variable" case is important.  I spent ages messing about
-with unsatisfactory variants, but this is nice.  The idea is that if a
-variable appears all alone
-
-        as an arg of lazy fn, or rhs    BoringCtxt
-        as scrutinee of a case          CaseCtxt
-        as arg of a fn                  ArgCtxt
-AND
-        it is bound to a cheap expression
-
-then we should not inline it (unless there is some other reason,
-e.g. it is the sole occurrence).  That is what is happening at
-the use of 'lone_variable' in 'interesting_call'.
-
-Why?  At least in the case-scrutinee situation, turning
-        let x = (a,b) in case x of y -> ...
-into
-        let x = (a,b) in case (a,b) of y -> ...
-and thence to
-        let x = (a,b) in let y = (a,b) in ...
-is bad if the binding for x will remain.
-
-Another example: I discovered that strings
-were getting inlined straight back into applications of 'error'
-because the latter is strict.
-        s = "foo"
-        f = \x -> ...(error s)...
-
-Fundamentally such contexts should not encourage inlining because, provided
-the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the
-context can ``see'' the unfolding of the variable (e.g. case or a
-RULE) so there's no gain.
-
-However, watch out:
-
- * Consider this:
-        foo = \n. [n])  {-# INLINE foo #-}
-        bar = foo 20    {-# INLINE bar #-}
-        baz = \n. case bar of { (m:_) -> m + n }
-   Here we really want to inline 'bar' so that we can inline 'foo'
-   and the whole thing unravels as it should obviously do.  This is
-   important: in the NDP project, 'bar' generates a closure data
-   structure rather than a list.
-
-   So the non-inlining of lone_variables should only apply if the
-   unfolding is regarded as expandable; because that is when
-   exprIsConApp_maybe looks through the unfolding.  Hence the "&&
-   is_exp" in the CaseCtxt branch of interesting_call
-
- * Even a type application or coercion isn't a lone variable.
-   Consider
-        case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-   We had better inline that sucker!  The case won't see through it.
-
-   For now, I'm treating treating a variable applied to types
-   in a *lazy* context "lone". The motivating example was
-        f = /\a. \x. BIG
-        g = /\a. \y.  h (f a)
-   There's no advantage in inlining f here, and perhaps
-   a significant disadvantage.  Hence some_val_args in the Stop case
-
-Note [Interaction of exprIsWorkFree and lone variables]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The lone-variable test says "don't inline if a case expression
-scrutinises a lone variable whose unfolding is cheap".  It's very
-important that, under these circumstances, exprIsConApp_maybe
-can spot a constructor application. So, for example, we don't
-consider
-        let x = e in (x,x)
-to be cheap, and that's good because exprIsConApp_maybe doesn't
-think that expression is a constructor application.
-
-In the 'not (lone_variable && is_wf)' test, I used to test is_value
-rather than is_wf, which was utterly wrong, because the above
-expression responds True to exprIsHNF, which is what sets is_value.
-
-This kind of thing can occur if you have
-
-        {-# INLINE foo #-}
-        foo = let x = e in (x,x)
-
-which Roman did.
-
-
--}
-
-computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
-                -> Int
-computeDiscount arg_discounts res_discount arg_infos cont_info
-
-  = 10          -- Discount of 10 because the result replaces the call
-                -- so we count 10 for the function itself
-
-    + 10 * length actual_arg_discounts
-               -- Discount of 10 for each arg supplied,
-               -- because the result replaces the call
-
-    + total_arg_discount + res_discount'
-  where
-    actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos
-    total_arg_discount   = sum actual_arg_discounts
-
-    mk_arg_discount _        TrivArg    = 0
-    mk_arg_discount _        NonTrivArg = 10
-    mk_arg_discount discount ValueArg   = discount
-
-    res_discount'
-      | LT <- arg_discounts `compareLength` arg_infos
-      = res_discount   -- Over-saturated
-      | otherwise
-      = case cont_info of
-           BoringCtxt  -> 0
-           CaseCtxt    -> res_discount  -- Presumably a constructor
-           ValAppCtxt  -> res_discount  -- Presumably a function
-           _           -> 40 `min` res_discount
-                -- ToDo: this 40 `min` res_discount doesn't seem right
-                --   for DiscArgCtxt it shouldn't matter because the function will
-                --       get the arg discount for any non-triv arg
-                --   for RuleArgCtxt we do want to be keener to inline; but not only
-                --       constructor results
-                --   for RhsCtxt I suppose that exposing a data con is good in general
-                --   And 40 seems very arbitrary
-                --
-                -- res_discount can be very large when a function returns
-                -- constructors; but we only want to invoke that large discount
-                -- when there's a case continuation.
-                -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
-                -- But we want to avoid inlining large functions that return
-                -- constructors into contexts that are simply "interesting"
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index ab320bfcac..2bb41c0fc3 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -325,6 +325,7 @@ Library
         GHC.Core.Opt.SetLevels
         GHC.Core.Opt.Simplify
         GHC.Core.Opt.Simplify.Env
+        GHC.Core.Opt.Simplify.Inline
         GHC.Core.Opt.Simplify.Iteration
         GHC.Core.Opt.Simplify.Monad
         GHC.Core.Opt.Simplify.Utils
diff --git a/testsuite/tests/count-deps/CountDepsAst.stdout b/testsuite/tests/count-deps/CountDepsAst.stdout
index c3b067c31e..384243cd93 100644
--- a/testsuite/tests/count-deps/CountDepsAst.stdout
+++ b/testsuite/tests/count-deps/CountDepsAst.stdout
@@ -45,6 +45,7 @@ GHC.Core.Opt.OccurAnal
 GHC.Core.Opt.Pipeline.Types
 GHC.Core.Opt.Simplify
 GHC.Core.Opt.Simplify.Env
+GHC.Core.Opt.Simplify.Inline
 GHC.Core.Opt.Simplify.Iteration
 GHC.Core.Opt.Simplify.Monad
 GHC.Core.Opt.Simplify.Utils
diff --git a/testsuite/tests/count-deps/CountDepsParser.stdout b/testsuite/tests/count-deps/CountDepsParser.stdout
index 8ff84fd40d..f00c74ce8d 100644
--- a/testsuite/tests/count-deps/CountDepsParser.stdout
+++ b/testsuite/tests/count-deps/CountDepsParser.stdout
@@ -45,6 +45,7 @@ GHC.Core.Opt.OccurAnal
 GHC.Core.Opt.Pipeline.Types
 GHC.Core.Opt.Simplify
 GHC.Core.Opt.Simplify.Env
+GHC.Core.Opt.Simplify.Inline
 GHC.Core.Opt.Simplify.Iteration
 GHC.Core.Opt.Simplify.Monad
 GHC.Core.Opt.Simplify.Utils
author	Andreas Klebinger <klebinger.andreas@gmx.at>	2022-10-02 15:16:33 +0200
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2023-01-16 20:48:19 -0500
commit	7a9a10423324864a2ce5d9c7e714a1045afd5153 (patch)
tree	0db8134b9cc8ec055d50b8b5df0dd889187e8375
parent	496607fdb77baf12e2fe263104ba5d0d700eee3b (diff)
download	haskell-7a9a10423324864a2ce5d9c7e714a1045afd5153.tar.gz