Reduce inlining in deeply-nested cases

This adds a new heuristic, controllable via two new flags, to better tune inlining behaviour. The new flags are -funfolding-case-threshold and -funfolding-case-scaling, which are documented both in the user guide and in Note [Avoid inlining into deeply nested cases]. Co-authored-by: Andreas Klebinger <klebinger.andreas@gmx.at>
author: Simon Peyton Jones <simonpj@microsoft.com> 2020-12-16 12:57:17 +0000
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2021-02-09 16:21:40 -0500
commit: 891a791f3f2438e5d768e1f96119d275e58e8d37 (patch)
tree: 5a051da2d04b14ce92bdcb98a5c5ab84c9d344fb
parent: ab5fd982a7a501136cb8b90fa841c02cc9551b5a (diff)
download: haskell-891a791f3f2438e5d768e1f96119d275e58e8d37.tar.gz
15 files changed, 399 insertions, 45 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 0cea6da0bf..4ca8985f8b 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -1882,7 +1882,7 @@ simplIdF env var cont
 
 completeCall :: SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr)
 completeCall env var cont
-  | Just expr <- callSiteInline dflags var active_unf
+  | Just expr <- callSiteInline dflags case_depth var active_unf
                                 lone_variable arg_infos interesting_cont
   -- Inline the variable's RHS
   = do { checkedTick (UnfoldingDone var)
@@ -1897,7 +1897,8 @@ completeCall env var cont
        ; rebuildCall env info cont }
 
   where
-    dflags = seDynFlags env
+    dflags     = seDynFlags env
+    case_depth = seCaseDepth env
     (lone_variable, arg_infos, call_cont) = contArgs cont
     n_val_args       = length arg_infos
     interesting_cont = interestingCallContext env call_cont
@@ -2724,9 +2725,11 @@ reallyRebuildCase env scrut case_bndr alts cont
        ; rebuild env case_expr cont }
 
   | otherwise
-  = do { (floats, cont') <- mkDupableCaseCont env alts cont
-       ; case_expr <- simplAlts (env `setInScopeFromF` floats)
-                                scrut (scaleIdBy holeScaling case_bndr) (scaleAltsBy holeScaling alts) cont'
+  = do { (floats, env', cont') <- mkDupableCaseCont env alts cont
+       ; case_expr <- simplAlts env' scrut
+                                (scaleIdBy holeScaling case_bndr)
+                                (scaleAltsBy holeScaling alts)
+                                cont'
        ; return (floats, case_expr) }
   where
     holeScaling = contHoleScaling cont
@@ -3234,10 +3237,15 @@ join points and inlining them away.  See #4930.
 
 --------------------
 mkDupableCaseCont :: SimplEnv -> [InAlt] -> SimplCont
-                  -> SimplM (SimplFloats, SimplCont)
+                  -> SimplM ( SimplFloats  -- Join points (if any)
+                            , SimplEnv     -- Use this for the alts
+                            , SimplCont)
 mkDupableCaseCont env alts cont
-  | altsWouldDup alts = mkDupableCont env cont
-  | otherwise         = return (emptyFloats env, cont)
+  | altsWouldDup alts = do { (floats, cont) <- mkDupableCont env cont
+                           ; let env' = bumpCaseDepth $
+                                        env `setInScopeFromF` floats
+                           ; return (floats, env', cont) }
+  | otherwise         = return (emptyFloats env, env, cont)
 
 altsWouldDup :: [InAlt] -> Bool -- True iff strictly > 1 non-bottom alternative
 altsWouldDup []  = False        -- See Note [Bottom alternatives]
@@ -3370,12 +3378,11 @@ mkDupableContWithDmds env _
         --              in case [...hole...] of { pi -> ji xij }
         -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
     do  { tick (CaseOfCase case_bndr)
-        ; (floats, alt_cont) <- mkDupableCaseCont env alts cont
+        ; (floats, alt_env, alt_cont) <- mkDupableCaseCont (se `setInScopeFromE` env) alts cont
                 -- NB: We call mkDupableCaseCont here to make cont duplicable
                 --     (if necessary, depending on the number of alts)
                 -- And this is important: see Note [Fusing case continuations]
 
-        ; let alt_env = se `setInScopeFromF` floats
         ; let cont_scaling = contHoleScaling cont
           -- See Note [Scaling in case-of-case]
         ; (alt_env', case_bndr') <- simplBinder alt_env (scaleIdBy cont_scaling case_bndr)
diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs
index 180e562c73..0d4e06f9c2 100644
--- a/compiler/GHC/Core/Opt/Simplify/Env.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Env.hs
@@ -14,7 +14,7 @@ module GHC.Core.Opt.Simplify.Env (
         SimplEnv(..), pprSimplEnv,   -- Temp not abstract
         mkSimplEnv, extendIdSubst,
         extendTvSubst, extendCvSubst,
-        zapSubstEnv, setSubstEnv,
+        zapSubstEnv, setSubstEnv, bumpCaseDepth,
         getInScope, setInScopeFromE, setInScopeFromF,
         setInScopeSet, modifyInScope, addNewInScopeIds,
         getSimplRules,
@@ -103,6 +103,8 @@ data SimplEnv
         -- The current set of in-scope variables
         -- They are all OutVars, and all bound in this module
       , seInScope   :: InScopeSet       -- OutVars only
+
+      , seCaseDepth :: !Int  -- Depth of multi-branch case alternatives
     }
 
 data SimplFloats
@@ -272,11 +274,12 @@ points we're substituting. -}
 
 mkSimplEnv :: SimplMode -> SimplEnv
 mkSimplEnv mode
-  = SimplEnv { seMode = mode
-             , seInScope = init_in_scope
-             , seTvSubst = emptyVarEnv
-             , seCvSubst = emptyVarEnv
-             , seIdSubst = emptyVarEnv }
+  = SimplEnv { seMode      = mode
+             , seInScope   = init_in_scope
+             , seTvSubst   = emptyVarEnv
+             , seCvSubst   = emptyVarEnv
+             , seIdSubst   = emptyVarEnv
+             , seCaseDepth = 0 }
         -- The top level "enclosing CC" is "SUBSUMED".
 
 init_in_scope :: InScopeSet
@@ -319,6 +322,9 @@ setMode mode env = env { seMode = mode }
 updMode :: (SimplMode -> SimplMode) -> SimplEnv -> SimplEnv
 updMode upd env = env { seMode = upd (seMode env) }
 
+bumpCaseDepth :: SimplEnv -> SimplEnv
+bumpCaseDepth env = env { seCaseDepth = seCaseDepth env + 1 }
+
 ---------------------
 extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv
 extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res
diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs
index 0130fcf61e..d1e27f9fca 100644
--- a/compiler/GHC/Core/Opt/Simplify/Monad.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs
@@ -254,8 +254,13 @@ checkedTick t
       [ text "When trying" <+> ppr t
       , text "To increase the limit, use -fsimpl-tick-factor=N (default 100)."
       , space
-      , text "If you need to increase the limit substantially, please file a"
-      , text "bug report and indicate the factor you needed."
+      , text "In addition try adjusting -funfolding-case-threshold=N and"
+      , text "-funfolding-case-scaling=N for the module in question."
+      , text "Using threshold=1 and scaling=5 should break most inlining loops."
+      , space
+      , text "If you need to increase the tick factor substantially, while also"
+      , text "adjusting unfolding parameters please file a bug report and"
+      , text "indicate the factor you needed."
       , space
       , text "If GHC was unable to complete compilation even"
                <+> text "with a very large factor"
diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs
index b97e10cd46..8a61eec3c7 100644
--- a/compiler/GHC/Core/Unfold.hs
+++ b/compiler/GHC/Core/Unfold.hs
@@ -26,7 +26,7 @@ module GHC.Core.Unfold (
         UnfoldingOpts (..), defaultUnfoldingOpts,
         updateCreationThreshold, updateUseThreshold,
         updateFunAppDiscount, updateDictDiscount,
-        updateVeryAggressive,
+        updateVeryAggressive, updateCaseScaling, updateCaseThreshold,
 
         ArgSummary(..),
 
@@ -82,6 +82,12 @@ data UnfoldingOpts = UnfoldingOpts
 
    , unfoldingVeryAggressive :: !Bool
       -- ^ Force inlining in many more cases
+
+      -- Don't consider depth up to x
+   , unfoldingCaseThreshold :: !Int
+
+      -- Penalize depth with 1/x
+   , unfoldingCaseScaling :: !Int
    }
 
 defaultUnfoldingOpts :: UnfoldingOpts
@@ -106,6 +112,13 @@ defaultUnfoldingOpts = UnfoldingOpts
       -- we'll be able to pick the right method from a dictionary
 
    , unfoldingVeryAggressive = False
+
+      -- Only apply scaling once we are deeper than threshold cases
+      -- in an RHS.
+   , unfoldingCaseThreshold = 2
+
+      -- Penalize depth with (size*(depth-threshold))/scaling
+   , unfoldingCaseScaling = 30
    }
 
 -- Helpers for "GHC.Driver.Session"
@@ -125,6 +138,13 @@ updateDictDiscount n opts = opts { unfoldingDictDiscount = n }
 updateVeryAggressive :: Bool -> UnfoldingOpts -> UnfoldingOpts
 updateVeryAggressive n opts = opts { unfoldingVeryAggressive = n }
 
+
+updateCaseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseThreshold n opts = opts { unfoldingCaseThreshold = n }
+
+updateCaseScaling :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseScaling n opts = opts { unfoldingCaseScaling = n }
+
 {-
 Note [Occurrence analysis of unfoldings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1033,6 +1053,7 @@ StrictAnal.addStrictnessInfoToTopId
 -}
 
 callSiteInline :: DynFlags
+               -> Int                   -- Case depth
                -> Id                    -- The Id
                -> Bool                  -- True <=> unfolding is active
                -> Bool                  -- True if there are no arguments at all (incl type args)
@@ -1075,7 +1096,7 @@ instance Outputable CallCtxt where
   ppr DiscArgCtxt = text "DiscArgCtxt"
   ppr RuleArgCtxt = text "RuleArgCtxt"
 
-callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
+callSiteInline dflags !case_depth id active_unfolding lone_variable arg_infos cont_info
   = case idUnfolding id of
       -- idUnfolding checks for loop-breakers, returning NoUnfolding
       -- Things with an INLINE pragma may have an unfolding *and*
@@ -1083,7 +1104,7 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
         CoreUnfolding { uf_tmpl = unf_template
                       , uf_is_work_free = is_wf
                       , uf_guidance = guidance, uf_expandable = is_exp }
-          | active_unfolding -> tryUnfolding dflags id lone_variable
+          | active_unfolding -> tryUnfolding dflags case_depth id lone_variable
                                     arg_infos cont_info unf_template
                                     is_wf is_exp guidance
           | otherwise -> traceInline dflags id "Inactive unfolding:" (ppr id) Nothing
@@ -1110,10 +1131,106 @@ traceInline dflags inline_id str doc result
       = False
 {-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
 
-tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
+{- Note [Avoid inlining into deeply nested cases]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a function f like this:
+
+  f arg1 arg2 =
+    case ...
+      ... -> g arg1
+      ... -> g arg2
+
+This function is small. So should be safe to inline.
+However sometimes this doesn't quite work out like that.
+Consider this code:
+
+f1 arg1 arg2 ... = ...
+    case _foo of
+      alt1 -> ... f2 arg1 ...
+      alt2 -> ... f2 arg2 ...
+
+f2 arg1 arg2 ... = ...
+    case _foo of
+      alt1 -> ... f3 arg1 ...
+      alt2 -> ... f3 arg2 ...
+
+f3 arg1 arg2 ... = ...
+
+... repeats up to n times. And then f1 is
+applied to some arguments:
+
+foo = ... f1 <interestingArgs> ...
+
+Initially f2..fn are not interesting to inline so we don't.
+However we see that f1 is applied to interesting args.
+So it's an obvious choice to inline those:
+
+foo =
+    ...
+      case _foo of
+        alt1 -> ... f2 <interestingArg> ...
+        alt2 -> ... f2 <interestingArg> ...
+
+As a result we go and inline f2: both mentions of f2 are now in turn applied to interesting
+arguments and f2 is small:
+
+foo =
+    ...
+      case _foo of
+        alt1 -> ... case _foo of
+            alt1 -> ... f3 <interestingArg> ...
+            alt2 -> ... f3 <interestingArg> ...
+
+        alt2 -> ... case _foo of
+            alt1 -> ... f3 <interestingArg> ...
+            alt2 -> ... f3 <interestingArg> ...
+
+The same thing happens for each binding up to f_n, duplicating the amount of inlining
+done in each step. Until at some point we are either done or run out of simplifier
+ticks/RAM. This pattern happened in #18730.
+
+To combat this we introduce one more heuristic when weighing inlining decisions.
+We keep track of a "case-depth", which increases each time we look inside a case
+expression with more than one alternative.
+
+We then apply a penalty to inlinings based on the case-depth at which they would
+be inlined, bounding the number of inlinings in such a scenario.
+
+The heuristic can be tuned in two ways:
+
+* We can ignore the first n levels of case nestings for inlining decisions using
+  -funfolding-case-threshold.
+* The penalty grows linearly with the depth. It's computed as size*(depth-threshold)/scaling.
+  Scaling can be set with -funfolding-case-scaling.
+
+Some guidance on setting these defaults:
+
+* A low threshold (<= 2) is needed to prevent exponential cases from spiraling out of
+  control. We picked 2 for no particular reason.
+* Scaling the penalty by any more than 30 means the reproducer from
+  T18730 won't compile even with reasonably small values of n. Instead
+  it will run out of runs/ticks. This means to positively affect the reproducer
+  a scaling <= 30 is required.
+* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
+  (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
+* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
+  (small) regression for compiler perf benchmarks.
+
+The end result is that we are settling for a scaling of 30, with a threshold of 2.
+This gives us minimal compiler perf regressions. No nofib runtime regressions and
+will still avoid this pattern sometimes. This is a "safe" default, where we err on
+the side of compiler blowup instead of risking runtime regressions.
+
+For cases where the default falls short the flag can be changed to allow more/less inlining as
+needed on a per-module basis.
+
+-}
+
+tryUnfolding :: DynFlags -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
              -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance
              -> Maybe CoreExpr
-tryUnfolding dflags id lone_variable
+tryUnfolding dflags !case_depth id lone_variable
              arg_infos cont_info unf_template
              is_wf is_exp guidance
  = case guidance of
@@ -1138,9 +1255,16 @@ tryUnfolding dflags id lone_variable
         -> traceInline dflags id str (mk_doc some_benefit extra_doc False) Nothing
         where
           some_benefit = calc_some_benefit (length arg_discounts)
-          extra_doc = text "discounted size =" <+> int discounted_size
-          discounted_size = size - discount
-          small_enough = discounted_size <= unfoldingUseThreshold uf_opts
+          extra_doc = vcat [ text "case depth =" <+> int case_depth
+                           , text "depth based penalty =" <+> int depth_penalty
+                           , text "discounted size =" <+> int adjusted_size ]
+          -- See Note [Avoid inlining into deeply nested cases]
+          depth_treshold = unfoldingCaseThreshold uf_opts
+          depth_scaling = unfoldingCaseScaling uf_opts
+          depth_penalty | case_depth <= depth_treshold = 0
+                        | otherwise       = (size * (case_depth - depth_treshold)) `div` depth_scaling
+          adjusted_size = size + depth_penalty - discount
+          small_enough = adjusted_size <= unfoldingUseThreshold uf_opts
           discount = computeDiscount arg_discounts res_discount arg_infos cont_info
 
   where
diff --git a/compiler/GHC/Core/Unfold.hs-boot b/compiler/GHC/Core/Unfold.hs-boot
index b86f8b2585..c62f1915c9 100644
--- a/compiler/GHC/Core/Unfold.hs-boot
+++ b/compiler/GHC/Core/Unfold.hs-boot
@@ -11,3 +11,5 @@ updateUseThreshold      :: Int -> UnfoldingOpts -> UnfoldingOpts
 updateFunAppDiscount    :: Int -> UnfoldingOpts -> UnfoldingOpts
 updateDictDiscount      :: Int -> UnfoldingOpts -> UnfoldingOpts
 updateVeryAggressive    :: Bool -> UnfoldingOpts -> UnfoldingOpts
+updateCaseThreshold     :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseScaling       :: Int -> UnfoldingOpts -> UnfoldingOpts
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 85f1b71852..cee4ba692b 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -2845,6 +2845,11 @@ dynamic_flags_deps = [
   , make_ord_flag defFlag "funfolding-dict-discount"
       (intSuffix   (\n d -> d { unfoldingOpts = updateDictDiscount n (unfoldingOpts d)}))
 
+  , make_ord_flag defFlag "funfolding-case-threshold"
+      (intSuffix   (\n d -> d { unfoldingOpts = updateCaseThreshold n (unfoldingOpts d)}))
+  , make_ord_flag defFlag "funfolding-case-scaling"
+      (intSuffix   (\n d -> d { unfoldingOpts = updateCaseScaling n (unfoldingOpts d)}))
+
   , make_dep_flag defFlag "funfolding-keeness-factor"
       (floatSuffix (\_ d -> d))
       "-funfolding-keeness-factor is no longer respected as of GHC 9.0"
diff --git a/docs/users_guide/hints.rst b/docs/users_guide/hints.rst
index cdec6a4f4c..5cb171ff69 100644
--- a/docs/users_guide/hints.rst
+++ b/docs/users_guide/hints.rst
@@ -368,3 +368,70 @@ discussed in the previous section. Strict functions get right down to
 business, rather than filling up the heap with closures (the system's
 notes to itself about how to evaluate something, should it eventually be
 required).
+
+.. _control-inlining:
+
+Controlling inlining via optimisation flags.
+--------------------------------------------
+
+.. index::
+    single: inlining, controlling
+    single: unfolding, controlling
+
+Inlining is one of the major optimizations GHC performs. Partially
+because inlining often allows other optimizations to be triggered.
+Sadly this is also a double edged sword. While inlining can often
+cut through runtime overheads this usually comes at the cost
+of not just program size, but also compiler performance. In
+extreme cases making it impossible to compile certain code.
+
+For this reason GHC offers various ways to tune inlining
+behaviour.
+
+Unfolding creation
+~~~~~~~~~~~~~~~~~~
+
+In order for a function from a different module to be inlined
+GHC requires the function's unfolding. The following flags can
+be used to control unfolding creation. Making their creation more
+or less likely:
+
+* :ghc-flag:`-fexpose-all-unfoldings`
+* :ghc-flag:`-funfolding-creation-threshold=⟨n⟩`
+
+Inlining decisions
+~~~~~~~~~~~~~~~~~~
+
+If an unfolding is available, the following flags can impact GHC's
+decision about inlining a specific binding.
+
+* :ghc-flag:`-funfolding-use-threshold=⟨n⟩`
+* :ghc-flag:`-funfolding-case-threshold=⟨n⟩`
+* :ghc-flag:`-funfolding-case-scaling=⟨n⟩`
+* :ghc-flag:`-funfolding-dict-discount=⟨n⟩`
+* :ghc-flag:`-funfolding-fun-discount=⟨n⟩`
+
+Should the simplifier run out of ticks because of an inlining loop,
+users are encouraged to try decreasing :ghc-flag:`-funfolding-case-threshold=⟨n⟩`
+or :ghc-flag:`-funfolding-case-scaling=⟨n⟩` to limit inlining into
+deeply nested expressions while allowing a higher tick factor.
+
+The defaults of these are tuned such that we don't expect regressions for most
+user programs. Using a :ghc-flag:`-funfolding-case-threshold=⟨n⟩` of 1-2 with a
+:ghc-flag:`-funfolding-case-scaling=⟨n⟩` of 15-25 can cause usually small runtime
+regressions but will prevent most inlining loops from getting out of control.
+
+In extreme cases lowering scaling and threshold further can be useful, but at that
+point it's very likely that beneficial inlining is prevented as well resulting
+in significant runtime regressions.
+
+In such cases it's recommended to move the problematic piece of code into its own
+module and to change the inlining parameters for the offending module only.
+
+Inlining generics
+~~~~~~~~~~~~~~~~~
+
+There are also flags specific to the inlining of generics:
+
+:ghc-flag:`-finline-generics`
+:ghc-flag:`-finline-generics-aggressively`
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index a53fa21802..a6e5a60ffb 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -1423,6 +1423,73 @@ by saying ``-fno-wombat``.
     determines if a function definition will be kept around at all for
     potential inlining.
 
+.. ghc-flag:: -funfolding-case-threshold=⟨n⟩
+    :shortdesc: *default: 2.* Reduce inlining for cases nested deeper than n.
+    :type: dynamic
+    :category:
+
+    :default: 2
+
+    .. index::
+       single: inlining, controlling
+       single: unfolding, controlling
+
+    GHC is in general quite eager to inline small functions. However sometimes
+    these functions will be expanded by more inlining after inlining. Since
+    they are now applied to "interesting" arguments. Even worse, their expanded
+    form might reference again a small function, which will be inlined and expanded
+    afterwards. This can repeat often and lead to explosive growth of programs.
+
+    As it happened in #18730.
+
+    Starting with GHC 9.0 we will be less eager to inline deep into nested cases.
+    We achieve this by applying an inlining penalty that increases as the nesting
+    gets deeper. However sometimes a specific (maybe quite high!) threshold of nesting
+    is to be expected.
+
+    In such cases this flag can be used to ignore the first ⟨n⟩ levels of nesting
+    when computing the penalty.
+
+    This flag in combination with :ghc-flag:`-funfolding-case-scaling=⟨n⟩` can
+    be used to break inlining loops without disabling inlining completely. For
+    this purpose a smaller value is more likely to break such loops although
+    often adjusting the scaling is enough and preferable.
+
+.. ghc-flag:: -funfolding-case-scaling=⟨n⟩
+    :shortdesc: *default: 30.* Apply a penalty of (inlining_cost * `1/n`) for each level of case nesting.
+    :type: dynamic
+    :category:
+
+    :default: 30
+
+    .. index::
+       single: inlining, controlling
+       single: unfolding, controlling
+
+    GHC is in general quite eager to inline small functions. However sometimes
+    these functions will be expanded by more inlining after inlining. Since
+    they are now applied to "interesting" arguments. Even worse, their expanded
+    form might reference again a small function, which will be inlined and expanded
+    afterwards. This can repeat often and lead to explosive growth of programs.
+
+    As it happened in #18730.
+
+    Starting with GHC 9.0 we will be less eager to inline deep into nested cases.
+    We achieve this by applying an inlining penalty that increases as the nesting
+    gets deeper. However sometimes we are ok with inlining a lot in the name of
+    performance.
+
+    In such cases this flag can be used to tune how hard we penalize inlining into
+    deeply nested cases beyond the threshold set by :ghc-flag:`-funfolding-case-threshold=⟨n⟩`.
+    Cases are only counted against the nesting level if they have more than one alternative.
+
+    We use 1/n to scale the penalty. That is a higher value gives a lower penalty.
+
+    This can be used to break inlining loops. For this purpose a lower value is
+    recommended. Values in the range 10 <= n <= 20 allow some inlining to take place
+    while still allowing GHC to compile modules containing such inlining loops.
+
+
 .. ghc-flag:: -fworker-wrapper
     :shortdesc: Enable the worker/wrapper transformation.
     :type: dynamic
@@ -1455,3 +1522,5 @@ by saying ``-fno-wombat``.
     This flag sets the size (in bytes) threshold above which the second approach
     is used. You can disable the second approach entirely by setting the
     threshold to 0.
+
+
diff --git a/testsuite/tests/dependent/should_compile/all.T b/testsuite/tests/dependent/should_compile/all.T
index cf5c76d380..a368edd128 100644
--- a/testsuite/tests/dependent/should_compile/all.T
+++ b/testsuite/tests/dependent/should_compile/all.T
@@ -10,14 +10,18 @@ test('RaeBlogPost', normal, compile, [''])
 test('mkGADTVars', normal, compile, [''])
 test('TypeLevelVec',normal,compile, [''])
 test('T9632', normal, compile, [''])
-# The dynamic-paper test fails in the profasm way if we don't increase
+
+# dynamic-paper used to run out of simplifier ticks because of
+# infinite inlining, but the new case-depth mechanism cuts that off,
+# so it now compiles fine.
+#
+# Historical notes: The dynamic-paper test fails in the profasm way if we don't increase
 # the simplifier tick limit. If we do, we run out of stack
 # space. If we increase the stack size enough with -K,
 # we run out of simplifier ticks again. This is
 # discussed in #11330.
-test('dynamic-paper',
-     expect_broken_for(11330, ['profasm']),
-     compile_fail, [''])
+test('dynamic-paper', normal, compile, [''])
+
 test('T11311', normal, compile, [''])
 test('T11405', normal, compile, [''])
 test('T11241', normal, compile, [''])
diff --git a/testsuite/tests/dependent/should_compile/dynamic-paper.stderr b/testsuite/tests/dependent/should_compile/dynamic-paper.stderr
deleted file mode 100644
index b05335047f..0000000000
--- a/testsuite/tests/dependent/should_compile/dynamic-paper.stderr
+++ /dev/null
@@ -1,15 +0,0 @@
-Simplifier ticks exhausted
-  When trying UnfoldingDone delta1
-  To increase the limit, use -fsimpl-tick-factor=N (default 100).
-   
-  If you need to increase the limit substantially, please file a
-  bug report and indicate the factor you needed.
-   
-  If GHC was unable to complete compilation even with a very large factor
-  (a thousand or more), please consult the "Known bugs or infelicities"
-  section in the Users Guide before filing a report. There are a
-  few situations unlikely to occur in practical programs for which
-  simplifier non-termination has been judged acceptable.
-   
-  To see detailed counts use -ddump-simpl-stats
-  Total ticks: 140801
diff --git a/testsuite/tests/driver/inline-check.stderr b/testsuite/tests/driver/inline-check.stderr
index 5bf9edaf24..953e101315 100644
--- a/testsuite/tests/driver/inline-check.stderr
+++ b/testsuite/tests/driver/inline-check.stderr
@@ -5,6 +5,8 @@ Considering inlining: foo
   is exp: True
   is work-free: True
   guidance IF_ARGS [0] 30 0
+  case depth = 0
+  depth based penalty = 0
   discounted size = 10
   ANSWER = YES
 Inactive unfolding: foo1
diff --git a/testsuite/tests/simplCore/should_compile/T18730.hs b/testsuite/tests/simplCore/should_compile/T18730.hs
new file mode 100644
index 0000000000..87cd1819d8
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T18730.hs
@@ -0,0 +1,26 @@
+{-# LANGUAGE TupleSections #-}
+{-# OPTIONS_GHC -funfolding-case-scaling=5 #-}
+
+module T18730 where
+
+import T18730_A (Gen)
+
+genFields :: Gen [(String, Int)]
+genFields =
+  mapM
+    (\(f, g) -> (f,) <$> g)
+    [ ("field", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    , ("field_10", genIntField)
+    ]
+
+genIntField :: Gen Int
+genIntField = pure 0
diff --git a/testsuite/tests/simplCore/should_compile/T18730.stderr b/testsuite/tests/simplCore/should_compile/T18730.stderr
new file mode 100644
index 0000000000..2b9a11ea07
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T18730.stderr
@@ -0,0 +1 @@
+[1 of 1] Compiling T18730_A         ( T18730_A.hs, T18730_A.o )
diff --git a/testsuite/tests/simplCore/should_compile/T18730_A.hs b/testsuite/tests/simplCore/should_compile/T18730_A.hs
new file mode 100644
index 0000000000..c076956b43
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T18730_A.hs
@@ -0,0 +1,50 @@
+module T18730_A where
+
+import Control.Monad (ap)
+import Data.Word
+import Data.Bits
+
+newtype Gen a = MkGen
+  { -- | Run the generator on a particular seed.
+    -- If you just want to get a random value out, consider using 'generate'.
+    unGen :: QCGen -> Int -> a
+  }
+
+instance Functor Gen where
+  fmap f (MkGen h) =
+    MkGen (\r n -> f (h r n))
+
+instance Applicative Gen where
+  pure x =
+    MkGen (\_ _ -> x)
+  (<*>) = ap
+
+instance Monad Gen where
+  return = pure
+
+  MkGen m >>= k =
+    MkGen
+      ( \r n ->
+          case split r of
+            (r1, r2) ->
+              let MkGen m' = k (m r1 n)
+               in m' r2 n
+      )
+
+  (>>) = (*>)
+
+data QCGen = QCGen !Word64 !Word64
+
+split :: QCGen -> (QCGen, QCGen)
+split (QCGen seed gamma) =
+    (QCGen seed'' gamma, QCGen seed' (mixGamma seed''))
+  where
+    seed'  = seed + gamma
+    seed'' = seed' + gamma
+
+-- This piece appears to be critical
+mixGamma :: Word64 -> Word64
+mixGamma z0 =
+    if z0 >= 24
+        then z0
+        else z0 `xor` 0xaaaaaaaaaaaaaaaa
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index d62a7ce0e6..e892ad7194 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -338,6 +338,7 @@ test('T18603', normal, compile, ['-dcore-lint -O'])
 # T18649 should /not/ generate a specialisation rule
 test('T18649', normal, compile, ['-O -ddump-rules -Wno-simplifiable-class-constraints'])
 
+test('T18730', normal, multimod_compile, ['T18730_A', '-dcore-lint -O'])
 test('T18747A', normal, compile, [''])
 test('T18747B', normal, compile, [''])
 test('T18815', only_ways(['optasm']), makefile_test, ['T18815'])
author	Simon Peyton Jones <simonpj@microsoft.com>	2020-12-16 12:57:17 +0000
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2021-02-09 16:21:40 -0500
commit	891a791f3f2438e5d768e1f96119d275e58e8d37 (patch)
tree	5a051da2d04b14ce92bdcb98a5c5ab84c9d344fb
parent	ab5fd982a7a501136cb8b90fa841c02cc9551b5a (diff)
download	haskell-891a791f3f2438e5d768e1f96119d275e58e8d37.tar.gz