diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2020-12-16 12:57:17 +0000 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-02-09 16:21:40 -0500 |
commit | 891a791f3f2438e5d768e1f96119d275e58e8d37 (patch) | |
tree | 5a051da2d04b14ce92bdcb98a5c5ab84c9d344fb /compiler/GHC/Core | |
parent | ab5fd982a7a501136cb8b90fa841c02cc9551b5a (diff) | |
download | haskell-891a791f3f2438e5d768e1f96119d275e58e8d37.tar.gz |
Reduce inlining in deeply-nested cases
This adds a new heuristic, controllable via two new flags to
better tune inlining behaviour.
The new flags are -funfolding-case-threshold and
-funfolding-case-scaling which are document both
in the user guide and in
Note [Avoid inlining into deeply nested cases].
Co-authored-by: Andreas Klebinger <klebinger.andreas@gmx.at>
Diffstat (limited to 'compiler/GHC/Core')
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 27 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Env.hs | 18 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Monad.hs | 9 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold.hs | 140 | ||||
-rw-r--r-- | compiler/GHC/Core/Unfold.hs-boot | 2 |
5 files changed, 170 insertions, 26 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index 0cea6da0bf..4ca8985f8b 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -1882,7 +1882,7 @@ simplIdF env var cont completeCall :: SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr) completeCall env var cont - | Just expr <- callSiteInline dflags var active_unf + | Just expr <- callSiteInline dflags case_depth var active_unf lone_variable arg_infos interesting_cont -- Inline the variable's RHS = do { checkedTick (UnfoldingDone var) @@ -1897,7 +1897,8 @@ completeCall env var cont ; rebuildCall env info cont } where - dflags = seDynFlags env + dflags = seDynFlags env + case_depth = seCaseDepth env (lone_variable, arg_infos, call_cont) = contArgs cont n_val_args = length arg_infos interesting_cont = interestingCallContext env call_cont @@ -2724,9 +2725,11 @@ reallyRebuildCase env scrut case_bndr alts cont ; rebuild env case_expr cont } | otherwise - = do { (floats, cont') <- mkDupableCaseCont env alts cont - ; case_expr <- simplAlts (env `setInScopeFromF` floats) - scrut (scaleIdBy holeScaling case_bndr) (scaleAltsBy holeScaling alts) cont' + = do { (floats, env', cont') <- mkDupableCaseCont env alts cont + ; case_expr <- simplAlts env' scrut + (scaleIdBy holeScaling case_bndr) + (scaleAltsBy holeScaling alts) + cont' ; return (floats, case_expr) } where holeScaling = contHoleScaling cont @@ -3234,10 +3237,15 @@ join points and inlining them away. See #4930. -------------------- mkDupableCaseCont :: SimplEnv -> [InAlt] -> SimplCont - -> SimplM (SimplFloats, SimplCont) + -> SimplM ( SimplFloats -- Join points (if any) + , SimplEnv -- Use this for the alts + , SimplCont) mkDupableCaseCont env alts cont - | altsWouldDup alts = mkDupableCont env cont - | otherwise = return (emptyFloats env, cont) + | altsWouldDup alts = do { (floats, cont) <- mkDupableCont env cont + ; let env' = bumpCaseDepth $ + env `setInScopeFromF` floats + ; return (floats, env', cont) } + | otherwise = return (emptyFloats env, env, cont) altsWouldDup :: [InAlt] -> Bool -- True iff strictly > 1 non-bottom alternative altsWouldDup [] = False -- See Note [Bottom alternatives] @@ -3370,12 +3378,11 @@ mkDupableContWithDmds env _ -- in case [...hole...] of { pi -> ji xij } -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable do { tick (CaseOfCase case_bndr) - ; (floats, alt_cont) <- mkDupableCaseCont env alts cont + ; (floats, alt_env, alt_cont) <- mkDupableCaseCont (se `setInScopeFromE` env) alts cont -- NB: We call mkDupableCaseCont here to make cont duplicable -- (if necessary, depending on the number of alts) -- And this is important: see Note [Fusing case continuations] - ; let alt_env = se `setInScopeFromF` floats ; let cont_scaling = contHoleScaling cont -- See Note [Scaling in case-of-case] ; (alt_env', case_bndr') <- simplBinder alt_env (scaleIdBy cont_scaling case_bndr) diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs index 180e562c73..0d4e06f9c2 100644 --- a/compiler/GHC/Core/Opt/Simplify/Env.hs +++ b/compiler/GHC/Core/Opt/Simplify/Env.hs @@ -14,7 +14,7 @@ module GHC.Core.Opt.Simplify.Env ( SimplEnv(..), pprSimplEnv, -- Temp not abstract mkSimplEnv, extendIdSubst, extendTvSubst, extendCvSubst, - zapSubstEnv, setSubstEnv, + zapSubstEnv, setSubstEnv, bumpCaseDepth, getInScope, setInScopeFromE, setInScopeFromF, setInScopeSet, modifyInScope, addNewInScopeIds, getSimplRules, @@ -103,6 +103,8 @@ data SimplEnv -- The current set of in-scope variables -- They are all OutVars, and all bound in this module , seInScope :: InScopeSet -- OutVars only + + , seCaseDepth :: !Int -- Depth of multi-branch case alternatives } data SimplFloats @@ -272,11 +274,12 @@ points we're substituting. -} mkSimplEnv :: SimplMode -> SimplEnv mkSimplEnv mode - = SimplEnv { seMode = mode - , seInScope = init_in_scope - , seTvSubst = emptyVarEnv - , seCvSubst = emptyVarEnv - , seIdSubst = emptyVarEnv } + = SimplEnv { seMode = mode + , seInScope = init_in_scope + , seTvSubst = emptyVarEnv + , seCvSubst = emptyVarEnv + , seIdSubst = emptyVarEnv + , seCaseDepth = 0 } -- The top level "enclosing CC" is "SUBSUMED". init_in_scope :: InScopeSet @@ -319,6 +322,9 @@ setMode mode env = env { seMode = mode } updMode :: (SimplMode -> SimplMode) -> SimplEnv -> SimplEnv updMode upd env = env { seMode = upd (seMode env) } +bumpCaseDepth :: SimplEnv -> SimplEnv +bumpCaseDepth env = env { seCaseDepth = seCaseDepth env + 1 } + --------------------- extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs index 0130fcf61e..d1e27f9fca 100644 --- a/compiler/GHC/Core/Opt/Simplify/Monad.hs +++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs @@ -254,8 +254,13 @@ checkedTick t [ text "When trying" <+> ppr t , text "To increase the limit, use -fsimpl-tick-factor=N (default 100)." , space - , text "If you need to increase the limit substantially, please file a" - , text "bug report and indicate the factor you needed." + , text "In addition try adjusting -funfolding-case-threshold=N and" + , text "-funfolding-case-scaling=N for the module in question." + , text "Using threshold=1 and scaling=5 should break most inlining loops." + , space + , text "If you need to increase the tick factor substantially, while also" + , text "adjusting unfolding parameters please file a bug report and" + , text "indicate the factor you needed." , space , text "If GHC was unable to complete compilation even" <+> text "with a very large factor" diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs index b97e10cd46..8a61eec3c7 100644 --- a/compiler/GHC/Core/Unfold.hs +++ b/compiler/GHC/Core/Unfold.hs @@ -26,7 +26,7 @@ module GHC.Core.Unfold ( UnfoldingOpts (..), defaultUnfoldingOpts, updateCreationThreshold, updateUseThreshold, updateFunAppDiscount, updateDictDiscount, - updateVeryAggressive, + updateVeryAggressive, updateCaseScaling, updateCaseThreshold, ArgSummary(..), @@ -82,6 +82,12 @@ data UnfoldingOpts = UnfoldingOpts , unfoldingVeryAggressive :: !Bool -- ^ Force inlining in many more cases + + -- Don't consider depth up to x + , unfoldingCaseThreshold :: !Int + + -- Penalize depth with 1/x + , unfoldingCaseScaling :: !Int } defaultUnfoldingOpts :: UnfoldingOpts @@ -106,6 +112,13 @@ defaultUnfoldingOpts = UnfoldingOpts -- we'll be able to pick the right method from a dictionary , unfoldingVeryAggressive = False + + -- Only apply scaling once we are deeper than threshold cases + -- in an RHS. + , unfoldingCaseThreshold = 2 + + -- Penalize depth with (size*depth)/scaling + , unfoldingCaseScaling = 30 } -- Helpers for "GHC.Driver.Session" @@ -125,6 +138,13 @@ updateDictDiscount n opts = opts { unfoldingDictDiscount = n } updateVeryAggressive :: Bool -> UnfoldingOpts -> UnfoldingOpts updateVeryAggressive n opts = opts { unfoldingVeryAggressive = n } + +updateCaseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts +updateCaseThreshold n opts = opts { unfoldingCaseThreshold = n } + +updateCaseScaling :: Int -> UnfoldingOpts -> UnfoldingOpts +updateCaseScaling n opts = opts { unfoldingCaseScaling = n } + {- Note [Occurrence analysis of unfoldings] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1033,6 +1053,7 @@ StrictAnal.addStrictnessInfoToTopId -} callSiteInline :: DynFlags + -> Int -- Case depth -> Id -- The Id -> Bool -- True <=> unfolding is active -> Bool -- True if there are no arguments at all (incl type args) @@ -1075,7 +1096,7 @@ instance Outputable CallCtxt where ppr DiscArgCtxt = text "DiscArgCtxt" ppr RuleArgCtxt = text "RuleArgCtxt" -callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info +callSiteInline dflags !case_depth id active_unfolding lone_variable arg_infos cont_info = case idUnfolding id of -- idUnfolding checks for loop-breakers, returning NoUnfolding -- Things with an INLINE pragma may have an unfolding *and* @@ -1083,7 +1104,7 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info CoreUnfolding { uf_tmpl = unf_template , uf_is_work_free = is_wf , uf_guidance = guidance, uf_expandable = is_exp } - | active_unfolding -> tryUnfolding dflags id lone_variable + | active_unfolding -> tryUnfolding dflags case_depth id lone_variable arg_infos cont_info unf_template is_wf is_exp guidance | otherwise -> traceInline dflags id "Inactive unfolding:" (ppr id) Nothing @@ -1110,10 +1131,106 @@ traceInline dflags inline_id str doc result = False {-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities] -tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt +{- Note [Avoid inlining into deeply nested cases] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Consider a function f like this: + + f arg1 arg2 = + case ... + ... -> g arg1 + ... -> g arg2 + +This function is small. So should be safe to inline. +However sometimes this doesn't quite work out like that. +Consider this code: + +f1 arg1 arg2 ... = ... + case _foo of + alt1 -> ... f2 arg1 ... + alt2 -> ... f2 arg2 ... + +f2 arg1 arg2 ... = ... + case _foo of + alt1 -> ... f3 arg1 ... + alt2 -> ... f3 arg2 ... + +f3 arg1 arg2 ... = ... + +... repeats up to n times. And then f1 is +applied to some arguments: + +foo = ... f1 <interestingArgs> ... + +Initially f2..fn are not interesting to inline so we don't. +However we see that f1 is applied to interesting args. +So it's an obvious choice to inline those: + +foo = + ... + case _foo of + alt1 -> ... f2 <interestingArg> ... + alt2 -> ... f2 <interestingArg> ... + +As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting +arguments and f2 is small: + +foo = + ... + case _foo of + alt1 -> ... case _foo of + alt1 -> ... f3 <interestingArg> ... + alt2 -> ... f3 <interestingArg> ... + + alt2 -> ... case _foo of + alt1 -> ... f3 <interestingArg> ... + alt2 -> ... f3 <interestingArg> ... + +The same thing happens for each binding up to f_n, duplicating the amount of inlining +done in each step. Until at some point we are either done or run out of simplifier +ticks/RAM. This pattern happened #18730. + +To combat this we introduce one more heuristic when weighing inlining decision. +We keep track of a "case-depth". Which increases each time we look inside a case +expression with more than one alternative. + +We then apply a penalty to inlinings based on the case-depth at which they would +be inlined. Bounding the number of inlinings in such a scenario. + +The heuristic can be tuned in two ways: + +* We can ignore the first n levels of case nestings for inlining decisions using + -funfolding-case-threshold. +* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling. + Scaling can be set with -funfolding-case-scaling. + +Some guidance on setting these defaults: + +* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of + control. We picked 2 for no particular reason. +* Scaling the penalty by any more than 30 means the reproducer from + T18730 won't compile even with reasonably small values of n. Instead + it will run out of runs/ticks. This means to positively affect the reproducer + a scaling <= 30 is required. +* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks. + (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps) +* A scaling of >= 25 showed no regressions on nofib. However it showed a number of + (small) regression for compiler perf benchmarks. + +The end result is that we are settling for a scaling of 30, with a threshold of 2. +This gives us minimal compiler perf regressions. No nofib runtime regressions and +will still avoid this pattern sometimes. This is a "safe" default, where we err on +the side of compiler blowup instead of risking runtime regressions. + +For cases where the default falls short the flag can be changed to allow more/less inlining as +needed on a per-module basis. + +-} + +tryUnfolding :: DynFlags -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance -> Maybe CoreExpr -tryUnfolding dflags id lone_variable +tryUnfolding dflags !case_depth id lone_variable arg_infos cont_info unf_template is_wf is_exp guidance = case guidance of @@ -1138,9 +1255,16 @@ tryUnfolding dflags id lone_variable -> traceInline dflags id str (mk_doc some_benefit extra_doc False) Nothing where some_benefit = calc_some_benefit (length arg_discounts) - extra_doc = text "discounted size =" <+> int discounted_size - discounted_size = size - discount - small_enough = discounted_size <= unfoldingUseThreshold uf_opts + extra_doc = vcat [ text "case depth =" <+> int case_depth + , text "depth based penalty =" <+> int depth_penalty + , text "discounted size =" <+> int adjusted_size ] + -- See Note [Avoid inlining into deeply nested cases] + depth_treshold = unfoldingCaseThreshold uf_opts + depth_scaling = unfoldingCaseScaling uf_opts + depth_penalty | case_depth <= depth_treshold = 0 + | otherwise = (size * (case_depth - depth_treshold)) `div` depth_scaling + adjusted_size = size + depth_penalty - discount + small_enough = adjusted_size <= unfoldingUseThreshold uf_opts discount = computeDiscount arg_discounts res_discount arg_infos cont_info where diff --git a/compiler/GHC/Core/Unfold.hs-boot b/compiler/GHC/Core/Unfold.hs-boot index b86f8b2585..c62f1915c9 100644 --- a/compiler/GHC/Core/Unfold.hs-boot +++ b/compiler/GHC/Core/Unfold.hs-boot @@ -11,3 +11,5 @@ updateUseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts updateFunAppDiscount :: Int -> UnfoldingOpts -> UnfoldingOpts updateDictDiscount :: Int -> UnfoldingOpts -> UnfoldingOpts updateVeryAggressive :: Bool -> UnfoldingOpts -> UnfoldingOpts +updateCaseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts +updateCaseScaling :: Int -> UnfoldingOpts -> UnfoldingOpts |