summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2020-12-16 12:57:17 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-02-09 16:21:40 -0500
commit891a791f3f2438e5d768e1f96119d275e58e8d37 (patch)
tree5a051da2d04b14ce92bdcb98a5c5ab84c9d344fb /compiler/GHC/Core
parentab5fd982a7a501136cb8b90fa841c02cc9551b5a (diff)
downloadhaskell-891a791f3f2438e5d768e1f96119d275e58e8d37.tar.gz
Reduce inlining in deeply-nested cases
This adds a new heuristic, controllable via two new flags to better tune inlining behaviour. The new flags are -funfolding-case-threshold and -funfolding-case-scaling which are document both in the user guide and in Note [Avoid inlining into deeply nested cases]. Co-authored-by: Andreas Klebinger <klebinger.andreas@gmx.at>
Diffstat (limited to 'compiler/GHC/Core')
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs27
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Env.hs18
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Monad.hs9
-rw-r--r--compiler/GHC/Core/Unfold.hs140
-rw-r--r--compiler/GHC/Core/Unfold.hs-boot2
5 files changed, 170 insertions, 26 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 0cea6da0bf..4ca8985f8b 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -1882,7 +1882,7 @@ simplIdF env var cont
completeCall :: SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr)
completeCall env var cont
- | Just expr <- callSiteInline dflags var active_unf
+ | Just expr <- callSiteInline dflags case_depth var active_unf
lone_variable arg_infos interesting_cont
-- Inline the variable's RHS
= do { checkedTick (UnfoldingDone var)
@@ -1897,7 +1897,8 @@ completeCall env var cont
; rebuildCall env info cont }
where
- dflags = seDynFlags env
+ dflags = seDynFlags env
+ case_depth = seCaseDepth env
(lone_variable, arg_infos, call_cont) = contArgs cont
n_val_args = length arg_infos
interesting_cont = interestingCallContext env call_cont
@@ -2724,9 +2725,11 @@ reallyRebuildCase env scrut case_bndr alts cont
; rebuild env case_expr cont }
| otherwise
- = do { (floats, cont') <- mkDupableCaseCont env alts cont
- ; case_expr <- simplAlts (env `setInScopeFromF` floats)
- scrut (scaleIdBy holeScaling case_bndr) (scaleAltsBy holeScaling alts) cont'
+ = do { (floats, env', cont') <- mkDupableCaseCont env alts cont
+ ; case_expr <- simplAlts env' scrut
+ (scaleIdBy holeScaling case_bndr)
+ (scaleAltsBy holeScaling alts)
+ cont'
; return (floats, case_expr) }
where
holeScaling = contHoleScaling cont
@@ -3234,10 +3237,15 @@ join points and inlining them away. See #4930.
--------------------
mkDupableCaseCont :: SimplEnv -> [InAlt] -> SimplCont
- -> SimplM (SimplFloats, SimplCont)
+ -> SimplM ( SimplFloats -- Join points (if any)
+ , SimplEnv -- Use this for the alts
+ , SimplCont)
mkDupableCaseCont env alts cont
- | altsWouldDup alts = mkDupableCont env cont
- | otherwise = return (emptyFloats env, cont)
+ | altsWouldDup alts = do { (floats, cont) <- mkDupableCont env cont
+ ; let env' = bumpCaseDepth $
+ env `setInScopeFromF` floats
+ ; return (floats, env', cont) }
+ | otherwise = return (emptyFloats env, env, cont)
altsWouldDup :: [InAlt] -> Bool -- True iff strictly > 1 non-bottom alternative
altsWouldDup [] = False -- See Note [Bottom alternatives]
@@ -3370,12 +3378,11 @@ mkDupableContWithDmds env _
-- in case [...hole...] of { pi -> ji xij }
-- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
do { tick (CaseOfCase case_bndr)
- ; (floats, alt_cont) <- mkDupableCaseCont env alts cont
+ ; (floats, alt_env, alt_cont) <- mkDupableCaseCont (se `setInScopeFromE` env) alts cont
-- NB: We call mkDupableCaseCont here to make cont duplicable
-- (if necessary, depending on the number of alts)
-- And this is important: see Note [Fusing case continuations]
- ; let alt_env = se `setInScopeFromF` floats
; let cont_scaling = contHoleScaling cont
-- See Note [Scaling in case-of-case]
; (alt_env', case_bndr') <- simplBinder alt_env (scaleIdBy cont_scaling case_bndr)
diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs
index 180e562c73..0d4e06f9c2 100644
--- a/compiler/GHC/Core/Opt/Simplify/Env.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Env.hs
@@ -14,7 +14,7 @@ module GHC.Core.Opt.Simplify.Env (
SimplEnv(..), pprSimplEnv, -- Temp not abstract
mkSimplEnv, extendIdSubst,
extendTvSubst, extendCvSubst,
- zapSubstEnv, setSubstEnv,
+ zapSubstEnv, setSubstEnv, bumpCaseDepth,
getInScope, setInScopeFromE, setInScopeFromF,
setInScopeSet, modifyInScope, addNewInScopeIds,
getSimplRules,
@@ -103,6 +103,8 @@ data SimplEnv
-- The current set of in-scope variables
-- They are all OutVars, and all bound in this module
, seInScope :: InScopeSet -- OutVars only
+
+ , seCaseDepth :: !Int -- Depth of multi-branch case alternatives
}
data SimplFloats
@@ -272,11 +274,12 @@ points we're substituting. -}
mkSimplEnv :: SimplMode -> SimplEnv
mkSimplEnv mode
- = SimplEnv { seMode = mode
- , seInScope = init_in_scope
- , seTvSubst = emptyVarEnv
- , seCvSubst = emptyVarEnv
- , seIdSubst = emptyVarEnv }
+ = SimplEnv { seMode = mode
+ , seInScope = init_in_scope
+ , seTvSubst = emptyVarEnv
+ , seCvSubst = emptyVarEnv
+ , seIdSubst = emptyVarEnv
+ , seCaseDepth = 0 }
-- The top level "enclosing CC" is "SUBSUMED".
init_in_scope :: InScopeSet
@@ -319,6 +322,9 @@ setMode mode env = env { seMode = mode }
updMode :: (SimplMode -> SimplMode) -> SimplEnv -> SimplEnv
updMode upd env = env { seMode = upd (seMode env) }
+bumpCaseDepth :: SimplEnv -> SimplEnv
+bumpCaseDepth env = env { seCaseDepth = seCaseDepth env + 1 }
+
---------------------
extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv
extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res
diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs
index 0130fcf61e..d1e27f9fca 100644
--- a/compiler/GHC/Core/Opt/Simplify/Monad.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs
@@ -254,8 +254,13 @@ checkedTick t
[ text "When trying" <+> ppr t
, text "To increase the limit, use -fsimpl-tick-factor=N (default 100)."
, space
- , text "If you need to increase the limit substantially, please file a"
- , text "bug report and indicate the factor you needed."
+ , text "In addition try adjusting -funfolding-case-threshold=N and"
+ , text "-funfolding-case-scaling=N for the module in question."
+ , text "Using threshold=1 and scaling=5 should break most inlining loops."
+ , space
+ , text "If you need to increase the tick factor substantially, while also"
+ , text "adjusting unfolding parameters please file a bug report and"
+ , text "indicate the factor you needed."
, space
, text "If GHC was unable to complete compilation even"
<+> text "with a very large factor"
diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs
index b97e10cd46..8a61eec3c7 100644
--- a/compiler/GHC/Core/Unfold.hs
+++ b/compiler/GHC/Core/Unfold.hs
@@ -26,7 +26,7 @@ module GHC.Core.Unfold (
UnfoldingOpts (..), defaultUnfoldingOpts,
updateCreationThreshold, updateUseThreshold,
updateFunAppDiscount, updateDictDiscount,
- updateVeryAggressive,
+ updateVeryAggressive, updateCaseScaling, updateCaseThreshold,
ArgSummary(..),
@@ -82,6 +82,12 @@ data UnfoldingOpts = UnfoldingOpts
, unfoldingVeryAggressive :: !Bool
-- ^ Force inlining in many more cases
+
+ -- Don't consider depth up to x
+ , unfoldingCaseThreshold :: !Int
+
+ -- Penalize depth with 1/x
+ , unfoldingCaseScaling :: !Int
}
defaultUnfoldingOpts :: UnfoldingOpts
@@ -106,6 +112,13 @@ defaultUnfoldingOpts = UnfoldingOpts
-- we'll be able to pick the right method from a dictionary
, unfoldingVeryAggressive = False
+
+ -- Only apply scaling once we are deeper than threshold cases
+ -- in an RHS.
+ , unfoldingCaseThreshold = 2
+
+ -- Penalize depth with (size*depth)/scaling
+ , unfoldingCaseScaling = 30
}
-- Helpers for "GHC.Driver.Session"
@@ -125,6 +138,13 @@ updateDictDiscount n opts = opts { unfoldingDictDiscount = n }
updateVeryAggressive :: Bool -> UnfoldingOpts -> UnfoldingOpts
updateVeryAggressive n opts = opts { unfoldingVeryAggressive = n }
+
+updateCaseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseThreshold n opts = opts { unfoldingCaseThreshold = n }
+
+updateCaseScaling :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseScaling n opts = opts { unfoldingCaseScaling = n }
+
{-
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1033,6 +1053,7 @@ StrictAnal.addStrictnessInfoToTopId
-}
callSiteInline :: DynFlags
+ -> Int -- Case depth
-> Id -- The Id
-> Bool -- True <=> unfolding is active
-> Bool -- True if there are no arguments at all (incl type args)
@@ -1075,7 +1096,7 @@ instance Outputable CallCtxt where
ppr DiscArgCtxt = text "DiscArgCtxt"
ppr RuleArgCtxt = text "RuleArgCtxt"
-callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
+callSiteInline dflags !case_depth id active_unfolding lone_variable arg_infos cont_info
= case idUnfolding id of
-- idUnfolding checks for loop-breakers, returning NoUnfolding
-- Things with an INLINE pragma may have an unfolding *and*
@@ -1083,7 +1104,7 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
CoreUnfolding { uf_tmpl = unf_template
, uf_is_work_free = is_wf
, uf_guidance = guidance, uf_expandable = is_exp }
- | active_unfolding -> tryUnfolding dflags id lone_variable
+ | active_unfolding -> tryUnfolding dflags case_depth id lone_variable
arg_infos cont_info unf_template
is_wf is_exp guidance
| otherwise -> traceInline dflags id "Inactive unfolding:" (ppr id) Nothing
@@ -1110,10 +1131,106 @@ traceInline dflags inline_id str doc result
= False
{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
-tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
+{- Note [Avoid inlining into deeply nested cases]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a function f like this:
+
+ f arg1 arg2 =
+ case ...
+ ... -> g arg1
+ ... -> g arg2
+
+This function is small. So should be safe to inline.
+However sometimes this doesn't quite work out like that.
+Consider this code:
+
+f1 arg1 arg2 ... = ...
+ case _foo of
+ alt1 -> ... f2 arg1 ...
+ alt2 -> ... f2 arg2 ...
+
+f2 arg1 arg2 ... = ...
+ case _foo of
+ alt1 -> ... f3 arg1 ...
+ alt2 -> ... f3 arg2 ...
+
+f3 arg1 arg2 ... = ...
+
+... repeats up to n times. And then f1 is
+applied to some arguments:
+
+foo = ... f1 <interestingArgs> ...
+
+Initially f2..fn are not interesting to inline so we don't.
+However we see that f1 is applied to interesting args.
+So it's an obvious choice to inline those:
+
+foo =
+ ...
+ case _foo of
+ alt1 -> ... f2 <interestingArg> ...
+ alt2 -> ... f2 <interestingArg> ...
+
+As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting
+arguments and f2 is small:
+
+foo =
+ ...
+ case _foo of
+ alt1 -> ... case _foo of
+ alt1 -> ... f3 <interestingArg> ...
+ alt2 -> ... f3 <interestingArg> ...
+
+ alt2 -> ... case _foo of
+ alt1 -> ... f3 <interestingArg> ...
+ alt2 -> ... f3 <interestingArg> ...
+
+The same thing happens for each binding up to f_n, duplicating the amount of inlining
+done in each step. Until at some point we are either done or run out of simplifier
+ticks/RAM. This pattern happened #18730.
+
+To combat this we introduce one more heuristic when weighing inlining decision.
+We keep track of a "case-depth". Which increases each time we look inside a case
+expression with more than one alternative.
+
+We then apply a penalty to inlinings based on the case-depth at which they would
+be inlined. Bounding the number of inlinings in such a scenario.
+
+The heuristic can be tuned in two ways:
+
+* We can ignore the first n levels of case nestings for inlining decisions using
+ -funfolding-case-threshold.
+* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling.
+ Scaling can be set with -funfolding-case-scaling.
+
+Some guidance on setting these defaults:
+
+* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of
+ control. We picked 2 for no particular reason.
+* Scaling the penalty by any more than 30 means the reproducer from
+ T18730 won't compile even with reasonably small values of n. Instead
+ it will run out of runs/ticks. This means to positively affect the reproducer
+ a scaling <= 30 is required.
+* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
+ (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
+* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
+ (small) regression for compiler perf benchmarks.
+
+The end result is that we are settling for a scaling of 30, with a threshold of 2.
+This gives us minimal compiler perf regressions. No nofib runtime regressions and
+will still avoid this pattern sometimes. This is a "safe" default, where we err on
+the side of compiler blowup instead of risking runtime regressions.
+
+For cases where the default falls short the flag can be changed to allow more/less inlining as
+needed on a per-module basis.
+
+-}
+
+tryUnfolding :: DynFlags -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
-> CoreExpr -> Bool -> Bool -> UnfoldingGuidance
-> Maybe CoreExpr
-tryUnfolding dflags id lone_variable
+tryUnfolding dflags !case_depth id lone_variable
arg_infos cont_info unf_template
is_wf is_exp guidance
= case guidance of
@@ -1138,9 +1255,16 @@ tryUnfolding dflags id lone_variable
-> traceInline dflags id str (mk_doc some_benefit extra_doc False) Nothing
where
some_benefit = calc_some_benefit (length arg_discounts)
- extra_doc = text "discounted size =" <+> int discounted_size
- discounted_size = size - discount
- small_enough = discounted_size <= unfoldingUseThreshold uf_opts
+ extra_doc = vcat [ text "case depth =" <+> int case_depth
+ , text "depth based penalty =" <+> int depth_penalty
+ , text "discounted size =" <+> int adjusted_size ]
+ -- See Note [Avoid inlining into deeply nested cases]
+ depth_treshold = unfoldingCaseThreshold uf_opts
+ depth_scaling = unfoldingCaseScaling uf_opts
+ depth_penalty | case_depth <= depth_treshold = 0
+ | otherwise = (size * (case_depth - depth_treshold)) `div` depth_scaling
+ adjusted_size = size + depth_penalty - discount
+ small_enough = adjusted_size <= unfoldingUseThreshold uf_opts
discount = computeDiscount arg_discounts res_discount arg_infos cont_info
where
diff --git a/compiler/GHC/Core/Unfold.hs-boot b/compiler/GHC/Core/Unfold.hs-boot
index b86f8b2585..c62f1915c9 100644
--- a/compiler/GHC/Core/Unfold.hs-boot
+++ b/compiler/GHC/Core/Unfold.hs-boot
@@ -11,3 +11,5 @@ updateUseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts
updateFunAppDiscount :: Int -> UnfoldingOpts -> UnfoldingOpts
updateDictDiscount :: Int -> UnfoldingOpts -> UnfoldingOpts
updateVeryAggressive :: Bool -> UnfoldingOpts -> UnfoldingOpts
+updateCaseThreshold :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateCaseScaling :: Int -> UnfoldingOpts -> UnfoldingOpts