summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2022-11-09 12:08:06 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-11-11 13:18:07 -0500
commitd0c691b6110b11a43d5ea2685d17bc001d2298da (patch)
tree3de6c1a31b444e687936b03063921091d0182f21
parent6dab0046b575e102bf3245fd63d5ac6bc6f4204d (diff)
downloadhaskell-d0c691b6110b11a43d5ea2685d17bc001d2298da.tar.gz
Add a fast path for data constructor workers
See Note [Fast path for data constructors] in GHC.Core.Opt.Simplify.Iteration This bypasses lots of expensive logic, in the special case of applications of data constructors. It is a surprisingly worthwhile improvement, as you can see in the figures below. Metrics: compile_time/bytes allocated ------------------------------------------------ CoOpt_Read(normal) -2.0% CoOpt_Singletons(normal) -2.0% ManyConstructors(normal) -1.3% T10421(normal) -1.9% GOOD T10421a(normal) -1.5% T10858(normal) -1.6% T11545(normal) -1.7% T12234(optasm) -1.3% T12425(optasm) -1.9% GOOD T13035(normal) -1.0% GOOD T13056(optasm) -1.8% T13253(normal) -3.3% GOOD T15164(normal) -1.7% T15304(normal) -3.4% T15630(normal) -2.8% T16577(normal) -4.3% GOOD T17096(normal) -1.1% T17516(normal) -3.1% T18282(normal) -1.9% T18304(normal) -1.2% T18698a(normal) -1.2% GOOD T18698b(normal) -1.5% GOOD T18923(normal) -1.3% T1969(normal) -1.3% GOOD T19695(normal) -4.4% GOOD T21839c(normal) -2.7% GOOD T21839r(normal) -2.7% GOOD T4801(normal) -3.8% GOOD T5642(normal) -3.1% GOOD T6048(optasm) -2.5% GOOD T9020(optasm) -2.7% GOOD T9630(normal) -2.1% GOOD T9961(normal) -11.7% GOOD WWRec(normal) -1.0% geo. mean -1.1% minimum -11.7% maximum +0.1% Metric Decrease: T10421 T12425 T13035 T13253 T16577 T18698a T18698b T1969 T19695 T21839c T21839r T4801 T5642 T6048 T9020 T9630 T9961
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Iteration.hs40
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs32
-rw-r--r--compiler/GHC/Types/Id/Make.hs1
3 files changed, 55 insertions, 18 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Iteration.hs b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
index 39e0e892d2..1b7e30dfec 100644
--- a/compiler/GHC/Core/Opt/Simplify/Iteration.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
@@ -1497,9 +1497,10 @@ rebuild env expr cont
ApplyToTy { sc_arg_ty = ty, sc_cont = cont}
-> rebuild env (App expr (Type ty)) cont
- ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag, sc_cont = cont}
+ ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag
+ , sc_cont = cont, sc_hole_ty = fun_ty }
-- See Note [Avoid redundant simplification]
- -> do { (_, _, arg') <- simplArg env dup_flag se arg
+ -> do { (_, _, arg') <- simplArg env dup_flag fun_ty se arg
; rebuild env (App expr arg') cont }
completeBindX :: SimplEnv
@@ -1598,7 +1599,8 @@ simplCast env body co0 cont0
-- co1 :: t1 ~ s1
-- co2 :: s2 ~ t2
addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
- , sc_dup = dup, sc_cont = tail })
+ , sc_dup = dup, sc_cont = tail
+ , sc_hole_ty = fun_ty })
| Just (m_co1, m_co2) <- pushCoValArg co
, fixed_rep m_co1
= {-#SCC "addCoerce-pushCoValArg" #-}
@@ -1610,7 +1612,7 @@ simplCast env body co0 cont0
-- See Note [Avoiding exponential behaviour]
MCo co1 ->
- do { (dup', arg_se', arg') <- simplArg env dup arg_se arg
+ do { (dup', arg_se', arg') <- simplArg env dup fun_ty arg_se arg
-- When we build the ApplyTo we can't mix the OutCoercion
-- 'co' with the InExpr 'arg', so we simplify
-- to make it all consistent. It's a bit messy.
@@ -1636,14 +1638,16 @@ simplCast env body co0 cont0
-- See Note [Representation polymorphism invariants] in GHC.Core
-- test: typecheck/should_run/EtaExpandLevPoly
-simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
+simplArg :: SimplEnv -> DupFlag
+ -> OutType -- Type of the function applied to this arg
+ -> StaticEnv -> CoreExpr -- Expression with its static envt
-> SimplM (DupFlag, StaticEnv, OutExpr)
-simplArg env dup_flag arg_env arg
+simplArg env dup_flag fun_ty arg_env arg
| isSimplified dup_flag
= return (dup_flag, arg_env, arg)
| otherwise
= do { let arg_env' = arg_env `setInScopeFromE` env
- ; arg' <- simplExpr arg_env' arg
+ ; arg' <- simplExprC arg_env' arg (mkBoringStop (funArgTy fun_ty))
; return (Simplified, zapSubstEnv arg_env', arg') }
-- Return a StaticEnv that includes the in-scope set from 'env',
-- because arg' may well mention those variables (#20639)
@@ -2029,6 +2033,21 @@ zap the SubstEnv. This is VITAL. Consider
We'll clone the inner \x, adding x->x' in the id_subst Then when we
inline y, we must *not* replace x by x' in the inlined copy!!
+
+Note [Fast path for data constructors]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For applications of a data constructor worker, the full glory of
+rebuildCall is a waste of effort;
+* They never inline, obviously
+* They have no rewrite rules
+* They are not strict (see Note [Data-con worker strictness]
+ in GHC.Core.DataCon)
+So it's fine to zoom straight to `rebuild` which just rebuilds the
+call in a very straightforward way.
+
+Some programs have a /lot/ of data constructors in the source program
+(compiler/perf/T9961 is an example), so this fast path can be very
+valuable.
-}
simplVar :: SimplEnv -> InVar -> SimplM OutExpr
@@ -2046,6 +2065,9 @@ simplVar env var
simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr)
simplIdF env var cont
+ | isDataConWorkId var -- See Note [Fast path for data constructors]
+ = rebuild env (Var var) cont
+ | otherwise
= case substId env var of
ContEx tvs cvs ids e -> simplExprF env' e cont
-- Don't trimJoinCont; haven't already simplified e,
@@ -2315,6 +2337,8 @@ field of the ArgInfo record is the state of a little state-machine:
If we inline `f` before simplifying `BIG` well use preInlineUnconditionally,
and we'll simplify BIG once, at x's occurrence, rather than twice.
+* GHC.Core.Opt.Simplify.Utils. mkRewriteCall: if there are no rules, and no
+ unfolding, we can skip both TryRules and TryInlining, which saves work.
Note [Avoid redundant simplification]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3645,7 +3669,7 @@ mkDupableContWithDmds env dmds
do { let (dmd:cont_dmds) = dmds -- Never fails
; (floats1, cont') <- mkDupableContWithDmds env cont_dmds cont
; let env' = env `setInScopeFromF` floats1
- ; (_, se', arg') <- simplArg env' dup se arg
+ ; (_, se', arg') <- simplArg env' dup hole_ty se arg
; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg'
; let all_floats = floats1 `addLetFloats` let_floats2
; return ( all_floats
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 31a0130969..5042483bd0 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -425,12 +425,22 @@ decArgCount :: RewriteCall -> RewriteCall
decArgCount (TryRules n rules) = TryRules (n-1) rules
decArgCount rew = rew
-mkTryRules :: [CoreRule] -> RewriteCall
+mkRewriteCall :: Id -> RuleEnv -> RewriteCall
-- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration
-mkTryRules [] = TryInlining
-mkTryRules rs = TryRules n_required rs
+-- We try to skip any unnecessary stages:
+-- No rules => skip TryRules
+-- No unfolding => skip TryInlining
+-- This skipping is "just" for efficiency. But rebuildCall is
+-- quite a heavy hammer, so skipping stages is a good plan.
+-- And it's extremely simple to do.
+mkRewriteCall fun rule_env
+ | not (null rules) = TryRules n_required rules
+ | canUnfold unf = TryInlining
+ | otherwise = TryNothing
where
- n_required = maximum (map ruleArity rs)
+ n_required = maximum (map ruleArity rules)
+ rules = getRules rule_env fun
+ unf = idUnfolding fun
{-
************************************************************************
@@ -604,21 +614,23 @@ mkArgInfo :: SimplEnv -> RuleEnv -> Id -> SimplCont -> ArgInfo
mkArgInfo env rule_base fun cont
| n_val_args < idArity fun -- Note [Unsaturated functions]
= ArgInfo { ai_fun = fun, ai_args = []
- , ai_rewrite = fun_rules
+ , ai_rewrite = fun_rewrite
, ai_encl = False
, ai_dmds = vanilla_dmds
, ai_discs = vanilla_discounts }
| otherwise
= ArgInfo { ai_fun = fun
, ai_args = []
- , ai_rewrite = fun_rules
- , ai_encl = notNull rules || contHasRules cont
+ , ai_rewrite = fun_rewrite
+ , ai_encl = fun_has_rules || contHasRules cont
, ai_dmds = add_type_strictness (idType fun) arg_dmds
, ai_discs = arg_discounts }
where
- rules = getRules rule_base fun
- fun_rules = mkTryRules rules
- n_val_args = countValArgs cont
+ n_val_args = countValArgs cont
+ fun_rewrite = mkRewriteCall fun rule_base
+ fun_has_rules = case fun_rewrite of
+ TryRules {} -> True
+ _ -> False
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs
index 99ea5dddb0..79ae56cd5e 100644
--- a/compiler/GHC/Types/Id/Make.hs
+++ b/compiler/GHC/Types/Id/Make.hs
@@ -585,6 +585,7 @@ mkDataConWorkId wkr_name data_con
`setInlinePragInfo` wkr_inline_prag
`setUnfoldingInfo` evaldUnfolding -- Record that it's evaluated,
-- even if arity = 0
+ -- No strictness: see Note [Data-con worker strictness] in GHC.Core.DataCon
wkr_inline_prag = defaultInlinePragma { inl_rule = ConLike }
wkr_arity = dataConRepArity data_con