diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2022-11-09 12:08:06 +0000 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-11-11 13:18:07 -0500 |
commit | d0c691b6110b11a43d5ea2685d17bc001d2298da (patch) | |
tree | 3de6c1a31b444e687936b03063921091d0182f21 | |
parent | 6dab0046b575e102bf3245fd63d5ac6bc6f4204d (diff) | |
download | haskell-d0c691b6110b11a43d5ea2685d17bc001d2298da.tar.gz |
Add a fast path for data constructor workers
See Note [Fast path for data constructors] in
GHC.Core.Opt.Simplify.Iteration
This bypasses lots of expensive logic, in the special case of
applications of data constructors. It is a surprisingly worthwhile
improvement, as you can see in the figures below.
Metrics: compile_time/bytes allocated
------------------------------------------------
CoOpt_Read(normal) -2.0%
CoOpt_Singletons(normal) -2.0%
ManyConstructors(normal) -1.3%
T10421(normal) -1.9% GOOD
T10421a(normal) -1.5%
T10858(normal) -1.6%
T11545(normal) -1.7%
T12234(optasm) -1.3%
T12425(optasm) -1.9% GOOD
T13035(normal) -1.0% GOOD
T13056(optasm) -1.8%
T13253(normal) -3.3% GOOD
T15164(normal) -1.7%
T15304(normal) -3.4%
T15630(normal) -2.8%
T16577(normal) -4.3% GOOD
T17096(normal) -1.1%
T17516(normal) -3.1%
T18282(normal) -1.9%
T18304(normal) -1.2%
T18698a(normal) -1.2% GOOD
T18698b(normal) -1.5% GOOD
T18923(normal) -1.3%
T1969(normal) -1.3% GOOD
T19695(normal) -4.4% GOOD
T21839c(normal) -2.7% GOOD
T21839r(normal) -2.7% GOOD
T4801(normal) -3.8% GOOD
T5642(normal) -3.1% GOOD
T6048(optasm) -2.5% GOOD
T9020(optasm) -2.7% GOOD
T9630(normal) -2.1% GOOD
T9961(normal) -11.7% GOOD
WWRec(normal) -1.0%
geo. mean -1.1%
minimum -11.7%
maximum +0.1%
Metric Decrease:
T10421
T12425
T13035
T13253
T16577
T18698a
T18698b
T1969
T19695
T21839c
T21839r
T4801
T5642
T6048
T9020
T9630
T9961
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Iteration.hs | 40 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 32 | ||||
-rw-r--r-- | compiler/GHC/Types/Id/Make.hs | 1 |
3 files changed, 55 insertions, 18 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Iteration.hs b/compiler/GHC/Core/Opt/Simplify/Iteration.hs index 39e0e892d2..1b7e30dfec 100644 --- a/compiler/GHC/Core/Opt/Simplify/Iteration.hs +++ b/compiler/GHC/Core/Opt/Simplify/Iteration.hs @@ -1497,9 +1497,10 @@ rebuild env expr cont ApplyToTy { sc_arg_ty = ty, sc_cont = cont} -> rebuild env (App expr (Type ty)) cont - ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag, sc_cont = cont} + ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag + , sc_cont = cont, sc_hole_ty = fun_ty } -- See Note [Avoid redundant simplification] - -> do { (_, _, arg') <- simplArg env dup_flag se arg + -> do { (_, _, arg') <- simplArg env dup_flag fun_ty se arg ; rebuild env (App expr arg') cont } completeBindX :: SimplEnv @@ -1598,7 +1599,8 @@ simplCast env body co0 cont0 -- co1 :: t1 ~ s1 -- co2 :: s2 ~ t2 addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se - , sc_dup = dup, sc_cont = tail }) + , sc_dup = dup, sc_cont = tail + , sc_hole_ty = fun_ty }) | Just (m_co1, m_co2) <- pushCoValArg co , fixed_rep m_co1 = {-#SCC "addCoerce-pushCoValArg" #-} @@ -1610,7 +1612,7 @@ simplCast env body co0 cont0 -- See Note [Avoiding exponential behaviour] MCo co1 -> - do { (dup', arg_se', arg') <- simplArg env dup arg_se arg + do { (dup', arg_se', arg') <- simplArg env dup fun_ty arg_se arg -- When we build the ApplyTo we can't mix the OutCoercion -- 'co' with the InExpr 'arg', so we simplify -- to make it all consistent. It's a bit messy. @@ -1636,14 +1638,16 @@ simplCast env body co0 cont0 -- See Note [Representation polymorphism invariants] in GHC.Core -- test: typecheck/should_run/EtaExpandLevPoly -simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr +simplArg :: SimplEnv -> DupFlag + -> OutType -- Type of the function applied to this arg + -> StaticEnv -> CoreExpr -- Expression with its static envt -> SimplM (DupFlag, StaticEnv, OutExpr) -simplArg env dup_flag arg_env arg +simplArg env dup_flag fun_ty arg_env arg | isSimplified dup_flag = return (dup_flag, arg_env, arg) | otherwise = do { let arg_env' = arg_env `setInScopeFromE` env - ; arg' <- simplExpr arg_env' arg + ; arg' <- simplExprC arg_env' arg (mkBoringStop (funArgTy fun_ty)) ; return (Simplified, zapSubstEnv arg_env', arg') } -- Return a StaticEnv that includes the in-scope set from 'env', -- because arg' may well mention those variables (#20639) @@ -2029,6 +2033,21 @@ zap the SubstEnv. This is VITAL. Consider We'll clone the inner \x, adding x->x' in the id_subst Then when we inline y, we must *not* replace x by x' in the inlined copy!! + +Note [Fast path for data constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For applications of a data constructor worker, the full glory of +rebuildCall is a waste of effort; +* They never inline, obviously +* They have no rewrite rules +* They are not strict (see Note [Data-con worker strictness] + in GHC.Core.DataCon) +So it's fine to zoom straight to `rebuild` which just rebuilds the +call in a very straightforward way. + +Some programs have a /lot/ of data constructors in the source program +(compiler/perf/T9961 is an example), so this fast path can be very +valuable. -} simplVar :: SimplEnv -> InVar -> SimplM OutExpr @@ -2046,6 +2065,9 @@ simplVar env var simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr) simplIdF env var cont + | isDataConWorkId var -- See Note [Fast path for data constructors] + = rebuild env (Var var) cont + | otherwise = case substId env var of ContEx tvs cvs ids e -> simplExprF env' e cont -- Don't trimJoinCont; haven't already simplified e, @@ -2315,6 +2337,8 @@ field of the ArgInfo record is the state of a little state-machine: If we inline `f` before simplifying `BIG` well use preInlineUnconditionally, and we'll simplify BIG once, at x's occurrence, rather than twice. +* GHC.Core.Opt.Simplify.Utils. mkRewriteCall: if there are no rules, and no + unfolding, we can skip both TryRules and TryInlining, which saves work. Note [Avoid redundant simplification] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3645,7 +3669,7 @@ mkDupableContWithDmds env dmds do { let (dmd:cont_dmds) = dmds -- Never fails ; (floats1, cont') <- mkDupableContWithDmds env cont_dmds cont ; let env' = env `setInScopeFromF` floats1 - ; (_, se', arg') <- simplArg env' dup se arg + ; (_, se', arg') <- simplArg env' dup hole_ty se arg ; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg' ; let all_floats = floats1 `addLetFloats` let_floats2 ; return ( all_floats diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index 31a0130969..5042483bd0 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -425,12 +425,22 @@ decArgCount :: RewriteCall -> RewriteCall decArgCount (TryRules n rules) = TryRules (n-1) rules decArgCount rew = rew -mkTryRules :: [CoreRule] -> RewriteCall +mkRewriteCall :: Id -> RuleEnv -> RewriteCall -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration -mkTryRules [] = TryInlining -mkTryRules rs = TryRules n_required rs +-- We try to skip any unnecessary stages: +-- No rules => skip TryRules +-- No unfolding => skip TryInlining +-- This skipping is "just" for efficiency. But rebuildCall is +-- quite a heavy hammer, so skipping stages is a good plan. +-- And it's extremely simple to do. +mkRewriteCall fun rule_env + | not (null rules) = TryRules n_required rules + | canUnfold unf = TryInlining + | otherwise = TryNothing where - n_required = maximum (map ruleArity rs) + n_required = maximum (map ruleArity rules) + rules = getRules rule_env fun + unf = idUnfolding fun {- ************************************************************************ @@ -604,21 +614,23 @@ mkArgInfo :: SimplEnv -> RuleEnv -> Id -> SimplCont -> ArgInfo mkArgInfo env rule_base fun cont | n_val_args < idArity fun -- Note [Unsaturated functions] = ArgInfo { ai_fun = fun, ai_args = [] - , ai_rewrite = fun_rules + , ai_rewrite = fun_rewrite , ai_encl = False , ai_dmds = vanilla_dmds , ai_discs = vanilla_discounts } | otherwise = ArgInfo { ai_fun = fun , ai_args = [] - , ai_rewrite = fun_rules - , ai_encl = notNull rules || contHasRules cont + , ai_rewrite = fun_rewrite + , ai_encl = fun_has_rules || contHasRules cont , ai_dmds = add_type_strictness (idType fun) arg_dmds , ai_discs = arg_discounts } where - rules = getRules rule_base fun - fun_rules = mkTryRules rules - n_val_args = countValArgs cont + n_val_args = countValArgs cont + fun_rewrite = mkRewriteCall fun rule_base + fun_has_rules = case fun_rewrite of + TryRules {} -> True + _ -> False vanilla_discounts, arg_discounts :: [Int] vanilla_discounts = repeat 0 diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs index 99ea5dddb0..79ae56cd5e 100644 --- a/compiler/GHC/Types/Id/Make.hs +++ b/compiler/GHC/Types/Id/Make.hs @@ -585,6 +585,7 @@ mkDataConWorkId wkr_name data_con `setInlinePragInfo` wkr_inline_prag `setUnfoldingInfo` evaldUnfolding -- Record that it's evaluated, -- even if arity = 0 + -- No strictness: see Note [Data-con worker strictness] in GHC.Core.DataCon wkr_inline_prag = defaultInlinePragma { inl_rule = ConLike } wkr_arity = dataConRepArity data_con |