diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2022-08-25 15:54:51 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-10-11 12:49:21 -0400 |
commit | caced75765472a1a94453f2e5a439dba0d04a265 (patch) | |
tree | d83c65272aeb12d7f330bd9cdf77b0db1180a9ad | |
parent | 9789ea8e9f35d5c0674e10730c3435c4d3293f2b (diff) | |
download | haskell-caced75765472a1a94453f2e5a439dba0d04a265.tar.gz |
Don't keep exit join points so much
We were religiously keeping exit join points throughout, which
had some bad effects (#21148, #22084).
This MR does two things:
* Arranges that exit join points are inhibited from inlining
only in /one/ Simplifier pass (right after Exitification).
See Note [Be selective about not-inlining exit join points]
in GHC.Core.Opt.Exitify
It's not a big deal, but it shaves 0.1% off compile times.
* Inline used-once non-recursive join points very aggressively
Given join j x = rhs in
joinrec k y = ....j x....
where this is the only occurrence of `j`, we want to inline `j`.
(Unless sm_keep_exits is on.)
See Note [Inline used-once non-recursive join points] in
GHC.Core.Opt.Simplify.Utils
This is just a tidy-up really. It doesn't change allocation, but
getting rid of a binding is always good.
Very little effect on nofib -- some up and down.
-rw-r--r-- | compiler/GHC/Core/Opt/Exitify.hs | 24 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 80 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Env.hs | 15 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 29 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/Driver/Config/Core/Opt/Simplify.hs | 54 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21148.hs | 12 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21148.stderr | 126 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 1 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T21128.hs | 5 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T21128.stderr | 46 |
11 files changed, 300 insertions, 98 deletions
diff --git a/compiler/GHC/Core/Opt/Exitify.hs b/compiler/GHC/Core/Opt/Exitify.hs index 89156418bc..b8ba685a5e 100644 --- a/compiler/GHC/Core/Opt/Exitify.hs +++ b/compiler/GHC/Core/Opt/Exitify.hs @@ -433,6 +433,7 @@ inlining. Exit join points, recognizable using `isExitJoinId` are join points with an occurrence in a recursive group, and can be recognized (after the occurrence analyzer ran!) using `isExitJoinId`. + This function detects joinpoints with `occ_in_lam (idOccinfo id) == True`, because the lambdas of a non-recursive join point are not considered for `occ_in_lam`. For example, in the following code, `j1` is /not/ marked @@ -446,6 +447,29 @@ To prevent inlining, we check for isExitJoinId * In `simplLetUnfolding` we simply give exit join points no unfolding, which prevents inlining in `postInlineUnconditionally` and call sites. +But see Note [Be selective about not-inlining exit join points] + +Note [Be selective about not-inlining exit join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we follow "do not inline exit join points" mantra throughout, +some bad things happen. + +* We can lose CPR information: see #21148 + +* We get useless clutter (#22084) that + - makes the program bigger (including duplicated code #20739), and + - adds extra jumps (and maybe stack saves) at runtime + +So instead we follow "do not inline exit join points" for a /single run/ +of the simplifier, right after Exitification. That should give a +sufficient chance for used-once things to inline, but subsequent runs +will inline them back in. (Annoyingly, as things stand, only with -O2 +is there a subsequent run, but that might change, and it's not a huge +deal anyway.) + +This is controlled by the Simplifier's sm_keep_exits flag; see +GHC.Core.Opt.Pipeline. 
+ Note [Placement of the exitification pass] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ I (Joachim) experimented with multiple positions for the Exitification pass in diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 6ed1adf84a..214e7620c2 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -15,7 +15,7 @@ import GHC.Driver.Plugins ( withPlugins, installCoreToDos ) import GHC.Driver.Env import GHC.Driver.Config.Core.Lint ( endPass ) import GHC.Driver.Config.Core.Opt.LiberateCase ( initLiberateCaseOpts ) -import GHC.Driver.Config.Core.Opt.Simplify ( initSimplifyOpts, initSimplMode, initGentleSimplMode ) +import GHC.Driver.Config.Core.Opt.Simplify ( initSimplifyOpts, initSimplMode ) import GHC.Driver.Config.Core.Opt.WorkWrap ( initWorkWrapOpts ) import GHC.Driver.Config.Core.Rules ( initRuleOpts ) import GHC.Platform.Ways ( hasWay, Way(WayProf) ) @@ -28,6 +28,7 @@ import GHC.Core.Utils ( dumpIdInfoOfProgram ) import GHC.Core.Lint ( lintAnnots ) import GHC.Core.Lint.Interactive ( interactiveInScope ) import GHC.Core.Opt.Simplify ( simplifyExpr, simplifyPgm ) +import GHC.Core.Opt.Simplify.Env( SimplMode(..) ) import GHC.Core.Opt.Simplify.Monad import GHC.Core.Opt.Monad import GHC.Core.Opt.Pipeline.Types @@ -154,32 +155,45 @@ getCoreToDo dflags rule_base extra_vars maybe_strictness_before _ = CoreDoNothing - simpl_phase phase name iter - = CoreDoPasses - $ [ maybe_strictness_before phase - , CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter - (initSimplMode dflags phase name) rule_base - , maybe_rule_check phase ] + ---------------------------- + base_simpl_mode :: SimplMode + base_simpl_mode = initSimplMode dflags - -- Run GHC's internal simplification phase, after all rules have run. 
- -- See Note [Compiler phases] in GHC.Types.Basic - simplify name = simpl_phase FinalPhase name max_iter - - -- initial simplify: mk specialiser happy: minimum effort please + -- gentle_mode: make specialiser happy: minimum effort please -- See Note [Inline in InitialPhase] -- See Note [RULEs enabled in InitialPhase] - simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter - (initGentleSimplMode dflags) rule_base + gentle_mode = base_simpl_mode { sm_names = ["Gentle"] + , sm_phase = InitialPhase + , sm_case_case = False } + + simpl_mode phase name + = base_simpl_mode { sm_names = [name], sm_phase = phase } + + keep_exits :: SimplMode -> SimplMode + -- See Note [Be selective about not-inlining exit join points] + -- in GHC.Core.Opt.Exitify + keep_exits mode = mode { sm_keep_exits = True } + + ---------------------------- + run_simplifier mode iter + = CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter mode rule_base + simpl_phase phase name iter = CoreDoPasses $ + [ maybe_strictness_before phase + , run_simplifier (simpl_mode phase name) iter + , maybe_rule_check phase ] + + -- Run GHC's internal simplification phase, after all rules have run. + -- See Note [Compiler phases] in GHC.Types.Basic + simpl_gently = run_simplifier gentle_mode max_iter + simplify_final name = run_simplifier ( simpl_mode FinalPhase name) max_iter + simpl_keep_exits name = run_simplifier (keep_exits $ simpl_mode FinalPhase name) max_iter + + ---------------------------- dmd_cpr_ww = if ww_on then [CoreDoDemand True,CoreDoCpr,CoreDoWorkerWrapper] else [CoreDoDemand False] -- NB: No CPR! See Note [Don't change boxity without worker/wrapper] - demand_analyser = (CoreDoPasses ( - dmd_cpr_ww ++ - [simplify "post-worker-wrapper"] - )) - -- Static forms are moved to the top level with the FloatOut pass. -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable. 
static_ptrs_float_outwards = @@ -269,14 +283,16 @@ getCoreToDo dflags rule_base extra_vars runWhen call_arity $ CoreDoPasses [ CoreDoCallArity - , simplify "post-call-arity" + , simplify_final "post-call-arity" ], -- Strictness analysis - runWhen strictness demand_analyser, + runWhen strictness $ CoreDoPasses + (dmd_cpr_ww ++ [simplify_final "post-worker-wrapper"]), runWhen exitification CoreDoExitify, -- See Note [Placement of the exitification pass] + -- in GHC.Core.Opt.Exitify runWhen full_laziness $ CoreDoFloatOutwards FloatOutSwitches { @@ -298,7 +314,17 @@ getCoreToDo dflags rule_base extra_vars runWhen do_float_in CoreDoFloatInwards, - simplify "final", -- Final tidy-up + -- Final tidy-up run of the simplifier + simpl_keep_exits "final tidy up", + -- Keep exit join point because this is the first + -- Simplifier run after Exitify. Subsequent runs will + -- re-inline those exit join points; their work is done. + -- See Note [Be selective about not-inlining exit join points] + -- in GHC.Core.Opt.Exitify + -- + -- Annoyingly, we only /have/ a subsequent run with -O2. With + -- plain -O we'll still have those exit join points hanging around. + -- Oh well. maybe_rule_check FinalPhase, @@ -308,31 +334,31 @@ getCoreToDo dflags rule_base extra_vars -- Case-liberation for -O2. This should be after -- strictness analysis and the simplification which follows it. 
runWhen liberate_case $ CoreDoPasses - [ CoreLiberateCase, simplify "post-liberate-case" ], + [ CoreLiberateCase, simplify_final "post-liberate-case" ], -- Run the simplifier after LiberateCase to vastly -- reduce the possibility of shadowing -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr runWhen spec_constr $ CoreDoPasses - [ CoreDoSpecConstr, simplify "post-spec-constr"], + [ CoreDoSpecConstr, simplify_final "post-spec-constr"], -- See Note [Simplify after SpecConstr] maybe_rule_check FinalPhase, runWhen late_specialise $ CoreDoPasses - [ CoreDoSpecialising, simplify "post-late-spec"], + [ CoreDoSpecialising, simplify_final "post-late-spec"], -- LiberateCase can yield new CSE opportunities because it peels -- off one layer of a recursive function (concretely, I saw this -- in wheel-sieve1), and I'm guessing that SpecConstr can too -- And CSE is a very cheap pass. So it seems worth doing here. runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses - [ CoreCSE, simplify "post-final-cse" ], + [ CoreCSE, simplify_final "post-final-cse" ], --------- End of -O2 passes -------------- runWhen late_dmd_anal $ CoreDoPasses ( - dmd_cpr_ww ++ [simplify "post-late-ww"] + dmd_cpr_ww ++ [simplify_final "post-late-ww"] ), -- Final run of the demand_analyser, ensures that one-shot thunks are diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs index 6409a6d7eb..f56ebe4870 100644 --- a/compiler/GHC/Core/Opt/Simplify/Env.hs +++ b/compiler/GHC/Core/Opt/Simplify/Env.hs @@ -248,13 +248,16 @@ data SimplMode = SimplMode -- See comments in GHC.Core.Opt.Simplify.Monad , sm_uf_opts :: !UnfoldingOpts -- ^ Unfolding options , sm_case_case :: !Bool -- ^ Whether case-of-case is enabled , sm_pre_inline :: !Bool -- ^ Whether pre-inlining is enabled - , sm_float_enable :: !FloatEnable -- ^ Whether to enable floating out + , sm_keep_exits :: !Bool -- ^ True <=> keep ExitJoinIds + -- See Note [Do not inline exit join points] + -- in 
GHC.Core.Opt.Exitify + , sm_float_enable :: !FloatEnable -- ^ Whether to enable floating out , sm_do_eta_reduction :: !Bool - , sm_arity_opts :: !ArityOpts - , sm_rule_opts :: !RuleOpts - , sm_case_folding :: !Bool - , sm_case_merge :: !Bool - , sm_co_opt_opts :: !OptCoercionOpts -- ^ Coercion optimiser options + , sm_arity_opts :: !ArityOpts + , sm_rule_opts :: !RuleOpts + , sm_case_folding :: !Bool + , sm_case_merge :: !Bool + , sm_co_opt_opts :: !OptCoercionOpts -- ^ Coercion optimiser options } instance Outputable SimplMode where diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index abd58fcb39..6f26d2527b 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -1333,11 +1333,11 @@ preInlineUnconditionally -- Reason: we don't want to inline single uses, or discard dead bindings, -- for unlifted, side-effect-ful bindings preInlineUnconditionally env top_lvl bndr rhs rhs_env - | not pre_inline_unconditionally = Nothing + | not pre_inline = Nothing | not active = Nothing | isTopLevel top_lvl && isDeadEndId bndr = Nothing -- Note [Top-level bottoming Ids] | isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally] - | isExitJoinId bndr = Nothing -- Note [Do not inline exit join points] + | keep_exits, isExitJoinId bndr = Nothing -- Note [Do not inline exit join points] -- in module Exitify | not (one_occ (idOccInfo bndr)) = Nothing | not (isStableUnfolding unf) = Just $! (extend_subst_with rhs) @@ -1347,19 +1347,36 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env , Just inl <- maybeUnfoldingTemplate unf = Just $! (extend_subst_with inl) | otherwise = Nothing where + mode = seMode env + phase = sm_phase mode + keep_exits = sm_keep_exits mode + pre_inline = sm_pre_inline mode + unf = idUnfolding bndr extend_subst_with inl_rhs = extendIdSubst env bndr $! 
(mkContEx rhs_env inl_rhs) one_occ IAmDead = True -- Happens in ((\x.1) v) + one_occ OneOcc{ occ_n_br = 1 , occ_in_lam = NotInsideLam } = isNotTopLevel top_lvl || early_phase + one_occ OneOcc{ occ_n_br = 1 , occ_in_lam = IsInsideLam , occ_int_cxt = IsInteresting } = canInlineInLam rhs - one_occ _ = False - pre_inline_unconditionally = sePreInline env - active = isActive (sePhase env) (inlinePragmaActivation inline_prag) + one_occ OneOcc{ occ_n_br = 1 } -- Inline join point that are used once, even inside + | isJoinId bndr = True -- lambdas (which are presumably other join points) + -- E.g. join j x = rhs in + -- joinrec k y = ....j x.... + -- Here j must be an exit for k, and we can safely inline it under the lambda + -- This includes the case where j is nullary: a nullary join point is just the + -- same as an arity-1 one. So we don't look at occ_int_cxt. + -- All of this only applies if keep_exits is False, otherwise the + -- earlier guard on preInlineUnconditionally would have fired + + one_occ _ = False + + active = isActive phase (inlinePragmaActivation inline_prag) -- See Note [pre/postInlineUnconditionally in gentle mode] inline_prag = idInlinePragma bndr @@ -1391,7 +1408,7 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env -- not ticks. Counting ticks cannot be duplicated, and non-counting -- ticks around a Lam will disappear anyway. 
- early_phase = sePhase env /= FinalPhase + early_phase = phase /= FinalPhase -- If we don't have this early_phase test, consider -- x = length [1,2,3] -- The full laziness pass carefully floats all the cons cells to diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index 9119671f95..b8a77875a6 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -1512,8 +1512,10 @@ scExpr' env (Case scrut b ty alts) scrut_occ = case con of DataAlt dc -- See Note [Do not specialise evals] | not (single_alt && all deadArgOcc arg_occs) - -> ScrutOcc (unitUFM dc arg_occs) - _ -> UnkOcc + -> -- pprTrace "sc_alt1" (ppr b' $$ ppr con $$ ppr bs $$ ppr arg_occs) $ + ScrutOcc (unitUFM dc arg_occs) + _ -> -- pprTrace "sc_alt1" (ppr b' $$ ppr con $$ ppr bs $$ ppr arg_occs) $ + UnkOcc ; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') } diff --git a/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs b/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs index b413f2d066..86284c8be9 100644 --- a/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs +++ b/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs @@ -2,7 +2,6 @@ module GHC.Driver.Config.Core.Opt.Simplify ( initSimplifyExprOpts , initSimplifyOpts , initSimplMode - , initGentleSimplMode ) where import GHC.Prelude @@ -27,12 +26,13 @@ import GHC.Types.Var ( Var ) initSimplifyExprOpts :: DynFlags -> InteractiveContext -> SimplifyExprOpts initSimplifyExprOpts dflags ic = SimplifyExprOpts { se_fam_inst = snd $ ic_instances ic - , se_mode = (initSimplMode dflags InitialPhase "GHCi") - { sm_inline = False - -- Do not do any inlining, in case we expose some - -- unboxed tuple stuff that confuses the bytecode + + , se_mode = (initSimplMode dflags) { sm_names = ["GHCi"] + , sm_inline = False } + -- sm_inline: do not do any inlining, in case we expose + -- some unboxed tuple stuff that confuses the bytecode -- interpreter - } + , se_top_env_cfg = TopEnvConfig { te_history_size 
= historySize dflags , te_tick_factor = simplTickFactor dflags @@ -57,31 +57,25 @@ initSimplifyOpts dflags extra_vars iterations mode rule_base = let } in opts -initSimplMode :: DynFlags -> CompilerPhase -> String -> SimplMode -initSimplMode dflags phase name = SimplMode - { sm_names = [name] - , sm_phase = phase - , sm_rules = gopt Opt_EnableRewriteRules dflags - , sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags - , sm_cast_swizzle = True - , sm_inline = True - , sm_uf_opts = unfoldingOpts dflags - , sm_case_case = True - , sm_pre_inline = gopt Opt_SimplPreInlining dflags - , sm_float_enable = floatEnable dflags +initSimplMode :: DynFlags -> SimplMode +initSimplMode dflags = SimplMode + { sm_names = ["Unknown simplifier run"] -- Always overriden + , sm_phase = InitialPhase + , sm_rules = gopt Opt_EnableRewriteRules dflags + , sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags + , sm_pre_inline = gopt Opt_SimplPreInlining dflags , sm_do_eta_reduction = gopt Opt_DoEtaReduction dflags - , sm_arity_opts = initArityOpts dflags - , sm_rule_opts = initRuleOpts dflags - , sm_case_folding = gopt Opt_CaseFolding dflags - , sm_case_merge = gopt Opt_CaseMerge dflags - , sm_co_opt_opts = initOptCoercionOpts dflags - } - -initGentleSimplMode :: DynFlags -> SimplMode -initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle") - { -- Don't do case-of-case transformations. 
- -- This makes full laziness work better - sm_case_case = False + , sm_uf_opts = unfoldingOpts dflags + , sm_float_enable = floatEnable dflags + , sm_arity_opts = initArityOpts dflags + , sm_rule_opts = initRuleOpts dflags + , sm_case_folding = gopt Opt_CaseFolding dflags + , sm_case_merge = gopt Opt_CaseMerge dflags + , sm_co_opt_opts = initOptCoercionOpts dflags + , sm_cast_swizzle = True + , sm_inline = True + , sm_case_case = True + , sm_keep_exits = False } floatEnable :: DynFlags -> FloatEnable diff --git a/testsuite/tests/simplCore/should_compile/T21148.hs b/testsuite/tests/simplCore/should_compile/T21148.hs new file mode 100644 index 0000000000..72d3e14167 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21148.hs @@ -0,0 +1,12 @@ +module T211148 where + +-- The point of this test is that f should get a (nested) +-- CPR property, with a worker of type +-- $wf :: Int# -> State# RealWorld -> (# State# RealWorld, Int# #) + +{-# NOINLINE f #-} +-- The NOINLINE makes GHC do a worker/wrapper split +-- even though f is small +f :: Int -> IO Int +f x = return $! 
sum [0..x] + diff --git a/testsuite/tests/simplCore/should_compile/T21148.stderr b/testsuite/tests/simplCore/should_compile/T21148.stderr new file mode 100644 index 0000000000..9197584912 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21148.stderr @@ -0,0 +1,126 @@ + +==================== Tidy Core ==================== +Result size of Tidy Core + = {terms: 73, types: 80, coercions: 6, joins: 2/2} + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T211148.$trModule4 :: GHC.Prim.Addr# +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +T211148.$trModule4 = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T211148.$trModule3 :: GHC.Types.TrName +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +T211148.$trModule3 = GHC.Types.TrNameS T211148.$trModule4 + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T211148.$trModule2 :: GHC.Prim.Addr# +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 30 0}] +T211148.$trModule2 = "T211148"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T211148.$trModule1 :: GHC.Types.TrName +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +T211148.$trModule1 = GHC.Types.TrNameS T211148.$trModule2 + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +T211148.$trModule :: GHC.Types.Module +[GblId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +T211148.$trModule + = GHC.Types.Module T211148.$trModule3 T211148.$trModule1 + +-- RHS size: {terms: 41, types: 35, coercions: 0, joins: 2/2} +T211148.$wf [InlPrag=NOINLINE] + :: GHC.Prim.Int# + -> GHC.Prim.State# 
GHC.Prim.RealWorld + -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #) +[GblId, Arity=2, Str=<L><L>, Unf=OtherCon []] +T211148.$wf + = \ (ww_s179 :: GHC.Prim.Int#) + (eta_s17b [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) -> + case GHC.Prim.># 0# ww_s179 of { + __DEFAULT -> + join { + exit_X0 [Dmd=SC(S,C(1,!P(L,L)))] + :: GHC.Prim.Int# + -> GHC.Prim.Int# + -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #) + [LclId[JoinId(2)(Nothing)], Arity=2, Str=<L><L>] + exit_X0 (x_s16Z [OS=OneShot] :: GHC.Prim.Int#) + (ww1_s172 [OS=OneShot] :: GHC.Prim.Int#) + = (# eta_s17b, GHC.Prim.+# ww1_s172 x_s16Z #) } in + joinrec { + $wgo3_s175 [InlPrag=[2], Occ=LoopBreaker, Dmd=SC(S,C(1,!P(L,L)))] + :: GHC.Prim.Int# + -> GHC.Prim.Int# + -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #) + [LclId[JoinId(2)(Nothing)], Arity=2, Str=<L><L>, Unf=OtherCon []] + $wgo3_s175 (x_s16Z :: GHC.Prim.Int#) (ww1_s172 :: GHC.Prim.Int#) + = case GHC.Prim.==# x_s16Z ww_s179 of { + __DEFAULT -> + jump $wgo3_s175 + (GHC.Prim.+# x_s16Z 1#) (GHC.Prim.+# ww1_s172 x_s16Z); + 1# -> jump exit_X0 x_s16Z ww1_s172 + }; } in + jump $wgo3_s175 0# 0#; + 1# -> (# eta_s17b, 0# #) + } + +-- RHS size: {terms: 14, types: 19, coercions: 0, joins: 0/0} +T211148.f1 [InlPrag=NOINLINE[final]] + :: Int + -> GHC.Prim.State# GHC.Prim.RealWorld + -> (# GHC.Prim.State# GHC.Prim.RealWorld, Int #) +[GblId, + Arity=2, + Str=<1!P(L)><L>, + Cpr=1(, 1), + Unf=Unf{Src=StableSystem, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False) + Tmpl= \ (x_s177 [Occ=Once1!] 
:: Int) + (eta_s17b [Occ=Once1, OS=OneShot] + :: GHC.Prim.State# GHC.Prim.RealWorld) -> + case x_s177 of { GHC.Types.I# ww_s179 [Occ=Once1] -> + case T211148.$wf ww_s179 eta_s17b of + { (# ww1_s17e [Occ=Once1], ww2_s17j [Occ=Once1] #) -> + (# ww1_s17e, GHC.Types.I# ww2_s17j #) + } + }}] +T211148.f1 + = \ (x_s177 :: Int) + (eta_s17b [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) -> + case x_s177 of { GHC.Types.I# ww_s179 -> + case T211148.$wf ww_s179 eta_s17b of { (# ww1_s17e, ww2_s17j #) -> + (# ww1_s17e, GHC.Types.I# ww2_s17j #) + } + } + +-- RHS size: {terms: 1, types: 0, coercions: 6, joins: 0/0} +f [InlPrag=NOINLINE[final]] :: Int -> IO Int +[GblId, + Arity=2, + Str=<1!P(L)><L>, + Cpr=1(, 1), + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}] +f = T211148.f1 + `cast` (<Int>_R %<'Many>_N ->_R Sym (GHC.Types.N:IO[0] <Int>_R) + :: (Int + -> GHC.Prim.State# GHC.Prim.RealWorld + -> (# GHC.Prim.State# GHC.Prim.RealWorld, Int #)) + ~R# (Int -> IO Int)) + + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 283c6cf1b0..fc708ef9f0 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -429,6 +429,7 @@ test('T21763a', only_ways(['optasm']), compile, ['-O2 -ddump-rules']) test('T22028', normal, compile, ['-O -ddump-rule-firings']) test('T22114', normal, compile, ['-O']) test('T21286', normal, multimod_compile, ['T21286', '-O -ddump-rule-firings']) +test('T21148', [grep_errmsg(r'Cpr=') ], compile, ['-O -ddump-simpl']) # One module, T21851.hs, has OPTIONS_GHC -ddump-simpl test('T21851', [grep_errmsg(r'case.*w\$sf') ], multimod_compile, ['T21851', '-O -dno-typeable-binds -dsuppress-uniques']) diff --git a/testsuite/tests/stranal/should_compile/T21128.hs b/testsuite/tests/stranal/should_compile/T21128.hs index 899adac49c..02991433f2 100644 
--- a/testsuite/tests/stranal/should_compile/T21128.hs +++ b/testsuite/tests/stranal/should_compile/T21128.hs @@ -2,6 +2,10 @@ module T21128 where import T21128a +{- This test originally had some unnecessary reboxing of y +in the hot path of $wtheresCrud. That reboxing should +not happen. -} + theresCrud :: Int -> Int -> Int theresCrud x y = go x where @@ -9,3 +13,4 @@ theresCrud x y = go x go 1 = index x y 1 go n = go (n-1) {-# NOINLINE theresCrud #-} + diff --git a/testsuite/tests/stranal/should_compile/T21128.stderr b/testsuite/tests/stranal/should_compile/T21128.stderr index a64c1f1d5a..955717ef35 100644 --- a/testsuite/tests/stranal/should_compile/T21128.stderr +++ b/testsuite/tests/stranal/should_compile/T21128.stderr @@ -1,7 +1,7 @@ ==================== Tidy Core ==================== Result size of Tidy Core - = {terms: 137, types: 92, coercions: 4, joins: 0/0} + = {terms: 125, types: 68, coercions: 4, joins: 0/0} lvl = "error"# @@ -29,17 +29,11 @@ lvl9 = SrcLoc lvl2 lvl3 lvl5 lvl6 lvl7 lvl6 lvl8 lvl10 = PushCallStack lvl1 lvl9 EmptyCallStack -$windexError - = \ @a @b ww eta eta1 eta2 -> - error - (lvl10 `cast` <Co:4> :: CallStack ~R# (?callStack::CallStack)) - (++ (ww eta) (++ (ww eta1) (ww eta2))) - indexError = \ @a @b $dShow eta eta1 eta2 -> - case $dShow of { C:Show ww ww1 ww2 -> - $windexError ww1 eta eta1 eta2 - } + error + (lvl10 `cast` <Co:4> :: ...) 
+ (++ (show $dShow eta) (++ (show $dShow eta1) (show $dShow eta2))) $trModule3 = TrNameS $trModule4 @@ -48,8 +42,7 @@ $trModule1 = TrNameS $trModule2 $trModule = Module $trModule3 $trModule1 $wlvl - = \ ww ww1 ww2 -> - $windexError $fShowInt_$cshow (I# ww2) (I# ww1) (I# ww) + = \ ww ww1 ww2 -> indexError $fShowInt (I# ww2) (I# ww1) (I# ww) index = \ l u i -> @@ -73,7 +66,7 @@ index ==================== Tidy Core ==================== Result size of Tidy Core - = {terms: 108, types: 47, coercions: 0, joins: 3/4} + = {terms: 108, types: 46, coercions: 0, joins: 3/3} $trModule4 = "main"# @@ -89,35 +82,34 @@ i = I# 1# l = I# 0# -lvl = \ y -> $windexError $fShowInt_$cshow l y l +lvl = \ x ww -> indexError $fShowInt x (I# ww) i -lvl1 = \ ww y -> $windexError $fShowInt_$cshow (I# ww) y i +lvl1 = \ ww -> indexError $fShowInt l (I# ww) l $wtheresCrud = \ ww ww1 -> - let { y = I# ww1 } in join { - lvl2 + exit + = case <# 0# ww1 of { + __DEFAULT -> case lvl1 ww1 of wild { }; + 1# -> 0# + } } in + join { + exit1 = case <=# ww 1# of { - __DEFAULT -> case lvl1 ww y of wild { }; + __DEFAULT -> case lvl (I# ww) ww1 of wild { }; 1# -> case <# 1# ww1 of { - __DEFAULT -> case lvl1 ww y of wild { }; + __DEFAULT -> case lvl (I# ww) ww1 of wild { }; 1# -> -# 1# ww } } } in - join { - lvl3 - = case <# 0# ww1 of { - __DEFAULT -> case lvl y of wild { }; - 1# -> 0# - } } in joinrec { $wgo ww2 = case ww2 of wild { __DEFAULT -> jump $wgo (-# wild 1#); - 0# -> jump lvl3; - 1# -> jump lvl2 + 0# -> jump exit; + 1# -> jump exit1 }; } in jump $wgo ww |