summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2022-08-25 15:54:51 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-10-11 12:49:21 -0400
commitcaced75765472a1a94453f2e5a439dba0d04a265 (patch)
treed83c65272aeb12d7f330bd9cdf77b0db1180a9ad
parent9789ea8e9f35d5c0674e10730c3435c4d3293f2b (diff)
downloadhaskell-caced75765472a1a94453f2e5a439dba0d04a265.tar.gz
Don't keep exit join points so much
We were religiously keeping exit join points throughout, which had some bad effects (#21148, #22084). This MR does two things: * Arranges that exit join points are inhibited from inlining only in /one/ Simplifier pass (right after Exitification). See Note [Be selective about not-inlining exit join points] in GHC.Core.Opt.Exitify It's not a big deal, but it shaves 0.1% off compile times. * Inline used-once non-recursive join points very aggressively Given join j x = rhs in joinrec k y = ....j x.... where this is the only occurrence of `j`, we want to inline `j`. (Unless sm_keep_exits is on.) See Note [Inline used-once non-recursive join points] in GHC.Core.Opt.Simplify.Utils This is just a tidy-up really. It doesn't change allocation, but getting rid of a binding is always good. Very effect on nofib -- some up and down.
-rw-r--r--compiler/GHC/Core/Opt/Exitify.hs24
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs80
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Env.hs15
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs29
-rw-r--r--compiler/GHC/Core/Opt/SpecConstr.hs6
-rw-r--r--compiler/GHC/Driver/Config/Core/Opt/Simplify.hs54
-rw-r--r--testsuite/tests/simplCore/should_compile/T21148.hs12
-rw-r--r--testsuite/tests/simplCore/should_compile/T21148.stderr126
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T1
-rw-r--r--testsuite/tests/stranal/should_compile/T21128.hs5
-rw-r--r--testsuite/tests/stranal/should_compile/T21128.stderr46
11 files changed, 300 insertions, 98 deletions
diff --git a/compiler/GHC/Core/Opt/Exitify.hs b/compiler/GHC/Core/Opt/Exitify.hs
index 89156418bc..b8ba685a5e 100644
--- a/compiler/GHC/Core/Opt/Exitify.hs
+++ b/compiler/GHC/Core/Opt/Exitify.hs
@@ -433,6 +433,7 @@ inlining.
Exit join points, recognizable using `isExitJoinId` are join points with an
occurrence in a recursive group, and can be recognized (after the occurrence
analyzer ran!) using `isExitJoinId`.
+
This function detects joinpoints with `occ_in_lam (idOccinfo id) == True`,
because the lambdas of a non-recursive join point are not considered for
`occ_in_lam`. For example, in the following code, `j1` is /not/ marked
@@ -446,6 +447,29 @@ To prevent inlining, we check for isExitJoinId
* In `simplLetUnfolding` we simply give exit join points no unfolding, which
prevents inlining in `postInlineUnconditionally` and call sites.
+But see Note [Be selective about not-inlining exit join points]
+
+Note [Be selective about not-inlining exit join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we follow "do not inline exit join points" mantra throughout,
+some bad things happen.
+
+* We can lose CPR information: see #21148
+
+* We get useless clutter (#22084) that
+ - makes the program bigger (including duplicated code #20739), and
+ - adds extra jumps (and maybe stack saves) at runtime
+
+So instead we follow "do not inline exit join points" for a /single run/
+of the simplifier, right after Exitification. That should give a
+sufficient chance for used-once things to inline, but subsequent runs
+will inline them back in. (Annoyingly, as things stand, only with -O2
+is there a subsequent run, but that might change, and it's not a huge
+deal anyway.)
+
+This is controlled by the Simplifier's sm_keep_exits flag; see
+GHC.Core.Opt.Pipeline.
+
Note [Placement of the exitification pass]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I (Joachim) experimented with multiple positions for the Exitification pass in
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index 6ed1adf84a..214e7620c2 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -15,7 +15,7 @@ import GHC.Driver.Plugins ( withPlugins, installCoreToDos )
import GHC.Driver.Env
import GHC.Driver.Config.Core.Lint ( endPass )
import GHC.Driver.Config.Core.Opt.LiberateCase ( initLiberateCaseOpts )
-import GHC.Driver.Config.Core.Opt.Simplify ( initSimplifyOpts, initSimplMode, initGentleSimplMode )
+import GHC.Driver.Config.Core.Opt.Simplify ( initSimplifyOpts, initSimplMode )
import GHC.Driver.Config.Core.Opt.WorkWrap ( initWorkWrapOpts )
import GHC.Driver.Config.Core.Rules ( initRuleOpts )
import GHC.Platform.Ways ( hasWay, Way(WayProf) )
@@ -28,6 +28,7 @@ import GHC.Core.Utils ( dumpIdInfoOfProgram )
import GHC.Core.Lint ( lintAnnots )
import GHC.Core.Lint.Interactive ( interactiveInScope )
import GHC.Core.Opt.Simplify ( simplifyExpr, simplifyPgm )
+import GHC.Core.Opt.Simplify.Env( SimplMode(..) )
import GHC.Core.Opt.Simplify.Monad
import GHC.Core.Opt.Monad
import GHC.Core.Opt.Pipeline.Types
@@ -154,32 +155,45 @@ getCoreToDo dflags rule_base extra_vars
maybe_strictness_before _
= CoreDoNothing
- simpl_phase phase name iter
- = CoreDoPasses
- $ [ maybe_strictness_before phase
- , CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
- (initSimplMode dflags phase name) rule_base
- , maybe_rule_check phase ]
+ ----------------------------
+ base_simpl_mode :: SimplMode
+ base_simpl_mode = initSimplMode dflags
- -- Run GHC's internal simplification phase, after all rules have run.
- -- See Note [Compiler phases] in GHC.Types.Basic
- simplify name = simpl_phase FinalPhase name max_iter
-
- -- initial simplify: mk specialiser happy: minimum effort please
+ -- gentle_mode: make specialiser happy: minimum effort please
-- See Note [Inline in InitialPhase]
-- See Note [RULEs enabled in InitialPhase]
- simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter
- (initGentleSimplMode dflags) rule_base
+ gentle_mode = base_simpl_mode { sm_names = ["Gentle"]
+ , sm_phase = InitialPhase
+ , sm_case_case = False }
+
+ simpl_mode phase name
+ = base_simpl_mode { sm_names = [name], sm_phase = phase }
+
+ keep_exits :: SimplMode -> SimplMode
+ -- See Note [Be selective about not-inlining exit join points]
+ -- in GHC.Core.Opt.Exitify
+ keep_exits mode = mode { sm_keep_exits = True }
+
+ ----------------------------
+ run_simplifier mode iter
+ = CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter mode rule_base
+ simpl_phase phase name iter = CoreDoPasses $
+ [ maybe_strictness_before phase
+ , run_simplifier (simpl_mode phase name) iter
+ , maybe_rule_check phase ]
+
+ -- Run GHC's internal simplification phase, after all rules have run.
+ -- See Note [Compiler phases] in GHC.Types.Basic
+ simpl_gently = run_simplifier gentle_mode max_iter
+ simplify_final name = run_simplifier ( simpl_mode FinalPhase name) max_iter
+ simpl_keep_exits name = run_simplifier (keep_exits $ simpl_mode FinalPhase name) max_iter
+
+ ----------------------------
dmd_cpr_ww = if ww_on then [CoreDoDemand True,CoreDoCpr,CoreDoWorkerWrapper]
else [CoreDoDemand False] -- NB: No CPR! See Note [Don't change boxity without worker/wrapper]
- demand_analyser = (CoreDoPasses (
- dmd_cpr_ww ++
- [simplify "post-worker-wrapper"]
- ))
-
-- Static forms are moved to the top level with the FloatOut pass.
-- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
static_ptrs_float_outwards =
@@ -269,14 +283,16 @@ getCoreToDo dflags rule_base extra_vars
runWhen call_arity $ CoreDoPasses
[ CoreDoCallArity
- , simplify "post-call-arity"
+ , simplify_final "post-call-arity"
],
-- Strictness analysis
- runWhen strictness demand_analyser,
+ runWhen strictness $ CoreDoPasses
+ (dmd_cpr_ww ++ [simplify_final "post-worker-wrapper"]),
runWhen exitification CoreDoExitify,
-- See Note [Placement of the exitification pass]
+ -- in GHC.Core.Opt.Exitify
runWhen full_laziness $
CoreDoFloatOutwards FloatOutSwitches {
@@ -298,7 +314,17 @@ getCoreToDo dflags rule_base extra_vars
runWhen do_float_in CoreDoFloatInwards,
- simplify "final", -- Final tidy-up
+ -- Final tidy-up run of the simplifier
+ simpl_keep_exits "final tidy up",
+ -- Keep exit join point because this is the first
+ -- Simplifier run after Exitify. Subsequent runs will
+ -- re-inline those exit join points; their work is done.
+ -- See Note [Be selective about not-inlining exit join points]
+ -- in GHC.Core.Opt.Exitify
+ --
+ -- Annoyingly, we only /have/ a subsequent run with -O2. With
+ -- plain -O we'll still have those exit join points hanging around.
+ -- Oh well.
maybe_rule_check FinalPhase,
@@ -308,31 +334,31 @@ getCoreToDo dflags rule_base extra_vars
-- Case-liberation for -O2. This should be after
-- strictness analysis and the simplification which follows it.
runWhen liberate_case $ CoreDoPasses
- [ CoreLiberateCase, simplify "post-liberate-case" ],
+ [ CoreLiberateCase, simplify_final "post-liberate-case" ],
-- Run the simplifier after LiberateCase to vastly
-- reduce the possibility of shadowing
-- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
runWhen spec_constr $ CoreDoPasses
- [ CoreDoSpecConstr, simplify "post-spec-constr"],
+ [ CoreDoSpecConstr, simplify_final "post-spec-constr"],
-- See Note [Simplify after SpecConstr]
maybe_rule_check FinalPhase,
runWhen late_specialise $ CoreDoPasses
- [ CoreDoSpecialising, simplify "post-late-spec"],
+ [ CoreDoSpecialising, simplify_final "post-late-spec"],
-- LiberateCase can yield new CSE opportunities because it peels
-- off one layer of a recursive function (concretely, I saw this
-- in wheel-sieve1), and I'm guessing that SpecConstr can too
-- And CSE is a very cheap pass. So it seems worth doing here.
runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses
- [ CoreCSE, simplify "post-final-cse" ],
+ [ CoreCSE, simplify_final "post-final-cse" ],
--------- End of -O2 passes --------------
runWhen late_dmd_anal $ CoreDoPasses (
- dmd_cpr_ww ++ [simplify "post-late-ww"]
+ dmd_cpr_ww ++ [simplify_final "post-late-ww"]
),
-- Final run of the demand_analyser, ensures that one-shot thunks are
diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs
index 6409a6d7eb..f56ebe4870 100644
--- a/compiler/GHC/Core/Opt/Simplify/Env.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Env.hs
@@ -248,13 +248,16 @@ data SimplMode = SimplMode -- See comments in GHC.Core.Opt.Simplify.Monad
, sm_uf_opts :: !UnfoldingOpts -- ^ Unfolding options
, sm_case_case :: !Bool -- ^ Whether case-of-case is enabled
, sm_pre_inline :: !Bool -- ^ Whether pre-inlining is enabled
- , sm_float_enable :: !FloatEnable -- ^ Whether to enable floating out
+ , sm_keep_exits :: !Bool -- ^ True <=> keep ExitJoinIds
+ -- See Note [Do not inline exit join points]
+ -- in GHC.Core.Opt.Exitify
+ , sm_float_enable :: !FloatEnable -- ^ Whether to enable floating out
, sm_do_eta_reduction :: !Bool
- , sm_arity_opts :: !ArityOpts
- , sm_rule_opts :: !RuleOpts
- , sm_case_folding :: !Bool
- , sm_case_merge :: !Bool
- , sm_co_opt_opts :: !OptCoercionOpts -- ^ Coercion optimiser options
+ , sm_arity_opts :: !ArityOpts
+ , sm_rule_opts :: !RuleOpts
+ , sm_case_folding :: !Bool
+ , sm_case_merge :: !Bool
+ , sm_co_opt_opts :: !OptCoercionOpts -- ^ Coercion optimiser options
}
instance Outputable SimplMode where
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index abd58fcb39..6f26d2527b 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -1333,11 +1333,11 @@ preInlineUnconditionally
-- Reason: we don't want to inline single uses, or discard dead bindings,
-- for unlifted, side-effect-ful bindings
preInlineUnconditionally env top_lvl bndr rhs rhs_env
- | not pre_inline_unconditionally = Nothing
+ | not pre_inline = Nothing
| not active = Nothing
| isTopLevel top_lvl && isDeadEndId bndr = Nothing -- Note [Top-level bottoming Ids]
| isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally]
- | isExitJoinId bndr = Nothing -- Note [Do not inline exit join points]
+ | keep_exits, isExitJoinId bndr = Nothing -- Note [Do not inline exit join points]
-- in module Exitify
| not (one_occ (idOccInfo bndr)) = Nothing
| not (isStableUnfolding unf) = Just $! (extend_subst_with rhs)
@@ -1347,19 +1347,36 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
, Just inl <- maybeUnfoldingTemplate unf = Just $! (extend_subst_with inl)
| otherwise = Nothing
where
+ mode = seMode env
+ phase = sm_phase mode
+ keep_exits = sm_keep_exits mode
+ pre_inline = sm_pre_inline mode
+
unf = idUnfolding bndr
extend_subst_with inl_rhs = extendIdSubst env bndr $! (mkContEx rhs_env inl_rhs)
one_occ IAmDead = True -- Happens in ((\x.1) v)
+
one_occ OneOcc{ occ_n_br = 1
, occ_in_lam = NotInsideLam } = isNotTopLevel top_lvl || early_phase
+
one_occ OneOcc{ occ_n_br = 1
, occ_in_lam = IsInsideLam
, occ_int_cxt = IsInteresting } = canInlineInLam rhs
- one_occ _ = False
- pre_inline_unconditionally = sePreInline env
- active = isActive (sePhase env) (inlinePragmaActivation inline_prag)
+ one_occ OneOcc{ occ_n_br = 1 } -- Inline join point that are used once, even inside
+ | isJoinId bndr = True -- lambdas (which are presumably other join points)
+ -- E.g. join j x = rhs in
+ -- joinrec k y = ....j x....
+ -- Here j must be an exit for k, and we can safely inline it under the lambda
+ -- This includes the case where j is nullary: a nullary join point is just the
+ -- same as an arity-1 one. So we don't look at occ_int_cxt.
+ -- All of this only applies if keep_exits is False, otherwise the
+ -- earlier guard on preInlineUnconditionally would have fired
+
+ one_occ _ = False
+
+ active = isActive phase (inlinePragmaActivation inline_prag)
-- See Note [pre/postInlineUnconditionally in gentle mode]
inline_prag = idInlinePragma bndr
@@ -1391,7 +1408,7 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
-- not ticks. Counting ticks cannot be duplicated, and non-counting
-- ticks around a Lam will disappear anyway.
- early_phase = sePhase env /= FinalPhase
+ early_phase = phase /= FinalPhase
-- If we don't have this early_phase test, consider
-- x = length [1,2,3]
-- The full laziness pass carefully floats all the cons cells to
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index 9119671f95..b8a77875a6 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -1512,8 +1512,10 @@ scExpr' env (Case scrut b ty alts)
scrut_occ = case con of
DataAlt dc -- See Note [Do not specialise evals]
| not (single_alt && all deadArgOcc arg_occs)
- -> ScrutOcc (unitUFM dc arg_occs)
- _ -> UnkOcc
+ -> -- pprTrace "sc_alt1" (ppr b' $$ ppr con $$ ppr bs $$ ppr arg_occs) $
+ ScrutOcc (unitUFM dc arg_occs)
+ _ -> -- pprTrace "sc_alt1" (ppr b' $$ ppr con $$ ppr bs $$ ppr arg_occs) $
+ UnkOcc
; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') }
diff --git a/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs b/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
index b413f2d066..86284c8be9 100644
--- a/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
@@ -2,7 +2,6 @@ module GHC.Driver.Config.Core.Opt.Simplify
( initSimplifyExprOpts
, initSimplifyOpts
, initSimplMode
- , initGentleSimplMode
) where
import GHC.Prelude
@@ -27,12 +26,13 @@ import GHC.Types.Var ( Var )
initSimplifyExprOpts :: DynFlags -> InteractiveContext -> SimplifyExprOpts
initSimplifyExprOpts dflags ic = SimplifyExprOpts
{ se_fam_inst = snd $ ic_instances ic
- , se_mode = (initSimplMode dflags InitialPhase "GHCi")
- { sm_inline = False
- -- Do not do any inlining, in case we expose some
- -- unboxed tuple stuff that confuses the bytecode
+
+ , se_mode = (initSimplMode dflags) { sm_names = ["GHCi"]
+ , sm_inline = False }
+ -- sm_inline: do not do any inlining, in case we expose
+ -- some unboxed tuple stuff that confuses the bytecode
-- interpreter
- }
+
, se_top_env_cfg = TopEnvConfig
{ te_history_size = historySize dflags
, te_tick_factor = simplTickFactor dflags
@@ -57,31 +57,25 @@ initSimplifyOpts dflags extra_vars iterations mode rule_base = let
}
in opts
-initSimplMode :: DynFlags -> CompilerPhase -> String -> SimplMode
-initSimplMode dflags phase name = SimplMode
- { sm_names = [name]
- , sm_phase = phase
- , sm_rules = gopt Opt_EnableRewriteRules dflags
- , sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags
- , sm_cast_swizzle = True
- , sm_inline = True
- , sm_uf_opts = unfoldingOpts dflags
- , sm_case_case = True
- , sm_pre_inline = gopt Opt_SimplPreInlining dflags
- , sm_float_enable = floatEnable dflags
+initSimplMode :: DynFlags -> SimplMode
+initSimplMode dflags = SimplMode
+ { sm_names = ["Unknown simplifier run"] -- Always overriden
+ , sm_phase = InitialPhase
+ , sm_rules = gopt Opt_EnableRewriteRules dflags
+ , sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags
+ , sm_pre_inline = gopt Opt_SimplPreInlining dflags
, sm_do_eta_reduction = gopt Opt_DoEtaReduction dflags
- , sm_arity_opts = initArityOpts dflags
- , sm_rule_opts = initRuleOpts dflags
- , sm_case_folding = gopt Opt_CaseFolding dflags
- , sm_case_merge = gopt Opt_CaseMerge dflags
- , sm_co_opt_opts = initOptCoercionOpts dflags
- }
-
-initGentleSimplMode :: DynFlags -> SimplMode
-initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle")
- { -- Don't do case-of-case transformations.
- -- This makes full laziness work better
- sm_case_case = False
+ , sm_uf_opts = unfoldingOpts dflags
+ , sm_float_enable = floatEnable dflags
+ , sm_arity_opts = initArityOpts dflags
+ , sm_rule_opts = initRuleOpts dflags
+ , sm_case_folding = gopt Opt_CaseFolding dflags
+ , sm_case_merge = gopt Opt_CaseMerge dflags
+ , sm_co_opt_opts = initOptCoercionOpts dflags
+ , sm_cast_swizzle = True
+ , sm_inline = True
+ , sm_case_case = True
+ , sm_keep_exits = False
}
floatEnable :: DynFlags -> FloatEnable
diff --git a/testsuite/tests/simplCore/should_compile/T21148.hs b/testsuite/tests/simplCore/should_compile/T21148.hs
new file mode 100644
index 0000000000..72d3e14167
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T21148.hs
@@ -0,0 +1,12 @@
+module T211148 where
+
+-- The point of this test is that f should get a (nested)
+-- CPR property, with a worker of type
+-- $wf :: Int# -> State# RealWorld -> (# State# RealWorld, Int# #)
+
+{-# NOINLINE f #-}
+-- The NOINLINE makes GHC do a worker/wrapper split
+-- even though f is small
+f :: Int -> IO Int
+f x = return $! sum [0..x]
+
diff --git a/testsuite/tests/simplCore/should_compile/T21148.stderr b/testsuite/tests/simplCore/should_compile/T21148.stderr
new file mode 100644
index 0000000000..9197584912
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T21148.stderr
@@ -0,0 +1,126 @@
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+ = {terms: 73, types: 80, coercions: 6, joins: 2/2}
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T211148.$trModule4 :: GHC.Prim.Addr#
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}]
+T211148.$trModule4 = "main"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T211148.$trModule3 :: GHC.Types.TrName
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+T211148.$trModule3 = GHC.Types.TrNameS T211148.$trModule4
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T211148.$trModule2 :: GHC.Prim.Addr#
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 30 0}]
+T211148.$trModule2 = "T211148"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T211148.$trModule1 :: GHC.Types.TrName
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+T211148.$trModule1 = GHC.Types.TrNameS T211148.$trModule2
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+T211148.$trModule :: GHC.Types.Module
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+T211148.$trModule
+ = GHC.Types.Module T211148.$trModule3 T211148.$trModule1
+
+-- RHS size: {terms: 41, types: 35, coercions: 0, joins: 2/2}
+T211148.$wf [InlPrag=NOINLINE]
+ :: GHC.Prim.Int#
+ -> GHC.Prim.State# GHC.Prim.RealWorld
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #)
+[GblId, Arity=2, Str=<L><L>, Unf=OtherCon []]
+T211148.$wf
+ = \ (ww_s179 :: GHC.Prim.Int#)
+ (eta_s17b [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+ case GHC.Prim.># 0# ww_s179 of {
+ __DEFAULT ->
+ join {
+ exit_X0 [Dmd=SC(S,C(1,!P(L,L)))]
+ :: GHC.Prim.Int#
+ -> GHC.Prim.Int#
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #)
+ [LclId[JoinId(2)(Nothing)], Arity=2, Str=<L><L>]
+ exit_X0 (x_s16Z [OS=OneShot] :: GHC.Prim.Int#)
+ (ww1_s172 [OS=OneShot] :: GHC.Prim.Int#)
+ = (# eta_s17b, GHC.Prim.+# ww1_s172 x_s16Z #) } in
+ joinrec {
+ $wgo3_s175 [InlPrag=[2], Occ=LoopBreaker, Dmd=SC(S,C(1,!P(L,L)))]
+ :: GHC.Prim.Int#
+ -> GHC.Prim.Int#
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #)
+ [LclId[JoinId(2)(Nothing)], Arity=2, Str=<L><L>, Unf=OtherCon []]
+ $wgo3_s175 (x_s16Z :: GHC.Prim.Int#) (ww1_s172 :: GHC.Prim.Int#)
+ = case GHC.Prim.==# x_s16Z ww_s179 of {
+ __DEFAULT ->
+ jump $wgo3_s175
+ (GHC.Prim.+# x_s16Z 1#) (GHC.Prim.+# ww1_s172 x_s16Z);
+ 1# -> jump exit_X0 x_s16Z ww1_s172
+ }; } in
+ jump $wgo3_s175 0# 0#;
+ 1# -> (# eta_s17b, 0# #)
+ }
+
+-- RHS size: {terms: 14, types: 19, coercions: 0, joins: 0/0}
+T211148.f1 [InlPrag=NOINLINE[final]]
+ :: Int
+ -> GHC.Prim.State# GHC.Prim.RealWorld
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, Int #)
+[GblId,
+ Arity=2,
+ Str=<1!P(L)><L>,
+ Cpr=1(, 1),
+ Unf=Unf{Src=StableSystem, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True,
+ Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False)
+ Tmpl= \ (x_s177 [Occ=Once1!] :: Int)
+ (eta_s17b [Occ=Once1, OS=OneShot]
+ :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+ case x_s177 of { GHC.Types.I# ww_s179 [Occ=Once1] ->
+ case T211148.$wf ww_s179 eta_s17b of
+ { (# ww1_s17e [Occ=Once1], ww2_s17j [Occ=Once1] #) ->
+ (# ww1_s17e, GHC.Types.I# ww2_s17j #)
+ }
+ }}]
+T211148.f1
+ = \ (x_s177 :: Int)
+ (eta_s17b [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+ case x_s177 of { GHC.Types.I# ww_s179 ->
+ case T211148.$wf ww_s179 eta_s17b of { (# ww1_s17e, ww2_s17j #) ->
+ (# ww1_s17e, GHC.Types.I# ww2_s17j #)
+ }
+ }
+
+-- RHS size: {terms: 1, types: 0, coercions: 6, joins: 0/0}
+f [InlPrag=NOINLINE[final]] :: Int -> IO Int
+[GblId,
+ Arity=2,
+ Str=<1!P(L)><L>,
+ Cpr=1(, 1),
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True,
+ Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}]
+f = T211148.f1
+ `cast` (<Int>_R %<'Many>_N ->_R Sym (GHC.Types.N:IO[0] <Int>_R)
+ :: (Int
+ -> GHC.Prim.State# GHC.Prim.RealWorld
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, Int #))
+ ~R# (Int -> IO Int))
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 283c6cf1b0..fc708ef9f0 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -429,6 +429,7 @@ test('T21763a', only_ways(['optasm']), compile, ['-O2 -ddump-rules'])
test('T22028', normal, compile, ['-O -ddump-rule-firings'])
test('T22114', normal, compile, ['-O'])
test('T21286', normal, multimod_compile, ['T21286', '-O -ddump-rule-firings'])
+test('T21148', [grep_errmsg(r'Cpr=') ], compile, ['-O -ddump-simpl'])
# One module, T21851.hs, has OPTIONS_GHC -ddump-simpl
test('T21851', [grep_errmsg(r'case.*w\$sf') ], multimod_compile, ['T21851', '-O -dno-typeable-binds -dsuppress-uniques'])
diff --git a/testsuite/tests/stranal/should_compile/T21128.hs b/testsuite/tests/stranal/should_compile/T21128.hs
index 899adac49c..02991433f2 100644
--- a/testsuite/tests/stranal/should_compile/T21128.hs
+++ b/testsuite/tests/stranal/should_compile/T21128.hs
@@ -2,6 +2,10 @@ module T21128 where
import T21128a
+{- This test originally had some unnecessary reboxing of y
+in the hot path of $wtheresCrud. That reboxing should
+not happen. -}
+
theresCrud :: Int -> Int -> Int
theresCrud x y = go x
where
@@ -9,3 +13,4 @@ theresCrud x y = go x
go 1 = index x y 1
go n = go (n-1)
{-# NOINLINE theresCrud #-}
+
diff --git a/testsuite/tests/stranal/should_compile/T21128.stderr b/testsuite/tests/stranal/should_compile/T21128.stderr
index a64c1f1d5a..955717ef35 100644
--- a/testsuite/tests/stranal/should_compile/T21128.stderr
+++ b/testsuite/tests/stranal/should_compile/T21128.stderr
@@ -1,7 +1,7 @@
==================== Tidy Core ====================
Result size of Tidy Core
- = {terms: 137, types: 92, coercions: 4, joins: 0/0}
+ = {terms: 125, types: 68, coercions: 4, joins: 0/0}
lvl = "error"#
@@ -29,17 +29,11 @@ lvl9 = SrcLoc lvl2 lvl3 lvl5 lvl6 lvl7 lvl6 lvl8
lvl10 = PushCallStack lvl1 lvl9 EmptyCallStack
-$windexError
- = \ @a @b ww eta eta1 eta2 ->
- error
- (lvl10 `cast` <Co:4> :: CallStack ~R# (?callStack::CallStack))
- (++ (ww eta) (++ (ww eta1) (ww eta2)))
-
indexError
= \ @a @b $dShow eta eta1 eta2 ->
- case $dShow of { C:Show ww ww1 ww2 ->
- $windexError ww1 eta eta1 eta2
- }
+ error
+ (lvl10 `cast` <Co:4> :: ...)
+ (++ (show $dShow eta) (++ (show $dShow eta1) (show $dShow eta2)))
$trModule3 = TrNameS $trModule4
@@ -48,8 +42,7 @@ $trModule1 = TrNameS $trModule2
$trModule = Module $trModule3 $trModule1
$wlvl
- = \ ww ww1 ww2 ->
- $windexError $fShowInt_$cshow (I# ww2) (I# ww1) (I# ww)
+ = \ ww ww1 ww2 -> indexError $fShowInt (I# ww2) (I# ww1) (I# ww)
index
= \ l u i ->
@@ -73,7 +66,7 @@ index
==================== Tidy Core ====================
Result size of Tidy Core
- = {terms: 108, types: 47, coercions: 0, joins: 3/4}
+ = {terms: 108, types: 46, coercions: 0, joins: 3/3}
$trModule4 = "main"#
@@ -89,35 +82,34 @@ i = I# 1#
l = I# 0#
-lvl = \ y -> $windexError $fShowInt_$cshow l y l
+lvl = \ x ww -> indexError $fShowInt x (I# ww) i
-lvl1 = \ ww y -> $windexError $fShowInt_$cshow (I# ww) y i
+lvl1 = \ ww -> indexError $fShowInt l (I# ww) l
$wtheresCrud
= \ ww ww1 ->
- let { y = I# ww1 } in
join {
- lvl2
+ exit
+ = case <# 0# ww1 of {
+ __DEFAULT -> case lvl1 ww1 of wild { };
+ 1# -> 0#
+ } } in
+ join {
+ exit1
= case <=# ww 1# of {
- __DEFAULT -> case lvl1 ww y of wild { };
+ __DEFAULT -> case lvl (I# ww) ww1 of wild { };
1# ->
case <# 1# ww1 of {
- __DEFAULT -> case lvl1 ww y of wild { };
+ __DEFAULT -> case lvl (I# ww) ww1 of wild { };
1# -> -# 1# ww
}
} } in
- join {
- lvl3
- = case <# 0# ww1 of {
- __DEFAULT -> case lvl y of wild { };
- 1# -> 0#
- } } in
joinrec {
$wgo ww2
= case ww2 of wild {
__DEFAULT -> jump $wgo (-# wild 1#);
- 0# -> jump lvl3;
- 1# -> jump lvl2
+ 0# -> jump exit;
+ 1# -> jump exit1
}; } in
jump $wgo ww