From 96d32ff289f87b8c78f0a8d1b11295c9563ec020 Mon Sep 17 00:00:00 2001 From: Simon Peyton Jones Date: Mon, 26 Sep 2022 12:47:35 +0100 Subject: Make rewrite rules "win" over inlining If a rewrite rule and a rewrite rule compete in the simplifier, this patch makes sure that the rewrite rule "win". That is, in general a bit fragile, but it's a huge help when making specialisation work reliably, as #21851 and #22097 showed. The change is fairly straightforwad, and documented in Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration. Compile-times change, up and down a bit -- in some cases because we get better specialisation. But the payoff (more reliable specialisation) is large. Metrics: compile_time/bytes allocated ----------------------------------------------- T10421(normal) +3.7% BAD T10421a(normal) +5.5% T13253(normal) +1.3% T14052(ghci) +1.8% T15304(normal) -1.4% T16577(normal) +3.1% BAD T17516(normal) +2.3% T17836(normal) -1.9% T18223(normal) -1.8% T8095(normal) -1.3% T9961(normal) +2.5% BAD geo. mean +0.0% minimum -1.9% maximum +5.5% Nofib results are (bytes allocated) +-------------------------------++----------+ | ||tsv (rel) | +===============================++==========+ | imaginary/paraffins || +0.27% | | imaginary/rfib || -0.04% | | real/anna || +0.02% | | real/fem || -0.04% | | real/fluid || +1.68% | | real/gamteb || -0.34% | | real/gg || +1.54% | | real/hidden || -0.01% | | real/hpg || -0.03% | | real/infer || -0.03% | | real/prolog || +0.02% | | real/veritas || -0.47% | | shootout/fannkuch-redux || -0.03% | | shootout/k-nucleotide || -0.02% | | shootout/n-body || -0.06% | | shootout/spectral-norm || -0.01% | | spectral/cryptarithm2 || +1.25% | | spectral/fibheaps || +18.33% | | spectral/last-piece || -0.34% | +===============================++==========+ | geom mean || +0.17% | There are extensive notes in !8897 about the regressions. Briefly * fibheaps: there was a very delicately balanced inlining that tipped over the wrong way after this change. * cryptarithm2 and paraffins are caused by #22274, which is a separate issue really. (I.e. the right fix is *not* to make inlining "win" over rules.) So I'm accepting these changes Metric Increase: T10421 T16577 T9961 --- compiler/GHC/Core/Opt/Simplify/Iteration.hs | 216 +++++++++++++-------- compiler/GHC/Core/Opt/Simplify/Utils.hs | 157 ++++++++------- compiler/GHC/Utils/Monad.hs | 5 +- testsuite/tests/lib/integer/Makefile | 6 +- testsuite/tests/simplCore/should_compile/T21851.hs | 15 ++ .../tests/simplCore/should_compile/T21851.stderr | 19 ++ .../tests/simplCore/should_compile/T21851a.hs | 5 + testsuite/tests/simplCore/should_compile/T22097.hs | 7 + .../tests/simplCore/should_compile/T22097.stderr | 46 +++++ .../tests/simplCore/should_compile/T22097a.hs | 23 +++ .../tests/simplCore/should_compile/T6056.stderr | 2 +- testsuite/tests/simplCore/should_compile/all.T | 4 + 12 files changed, 344 insertions(+), 161 deletions(-) create mode 100644 testsuite/tests/simplCore/should_compile/T21851.hs create mode 100644 testsuite/tests/simplCore/should_compile/T21851.stderr create mode 100644 testsuite/tests/simplCore/should_compile/T21851a.hs create mode 100644 testsuite/tests/simplCore/should_compile/T22097.hs create mode 100644 testsuite/tests/simplCore/should_compile/T22097.stderr create mode 100644 testsuite/tests/simplCore/should_compile/T22097a.hs diff --git a/compiler/GHC/Core/Opt/Simplify/Iteration.hs b/compiler/GHC/Core/Opt/Simplify/Iteration.hs index d2bdace3e2..f3fb5c2f0b 100644 --- a/compiler/GHC/Core/Opt/Simplify/Iteration.hs +++ b/compiler/GHC/Core/Opt/Simplify/Iteration.hs @@ -1919,7 +1919,9 @@ wrapJoinCont env cont thing_inside -------------------- -trimJoinCont :: Id -> Maybe JoinArity -> SimplCont -> SimplCont +trimJoinCont :: Id -- Used only in error message + -> Maybe JoinArity + -> SimplCont -> SimplCont -- Drop outer context from join point invocation (jump) -- See Note [Join points and case-of-case] @@ -2017,6 +2019,17 @@ outside. Surprisingly tricky! Variables * * ************************************************************************ + +Note [zapSubstEnv] +~~~~~~~~~~~~~~~~~~ +When simplifying something that has already been simplified, be sure to +zap the SubstEnv. This is VITAL. Consider + let x = e in + let y = \z -> ...x... in + \ x -> ...y... + +We'll clone the inner \x, adding x->x' in the id_subst Then when we +inline y, we must *not* replace x by x' in the inlined copy!! -} simplVar :: SimplEnv -> InVar -> SimplM OutExpr @@ -2035,86 +2048,28 @@ simplVar env var simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr) simplIdF env var cont = case substId env var of - ContEx tvs cvs ids e -> - let env' = setSubstEnv env tvs cvs ids - in simplExprF env' e cont - -- Don't trim; haven't already simplified e, - -- so the cont is not embodied in e - - DoneId var1 -> do - logger <- getLogger - let cont' = trimJoinCont var (isJoinId_maybe var1) cont - completeCall logger env var1 cont' - - DoneEx e mb_join -> - let env' = zapSubstEnv env - cont' = trimJoinCont var mb_join cont - in simplExprF env' e cont' - -- Note [zapSubstEnv] - -- ~~~~~~~~~~~~~~~~~~ - -- The template is already simplified, so don't re-substitute. - -- This is VITAL. Consider - -- let x = e in - -- let y = \z -> ...x... in - -- \ x -> ...y... - -- We'll clone the inner \x, adding x->x' in the id_subst - -- Then when we inline y, we must *not* replace x by x' in - -- the inlined copy!! - ---------------------------------------------------------- --- Dealing with a call site - -completeCall :: Logger -> SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr) -completeCall logger env var cont - | Just expr <- callSiteInline logger uf_opts case_depth var active_unf - lone_variable arg_infos interesting_cont - -- Inline the variable's RHS - = do { checkedTick (UnfoldingDone var) - ; dump_inline expr cont - ; let env1 = zapSubstEnv env - ; simplExprF env1 expr cont } - - | otherwise - -- Don't inline; instead rebuild the call - = do { rule_base <- getSimplRules - ; let rules = getRules rule_base var - info = mkArgInfo env var rules - n_val_args call_cont - ; rebuildCall env info cont } + ContEx tvs cvs ids e -> simplExprF env' e cont + -- Don't trimJoinCont; haven't already simplified e, + -- so the cont is not embodied in e + where + env' = setSubstEnv env tvs cvs ids - where - uf_opts = seUnfoldingOpts env - case_depth = seCaseDepth env - (lone_variable, arg_infos, call_cont) = contArgs cont - n_val_args = length arg_infos - interesting_cont = interestingCallContext env call_cont - active_unf = activeUnfolding (seMode env) var + DoneId var1 -> + do { rule_base <- getSimplRules + ; let cont' = trimJoinCont var1 (isJoinId_maybe var1) cont + info = mkArgInfo env rule_base var1 cont' + ; rebuildCall env info cont' } - log_inlining doc - = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify) - Opt_D_dump_inlinings - "" FormatText doc + DoneEx e mb_join -> simplExprF env' e cont' + where + cont' = trimJoinCont var mb_join cont + env' = zapSubstEnv env -- See Note [zapSubstEnv] - dump_inline unfolding cont - | not (logHasDumpFlag logger Opt_D_dump_inlinings) = return () - | not (logHasDumpFlag logger Opt_D_verbose_core2core) - = when (isExternalName (idName var)) $ - log_inlining $ - sep [text "Inlining done:", nest 4 (ppr var)] - | otherwise - = log_inlining $ - sep [text "Inlining done: " <> ppr var, - nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding), - text "Cont: " <+> ppr cont])] +--------------------------------------------------------- +-- Dealing with a call site -rebuildCall :: SimplEnv - -> ArgInfo - -> SimplCont +rebuildCall :: SimplEnv -> ArgInfo -> SimplCont -> SimplM (SimplFloats, OutExpr) --- We decided not to inline, so --- - simplify the arguments --- - try rewrite rules --- - and rebuild ---------- Bottoming applications -------------- rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) cont @@ -2137,27 +2092,48 @@ rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) con res = argInfoExpr fun rev_args cont_ty = contResultType cont ----------- Try rewrite RULES -------------- --- See Note [Trying rewrite rules] +---------- Try inlining, if ai_rewrite = TryInlining -------- +-- In the TryInlining case we try inlining immediately, before simplifying +-- any (more) arguments. Why? See Note [Rewrite rules and inlining]. +-- +-- If there are rewrite rules we'll skip this case until we have +-- simplified enough args to satisfy nr_wanted==0 in the TryRules case below +-- Then we'll try the rules, and if that fails, we'll do TryInlining +rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args + , ai_rewrite = TryInlining }) cont + = do { logger <- getLogger + ; let full_cont = pushSimplifiedRevArgs env rev_args cont + ; mb_inline <- tryInlining env logger fun full_cont + ; case mb_inline of + Just expr -> do { checkedTick (UnfoldingDone fun) + ; let env1 = zapSubstEnv env + ; simplExprF env1 expr full_cont } + Nothing -> rebuildCall env (info { ai_rewrite = TryNothing }) cont + } + +---------- Try rewrite RULES, if ai_rewrite = TryRules -------------- +-- See Note [Rewrite rules and inlining] +-- See also Note [Trying rewrite rules] rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args - , ai_rules = Just (nr_wanted, rules) }) cont + , ai_rewrite = TryRules nr_wanted rules }) cont | nr_wanted == 0 || no_more_args - , let info' = info { ai_rules = Nothing } = -- We've accumulated a simplified call in -- so try rewrite rules; see Note [RULES apply to simplified arguments] -- See also Note [Rules for recursive functions] do { mb_match <- tryRules env rules fun (reverse rev_args) cont ; case mb_match of Just (env', rhs, cont') -> simplExprF env' rhs cont' - Nothing -> rebuildCall env info' cont } + Nothing -> rebuildCall env (info { ai_rewrite = TryInlining }) cont } where + -- If we have run out of arguments, just try the rules; there might + -- be some with lower arity. Casts get in the way -- they aren't + -- allowed on rule LHSs no_more_args = case cont of ApplyToTy {} -> False ApplyToVal {} -> False _ -> True - ----------- Simplify applications and casts -------------- +---------- Simplify type applications and casts -------------- rebuildCall env info (CastIt co cont) = rebuildCall env (addCastTo info co) cont @@ -2202,6 +2178,7 @@ rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args }) call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg'] ; return (emptyFloats env, call') } +---------- Simplify value arguments -------------------- rebuildCall env fun_info (ApplyToVal { sc_arg = arg, sc_env = arg_se , sc_dup = dup_flag, sc_hole_ty = fun_ty @@ -2237,6 +2214,42 @@ rebuildCall env fun_info rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont = rebuild env (argInfoExpr fun rev_args) cont +----------------------------------- +tryInlining :: SimplEnv -> Logger -> OutId -> SimplCont -> SimplM (Maybe OutExpr) +tryInlining env logger var cont + | Just expr <- callSiteInline logger uf_opts case_depth var active_unf + lone_variable arg_infos interesting_cont + = do { dump_inline expr cont + ; return (Just expr) } + + | otherwise + = return Nothing + + where + uf_opts = seUnfoldingOpts env + case_depth = seCaseDepth env + (lone_variable, arg_infos, call_cont) = contArgs cont + interesting_cont = interestingCallContext env call_cont + active_unf = activeUnfolding (seMode env) var + + log_inlining doc + = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify) + Opt_D_dump_inlinings + "" FormatText doc + + dump_inline unfolding cont + | not (logHasDumpFlag logger Opt_D_dump_inlinings) = return () + | not (logHasDumpFlag logger Opt_D_verbose_core2core) + = when (isExternalName (idName var)) $ + log_inlining $ + sep [text "Inlining done:", nest 4 (ppr var)] + | otherwise + = log_inlining $ + sep [text "Inlining done: " <> ppr var, + nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding), + text "Cont: " <+> ppr cont])] + + {- Note [Trying rewrite rules] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider an application (f e1 e2 e3) where the e1,e2,e3 are not yet @@ -2272,6 +2285,38 @@ makes a particularly big difference when superclass selectors are involved: op ($p1 ($p2 (df d))) We want all this to unravel in one sweep. +Note [Rewrite rules and inlining] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general we try to arrange that inlining is disabled (via a pragma) if +a rewrite rule should apply, so that the rule has a decent chance to fire +before we inline the function. + +But it turns out that (especially when type-class specialisation or +SpecConstr is involved) it is very helpful for the the rewrite rule to +"win" over inlining when both are active at once: see #21851, #22097. + +The simplifier arranges to do this, as follows. In effect, the ai_rewrite +field of the ArgInfo record is the state of a little state-machine: + +* mkArgInfo sets the ai_rewrite field to TryRules if there are any rewrite + rules avaialable for that function. + +* rebuildCall simplifies arguments until enough are simplified to match the + rule with greatest arity. See Note [RULES apply to simplified arguments] + and the first field of `TryRules`. + + But no more! As soon as we have simplified enough arguments to satisfy the + maximum-arity rules, we try the rules; see Note [Trying rewrite rules]. + +* Once we have tried rules (or immediately if there are no rules) set + ai_rewrite to TryInlining, and the Simplifier will try to inline the + function. We want to try this immediately (before simplifying any (more) + arguments). Why? Consider + f BIG where f = \x{OneOcc}. ...x... + If we inline `f` before simplifying `BIG` well use preInlineUnconditionally, + and we'll simplify BIG once, at x's occurrence, rather than twice. + + Note [Avoid redundant simplification] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because RULES apply to simplified arguments, there's a danger of repeatedly @@ -2327,7 +2372,8 @@ See Note [No free join points in arityType] in GHC.Core.Opt.Arity -} tryRules :: SimplEnv -> [CoreRule] - -> Id -> [ArgSpec] + -> Id + -> [ArgSpec] -- In /normal, forward/ order -> SimplCont -> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont)) @@ -3668,7 +3714,7 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty | otherwise = do { join_bndr <- newJoinId [arg_bndr] res_ty ; let arg_info = ArgInfo { ai_fun = join_bndr - , ai_rules = Nothing, ai_args = [] + , ai_rewrite = TryNothing, ai_args = [] , ai_encl = False, ai_dmds = repeat topDmd , ai_discs = repeat 0 } ; return ( addJoinFloats (emptyFloats env) $ diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index 2a3a272f50..abd58fcb39 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -30,9 +30,10 @@ module GHC.Core.Opt.Simplify.Utils ( interestingCallContext, -- ArgInfo - ArgInfo(..), ArgSpec(..), mkArgInfo, + ArgInfo(..), ArgSpec(..), RewriteCall(..), mkArgInfo, addValArgTo, addCastTo, addTyArgTo, - argInfoExpr, argInfoAppArgs, pushSimplifiedArgs, + argInfoExpr, argInfoAppArgs, + pushSimplifiedArgs, pushSimplifiedRevArgs, isStrictArgInfo, lazyArgContext, abstractFloats, @@ -52,6 +53,7 @@ import GHC.Core.Ppr import GHC.Core.TyCo.Ppr ( pprParendType ) import GHC.Core.FVs import GHC.Core.Utils +import GHC.Core.Rules( getRules ) import GHC.Core.Opt.Arity import GHC.Core.Unfold import GHC.Core.Unfold.Make @@ -210,6 +212,7 @@ data SimplCont type StaticEnv = SimplEnv -- Just the static part is relevant +-- See Note [DupFlag invariants] data DupFlag = NoDup -- Unsimplified, might be big | Simplified -- Simplified | OkToDup -- Simplified and small @@ -226,8 +229,9 @@ perhapsSubstTy dup env ty {- Note [StaticEnv invariant] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We pair up an InExpr or InAlts with a StaticEnv, which establishes the -lexical scope for that InExpr. When we simplify that InExpr/InAlts, we -use +lexical scope for that InExpr. + +When we simplify that InExpr/InAlts, we use - Its captured StaticEnv - Overriding its InScopeSet with the larger one at the simplification point. @@ -244,13 +248,14 @@ isn't big enough. Note [DupFlag invariants] ~~~~~~~~~~~~~~~~~~~~~~~~~ -In both (ApplyToVal dup _ env k) - and (Select dup _ _ env k) +In both ApplyToVal { se_dup = dup, se_env = env, se_cont = k} + and Select { se_dup = dup, se_env = env, se_cont = k} the following invariants hold (a) if dup = OkToDup, then continuation k is also ok-to-dup - (b) if dup = OkToDup or Simplified, the subst-env is empty - (and hence no need to re-simplify) + (b) if dup = OkToDup or Simplified, the subst-env is empty, + or at least is always ignored; the payload is + already an OutThing -} instance Outputable DupFlag where @@ -309,7 +314,8 @@ data ArgInfo ai_fun :: OutId, -- The function ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order) - ai_rules :: FunRules, -- Rules for this function + ai_rewrite :: RewriteCall, -- What transformation to try next for this call + -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration ai_encl :: Bool, -- Flag saying whether this function -- or an enclosing one has rules (recursively) @@ -325,6 +331,12 @@ data ArgInfo -- Always infinite } +data RewriteCall -- What rewriting to try next for this call + -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration + = TryRules FullArgCount [CoreRule] + | TryInlining + | TryNothing + data ArgSpec = ValArg { as_dmd :: Demand -- Demand placed on this argument , as_arg :: OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal @@ -349,20 +361,20 @@ instance Outputable ArgSpec where addValArgTo :: ArgInfo -> OutExpr -> OutType -> ArgInfo addValArgTo ai arg hole_ty - | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rules = rules } <- ai + | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rewrite = rew } <- ai -- Pop the top demand and and discounts off , let arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty, as_dmd = dmd } - = ai { ai_args = arg_spec : ai_args ai - , ai_dmds = dmds - , ai_discs = discs - , ai_rules = decRules rules } + = ai { ai_args = arg_spec : ai_args ai + , ai_dmds = dmds + , ai_discs = discs + , ai_rewrite = decArgCount rew } | otherwise = pprPanic "addValArgTo" (ppr ai $$ ppr arg) -- There should always be enough demands and discounts addTyArgTo :: ArgInfo -> OutType -> OutType -> ArgInfo -addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai - , ai_rules = decRules (ai_rules ai) } +addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai + , ai_rewrite = decArgCount (ai_rewrite ai) } where arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty } @@ -381,19 +393,22 @@ argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast argInfoAppArgs (ValArg { as_arg = arg } : as) = arg : argInfoAppArgs as argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as -pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont -pushSimplifiedArgs _env [] k = k -pushSimplifiedArgs env (arg : args) k - = case arg of - TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty } - -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest } - ValArg { as_arg = arg, as_hole_ty = hole_ty } - -> ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified - , sc_hole_ty = hole_ty, sc_cont = rest } - CastBy c -> CastIt c rest - where - rest = pushSimplifiedArgs env args k - -- The env has an empty SubstEnv +pushSimplifiedArgs, pushSimplifiedRevArgs + :: SimplEnv + -> [ArgSpec] -- In normal, forward order for pushSimplifiedArgs, + -- in /reverse/ order for pushSimplifiedRevArgs + -> SimplCont -> SimplCont +pushSimplifiedArgs env args cont = foldr (pushSimplifiedArg env) cont args +pushSimplifiedRevArgs env args cont = foldl' (\k a -> pushSimplifiedArg env a k) cont args + +pushSimplifiedArg :: SimplEnv -> ArgSpec -> SimplCont -> SimplCont +pushSimplifiedArg _env (TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }) cont + = ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = cont } +pushSimplifiedArg env (ValArg { as_arg = arg, as_hole_ty = hole_ty }) cont + = ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified + -- The SubstEnv will be ignored since sc_dup=Simplified + , sc_hole_ty = hole_ty, sc_cont = cont } +pushSimplifiedArg _ (CastBy c) cont = CastIt c cont argInfoExpr :: OutId -> [ArgSpec] -> OutExpr -- NB: the [ArgSpec] is reversed so that the first arg @@ -406,18 +421,14 @@ argInfoExpr fun rev_args go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty go (CastBy co : as) = mkCast (go as) co +decArgCount :: RewriteCall -> RewriteCall +decArgCount (TryRules n rules) = TryRules (n-1) rules +decArgCount rew = rew -type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function - -- Nothing => No rules - -- Just (n, rules) => some rules, requiring at least n more type/value args - -decRules :: FunRules -> FunRules -decRules (Just (n, rules)) = Just (n-1, rules) -decRules Nothing = Nothing - -mkFunRules :: [CoreRule] -> FunRules -mkFunRules [] = Nothing -mkFunRules rs = Just (n_required, rs) +mkTryRules :: [CoreRule] -> RewriteCall +-- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration +mkTryRules [] = TryInlining +mkTryRules rs = TryRules n_required rs where n_required = maximum (map ruleArity rs) @@ -516,6 +527,7 @@ contHoleScaling (StrictArg { sc_fun_ty = fun_ty, sc_cont = k }) contHoleScaling (ApplyToTy { sc_cont = k }) = contHoleScaling k contHoleScaling (ApplyToVal { sc_cont = k }) = contHoleScaling k contHoleScaling (TickIt _ k) = contHoleScaling k + ------------------- countArgs :: SimplCont -> Int -- Count all arguments, including types, coercions, @@ -525,6 +537,14 @@ countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont countArgs (CastIt _ cont) = countArgs cont countArgs _ = 0 +countValArgs :: SimplCont -> Int +-- Count value arguments only +countValArgs (ApplyToTy { sc_cont = cont }) = 1 + countValArgs cont +countValArgs (ApplyToVal { sc_cont = cont }) = 1 + countValArgs cont +countValArgs (CastIt _ cont) = countValArgs cont +countValArgs _ = 0 + +------------------- contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont) -- Summarises value args, discards type args and coercions -- The returned continuation of the call is only used to @@ -579,29 +599,26 @@ contEvalContext k = case k of -- and case binder dmds, see addCaseBndrDmd. No priority right now. ------------------- -mkArgInfo :: SimplEnv - -> Id - -> [CoreRule] -- Rules for function - -> Int -- Number of value args - -> SimplCont -- Context of the call - -> ArgInfo - -mkArgInfo env fun rules n_val_args call_cont +mkArgInfo :: SimplEnv -> RuleEnv -> Id -> SimplCont -> ArgInfo + +mkArgInfo env rule_base fun cont | n_val_args < idArity fun -- Note [Unsaturated functions] = ArgInfo { ai_fun = fun, ai_args = [] - , ai_rules = fun_rules + , ai_rewrite = fun_rules , ai_encl = False , ai_dmds = vanilla_dmds , ai_discs = vanilla_discounts } | otherwise = ArgInfo { ai_fun = fun , ai_args = [] - , ai_rules = fun_rules - , ai_encl = interestingArgContext rules call_cont + , ai_rewrite = fun_rules + , ai_encl = notNull rules || contHasRules cont , ai_dmds = add_type_strictness (idType fun) arg_dmds , ai_discs = arg_discounts } where - fun_rules = mkFunRules rules + rules = getRules rule_base fun + fun_rules = mkTryRules rules + n_val_args = countValArgs cont vanilla_discounts, arg_discounts :: [Int] vanilla_discounts = repeat 0 @@ -814,7 +831,7 @@ interestingCallContext env cont -- a build it's *great* to inline it here. So we must ensure that -- the context for (f x) is not totally uninteresting. -interestingArgContext :: [CoreRule] -> SimplCont -> Bool +contHasRules :: SimplCont -> Bool -- If the argument has form (f x y), where x,y are boring, -- and f is marked INLINE, then we don't want to inline f. -- But if the context of the argument is @@ -822,33 +839,29 @@ interestingArgContext :: [CoreRule] -> SimplCont -> Bool -- where g has rules, then we *do* want to inline f, in case it -- exposes a rule that might fire. Similarly, if the context is -- h (g (f x x)) --- where h has rules, then we do want to inline f; hence the --- call_cont argument to interestingArgContext +-- where h has rules, then we do want to inline f. So contHasRules +-- tries to see if the context of the f-call is a call to a function +-- with rules. -- --- The ai-rules flag makes this happen; if it's +-- The ai_encl flag makes this happen; if it's -- set, the inliner gets just enough keener to inline f -- regardless of how boring f's arguments are, if it's marked INLINE -- -- The alternative would be to *always* inline an INLINE function, -- regardless of how boring its context is; but that seems overkill -- For example, it'd mean that wrapper functions were always inlined --- --- The call_cont passed to interestingArgContext is the context of --- the call itself, e.g. g in the example above -interestingArgContext rules call_cont - = notNull rules || enclosing_fn_has_rules +contHasRules cont + = go cont where - enclosing_fn_has_rules = go call_cont - - go (Select {}) = False - go (ApplyToVal {}) = False -- Shouldn't really happen - go (ApplyToTy {}) = False -- Ditto - go (StrictArg { sc_fun = fun }) = ai_encl fun - go (StrictBind {}) = False -- ?? - go (CastIt _ c) = go c - go (Stop _ RuleArgCtxt _) = True - go (Stop _ _ _) = False - go (TickIt _ c) = go c + go (ApplyToVal { sc_cont = cont }) = go cont + go (ApplyToTy { sc_cont = cont }) = go cont + go (CastIt _ cont) = go cont + go (StrictArg { sc_fun = fun }) = ai_encl fun + go (Stop _ RuleArgCtxt _) = True + go (TickIt _ c) = go c + go (Select {}) = False + go (StrictBind {}) = False -- ?? + go (Stop _ _ _) = False {- Note [Interesting arguments] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Utils/Monad.hs b/compiler/GHC/Utils/Monad.hs index 5366d12dca..6f93ac2c27 100644 --- a/compiler/GHC/Utils/Monad.hs +++ b/compiler/GHC/Utils/Monad.hs @@ -163,7 +163,10 @@ mapSndM = traverse . traverse -- | Monadic version of concatMap concatMapM :: (Monad m, Traversable f) => (a -> m [b]) -> f a -> m [b] concatMapM f xs = liftM concat (mapM f xs) -{-# SPECIALIZE concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] #-} +{-# INLINE concatMapM #-} +-- It's better to inline to inline this than to specialise +-- concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +-- Inlining cuts compiler allocation by around 1% -- | Applicative version of mapMaybe mapMaybeM :: Applicative m => (a -> m (Maybe b)) -> [a] -> m [b] diff --git a/testsuite/tests/lib/integer/Makefile b/testsuite/tests/lib/integer/Makefile index 4292a1b970..af27258eb3 100644 --- a/testsuite/tests/lib/integer/Makefile +++ b/testsuite/tests/lib/integer/Makefile @@ -11,8 +11,9 @@ CHECK2 = grep -q -- '$1' folding.simpl || \ .PHONY: integerConstantFolding integerConstantFolding: - '$(TEST_HC)' -Wall -v0 -O --make integerConstantFolding -fforce-recomp -ddump-simpl > folding.simpl + '$(TEST_HC)' -Wall -v0 -O --make integerConstantFolding -fforce-recomp -ddump-simpl -dno-debug-output > folding.simpl # All the 100nnn values should be constant-folded away +# -dno-debug-output suppresses a "Glomming" message ! grep -q '\<100[0-9][0-9][0-9]\>' folding.simpl || { echo "Unfolded values found"; grep '\<100[0-9][0-9][0-9]\>' folding.simpl; } $(call CHECK,\<200007\>,plusInteger) $(call CHECK,\<683234160\>,timesInteger) @@ -64,8 +65,9 @@ IntegerConversionRules: .PHONY: naturalConstantFolding naturalConstantFolding: - '$(TEST_HC)' -Wall -v0 -O --make naturalConstantFolding -fforce-recomp -ddump-simpl > folding.simpl + '$(TEST_HC)' -Wall -v0 -O --make naturalConstantFolding -fforce-recomp -ddump-simpl -dno-debug-output > folding.simpl # All the 100nnn values should be constant-folded away +# -dno-debug-output suppresses a "Glomming" message ! grep -q '\<100[0-9][0-9][0-9]\>' folding.simpl || { echo "Unfolded values found"; grep '\<100[0-9][0-9][0-9]\>' folding.simpl; } # Bit arithmetic $(call CHECK,\<532\>,andNatural) diff --git a/testsuite/tests/simplCore/should_compile/T21851.hs b/testsuite/tests/simplCore/should_compile/T21851.hs new file mode 100644 index 0000000000..b5a9dcf4d9 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21851.hs @@ -0,0 +1,15 @@ +{-# OPTIONS_GHC -ddump-simpl #-} + +module T21851 (g') where +import T21851a + +g :: Num a => a -> a +g x = fst (f x) +{-# NOINLINE[99] g #-} + +g' :: Int -> Int +g' = g + +-- We should see a call to a /specialised/ verion of `f`, +-- something like +-- g' = \ (x :: Int) -> case T21851a.$w$sf x of { (# ww, ww1 #) -> ww } diff --git a/testsuite/tests/simplCore/should_compile/T21851.stderr b/testsuite/tests/simplCore/should_compile/T21851.stderr new file mode 100644 index 0000000000..0ddb38546a --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21851.stderr @@ -0,0 +1,19 @@ +[1 of 2] Compiling T21851a ( T21851a.hs, T21851a.o ) +[2 of 2] Compiling T21851 ( T21851.hs, T21851.o ) + +==================== Tidy Core ==================== +Result size of Tidy Core + = {terms: 7, types: 10, coercions: 0, joins: 0/0} + +-- RHS size: {terms: 6, types: 8, coercions: 0, joins: 0/0} +g' :: Int -> Int +[GblId, + Arity=1, + Str=, + Unf=Unf{Src=, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [0] 30 0}] +g' + = \ (x :: Int) -> case T21851a.$w$sf x of { (# ww, ww1 #) -> ww } + + + diff --git a/testsuite/tests/simplCore/should_compile/T21851a.hs b/testsuite/tests/simplCore/should_compile/T21851a.hs new file mode 100644 index 0000000000..d11e9eb4b7 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21851a.hs @@ -0,0 +1,5 @@ +module T21851a where + +f :: Num b => b -> (b, b) -- note: recursive to prevent inlining +f x = (x + 1, snd (f x)) -- on such a small example +{-# SPECIALIZE f :: Int -> (Int, Int) #-} diff --git a/testsuite/tests/simplCore/should_compile/T22097.hs b/testsuite/tests/simplCore/should_compile/T22097.hs new file mode 100644 index 0000000000..ea236a1a25 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T22097.hs @@ -0,0 +1,7 @@ +{-# OPTIONS_GHC -ddump-simpl #-} +{-# LANGUAGE TypeApplications #-} +module T22097 where +import T22097a ( isEven ) + +main :: IO () +main = print $ isEven @Int 10 diff --git a/testsuite/tests/simplCore/should_compile/T22097.stderr b/testsuite/tests/simplCore/should_compile/T22097.stderr new file mode 100644 index 0000000000..f2ff31a7bf --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T22097.stderr @@ -0,0 +1,46 @@ +[1 of 2] Compiling T22097a ( T22097a.hs, T22097a.o ) +[2 of 2] Compiling T22097 ( T22097.hs, T22097.o ) + +==================== Tidy Core ==================== +Result size of Tidy Core + = {terms: 15, types: 14, coercions: 3, joins: 0/0} + +-- RHS size: {terms: 5, types: 1, coercions: 0, joins: 0/0} +T22097.main2 :: String +[GblId, + Unf=Unf{Src=, TopLvl=True, Value=False, ConLike=False, + WorkFree=False, Expandable=False, Guidance=IF_ARGS [] 30 0}] +T22097.main2 + = case T22097a.$wgoEven 10# of { (# #) -> GHC.Show.$fShowBool4 } + +-- RHS size: {terms: 6, types: 2, coercions: 0, joins: 0/0} +T22097.main1 + :: GHC.Prim.State# GHC.Prim.RealWorld + -> (# GHC.Prim.State# GHC.Prim.RealWorld, () #) +[GblId, + Arity=1, + Str=, + Unf=Unf{Src=, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [0] 40 0}] +T22097.main1 + = \ (eta [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) -> + GHC.IO.Handle.Text.hPutStr2 + GHC.IO.Handle.FD.stdout T22097.main2 GHC.Types.True eta + +-- RHS size: {terms: 1, types: 0, coercions: 3, joins: 0/0} +main :: IO () +[GblId, + Arity=1, + Str=, + Unf=Unf{Src=, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}] +main + = T22097.main1 + `cast` (Sym (GHC.Types.N:IO[0] <()>_R) + :: (GHC.Prim.State# GHC.Prim.RealWorld + -> (# GHC.Prim.State# GHC.Prim.RealWorld, () #)) + ~R# IO ()) + + + diff --git a/testsuite/tests/simplCore/should_compile/T22097a.hs b/testsuite/tests/simplCore/should_compile/T22097a.hs new file mode 100644 index 0000000000..7d3dab3b7d --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T22097a.hs @@ -0,0 +1,23 @@ +module T22097a + ( isEven, isOdd ) +where + +{-# SPECIALIZE isEven :: Int -> Bool #-} +isEven :: Integral a => a -> Bool +isEven = fst evenOdd + +{-# SPECIALIZE isOdd :: Int -> Bool #-} +isOdd :: Integral a => a -> Bool +isOdd = snd evenOdd + +evenOdd :: Integral a => (a -> Bool, a -> Bool) +evenOdd = (goEven, goOdd) + where + goEven n + | n < 0 = goEven (- n) + | n > 0 = goOdd (n - 1) + | otherwise = True + + goOdd n + | n < 0 = goOdd n + | otherwise = goEven n diff --git a/testsuite/tests/simplCore/should_compile/T6056.stderr b/testsuite/tests/simplCore/should_compile/T6056.stderr index 461ba97c70..ba8ff0e3a4 100644 --- a/testsuite/tests/simplCore/should_compile/T6056.stderr +++ b/testsuite/tests/simplCore/should_compile/T6056.stderr @@ -1,4 +1,4 @@ Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056) Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056) Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056) -Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056) +Rule fired: SPEC/T6056 smallerAndRest @Int (T6056) diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 2da9a99ca1..283c6cf1b0 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -430,3 +430,7 @@ test('T22028', normal, compile, ['-O -ddump-rule-firings']) test('T22114', normal, compile, ['-O']) test('T21286', normal, multimod_compile, ['T21286', '-O -ddump-rule-firings']) +# One module, T21851.hs, has OPTIONS_GHC -ddump-simpl +test('T21851', [grep_errmsg(r'case.*w\$sf') ], multimod_compile, ['T21851', '-O -dno-typeable-binds -dsuppress-uniques']) +# One module, T22097.hs, has OPTIONS_GHC -ddump-simpl +test('T22097', [grep_errmsg(r'case.*wgoEven') ], multimod_compile, ['T22097', '-O -dno-typeable-binds -dsuppress-uniques']) -- cgit v1.2.1