summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2022-09-26 12:47:35 +0100
committerSimon Peyton Jones <simon.peytonjones@gmail.com>2022-10-10 22:30:21 +0100
commit96d32ff289f87b8c78f0a8d1b11295c9563ec020 (patch)
treed05acdd595822371880ca8b7088daf63d8011ba6
parent44fcdb04467c23b794a82451c64cbfaed6f4ef62 (diff)
downloadhaskell-wip/T21851-rule-win.tar.gz
Make rewrite rules "win" over inliningwip/T21851-rule-win
If a rewrite rule and a rewrite rule compete in the simplifier, this patch makes sure that the rewrite rule "win". That is, in general a bit fragile, but it's a huge help when making specialisation work reliably, as #21851 and #22097 showed. The change is fairly straightforwad, and documented in Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration. Compile-times change, up and down a bit -- in some cases because we get better specialisation. But the payoff (more reliable specialisation) is large. Metrics: compile_time/bytes allocated ----------------------------------------------- T10421(normal) +3.7% BAD T10421a(normal) +5.5% T13253(normal) +1.3% T14052(ghci) +1.8% T15304(normal) -1.4% T16577(normal) +3.1% BAD T17516(normal) +2.3% T17836(normal) -1.9% T18223(normal) -1.8% T8095(normal) -1.3% T9961(normal) +2.5% BAD geo. mean +0.0% minimum -1.9% maximum +5.5% Nofib results are (bytes allocated) +-------------------------------++----------+ | ||tsv (rel) | +===============================++==========+ | imaginary/paraffins || +0.27% | | imaginary/rfib || -0.04% | | real/anna || +0.02% | | real/fem || -0.04% | | real/fluid || +1.68% | | real/gamteb || -0.34% | | real/gg || +1.54% | | real/hidden || -0.01% | | real/hpg || -0.03% | | real/infer || -0.03% | | real/prolog || +0.02% | | real/veritas || -0.47% | | shootout/fannkuch-redux || -0.03% | | shootout/k-nucleotide || -0.02% | | shootout/n-body || -0.06% | | shootout/spectral-norm || -0.01% | | spectral/cryptarithm2 || +1.25% | | spectral/fibheaps || +18.33% | | spectral/last-piece || -0.34% | +===============================++==========+ | geom mean || +0.17% | There are extensive notes in !8897 about the regressions. Briefly * fibheaps: there was a very delicately balanced inlining that tipped over the wrong way after this change. * cryptarithm2 and paraffins are caused by #22274, which is a separate issue really. (I.e. the right fix is *not* to make inlining "win" over rules.) So I'm accepting these changes Metric Increase: T10421 T16577 T9961
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Iteration.hs216
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs157
-rw-r--r--compiler/GHC/Utils/Monad.hs5
-rw-r--r--testsuite/tests/lib/integer/Makefile6
-rw-r--r--testsuite/tests/simplCore/should_compile/T21851.hs15
-rw-r--r--testsuite/tests/simplCore/should_compile/T21851.stderr19
-rw-r--r--testsuite/tests/simplCore/should_compile/T21851a.hs5
-rw-r--r--testsuite/tests/simplCore/should_compile/T22097.hs7
-rw-r--r--testsuite/tests/simplCore/should_compile/T22097.stderr46
-rw-r--r--testsuite/tests/simplCore/should_compile/T22097a.hs23
-rw-r--r--testsuite/tests/simplCore/should_compile/T6056.stderr2
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T4
12 files changed, 344 insertions, 161 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Iteration.hs b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
index d2bdace3e2..f3fb5c2f0b 100644
--- a/compiler/GHC/Core/Opt/Simplify/Iteration.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Iteration.hs
@@ -1919,7 +1919,9 @@ wrapJoinCont env cont thing_inside
--------------------
-trimJoinCont :: Id -> Maybe JoinArity -> SimplCont -> SimplCont
+trimJoinCont :: Id -- Used only in error message
+ -> Maybe JoinArity
+ -> SimplCont -> SimplCont
-- Drop outer context from join point invocation (jump)
-- See Note [Join points and case-of-case]
@@ -2017,6 +2019,17 @@ outside. Surprisingly tricky!
Variables
* *
************************************************************************
+
+Note [zapSubstEnv]
+~~~~~~~~~~~~~~~~~~
+When simplifying something that has already been simplified, be sure to
+zap the SubstEnv. This is VITAL. Consider
+ let x = e in
+ let y = \z -> ...x... in
+ \ x -> ...y...
+
+We'll clone the inner \x, adding x->x' in the id_subst Then when we
+inline y, we must *not* replace x by x' in the inlined copy!!
-}
simplVar :: SimplEnv -> InVar -> SimplM OutExpr
@@ -2035,86 +2048,28 @@ simplVar env var
simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr)
simplIdF env var cont
= case substId env var of
- ContEx tvs cvs ids e ->
- let env' = setSubstEnv env tvs cvs ids
- in simplExprF env' e cont
- -- Don't trim; haven't already simplified e,
- -- so the cont is not embodied in e
-
- DoneId var1 -> do
- logger <- getLogger
- let cont' = trimJoinCont var (isJoinId_maybe var1) cont
- completeCall logger env var1 cont'
-
- DoneEx e mb_join ->
- let env' = zapSubstEnv env
- cont' = trimJoinCont var mb_join cont
- in simplExprF env' e cont'
- -- Note [zapSubstEnv]
- -- ~~~~~~~~~~~~~~~~~~
- -- The template is already simplified, so don't re-substitute.
- -- This is VITAL. Consider
- -- let x = e in
- -- let y = \z -> ...x... in
- -- \ x -> ...y...
- -- We'll clone the inner \x, adding x->x' in the id_subst
- -- Then when we inline y, we must *not* replace x by x' in
- -- the inlined copy!!
-
----------------------------------------------------------
--- Dealing with a call site
-
-completeCall :: Logger -> SimplEnv -> OutId -> SimplCont -> SimplM (SimplFloats, OutExpr)
-completeCall logger env var cont
- | Just expr <- callSiteInline logger uf_opts case_depth var active_unf
- lone_variable arg_infos interesting_cont
- -- Inline the variable's RHS
- = do { checkedTick (UnfoldingDone var)
- ; dump_inline expr cont
- ; let env1 = zapSubstEnv env
- ; simplExprF env1 expr cont }
-
- | otherwise
- -- Don't inline; instead rebuild the call
- = do { rule_base <- getSimplRules
- ; let rules = getRules rule_base var
- info = mkArgInfo env var rules
- n_val_args call_cont
- ; rebuildCall env info cont }
+ ContEx tvs cvs ids e -> simplExprF env' e cont
+ -- Don't trimJoinCont; haven't already simplified e,
+ -- so the cont is not embodied in e
+ where
+ env' = setSubstEnv env tvs cvs ids
- where
- uf_opts = seUnfoldingOpts env
- case_depth = seCaseDepth env
- (lone_variable, arg_infos, call_cont) = contArgs cont
- n_val_args = length arg_infos
- interesting_cont = interestingCallContext env call_cont
- active_unf = activeUnfolding (seMode env) var
+ DoneId var1 ->
+ do { rule_base <- getSimplRules
+ ; let cont' = trimJoinCont var1 (isJoinId_maybe var1) cont
+ info = mkArgInfo env rule_base var1 cont'
+ ; rebuildCall env info cont' }
- log_inlining doc
- = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify)
- Opt_D_dump_inlinings
- "" FormatText doc
+ DoneEx e mb_join -> simplExprF env' e cont'
+ where
+ cont' = trimJoinCont var mb_join cont
+ env' = zapSubstEnv env -- See Note [zapSubstEnv]
- dump_inline unfolding cont
- | not (logHasDumpFlag logger Opt_D_dump_inlinings) = return ()
- | not (logHasDumpFlag logger Opt_D_verbose_core2core)
- = when (isExternalName (idName var)) $
- log_inlining $
- sep [text "Inlining done:", nest 4 (ppr var)]
- | otherwise
- = log_inlining $
- sep [text "Inlining done: " <> ppr var,
- nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding),
- text "Cont: " <+> ppr cont])]
+---------------------------------------------------------
+-- Dealing with a call site
-rebuildCall :: SimplEnv
- -> ArgInfo
- -> SimplCont
+rebuildCall :: SimplEnv -> ArgInfo -> SimplCont
-> SimplM (SimplFloats, OutExpr)
--- We decided not to inline, so
--- - simplify the arguments
--- - try rewrite rules
--- - and rebuild
---------- Bottoming applications --------------
rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) cont
@@ -2137,27 +2092,48 @@ rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) con
res = argInfoExpr fun rev_args
cont_ty = contResultType cont
----------- Try rewrite RULES --------------
--- See Note [Trying rewrite rules]
+---------- Try inlining, if ai_rewrite = TryInlining --------
+-- In the TryInlining case we try inlining immediately, before simplifying
+-- any (more) arguments. Why? See Note [Rewrite rules and inlining].
+--
+-- If there are rewrite rules we'll skip this case until we have
+-- simplified enough args to satisfy nr_wanted==0 in the TryRules case below
+-- Then we'll try the rules, and if that fails, we'll do TryInlining
+rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args
+ , ai_rewrite = TryInlining }) cont
+ = do { logger <- getLogger
+ ; let full_cont = pushSimplifiedRevArgs env rev_args cont
+ ; mb_inline <- tryInlining env logger fun full_cont
+ ; case mb_inline of
+ Just expr -> do { checkedTick (UnfoldingDone fun)
+ ; let env1 = zapSubstEnv env
+ ; simplExprF env1 expr full_cont }
+ Nothing -> rebuildCall env (info { ai_rewrite = TryNothing }) cont
+ }
+
+---------- Try rewrite RULES, if ai_rewrite = TryRules --------------
+-- See Note [Rewrite rules and inlining]
+-- See also Note [Trying rewrite rules]
rebuildCall env info@(ArgInfo { ai_fun = fun, ai_args = rev_args
- , ai_rules = Just (nr_wanted, rules) }) cont
+ , ai_rewrite = TryRules nr_wanted rules }) cont
| nr_wanted == 0 || no_more_args
- , let info' = info { ai_rules = Nothing }
= -- We've accumulated a simplified call in <fun,rev_args>
-- so try rewrite rules; see Note [RULES apply to simplified arguments]
-- See also Note [Rules for recursive functions]
do { mb_match <- tryRules env rules fun (reverse rev_args) cont
; case mb_match of
Just (env', rhs, cont') -> simplExprF env' rhs cont'
- Nothing -> rebuildCall env info' cont }
+ Nothing -> rebuildCall env (info { ai_rewrite = TryInlining }) cont }
where
+ -- If we have run out of arguments, just try the rules; there might
+ -- be some with lower arity. Casts get in the way -- they aren't
+ -- allowed on rule LHSs
no_more_args = case cont of
ApplyToTy {} -> False
ApplyToVal {} -> False
_ -> True
-
----------- Simplify applications and casts --------------
+---------- Simplify type applications and casts --------------
rebuildCall env info (CastIt co cont)
= rebuildCall env (addCastTo info co) cont
@@ -2202,6 +2178,7 @@ rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
; return (emptyFloats env, call') }
+---------- Simplify value arguments --------------------
rebuildCall env fun_info
(ApplyToVal { sc_arg = arg, sc_env = arg_se
, sc_dup = dup_flag, sc_hole_ty = fun_ty
@@ -2237,6 +2214,42 @@ rebuildCall env fun_info
rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
= rebuild env (argInfoExpr fun rev_args) cont
+-----------------------------------
+tryInlining :: SimplEnv -> Logger -> OutId -> SimplCont -> SimplM (Maybe OutExpr)
+tryInlining env logger var cont
+ | Just expr <- callSiteInline logger uf_opts case_depth var active_unf
+ lone_variable arg_infos interesting_cont
+ = do { dump_inline expr cont
+ ; return (Just expr) }
+
+ | otherwise
+ = return Nothing
+
+ where
+ uf_opts = seUnfoldingOpts env
+ case_depth = seCaseDepth env
+ (lone_variable, arg_infos, call_cont) = contArgs cont
+ interesting_cont = interestingCallContext env call_cont
+ active_unf = activeUnfolding (seMode env) var
+
+ log_inlining doc
+ = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify)
+ Opt_D_dump_inlinings
+ "" FormatText doc
+
+ dump_inline unfolding cont
+ | not (logHasDumpFlag logger Opt_D_dump_inlinings) = return ()
+ | not (logHasDumpFlag logger Opt_D_verbose_core2core)
+ = when (isExternalName (idName var)) $
+ log_inlining $
+ sep [text "Inlining done:", nest 4 (ppr var)]
+ | otherwise
+ = log_inlining $
+ sep [text "Inlining done: " <> ppr var,
+ nest 4 (vcat [text "Inlined fn: " <+> nest 2 (ppr unfolding),
+ text "Cont: " <+> ppr cont])]
+
+
{- Note [Trying rewrite rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider an application (f e1 e2 e3) where the e1,e2,e3 are not yet
@@ -2272,6 +2285,38 @@ makes a particularly big difference when superclass selectors are involved:
op ($p1 ($p2 (df d)))
We want all this to unravel in one sweep.
+Note [Rewrite rules and inlining]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general we try to arrange that inlining is disabled (via a pragma) if
+a rewrite rule should apply, so that the rule has a decent chance to fire
+before we inline the function.
+
+But it turns out that (especially when type-class specialisation or
+SpecConstr is involved) it is very helpful for the the rewrite rule to
+"win" over inlining when both are active at once: see #21851, #22097.
+
+The simplifier arranges to do this, as follows. In effect, the ai_rewrite
+field of the ArgInfo record is the state of a little state-machine:
+
+* mkArgInfo sets the ai_rewrite field to TryRules if there are any rewrite
+ rules avaialable for that function.
+
+* rebuildCall simplifies arguments until enough are simplified to match the
+ rule with greatest arity. See Note [RULES apply to simplified arguments]
+ and the first field of `TryRules`.
+
+ But no more! As soon as we have simplified enough arguments to satisfy the
+ maximum-arity rules, we try the rules; see Note [Trying rewrite rules].
+
+* Once we have tried rules (or immediately if there are no rules) set
+ ai_rewrite to TryInlining, and the Simplifier will try to inline the
+ function. We want to try this immediately (before simplifying any (more)
+ arguments). Why? Consider
+ f BIG where f = \x{OneOcc}. ...x...
+ If we inline `f` before simplifying `BIG` well use preInlineUnconditionally,
+ and we'll simplify BIG once, at x's occurrence, rather than twice.
+
+
Note [Avoid redundant simplification]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Because RULES apply to simplified arguments, there's a danger of repeatedly
@@ -2327,7 +2372,8 @@ See Note [No free join points in arityType] in GHC.Core.Opt.Arity
-}
tryRules :: SimplEnv -> [CoreRule]
- -> Id -> [ArgSpec]
+ -> Id
+ -> [ArgSpec] -- In /normal, forward/ order
-> SimplCont
-> SimplM (Maybe (SimplEnv, CoreExpr, SimplCont))
@@ -3668,7 +3714,7 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty
| otherwise
= do { join_bndr <- newJoinId [arg_bndr] res_ty
; let arg_info = ArgInfo { ai_fun = join_bndr
- , ai_rules = Nothing, ai_args = []
+ , ai_rewrite = TryNothing, ai_args = []
, ai_encl = False, ai_dmds = repeat topDmd
, ai_discs = repeat 0 }
; return ( addJoinFloats (emptyFloats env) $
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 2a3a272f50..abd58fcb39 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -30,9 +30,10 @@ module GHC.Core.Opt.Simplify.Utils (
interestingCallContext,
-- ArgInfo
- ArgInfo(..), ArgSpec(..), mkArgInfo,
+ ArgInfo(..), ArgSpec(..), RewriteCall(..), mkArgInfo,
addValArgTo, addCastTo, addTyArgTo,
- argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
+ argInfoExpr, argInfoAppArgs,
+ pushSimplifiedArgs, pushSimplifiedRevArgs,
isStrictArgInfo, lazyArgContext,
abstractFloats,
@@ -52,6 +53,7 @@ import GHC.Core.Ppr
import GHC.Core.TyCo.Ppr ( pprParendType )
import GHC.Core.FVs
import GHC.Core.Utils
+import GHC.Core.Rules( getRules )
import GHC.Core.Opt.Arity
import GHC.Core.Unfold
import GHC.Core.Unfold.Make
@@ -210,6 +212,7 @@ data SimplCont
type StaticEnv = SimplEnv -- Just the static part is relevant
+-- See Note [DupFlag invariants]
data DupFlag = NoDup -- Unsimplified, might be big
| Simplified -- Simplified
| OkToDup -- Simplified and small
@@ -226,8 +229,9 @@ perhapsSubstTy dup env ty
{- Note [StaticEnv invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We pair up an InExpr or InAlts with a StaticEnv, which establishes the
-lexical scope for that InExpr. When we simplify that InExpr/InAlts, we
-use
+lexical scope for that InExpr.
+
+When we simplify that InExpr/InAlts, we use
- Its captured StaticEnv
- Overriding its InScopeSet with the larger one at the
simplification point.
@@ -244,13 +248,14 @@ isn't big enough.
Note [DupFlag invariants]
~~~~~~~~~~~~~~~~~~~~~~~~~
-In both (ApplyToVal dup _ env k)
- and (Select dup _ _ env k)
+In both ApplyToVal { se_dup = dup, se_env = env, se_cont = k}
+ and Select { se_dup = dup, se_env = env, se_cont = k}
the following invariants hold
(a) if dup = OkToDup, then continuation k is also ok-to-dup
- (b) if dup = OkToDup or Simplified, the subst-env is empty
- (and hence no need to re-simplify)
+ (b) if dup = OkToDup or Simplified, the subst-env is empty,
+ or at least is always ignored; the payload is
+ already an OutThing
-}
instance Outputable DupFlag where
@@ -309,7 +314,8 @@ data ArgInfo
ai_fun :: OutId, -- The function
ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
- ai_rules :: FunRules, -- Rules for this function
+ ai_rewrite :: RewriteCall, -- What transformation to try next for this call
+ -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration
ai_encl :: Bool, -- Flag saying whether this function
-- or an enclosing one has rules (recursively)
@@ -325,6 +331,12 @@ data ArgInfo
-- Always infinite
}
+data RewriteCall -- What rewriting to try next for this call
+ -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration
+ = TryRules FullArgCount [CoreRule]
+ | TryInlining
+ | TryNothing
+
data ArgSpec
= ValArg { as_dmd :: Demand -- Demand placed on this argument
, as_arg :: OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
@@ -349,20 +361,20 @@ instance Outputable ArgSpec where
addValArgTo :: ArgInfo -> OutExpr -> OutType -> ArgInfo
addValArgTo ai arg hole_ty
- | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rules = rules } <- ai
+ | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rewrite = rew } <- ai
-- Pop the top demand and and discounts off
, let arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty, as_dmd = dmd }
- = ai { ai_args = arg_spec : ai_args ai
- , ai_dmds = dmds
- , ai_discs = discs
- , ai_rules = decRules rules }
+ = ai { ai_args = arg_spec : ai_args ai
+ , ai_dmds = dmds
+ , ai_discs = discs
+ , ai_rewrite = decArgCount rew }
| otherwise
= pprPanic "addValArgTo" (ppr ai $$ ppr arg)
-- There should always be enough demands and discounts
addTyArgTo :: ArgInfo -> OutType -> OutType -> ArgInfo
-addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
- , ai_rules = decRules (ai_rules ai) }
+addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
+ , ai_rewrite = decArgCount (ai_rewrite ai) }
where
arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
@@ -381,19 +393,22 @@ argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
argInfoAppArgs (ValArg { as_arg = arg } : as) = arg : argInfoAppArgs as
argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
-pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
-pushSimplifiedArgs _env [] k = k
-pushSimplifiedArgs env (arg : args) k
- = case arg of
- TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
- -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
- ValArg { as_arg = arg, as_hole_ty = hole_ty }
- -> ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified
- , sc_hole_ty = hole_ty, sc_cont = rest }
- CastBy c -> CastIt c rest
- where
- rest = pushSimplifiedArgs env args k
- -- The env has an empty SubstEnv
+pushSimplifiedArgs, pushSimplifiedRevArgs
+ :: SimplEnv
+ -> [ArgSpec] -- In normal, forward order for pushSimplifiedArgs,
+ -- in /reverse/ order for pushSimplifiedRevArgs
+ -> SimplCont -> SimplCont
+pushSimplifiedArgs env args cont = foldr (pushSimplifiedArg env) cont args
+pushSimplifiedRevArgs env args cont = foldl' (\k a -> pushSimplifiedArg env a k) cont args
+
+pushSimplifiedArg :: SimplEnv -> ArgSpec -> SimplCont -> SimplCont
+pushSimplifiedArg _env (TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }) cont
+ = ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = cont }
+pushSimplifiedArg env (ValArg { as_arg = arg, as_hole_ty = hole_ty }) cont
+ = ApplyToVal { sc_arg = arg, sc_env = env, sc_dup = Simplified
+ -- The SubstEnv will be ignored since sc_dup=Simplified
+ , sc_hole_ty = hole_ty, sc_cont = cont }
+pushSimplifiedArg _ (CastBy c) cont = CastIt c cont
argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
-- NB: the [ArgSpec] is reversed so that the first arg
@@ -406,18 +421,14 @@ argInfoExpr fun rev_args
go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
go (CastBy co : as) = mkCast (go as) co
+decArgCount :: RewriteCall -> RewriteCall
+decArgCount (TryRules n rules) = TryRules (n-1) rules
+decArgCount rew = rew
-type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
- -- Nothing => No rules
- -- Just (n, rules) => some rules, requiring at least n more type/value args
-
-decRules :: FunRules -> FunRules
-decRules (Just (n, rules)) = Just (n-1, rules)
-decRules Nothing = Nothing
-
-mkFunRules :: [CoreRule] -> FunRules
-mkFunRules [] = Nothing
-mkFunRules rs = Just (n_required, rs)
+mkTryRules :: [CoreRule] -> RewriteCall
+-- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration
+mkTryRules [] = TryInlining
+mkTryRules rs = TryRules n_required rs
where
n_required = maximum (map ruleArity rs)
@@ -516,6 +527,7 @@ contHoleScaling (StrictArg { sc_fun_ty = fun_ty, sc_cont = k })
contHoleScaling (ApplyToTy { sc_cont = k }) = contHoleScaling k
contHoleScaling (ApplyToVal { sc_cont = k }) = contHoleScaling k
contHoleScaling (TickIt _ k) = contHoleScaling k
+
-------------------
countArgs :: SimplCont -> Int
-- Count all arguments, including types, coercions,
@@ -525,6 +537,14 @@ countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
countArgs (CastIt _ cont) = countArgs cont
countArgs _ = 0
+countValArgs :: SimplCont -> Int
+-- Count value arguments only
+countValArgs (ApplyToTy { sc_cont = cont }) = 1 + countValArgs cont
+countValArgs (ApplyToVal { sc_cont = cont }) = 1 + countValArgs cont
+countValArgs (CastIt _ cont) = countValArgs cont
+countValArgs _ = 0
+
+-------------------
contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
-- Summarises value args, discards type args and coercions
-- The returned continuation of the call is only used to
@@ -579,29 +599,26 @@ contEvalContext k = case k of
-- and case binder dmds, see addCaseBndrDmd. No priority right now.
-------------------
-mkArgInfo :: SimplEnv
- -> Id
- -> [CoreRule] -- Rules for function
- -> Int -- Number of value args
- -> SimplCont -- Context of the call
- -> ArgInfo
-
-mkArgInfo env fun rules n_val_args call_cont
+mkArgInfo :: SimplEnv -> RuleEnv -> Id -> SimplCont -> ArgInfo
+
+mkArgInfo env rule_base fun cont
| n_val_args < idArity fun -- Note [Unsaturated functions]
= ArgInfo { ai_fun = fun, ai_args = []
- , ai_rules = fun_rules
+ , ai_rewrite = fun_rules
, ai_encl = False
, ai_dmds = vanilla_dmds
, ai_discs = vanilla_discounts }
| otherwise
= ArgInfo { ai_fun = fun
, ai_args = []
- , ai_rules = fun_rules
- , ai_encl = interestingArgContext rules call_cont
+ , ai_rewrite = fun_rules
+ , ai_encl = notNull rules || contHasRules cont
, ai_dmds = add_type_strictness (idType fun) arg_dmds
, ai_discs = arg_discounts }
where
- fun_rules = mkFunRules rules
+ rules = getRules rule_base fun
+ fun_rules = mkTryRules rules
+ n_val_args = countValArgs cont
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
@@ -814,7 +831,7 @@ interestingCallContext env cont
-- a build it's *great* to inline it here. So we must ensure that
-- the context for (f x) is not totally uninteresting.
-interestingArgContext :: [CoreRule] -> SimplCont -> Bool
+contHasRules :: SimplCont -> Bool
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
@@ -822,33 +839,29 @@ interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- where g has rules, then we *do* want to inline f, in case it
-- exposes a rule that might fire. Similarly, if the context is
-- h (g (f x x))
--- where h has rules, then we do want to inline f; hence the
--- call_cont argument to interestingArgContext
+-- where h has rules, then we do want to inline f. So contHasRules
+-- tries to see if the context of the f-call is a call to a function
+-- with rules.
--
--- The ai-rules flag makes this happen; if it's
+-- The ai_encl flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
-- regardless of how boring f's arguments are, if it's marked INLINE
--
-- The alternative would be to *always* inline an INLINE function,
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
---
--- The call_cont passed to interestingArgContext is the context of
--- the call itself, e.g. g <hole> in the example above
-interestingArgContext rules call_cont
- = notNull rules || enclosing_fn_has_rules
+contHasRules cont
+ = go cont
where
- enclosing_fn_has_rules = go call_cont
-
- go (Select {}) = False
- go (ApplyToVal {}) = False -- Shouldn't really happen
- go (ApplyToTy {}) = False -- Ditto
- go (StrictArg { sc_fun = fun }) = ai_encl fun
- go (StrictBind {}) = False -- ??
- go (CastIt _ c) = go c
- go (Stop _ RuleArgCtxt _) = True
- go (Stop _ _ _) = False
- go (TickIt _ c) = go c
+ go (ApplyToVal { sc_cont = cont }) = go cont
+ go (ApplyToTy { sc_cont = cont }) = go cont
+ go (CastIt _ cont) = go cont
+ go (StrictArg { sc_fun = fun }) = ai_encl fun
+ go (Stop _ RuleArgCtxt _) = True
+ go (TickIt _ c) = go c
+ go (Select {}) = False
+ go (StrictBind {}) = False -- ??
+ go (Stop _ _ _) = False
{- Note [Interesting arguments]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Utils/Monad.hs b/compiler/GHC/Utils/Monad.hs
index 5366d12dca..6f93ac2c27 100644
--- a/compiler/GHC/Utils/Monad.hs
+++ b/compiler/GHC/Utils/Monad.hs
@@ -163,7 +163,10 @@ mapSndM = traverse . traverse
-- | Monadic version of concatMap
concatMapM :: (Monad m, Traversable f) => (a -> m [b]) -> f a -> m [b]
concatMapM f xs = liftM concat (mapM f xs)
-{-# SPECIALIZE concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] #-}
+{-# INLINE concatMapM #-}
+-- It's better to inline to inline this than to specialise
+-- concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+-- Inlining cuts compiler allocation by around 1%
-- | Applicative version of mapMaybe
mapMaybeM :: Applicative m => (a -> m (Maybe b)) -> [a] -> m [b]
diff --git a/testsuite/tests/lib/integer/Makefile b/testsuite/tests/lib/integer/Makefile
index 4292a1b970..af27258eb3 100644
--- a/testsuite/tests/lib/integer/Makefile
+++ b/testsuite/tests/lib/integer/Makefile
@@ -11,8 +11,9 @@ CHECK2 = grep -q -- '$1' folding.simpl || \
.PHONY: integerConstantFolding
integerConstantFolding:
- '$(TEST_HC)' -Wall -v0 -O --make integerConstantFolding -fforce-recomp -ddump-simpl > folding.simpl
+ '$(TEST_HC)' -Wall -v0 -O --make integerConstantFolding -fforce-recomp -ddump-simpl -dno-debug-output > folding.simpl
# All the 100nnn values should be constant-folded away
+# -dno-debug-output suppresses a "Glomming" message
! grep -q '\<100[0-9][0-9][0-9]\>' folding.simpl || { echo "Unfolded values found"; grep '\<100[0-9][0-9][0-9]\>' folding.simpl; }
$(call CHECK,\<200007\>,plusInteger)
$(call CHECK,\<683234160\>,timesInteger)
@@ -64,8 +65,9 @@ IntegerConversionRules:
.PHONY: naturalConstantFolding
naturalConstantFolding:
- '$(TEST_HC)' -Wall -v0 -O --make naturalConstantFolding -fforce-recomp -ddump-simpl > folding.simpl
+ '$(TEST_HC)' -Wall -v0 -O --make naturalConstantFolding -fforce-recomp -ddump-simpl -dno-debug-output > folding.simpl
# All the 100nnn values should be constant-folded away
+# -dno-debug-output suppresses a "Glomming" message
! grep -q '\<100[0-9][0-9][0-9]\>' folding.simpl || { echo "Unfolded values found"; grep '\<100[0-9][0-9][0-9]\>' folding.simpl; }
# Bit arithmetic
$(call CHECK,\<532\>,andNatural)
diff --git a/testsuite/tests/simplCore/should_compile/T21851.hs b/testsuite/tests/simplCore/should_compile/T21851.hs
new file mode 100644
index 0000000000..b5a9dcf4d9
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T21851.hs
@@ -0,0 +1,15 @@
+{-# OPTIONS_GHC -ddump-simpl #-}
+
+module T21851 (g') where
+import T21851a
+
+g :: Num a => a -> a
+g x = fst (f x)
+{-# NOINLINE[99] g #-}
+
+g' :: Int -> Int
+g' = g
+
+-- We should see a call to a /specialised/ verion of `f`,
+-- something like
+-- g' = \ (x :: Int) -> case T21851a.$w$sf x of { (# ww, ww1 #) -> ww }
diff --git a/testsuite/tests/simplCore/should_compile/T21851.stderr b/testsuite/tests/simplCore/should_compile/T21851.stderr
new file mode 100644
index 0000000000..0ddb38546a
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T21851.stderr
@@ -0,0 +1,19 @@
+[1 of 2] Compiling T21851a ( T21851a.hs, T21851a.o )
+[2 of 2] Compiling T21851 ( T21851.hs, T21851.o )
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+ = {terms: 7, types: 10, coercions: 0, joins: 0/0}
+
+-- RHS size: {terms: 6, types: 8, coercions: 0, joins: 0/0}
+g' :: Int -> Int
+[GblId,
+ Arity=1,
+ Str=<L>,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [0] 30 0}]
+g'
+ = \ (x :: Int) -> case T21851a.$w$sf x of { (# ww, ww1 #) -> ww }
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/T21851a.hs b/testsuite/tests/simplCore/should_compile/T21851a.hs
new file mode 100644
index 0000000000..d11e9eb4b7
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T21851a.hs
@@ -0,0 +1,5 @@
+module T21851a where
+
+f :: Num b => b -> (b, b) -- note: recursive to prevent inlining
+f x = (x + 1, snd (f x)) -- on such a small example
+{-# SPECIALIZE f :: Int -> (Int, Int) #-}
diff --git a/testsuite/tests/simplCore/should_compile/T22097.hs b/testsuite/tests/simplCore/should_compile/T22097.hs
new file mode 100644
index 0000000000..ea236a1a25
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T22097.hs
@@ -0,0 +1,7 @@
+{-# OPTIONS_GHC -ddump-simpl #-}
+{-# LANGUAGE TypeApplications #-}
+module T22097 where
+import T22097a ( isEven )
+
+main :: IO ()
+main = print $ isEven @Int 10
diff --git a/testsuite/tests/simplCore/should_compile/T22097.stderr b/testsuite/tests/simplCore/should_compile/T22097.stderr
new file mode 100644
index 0000000000..f2ff31a7bf
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T22097.stderr
@@ -0,0 +1,46 @@
+[1 of 2] Compiling T22097a ( T22097a.hs, T22097a.o )
+[2 of 2] Compiling T22097 ( T22097.hs, T22097.o )
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+ = {terms: 15, types: 14, coercions: 3, joins: 0/0}
+
+-- RHS size: {terms: 5, types: 1, coercions: 0, joins: 0/0}
+T22097.main2 :: String
+[GblId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=False, ConLike=False,
+ WorkFree=False, Expandable=False, Guidance=IF_ARGS [] 30 0}]
+T22097.main2
+ = case T22097a.$wgoEven 10# of { (# #) -> GHC.Show.$fShowBool4 }
+
+-- RHS size: {terms: 6, types: 2, coercions: 0, joins: 0/0}
+T22097.main1
+ :: GHC.Prim.State# GHC.Prim.RealWorld
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, () #)
+[GblId,
+ Arity=1,
+ Str=<L>,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [0] 40 0}]
+T22097.main1
+ = \ (eta [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+ GHC.IO.Handle.Text.hPutStr2
+ GHC.IO.Handle.FD.stdout T22097.main2 GHC.Types.True eta
+
+-- RHS size: {terms: 1, types: 0, coercions: 3, joins: 0/0}
+main :: IO ()
+[GblId,
+ Arity=1,
+ Str=<L>,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True,
+ Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=True)}]
+main
+ = T22097.main1
+ `cast` (Sym (GHC.Types.N:IO[0] <()>_R)
+ :: (GHC.Prim.State# GHC.Prim.RealWorld
+ -> (# GHC.Prim.State# GHC.Prim.RealWorld, () #))
+ ~R# IO ())
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/T22097a.hs b/testsuite/tests/simplCore/should_compile/T22097a.hs
new file mode 100644
index 0000000000..7d3dab3b7d
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T22097a.hs
@@ -0,0 +1,23 @@
+module T22097a
+ ( isEven, isOdd )
+where
+
+{-# SPECIALIZE isEven :: Int -> Bool #-}
+isEven :: Integral a => a -> Bool
+isEven = fst evenOdd
+
+{-# SPECIALIZE isOdd :: Int -> Bool #-}
+isOdd :: Integral a => a -> Bool
+isOdd = snd evenOdd
+
+evenOdd :: Integral a => (a -> Bool, a -> Bool)
+evenOdd = (goEven, goOdd)
+ where
+ goEven n
+ | n < 0 = goEven (- n)
+ | n > 0 = goOdd (n - 1)
+ | otherwise = True
+
+ goOdd n
+ | n < 0 = goOdd n
+ | otherwise = goEven n
diff --git a/testsuite/tests/simplCore/should_compile/T6056.stderr b/testsuite/tests/simplCore/should_compile/T6056.stderr
index 461ba97c70..ba8ff0e3a4 100644
--- a/testsuite/tests/simplCore/should_compile/T6056.stderr
+++ b/testsuite/tests/simplCore/should_compile/T6056.stderr
@@ -1,4 +1,4 @@
Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056)
Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056)
Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056)
-Rule fired: SPEC/T6056 $wsmallerAndRest @Int (T6056)
+Rule fired: SPEC/T6056 smallerAndRest @Int (T6056)
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 2da9a99ca1..283c6cf1b0 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -430,3 +430,7 @@ test('T22028', normal, compile, ['-O -ddump-rule-firings'])
test('T22114', normal, compile, ['-O'])
test('T21286', normal, multimod_compile, ['T21286', '-O -ddump-rule-firings'])
+# One module, T21851.hs, has OPTIONS_GHC -ddump-simpl
+test('T21851', [grep_errmsg(r'case.*w\$sf') ], multimod_compile, ['T21851', '-O -dno-typeable-binds -dsuppress-uniques'])
+# One module, T22097.hs, has OPTIONS_GHC -ddump-simpl
+test('T22097', [grep_errmsg(r'case.*wgoEven') ], multimod_compile, ['T22097', '-O -dno-typeable-binds -dsuppress-uniques'])