summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Graf <sebastian.graf@kit.edu>2021-10-13 17:25:30 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-10-29 05:02:25 -0400
commit925c47b46529d202190f18bd653a6945caa51823 (patch)
treeabcd83a8a9e7e6cac99025850a6e762f609dfbb6
parent7170052651ff02bfcf1e9611f0813dd20a7c8558 (diff)
downloadhaskell-925c47b46529d202190f18bd653a6945caa51823.tar.gz
WorkWrap: Update Unfolding with WW'd body prior to `tryWW` (#20510)
We have a function in #20510 that is small enough to get a stable unfolding in WW: ```hs small :: Int -> Int small x = go 0 x where go z 0 = z * x go z y = go (z+y) (y-1) ``` But it appears we failed to use the WW'd RHS as the stable unfolding. As a result, inlining `small` would expose the non-WW'd version of `go`. That appears to regress badly in #19727 which is a bit too large to extract a reproducer from that is guaranteed to reproduce across GHC versions. The solution is to simply update the unfolding in `certainlyWillInline` with the WW'd RHS. Fixes #20510.
-rw-r--r--compiler/GHC/Core/Opt/WorkWrap.hs7
-rw-r--r--compiler/GHC/Core/Unfold.hs91
-rw-r--r--compiler/GHC/Core/Unfold/Make.hs92
-rw-r--r--testsuite/tests/simplCore/should_compile/T7360.stderr4
-rw-r--r--testsuite/tests/stranal/should_compile/T20510.hs7
-rw-r--r--testsuite/tests/stranal/should_compile/T20510.stderr125
-rw-r--r--testsuite/tests/stranal/should_compile/all.T3
7 files changed, 234 insertions, 95 deletions
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 9becea0c18..511d3bf6e3 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -13,13 +13,12 @@ import GHC.Driver.Session
import GHC.Core.Opt.Arity ( manifestArity )
import GHC.Core
-import GHC.Core.Unfold
import GHC.Core.Unfold.Make
import GHC.Core.Utils ( exprType, exprIsHNF )
import GHC.Core.Type
import GHC.Core.Opt.WorkWrap.Utils
import GHC.Core.FamInstEnv
-import GHC.Core.SimpleOpt( SimpleOpts(..) )
+import GHC.Core.SimpleOpt
import GHC.Types.Var
import GHC.Types.Id
@@ -719,7 +718,9 @@ splitFun ww_opts fn_id rhs
return [(fn_id, rhs)]
Just stuff
- | Just stable_unf <- certainlyWillInline uf_opts fn_info
+ | let opt_wwd_rhs = simpleOptExpr (wo_simple_opts ww_opts) rhs
+ -- We need to stabilise the WW'd (and optimised) RHS below
+ , Just stable_unf <- certainlyWillInline uf_opts fn_info opt_wwd_rhs
-- We could make a w/w split, but in fact the RHS is small
-- See Note [Don't w/w inline small non-loop-breaker things]
, let id_w_unf = fn_id `setIdUnfolding` stable_unf
diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs
index dbc6b1e7fd..08c5a10b30 100644
--- a/compiler/GHC/Core/Unfold.hs
+++ b/compiler/GHC/Core/Unfold.hs
@@ -18,8 +18,6 @@ find, unsurprisingly, a Core expression.
{-# LANGUAGE BangPatterns #-}
-{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
-
module GHC.Core.Unfold (
Unfolding, UnfoldingGuidance, -- Abstract types
@@ -32,7 +30,7 @@ module GHC.Core.Unfold (
ArgSummary(..),
couldBeSmallEnoughToInline, inlineBoringOk,
- certainlyWillInline, smallEnoughToInline,
+ smallEnoughToInline,
callSiteInline, CallCtxt(..),
calcUnfoldingGuidance
@@ -45,12 +43,11 @@ import GHC.Driver.Flags
import GHC.Core
import GHC.Core.Utils
import GHC.Types.Id
-import GHC.Types.Demand ( isDeadEndSig )
import GHC.Core.DataCon
import GHC.Types.Literal
import GHC.Builtin.PrimOps
import GHC.Types.Id.Info
-import GHC.Types.Basic ( Arity, isNoInlinePragma )
+import GHC.Types.Basic ( Arity )
import GHC.Core.Type
import GHC.Builtin.Names
import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
@@ -961,89 +958,7 @@ smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance})
smallEnoughToInline _ _
= False
-----------------
-
-certainlyWillInline :: UnfoldingOpts -> IdInfo -> Maybe Unfolding
--- ^ Sees if the unfolding is pretty certain to inline.
--- If so, return a *stable* unfolding for it, that will always inline.
-certainlyWillInline opts fn_info
- = case fn_unf of
- CoreUnfolding { uf_tmpl = expr, uf_guidance = guidance, uf_src = src }
- | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions]
- | otherwise
- -> case guidance of
- UnfNever -> Nothing
- UnfWhen {} -> Just (fn_unf { uf_src = src' })
- -- INLINE functions have UnfWhen
- UnfIfGoodArgs { ug_size = size, ug_args = args }
- -> do_cunf expr size args src'
- where
- src' = -- Do not change InlineCompulsory!
- case src of
- InlineCompulsory -> InlineCompulsory
- _ -> InlineStable
-
- DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense
- -- to do so, and even if it is currently a
- -- loop breaker, it may not be later
-
- _other_unf -> Nothing
-
- where
- noinline = isNoInlinePragma (inlinePragInfo fn_info)
- fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline
-
- -- The UnfIfGoodArgs case seems important. If we w/w small functions
- -- binary sizes go up by 10%! (This is with SplitObjs.)
- -- I'm not totally sure why.
- -- INLINABLE functions come via this path
- -- See Note [certainlyWillInline: INLINABLE]
- do_cunf expr size args src'
- | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks]
- , not (isDeadEndSig (dmdSigInfo fn_info))
- -- Do not unconditionally inline a bottoming functions even if
- -- it seems smallish. We've carefully lifted it out to top level,
- -- so we don't want to re-inline it.
- , let unf_arity = length args
- , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts
- = Just (fn_unf { uf_src = src'
- , uf_guidance = UnfWhen { ug_arity = unf_arity
- , ug_unsat_ok = unSaturatedOk
- , ug_boring_ok = inlineBoringOk expr } })
- -- Note the "unsaturatedOk". A function like f = \ab. a
- -- will certainly inline, even if partially applied (f e), so we'd
- -- better make sure that the transformed inlining has the same property
- | otherwise
- = Nothing
-
-{- Note [certainlyWillInline: be careful of thunks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Don't claim that thunks will certainly inline, because that risks work
-duplication. Even if the work duplication is not great (eg is_cheap
-holds), it can make a big difference in an inner loop In #5623 we
-found that the WorkWrap phase thought that
- y = case x of F# v -> F# (v +# v)
-was certainlyWillInline, so the addition got duplicated.
-
-Note that we check arityInfo instead of the arity of the unfolding to detect
-this case. This is so that we don't accidentally fail to inline small partial
-applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2
-(say). Here there is no risk of work duplication, and the RHS is tiny, so
-certainlyWillInline should return True. But `unf_arity` is zero! However f's
-arity, gotten from `arityInfo fn_info`, is 1.
-
-Failing to say that `f` will inline forces W/W to generate a potentially huge
-worker for f that will immediately cancel with `g`'s wrapper anyway, causing
-unnecessary churn in the Simplifier while arriving at the same result.
-
-Note [certainlyWillInline: INLINABLE]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-certainlyWillInline /must/ return Nothing for a large INLINABLE thing,
-even though we have a stable inlining, so that strictness w/w takes
-place. It makes a big difference to efficiency, and the w/w pass knows
-how to transfer the INLINABLE info to the worker; see WorkWrap
-Note [Worker/wrapper for INLINABLE functions]
-
+{-
************************************************************************
* *
\subsection{callSiteInline}
diff --git a/compiler/GHC/Core/Unfold/Make.hs b/compiler/GHC/Core/Unfold/Make.hs
index 71981061ef..dd0a0b968a 100644
--- a/compiler/GHC/Core/Unfold/Make.hs
+++ b/compiler/GHC/Core/Unfold/Make.hs
@@ -1,4 +1,4 @@
-
+{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
-- | Unfolding creation
module GHC.Core.Unfold.Make
@@ -16,6 +16,7 @@ module GHC.Core.Unfold.Make
, mkCompulsoryUnfolding'
, mkDFunUnfolding
, specUnfolding
+ , certainlyWillInline
)
where
@@ -28,6 +29,7 @@ import GHC.Core.DataCon
import GHC.Core.Utils
import GHC.Types.Basic
import GHC.Types.Id
+import GHC.Types.Id.Info
import GHC.Types.Demand ( DmdSig, isDeadEndSig )
import GHC.Utils.Outputable
@@ -309,4 +311,92 @@ mkCoreUnfolding src top_lvl expr guidance
uf_expandable = exprIsExpandable expr,
uf_guidance = guidance }
+----------------
+certainlyWillInline :: UnfoldingOpts -> IdInfo -> CoreExpr -> Maybe Unfolding
+-- ^ Sees if the unfolding is pretty certain to inline.
+-- If so, return a *stable* unfolding for it, that will always inline.
+-- The CoreExpr is the WW'd and simplified RHS. In contrast, the unfolding
+-- template might not have been WW'd yet.
+certainlyWillInline opts fn_info rhs'
+ = case fn_unf of
+ CoreUnfolding { uf_guidance = guidance, uf_src = src }
+ | noinline -> Nothing -- See Note [Worker/wrapper for NOINLINE functions]
+ | otherwise
+ -> case guidance of
+ UnfNever -> Nothing
+ UnfWhen {} -> Just (fn_unf { uf_src = src', uf_tmpl = tmpl' })
+ -- INLINE functions have UnfWhen
+ UnfIfGoodArgs { ug_size = size, ug_args = args }
+ -> do_cunf size args src' tmpl'
+ where
+ src' = -- Do not change InlineCompulsory!
+ case src of
+ InlineCompulsory -> InlineCompulsory
+ _ -> InlineStable
+ tmpl' = -- Do not overwrite stable unfoldings!
+ case src of
+ InlineRhs -> occurAnalyseExpr rhs'
+ _ -> uf_tmpl fn_unf
+
+ DFunUnfolding {} -> Just fn_unf -- Don't w/w DFuns; it never makes sense
+ -- to do so, and even if it is currently a
+ -- loop breaker, it may not be later
+
+ _other_unf -> Nothing
+ where
+ noinline = isNoInlinePragma (inlinePragInfo fn_info)
+ fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline
+
+ -- The UnfIfGoodArgs case seems important. If we w/w small functions
+ -- binary sizes go up by 10%! (This is with SplitObjs.)
+ -- I'm not totally sure why.
+ -- INLINABLE functions come via this path
+ -- See Note [certainlyWillInline: INLINABLE]
+ do_cunf size args src' tmpl'
+ | arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks]
+ , not (isDeadEndSig (dmdSigInfo fn_info))
+ -- Do not unconditionally inline a bottoming functions even if
+ -- it seems smallish. We've carefully lifted it out to top level,
+ -- so we don't want to re-inline it.
+ , let unf_arity = length args
+ , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts
+ = Just (fn_unf { uf_src = src'
+ , uf_tmpl = tmpl'
+ , uf_guidance = UnfWhen { ug_arity = unf_arity
+ , ug_unsat_ok = unSaturatedOk
+ , ug_boring_ok = inlineBoringOk tmpl' } })
+ -- Note the "unsaturatedOk". A function like f = \ab. a
+ -- will certainly inline, even if partially applied (f e), so we'd
+ -- better make sure that the transformed inlining has the same property
+ | otherwise
+ = Nothing
+
+{- Note [certainlyWillInline: be careful of thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't claim that thunks will certainly inline, because that risks work
+duplication. Even if the work duplication is not great (eg is_cheap
+holds), it can make a big difference in an inner loop In #5623 we
+found that the WorkWrap phase thought that
+ y = case x of F# v -> F# (v +# v)
+was certainlyWillInline, so the addition got duplicated.
+
+Note that we check arityInfo instead of the arity of the unfolding to detect
+this case. This is so that we don't accidentally fail to inline small partial
+applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2
+(say). Here there is no risk of work duplication, and the RHS is tiny, so
+certainlyWillInline should return True. But `unf_arity` is zero! However f's
+arity, gotten from `arityInfo fn_info`, is 1.
+
+Failing to say that `f` will inline forces W/W to generate a potentially huge
+worker for f that will immediately cancel with `g`'s wrapper anyway, causing
+unnecessary churn in the Simplifier while arriving at the same result.
+
+Note [certainlyWillInline: INLINABLE]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+certainlyWillInline /must/ return Nothing for a large INLINABLE thing,
+even though we have a stable inlining, so that strictness w/w takes
+place. It makes a big difference to efficiency, and the w/w pass knows
+how to transfer the INLINABLE info to the worker; see WorkWrap
+Note [Worker/wrapper for INLINABLE functions]
+-}
diff --git a/testsuite/tests/simplCore/should_compile/T7360.stderr b/testsuite/tests/simplCore/should_compile/T7360.stderr
index 9b42a8c41d..d4b68ee22e 100644
--- a/testsuite/tests/simplCore/should_compile/T7360.stderr
+++ b/testsuite/tests/simplCore/should_compile/T7360.stderr
@@ -59,10 +59,8 @@ fun2 :: forall {a}. [a] -> ((), Int)
Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
Tmpl= \ (@a) (x [Occ=Once1] :: [a]) ->
(T7360.fun4,
- case x of wild [Occ=Once1] { __DEFAULT ->
- case GHC.List.$wlenAcc @a wild 0# of ww1 [Occ=Once1] { __DEFAULT ->
+ case GHC.List.$wlenAcc @a x 0# of ww1 [Occ=Once1] { __DEFAULT ->
GHC.Types.I# ww1
- }
})}]
fun2
= \ (@a) (x :: [a]) ->
diff --git a/testsuite/tests/stranal/should_compile/T20510.hs b/testsuite/tests/stranal/should_compile/T20510.hs
new file mode 100644
index 0000000000..b9f05118ef
--- /dev/null
+++ b/testsuite/tests/stranal/should_compile/T20510.hs
@@ -0,0 +1,7 @@
+module T20510 where
+
+small :: Int -> Int
+small x = go 0 x
+ where
+ go z 0 = z * x
+ go z y = go (z+y) (y-1)
diff --git a/testsuite/tests/stranal/should_compile/T20510.stderr b/testsuite/tests/stranal/should_compile/T20510.stderr
new file mode 100644
index 0000000000..b2cbed4594
--- /dev/null
+++ b/testsuite/tests/stranal/should_compile/T20510.stderr
@@ -0,0 +1,125 @@
+
+==================== Exitification transformation ====================
+Result size of Exitification transformation
+ = {terms: 50, types: 22, coercions: 0, joins: 2/2}
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+lvl :: Int
+[LclId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+lvl = GHC.Types.I# 0#
+
+-- RHS size: {terms: 32, types: 14, coercions: 0, joins: 2/2}
+small :: Int -> Int
+[LclIdX,
+ Arity=1,
+ Str=<SP(SL)>,
+ Cpr=1,
+ Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True,
+ Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
+ Tmpl= \ (x [Dmd=SP(SL)] :: Int) ->
+ joinrec {
+ go [InlPrag=[2], Occ=T[2]] :: Int -> Int -> Int
+ [LclId[JoinId(2)],
+ Arity=2,
+ Str=<SP(L)><SP(SL)>,
+ Unf=Unf{Src=InlineStable, TopLvl=False, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True,
+ Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False)
+ Tmpl= \ (z [Occ=Once1!, Dmd=SP(L)] :: Int)
+ (ds [Occ=Once1!, Dmd=SP(SL)] :: Int) ->
+ case z of { GHC.Types.I# ww [Occ=Once1] ->
+ case ds of { GHC.Types.I# ww [Occ=Once1, Dmd=SL] ->
+ jump $wgo ww ww
+ }
+ }}]
+ go (z [Occ=Once1!, Dmd=SP(L)] :: Int)
+ (ds [Occ=Once1!, Dmd=SP(SL)] :: Int)
+ = case z of { GHC.Types.I# ww [Occ=Once1] ->
+ case ds of { GHC.Types.I# ww [Occ=Once1, Dmd=SL] ->
+ jump $wgo ww ww
+ }
+ };
+ $wgo [InlPrag=[2], Occ=LoopBreakerT[2]]
+ :: GHC.Prim.Int# -> GHC.Prim.Int# -> Int
+ [LclId[JoinId(2)],
+ Arity=2,
+ Str=<L><SL>,
+ Unf=Unf{Src=<vanilla>, TopLvl=False, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [0 51] 69 10}]
+ $wgo (ww [Occ=Once2] :: GHC.Prim.Int#)
+ (ww [Occ=Once1!, Dmd=SL] :: GHC.Prim.Int#)
+ = case ww of ds {
+ __DEFAULT ->
+ jump go
+ (GHC.Types.I# (GHC.Prim.+# ww ds))
+ (GHC.Types.I# (GHC.Prim.-# ds 1#));
+ 0# ->
+ case x of { GHC.Types.I# y [Occ=Once1] ->
+ GHC.Types.I# (GHC.Prim.*# ww y)
+ }
+ }; } in
+ jump go lvl x}]
+small
+ = \ (x [Dmd=SP(SL)] :: Int) ->
+ join {
+ exit :: GHC.Prim.Int# -> Int
+ [LclId[JoinId(1)]]
+ exit (ww :: GHC.Prim.Int#)
+ = case x of { GHC.Types.I# y ->
+ GHC.Types.I# (GHC.Prim.*# ww y)
+ } } in
+ joinrec {
+ $wgo [InlPrag=[2], Occ=LoopBreaker]
+ :: GHC.Prim.Int# -> GHC.Prim.Int# -> Int
+ [LclId[JoinId(2)],
+ Arity=2,
+ Str=<L><SL>,
+ Unf=Unf{Src=<vanilla>, TopLvl=False, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [0 38] 49 10}]
+ $wgo (ww :: GHC.Prim.Int#) (ww [Dmd=SL] :: GHC.Prim.Int#)
+ = case ww of ds {
+ __DEFAULT -> jump $wgo (GHC.Prim.+# ww ds) (GHC.Prim.-# ds 1#);
+ 0# -> jump exit ww
+ }; } in
+ case x of { GHC.Types.I# ww [Dmd=SL] -> jump $wgo 0# ww }
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+$trModule :: GHC.Prim.Addr#
+[LclId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}]
+$trModule = "main"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+$trModule :: GHC.Types.TrName
+[LclId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+$trModule = GHC.Types.TrNameS $trModule
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+$trModule :: GHC.Prim.Addr#
+[LclId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 30 0}]
+$trModule = "T20510"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+$trModule :: GHC.Types.TrName
+[LclId,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+$trModule = GHC.Types.TrNameS $trModule
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+T20510.$trModule :: GHC.Types.Module
+[LclIdX,
+ Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
+ WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
+T20510.$trModule = GHC.Types.Module $trModule $trModule
+
+
+
diff --git a/testsuite/tests/stranal/should_compile/all.T b/testsuite/tests/stranal/should_compile/all.T
index 1798c08638..d953da1da9 100644
--- a/testsuite/tests/stranal/should_compile/all.T
+++ b/testsuite/tests/stranal/should_compile/all.T
@@ -72,3 +72,6 @@ test('T19766', [ grep_errmsg(r'absentError') ], compile, ['-ddump-worker-wrapper
test('T19849', normal, compile, [''])
test('T19882a', normal, compile, [''])
test('T19882b', normal, compile, [''])
+# We want that the 'go' joinrec in the unfolding has been worker/wrappered.
+# So we simply grep for 'jump $wgo' and hope we find more than 2 call sites:
+test('T20510', [ grep_errmsg(r'jump \$wgo') ], compile, ['-dsuppress-uniques -ddump-exitify'])