summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2020-04-19 11:16:32 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-05-29 01:39:19 -0400
commit46720997a0b1fa2971a884adf43de096ce130a7e (patch)
treef4ff59bdb6979c03f535b7682c170a057d0f5607
parentc49f7df02ff02c8f09e6a6e00a271b867ca6b092 (diff)
downloadhaskell-46720997a0b1fa2971a884adf43de096ce130a7e.tar.gz
Allow simplification through runRW#
Because runRW# inlines so late, we were previously able to do very little simplification across it. For instance, given even a simple program like case runRW# (\s -> let n = I# 42# in n) of I# n# -> f n# we previously had no way to avoid the allocation of the I#. This patch allows the simplifier to push strict contexts into the continuation of a runRW# application, as explained in in Note [Simplification of runRW#] in GHC.CoreToStg.Prep. Fixes #15127. Metric Increase: T9961 Metric Decrease: ManyConstructors Co-Authored-By: Simon Peyton-Jone <simonpj@microsoft.com>
-rw-r--r--compiler/GHC/Core/Lint.hs128
-rw-r--r--compiler/GHC/Core/Opt/OccurAnal.hs12
-rw-r--r--compiler/GHC/Core/Opt/SetLevels.hs10
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs32
-rw-r--r--compiler/GHC/CoreToStg/Prep.hs88
-rw-r--r--compiler/GHC/HsToCore/Utils.hs1
6 files changed, 231 insertions, 40 deletions
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index 601c0fc38a..aa650536c3 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -685,22 +685,9 @@ lintRhs :: Id -> CoreExpr -> LintM LintedType
-- its OccInfo and join-pointer-hood
lintRhs bndr rhs
| Just arity <- isJoinId_maybe bndr
- = lint_join_lams arity arity True rhs
+ = lintJoinLams arity (Just bndr) rhs
| AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
- = lint_join_lams arity arity False rhs
- where
- lint_join_lams 0 _ _ rhs
- = lintCoreExpr rhs
-
- lint_join_lams n tot enforce (Lam var expr)
- = lintLambda var $ lint_join_lams (n-1) tot enforce expr
-
- lint_join_lams n tot True _other
- = failWithL $ mkBadJoinArityMsg bndr tot (tot-n) rhs
- lint_join_lams _ _ False rhs
- = markAllJoinsBad $ lintCoreExpr rhs
- -- Future join point, not yet eta-expanded
- -- Body is not a tail position
+ = lintJoinLams arity Nothing rhs
-- Allow applications of the data constructor @StaticPtr@ at the top
-- but produce errors otherwise.
@@ -722,6 +709,22 @@ lintRhs _bndr rhs = fmap lf_check_static_ptrs getLintFlags >>= go
binders0
go _ = markAllJoinsBad $ lintCoreExpr rhs
+-- | Lint the RHS of a join point with expected join arity of @n@ (see Note
+-- [Join points] in GHC.Core).
+lintJoinLams :: JoinArity -> Maybe Id -> CoreExpr -> LintM LintedType
+lintJoinLams join_arity enforce rhs
+ = go join_arity rhs
+ where
+ go 0 rhs = lintCoreExpr rhs
+ go n (Lam var expr) = lintLambda var $ go (n-1) expr
+ -- N.B. join points can be cast. e.g. we consider ((\x -> ...) `cast` ...)
+ -- to be a join point at join arity 1.
+ go n _other | Just bndr <- enforce -- Join point with too few RHS lambdas
+ = failWithL $ mkBadJoinArityMsg bndr join_arity n rhs
+ | otherwise -- Future join point, not yet eta-expanded
+ = markAllJoinsBad $ lintCoreExpr rhs
+ -- Body of lambda is not a tail position
+
lintIdUnfolding :: Id -> Type -> Unfolding -> LintM ()
lintIdUnfolding bndr bndr_ty uf
| isStableUnfolding uf
@@ -762,6 +765,40 @@ we will check any unfolding after it has been unfolded; checking the
unfolding beforehand is merely an optimization, and one that actively
hurts us here.
+Note [Linting of runRW#]
+~~~~~~~~~~~~~~~~~~~~~~~~
+runRW# has some very peculiar behavior (see Note [runRW magic] in
+GHC.CoreToStg.Prep) which CoreLint must accommodate.
+
+As described in Note [Casts and lambdas] in
+GHC.Core.Opt.Simplify.Utils, the simplifier pushes casts out of
+lambdas. Concretely, the simplifier will transform
+
+ runRW# @r @ty (\s -> expr `cast` co)
+
+into
+
+ runRW# @r @ty ((\s -> expr) `cast` co)
+
+Consequently we need to handle the case that the continuation is a
+cast of a lambda. See Note [Casts and lambdas] in
+GHC.Core.Opt.Simplify.Utils.
+
+In the event that the continuation is headed by a lambda (which
+will bind the State# token) we can safely allow calls to join
+points since CorePrep is going to apply the continuation to
+RealWorld.
+
+In the case that the continuation is not a lambda we lint the
+continuation disallowing join points, to rule out things like,
+
+ join j = ...
+ in runRW# @r @ty (
+ let x = jump j
+ in x
+ )
+
+
************************************************************************
* *
\subsection[lintCoreExpr]{lintCoreExpr}
@@ -776,6 +813,18 @@ type LintedCoercion = Coercion
type LintedTyCoVar = TyCoVar
type LintedId = Id
+-- | Lint an expression cast through the given coercion, returning the type
+-- resulting from the cast.
+lintCastExpr :: CoreExpr -> LintedType -> Coercion -> LintM LintedType
+lintCastExpr expr expr_ty co
+ = do { co' <- lintCoercion co
+ ; let (Pair from_ty to_ty, role) = coercionKindRole co'
+ ; checkValueType to_ty $
+ text "target of cast" <+> quotes (ppr co')
+ ; lintRole co' Representational role
+ ; ensureEqTys from_ty expr_ty (mkCastErr expr co' from_ty expr_ty)
+ ; return to_ty }
+
lintCoreExpr :: CoreExpr -> LintM LintedType
-- The returned type has the substitution from the monad
-- already applied to it:
@@ -793,14 +842,8 @@ lintCoreExpr (Lit lit)
= return (literalType lit)
lintCoreExpr (Cast expr co)
- = do { expr_ty <- markAllJoinsBad $ lintCoreExpr expr
- ; co' <- lintCoercion co
- ; let (Pair from_ty to_ty, role) = coercionKindRole co'
- ; checkValueType to_ty $
- text "target of cast" <+> quotes (ppr co')
- ; lintRole co' Representational role
- ; ensureEqTys from_ty expr_ty (mkCastErr expr co' from_ty expr_ty)
- ; return to_ty }
+ = do expr_ty <- markAllJoinsBad $ lintCoreExpr expr
+ lintCastExpr expr expr_ty co
lintCoreExpr (Tick tickish expr)
= do case tickish of
@@ -860,6 +903,31 @@ lintCoreExpr e@(Let (Rec pairs) body)
bndrs = map fst pairs
lintCoreExpr e@(App _ _)
+ | Var fun <- fun
+ , fun `hasKey` runRWKey
+ -- N.B. we may have an over-saturated application of the form:
+ -- runRW (\s -> \x -> ...) y
+ , arg_ty1 : arg_ty2 : arg3 : rest <- args
+ = do { fun_ty1 <- lintCoreArg (idType fun) arg_ty1
+ ; fun_ty2 <- lintCoreArg fun_ty1 arg_ty2
+ -- See Note [Linting of runRW#]
+ ; let lintRunRWCont :: CoreArg -> LintM LintedType
+ lintRunRWCont (Cast expr co) = do
+ ty <- lintRunRWCont expr
+ lintCastExpr expr ty co
+ lintRunRWCont expr@(Lam _ _) = do
+ lintJoinLams 1 (Just fun) expr
+ lintRunRWCont other = markAllJoinsBad $ lintCoreExpr other
+ -- TODO: Look through ticks?
+ ; arg3_ty <- lintRunRWCont arg3
+ ; app_ty <- lintValApp arg3 fun_ty2 arg3_ty
+ ; lintCoreArgs app_ty rest }
+
+ | Var fun <- fun
+ , fun `hasKey` runRWKey
+ = failWithL (text "Invalid runRW# application")
+
+ | otherwise
= do { fun_ty <- lintCoreFun fun (length args)
; lintCoreArgs fun_ty args }
where
@@ -1139,11 +1207,15 @@ lintTyApp fun_ty arg_ty
= failWithL (mkTyAppMsg fun_ty arg_ty)
-----------------
+
+-- | @lintValApp arg fun_ty arg_ty@ lints an application of @fun arg@
+-- where @fun :: fun_ty@ and @arg :: arg_ty@, returning the type of the
+-- application.
lintValApp :: CoreExpr -> LintedType -> LintedType -> LintM LintedType
lintValApp arg fun_ty arg_ty
- | Just (arg,res) <- splitFunTy_maybe fun_ty
- = do { ensureEqTys arg arg_ty err1
- ; return res }
+ | Just (arg_ty', res_ty') <- splitFunTy_maybe fun_ty
+ = do { ensureEqTys arg_ty' arg_ty err1
+ ; return res_ty' }
| otherwise
= failWithL err2
where
@@ -2780,11 +2852,11 @@ mkInvalidJoinPointMsg var ty
2 (ppr var <+> dcolon <+> ppr ty)
mkBadJoinArityMsg :: Var -> Int -> Int -> CoreExpr -> SDoc
-mkBadJoinArityMsg var ar nlams rhs
+mkBadJoinArityMsg var ar n rhs
= vcat [ text "Join point has too few lambdas",
text "Join var:" <+> ppr var,
text "Join arity:" <+> ppr ar,
- text "Number of lambdas:" <+> ppr nlams,
+ text "Number of lambdas:" <+> ppr (ar - n),
text "Rhs = " <+> ppr rhs
]
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index c2c2dc9cef..8afe90f1a8 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -39,6 +39,7 @@ import GHC.Types.Demand ( argOneShots, argsOneShots )
import GHC.Data.Graph.Directed ( SCC(..), Node(..)
, stronglyConnCompFromEdgedVerticesUniq
, stronglyConnCompFromEdgedVerticesUniqR )
+import GHC.Builtin.Names( runRWKey )
import GHC.Types.Unique
import GHC.Types.Unique.FM
import GHC.Types.Unique.Set
@@ -1882,8 +1883,15 @@ occAnalApp :: OccEnv
-> (UsageDetails, Expr CoreBndr)
-- Naked variables (not applied) end up here too
occAnalApp env (Var fun, args, ticks)
- | null ticks = (all_uds, mkApps fun' args')
- | otherwise = (all_uds, mkTicks ticks $ mkApps fun' args')
+ -- Account for join arity of runRW# continuation
+ -- See Note [Simplification of runRW#]
+ | fun `hasKey` runRWKey
+ , [t1, t2, arg] <- args
+ , let (usage, arg') = occAnalRhs env (Just 1) arg
+ = (usage, mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
+
+ | otherwise
+ = (all_uds, mkTicks ticks $ mkApps fun' args')
where
(fun', fun_id') = lookupVarEnv (occ_bs_env env) fun
`orElse` (Var fun, fun)
diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs
index 38b8ede40d..4bdf8545e1 100644
--- a/compiler/GHC/Core/Opt/SetLevels.hs
+++ b/compiler/GHC/Core/Opt/SetLevels.hs
@@ -91,11 +91,13 @@ import GHC.Types.Demand ( StrictSig, Demand, isStrictDmd, splitStrictSig,
import GHC.Types.Cpr ( mkCprSig, botCpr )
import GHC.Types.Name ( getOccName, mkSystemVarName )
import GHC.Types.Name.Occurrence ( occNameString )
+import GHC.Types.Unique ( hasKey )
import GHC.Core.Type ( Type, mkLamTypes, splitTyConApp_maybe, tyCoVarsOfType
, mightBeUnliftedType, closeOverKindsDSet )
import GHC.Types.Basic ( Arity, RecFlag(..), isRec )
import GHC.Core.DataCon ( dataConOrigResTy )
import GHC.Builtin.Types
+import GHC.Builtin.Names ( runRWKey )
import GHC.Types.Unique.Supply
import GHC.Utils.Misc
import GHC.Utils.Outputable
@@ -399,8 +401,14 @@ lvlNonTailExpr env expr
lvlApp :: LevelEnv
-> CoreExprWithFVs
-> (CoreExprWithFVs, [CoreExprWithFVs]) -- Input application
- -> LvlM LevelledExpr -- Result expression
+ -> LvlM LevelledExpr -- Result expression
lvlApp env orig_expr ((_,AnnVar fn), args)
+ -- Try to ensure that runRW#'s continuation isn't floated out.
+ -- See Note [Simplification of runRW#].
+ | fn `hasKey` runRWKey
+ = do { args' <- mapM (lvlExpr env) args
+ ; return (foldl' App (lookupVar env fn) args') }
+
| floatOverSat env -- See Note [Floating over-saturated applications]
, arity > 0
, arity < n_val_args
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 23b26abcf8..af7a3a405d 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -37,10 +37,13 @@ import GHC.Core.DataCon
, StrictnessMark (..) )
import GHC.Core.Opt.Monad ( Tick(..), SimplMode(..) )
import GHC.Core
+import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
+import GHC.Builtin.Names( runRWKey )
import GHC.Types.Demand ( StrictSig(..), dmdTypeDepth, isStrictDmd
, mkClosedStrictSig, topDmd, botDiv )
import GHC.Types.Cpr ( mkCprSig, botCpr )
import GHC.Core.Ppr ( pprCoreExpr )
+import GHC.Types.Unique ( hasKey )
import GHC.Core.Unfold
import GHC.Core.Utils
import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg
@@ -1877,14 +1880,36 @@ rebuildCall env info (CastIt co cont)
rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont })
= rebuildCall env (addTyArgTo info arg_ty) cont
-rebuildCall env info@(ArgInfo { ai_encl = encl_rules, ai_type = fun_ty
+---------- The runRW# rule. Do this after absorbing all arguments ------
+-- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
+-- K[ runRW# rr ty (\s. body) ] --> runRW rr' ty' (\s. K[ body ])
+rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args })
+ (ApplyToVal { sc_arg = arg, sc_env = arg_se, sc_cont = cont })
+ | fun `hasKey` runRWKey
+ , not (contIsStop cont) -- Don't fiddle around if the continuation is boring
+ , [ TyArg {}, TyArg {} ] <- rev_args
+ = do { s <- newId (fsLit "s") realWorldStatePrimTy
+ ; let env' = (arg_se `setInScopeFromE` env) `addNewInScopeIds` [s]
+ cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
+ , sc_env = env', sc_cont = cont }
+ ; body' <- simplExprC env' arg cont'
+ ; let arg' = Lam s body'
+ ty' = contResultType cont
+ rr' = getRuntimeRep ty'
+ call' = mkApps (Var fun) [mkTyArg rr', mkTyArg ty', arg']
+ ; return (emptyFloats env, call') }
+
+rebuildCall env info@(ArgInfo { ai_type = fun_ty, ai_encl = encl_rules
, ai_strs = str:strs, ai_discs = disc:discs })
(ApplyToVal { sc_arg = arg, sc_env = arg_se
, sc_dup = dup_flag, sc_cont = cont })
+
+ -- Argument is already simplified
| isSimplified dup_flag -- See Note [Avoid redundant simplification]
= rebuildCall env (addValArgTo info' arg) cont
- | str -- Strict argument
+ -- Strict arguments
+ | str
, sm_case_case (getMode env)
= -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
simplExprF (arg_se `setInScopeFromE` env) arg
@@ -1892,7 +1917,8 @@ rebuildCall env info@(ArgInfo { ai_encl = encl_rules, ai_type = fun_ty
, sc_dup = Simplified, sc_cont = cont })
-- Note [Shadowing]
- | otherwise -- Lazy argument
+ -- Lazy arguments
+ | otherwise
-- DO NOT float anything outside, hence simplExprC
-- There is no benefit (unlike in a let-binding), and we'd
-- have to be very careful about bogus strictness through
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 32bbfff214..ac74dd6723 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -829,14 +829,23 @@ cpeApp top_env expr
-- rather than the far superior "f x y". Test case is par01.
= let (terminal, args', depth') = collect_args arg
in cpe_app env terminal (args' ++ args) (depth + depth' - 1)
- cpe_app env (Var f) [CpeApp _runtimeRep@Type{}, CpeApp _type@Type{}, CpeApp arg] 1
+ cpe_app env (Var f) (CpeApp _runtimeRep@Type{} : CpeApp _type@Type{} : CpeApp arg : rest) n
| f `hasKey` runRWKey
+ -- N.B. While it may appear that n == 1 in the case of runRW#
+ -- applications, keep in mind that we may have applications that return
+ , n >= 1
-- See Note [runRW magic]
-- Replace (runRW# f) by (f realWorld#), beta reducing if possible (this
-- is why we return a CorePrepEnv as well)
= case arg of
- Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body [] 0
- _ -> cpe_app env arg [CpeApp (Var realWorldPrimId)] 1
+ Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body rest (n-2)
+ _ -> cpe_app env arg (CpeApp (Var realWorldPrimId) : rest) (n-1)
+ -- TODO: What about casts?
+
+ cpe_app _env (Var f) args n
+ | f `hasKey` runRWKey
+ = pprPanic "cpe_app(runRW#)" (ppr args $$ ppr n)
+
cpe_app env (Var v) args depth
= do { v1 <- fiddleCCall v
; let e2 = lookupCorePrepEnv env v1
@@ -965,8 +974,77 @@ pragma. It is levity-polymorphic.
=> (State# RealWorld -> (# State# RealWorld, o #))
-> (# State# RealWorld, o #)
-It needs no special treatment in GHC except this special inlining here
-in CorePrep (and in GHC.CoreToByteCode).
+It's correctness needs no special treatment in GHC except this special inlining
+here in CorePrep (and in GHC.CoreToByteCode).
+
+However, there are a variety of optimisation opportunities that the simplifier
+takes advantage of. See Note [Simplification of runRW#].
+
+
+Note [Simplification of runRW#]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider the program,
+
+ case runRW# (\s -> let n = I# 42# in n) of
+ I# n# -> f n#
+
+There is no reason why we should allocate an I# constructor given that we
+immediately destructure it. To avoid this the simplifier will push strict
+contexts into runRW's continuation. That is, it transforms
+
+ K[ runRW# @r @ty cont ]
+ ~>
+ runRW# @r @ty K[cont]
+
+This has a few interesting implications. Consider, for instance, this program:
+
+ join j = ...
+ in case runRW# @r @ty cont of
+ result -> jump j result
+
+Performing the transform described above would result in:
+
+ join j x = ...
+ in runRW# @r @ty (\s ->
+ case cont of in
+ result -> jump j result
+ )
+
+If runRW# were a "normal" function this call to join point j would not be
+allowed in its continuation argument. However, since runRW# is inlined (as
+described in Note [runRW magic] above), such join point occurences are
+completely fine. Both occurrence analysis and Core Lint have special treatment
+for runRW# applications. See Note [Linting of runRW#] for details on the latter.
+
+Moreover, it's helpful to ensure that runRW's continuation isn't floated out
+(since doing so would then require a call, whereas we would otherwise end up
+with straight-line). Consequently, GHC.Core.Opt.SetLevels.lvlApp has special
+treatment for runRW# applications, ensure the arguments are not floated if
+MFEs.
+
+Other considered designs
+------------------------
+
+One design that was rejected was to *require* that runRW#'s continuation be
+headed by a lambda. However, this proved to be quite fragile. For instance,
+SetLevels is very eager to float bottoming expressions. For instance given
+something of the form,
+
+ runRW# @r @ty (\s -> case expr of x -> undefined)
+
+SetLevels will see that the body the lambda is bottoming and will consequently
+float it to the top-level (assuming expr has no free coercion variables which
+prevent this). We therefore end up with
+
+ runRW# @r @ty (\s -> lvl s)
+
+Which the simplifier will beta reduce, leaving us with
+
+ runRW# @r @ty lvl
+
+Breaking our desired invariant. Ultimately we decided to simply accept that
+the continuation may not be a manifest lambda.
+
-- ---------------------------------------------------------------------------
-- CpeArg: produces a result satisfying CpeArg
diff --git a/compiler/GHC/HsToCore/Utils.hs b/compiler/GHC/HsToCore/Utils.hs
index 48673a18d5..194cf4e1ac 100644
--- a/compiler/GHC/HsToCore/Utils.hs
+++ b/compiler/GHC/HsToCore/Utils.hs
@@ -482,7 +482,6 @@ mkCoreAppDs _ (Var f `App` Type _r `App` Type ty1 `App` Type ty2 `App` arg1) arg
Var v1 | isInternalName (idName v1)
-> v1 -- Note [Desugaring seq], points (2) and (3)
_ -> mkWildValBinder ty1
-
mkCoreAppDs s fun arg = mkCoreApp s fun arg -- The rest is done in GHC.Core.Make
-- NB: No argument can be levity polymorphic