Allow simplification through runRW#

Because runRW# inlines so late, we were previously able to do very little simplification across it. For instance, given even a simple program like case runRW# (\s -> let n = I# 42# in n) of I# n# -> f n# we previously had no way to avoid the allocation of the I#. This patch allows the simplifier to push strict contexts into the continuation of a runRW# application, as explained in in Note [Simplification of runRW#] in GHC.CoreToStg.Prep. Fixes #15127. Metric Increase: T9961 Metric Decrease: ManyConstructors Co-Authored-By: Simon Peyton-Jone <simonpj@microsoft.com>
author: Ben Gamari <ben@smart-cactus.org> 2020-04-19 11:16:32 -0400
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-05-29 01:39:19 -0400
commit: 46720997a0b1fa2971a884adf43de096ce130a7e (patch)
tree: f4ff59bdb6979c03f535b7682c170a057d0f5607
parent: c49f7df02ff02c8f09e6a6e00a271b867ca6b092 (diff)
download: haskell-46720997a0b1fa2971a884adf43de096ce130a7e.tar.gz
6 files changed, 231 insertions, 40 deletions
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index 601c0fc38a..aa650536c3 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -685,22 +685,9 @@ lintRhs :: Id -> CoreExpr -> LintM LintedType
 --     its OccInfo and join-pointer-hood
 lintRhs bndr rhs
     | Just arity <- isJoinId_maybe bndr
-    = lint_join_lams arity arity True rhs
+    = lintJoinLams arity (Just bndr) rhs
     | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
-    = lint_join_lams arity arity False rhs
-  where
-    lint_join_lams 0 _ _ rhs
-      = lintCoreExpr rhs
-
-    lint_join_lams n tot enforce (Lam var expr)
-      = lintLambda var $ lint_join_lams (n-1) tot enforce expr
-
-    lint_join_lams n tot True _other
-      = failWithL $ mkBadJoinArityMsg bndr tot (tot-n) rhs
-    lint_join_lams _ _ False rhs
-      = markAllJoinsBad $ lintCoreExpr rhs
-          -- Future join point, not yet eta-expanded
-          -- Body is not a tail position
+    = lintJoinLams arity Nothing rhs
 
 -- Allow applications of the data constructor @StaticPtr@ at the top
 -- but produce errors otherwise.
@@ -722,6 +709,22 @@ lintRhs _bndr rhs = fmap lf_check_static_ptrs getLintFlags >>= go
         binders0
     go _ = markAllJoinsBad $ lintCoreExpr rhs
 
+-- | Lint the RHS of a join point with expected join arity of @n@ (see Note
+-- [Join points] in GHC.Core).
+lintJoinLams :: JoinArity -> Maybe Id -> CoreExpr -> LintM LintedType
+lintJoinLams join_arity enforce rhs
+  = go join_arity rhs
+  where
+    go 0 rhs             = lintCoreExpr rhs
+    go n (Lam var expr)  = lintLambda var $ go (n-1) expr
+      -- N.B. join points can be cast. e.g. we consider ((\x -> ...) `cast` ...)
+      -- to be a join point at join arity 1.
+    go n _other | Just bndr <- enforce -- Join point with too few RHS lambdas
+                = failWithL $ mkBadJoinArityMsg bndr join_arity n rhs
+                | otherwise -- Future join point, not yet eta-expanded
+                = markAllJoinsBad $ lintCoreExpr rhs
+                  -- Body of lambda is not a tail position
+
 lintIdUnfolding :: Id -> Type -> Unfolding -> LintM ()
 lintIdUnfolding bndr bndr_ty uf
   | isStableUnfolding uf
@@ -762,6 +765,40 @@ we will check any unfolding after it has been unfolded; checking the
 unfolding beforehand is merely an optimization, and one that actively
 hurts us here.
 
+Note [Linting of runRW#]
+~~~~~~~~~~~~~~~~~~~~~~~~
+runRW# has some very peculiar behavior (see Note [runRW magic] in
+GHC.CoreToStg.Prep) which CoreLint must accommodate.
+
+As described in Note [Casts and lambdas] in
+GHC.Core.Opt.Simplify.Utils, the simplifier pushes casts out of
+lambdas. Concretely, the simplifier will transform
+
+    runRW# @r @ty (\s -> expr `cast` co)
+
+into
+
+    runRW# @r @ty ((\s -> expr) `cast` co)
+
+Consequently we need to handle the case that the continuation is a
+cast of a lambda. See Note [Casts and lambdas] in
+GHC.Core.Opt.Simplify.Utils.
+
+In the event that the continuation is headed by a lambda (which
+will bind the State# token) we can safely allow calls to join
+points since CorePrep is going to apply the continuation to
+RealWorld.
+
+In the case that the continuation is not a lambda we lint the
+continuation disallowing join points, to rule out things like,
+
+    join j = ...
+    in runRW# @r @ty (
+         let x = jump j
+         in x
+       )
+
+
 ************************************************************************
 *                                                                      *
 \subsection[lintCoreExpr]{lintCoreExpr}
@@ -776,6 +813,18 @@ type LintedCoercion = Coercion
 type LintedTyCoVar  = TyCoVar
 type LintedId       = Id
 
+-- | Lint an expression cast through the given coercion, returning the type
+-- resulting from the cast.
+lintCastExpr :: CoreExpr -> LintedType -> Coercion -> LintM LintedType
+lintCastExpr expr expr_ty co
+  = do { co' <- lintCoercion co
+       ; let (Pair from_ty to_ty, role) = coercionKindRole co'
+       ; checkValueType to_ty $
+         text "target of cast" <+> quotes (ppr co')
+       ; lintRole co' Representational role
+       ; ensureEqTys from_ty expr_ty (mkCastErr expr co' from_ty expr_ty)
+       ; return to_ty }
+
 lintCoreExpr :: CoreExpr -> LintM LintedType
 -- The returned type has the substitution from the monad
 -- already applied to it:
@@ -793,14 +842,8 @@ lintCoreExpr (Lit lit)
   = return (literalType lit)
 
 lintCoreExpr (Cast expr co)
-  = do { expr_ty <- markAllJoinsBad $ lintCoreExpr expr
-       ; co' <- lintCoercion co
-       ; let (Pair from_ty to_ty, role) = coercionKindRole co'
-       ; checkValueType to_ty $
-         text "target of cast" <+> quotes (ppr co')
-       ; lintRole co' Representational role
-       ; ensureEqTys from_ty expr_ty (mkCastErr expr co' from_ty expr_ty)
-       ; return to_ty }
+  = do expr_ty <- markAllJoinsBad   $ lintCoreExpr expr
+       lintCastExpr expr expr_ty co
 
 lintCoreExpr (Tick tickish expr)
   = do case tickish of
@@ -860,6 +903,31 @@ lintCoreExpr e@(Let (Rec pairs) body)
     bndrs = map fst pairs
 
 lintCoreExpr e@(App _ _)
+  | Var fun <- fun
+  , fun `hasKey` runRWKey
+    -- N.B. we may have an over-saturated application of the form:
+    --   runRW (\s -> \x -> ...) y
+  , arg_ty1 : arg_ty2 : arg3 : rest <- args
+  = do { fun_ty1 <- lintCoreArg (idType fun) arg_ty1
+       ; fun_ty2 <- lintCoreArg fun_ty1      arg_ty2
+         -- See Note [Linting of runRW#]
+       ; let lintRunRWCont :: CoreArg -> LintM LintedType
+             lintRunRWCont (Cast expr co) = do
+                ty <- lintRunRWCont expr
+                lintCastExpr expr ty co
+             lintRunRWCont expr@(Lam _ _) = do
+                lintJoinLams 1 (Just fun) expr
+             lintRunRWCont other = markAllJoinsBad $ lintCoreExpr other
+             -- TODO: Look through ticks?
+       ; arg3_ty <- lintRunRWCont arg3
+       ; app_ty <- lintValApp arg3 fun_ty2 arg3_ty
+       ; lintCoreArgs app_ty rest }
+
+  | Var fun <- fun
+  , fun `hasKey` runRWKey
+  = failWithL (text "Invalid runRW# application")
+
+  | otherwise
   = do { fun_ty <- lintCoreFun fun (length args)
        ; lintCoreArgs fun_ty args }
   where
@@ -1139,11 +1207,15 @@ lintTyApp fun_ty arg_ty
   = failWithL (mkTyAppMsg fun_ty arg_ty)
 
 -----------------
+
+-- | @lintValApp arg fun_ty arg_ty@ lints an application of @fun arg@
+-- where @fun :: fun_ty@ and @arg :: arg_ty@, returning the type of the
+-- application.
 lintValApp :: CoreExpr -> LintedType -> LintedType -> LintM LintedType
 lintValApp arg fun_ty arg_ty
-  | Just (arg,res) <- splitFunTy_maybe fun_ty
-  = do { ensureEqTys arg arg_ty err1
-       ; return res }
+  | Just (arg_ty', res_ty') <- splitFunTy_maybe fun_ty
+  = do { ensureEqTys arg_ty' arg_ty err1
+       ; return res_ty' }
   | otherwise
   = failWithL err2
   where
@@ -2780,11 +2852,11 @@ mkInvalidJoinPointMsg var ty
         2 (ppr var <+> dcolon <+> ppr ty)
 
 mkBadJoinArityMsg :: Var -> Int -> Int -> CoreExpr -> SDoc
-mkBadJoinArityMsg var ar nlams rhs
+mkBadJoinArityMsg var ar n rhs
   = vcat [ text "Join point has too few lambdas",
            text "Join var:" <+> ppr var,
            text "Join arity:" <+> ppr ar,
-           text "Number of lambdas:" <+> ppr nlams,
+           text "Number of lambdas:" <+> ppr (ar - n),
            text "Rhs = " <+> ppr rhs
            ]
 
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index c2c2dc9cef..8afe90f1a8 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -39,6 +39,7 @@ import GHC.Types.Demand ( argOneShots, argsOneShots )
 import GHC.Data.Graph.Directed ( SCC(..), Node(..)
                                , stronglyConnCompFromEdgedVerticesUniq
                                , stronglyConnCompFromEdgedVerticesUniqR )
+import GHC.Builtin.Names( runRWKey )
 import GHC.Types.Unique
 import GHC.Types.Unique.FM
 import GHC.Types.Unique.Set
@@ -1882,8 +1883,15 @@ occAnalApp :: OccEnv
            -> (UsageDetails, Expr CoreBndr)
 -- Naked variables (not applied) end up here too
 occAnalApp env (Var fun, args, ticks)
-  | null ticks = (all_uds, mkApps fun' args')
-  | otherwise  = (all_uds, mkTicks ticks $ mkApps fun' args')
+  -- Account for join arity of runRW# continuation
+  -- See Note [Simplification of runRW#]
+  | fun `hasKey` runRWKey
+  , [t1, t2, arg]  <- args
+  , let (usage, arg') = occAnalRhs env (Just 1) arg
+  = (usage, mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
+
+  | otherwise
+  = (all_uds, mkTicks ticks $ mkApps fun' args')
   where
     (fun', fun_id') = lookupVarEnv (occ_bs_env env) fun
                       `orElse` (Var fun, fun)
diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs
index 38b8ede40d..4bdf8545e1 100644
--- a/compiler/GHC/Core/Opt/SetLevels.hs
+++ b/compiler/GHC/Core/Opt/SetLevels.hs
@@ -91,11 +91,13 @@ import GHC.Types.Demand       ( StrictSig, Demand, isStrictDmd, splitStrictSig,
 import GHC.Types.Cpr          ( mkCprSig, botCpr )
 import GHC.Types.Name         ( getOccName, mkSystemVarName )
 import GHC.Types.Name.Occurrence ( occNameString )
+import GHC.Types.Unique       ( hasKey )
 import GHC.Core.Type    ( Type, mkLamTypes, splitTyConApp_maybe, tyCoVarsOfType
                         , mightBeUnliftedType, closeOverKindsDSet )
 import GHC.Types.Basic  ( Arity, RecFlag(..), isRec )
 import GHC.Core.DataCon ( dataConOrigResTy )
 import GHC.Builtin.Types
+import GHC.Builtin.Names      ( runRWKey )
 import GHC.Types.Unique.Supply
 import GHC.Utils.Misc
 import GHC.Utils.Outputable
@@ -399,8 +401,14 @@ lvlNonTailExpr env expr
 lvlApp :: LevelEnv
        -> CoreExprWithFVs
        -> (CoreExprWithFVs, [CoreExprWithFVs]) -- Input application
-        -> LvlM LevelledExpr                   -- Result expression
+       -> LvlM LevelledExpr                    -- Result expression
 lvlApp env orig_expr ((_,AnnVar fn), args)
+  -- Try to ensure that runRW#'s continuation isn't floated out.
+  -- See Note [Simplification of runRW#].
+  | fn `hasKey` runRWKey
+  = do { args' <- mapM (lvlExpr env) args
+       ; return (foldl' App (lookupVar env fn) args') }
+
   | floatOverSat env   -- See Note [Floating over-saturated applications]
   , arity > 0
   , arity < n_val_args
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 23b26abcf8..af7a3a405d 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -37,10 +37,13 @@ import GHC.Core.DataCon
    , StrictnessMark (..) )
 import GHC.Core.Opt.Monad ( Tick(..), SimplMode(..) )
 import GHC.Core
+import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
+import GHC.Builtin.Names( runRWKey )
 import GHC.Types.Demand ( StrictSig(..), dmdTypeDepth, isStrictDmd
                         , mkClosedStrictSig, topDmd, botDiv )
 import GHC.Types.Cpr    ( mkCprSig, botCpr )
 import GHC.Core.Ppr     ( pprCoreExpr )
+import GHC.Types.Unique ( hasKey )
 import GHC.Core.Unfold
 import GHC.Core.Utils
 import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg
@@ -1877,14 +1880,36 @@ rebuildCall env info (CastIt co cont)
 rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = cont })
   = rebuildCall env (addTyArgTo info arg_ty) cont
 
-rebuildCall env info@(ArgInfo { ai_encl = encl_rules, ai_type = fun_ty
+---------- The runRW# rule. Do this after absorbing all arguments ------
+-- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
+-- K[ runRW# rr ty (\s. body) ]  -->  runRW rr' ty' (\s. K[ body ])
+rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args })
+            (ApplyToVal { sc_arg = arg, sc_env = arg_se, sc_cont = cont })
+  | fun `hasKey` runRWKey
+  , not (contIsStop cont)  -- Don't fiddle around if the continuation is boring
+  , [ TyArg {}, TyArg {} ] <- rev_args
+  = do { s <- newId (fsLit "s") realWorldStatePrimTy
+       ; let env'  = (arg_se `setInScopeFromE` env) `addNewInScopeIds` [s]
+             cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
+                                , sc_env = env', sc_cont = cont }
+       ; body' <- simplExprC env' arg cont'
+       ; let arg'  = Lam s body'
+             ty'   = contResultType cont
+             rr'   = getRuntimeRep ty'
+             call' = mkApps (Var fun) [mkTyArg rr', mkTyArg ty', arg']
+       ; return (emptyFloats env, call') }
+
+rebuildCall env info@(ArgInfo { ai_type = fun_ty, ai_encl = encl_rules
                               , ai_strs = str:strs, ai_discs = disc:discs })
             (ApplyToVal { sc_arg = arg, sc_env = arg_se
                         , sc_dup = dup_flag, sc_cont = cont })
+
+  -- Argument is already simplified
   | isSimplified dup_flag     -- See Note [Avoid redundant simplification]
   = rebuildCall env (addValArgTo info' arg) cont
 
-  | str         -- Strict argument
+  -- Strict arguments
+  | str
   , sm_case_case (getMode env)
   = -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
     simplExprF (arg_se `setInScopeFromE` env) arg
@@ -1892,7 +1917,8 @@ rebuildCall env info@(ArgInfo { ai_encl = encl_rules, ai_type = fun_ty
                           , sc_dup = Simplified, sc_cont = cont })
                 -- Note [Shadowing]
 
-  | otherwise                           -- Lazy argument
+  -- Lazy arguments
+  | otherwise
         -- DO NOT float anything outside, hence simplExprC
         -- There is no benefit (unlike in a let-binding), and we'd
         -- have to be very careful about bogus strictness through
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 32bbfff214..ac74dd6723 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -829,14 +829,23 @@ cpeApp top_env expr
         -- rather than the far superior "f x y".  Test case is par01.
         = let (terminal, args', depth') = collect_args arg
           in cpe_app env terminal (args' ++ args) (depth + depth' - 1)
-    cpe_app env (Var f) [CpeApp _runtimeRep@Type{}, CpeApp _type@Type{}, CpeApp arg] 1
+    cpe_app env (Var f) (CpeApp _runtimeRep@Type{} : CpeApp _type@Type{} : CpeApp arg : rest) n
         | f `hasKey` runRWKey
+        -- N.B. While it may appear that n == 1 in the case of runRW#
+        -- applications, keep in mind that we may have applications that return
+        , n >= 1
         -- See Note [runRW magic]
         -- Replace (runRW# f) by (f realWorld#), beta reducing if possible (this
         -- is why we return a CorePrepEnv as well)
         = case arg of
-            Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body [] 0
-            _          -> cpe_app env arg [CpeApp (Var realWorldPrimId)] 1
+            Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body rest (n-2)
+            _          -> cpe_app env arg (CpeApp (Var realWorldPrimId) : rest) (n-1)
+             -- TODO: What about casts?
+
+    cpe_app _env (Var f) args n
+        | f `hasKey` runRWKey
+        = pprPanic "cpe_app(runRW#)" (ppr args $$ ppr n)
+
     cpe_app env (Var v) args depth
       = do { v1 <- fiddleCCall v
            ; let e2 = lookupCorePrepEnv env v1
@@ -965,8 +974,77 @@ pragma.  It is levity-polymorphic.
            => (State# RealWorld -> (# State# RealWorld, o #))
                               -> (# State# RealWorld, o #)
 
-It needs no special treatment in GHC except this special inlining here
-in CorePrep (and in GHC.CoreToByteCode).
+It's correctness needs no special treatment in GHC except this special inlining
+here in CorePrep (and in GHC.CoreToByteCode).
+
+However, there are a variety of optimisation opportunities that the simplifier
+takes advantage of. See Note [Simplification of runRW#].
+
+
+Note [Simplification of runRW#]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider the program,
+
+    case runRW# (\s -> let n = I# 42# in n) of
+      I# n# -> f n#
+
+There is no reason why we should allocate an I# constructor given that we
+immediately destructure it. To avoid this the simplifier will push strict
+contexts into runRW's continuation. That is, it transforms
+
+    K[ runRW# @r @ty cont ]
+              ~>
+    runRW# @r @ty K[cont]
+
+This has a few interesting implications. Consider, for instance, this program:
+
+    join j = ...
+    in case runRW# @r @ty cont of
+         result -> jump j result
+
+Performing the transform described above would result in:
+
+    join j x = ...
+    in runRW# @r @ty (\s ->
+         case cont of in
+           result -> jump j result
+       )
+
+If runRW# were a "normal" function this call to join point j would not be
+allowed in its continuation argument. However, since runRW# is inlined (as
+described in Note [runRW magic] above), such join point occurences are
+completely fine. Both occurrence analysis and Core Lint have special treatment
+for runRW# applications. See Note [Linting of runRW#] for details on the latter.
+
+Moreover, it's helpful to ensure that runRW's continuation isn't floated out
+(since doing so would then require a call, whereas we would otherwise end up
+with straight-line). Consequently, GHC.Core.Opt.SetLevels.lvlApp has special
+treatment for runRW# applications, ensure the arguments are not floated if
+MFEs.
+
+Other considered designs
+------------------------
+
+One design that was rejected was to *require* that runRW#'s continuation be
+headed by a lambda. However, this proved to be quite fragile. For instance,
+SetLevels is very eager to float bottoming expressions. For instance given
+something of the form,
+
+    runRW# @r @ty (\s -> case expr of x -> undefined)
+
+SetLevels will see that the body the lambda is bottoming and will consequently
+float it to the top-level (assuming expr has no free coercion variables which
+prevent this). We therefore end up with
+
+    runRW# @r @ty (\s -> lvl s)
+
+Which the simplifier will beta reduce, leaving us with
+
+    runRW# @r @ty lvl
+
+Breaking our desired invariant. Ultimately we decided to simply accept that
+the continuation may not be a manifest lambda.
+
 
 -- ---------------------------------------------------------------------------
 --      CpeArg: produces a result satisfying CpeArg
diff --git a/compiler/GHC/HsToCore/Utils.hs b/compiler/GHC/HsToCore/Utils.hs
index 48673a18d5..194cf4e1ac 100644
--- a/compiler/GHC/HsToCore/Utils.hs
+++ b/compiler/GHC/HsToCore/Utils.hs
@@ -482,7 +482,6 @@ mkCoreAppDs _ (Var f `App` Type _r `App` Type ty1 `App` Type ty2 `App` arg1) arg
                    Var v1 | isInternalName (idName v1)
                           -> v1        -- Note [Desugaring seq], points (2) and (3)
                    _      -> mkWildValBinder ty1
-
 mkCoreAppDs s fun arg = mkCoreApp s fun arg  -- The rest is done in GHC.Core.Make
 
 -- NB: No argument can be levity polymorphic
author	Ben Gamari <ben@smart-cactus.org>	2020-04-19 11:16:32 -0400
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-05-29 01:39:19 -0400
commit	46720997a0b1fa2971a884adf43de096ce130a7e (patch)
tree	f4ff59bdb6979c03f535b7682c170a057d0f5607
parent	c49f7df02ff02c8f09e6a6e00a271b867ca6b092 (diff)
download	haskell-46720997a0b1fa2971a884adf43de096ce130a7e.tar.gz