summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/Core/Opt/Arity.hs2
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Monad.hs2
-rw-r--r--compiler/GHC/Core/Unify.hs75
-rw-r--r--compiler/GHC/Utils/Monad.hs172
4 files changed, 176 insertions, 75 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs
index 829673433c..4cfb3bacf0 100644
--- a/compiler/GHC/Core/Opt/Arity.hs
+++ b/compiler/GHC/Core/Opt/Arity.hs
@@ -1031,7 +1031,7 @@ one-shot flag from the inner \s{osf}. By expanding with the
ArityType gotten from analysing the RHS, we achieve this neatly.
This makes a big difference to the one-shot monad trick;
-see Note [The one-shot state monad trick] in GHC.Core.Unify.
+see Note [The one-shot state monad trick] in GHC.Utils.Monad.
-}
-- | @etaExpand n e@ returns an expression with
diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs
index 3c05549ad5..1d1d401bcf 100644
--- a/compiler/GHC/Core/Opt/Simplify/Monad.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs
@@ -71,7 +71,7 @@ pattern SM :: (SimplTopEnv -> UniqSupply -> SimplCount
-- This pattern synonym makes the simplifier monad eta-expand,
-- which as a very beneficial effect on compiler performance
-- (worth a 1-2% reduction in bytes-allocated). See #18202.
--- See Note [The one-shot state monad trick] in GHC.Core.Unify
+-- See Note [The one-shot state monad trick] in GHC.Utils.Monad
pattern SM m <- SM' m
where
SM m = SM' (oneShot m)
diff --git a/compiler/GHC/Core/Unify.hs b/compiler/GHC/Core/Unify.hs
index 7f54afbd15..ed317da470 100644
--- a/compiler/GHC/Core/Unify.hs
+++ b/compiler/GHC/Core/Unify.hs
@@ -1212,77 +1212,6 @@ data BindFlag
************************************************************************
-}
-{- Note [The one-shot state monad trick]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Many places in GHC use a state monad, and we really want those
-functions to be eta-expanded (#18202). Consider
-
- newtype M a = MkM (State -> (State, a))
-
- instance Monad M where
- mf >>= k = MkM (\s -> case mf of MkM f ->
- case f s of (s',r) ->
- case k r of MkM g ->
- g s')
-
- foo :: Int -> M Int
- foo x = g y >>= \r -> h r
- where
- y = expensive x
-
-In general, you might say (map (foo 4) xs), and expect (expensive 4)
-to be evaluated only once. So foo should have arity 1 (not 2).
-But that's rare, and if you /aren't/ re-using (M a) values it's much
-more efficient to make foo have arity 2.
-
-See https://www.joachim-breitner.de/blog/763-Faster_Winter_5__Eta-Expanding_ReaderT
-
-So here is the trick. Define
-
- data M a = MkM' (State -> (State, a))
- pattern MkM f <- MkM' f
- where
- MkM f = MkM' (oneShot f)
-
-The patten synonm means that whenever we write (MkM f), we'll
-actually get (MkM' (oneShot f)), so we'll pin a one-shot flag
-on f's lambda-binder. Now look at foo:
-
- foo = \x. g (expensive x) >>= \r -> h r
- = \x. let mf = g (expensive x)
- k = \r -> h r
- in MkM' (oneShot (\s -> case mf of MkM' f ->
- case f s of (s',r) ->
- case k r of MkM' g ->
- g s'))
- -- The MkM' are just newtype casts nt_co
- = \x. let mf = g (expensive x)
- k = \r -> h r
- in (\s{os}. case (mf |> nt_co) s of (s',r) ->
- (k r) |> nt_co s')
- |> sym nt_co
-
- -- Float into that \s{os}
- = \x. (\s{os}. case (g (expensive x) |> nt_co) s of (s',r) ->
- h r |> nt_co s')
- |> sym nt_co
-
-and voila! In summary:
-
-* It's a very simple, two-line change
-
-* It eta-expands all uses of the monad, automatically
-
-* It is very similar to the built-in "state hack" (see
- GHC.Core.Opt.Arity Note [The state-transformer hack]) but the trick
- described here is applicable on a monad-by-monad basis under
- programmer control.
-
-* Beware: itt changes the behaviour of
- map (foo 3) xs
- ToDo: explain what to do if you want to do this
--}
-
data UMEnv
= UMEnv { um_unif :: AmIUnifying
@@ -1311,11 +1240,11 @@ data UMState = UMState
newtype UM a
= UM' { unUM :: UMState -> UnifyResultM (UMState, a) }
- -- See Note [The one-shot state monad trick]
+ -- See Note [The one-shot state monad trick] in GHC.Utils.Monad
deriving (Functor)
pattern UM :: (UMState -> UnifyResultM (UMState, a)) -> UM a
--- See Note [The one-shot state monad trick]
+-- See Note [The one-shot state monad trick] in GHC.Utils.Monad
pattern UM m <- UM' m
where
UM m = UM' (oneShot m)
diff --git a/compiler/GHC/Utils/Monad.hs b/compiler/GHC/Utils/Monad.hs
index c4abd3785f..8ba0eefb34 100644
--- a/compiler/GHC/Utils/Monad.hs
+++ b/compiler/GHC/Utils/Monad.hs
@@ -226,3 +226,175 @@ unlessM condM acc = do { cond <- condM
filterOutM :: (Applicative m) => (a -> m Bool) -> [a] -> m [a]
filterOutM p =
foldr (\ x -> liftA2 (\ flg -> if flg then id else (x:)) (p x)) (pure [])
+
+{- Note [The one-shot state monad trick]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Summary: many places in GHC use a state monad, and we really want those
+functions to be eta-expanded (#18202).
+
+The problem
+~~~~~~~~~~~
+Consider
+ newtype M a = MkM (State -> (State, a))
+
+ instance Monad M where
+ mf >>= k = MkM (\s -> case mf of MkM f ->
+ case f s of (s',r) ->
+ case k r of MkM g ->
+ g s')
+
+ fooM :: Int -> M Int
+ fooM x = g y >>= \r -> h r
+ where
+ y = expensive x
+
+Now suppose you say (repeat 20 (fooM 4)), where
+ repeat :: Int -> M Int -> M Int
+performs its argument n times. You would expect (expensive 4) to be
+evaluated only once, not 20 times. So foo should have arity 1 (not 2);
+it should look like this (modulo casts)
+
+ fooM x = let y = expensive x in
+ \s -> case g y of ...
+
+But creating and then repeating, a monadic computation is rare. If you
+/aren't/ re-using (M a) value, it's /much/ more efficient to make
+foo have arity 2, thus:
+
+ fooM x s = case g (expensive x) of ...
+
+Why more efficient? Because now foo takes its argument both at once,
+rather than one at a time, creating a heap-allocated function closure. See
+https://www.joachim-breitner.de/blog/763-Faster_Winter_5__Eta-Expanding_ReaderT
+for a very good explanation of the issue which led to these optimisations
+into GHC.
+
+The trick
+~~~~~~~~~
+With state monads like M the general case is that we *aren't* reusing
+(M a) values so it is much more efficient to avoid allocating a
+function closure for them. So the state monad trick is a way to keep
+the monadic syntax but to make GHC eta-expand functions like `fooM`.
+To do that we use the "oneShot" magic function.
+
+Here is the trick:
+ * Define a "smart constructor"
+ mkM :: (State -> (State,a)) -> M a
+ mkM f = MkM (oneShot m)
+
+ * Never call MkM directly, as a constructor. Instead, always call mkM.
+
+And that's it! The magic 'oneShot' function does this transformation:
+ oneShot (\s. e) ==> \s{os}. e
+which pins a one-shot flag {os} onto the binder 's'. That tells GHC
+that it can assume the lambda is called only once, and thus can freely
+float computations in and out of the lambda.
+
+To be concrete, let's see what happens to fooM:
+
+ fooM = \x. g (expensive x) >>= \r -> h r
+ = \x. let mf = g (expensive x)
+ k = \r -> h r
+ in MkM (oneShot (\s -> case mf of MkM' f ->
+ case f s of (s',r) ->
+ case k r of MkM' g ->
+ g s'))
+ -- The MkM' are just newtype casts nt_co
+ = \x. let mf = g (expensive x)
+ k = \r -> h r
+ in (\s{os}. case (mf |> nt_co) s of (s',r) ->
+ (k r) |> nt_co s')
+ |> sym nt_co
+
+ -- Crucial step: float let-bindings into that \s{os}
+ = \x. (\s{os}. case (g (expensive x) |> nt_co) s of (s',r) ->
+ h r |> nt_co s')
+ |> sym nt_co
+
+and voila! fooM has arity 2.
+
+The trick is very similar to the built-in "state hack"
+(see Note [The state-transformer hack] in "GHC.Core.Opt.Arity") but is
+applicable on a monad-by-monad basis under programmer control.
+
+Using pattern synonyms
+~~~~~~~~~~~~~~~~~~~~~~
+Using a smart constructor is fine, but there is no way to check that we
+have found *all* uses, especially if the uses escape a single module.
+A neat (but more sophisticated) alternative is to use pattern synonyms:
+
+ -- We rename the existing constructor.
+ newtype M a = MkM' (State -> (State, a))
+
+ -- The pattern has the old constructor name.
+ pattern MkM f <- MkM' f
+ where
+ MkM f = MkM' (oneShot f)
+
+Now we can simply grep to check that there are no uses of MkM'
+/anywhere/, to guarantee that we have not missed any. (Using the
+smart constructor alone we still need the data constructor in
+patterns.) That's the advantage of the pattern-synonym approach, but
+it is more elaborate.
+
+The pattern synonym approach is due to Sebastian Graaf (#18238)
+
+Derived instances
+~~~~~~~~~~~~~~~~~
+One caveat of both approaches is that derived instances don't use the smart
+constructor /or/ the pattern synonym. So they won't benefit from the automatic
+insertion of "oneShot".
+
+ data M a = MkM' (State -> (State,a))
+ deriving (Functor) <-- Functor implementation will use MkM'!
+
+Conclusion: don't use 'derviving' in these cases.
+
+Multi-shot actions (cf #18238)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sometimes we really *do* want computations to be shared! Remember our
+example (repeat 20 (fooM 4)). See Note [multiShotIO] in GHC.Types.Unique.Supply
+
+We can force fooM to have arity 1 using multiShot:
+
+ fooM :: Int -> M Int
+ fooM x = multiShotM (g y >>= \r -> h r)
+ where
+ y = expensive x
+
+ multiShotM :: M a -> M a
+ {-# INLINE multiShotM #-}
+ multiShotM (MkM m) = MkM (\s -> inline m s)
+ -- Really uses the data constructor,
+ -- not the smart constructor!
+
+Now we can see how fooM optimises (ignoring casts)
+
+ multiShotM (g y >>= \r -> h r)
+ ==> {inline (>>=)}
+ multiShotM (\s{os}. case g y s of ...)
+ ==> {inline multiShotM}
+ let m = \s{os}. case g y s of ...
+ in \s. inline m s
+ ==> {inline m}
+ \s. (\s{os}. case g y s of ...) s
+ ==> \s. case g y s of ...
+
+and voila! the one-shot flag has gone. It's possible that y has been
+replaced by (expensive x), but full laziness should pull it back out.
+(This part seems less robust.)
+
+The magic `inline` function does two things
+* It prevents eta reduction. If we wrote just
+ multiShotIO (IO m) = IO (\s -> m s)
+ the lamda would eta-reduce to 'm' and all would be lost.
+
+* It helps ensure that 'm' really does inline.
+
+Note that 'inline' evaporates in phase 0. See Note [inlineIdMagic]
+in GHC.Core.Opt.ConstantFold.match_inline.
+
+The INLINE pragma on multiShotM is very important, else the
+'inline' call will evaporate when compiling the module that
+defines 'multiShotM', before it is ever exported.
+-}