summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2015-11-12 14:52:11 +0100
committerBen Gamari <ben@smart-cactus.org>2015-11-12 14:52:13 +0100
commit351de169e14ad9277aaca653df4a3753c151f7bb (patch)
treedb78ed4b832e6063abe9832ecb8d5e4f4bb8378b
parent8755719462733dee52190e298436e8e8e8f12bea (diff)
downloadhaskell-351de169e14ad9277aaca653df4a3753c151f7bb.tar.gz
New magic function for applying realWorld#
Test Plan: validate Reviewers: goldfire, erikd, rwbarton, simonpj, austin, simonmar, hvr Reviewed By: simonpj Subscribers: simonmar, thomie Differential Revision: https://phabricator.haskell.org/D1103 GHC Trac Issues: #10678
-rw-r--r--compiler/basicTypes/MkId.hs59
-rw-r--r--compiler/coreSyn/CorePrep.hs13
-rw-r--r--compiler/prelude/PrelNames.hs3
-rw-r--r--includes/stg/MiscClosures.h2
-rw-r--r--libraries/base/GHC/IO.hs5
-rw-r--r--libraries/base/GHC/ST.hs58
-rw-r--r--libraries/ghc-prim/GHC/Magic.hs19
-rw-r--r--libraries/ghc-prim/changelog.md1
-rw-r--r--libraries/integer-gmp/src/GHC/Integer/Type.hs3
-rw-r--r--testsuite/tests/perf/should_run/all.T3
-rw-r--r--testsuite/tests/primops/should_run/T10678.hs22
-rw-r--r--testsuite/tests/primops/should_run/all.T9
12 files changed, 129 insertions, 68 deletions
diff --git a/compiler/basicTypes/MkId.hs b/compiler/basicTypes/MkId.hs
index c2a3678f02..989d79707e 100644
--- a/compiler/basicTypes/MkId.hs
+++ b/compiler/basicTypes/MkId.hs
@@ -30,7 +30,7 @@ module MkId (
wiredInIds, ghcPrimIds,
unsafeCoerceName, unsafeCoerceId, realWorldPrimId,
voidPrimId, voidArgId,
- nullAddrId, seqId, lazyId, lazyIdKey,
+ nullAddrId, seqId, lazyId, lazyIdKey, runRWId,
coercionTokenId, magicDictId, coerceId,
proxyHashId,
@@ -120,7 +120,7 @@ is right here.
wiredInIds :: [Id]
wiredInIds
- = [lazyId, dollarId, oneShotId]
+ = [lazyId, dollarId, oneShotId, runRWId]
++ errorIds -- Defined in MkCore
++ ghcPrimIds
@@ -1057,7 +1057,8 @@ another gun with which to shoot yourself in the foot.
lazyIdName, unsafeCoerceName, nullAddrName, seqName,
realWorldName, voidPrimIdName, coercionTokenName,
- magicDictName, coerceName, proxyName, dollarName, oneShotName :: Name
+ magicDictName, coerceName, proxyName, dollarName, oneShotName,
+ runRWName :: Name
unsafeCoerceName = mkWiredInIdName gHC_PRIM (fsLit "unsafeCoerce#") unsafeCoerceIdKey unsafeCoerceId
nullAddrName = mkWiredInIdName gHC_PRIM (fsLit "nullAddr#") nullAddrIdKey nullAddrId
seqName = mkWiredInIdName gHC_PRIM (fsLit "seq") seqIdKey seqId
@@ -1070,6 +1071,7 @@ coerceName = mkWiredInIdName gHC_PRIM (fsLit "coerce") coerceKey
proxyName = mkWiredInIdName gHC_PRIM (fsLit "proxy#") proxyHashKey proxyHashId
dollarName = mkWiredInIdName gHC_BASE (fsLit "$") dollarIdKey dollarId
oneShotName = mkWiredInIdName gHC_MAGIC (fsLit "oneShot") oneShotKey oneShotId
+runRWName = mkWiredInIdName gHC_MAGIC (fsLit "runRW#") runRWKey runRWId
dollarId :: Id -- Note [dollarId magic]
dollarId = pcMiscPrelId dollarName ty
@@ -1182,6 +1184,19 @@ oneShotId = pcMiscPrelId oneShotName ty info
x' = setOneShotLambda x
rhs = mkLams [openAlphaTyVar, openBetaTyVar, body, x'] $ Var body `App` Var x
+runRWId :: Id -- See Note [runRW magic] in this module
+runRWId = pcMiscPrelId runRWName ty info
+ where
+ info = noCafIdInfo `setInlinePragInfo` neverInlinePragma
+ -- State# RealWorld
+ stateRW = mkTyConApp statePrimTyCon [realWorldTy]
+ -- (# State# RealWorld, o #)
+ ret_ty = mkTyConApp unboxedPairTyCon [stateRW, openAlphaTy]
+ -- State# RealWorld -> (# State# RealWorld, o #)
+ arg_ty = stateRW `mkFunTy` ret_ty
+ -- (State# RealWorld -> (# State# RealWorld, o #))
+ -- -> (# State# RealWorld, o #)
+ ty = mkForAllTys [openAlphaTyVar] (arg_ty `mkFunTy` ret_ty)
--------------------------------------------------------------------------------
magicDictId :: Id -- See Note [magicDictId magic]
@@ -1322,6 +1337,44 @@ See Trac #3259 for a real world example.
lazyId is defined in GHC.Base, so we don't *have* to inline it. If it
appears un-applied, we'll end up just calling it.
+Note [runRW magic]
+~~~~~~~~~~~~~~~~~~
+Some definitions, for instance @runST@, must have careful control over float out
+of the bindings in their body. Consider this use of @runST@,
+
+ f x = runST ( \ s -> let (a, s') = newArray# 100 [] s
+ (_, s'') = fill_in_array_or_something a x s'
+ in freezeArray# a s'' )
+
+If we inline @runST@, we'll get:
+
+ f x = let (a, s') = newArray# 100 [] realWorld#{-NB-}
+ (_, s'') = fill_in_array_or_something a x s'
+ in freezeArray# a s''
+
+And now if we allow the @newArray#@ binding to float out to become a CAF,
+we end up with a result that is totally and utterly wrong:
+
+ f = let (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
+ in \ x ->
+ let (_, s'') = fill_in_array_or_something a x s'
+ in freezeArray# a s''
+
+All calls to @f@ will share a {\em single} array! Clearly this is nonsense and
+must be prevented.
+
+This is what @runRW#@ gives us: by being inlined extremely late in the
+optimization (right before lowering to STG, in CorePrep), we can ensure that
+no further floating will occur. This allows us to safely inline things like
+@runST@, which are otherwise needlessly expensive (see #10678 and #5916).
+
+While the definition of @GHC.Magic.runRW#@, we override its type in @MkId@
+to be open-kinded,
+
+ runRW# :: (o :: OpenKind) => (State# RealWorld -> (# State# RealWorld, o #))
+ -> (# State# RealWorld, o #)
+
+
Note [The oneShot function]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the context of making left-folds fuse somewhat okish (see ticket #7994
diff --git a/compiler/coreSyn/CorePrep.hs b/compiler/coreSyn/CorePrep.hs
index 23afcdfb04..e49ece43d2 100644
--- a/compiler/coreSyn/CorePrep.hs
+++ b/compiler/coreSyn/CorePrep.hs
@@ -18,6 +18,7 @@ import OccurAnal
import HscTypes
import PrelNames
+import MkId ( realWorldPrimId )
import CoreUtils
import CoreArity
import CoreFVs
@@ -511,10 +512,20 @@ cpeRhsE env (Lit (LitInteger i _))
cpeRhsE _env expr@(Lit {}) = return (emptyFloats, expr)
cpeRhsE env expr@(Var {}) = cpeApp env expr
-cpeRhsE env (Var f `App` _ `App` arg)
+cpeRhsE env (Var f `App` _{-type-} `App` arg)
| f `hasKey` lazyIdKey -- Replace (lazy a) by a
= cpeRhsE env arg -- See Note [lazyId magic] in MkId
+ -- See Note [runRW magic] in MkId
+ | f `hasKey` runRWKey -- Replace (runRW# f) by (f realWorld#),
+ = case arg of -- beta reducing if possible
+ Lam s body -> cpeRhsE env (substExpr (text "runRW#") subst body)
+ where subst = extendIdSubst emptySubst s (Var realWorldPrimId)
+ -- XXX I think we can use emptySubst here
+ -- because realWorldPrimId is a global variable
+ -- and so cannot be bound by a lambda in body
+ _ -> cpeRhsE env (arg `App` Var realWorldPrimId)
+
cpeRhsE env expr@(App {}) = cpeApp env expr
cpeRhsE env (Let bind expr)
diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs
index 05a38ffec9..7229f76401 100644
--- a/compiler/prelude/PrelNames.hs
+++ b/compiler/prelude/PrelNames.hs
@@ -1834,11 +1834,12 @@ rootMainKey, runMainKey :: Unique
rootMainKey = mkPreludeMiscIdUnique 101
runMainKey = mkPreludeMiscIdUnique 102
-thenIOIdKey, lazyIdKey, assertErrorIdKey, oneShotKey :: Unique
+thenIOIdKey, lazyIdKey, assertErrorIdKey, oneShotKey, runRWKey :: Unique
thenIOIdKey = mkPreludeMiscIdUnique 103
lazyIdKey = mkPreludeMiscIdUnique 104
assertErrorIdKey = mkPreludeMiscIdUnique 105
oneShotKey = mkPreludeMiscIdUnique 106
+runRWKey = mkPreludeMiscIdUnique 107
breakpointIdKey, breakpointCondIdKey, breakpointAutoIdKey,
breakpointJumpIdKey, breakpointCondJumpIdKey,
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 6fd7181426..06d937a6c9 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -423,6 +423,8 @@ RTS_FUN_DECL(stg_addCFinalizzerToWeakzh);
RTS_FUN_DECL(stg_finalizzeWeakzh);
RTS_FUN_DECL(stg_deRefWeakzh);
+RTS_FUN_DECL(stg_runRWzh);
+
RTS_FUN_DECL(stg_newBCOzh);
RTS_FUN_DECL(stg_mkApUpd0zh);
diff --git a/libraries/base/GHC/IO.hs b/libraries/base/GHC/IO.hs
index 1e8c74e5c1..f38c88f009 100644
--- a/libraries/base/GHC/IO.hs
+++ b/libraries/base/GHC/IO.hs
@@ -176,11 +176,8 @@ like 'bracket' cannot be used safely within 'unsafeDupablePerformIO'.
@since 4.4.0.0
-}
-{-# NOINLINE unsafeDupablePerformIO #-}
- -- See Note [unsafeDupablePerformIO is NOINLINE]
unsafeDupablePerformIO :: IO a -> a
-unsafeDupablePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r)
- -- See Note [unsafeDupablePerformIO has a lazy RHS]
+unsafeDupablePerformIO (IO m) = case runRW# m of (# _, a #) -> a
-- Note [unsafeDupablePerformIO is NOINLINE]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/libraries/base/GHC/ST.hs b/libraries/base/GHC/ST.hs
index 46c5196c9e..d84dd4d9d1 100644
--- a/libraries/base/GHC/ST.hs
+++ b/libraries/base/GHC/ST.hs
@@ -18,7 +18,7 @@
module GHC.ST (
ST(..), STret(..), STRep,
- fixST, runST, runSTRep,
+ fixST, runST,
-- * Unsafe functions
liftST, unsafeInterleaveST
@@ -103,62 +103,10 @@ instance Show (ST s a) where
showsPrec _ _ = showString "<<ST action>>"
showList = showList__ (showsPrec 0)
-{-
-Definition of runST
-~~~~~~~~~~~~~~~~~~~
-
-SLPJ 95/04: Why @runST@ must not have an unfolding; consider:
-\begin{verbatim}
-f x =
- runST ( \ s -> let
- (a, s') = newArray# 100 [] s
- (_, s'') = fill_in_array_or_something a x s'
- in
- freezeArray# a s'' )
-\end{verbatim}
-If we inline @runST@, we'll get:
-\begin{verbatim}
-f x = let
- (a, s') = newArray# 100 [] realWorld#{-NB-}
- (_, s'') = fill_in_array_or_something a x s'
- in
- freezeArray# a s''
-\end{verbatim}
-And now the @newArray#@ binding can be floated to become a CAF, which
-is totally and utterly wrong:
-\begin{verbatim}
-f = let
- (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
- in
- \ x ->
- let (_, s'') = fill_in_array_or_something a x s' in
- freezeArray# a s''
-\end{verbatim}
-All calls to @f@ will share a {\em single} array! End SLPJ 95/04.
--}
-
{-# INLINE runST #-}
--- The INLINE prevents runSTRep getting inlined in *this* module
--- so that it is still visible when runST is inlined in an importing
--- module. Regrettably delicate. runST is behaving like a wrapper.
-
-- | Return the value computed by a state transformer computation.
-- The @forall@ ensures that the internal state used by the 'ST'
-- computation is inaccessible to the rest of the program.
runST :: (forall s. ST s a) -> a
-runST st = runSTRep (case st of { ST st_rep -> st_rep })
-
--- I'm only letting runSTRep be inlined right at the end, in particular *after* full laziness
--- That's what the "INLINE [0]" says.
--- SLPJ Apr 99
--- {-# INLINE [0] runSTRep #-}
-
--- SDM: further to the above, inline phase 0 is run *before*
--- full-laziness at the moment, which means that the above comment is
--- invalid. Inlining runSTRep doesn't make a huge amount of
--- difference, anyway. Hence:
-
-{-# NOINLINE runSTRep #-}
-runSTRep :: (forall s. STRep s a) -> a
-runSTRep st_rep = case st_rep realWorld# of
- (# _, r #) -> r
+runST (ST st_rep) = case runRW# st_rep of (# _, a #) -> a
+-- See Note [Definition of runRW#] in GHC.Magic
diff --git a/libraries/ghc-prim/GHC/Magic.hs b/libraries/ghc-prim/GHC/Magic.hs
index 740abb729e..495705b3b4 100644
--- a/libraries/ghc-prim/GHC/Magic.hs
+++ b/libraries/ghc-prim/GHC/Magic.hs
@@ -1,5 +1,9 @@
+{-# LANGUAGE CPP #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
-----------------------------------------------------------------------------
-- |
-- Module : GHC.Magic
@@ -17,8 +21,9 @@
--
-----------------------------------------------------------------------------
-module GHC.Magic ( inline, lazy, oneShot ) where
+module GHC.Magic ( inline, lazy, oneShot, runRW# ) where
+import GHC.Prim
import GHC.CString ()
-- | The call @inline f@ arranges that 'f' is inlined, regardless of
@@ -82,3 +87,15 @@ oneShot :: (a -> b) -> (a -> b)
oneShot f = f
-- Implementation note: This is wired in in MkId.lhs, so the code here is
-- mostly there to have a place for the documentation.
+
+-- | Apply a function to a 'RealWorld' token.
+runRW# :: (State# RealWorld -> (# State# RealWorld, o #))
+ -> (# State# RealWorld, o #)
+-- See Note [runRW magic] in MkId
+#if !defined(__HADDOCK_VERSION__)
+runRW# m = m realWorld#
+#else
+runRW# = runRW# -- The realWorld# is too much for haddock
+#endif
+{-# NOINLINE runRW# #-}
+-- This is inlined manually in CorePrep
diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md
index 2e4288622b..45daa64b26 100644
--- a/libraries/ghc-prim/changelog.md
+++ b/libraries/ghc-prim/changelog.md
@@ -27,6 +27,7 @@
getSizeofMutableByteArray# :: MutableByteArray# d -> State# d -> (# State# d, Int# #)
subWordC# :: Word# -> Word# -> (# Word#, Int# #)
+ runRW# :: (State# RealWorld -> (# State# RealWorld, o #)) -> (# State# RealWorld, o #)
- Added to `GHC.Types`:
diff --git a/libraries/integer-gmp/src/GHC/Integer/Type.hs b/libraries/integer-gmp/src/GHC/Integer/Type.hs
index 5bc52539fd..167492d34d 100644
--- a/libraries/integer-gmp/src/GHC/Integer/Type.hs
+++ b/libraries/integer-gmp/src/GHC/Integer/Type.hs
@@ -1934,8 +1934,7 @@ liftIO (IO m) = m
-- NB: equivalent of GHC.IO.unsafeDupablePerformIO, see notes there
runS :: S RealWorld a -> a
-runS m = lazy (case m realWorld# of (# _, r #) -> r)
-{-# NOINLINE runS #-}
+runS m = case runRW# m of (# _, a #) -> a
-- stupid hack
fail :: [Char] -> S s a
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 6ac8861450..a86d61f51d 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -271,8 +271,9 @@ test('T7257',
[stats_num_field('bytes allocated',
[(wordsize(32), 1150000000, 10),
# expected value: 1246287228 (i386/Linux)
- (wordsize(64), 1774893760, 5)]),
+ (wordsize(64), 1654893248, 5)]),
# 2012-09-21: 1774893760 (amd64/Linux)
+ # 2015-11-03: 1654893248 (amd64/Linux)
stats_num_field('peak_megabytes_allocated',
[(wordsize(32), 217, 5),
# 2012-10-08: 217 (x86/Linux)
diff --git a/testsuite/tests/primops/should_run/T10678.hs b/testsuite/tests/primops/should_run/T10678.hs
new file mode 100644
index 0000000000..9019ab6345
--- /dev/null
+++ b/testsuite/tests/primops/should_run/T10678.hs
@@ -0,0 +1,22 @@
+{-# LANGUAGE MagicHash #-}
+
+import GHC.Prim
+
+main :: IO ()
+main = go 1000000# 10 (2^100)
+
+go :: Int# -> Integer -> Integer -> IO ()
+go 0# _ _ = return ()
+go n# a b = (a + b) `seq` go (n# -# 1#) a b
+{-# NOINLINE go #-}
+
+{-
+This test is based on a strategy from rwbarton relying on the inefficiency
+of `Integer` addition as defined by `integer-gmp` without `runRW#`.
+
+ When I was testing the patch interactively, I measured allocations for,
+ say, a million (large Integer) + (small Integer) additions. If that
+ addition allocates, say, 6 words, then one can fairly reliably write the
+ program so that it will allocate between 6 million and 7 million words,
+ total.
+-}
diff --git a/testsuite/tests/primops/should_run/all.T b/testsuite/tests/primops/should_run/all.T
index b21279aa99..b0001d6d32 100644
--- a/testsuite/tests/primops/should_run/all.T
+++ b/testsuite/tests/primops/should_run/all.T
@@ -3,3 +3,12 @@ test('T7689', normal, compile_and_run, [''])
# The test is using unboxed tuples, so omit ghci
test('T9430', omit_ways(['ghci']), compile_and_run, [''])
test('T10481', exit_code(1), compile_and_run, [''])
+test('T10678',
+ [stats_num_field('bytes allocated',
+ [(wordsize(64), 88041768, 5)
+ # 2015-11-04: 88041768 +/- 5% (before runRW#)
+ # 2015-11-04: 64004171 (after runRW#)
+ ]),
+ only_ways('normal')
+ ],
+ compile_and_run, ['-O'])