From 351de169e14ad9277aaca653df4a3753c151f7bb Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Thu, 12 Nov 2015 14:52:11 +0100 Subject: New magic function for applying realWorld# Test Plan: validate Reviewers: goldfire, erikd, rwbarton, simonpj, austin, simonmar, hvr Reviewed By: simonpj Subscribers: simonmar, thomie Differential Revision: https://phabricator.haskell.org/D1103 GHC Trac Issues: #10678 --- compiler/basicTypes/MkId.hs | 59 +++++++++++++++++++++++++-- compiler/coreSyn/CorePrep.hs | 13 +++++- compiler/prelude/PrelNames.hs | 3 +- includes/stg/MiscClosures.h | 2 + libraries/base/GHC/IO.hs | 5 +-- libraries/base/GHC/ST.hs | 58 ++------------------------ libraries/ghc-prim/GHC/Magic.hs | 19 ++++++++- libraries/ghc-prim/changelog.md | 1 + libraries/integer-gmp/src/GHC/Integer/Type.hs | 3 +- testsuite/tests/perf/should_run/all.T | 3 +- testsuite/tests/primops/should_run/T10678.hs | 22 ++++++++++ testsuite/tests/primops/should_run/all.T | 9 ++++ 12 files changed, 129 insertions(+), 68 deletions(-) create mode 100644 testsuite/tests/primops/should_run/T10678.hs diff --git a/compiler/basicTypes/MkId.hs b/compiler/basicTypes/MkId.hs index c2a3678f02..989d79707e 100644 --- a/compiler/basicTypes/MkId.hs +++ b/compiler/basicTypes/MkId.hs @@ -30,7 +30,7 @@ module MkId ( wiredInIds, ghcPrimIds, unsafeCoerceName, unsafeCoerceId, realWorldPrimId, voidPrimId, voidArgId, - nullAddrId, seqId, lazyId, lazyIdKey, + nullAddrId, seqId, lazyId, lazyIdKey, runRWId, coercionTokenId, magicDictId, coerceId, proxyHashId, @@ -120,7 +120,7 @@ is right here. wiredInIds :: [Id] wiredInIds - = [lazyId, dollarId, oneShotId] + = [lazyId, dollarId, oneShotId, runRWId] ++ errorIds -- Defined in MkCore ++ ghcPrimIds @@ -1057,7 +1057,8 @@ another gun with which to shoot yourself in the foot. lazyIdName, unsafeCoerceName, nullAddrName, seqName, realWorldName, voidPrimIdName, coercionTokenName, - magicDictName, coerceName, proxyName, dollarName, oneShotName :: Name + magicDictName, coerceName, proxyName, dollarName, oneShotName, + runRWName :: Name unsafeCoerceName = mkWiredInIdName gHC_PRIM (fsLit "unsafeCoerce#") unsafeCoerceIdKey unsafeCoerceId nullAddrName = mkWiredInIdName gHC_PRIM (fsLit "nullAddr#") nullAddrIdKey nullAddrId seqName = mkWiredInIdName gHC_PRIM (fsLit "seq") seqIdKey seqId @@ -1070,6 +1071,7 @@ coerceName = mkWiredInIdName gHC_PRIM (fsLit "coerce") coerceKey proxyName = mkWiredInIdName gHC_PRIM (fsLit "proxy#") proxyHashKey proxyHashId dollarName = mkWiredInIdName gHC_BASE (fsLit "$") dollarIdKey dollarId oneShotName = mkWiredInIdName gHC_MAGIC (fsLit "oneShot") oneShotKey oneShotId +runRWName = mkWiredInIdName gHC_MAGIC (fsLit "runRW#") runRWKey runRWId dollarId :: Id -- Note [dollarId magic] dollarId = pcMiscPrelId dollarName ty @@ -1182,6 +1184,19 @@ oneShotId = pcMiscPrelId oneShotName ty info x' = setOneShotLambda x rhs = mkLams [openAlphaTyVar, openBetaTyVar, body, x'] $ Var body `App` Var x +runRWId :: Id -- See Note [runRW magic] in this module +runRWId = pcMiscPrelId runRWName ty info + where + info = noCafIdInfo `setInlinePragInfo` neverInlinePragma + -- State# RealWorld + stateRW = mkTyConApp statePrimTyCon [realWorldTy] + -- (# State# RealWorld, o #) + ret_ty = mkTyConApp unboxedPairTyCon [stateRW, openAlphaTy] + -- State# RealWorld -> (# State# RealWorld, o #) + arg_ty = stateRW `mkFunTy` ret_ty + -- (State# RealWorld -> (# State# RealWorld, o #)) + -- -> (# State# RealWorld, o #) + ty = mkForAllTys [openAlphaTyVar] (arg_ty `mkFunTy` ret_ty) -------------------------------------------------------------------------------- magicDictId :: Id -- See Note [magicDictId magic] @@ -1322,6 +1337,44 @@ See Trac #3259 for a real world example. lazyId is defined in GHC.Base, so we don't *have* to inline it. If it appears un-applied, we'll end up just calling it. +Note [runRW magic] +~~~~~~~~~~~~~~~~~~ +Some definitions, for instance @runST@, must have careful control over float out +of the bindings in their body. Consider this use of @runST@, + + f x = runST ( \ s -> let (a, s') = newArray# 100 [] s + (_, s'') = fill_in_array_or_something a x s' + in freezeArray# a s'' ) + +If we inline @runST@, we'll get: + + f x = let (a, s') = newArray# 100 [] realWorld#{-NB-} + (_, s'') = fill_in_array_or_something a x s' + in freezeArray# a s'' + +And now if we allow the @newArray#@ binding to float out to become a CAF, +we end up with a result that is totally and utterly wrong: + + f = let (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!! + in \ x -> + let (_, s'') = fill_in_array_or_something a x s' + in freezeArray# a s'' + +All calls to @f@ will share a {\em single} array! Clearly this is nonsense and +must be prevented. + +This is what @runRW#@ gives us: by being inlined extremely late in the +optimization (right before lowering to STG, in CorePrep), we can ensure that +no further floating will occur. This allows us to safely inline things like +@runST@, which are otherwise needlessly expensive (see #10678 and #5916). + +While the definition of @GHC.Magic.runRW#@, we override its type in @MkId@ +to be open-kinded, + + runRW# :: (o :: OpenKind) => (State# RealWorld -> (# State# RealWorld, o #)) + -> (# State# RealWorld, o #) + + Note [The oneShot function] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the context of making left-folds fuse somewhat okish (see ticket #7994 diff --git a/compiler/coreSyn/CorePrep.hs b/compiler/coreSyn/CorePrep.hs index 23afcdfb04..e49ece43d2 100644 --- a/compiler/coreSyn/CorePrep.hs +++ b/compiler/coreSyn/CorePrep.hs @@ -18,6 +18,7 @@ import OccurAnal import HscTypes import PrelNames +import MkId ( realWorldPrimId ) import CoreUtils import CoreArity import CoreFVs @@ -511,10 +512,20 @@ cpeRhsE env (Lit (LitInteger i _)) cpeRhsE _env expr@(Lit {}) = return (emptyFloats, expr) cpeRhsE env expr@(Var {}) = cpeApp env expr -cpeRhsE env (Var f `App` _ `App` arg) +cpeRhsE env (Var f `App` _{-type-} `App` arg) | f `hasKey` lazyIdKey -- Replace (lazy a) by a = cpeRhsE env arg -- See Note [lazyId magic] in MkId + -- See Note [runRW magic] in MkId + | f `hasKey` runRWKey -- Replace (runRW# f) by (f realWorld#), + = case arg of -- beta reducing if possible + Lam s body -> cpeRhsE env (substExpr (text "runRW#") subst body) + where subst = extendIdSubst emptySubst s (Var realWorldPrimId) + -- XXX I think we can use emptySubst here + -- because realWorldPrimId is a global variable + -- and so cannot be bound by a lambda in body + _ -> cpeRhsE env (arg `App` Var realWorldPrimId) + cpeRhsE env expr@(App {}) = cpeApp env expr cpeRhsE env (Let bind expr) diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs index 05a38ffec9..7229f76401 100644 --- a/compiler/prelude/PrelNames.hs +++ b/compiler/prelude/PrelNames.hs @@ -1834,11 +1834,12 @@ rootMainKey, runMainKey :: Unique rootMainKey = mkPreludeMiscIdUnique 101 runMainKey = mkPreludeMiscIdUnique 102 -thenIOIdKey, lazyIdKey, assertErrorIdKey, oneShotKey :: Unique +thenIOIdKey, lazyIdKey, assertErrorIdKey, oneShotKey, runRWKey :: Unique thenIOIdKey = mkPreludeMiscIdUnique 103 lazyIdKey = mkPreludeMiscIdUnique 104 assertErrorIdKey = mkPreludeMiscIdUnique 105 oneShotKey = mkPreludeMiscIdUnique 106 +runRWKey = mkPreludeMiscIdUnique 107 breakpointIdKey, breakpointCondIdKey, breakpointAutoIdKey, breakpointJumpIdKey, breakpointCondJumpIdKey, diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 6fd7181426..06d937a6c9 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -423,6 +423,8 @@ RTS_FUN_DECL(stg_addCFinalizzerToWeakzh); RTS_FUN_DECL(stg_finalizzeWeakzh); RTS_FUN_DECL(stg_deRefWeakzh); +RTS_FUN_DECL(stg_runRWzh); + RTS_FUN_DECL(stg_newBCOzh); RTS_FUN_DECL(stg_mkApUpd0zh); diff --git a/libraries/base/GHC/IO.hs b/libraries/base/GHC/IO.hs index 1e8c74e5c1..f38c88f009 100644 --- a/libraries/base/GHC/IO.hs +++ b/libraries/base/GHC/IO.hs @@ -176,11 +176,8 @@ like 'bracket' cannot be used safely within 'unsafeDupablePerformIO'. @since 4.4.0.0 -} -{-# NOINLINE unsafeDupablePerformIO #-} - -- See Note [unsafeDupablePerformIO is NOINLINE] unsafeDupablePerformIO :: IO a -> a -unsafeDupablePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r) - -- See Note [unsafeDupablePerformIO has a lazy RHS] +unsafeDupablePerformIO (IO m) = case runRW# m of (# _, a #) -> a -- Note [unsafeDupablePerformIO is NOINLINE] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/libraries/base/GHC/ST.hs b/libraries/base/GHC/ST.hs index 46c5196c9e..d84dd4d9d1 100644 --- a/libraries/base/GHC/ST.hs +++ b/libraries/base/GHC/ST.hs @@ -18,7 +18,7 @@ module GHC.ST ( ST(..), STret(..), STRep, - fixST, runST, runSTRep, + fixST, runST, -- * Unsafe functions liftST, unsafeInterleaveST @@ -103,62 +103,10 @@ instance Show (ST s a) where showsPrec _ _ = showString "<>" showList = showList__ (showsPrec 0) -{- -Definition of runST -~~~~~~~~~~~~~~~~~~~ - -SLPJ 95/04: Why @runST@ must not have an unfolding; consider: -\begin{verbatim} -f x = - runST ( \ s -> let - (a, s') = newArray# 100 [] s - (_, s'') = fill_in_array_or_something a x s' - in - freezeArray# a s'' ) -\end{verbatim} -If we inline @runST@, we'll get: -\begin{verbatim} -f x = let - (a, s') = newArray# 100 [] realWorld#{-NB-} - (_, s'') = fill_in_array_or_something a x s' - in - freezeArray# a s'' -\end{verbatim} -And now the @newArray#@ binding can be floated to become a CAF, which -is totally and utterly wrong: -\begin{verbatim} -f = let - (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!! - in - \ x -> - let (_, s'') = fill_in_array_or_something a x s' in - freezeArray# a s'' -\end{verbatim} -All calls to @f@ will share a {\em single} array! End SLPJ 95/04. --} - {-# INLINE runST #-} --- The INLINE prevents runSTRep getting inlined in *this* module --- so that it is still visible when runST is inlined in an importing --- module. Regrettably delicate. runST is behaving like a wrapper. - -- | Return the value computed by a state transformer computation. -- The @forall@ ensures that the internal state used by the 'ST' -- computation is inaccessible to the rest of the program. runST :: (forall s. ST s a) -> a -runST st = runSTRep (case st of { ST st_rep -> st_rep }) - --- I'm only letting runSTRep be inlined right at the end, in particular *after* full laziness --- That's what the "INLINE [0]" says. --- SLPJ Apr 99 --- {-# INLINE [0] runSTRep #-} - --- SDM: further to the above, inline phase 0 is run *before* --- full-laziness at the moment, which means that the above comment is --- invalid. Inlining runSTRep doesn't make a huge amount of --- difference, anyway. Hence: - -{-# NOINLINE runSTRep #-} -runSTRep :: (forall s. STRep s a) -> a -runSTRep st_rep = case st_rep realWorld# of - (# _, r #) -> r +runST (ST st_rep) = case runRW# st_rep of (# _, a #) -> a +-- See Note [Definition of runRW#] in GHC.Magic diff --git a/libraries/ghc-prim/GHC/Magic.hs b/libraries/ghc-prim/GHC/Magic.hs index 740abb729e..495705b3b4 100644 --- a/libraries/ghc-prim/GHC/Magic.hs +++ b/libraries/ghc-prim/GHC/Magic.hs @@ -1,5 +1,9 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE Trustworthy #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} + ----------------------------------------------------------------------------- -- | -- Module : GHC.Magic @@ -17,8 +21,9 @@ -- ----------------------------------------------------------------------------- -module GHC.Magic ( inline, lazy, oneShot ) where +module GHC.Magic ( inline, lazy, oneShot, runRW# ) where +import GHC.Prim import GHC.CString () -- | The call @inline f@ arranges that 'f' is inlined, regardless of @@ -82,3 +87,15 @@ oneShot :: (a -> b) -> (a -> b) oneShot f = f -- Implementation note: This is wired in in MkId.lhs, so the code here is -- mostly there to have a place for the documentation. + +-- | Apply a function to a 'RealWorld' token. +runRW# :: (State# RealWorld -> (# State# RealWorld, o #)) + -> (# State# RealWorld, o #) +-- See Note [runRW magic] in MkId +#if !defined(__HADDOCK_VERSION__) +runRW# m = m realWorld# +#else +runRW# = runRW# -- The realWorld# is too much for haddock +#endif +{-# NOINLINE runRW# #-} +-- This is inlined manually in CorePrep diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md index 2e4288622b..45daa64b26 100644 --- a/libraries/ghc-prim/changelog.md +++ b/libraries/ghc-prim/changelog.md @@ -27,6 +27,7 @@ getSizeofMutableByteArray# :: MutableByteArray# d -> State# d -> (# State# d, Int# #) subWordC# :: Word# -> Word# -> (# Word#, Int# #) + runRW# :: (State# RealWorld -> (# State# RealWorld, o #)) -> (# State# RealWorld, o #) - Added to `GHC.Types`: diff --git a/libraries/integer-gmp/src/GHC/Integer/Type.hs b/libraries/integer-gmp/src/GHC/Integer/Type.hs index 5bc52539fd..167492d34d 100644 --- a/libraries/integer-gmp/src/GHC/Integer/Type.hs +++ b/libraries/integer-gmp/src/GHC/Integer/Type.hs @@ -1934,8 +1934,7 @@ liftIO (IO m) = m -- NB: equivalent of GHC.IO.unsafeDupablePerformIO, see notes there runS :: S RealWorld a -> a -runS m = lazy (case m realWorld# of (# _, r #) -> r) -{-# NOINLINE runS #-} +runS m = case runRW# m of (# _, a #) -> a -- stupid hack fail :: [Char] -> S s a diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index 6ac8861450..a86d61f51d 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -271,8 +271,9 @@ test('T7257', [stats_num_field('bytes allocated', [(wordsize(32), 1150000000, 10), # expected value: 1246287228 (i386/Linux) - (wordsize(64), 1774893760, 5)]), + (wordsize(64), 1654893248, 5)]), # 2012-09-21: 1774893760 (amd64/Linux) + # 2015-11-03: 1654893248 (amd64/Linux) stats_num_field('peak_megabytes_allocated', [(wordsize(32), 217, 5), # 2012-10-08: 217 (x86/Linux) diff --git a/testsuite/tests/primops/should_run/T10678.hs b/testsuite/tests/primops/should_run/T10678.hs new file mode 100644 index 0000000000..9019ab6345 --- /dev/null +++ b/testsuite/tests/primops/should_run/T10678.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE MagicHash #-} + +import GHC.Prim + +main :: IO () +main = go 1000000# 10 (2^100) + +go :: Int# -> Integer -> Integer -> IO () +go 0# _ _ = return () +go n# a b = (a + b) `seq` go (n# -# 1#) a b +{-# NOINLINE go #-} + +{- +This test is based on a strategy from rwbarton relying on the inefficiency +of `Integer` addition as defined by `integer-gmp` without `runRW#`. + + When I was testing the patch interactively, I measured allocations for, + say, a million (large Integer) + (small Integer) additions. If that + addition allocates, say, 6 words, then one can fairly reliably write the + program so that it will allocate between 6 million and 7 million words, + total. +-} diff --git a/testsuite/tests/primops/should_run/all.T b/testsuite/tests/primops/should_run/all.T index b21279aa99..b0001d6d32 100644 --- a/testsuite/tests/primops/should_run/all.T +++ b/testsuite/tests/primops/should_run/all.T @@ -3,3 +3,12 @@ test('T7689', normal, compile_and_run, ['']) # The test is using unboxed tuples, so omit ghci test('T9430', omit_ways(['ghci']), compile_and_run, ['']) test('T10481', exit_code(1), compile_and_run, ['']) +test('T10678', + [stats_num_field('bytes allocated', + [(wordsize(64), 88041768, 5) + # 2015-11-04: 88041768 +/- 5% (before runRW#) + # 2015-11-04: 64004171 (after runRW#) + ]), + only_ways('normal') + ], + compile_and_run, ['-O']) -- cgit v1.2.1