diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-11-05 14:29:33 +0100 |
---|---|---|
committer | Sylvain Henry <sylvain@haskus.fr> | 2021-01-05 15:02:58 +0100 |
commit | 6c771aafa30e261f6822b3ddddbe66f8a55f307c (patch) | |
tree | 0db9f845e28c38d5eaf618735f9be3ab892aa7f9 /compiler/GHC | |
parent | 26a928b8fdb1b4ccb75e8edb620b8cf12cb38621 (diff) | |
download | haskell-6c771aafa30e261f6822b3ddddbe66f8a55f307c.tar.gz |
Implement Unique supply with Addr# atomic primop
Before this patch the compiler depended on the RTS way (threaded or not)
to use atomic incrementation or not. This is wrong because the RTS is
supposed to be switchable at link time, without recompilation.
Now we always use atomic incrementation of the unique counter.
Diffstat (limited to 'compiler/GHC')
-rw-r--r-- | compiler/GHC/Core/Opt/Monad.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Driver/CmdLine.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/Stg/Pipeline.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Tc/Utils/Monad.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Types/Unique.hs | 9 | ||||
-rw-r--r-- | compiler/GHC/Types/Unique/Supply.hs | 193 |
7 files changed, 96 insertions, 129 deletions
diff --git a/compiler/GHC/Core/Opt/Monad.hs b/compiler/GHC/Core/Opt/Monad.hs index 21b4403e94..7fa1c4f871 100644 --- a/compiler/GHC/Core/Opt/Monad.hs +++ b/compiler/GHC/Core/Opt/Monad.hs @@ -59,7 +59,6 @@ import GHC.Core.Unfold import GHC.Types.Basic ( CompilerPhase(..) ) import GHC.Types.Annotations import GHC.Types.Var -import GHC.Types.Unique (uniqFromMask) import GHC.Types.Unique.Supply import GHC.Types.Name.Env import GHC.Types.SrcLoc diff --git a/compiler/GHC/Driver/CmdLine.hs b/compiler/GHC/Driver/CmdLine.hs index 2becd3e952..187ca2661a 100644 --- a/compiler/GHC/Driver/CmdLine.hs +++ b/compiler/GHC/Driver/CmdLine.hs @@ -77,6 +77,7 @@ data OptKind m -- Suppose the flag is -f | OptPrefix (String -> EwM m ()) -- -f or -farg (i.e. the arg is optional) | OptIntSuffix (Maybe Int -> EwM m ()) -- -f or -f=n; pass n to fn | IntSuffix (Int -> EwM m ()) -- -f or -f=n; pass n to fn + | WordSuffix (Word -> EwM m ()) -- -f or -f=n; pass n to fn | FloatSuffix (Float -> EwM m ()) -- -f or -f=n; pass n to fn | PassFlag (String -> EwM m ()) -- -f; pass "-f" fn | AnySuffix (String -> EwM m ()) -- -f or -farg; pass entire "-farg" to fn @@ -253,6 +254,9 @@ processOneArg opt_kind rest arg args IntSuffix f | Just n <- parseInt rest_no_eq -> Right (f n, args) | otherwise -> Left ("malformed integer argument in " ++ dash_arg) + WordSuffix f | Just n <- parseWord rest_no_eq -> Right (f n, args) + | otherwise -> Left ("malformed natural argument in " ++ dash_arg) + FloatSuffix f | Just n <- parseFloat rest_no_eq -> Right (f n, args) | otherwise -> Left ("malformed float argument in " ++ dash_arg) @@ -279,6 +283,7 @@ arg_ok (Prefix _) _ _ = True -- Missing argument checked for in p -- to improve error message (#12625) arg_ok (OptIntSuffix _) _ _ = True arg_ok (IntSuffix _) _ _ = True +arg_ok (WordSuffix _) _ _ = True arg_ok (FloatSuffix _) _ _ = True arg_ok (OptPrefix _) _ _ = True arg_ok (PassFlag _) rest _ = null rest @@ -294,6 +299,11 @@ parseInt s = case reads s of ((n,""):_) -> Just n _ -> Nothing +parseWord :: String -> Maybe Word +parseWord s = case reads s of + ((n,""):_) -> Just n + _ -> Nothing + parseFloat :: String -> Maybe Float parseFloat s = case reads s of ((n,""):_) -> Just n diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index f7cdd5944b..3bbd049144 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -710,8 +710,9 @@ data DynFlags = DynFlags { maxErrors :: Maybe Int, -- | Unique supply configuration for testing build determinism - initialUnique :: Int, + initialUnique :: Word, uniqueIncrement :: Int, + -- 'Int' because it can be used to test uniques in decreasing order. -- | Temporary: CFG Edge weights for fast iterations cfgWeights :: Weights @@ -2092,6 +2093,8 @@ add_dep_message (OptIntSuffix f) message = OptIntSuffix $ \oi -> f oi >> deprecate message add_dep_message (IntSuffix f) message = IntSuffix $ \i -> f i >> deprecate message +add_dep_message (WordSuffix f) message = + WordSuffix $ \i -> f i >> deprecate message add_dep_message (FloatSuffix f) message = FloatSuffix $ \fl -> f fl >> deprecate message add_dep_message (PassFlag f) message = @@ -2856,7 +2859,7 @@ dynamic_flags_deps = [ , make_ord_flag defGhcFlag "fmax-inline-memset-insns" (intSuffix (\n d -> d { maxInlineMemsetInsns = n })) , make_ord_flag defGhcFlag "dinitial-unique" - (intSuffix (\n d -> d { initialUnique = n })) + (wordSuffix (\n d -> d { initialUnique = n })) , make_ord_flag defGhcFlag "dunique-increment" (intSuffix (\n d -> d { uniqueIncrement = n })) @@ -4247,6 +4250,9 @@ intSuffix fn = IntSuffix (\n -> upd (fn n)) intSuffixM :: (Int -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags) intSuffixM fn = IntSuffix (\n -> updM (fn n)) +wordSuffix :: (Word -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) +wordSuffix fn = WordSuffix (\n -> upd (fn n)) + floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) floatSuffix fn = FloatSuffix (\n -> upd (fn n)) diff --git a/compiler/GHC/Stg/Pipeline.hs b/compiler/GHC/Stg/Pipeline.hs index a407737cf1..ea758e58db 100644 --- a/compiler/GHC/Stg/Pipeline.hs +++ b/compiler/GHC/Stg/Pipeline.hs @@ -27,7 +27,6 @@ import GHC.Unit.Module ( Module ) import GHC.Driver.Session import GHC.Utils.Error -import GHC.Types.Unique (uniqFromMask) import GHC.Types.Unique.Supply import GHC.Utils.Outputable import GHC.Utils.Panic diff --git a/compiler/GHC/Tc/Utils/Monad.hs b/compiler/GHC/Tc/Utils/Monad.hs index 240a836d72..7056ba898b 100644 --- a/compiler/GHC/Tc/Utils/Monad.hs +++ b/compiler/GHC/Tc/Utils/Monad.hs @@ -200,7 +200,6 @@ import GHC.Types.SrcLoc import GHC.Types.Name.Env import GHC.Types.Name.Set import GHC.Types.Name.Ppr -import GHC.Types.Unique (uniqFromMask) import GHC.Types.Unique.Supply import GHC.Types.Annotations import GHC.Types.Basic( TopLevelFlag, TypeOrKind(..) ) diff --git a/compiler/GHC/Types/Unique.hs b/compiler/GHC/Types/Unique.hs index 29fd5c6cd6..cc013dc9a1 100644 --- a/compiler/GHC/Types/Unique.hs +++ b/compiler/GHC/Types/Unique.hs @@ -31,7 +31,7 @@ module GHC.Types.Unique ( mkUniqueGrimily, getKey, - mkUnique, unpkUnique, uniqFromMask, + mkUnique, unpkUnique, eqUnique, ltUnique, incrUnique, stepUnique, @@ -147,13 +147,6 @@ unpkUnique (MkUnique u) in (tag, i) -foreign import ccall unsafe "genSym" genSym :: IO Int - -uniqFromMask :: Char -> IO Unique -uniqFromMask mask - = do { uqNum <- genSym - ; return $! mkUnique mask uqNum } - -- | The interface file symbol-table encoding assumes that known-key uniques fit -- in 30-bits; verify this. -- diff --git a/compiler/GHC/Types/Unique/Supply.hs b/compiler/GHC/Types/Unique/Supply.hs index 30181c773e..4b146edd9f 100644 --- a/compiler/GHC/Types/Unique/Supply.hs +++ b/compiler/GHC/Types/Unique/Supply.hs @@ -3,11 +3,8 @@ (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -} -{-# OPTIONS_GHC -fno-state-hack #-} - -- This -fno-state-hack is important - -- See Note [Optimising the unique supply] - {-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE BangPatterns #-} @@ -22,7 +19,7 @@ module GHC.Types.Unique.Supply ( -- ** Operations on supplies uniqFromSupply, uniqsFromSupply, -- basic ops - takeUniqFromSupply, + takeUniqFromSupply, uniqFromMask, mkSplitUniqSupply, splitUniqSupply, listSplitUniqSupply, @@ -40,7 +37,7 @@ module GHC.Types.Unique.Supply ( import GHC.Prelude import GHC.Types.Unique -import GHC.Utils.Panic.Plain (panic) +import GHC.Utils.Panic.Plain import GHC.IO @@ -48,9 +45,17 @@ import GHC.Utils.Monad import Control.Monad import Data.Bits import Data.Char -import GHC.Exts( inline ) +import GHC.Exts( Ptr(..), noDuplicate# ) +#if MIN_VERSION_GLASGOW_HASKELL(9,1,0,0) +import GHC.Exts( Int(..), word2Int#, fetchAddWordAddr#, plusWord#, readWordOffAddr# ) +#if defined(DEBUG) +import GHC.Utils.Misc +#endif +#endif +import Foreign.Storable #include "Unique.h" +#include "HsVersions.h" {- ************************************************************************ @@ -83,8 +88,23 @@ lazily-evaluated infinite tree. Note [Optimising the unique supply] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The inner loop of mkSplitUniqSupply is a function closure + mk_supply s0 = + case noDuplicate# s0 of { s1 -> + case unIO genSym s1 of { (# s2, u #) -> + case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s2 of { (# s3, x #) -> + case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s3 of { (# s4, y #) -> + (# s4, MkSplitUniqSupply (mask .|. u) x y #) + }}}} + +It's a classic example of an IO action that is captured and then called +repeatedly (see #18238 for some discussion). It mustn't allocate! The test +perf/should_run/UniqLoop keeps track of this loop. Watch it carefully. + +We used to write it as: + mk_supply :: IO UniqSupply mk_supply = unsafeInterleaveIO $ genSym >>= \ u -> @@ -92,100 +112,11 @@ The inner loop of mkSplitUniqSupply is a function closure mk_supply >>= \ s2 -> return (MkSplitUniqSupply (mask .|. u) s1 s2) -It's a classic example of an IO action that is captured -and the called repeatedly (see #18238 for some discussion). -It turns out that we can get something like - - $wmkSplitUniqSupply c# s - = letrec - mk_supply - = \s -> unsafeDupableInterleaveIO1 - (\s2 -> case noDuplicate# s2 of s3 -> - ... - case mk_supply s4 of (# s5, t1 #) -> - ... - (# s6, MkSplitUniqSupply ... #) - in mk_supply s - -This is bad becuase we allocate that inner (\s2...) every time. -Why doesn't full laziness float out the (\s2...)? Because of -the state hack (#18238). - -So for this module we switch the state hack off -- it's an example -of when it makes things worse rather than better. And we use -multiShotIO (see Note [multiShotIO]) thus: - - mk_supply = multiShotIO $ - unsafeInterleaveIO $ - genSym >>= \ u -> - ... - -Now full laziness can float that lambda out, and we get - - $wmkSplitUniqSupply c# s - = letrec - lvl = \s2 -> case noDuplicate# s2 of s3 -> - ... - case unsafeDupableInterleaveIO - lvl s4 of (# s5, t1 #) -> - ... - (# s6, MkSplitUniqSupply ... #) - in unsafeDupableInterleaveIO1 lvl s - -This is all terribly delicate. It just so happened that before I -fixed #18078, and even with the state-hack still enabled, we were -getting this: - - $wmkSplitUniqSupply c# s - = letrec - mk_supply = \s2 -> case noDuplicate# s2 of s3 -> - ... - case mks_help s3 of (# s5,t1 #) -> - ... - (# s6, MkSplitUniqSupply ... #) - mks_help = unsafeDupableInterleaveIO mk_supply - -- mks_help marked as loop breaker - in mks_help s - -The fact that we didn't need full laziness was somewhat fortuitious. -We got the right number of allocations. But the partial application of -the arity-2 unsafeDupableInterleaveIO in mks_help makes it quite a -bit slower. (Test perf/should_run/UniqLoop had a 20% perf change.) - -Sigh. The test perf/should_run/UniqLoop keeps track of this loop. -Watch it carefully. - -Note [multiShotIO] -~~~~~~~~~~~~~~~~~~ -The function multiShotIO :: IO a -> IO a -says that the argument IO action may be invoked repeatedly (is -multi-shot), and so there should be a multi-shot lambda around it. -It's quite easy to define, in any module with `-fno-state-hack`: - multiShotIO :: IO a -> IO a - {-# INLINE multiShotIO #-} - multiShotIO (IO m) = IO (\s -> inline m s) - -Because of -fno-state-hack, that '\s' will be multi-shot. Now, -ignoring the casts from IO: - multiShotIO (\ss{one-shot}. blah) - ==> let m = \ss{one-shot}. blah - in \s. inline m s - ==> \s. (\ss{one-shot}.blah) s - ==> \s. blah[s/ss] - -The magic `inline` function does two things -* It prevents eta reduction. If we wrote just - multiShotIO (IO m) = IO (\s -> m s) - the lamda would eta-reduce to 'm' and all would be lost. - -* It helps ensure that 'm' really does inline. - -Note that 'inline' evaporates in phase 0. See Note [inlineId magic] -in GHC.Core.Opt.ConstantFold.match_inline. - -The INLINE pragma on multiShotIO is very important, else the -'inline' call will evaporate when compiling the module that -defines 'multiShotIO', before it is ever exported. +and to rely on -fno-state-hack, full laziness and inlining to get the same +result. It was very brittle and required enabling -fno-state-hack globally. So +it has been rewritten using lower level constructs to explicitly state what we +want. + -} @@ -208,28 +139,58 @@ mkSplitUniqSupply :: Char -> IO UniqSupply -- See Note [How the unique supply works] -- See Note [Optimising the unique supply] mkSplitUniqSupply c - = mk_supply + = unsafeDupableInterleaveIO (IO mk_supply) + where - !mask = ord c `shiftL` uNIQUE_BITS + !mask = ord c `unsafeShiftL` uNIQUE_BITS -- Here comes THE MAGIC: see Note [How the unique supply works] -- This is one of the most hammered bits in the whole compiler -- See Note [Optimising the unique supply] - -- NB: Use unsafeInterleaveIO for thread-safety. - mk_supply = multiShotIO $ - unsafeInterleaveIO $ - genSym >>= \ u -> - mk_supply >>= \ s1 -> - mk_supply >>= \ s2 -> - return (MkSplitUniqSupply (mask .|. u) s1 s2) + -- NB: Use noDuplicate# for thread-safety. + mk_supply s0 = + case noDuplicate# s0 of { s1 -> + case unIO genSym s1 of { (# s2, u #) -> + -- deferred IO computations + case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s2 of { (# s3, x #) -> + case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s3 of { (# s4, y #) -> + (# s4, MkSplitUniqSupply (mask .|. u) x y #) + }}}} + +#if !MIN_VERSION_GLASGOW_HASKELL(9,1,0,0) +foreign import ccall unsafe "genSym" genSym :: IO Int +#else +genSym :: IO Int +genSym = do + let !mask = (1 `unsafeShiftL` uNIQUE_BITS) - 1 + let !(Ptr counter) = ghc_unique_counter + let !(Ptr inc_ptr) = ghc_unique_inc + u <- IO $ \s0 -> case readWordOffAddr# inc_ptr 0# s0 of + (# s1, inc #) -> case fetchAddWordAddr# counter inc s1 of + (# s2, val #) -> + let !u = I# (word2Int# (val `plusWord#` inc)) .&. mask + in (# s2, u #) +#if defined(DEBUG) + -- Uh oh! We will overflow next time a unique is requested. + -- (Note that if the increment isn't 1 we may miss this check) + MASSERT(u /= mask) +#endif + return u +#endif -multiShotIO :: IO a -> IO a -{-# INLINE multiShotIO #-} --- See Note [multiShotIO] -multiShotIO (IO m) = IO (\s -> inline m s) +foreign import ccall unsafe "&ghc_unique_counter" ghc_unique_counter :: Ptr Word +foreign import ccall unsafe "&ghc_unique_inc" ghc_unique_inc :: Ptr Int + +initUniqSupply :: Word -> Int -> IO () +initUniqSupply counter inc = do + poke ghc_unique_counter counter + poke ghc_unique_inc inc + +uniqFromMask :: Char -> IO Unique +uniqFromMask mask + = do { uqNum <- genSym + ; return $! mkUnique mask uqNum } -foreign import ccall unsafe "genSym" genSym :: IO Int -foreign import ccall unsafe "initGenSym" initUniqSupply :: Int -> Int -> IO () splitUniqSupply :: UniqSupply -> (UniqSupply, UniqSupply) -- ^ Build two 'UniqSupply' from a single one, each of which |