diff options
author | romes <rodrigo.m.mesquita@gmail.com> | 2022-06-04 18:14:01 +0100 |
---|---|---|
committer | romes <rodrigo.m.mesquita@gmail.com> | 2022-06-04 18:14:01 +0100 |
commit | 43ec07e29306c39a877ef832f1ea7d560a4c8145 (patch) | |
tree | a25626442622c61685d72b2c655b625e8f6e6e71 | |
parent | 13bfdade3d6aaa62b7ca9872ac586732ee1e48ce (diff) | |
download | haskell-43ec07e29306c39a877ef832f1ea7d560a4c8145.tar.gz |
Revert "FastString as just a ShortByteString"
This reverts commit 13bfdade3d6aaa62b7ca9872ac586732ee1e48ce.
-rw-r--r-- | compiler/GHC/Data/FastString.hs | 305 | ||||
-rw-r--r-- | compiler/GHC/Types/Unique.hs | 1 |
2 files changed, 155 insertions, 151 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs index e8ba8fad7b..131f174c41 100644 --- a/compiler/GHC/Data/FastString.hs +++ b/compiler/GHC/Data/FastString.hs @@ -1,5 +1,4 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DerivingStrategies #-} @@ -60,7 +59,7 @@ module GHC.Data.FastString lengthFZS, -- * FastStrings - FastString, -- not abstract, for now. + FastString(..), -- not abstract, for now. NonDetFastString (..), LexicalFastString (..), @@ -117,7 +116,7 @@ import GHC.Prelude as Prelude import GHC.Utils.Encoding import GHC.Utils.IO.Unsafe import GHC.Utils.Panic.Plain --- import GHC.Utils.Misc +import GHC.Utils.Misc import GHC.Data.FastMutInt import Control.Concurrent.MVar @@ -154,13 +153,13 @@ import GHC.IO -- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString' bytesFS, fastStringToByteString :: FastString -> ByteString {-# INLINE[1] bytesFS #-} -bytesFS f = SBS.fromShort $ f +bytesFS f = SBS.fromShort $ fs_sbs f {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-} fastStringToByteString = bytesFS fastStringToShortByteString :: FastString -> ShortByteString -fastStringToShortByteString = id +fastStringToShortByteString = fs_sbs fastZStringToByteString :: FastZString -> ByteString fastZStringToByteString (FastZString bs) = bs @@ -169,8 +168,8 @@ fastZStringToByteString (FastZString bs) = bs unsafeMkByteString :: String -> ByteString unsafeMkByteString = BSC.pack --- hashFastString :: FastString -> Int --- hashFastString fs = hashStr $ fs +hashFastString :: FastString -> Int +hashFastString fs = hashStr $ fs_sbs fs -- ----------------------------------------------------------------------------- @@ -199,57 +198,56 @@ comparison. It is also associated with a lazy reference to the Z-encoding of this string which is used by the compiler internally. -} -type FastString = ShortByteString ---data FastString = FastString { --- uniq :: {-# UNPACK #-} !Int, -- unique id --- n_chars :: {-# UNPACK #-} !Int, -- number of chars --- fs_sbs :: {-# UNPACK #-} !ShortByteString, --- fs_zenc :: FastZString --- -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in --- -- GHC.Utils.Encoding. --- -- --- -- Since 'FastString's are globally memoized this is computed at most --- -- once for any given string. --- } - --- instance Eq FastString where --- f1 == f2 = uniq f1 == uniq f2 +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_chars :: {-# UNPACK #-} !Int, -- number of chars + fs_sbs :: {-# UNPACK #-} !ShortByteString, + fs_zenc :: FastZString + -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in + -- GHC.Utils.Encoding. + -- + -- Since 'FastString's are globally memoized this is computed at most + -- once for any given string. + } + +instance Eq FastString where + f1 == f2 = uniq f1 == uniq f2 -- We don't provide any "Ord FastString" instance to force you to think about -- which ordering you want: -- * lexical: deterministic, O(n). Cf lexicalCompareFS and LexicalFastString. -- * by unique: non-deterministic, O(1). Cf uniqCompareFS and NonDetFastString. --- instance IsString FastString where --- fromString = fsLit +instance IsString FastString where + fromString = fsLit --- instance Semi.Semigroup FastString where --- (<>) = appendFS +instance Semi.Semigroup FastString where + (<>) = appendFS --- instance Monoid FastString where --- mempty = nilFS --- mappend = (Semi.<>) --- mconcat = concatFS +instance Monoid FastString where + mempty = nilFS + mappend = (Semi.<>) + mconcat = concatFS --- instance Show FastString where --- show fs = show (unpackFS fs) +instance Show FastString where + show fs = show (unpackFS fs) --- instance Data FastString where --- -- don't traverse? --- toConstr _ = abstractConstr "FastString" --- gunfold _ _ = error "gunfold" --- dataTypeOf _ = mkNoRepType "FastString" +instance Data FastString where + -- don't traverse? + toConstr _ = abstractConstr "FastString" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "FastString" --- instance NFData FastString where --- rnf fs = seq fs () +instance NFData FastString where + rnf fs = seq fs () -- | Compare FastString lexically -- -- If you don't care about the lexical ordering, use `uniqCompareFS` instead. lexicalCompareFS :: FastString -> FastString -> Ordering -lexicalCompareFS = compare - -- if uniq fs1 == uniq fs2 then EQ else - -- utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2) +lexicalCompareFS fs1 fs2 = + if uniq fs1 == uniq fs2 then EQ else + utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2) -- perform a lexical comparison taking into account the Modified UTF-8 -- encoding we use (cf #18562) @@ -257,7 +255,7 @@ lexicalCompareFS = compare -- -- Much cheaper than `lexicalCompareFS` but non-deterministic! uniqCompareFS :: FastString -> FastString -> Ordering -uniqCompareFS = compare -- (uniq fs1) (uniq fs2) +uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2) -- | Non-deterministic FastString -- @@ -315,49 +313,48 @@ Following parameters are determined based on: * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@: on 2018-10-24, we have 13920 entries. -} -numSegments, initialNumBuckets :: Int --- segmentBits, segmentMask, --- segmentBits = 8 +segmentBits, numSegments, segmentMask, initialNumBuckets :: Int +segmentBits = 8 numSegments = 256 -- bit segmentBits --- segmentMask = 0xff -- bit segmentBits - 1 +segmentMask = 0xff -- bit segmentBits - 1 initialNumBuckets = 64 --- hashToSegment# :: Int# -> Int# --- hashToSegment# hash# = hash# `andI#` segmentMask# --- where --- !(I# segmentMask#) = segmentMask - --- hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int# --- hashToIndex# buckets# hash# = --- (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size# --- where --- !(I# segmentBits#) = segmentBits --- size# = sizeofMutableArray# buckets# - --- maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment --- maybeResizeSegment segmentRef = do --- segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef --- let oldSize# = sizeofMutableArray# old# --- newSize# = oldSize# *# 2# --- (I# n#) <- readFastMutInt counter --- if isTrue# (n# <# newSize#) -- maximum load of 1 --- then return segment --- else do --- resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# -> --- case newArray# newSize# [] s1# of --- (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #) --- forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do --- fsList <- IO $ readArray# old# i# --- forM_ fsList $ \fs -> do --- let -- Shall we store in hash value in FastString instead? --- !(I# hash#) = hashFastString fs --- idx# = hashToIndex# new# hash# --- IO $ \s1# -> --- case readArray# new# idx# s1# of --- (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of --- s3# -> (# s3#, () #) --- writeIORef segmentRef resizedSegment --- return resizedSegment +hashToSegment# :: Int# -> Int# +hashToSegment# hash# = hash# `andI#` segmentMask# + where + !(I# segmentMask#) = segmentMask + +hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int# +hashToIndex# buckets# hash# = + (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size# + where + !(I# segmentBits#) = segmentBits + size# = sizeofMutableArray# buckets# + +maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment +maybeResizeSegment segmentRef = do + segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef + let oldSize# = sizeofMutableArray# old# + newSize# = oldSize# *# 2# + (I# n#) <- readFastMutInt counter + if isTrue# (n# <# newSize#) -- maximum load of 1 + then return segment + else do + resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# -> + case newArray# newSize# [] s1# of + (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #) + forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do + fsList <- IO $ readArray# old# i# + forM_ fsList $ \fs -> do + let -- Shall we store in hash value in FastString instead? + !(I# hash#) = hashFastString fs + idx# = hashToIndex# new# hash# + IO $ \s1# -> + case readArray# new# idx# s1# of + (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of + s3# -> (# s3#, () #) + writeIORef segmentRef resizedSegment + return resizedSegment {-# NOINLINE stringTable #-} stringTable :: FastStringTable @@ -456,55 +453,57 @@ The procedure goes like this: * Otherwise, insert and return the string we created. -} --- mkFastStringWith --- :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString --- mkFastStringWith mk_fs sbs = do --- FastStringTableSegment lock _ buckets# <- readIORef segmentRef --- let idx# = hashToIndex# buckets# hash# --- bucket <- IO $ readArray# buckets# idx# --- res <- bucket_match bucket sbs --- case res of --- Just found -> return found --- Nothing -> do --- -- The withMVar below is not dupable. It can lead to deadlock if it is --- -- only run partially and putMVar is not called after takeMVar. --- noDuplicate --- n <- get_uid --- new_fs <- mk_fs n n_zencs --- withMVar lock $ \_ -> insert new_fs --- where --- !(FastStringTable uid n_zencs segments#) = stringTable --- get_uid = atomicFetchAddFastMut uid 1 - --- !(I# hash#) = hashStr sbs --- (# segmentRef #) = indexArray# segments# (hashToSegment# hash#) --- insert fs = do --- FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef --- let idx# = hashToIndex# buckets# hash# --- bucket <- IO $ readArray# buckets# idx# --- res <- bucket_match bucket sbs --- case res of --- -- The FastString was added by another thread after previous read and --- -- before we acquired the write lock. --- Just found -> return found --- Nothing -> do --- IO $ \s1# -> --- case writeArray# buckets# idx# (fs : bucket) s1# of --- s2# -> (# s2#, () #) --- _ <- atomicFetchAddFastMut counter 1 --- return fs - --- bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString) --- bucket_match [] _ = return Nothing --- bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs --- | fs_sbs == sbs = return (Just fs) --- | otherwise = bucket_match ls sbs +mkFastStringWith + :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString +mkFastStringWith mk_fs sbs = do + FastStringTableSegment lock _ buckets# <- readIORef segmentRef + let idx# = hashToIndex# buckets# hash# + bucket <- IO $ readArray# buckets# idx# + res <- bucket_match bucket sbs + case res of + Just found -> return found + Nothing -> do + -- The withMVar below is not dupable. It can lead to deadlock if it is + -- only run partially and putMVar is not called after takeMVar. + noDuplicate + n <- get_uid + new_fs <- mk_fs n n_zencs + withMVar lock $ \_ -> insert new_fs + where + !(FastStringTable uid n_zencs segments#) = stringTable + get_uid = atomicFetchAddFastMut uid 1 + + !(I# hash#) = hashStr sbs + (# segmentRef #) = indexArray# segments# (hashToSegment# hash#) + insert fs = do + FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef + let idx# = hashToIndex# buckets# hash# + bucket <- IO $ readArray# buckets# idx# + res <- bucket_match bucket sbs + case res of + -- The FastString was added by another thread after previous read and + -- before we acquired the write lock. + Just found -> return found + Nothing -> do + IO $ \s1# -> + case writeArray# buckets# idx# (fs : bucket) s1# of + s2# -> (# s2#, () #) + _ <- atomicFetchAddFastMut counter 1 + return fs + +bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString) +bucket_match [] _ = return Nothing +bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs + | fs_sbs == sbs = return (Just fs) + | otherwise = bucket_match ls sbs mkFastStringBytes :: Ptr Word8 -> Int -> FastString mkFastStringBytes !ptr !len = -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is -- idempotent. - unsafeDupablePerformIO $ newSBSFromPtr ptr len + unsafeDupablePerformIO $ do + sbs <- newSBSFromPtr ptr len + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString newSBSFromPtr (Ptr src#) (I# len#) = @@ -516,19 +515,24 @@ newSBSFromPtr (Ptr src#) (I# len#) = -- | Create a 'FastString' by copying an existing 'ByteString' mkFastStringByteString :: ByteString -> FastString -mkFastStringByteString = SBS.toShort +mkFastStringByteString bs = + let sbs = SBS.toShort bs in + inlinePerformIO $ + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs -- | Create a 'FastString' from an existing 'ShortByteString' without -- copying. mkFastStringShortByteString :: ShortByteString -> FastString -mkFastStringShortByteString = id - -- inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs +mkFastStringShortByteString sbs = + inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs -- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString {-# NOINLINE[1] mkFastString #-} mkFastString str = - inlinePerformIO $ utf8EncodeShortByteString str + inlinePerformIO $ do + sbs <- utf8EncodeShortByteString str + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs -- The following rule is used to avoid polluting the non-reclaimable FastString -- table with transient strings when we only want their encoding. @@ -541,17 +545,17 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str) -- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and -- account the number of forced z-strings into the passed 'FastMutInt'. --- mkZFastString :: FastMutInt -> ShortByteString -> FastZString --- mkZFastString n_zencs sbs = unsafePerformIO $ do --- _ <- atomicFetchAddFastMut n_zencs 1 --- return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs)) - --- mkNewFastStringShortByteString :: ShortByteString -> Int --- -> FastMutInt -> IO FastString --- mkNewFastStringShortByteString sbs uid n_zencs = --- let zstr = mkZFastString n_zencs sbs --- chars <- countUTF8Chars sbs --- return (FastString uid chars sbs zstr) +mkZFastString :: FastMutInt -> ShortByteString -> FastZString +mkZFastString n_zencs sbs = unsafePerformIO $ do + _ <- atomicFetchAddFastMut n_zencs 1 + return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs)) + +mkNewFastStringShortByteString :: ShortByteString -> Int + -> FastMutInt -> IO FastString +mkNewFastStringShortByteString sbs uid n_zencs = do + let zstr = mkZFastString n_zencs sbs + chars <- countUTF8Chars sbs + return (FastString uid chars sbs zstr) hashStr :: ShortByteString -> Int -- produce a hash value between 0 & m (inclusive) @@ -580,15 +584,15 @@ hashStr sbs@(SBS.SBS ba#) = loop 0# 0# -- | Returns the length of the 'FastString' in characters lengthFS :: FastString -> Int -lengthFS = inlinePerformIO . countUTF8Chars +lengthFS fs = n_chars fs -- | Returns @True@ if the 'FastString' is empty nullFS :: FastString -> Bool -nullFS fs = SBS.null fs +nullFS fs = SBS.null $ fs_sbs fs -- | Unpacks and decodes the FastString unpackFS :: FastString -> String -unpackFS fs = utf8DecodeShortByteString fs +unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs -- | Returns a Z-encoded version of a 'FastString'. This might be the -- original, if it was already Z-encoded. The first time this @@ -596,17 +600,18 @@ unpackFS fs = utf8DecodeShortByteString fs -- memoized. -- zEncodeFS :: FastString -> FastZString -zEncodeFS = mkFastZStringString . zEncodeString . utf8DecodeShortByteString +zEncodeFS fs = fs_zenc fs appendFS :: FastString -> FastString -> FastString -appendFS = (Semi.<>) +appendFS fs1 fs2 = mkFastStringShortByteString + $ (Semi.<>) (fs_sbs fs1) (fs_sbs fs2) concatFS :: [FastString] -> FastString -concatFS = mconcat +concatFS = mkFastStringShortByteString . mconcat . map fs_sbs headFS :: FastString -> Char headFS fs - | SBS.null fs = panic "headFS: Empty FastString" + | SBS.null $ fs_sbs fs = panic "headFS: Empty FastString" headFS fs = head $ unpackFS fs consFS :: Char -> FastString -> FastString @@ -619,7 +624,7 @@ unconsFS fs = (chr : str) -> Just (chr, mkFastString str) uniqueOfFS :: FastString -> Int -uniqueOfFS = hashStr +uniqueOfFS fs = uniq fs nilFS :: FastString nilFS = mkFastString "" diff --git a/compiler/GHC/Types/Unique.hs b/compiler/GHC/Types/Unique.hs index cdabceab98..f4538bf579 100644 --- a/compiler/GHC/Types/Unique.hs +++ b/compiler/GHC/Types/Unique.hs @@ -18,7 +18,6 @@ Haskell). -} {-# LANGUAGE CPP #-} -{-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE BangPatterns, MagicHash #-} module GHC.Types.Unique ( |