summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorromes <rodrigo.m.mesquita@gmail.com>2022-06-04 18:14:01 +0100
committerromes <rodrigo.m.mesquita@gmail.com>2022-06-04 18:14:01 +0100
commit43ec07e29306c39a877ef832f1ea7d560a4c8145 (patch)
treea25626442622c61685d72b2c655b625e8f6e6e71
parent13bfdade3d6aaa62b7ca9872ac586732ee1e48ce (diff)
downloadhaskell-43ec07e29306c39a877ef832f1ea7d560a4c8145.tar.gz
Revert "FastString as just a ShortByteString"
This reverts commit 13bfdade3d6aaa62b7ca9872ac586732ee1e48ce.
-rw-r--r--compiler/GHC/Data/FastString.hs305
-rw-r--r--compiler/GHC/Types/Unique.hs1
2 files changed, 155 insertions, 151 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index e8ba8fad7b..131f174c41 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -1,5 +1,4 @@
{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DerivingStrategies #-}
@@ -60,7 +59,7 @@ module GHC.Data.FastString
lengthFZS,
-- * FastStrings
- FastString, -- not abstract, for now.
+ FastString(..), -- not abstract, for now.
NonDetFastString (..),
LexicalFastString (..),
@@ -117,7 +116,7 @@ import GHC.Prelude as Prelude
import GHC.Utils.Encoding
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
--- import GHC.Utils.Misc
+import GHC.Utils.Misc
import GHC.Data.FastMutInt
import Control.Concurrent.MVar
@@ -154,13 +153,13 @@ import GHC.IO
-- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS, fastStringToByteString :: FastString -> ByteString
{-# INLINE[1] bytesFS #-}
-bytesFS f = SBS.fromShort $ f
+bytesFS f = SBS.fromShort $ fs_sbs f
{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
fastStringToByteString = bytesFS
fastStringToShortByteString :: FastString -> ShortByteString
-fastStringToShortByteString = id
+fastStringToShortByteString = fs_sbs
fastZStringToByteString :: FastZString -> ByteString
fastZStringToByteString (FastZString bs) = bs
@@ -169,8 +168,8 @@ fastZStringToByteString (FastZString bs) = bs
unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
--- hashFastString :: FastString -> Int
--- hashFastString fs = hashStr $ fs
+hashFastString :: FastString -> Int
+hashFastString fs = hashStr $ fs_sbs fs
-- -----------------------------------------------------------------------------
@@ -199,57 +198,56 @@ comparison.
It is also associated with a lazy reference to the Z-encoding
of this string which is used by the compiler internally.
-}
-type FastString = ShortByteString
---data FastString = FastString {
--- uniq :: {-# UNPACK #-} !Int, -- unique id
--- n_chars :: {-# UNPACK #-} !Int, -- number of chars
--- fs_sbs :: {-# UNPACK #-} !ShortByteString,
--- fs_zenc :: FastZString
--- -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
--- -- GHC.Utils.Encoding.
--- --
--- -- Since 'FastString's are globally memoized this is computed at most
--- -- once for any given string.
--- }
-
--- instance Eq FastString where
--- f1 == f2 = uniq f1 == uniq f2
+data FastString = FastString {
+ uniq :: {-# UNPACK #-} !Int, -- unique id
+ n_chars :: {-# UNPACK #-} !Int, -- number of chars
+ fs_sbs :: {-# UNPACK #-} !ShortByteString,
+ fs_zenc :: FastZString
+ -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
+ -- GHC.Utils.Encoding.
+ --
+ -- Since 'FastString's are globally memoized this is computed at most
+ -- once for any given string.
+ }
+
+instance Eq FastString where
+ f1 == f2 = uniq f1 == uniq f2
-- We don't provide any "Ord FastString" instance to force you to think about
-- which ordering you want:
-- * lexical: deterministic, O(n). Cf lexicalCompareFS and LexicalFastString.
-- * by unique: non-deterministic, O(1). Cf uniqCompareFS and NonDetFastString.
--- instance IsString FastString where
--- fromString = fsLit
+instance IsString FastString where
+ fromString = fsLit
--- instance Semi.Semigroup FastString where
--- (<>) = appendFS
+instance Semi.Semigroup FastString where
+ (<>) = appendFS
--- instance Monoid FastString where
--- mempty = nilFS
--- mappend = (Semi.<>)
--- mconcat = concatFS
+instance Monoid FastString where
+ mempty = nilFS
+ mappend = (Semi.<>)
+ mconcat = concatFS
--- instance Show FastString where
--- show fs = show (unpackFS fs)
+instance Show FastString where
+ show fs = show (unpackFS fs)
--- instance Data FastString where
--- -- don't traverse?
--- toConstr _ = abstractConstr "FastString"
--- gunfold _ _ = error "gunfold"
--- dataTypeOf _ = mkNoRepType "FastString"
+instance Data FastString where
+ -- don't traverse?
+ toConstr _ = abstractConstr "FastString"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNoRepType "FastString"
--- instance NFData FastString where
--- rnf fs = seq fs ()
+instance NFData FastString where
+ rnf fs = seq fs ()
-- | Compare FastString lexically
--
-- If you don't care about the lexical ordering, use `uniqCompareFS` instead.
lexicalCompareFS :: FastString -> FastString -> Ordering
-lexicalCompareFS = compare
- -- if uniq fs1 == uniq fs2 then EQ else
- -- utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
+lexicalCompareFS fs1 fs2 =
+ if uniq fs1 == uniq fs2 then EQ else
+ utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
-- perform a lexical comparison taking into account the Modified UTF-8
-- encoding we use (cf #18562)
@@ -257,7 +255,7 @@ lexicalCompareFS = compare
--
-- Much cheaper than `lexicalCompareFS` but non-deterministic!
uniqCompareFS :: FastString -> FastString -> Ordering
-uniqCompareFS = compare -- (uniq fs1) (uniq fs2)
+uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2)
-- | Non-deterministic FastString
--
@@ -315,49 +313,48 @@ Following parameters are determined based on:
* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
on 2018-10-24, we have 13920 entries.
-}
-numSegments, initialNumBuckets :: Int
--- segmentBits, segmentMask,
--- segmentBits = 8
+segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
+segmentBits = 8
numSegments = 256 -- bit segmentBits
--- segmentMask = 0xff -- bit segmentBits - 1
+segmentMask = 0xff -- bit segmentBits - 1
initialNumBuckets = 64
--- hashToSegment# :: Int# -> Int#
--- hashToSegment# hash# = hash# `andI#` segmentMask#
--- where
--- !(I# segmentMask#) = segmentMask
-
--- hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
--- hashToIndex# buckets# hash# =
--- (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
--- where
--- !(I# segmentBits#) = segmentBits
--- size# = sizeofMutableArray# buckets#
-
--- maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
--- maybeResizeSegment segmentRef = do
--- segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
--- let oldSize# = sizeofMutableArray# old#
--- newSize# = oldSize# *# 2#
--- (I# n#) <- readFastMutInt counter
--- if isTrue# (n# <# newSize#) -- maximum load of 1
--- then return segment
--- else do
--- resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
--- case newArray# newSize# [] s1# of
--- (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
--- forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
--- fsList <- IO $ readArray# old# i#
--- forM_ fsList $ \fs -> do
--- let -- Shall we store in hash value in FastString instead?
--- !(I# hash#) = hashFastString fs
--- idx# = hashToIndex# new# hash#
--- IO $ \s1# ->
--- case readArray# new# idx# s1# of
--- (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
--- s3# -> (# s3#, () #)
--- writeIORef segmentRef resizedSegment
--- return resizedSegment
+hashToSegment# :: Int# -> Int#
+hashToSegment# hash# = hash# `andI#` segmentMask#
+ where
+ !(I# segmentMask#) = segmentMask
+
+hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
+hashToIndex# buckets# hash# =
+ (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
+ where
+ !(I# segmentBits#) = segmentBits
+ size# = sizeofMutableArray# buckets#
+
+maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
+maybeResizeSegment segmentRef = do
+ segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
+ let oldSize# = sizeofMutableArray# old#
+ newSize# = oldSize# *# 2#
+ (I# n#) <- readFastMutInt counter
+ if isTrue# (n# <# newSize#) -- maximum load of 1
+ then return segment
+ else do
+ resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
+ case newArray# newSize# [] s1# of
+ (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
+ forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
+ fsList <- IO $ readArray# old# i#
+ forM_ fsList $ \fs -> do
+ let -- Shall we store in hash value in FastString instead?
+ !(I# hash#) = hashFastString fs
+ idx# = hashToIndex# new# hash#
+ IO $ \s1# ->
+ case readArray# new# idx# s1# of
+ (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
+ s3# -> (# s3#, () #)
+ writeIORef segmentRef resizedSegment
+ return resizedSegment
{-# NOINLINE stringTable #-}
stringTable :: FastStringTable
@@ -456,55 +453,57 @@ The procedure goes like this:
* Otherwise, insert and return the string we created.
-}
--- mkFastStringWith
--- :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
--- mkFastStringWith mk_fs sbs = do
--- FastStringTableSegment lock _ buckets# <- readIORef segmentRef
--- let idx# = hashToIndex# buckets# hash#
--- bucket <- IO $ readArray# buckets# idx#
--- res <- bucket_match bucket sbs
--- case res of
--- Just found -> return found
--- Nothing -> do
--- -- The withMVar below is not dupable. It can lead to deadlock if it is
--- -- only run partially and putMVar is not called after takeMVar.
--- noDuplicate
--- n <- get_uid
--- new_fs <- mk_fs n n_zencs
--- withMVar lock $ \_ -> insert new_fs
--- where
--- !(FastStringTable uid n_zencs segments#) = stringTable
--- get_uid = atomicFetchAddFastMut uid 1
-
--- !(I# hash#) = hashStr sbs
--- (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
--- insert fs = do
--- FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
--- let idx# = hashToIndex# buckets# hash#
--- bucket <- IO $ readArray# buckets# idx#
--- res <- bucket_match bucket sbs
--- case res of
--- -- The FastString was added by another thread after previous read and
--- -- before we acquired the write lock.
--- Just found -> return found
--- Nothing -> do
--- IO $ \s1# ->
--- case writeArray# buckets# idx# (fs : bucket) s1# of
--- s2# -> (# s2#, () #)
--- _ <- atomicFetchAddFastMut counter 1
--- return fs
-
--- bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
--- bucket_match [] _ = return Nothing
--- bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
--- | fs_sbs == sbs = return (Just fs)
--- | otherwise = bucket_match ls sbs
+mkFastStringWith
+ :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
+mkFastStringWith mk_fs sbs = do
+ FastStringTableSegment lock _ buckets# <- readIORef segmentRef
+ let idx# = hashToIndex# buckets# hash#
+ bucket <- IO $ readArray# buckets# idx#
+ res <- bucket_match bucket sbs
+ case res of
+ Just found -> return found
+ Nothing -> do
+ -- The withMVar below is not dupable. It can lead to deadlock if it is
+ -- only run partially and putMVar is not called after takeMVar.
+ noDuplicate
+ n <- get_uid
+ new_fs <- mk_fs n n_zencs
+ withMVar lock $ \_ -> insert new_fs
+ where
+ !(FastStringTable uid n_zencs segments#) = stringTable
+ get_uid = atomicFetchAddFastMut uid 1
+
+ !(I# hash#) = hashStr sbs
+ (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
+ insert fs = do
+ FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
+ let idx# = hashToIndex# buckets# hash#
+ bucket <- IO $ readArray# buckets# idx#
+ res <- bucket_match bucket sbs
+ case res of
+ -- The FastString was added by another thread after previous read and
+ -- before we acquired the write lock.
+ Just found -> return found
+ Nothing -> do
+ IO $ \s1# ->
+ case writeArray# buckets# idx# (fs : bucket) s1# of
+ s2# -> (# s2#, () #)
+ _ <- atomicFetchAddFastMut counter 1
+ return fs
+
+bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
+bucket_match [] _ = return Nothing
+bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
+ | fs_sbs == sbs = return (Just fs)
+ | otherwise = bucket_match ls sbs
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes !ptr !len =
-- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
-- idempotent.
- unsafeDupablePerformIO $ newSBSFromPtr ptr len
+ unsafeDupablePerformIO $ do
+ sbs <- newSBSFromPtr ptr len
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
newSBSFromPtr (Ptr src#) (I# len#) =
@@ -516,19 +515,24 @@ newSBSFromPtr (Ptr src#) (I# len#) =
-- | Create a 'FastString' by copying an existing 'ByteString'
mkFastStringByteString :: ByteString -> FastString
-mkFastStringByteString = SBS.toShort
+mkFastStringByteString bs =
+ let sbs = SBS.toShort bs in
+ inlinePerformIO $
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- | Create a 'FastString' from an existing 'ShortByteString' without
-- copying.
mkFastStringShortByteString :: ShortByteString -> FastString
-mkFastStringShortByteString = id
- -- inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
+mkFastStringShortByteString sbs =
+ inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
{-# NOINLINE[1] mkFastString #-}
mkFastString str =
- inlinePerformIO $ utf8EncodeShortByteString str
+ inlinePerformIO $ do
+ sbs <- utf8EncodeShortByteString str
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- The following rule is used to avoid polluting the non-reclaimable FastString
-- table with transient strings when we only want their encoding.
@@ -541,17 +545,17 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
-- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and
-- account the number of forced z-strings into the passed 'FastMutInt'.
--- mkZFastString :: FastMutInt -> ShortByteString -> FastZString
--- mkZFastString n_zencs sbs = unsafePerformIO $ do
--- _ <- atomicFetchAddFastMut n_zencs 1
--- return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
-
--- mkNewFastStringShortByteString :: ShortByteString -> Int
--- -> FastMutInt -> IO FastString
--- mkNewFastStringShortByteString sbs uid n_zencs =
--- let zstr = mkZFastString n_zencs sbs
--- chars <- countUTF8Chars sbs
--- return (FastString uid chars sbs zstr)
+mkZFastString :: FastMutInt -> ShortByteString -> FastZString
+mkZFastString n_zencs sbs = unsafePerformIO $ do
+ _ <- atomicFetchAddFastMut n_zencs 1
+ return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
+
+mkNewFastStringShortByteString :: ShortByteString -> Int
+ -> FastMutInt -> IO FastString
+mkNewFastStringShortByteString sbs uid n_zencs = do
+ let zstr = mkZFastString n_zencs sbs
+ chars <- countUTF8Chars sbs
+ return (FastString uid chars sbs zstr)
hashStr :: ShortByteString -> Int
-- produce a hash value between 0 & m (inclusive)
@@ -580,15 +584,15 @@ hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
-lengthFS = inlinePerformIO . countUTF8Chars
+lengthFS fs = n_chars fs
-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
-nullFS fs = SBS.null fs
+nullFS fs = SBS.null $ fs_sbs fs
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS fs = utf8DecodeShortByteString fs
+unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs
-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
@@ -596,17 +600,18 @@ unpackFS fs = utf8DecodeShortByteString fs
-- memoized.
--
zEncodeFS :: FastString -> FastZString
-zEncodeFS = mkFastZStringString . zEncodeString . utf8DecodeShortByteString
+zEncodeFS fs = fs_zenc fs
appendFS :: FastString -> FastString -> FastString
-appendFS = (Semi.<>)
+appendFS fs1 fs2 = mkFastStringShortByteString
+ $ (Semi.<>) (fs_sbs fs1) (fs_sbs fs2)
concatFS :: [FastString] -> FastString
-concatFS = mconcat
+concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
headFS :: FastString -> Char
headFS fs
- | SBS.null fs = panic "headFS: Empty FastString"
+ | SBS.null $ fs_sbs fs = panic "headFS: Empty FastString"
headFS fs = head $ unpackFS fs
consFS :: Char -> FastString -> FastString
@@ -619,7 +624,7 @@ unconsFS fs =
(chr : str) -> Just (chr, mkFastString str)
uniqueOfFS :: FastString -> Int
-uniqueOfFS = hashStr
+uniqueOfFS fs = uniq fs
nilFS :: FastString
nilFS = mkFastString ""
diff --git a/compiler/GHC/Types/Unique.hs b/compiler/GHC/Types/Unique.hs
index cdabceab98..f4538bf579 100644
--- a/compiler/GHC/Types/Unique.hs
+++ b/compiler/GHC/Types/Unique.hs
@@ -18,7 +18,6 @@ Haskell).
-}
{-# LANGUAGE CPP #-}
-{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE BangPatterns, MagicHash #-}
module GHC.Types.Unique (