Revert "FastString as just a ShortByteString"

This reverts commit 13bfdade3d6aaa62b7ca9872ac586732ee1e48ce.
author: romes <rodrigo.m.mesquita@gmail.com> 2022-06-04 18:14:01 +0100
committer: romes <rodrigo.m.mesquita@gmail.com> 2022-06-04 18:14:01 +0100
commit: 43ec07e29306c39a877ef832f1ea7d560a4c8145 (patch)
tree: a25626442622c61685d72b2c655b625e8f6e6e71
parent: 13bfdade3d6aaa62b7ca9872ac586732ee1e48ce (diff)
download: haskell-43ec07e29306c39a877ef832f1ea7d560a4c8145.tar.gz
2 files changed, 155 insertions, 151 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index e8ba8fad7b..131f174c41 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE DerivingStrategies #-}
@@ -60,7 +59,7 @@ module GHC.Data.FastString
         lengthFZS,
 
         -- * FastStrings
-        FastString,     -- not abstract, for now.
+        FastString(..),     -- not abstract, for now.
         NonDetFastString (..),
         LexicalFastString (..),
 
@@ -117,7 +116,7 @@ import GHC.Prelude as Prelude
 import GHC.Utils.Encoding
 import GHC.Utils.IO.Unsafe
 import GHC.Utils.Panic.Plain
--- import GHC.Utils.Misc
+import GHC.Utils.Misc
 import GHC.Data.FastMutInt
 
 import Control.Concurrent.MVar
@@ -154,13 +153,13 @@ import GHC.IO
 -- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString'
 bytesFS, fastStringToByteString :: FastString -> ByteString
 {-# INLINE[1] bytesFS #-}
-bytesFS f = SBS.fromShort $ f
+bytesFS f = SBS.fromShort $ fs_sbs f
 
 {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
 fastStringToByteString = bytesFS
 
 fastStringToShortByteString :: FastString -> ShortByteString
-fastStringToShortByteString = id
+fastStringToShortByteString = fs_sbs
 
 fastZStringToByteString :: FastZString -> ByteString
 fastZStringToByteString (FastZString bs) = bs
@@ -169,8 +168,8 @@ fastZStringToByteString (FastZString bs) = bs
 unsafeMkByteString :: String -> ByteString
 unsafeMkByteString = BSC.pack
 
--- hashFastString :: FastString -> Int
--- hashFastString fs = hashStr $ fs
+hashFastString :: FastString -> Int
+hashFastString fs = hashStr $ fs_sbs fs
 
 -- -----------------------------------------------------------------------------
 
@@ -199,57 +198,56 @@ comparison.
 It is also associated with a lazy reference to the Z-encoding
 of this string which is used by the compiler internally.
 -}
-type FastString = ShortByteString
---data FastString = FastString {
---      uniq    :: {-# UNPACK #-} !Int, -- unique id
---      n_chars :: {-# UNPACK #-} !Int, -- number of chars
---      fs_sbs  :: {-# UNPACK #-} !ShortByteString,
---      fs_zenc :: FastZString
---      -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
---      -- GHC.Utils.Encoding.
---      --
---      -- Since 'FastString's are globally memoized this is computed at most
---      -- once for any given string.
---  }
-
--- instance Eq FastString where
---   f1 == f2  =  uniq f1 == uniq f2
+data FastString = FastString {
+      uniq    :: {-# UNPACK #-} !Int, -- unique id
+      n_chars :: {-# UNPACK #-} !Int, -- number of chars
+      fs_sbs  :: {-# UNPACK #-} !ShortByteString,
+      fs_zenc :: FastZString
+      -- ^ Lazily computed Z-encoding of this string. See Note [Z-Encoding] in
+      -- GHC.Utils.Encoding.
+      --
+      -- Since 'FastString's are globally memoized this is computed at most
+      -- once for any given string.
+  }
+
+instance Eq FastString where
+  f1 == f2  =  uniq f1 == uniq f2
 
 -- We don't provide any "Ord FastString" instance to force you to think about
 -- which ordering you want:
 --    * lexical:   deterministic,     O(n). Cf lexicalCompareFS and LexicalFastString.
 --    * by unique: non-deterministic, O(1). Cf uniqCompareFS    and NonDetFastString.
 
--- instance IsString FastString where
---     fromString = fsLit
+instance IsString FastString where
+    fromString = fsLit
 
--- instance Semi.Semigroup FastString where
---     (<>) = appendFS
+instance Semi.Semigroup FastString where
+    (<>) = appendFS
 
--- instance Monoid FastString where
---     mempty = nilFS
---     mappend = (Semi.<>)
---     mconcat = concatFS
+instance Monoid FastString where
+    mempty = nilFS
+    mappend = (Semi.<>)
+    mconcat = concatFS
 
--- instance Show FastString where
---    show fs = show (unpackFS fs)
+instance Show FastString where
+   show fs = show (unpackFS fs)
 
--- instance Data FastString where
---   -- don't traverse?
---   toConstr _   = abstractConstr "FastString"
---   gunfold _ _  = error "gunfold"
---   dataTypeOf _ = mkNoRepType "FastString"
+instance Data FastString where
+  -- don't traverse?
+  toConstr _   = abstractConstr "FastString"
+  gunfold _ _  = error "gunfold"
+  dataTypeOf _ = mkNoRepType "FastString"
 
--- instance NFData FastString where
---   rnf fs = seq fs ()
+instance NFData FastString where
+  rnf fs = seq fs ()
 
 -- | Compare FastString lexically
 --
 -- If you don't care about the lexical ordering, use `uniqCompareFS` instead.
 lexicalCompareFS :: FastString -> FastString -> Ordering
-lexicalCompareFS = compare
-  -- if uniq fs1 == uniq fs2 then EQ else
-  -- utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
+lexicalCompareFS fs1 fs2 =
+  if uniq fs1 == uniq fs2 then EQ else
+  utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2)
   -- perform a lexical comparison taking into account the Modified UTF-8
   -- encoding we use (cf #18562)
 
@@ -257,7 +255,7 @@ lexicalCompareFS = compare
 --
 -- Much cheaper than `lexicalCompareFS` but non-deterministic!
 uniqCompareFS :: FastString -> FastString -> Ordering
-uniqCompareFS = compare -- (uniq fs1) (uniq fs2)
+uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2)
 
 -- | Non-deterministic FastString
 --
@@ -315,49 +313,48 @@ Following parameters are determined based on:
 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
   on 2018-10-24, we have 13920 entries.
 -}
-numSegments, initialNumBuckets :: Int
--- segmentBits, segmentMask,
--- segmentBits = 8
+segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
+segmentBits = 8
 numSegments = 256   -- bit segmentBits
--- segmentMask = 0xff  -- bit segmentBits - 1
+segmentMask = 0xff  -- bit segmentBits - 1
 initialNumBuckets = 64
 
--- hashToSegment# :: Int# -> Int#
--- hashToSegment# hash# = hash# `andI#` segmentMask#
---   where
---     !(I# segmentMask#) = segmentMask
-
--- hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
--- hashToIndex# buckets# hash# =
---   (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
---   where
---     !(I# segmentBits#) = segmentBits
---     size# = sizeofMutableArray# buckets#
-
--- maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
--- maybeResizeSegment segmentRef = do
---   segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
---   let oldSize# = sizeofMutableArray# old#
---       newSize# = oldSize# *# 2#
---   (I# n#) <- readFastMutInt counter
---   if isTrue# (n# <# newSize#) -- maximum load of 1
---   then return segment
---   else do
---     resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
---       case newArray# newSize# [] s1# of
---         (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
---     forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
---       fsList <- IO $ readArray# old# i#
---       forM_ fsList $ \fs -> do
---         let -- Shall we store in hash value in FastString instead?
---             !(I# hash#) = hashFastString fs
---             idx# = hashToIndex# new# hash#
---         IO $ \s1# ->
---           case readArray# new# idx# s1# of
---             (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
---               s3# -> (# s3#, () #)
---     writeIORef segmentRef resizedSegment
---     return resizedSegment
+hashToSegment# :: Int# -> Int#
+hashToSegment# hash# = hash# `andI#` segmentMask#
+  where
+    !(I# segmentMask#) = segmentMask
+
+hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
+hashToIndex# buckets# hash# =
+  (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
+  where
+    !(I# segmentBits#) = segmentBits
+    size# = sizeofMutableArray# buckets#
+
+maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
+maybeResizeSegment segmentRef = do
+  segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
+  let oldSize# = sizeofMutableArray# old#
+      newSize# = oldSize# *# 2#
+  (I# n#) <- readFastMutInt counter
+  if isTrue# (n# <# newSize#) -- maximum load of 1
+  then return segment
+  else do
+    resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
+      case newArray# newSize# [] s1# of
+        (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
+    forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
+      fsList <- IO $ readArray# old# i#
+      forM_ fsList $ \fs -> do
+        let -- Shall we store in hash value in FastString instead?
+            !(I# hash#) = hashFastString fs
+            idx# = hashToIndex# new# hash#
+        IO $ \s1# ->
+          case readArray# new# idx# s1# of
+            (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
+              s3# -> (# s3#, () #)
+    writeIORef segmentRef resizedSegment
+    return resizedSegment
 
 {-# NOINLINE stringTable #-}
 stringTable :: FastStringTable
@@ -456,55 +453,57 @@ The procedure goes like this:
    * Otherwise, insert and return the string we created.
 -}
 
--- mkFastStringWith
---     :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
--- mkFastStringWith mk_fs sbs = do
---   FastStringTableSegment lock _ buckets# <- readIORef segmentRef
---   let idx# = hashToIndex# buckets# hash#
---   bucket <- IO $ readArray# buckets# idx#
---   res <- bucket_match bucket sbs
---   case res of
---     Just found -> return found
---     Nothing -> do
---       -- The withMVar below is not dupable. It can lead to deadlock if it is
---       -- only run partially and putMVar is not called after takeMVar.
---       noDuplicate
---       n <- get_uid
---       new_fs <- mk_fs n n_zencs
---       withMVar lock $ \_ -> insert new_fs
---   where
---     !(FastStringTable uid n_zencs segments#) = stringTable
---     get_uid = atomicFetchAddFastMut uid 1
-
---     !(I# hash#) = hashStr sbs
---     (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
---     insert fs = do
---       FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
---       let idx# = hashToIndex# buckets# hash#
---       bucket <- IO $ readArray# buckets# idx#
---       res <- bucket_match bucket sbs
---       case res of
---         -- The FastString was added by another thread after previous read and
---         -- before we acquired the write lock.
---         Just found -> return found
---         Nothing -> do
---           IO $ \s1# ->
---             case writeArray# buckets# idx# (fs : bucket) s1# of
---               s2# -> (# s2#, () #)
---           _ <- atomicFetchAddFastMut counter 1
---           return fs
-
--- bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
--- bucket_match [] _ = return Nothing
--- bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
---   | fs_sbs == sbs = return (Just fs)
---   | otherwise     =  bucket_match ls sbs
+mkFastStringWith
+    :: (Int -> FastMutInt-> IO FastString) -> ShortByteString -> IO FastString
+mkFastStringWith mk_fs sbs = do
+  FastStringTableSegment lock _ buckets# <- readIORef segmentRef
+  let idx# = hashToIndex# buckets# hash#
+  bucket <- IO $ readArray# buckets# idx#
+  res <- bucket_match bucket sbs
+  case res of
+    Just found -> return found
+    Nothing -> do
+      -- The withMVar below is not dupable. It can lead to deadlock if it is
+      -- only run partially and putMVar is not called after takeMVar.
+      noDuplicate
+      n <- get_uid
+      new_fs <- mk_fs n n_zencs
+      withMVar lock $ \_ -> insert new_fs
+  where
+    !(FastStringTable uid n_zencs segments#) = stringTable
+    get_uid = atomicFetchAddFastMut uid 1
+
+    !(I# hash#) = hashStr sbs
+    (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
+    insert fs = do
+      FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
+      let idx# = hashToIndex# buckets# hash#
+      bucket <- IO $ readArray# buckets# idx#
+      res <- bucket_match bucket sbs
+      case res of
+        -- The FastString was added by another thread after previous read and
+        -- before we acquired the write lock.
+        Just found -> return found
+        Nothing -> do
+          IO $ \s1# ->
+            case writeArray# buckets# idx# (fs : bucket) s1# of
+              s2# -> (# s2#, () #)
+          _ <- atomicFetchAddFastMut counter 1
+          return fs
+
+bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
+bucket_match [] _ = return Nothing
+bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
+  | fs_sbs == sbs = return (Just fs)
+  | otherwise     =  bucket_match ls sbs
 
 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
 mkFastStringBytes !ptr !len =
     -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
     -- idempotent.
-    unsafeDupablePerformIO $ newSBSFromPtr ptr len
+    unsafeDupablePerformIO $ do
+        sbs <- newSBSFromPtr ptr len
+        mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
 newSBSFromPtr (Ptr src#) (I# len#) =
@@ -516,19 +515,24 @@ newSBSFromPtr (Ptr src#) (I# len#) =
 
 -- | Create a 'FastString' by copying an existing 'ByteString'
 mkFastStringByteString :: ByteString -> FastString
-mkFastStringByteString = SBS.toShort
+mkFastStringByteString bs =
+  let sbs = SBS.toShort bs in
+  inlinePerformIO $
+      mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 -- | Create a 'FastString' from an existing 'ShortByteString' without
 -- copying.
 mkFastStringShortByteString :: ShortByteString -> FastString
-mkFastStringShortByteString = id
-  -- inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
+mkFastStringShortByteString sbs =
+  inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
 mkFastString :: String -> FastString
 {-# NOINLINE[1] mkFastString #-}
 mkFastString str =
-  inlinePerformIO $ utf8EncodeShortByteString str
+  inlinePerformIO $ do
+    sbs <- utf8EncodeShortByteString str
+    mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 -- The following rule is used to avoid polluting the non-reclaimable FastString
 -- table with transient strings when we only want their encoding.
@@ -541,17 +545,17 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
 
 -- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and
 -- account the number of forced z-strings into the passed 'FastMutInt'.
--- mkZFastString :: FastMutInt -> ShortByteString -> FastZString
--- mkZFastString n_zencs sbs = unsafePerformIO $ do
---   _ <- atomicFetchAddFastMut n_zencs 1
---   return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
-
--- mkNewFastStringShortByteString :: ShortByteString -> Int
---                                -> FastMutInt -> IO FastString
--- mkNewFastStringShortByteString sbs uid n_zencs =
---   let zstr = mkZFastString n_zencs sbs
---   chars <- countUTF8Chars sbs
---   return (FastString uid chars sbs zstr)
+mkZFastString :: FastMutInt -> ShortByteString -> FastZString
+mkZFastString n_zencs sbs = unsafePerformIO $ do
+  _ <- atomicFetchAddFastMut n_zencs 1
+  return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
+
+mkNewFastStringShortByteString :: ShortByteString -> Int
+                               -> FastMutInt -> IO FastString
+mkNewFastStringShortByteString sbs uid n_zencs = do
+  let zstr = mkZFastString n_zencs sbs
+  chars <- countUTF8Chars sbs
+  return (FastString uid chars sbs zstr)
 
 hashStr  :: ShortByteString -> Int
  -- produce a hash value between 0 & m (inclusive)
@@ -580,15 +584,15 @@ hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
 
 -- | Returns the length of the 'FastString' in characters
 lengthFS :: FastString -> Int
-lengthFS = inlinePerformIO . countUTF8Chars
+lengthFS fs = n_chars fs
 
 -- | Returns @True@ if the 'FastString' is empty
 nullFS :: FastString -> Bool
-nullFS fs = SBS.null fs
+nullFS fs = SBS.null $ fs_sbs fs
 
 -- | Unpacks and decodes the FastString
 unpackFS :: FastString -> String
-unpackFS fs = utf8DecodeShortByteString fs
+unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs
 
 -- | Returns a Z-encoded version of a 'FastString'.  This might be the
 -- original, if it was already Z-encoded.  The first time this
@@ -596,17 +600,18 @@ unpackFS fs = utf8DecodeShortByteString fs
 -- memoized.
 --
 zEncodeFS :: FastString -> FastZString
-zEncodeFS = mkFastZStringString . zEncodeString . utf8DecodeShortByteString
+zEncodeFS fs = fs_zenc fs
 
 appendFS :: FastString -> FastString -> FastString
-appendFS = (Semi.<>)
+appendFS fs1 fs2 = mkFastStringShortByteString
+                 $ (Semi.<>) (fs_sbs fs1) (fs_sbs fs2)
 
 concatFS :: [FastString] -> FastString
-concatFS = mconcat
+concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
 
 headFS :: FastString -> Char
 headFS fs
-  | SBS.null fs = panic "headFS: Empty FastString"
+  | SBS.null $ fs_sbs fs = panic "headFS: Empty FastString"
 headFS fs = head $ unpackFS fs
 
 consFS :: Char -> FastString -> FastString
@@ -619,7 +624,7 @@ unconsFS fs =
     (chr : str) -> Just (chr, mkFastString str)
 
 uniqueOfFS :: FastString -> Int
-uniqueOfFS = hashStr
+uniqueOfFS fs = uniq fs
 
 nilFS :: FastString
 nilFS = mkFastString ""
diff --git a/compiler/GHC/Types/Unique.hs b/compiler/GHC/Types/Unique.hs
index cdabceab98..f4538bf579 100644
--- a/compiler/GHC/Types/Unique.hs
+++ b/compiler/GHC/Types/Unique.hs
@@ -18,7 +18,6 @@ Haskell).
 -}
 
 {-# LANGUAGE CPP #-}
-{-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE BangPatterns, MagicHash #-}
 
 module GHC.Types.Unique (
author	romes <rodrigo.m.mesquita@gmail.com>	2022-06-04 18:14:01 +0100
committer	romes <rodrigo.m.mesquita@gmail.com>	2022-06-04 18:14:01 +0100
commit	43ec07e29306c39a877ef832f1ea7d560a4c8145 (patch)
tree	a25626442622c61685d72b2c655b625e8f6e6e71
parent	13bfdade3d6aaa62b7ca9872ac586732ee1e48ce (diff)
download	haskell-43ec07e29306c39a877ef832f1ea7d560a4c8145.tar.gz