summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gröber <dxld@darkboxed.org>2019-09-26 20:57:11 +0200
committerBen Gamari <ben@smart-cactus.org>2020-03-18 14:59:58 +0000
commitbc780f9aa151fa6ef64b30e17b77c4f2100312b2 (patch)
treeed06037aab5f076a768c6e544170442b57daec88
parent7c1fbec72f7571b9f41703bfc1905ffd028c002c (diff)
downloadhaskell-bc780f9aa151fa6ef64b30e17b77c4f2100312b2.tar.gz
Use ShortByteString for FastString
There are multiple reasons we want this: - Fewer allocations: ByteString has 3 fields, ShortByteString just has one. - ByteString memory is pinned: - This can cause fragmentation issues (see for example #13110) but also - makes using FastStrings in compact regions impossible. Metric Decrease: T5837 T12150 T12234 T12425
-rw-r--r--compiler/GHC/Core/DataCon.hs9
-rw-r--r--compiler/GHC/Core/SimpleOpt.hs2
-rw-r--r--compiler/utils/Encoding.hs171
-rw-r--r--compiler/utils/FastString.hs196
-rw-r--r--compiler/utils/StringBuffer.hs2
5 files changed, 192 insertions, 188 deletions
diff --git a/compiler/GHC/Core/DataCon.hs b/compiler/GHC/Core/DataCon.hs
index 5b3501b3a9..087ca2349e 100644
--- a/compiler/GHC/Core/DataCon.hs
+++ b/compiler/GHC/Core/DataCon.hs
@@ -1342,11 +1342,14 @@ dataConRepArgTys (MkData { dcRep = rep
dataConIdentity :: DataCon -> ByteString
-- We want this string to be UTF-8, so we get the bytes directly from the FastStrings.
dataConIdentity dc = LBS.toStrict $ BSB.toLazyByteString $ mconcat
- [ BSB.byteString $ bytesFS (unitIdFS (moduleUnitId mod))
+ [ BSB.shortByteString $ fastStringToShortByteString $
+ unitIdFS $ moduleUnitId mod
, BSB.int8 $ fromIntegral (ord ':')
- , BSB.byteString $ bytesFS (moduleNameFS (moduleName mod))
+ , BSB.shortByteString $ fastStringToShortByteString $
+ moduleNameFS $ moduleName mod
, BSB.int8 $ fromIntegral (ord '.')
- , BSB.byteString $ bytesFS (occNameFS (nameOccName name))
+ , BSB.shortByteString $ fastStringToShortByteString $
+ occNameFS $ nameOccName name
]
where name = dataConName dc
mod = ASSERT( isExternalName name ) nameModule name
diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs
index 829e746498..6378f4ad42 100644
--- a/compiler/GHC/Core/SimpleOpt.hs
+++ b/compiler/GHC/Core/SimpleOpt.hs
@@ -1113,7 +1113,7 @@ dealWithStringLiteral fun str co
= let strFS = mkFastStringByteString str
char = mkConApp charDataCon [mkCharLit (headFS strFS)]
- charTail = bytesFS (tailFS strFS)
+ charTail = BS.tail (bytesFS strFS)
-- In singleton strings, just add [] instead of unpackCstring# ""#.
rest = if BS.null charTail
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index b4af68621d..63524e1d15 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -13,14 +13,16 @@
module Encoding (
-- * UTF-8
- utf8DecodeChar#,
+ utf8DecodeCharAddr#,
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
utf8DecodeByteString,
+ utf8DecodeShortByteString,
utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
+ utf8EncodeShortByteString,
utf8EncodedLength,
countUTF8Chars,
@@ -36,14 +38,15 @@ module Encoding (
import GhcPrelude
import Foreign
-import Foreign.ForeignPtr.Unsafe
import Data.Char
import qualified Data.Char as Char
import Numeric
import GHC.IO
+import GHC.ST
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
+import Data.ByteString.Short.Internal (ShortByteString(..))
import GHC.Exts
@@ -60,23 +63,23 @@ import GHC.Exts
-- before decoding them (see StringBuffer.hs).
{-# INLINE utf8DecodeChar# #-}
-utf8DecodeChar# :: Addr# -> (# Char#, Int# #)
-utf8DecodeChar# a# =
- let !ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+utf8DecodeChar# :: (Int# -> Word#) -> (# Char#, Int# #)
+utf8DecodeChar# indexWord8# =
+ let !ch0 = word2Int# (indexWord8# 0#) in
case () of
_ | isTrue# (ch0 <=# 0x7F#) -> (# chr# ch0, 1# #)
| isTrue# ((ch0 >=# 0xC0#) `andI#` (ch0 <=# 0xDF#)) ->
- let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8# 1#) in
if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else
(# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
(ch1 -# 0x80#)),
2# #)
| isTrue# ((ch0 >=# 0xE0#) `andI#` (ch0 <=# 0xEF#)) ->
- let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8# 1#) in
if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else
- let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ let !ch2 = word2Int# (indexWord8# 2#) in
if isTrue# ((ch2 <# 0x80#) `orI#` (ch2 >=# 0xC0#)) then fail 2# else
(# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +#
@@ -84,11 +87,11 @@ utf8DecodeChar# a# =
3# #)
| isTrue# ((ch0 >=# 0xF0#) `andI#` (ch0 <=# 0xF8#)) ->
- let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8# 1#) in
if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else
- let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ let !ch2 = word2Int# (indexWord8# 2#) in
if isTrue# ((ch2 <# 0x80#) `orI#` (ch2 >=# 0xC0#)) then fail 2# else
- let !ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+ let !ch3 = word2Int# (indexWord8# 3#) in
if isTrue# ((ch3 <# 0x80#) `orI#` (ch3 >=# 0xC0#)) then fail 3# else
(# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
@@ -106,9 +109,18 @@ utf8DecodeChar# a# =
-- confusing parse error later on. Instead we use '\0' which
-- will signal a lexer error immediately.
+utf8DecodeCharAddr# :: Addr# -> (# Char#, Int# #)
+utf8DecodeCharAddr# a# =
+ utf8DecodeChar# (indexWord8OffAddr# a#)
+
+utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
+utf8DecodeCharByteArray# ba# off# =
+ utf8DecodeChar# (\i# -> indexWord8Array# ba# (i# +# off#))
+
utf8DecodeChar :: Ptr Word8 -> (Char, Int)
-utf8DecodeChar (Ptr a#) =
- case utf8DecodeChar# a# of (# c#, nBytes# #) -> ( C# c#, I# nBytes# )
+utf8DecodeChar !(Ptr a#) =
+ case utf8DecodeCharAddr# a# of
+ (# c#, nBytes# #) -> ( C# c#, I# nBytes# )
-- UTF-8 is cleverly designed so that we can always figure out where
-- the start of the current character is, given any position in a
@@ -124,73 +136,100 @@ utf8CharStart p = go p
then go (p `plusPtr` (-1))
else return p
-utf8DecodeByteString :: ByteString -> [Char]
-utf8DecodeByteString (BS.PS ptr offset len)
- = utf8DecodeStringLazy ptr offset len
-
-utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
-utf8DecodeStringLazy fptr offset len
- = unsafeDupablePerformIO $ unpack start
+{-# INLINE utf8DecodeLazy# #-}
+utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char]
+utf8DecodeLazy# retain indexWord8# len#
+ = unpack 0#
where
- !start = unsafeForeignPtrToPtr fptr `plusPtr` offset
- !end = start `plusPtr` len
-
- unpack p
- | p >= end = touchForeignPtr fptr >> return []
+ unpack i#
+ | isTrue# (i# >=# len#) = retain >> return []
| otherwise =
- case utf8DecodeChar# (unPtr p) of
- (# c#, nBytes# #) -> do
- rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#)
- return (C# c# : rest)
-
-countUTF8Chars :: Ptr Word8 -> Int -> IO Int
-countUTF8Chars ptr len = go ptr 0
- where
- !end = ptr `plusPtr` len
+ case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of
+ (# c#, nBytes# #) -> do
+ rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
+ return (C# c# : rest)
- go p !n
- | p >= end = return n
- | otherwise = do
- case utf8DecodeChar# (unPtr p) of
- (# _, nBytes# #) -> go (p `plusPtr#` nBytes#) (n+1)
-
-unPtr :: Ptr a -> Addr#
-unPtr (Ptr a) = a
-
-plusPtr# :: Ptr a -> Int# -> Ptr a
-plusPtr# ptr nBytes# = ptr `plusPtr` (I# nBytes#)
+utf8DecodeByteString :: ByteString -> [Char]
+utf8DecodeByteString (BS.PS fptr offset len)
+ = utf8DecodeStringLazy fptr offset len
-utf8EncodeChar :: Char -> Ptr Word8 -> IO (Ptr Word8)
-utf8EncodeChar c ptr =
+utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
+utf8DecodeStringLazy fp offset (I# len#)
+ = unsafeDupablePerformIO $ withForeignPtr fp $ \ptr ->
+ let !(Ptr a#) = ptr `plusPtr` offset
+ index# = indexWord8OffAddr# a# in
+ utf8DecodeLazy# (touchForeignPtr fp) index# len#
+
+utf8DecodeShortByteString :: ShortByteString -> [Char]
+utf8DecodeShortByteString (SBS ba#)
+ = unsafeDupablePerformIO $
+ let index# = indexWord8Array# ba#
+ len# = sizeofByteArray# ba# in
+ utf8DecodeLazy# (return ()) index# len#
+
+countUTF8Chars :: ShortByteString -> IO Int
+countUTF8Chars (SBS ba) = go 0# 0#
+ where
+ len# = sizeofByteArray# ba
+ go i# n#
+ | isTrue# (i# >=# len#) =
+ return (I# n#)
+ | otherwise = do
+ case utf8DecodeCharByteArray# ba i# of
+ (# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#)
+
+{-# INLINE utf8EncodeChar #-}
+utf8EncodeChar :: (Int# -> Word# -> State# s -> State# s)
+ -> Char -> ST s Int
+utf8EncodeChar write# c =
let x = ord c in
case () of
_ | x > 0 && x <= 0x007f -> do
- poke ptr (fromIntegral x)
- return (ptr `plusPtr` 1)
+ write 0 x
+ return 1
-- NB. '\0' is encoded as '\xC0\x80', not '\0'. This is so that we
-- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8).
| x <= 0x07ff -> do
- poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F)))
- pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F)))
- return (ptr `plusPtr` 2)
+ write 0 (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))
+ write 1 (0x80 .|. (x .&. 0x3F))
+ return 2
| x <= 0xffff -> do
- poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F))
- pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F))
- pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F)))
- return (ptr `plusPtr` 3)
+ write 0 (0xE0 .|. (x `shiftR` 12) .&. 0x0F)
+ write 1 (0x80 .|. (x `shiftR` 6) .&. 0x3F)
+ write 2 (0x80 .|. (x .&. 0x3F))
+ return 3
| otherwise -> do
- poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18)))
- pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F)))
- pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F)))
- pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F)))
- return (ptr `plusPtr` 4)
+ write 0 (0xF0 .|. (x `shiftR` 18))
+ write 1 (0x80 .|. ((x `shiftR` 12) .&. 0x3F))
+ write 2 (0x80 .|. ((x `shiftR` 6) .&. 0x3F))
+ write 3 (0x80 .|. (x .&. 0x3F))
+ return 4
+ where
+ {-# INLINE write #-}
+ write (I# off#) (I# c#) = ST $ \s ->
+ case write# off# (int2Word# c#) s of
+ s -> (# s, () #)
utf8EncodeString :: Ptr Word8 -> String -> IO ()
-utf8EncodeString ptr str = go ptr str
- where go !_ [] = return ()
- go ptr (c:cs) = do
- ptr' <- utf8EncodeChar c ptr
- go ptr' cs
+utf8EncodeString (Ptr a#) str = go a# str
+ where go !_ [] = return ()
+ go a# (c:cs) = do
+ I# off# <- stToIO $ utf8EncodeChar (writeWord8OffAddr# a#) c
+ go (a# `plusAddr#` off#) cs
+
+utf8EncodeShortByteString :: String -> IO ShortByteString
+utf8EncodeShortByteString str = stToIO $ ST $ \s ->
+ let !(I# len#) = utf8EncodedLength str in
+ case newByteArray# len# s of { (# s, mba# #) ->
+ let ST f_go = go mba# 0# str in
+ case f_go s of { (# s, () #) ->
+ case unsafeFreezeByteArray# mba# s of { (# s, ba# #) ->
+ (# s, SBS ba# #) }}}
+ where
+ go _ _ [] = return ()
+ go mba# i# (c:cs) = do
+ I# off# <- utf8EncodeChar (\j# -> writeWord8Array# mba# (i# +# j#)) c
+ go mba# (i# +# off#) cs
utf8EncodedLength :: String -> Int
utf8EncodedLength str = go 0 str
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index f3423ecf1c..2d39a698b3 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -32,12 +32,16 @@
module FastString
(
-- * ByteString
- bytesFS, -- :: FastString -> ByteString
- fastStringToByteString, -- = bytesFS (kept for haddock)
+ bytesFS,
+ fastStringToByteString,
mkFastStringByteString,
fastZStringToByteString,
unsafeMkByteString,
+ -- * ShortByteString
+ fastStringToShortByteString,
+ mkFastStringShortByteString,
+
-- * FastZString
FastZString,
hPutFZS,
@@ -52,7 +56,6 @@ module FastString
mkFastString,
mkFastStringBytes,
mkFastStringByteList,
- mkFastStringForeignPtr,
mkFastString#,
-- ** Deconstruction
@@ -67,7 +70,6 @@ module FastString
nullFS,
appendFS,
headFS,
- tailFS,
concatFS,
consFS,
nilFS,
@@ -108,10 +110,12 @@ import Control.Concurrent.MVar
import Control.DeepSeq
import Control.Monad
import Data.ByteString (ByteString)
+import Data.ByteString.Short (ShortByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
-import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
+import qualified Data.ByteString.Short as SBS
+import qualified Data.ByteString.Short.Internal as SBS
import Foreign.C
import GHC.Exts
import System.IO
@@ -121,6 +125,7 @@ import Data.Char
import Data.Semigroup as Semi
import GHC.IO
+import GHC.ST
import Foreign
@@ -132,12 +137,14 @@ import GHC.Base ( unpackCString#, unpackNBytes# )
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
-bytesFS :: FastString -> ByteString
-bytesFS f = fs_bs f
+bytesFS, fastStringToByteString :: FastString -> ByteString
+bytesFS = fastStringToByteString
{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
-fastStringToByteString :: FastString -> ByteString
-fastStringToByteString = bytesFS
+fastStringToByteString f = SBS.fromShort $ fs_sbs f
+
+fastStringToShortByteString :: FastString -> ShortByteString
+fastStringToShortByteString = fs_sbs
fastZStringToByteString :: FastZString -> ByteString
fastZStringToByteString (FastZString bs) = bs
@@ -147,9 +154,7 @@ unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
hashFastString :: FastString -> Int
-hashFastString (FastString _ bs _)
- = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
- return $ hashStr (castPtr ptr) len
+hashFastString (FastString _ sbs _) = hashStr sbs
-- -----------------------------------------------------------------------------
@@ -180,7 +185,7 @@ of this string which is used by the compiler internally.
-}
data FastString = FastString {
uniq :: {-# UNPACK #-} !Int, -- unique id
- fs_bs :: {-# UNPACK #-} !ByteString,
+ fs_sbs :: {-# UNPACK #-} !ShortByteString,
fs_zenc :: FastZString
-- ^ Lazily computed z-encoding of this string.
--
@@ -227,12 +232,9 @@ instance NFData FastString where
rnf fs = seq fs ()
cmpFS :: FastString -> FastString -> Ordering
-cmpFS f1@(FastString u1 _ _) f2@(FastString u2 _ _) =
+cmpFS (FastString u1 sbs1 _) (FastString u2 sbs2 _) =
if u1 == u2 then EQ else
- compare (bytesFS f1) (bytesFS f2)
-
-foreign import ccall unsafe "memcmp"
- memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+ compare sbs1 sbs2
-- -----------------------------------------------------------------------------
-- Construction
@@ -403,12 +405,12 @@ The procedure goes like this:
-}
mkFastStringWith
- :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString
-mkFastStringWith mk_fs !ptr !len = do
+ :: (Int -> IORef Int-> IO FastString) -> ShortByteString -> IO FastString
+mkFastStringWith mk_fs sbs = do
FastStringTableSegment lock _ buckets# <- readIORef segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
- res <- bucket_match bucket len ptr
+ res <- bucket_match bucket sbs
case res of
Just found -> return found
Nothing -> do
@@ -422,13 +424,13 @@ mkFastStringWith mk_fs !ptr !len = do
!(FastStringTable uid n_zencs segments#) = stringTable
get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
- !(I# hash#) = hashStr ptr len
+ !(I# hash#) = hashStr sbs
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
insert fs = do
FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
- res <- bucket_match bucket len ptr
+ res <- bucket_match bucket sbs
case res of
-- The FastString was added by another thread after previous read and
-- before we acquired the write lock.
@@ -440,102 +442,74 @@ mkFastStringWith mk_fs !ptr !len = do
modifyIORef' counter succ
return fs
-bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
-bucket_match [] _ _ = return Nothing
-bucket_match (v@(FastString _ bs _):ls) len ptr
- | len == BS.length bs = do
- b <- BS.unsafeUseAsCString bs $ \buf ->
- cmpStringPrefix ptr (castPtr buf) len
- if b then return (Just v)
- else bucket_match ls len ptr
- | otherwise =
- bucket_match ls len ptr
+bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
+bucket_match [] _ = return Nothing
+bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
+ | fs_sbs == sbs = return (Just fs)
+ | otherwise = bucket_match ls sbs
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes !ptr !len =
-- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
-- idempotent.
- unsafeDupablePerformIO $
- mkFastStringWith (copyNewFastString ptr len) ptr len
-
--- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
--- between this and 'mkFastStringBytes' is that we don't have to copy
--- the bytes if the string is new to the table.
-mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
-mkFastStringForeignPtr ptr !fp len
- = mkFastStringWith (mkNewFastString fp len) ptr len
-
--- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
--- between this and 'mkFastStringBytes' is that we don't have to copy
--- the bytes if the string is new to the table.
+ unsafeDupablePerformIO $ do
+ sbs <- newSBSFromPtr ptr len
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
+
+newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
+newSBSFromPtr (Ptr src#) (I# len#) =
+ stToIO $ ST $ \s ->
+ case newByteArray# len# s of { (# s, dst# #) ->
+ case copyAddrToByteArray# src# dst# 0# len# s of { s ->
+ case unsafeFreezeByteArray# dst# s of { (# s, ba# #) ->
+ (# s, SBS.SBS ba# #) }}}
+
+-- | Create a 'FastString' by copying an existing 'ByteString'
mkFastStringByteString :: ByteString -> FastString
mkFastStringByteString bs =
- inlinePerformIO $
- BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
- let ptr' = castPtr ptr
- mkFastStringWith (mkNewFastStringByteString bs) ptr' len
+ let sbs = SBS.toShort bs in
+ inlinePerformIO $
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
+
+-- | Create a 'FastString' from an existing 'ShortByteString' without
+-- copying.
+mkFastStringShortByteString :: ShortByteString -> FastString
+mkFastStringShortByteString sbs =
+ inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
mkFastString str =
inlinePerformIO $ do
- let l = utf8EncodedLength str
- buf <- mallocForeignPtrBytes l
- withForeignPtr buf $ \ptr -> do
- utf8EncodeString ptr str
- mkFastStringForeignPtr ptr buf l
+ sbs <- utf8EncodeShortByteString str
+ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
mkFastStringByteList :: [Word8] -> FastString
-mkFastStringByteList str = mkFastStringByteString (BS.pack str)
+mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
--- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account
--- the number of forced z-strings into the passed 'IORef'.
-mkZFastString :: IORef Int -> ByteString -> FastZString
-mkZFastString n_zencs bs = unsafePerformIO $ do
+-- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and
+-- account the number of forced z-strings into the passed 'IORef'.
+mkZFastString :: IORef Int -> ShortByteString -> FastZString
+mkZFastString n_zencs sbs = unsafePerformIO $ do
atomicModifyIORef' n_zencs $ \n -> (n+1, ())
- return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs))
-
-mkNewFastString :: ForeignPtr Word8 -> Int -> Int
- -> IORef Int -> IO FastString
-mkNewFastString fp len uid n_zencs = do
- let bs = BS.fromForeignPtr fp 0 len
- zstr = mkZFastString n_zencs bs
- return (FastString uid bs zstr)
-
-mkNewFastStringByteString :: ByteString -> Int
- -> IORef Int -> IO FastString
-mkNewFastStringByteString bs uid n_zencs = do
- let zstr = mkZFastString n_zencs bs
- return (FastString uid bs zstr)
-
-copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString
-copyNewFastString ptr len uid n_zencs = do
- fp <- copyBytesToForeignPtr ptr len
- let bs = BS.fromForeignPtr fp 0 len
- zstr = mkZFastString n_zencs bs
- return (FastString uid bs zstr)
-
-copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
-copyBytesToForeignPtr ptr len = do
- fp <- mallocForeignPtrBytes len
- withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
- return fp
-
-cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
-cmpStringPrefix ptr1 ptr2 len =
- do r <- memcmp ptr1 ptr2 len
- return (r == 0)
-
-
-hashStr :: Ptr Word8 -> Int -> Int
- -- use the Addr to produce a hash value between 0 & m (inclusive)
-hashStr (Ptr a#) (I# len#) = loop 0# 0#
+ return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
+
+mkNewFastStringShortByteString :: ShortByteString -> Int
+ -> IORef Int -> IO FastString
+mkNewFastStringShortByteString sbs uid n_zencs = do
+ let zstr = mkZFastString n_zencs sbs
+ return (FastString uid sbs zstr)
+
+hashStr :: ShortByteString -> Int
+ -- produce a hash value between 0 & m (inclusive)
+hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
where
+ !(I# len#) = SBS.length sbs
loop h n | isTrue# (n ==# len#) = I# h
| otherwise = loop h2 (n +# 1#)
where
- !c = ord# (indexCharOffAddr# a# n)
+ !c = indexInt8Array# ba# n
!h2 = (h *# 16777619#) `xorI#` c
-- -----------------------------------------------------------------------------
@@ -543,17 +517,15 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
-lengthFS (FastString _uid bs _zstr) = inlinePerformIO $
- let (fp, off, len) = BS.toForeignPtr bs in
- withForeignPtr fp $ \ptr -> countUTF8Chars (ptr `plusPtr` off) len
+lengthFS (FastString _uid sbs _zstr) = inlinePerformIO $ countUTF8Chars sbs
-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
-nullFS f = BS.null (fs_bs f)
+nullFS f = SBS.null (fs_sbs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ bs _) = utf8DecodeByteString bs
+unpackFS (FastString _ sbs _) = utf8DecodeShortByteString sbs
-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
@@ -568,22 +540,12 @@ appendFS fs1 fs2 = mkFastStringByteString
$ BS.append (bytesFS fs1) (bytesFS fs2)
concatFS :: [FastString] -> FastString
-concatFS = mkFastStringByteString . BS.concat . map fs_bs
+concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
headFS :: FastString -> Char
-headFS (FastString _ bs _)
- | BS.null bs = panic "headFS: Empty FastString"
-headFS (FastString _ bs _) =
- inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
- return (fst (utf8DecodeChar (castPtr ptr)))
-
-tailFS :: FastString -> FastString
-tailFS (FastString _ bs _)
- | BS.null bs = panic "tailFS: Empty FastString"
-tailFS (FastString _ bs _) =
- inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
- do let (_, n) = utf8DecodeChar (castPtr ptr)
- return $! mkFastStringByteString (BS.drop n bs)
+headFS (FastString _ sbs _)
+ | SBS.null sbs = panic "headFS: Empty FastString"
+headFS fs = head $ unpackFS fs
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 91377cad17..2cdf56af93 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) =
-- Getting our fingers dirty a little here, but this is performance-critical
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
- case utf8DecodeChar# (a# `plusAddr#` cur#) of
+ case utf8DecodeCharAddr# (a# `plusAddr#` cur#) of
(# c#, nBytes# #) ->
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')