diff options
author | Daniel Gröber <dxld@darkboxed.org> | 2019-09-26 20:57:11 +0200 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2020-02-11 10:13:46 +0000 |
commit | 0bac745dc5fe700ca15089a3438e8df15889e8ed (patch) | |
tree | e3351522eb0351843e30938e34de13f92e058b0e | |
parent | 63c3418685ad894266e80c1061e2afb92f993996 (diff) | |
download | haskell-0bac745dc5fe700ca15089a3438e8df15889e8ed.tar.gz |
Use ShortByteString for FastString
There are multiple reasons we want this:
- Fewer allocations: ByteString has 3 fields, ShortByteString just has one.
- ByteString memory is pinned:
- This can cause fragmentation issues (see for example #13110) but also
- makes using FastStrings in compact regions impossible.
-rw-r--r-- | compiler/basicTypes/DataCon.hs | 9 | ||||
-rw-r--r-- | compiler/coreSyn/CoreOpt.hs | 2 | ||||
-rw-r--r-- | compiler/utils/Encoding.hs | 148 | ||||
-rw-r--r-- | compiler/utils/FastString.hs | 192 | ||||
-rw-r--r-- | compiler/utils/StringBuffer.hs | 2 |
5 files changed, 178 insertions, 175 deletions
diff --git a/compiler/basicTypes/DataCon.hs b/compiler/basicTypes/DataCon.hs index fcc5fcfed0..b05fa759c4 100644 --- a/compiler/basicTypes/DataCon.hs +++ b/compiler/basicTypes/DataCon.hs @@ -1343,11 +1343,14 @@ dataConRepArgTys (MkData { dcRep = rep dataConIdentity :: DataCon -> ByteString -- We want this string to be UTF-8, so we get the bytes directly from the FastStrings. dataConIdentity dc = LBS.toStrict $ BSB.toLazyByteString $ mconcat - [ BSB.byteString $ bytesFS (unitIdFS (moduleUnitId mod)) + [ BSB.shortByteString $ fastStringToShortByteString $ + unitIdFS $ moduleUnitId mod , BSB.int8 $ fromIntegral (ord ':') - , BSB.byteString $ bytesFS (moduleNameFS (moduleName mod)) + , BSB.shortByteString $ fastStringToShortByteString $ + moduleNameFS $ moduleName mod , BSB.int8 $ fromIntegral (ord '.') - , BSB.byteString $ bytesFS (occNameFS (nameOccName name)) + , BSB.shortByteString $ fastStringToShortByteString $ + occNameFS $ nameOccName name ] where name = dataConName dc mod = ASSERT( isExternalName name ) nameModule name diff --git a/compiler/coreSyn/CoreOpt.hs b/compiler/coreSyn/CoreOpt.hs index c516799bef..8734cdf8eb 100644 --- a/compiler/coreSyn/CoreOpt.hs +++ b/compiler/coreSyn/CoreOpt.hs @@ -1113,7 +1113,7 @@ dealWithStringLiteral fun str co = let strFS = mkFastStringByteString str char = mkConApp charDataCon [mkCharLit (headFS strFS)] - charTail = bytesFS (tailFS strFS) + charTail = BS.tail (bytesFS strFS) -- In singleton strings, just add [] instead of unpackCstring# ""#. rest = if BS.null charTail diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs index b4af68621d..13a2739be1 100644 --- a/compiler/utils/Encoding.hs +++ b/compiler/utils/Encoding.hs @@ -18,9 +18,11 @@ module Encoding ( utf8CharStart, utf8DecodeChar, utf8DecodeByteString, + utf8DecodeShortByteString, utf8DecodeStringLazy, utf8EncodeChar, utf8EncodeString, + utf8EncodeShortByteString, utf8EncodedLength, countUTF8Chars, @@ -36,14 +38,15 @@ module Encoding ( import GhcPrelude import Foreign -import Foreign.ForeignPtr.Unsafe import Data.Char import qualified Data.Char as Char import Numeric import GHC.IO +import GHC.ST import Data.ByteString (ByteString) import qualified Data.ByteString.Internal as BS +import Data.ByteString.Short.Internal (ShortByteString(..)) import GHC.Exts @@ -60,23 +63,23 @@ import GHC.Exts -- before decoding them (see StringBuffer.hs). {-# INLINE utf8DecodeChar# #-} -utf8DecodeChar# :: Addr# -> (# Char#, Int# #) -utf8DecodeChar# a# = - let !ch0 = word2Int# (indexWord8OffAddr# a# 0#) in +utf8DecodeChar# :: (Int# -> Word#) -> (# Char#, Int# #) +utf8DecodeChar# indexWord8# = + let !ch0 = word2Int# (indexWord8# 0#) in case () of _ | isTrue# (ch0 <=# 0x7F#) -> (# chr# ch0, 1# #) | isTrue# ((ch0 >=# 0xC0#) `andI#` (ch0 <=# 0xDF#)) -> - let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8# 1#) in if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +# (ch1 -# 0x80#)), 2# #) | isTrue# ((ch0 >=# 0xE0#) `andI#` (ch0 <=# 0xEF#)) -> - let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8# 1#) in if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else - let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + let !ch2 = word2Int# (indexWord8# 2#) in if isTrue# ((ch2 <# 0x80#) `orI#` (ch2 >=# 0xC0#)) then fail 2# else (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +# ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +# @@ -84,11 +87,11 @@ utf8DecodeChar# a# = 3# #) | isTrue# ((ch0 >=# 0xF0#) `andI#` (ch0 <=# 0xF8#)) -> - let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8# 1#) in if isTrue# ((ch1 <# 0x80#) `orI#` (ch1 >=# 0xC0#)) then fail 1# else - let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + let !ch2 = word2Int# (indexWord8# 2#) in if isTrue# ((ch2 <# 0x80#) `orI#` (ch2 >=# 0xC0#)) then fail 2# else - let !ch3 = word2Int# (indexWord8OffAddr# a# 3#) in + let !ch3 = word2Int# (indexWord8# 3#) in if isTrue# ((ch3 <# 0x80#) `orI#` (ch3 >=# 0xC0#)) then fail 3# else (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +# ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +# @@ -108,7 +111,8 @@ utf8DecodeChar# a# = utf8DecodeChar :: Ptr Word8 -> (Char, Int) utf8DecodeChar (Ptr a#) = - case utf8DecodeChar# a# of (# c#, nBytes# #) -> ( C# c#, I# nBytes# ) + case utf8DecodeChar# (indexWord8OffAddr# a#) of + (# c#, nBytes# #) -> ( C# c#, I# nBytes# ) -- UTF-8 is cleverly designed so that we can always figure out where -- the start of the current character is, given any position in a @@ -125,72 +129,106 @@ utf8CharStart p = go p else return p utf8DecodeByteString :: ByteString -> [Char] -utf8DecodeByteString (BS.PS ptr offset len) - = utf8DecodeStringLazy ptr offset len - -utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] -utf8DecodeStringLazy fptr offset len - = unsafeDupablePerformIO $ unpack start +utf8DecodeByteString (BS.PS fptr offset (I# len#)) + = unsafeDupablePerformIO $ do + withForeignPtr fptr $ \ptr -> + let addr# = unPtr (ptr `plusPtr` offset) + index# = indexWord8OffAddr# addr# in + utf8DecodeLazy# (touchForeignPtr fptr) index# len# + +utf8DecodeShortByteString :: ShortByteString -> [Char] +utf8DecodeShortByteString (SBS ba#) + = unsafeDupablePerformIO $ utf8DecodeLazy# (return ()) index# len# where - !start = unsafeForeignPtrToPtr fptr `plusPtr` offset - !end = start `plusPtr` len + index# = indexWord8Array# ba# + len# = sizeofByteArray# ba# - unpack p - | p >= end = touchForeignPtr fptr >> return [] +{-# INLINE utf8DecodeLazy# #-} +utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char] +utf8DecodeLazy# retain indexWord8# len# + = unpack 0# + where + unpack i# + | isTrue# (i# >=# len#) = retain >> return [] | otherwise = - case utf8DecodeChar# (unPtr p) of - (# c#, nBytes# #) -> do - rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#) - return (C# c# : rest) + case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of + (# c#, nBytes# #) -> do + rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#) + return (C# c# : rest) -countUTF8Chars :: Ptr Word8 -> Int -> IO Int -countUTF8Chars ptr len = go ptr 0 +utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] +utf8DecodeStringLazy fp offset (I# len#) + = unsafeDupablePerformIO $ withForeignPtr fp $ \ptr -> + let ptr# = unPtr (ptr `plusPtr` offset) + index# = indexWord8OffAddr# ptr# in + utf8DecodeLazy# (touchForeignPtr fp) index# len# + +countUTF8Chars :: ShortByteString -> IO Int +countUTF8Chars (SBS ba) = go 0# 0# where - !end = ptr `plusPtr` len - - go p !n - | p >= end = return n - | otherwise = do - case utf8DecodeChar# (unPtr p) of - (# _, nBytes# #) -> go (p `plusPtr#` nBytes#) (n+1) + len# = sizeofByteArray# ba + go i# n# + | isTrue# (i# >=# len#) = + return (I# n#) + | otherwise = do + case utf8DecodeChar# (\j# -> indexWord8Array# ba (i# +# j#)) of + (# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#) unPtr :: Ptr a -> Addr# unPtr (Ptr a) = a -plusPtr# :: Ptr a -> Int# -> Ptr a -plusPtr# ptr nBytes# = ptr `plusPtr` (I# nBytes#) - -utf8EncodeChar :: Char -> Ptr Word8 -> IO (Ptr Word8) -utf8EncodeChar c ptr = +{-# INLINE utf8EncodeChar #-} +utf8EncodeChar :: (Int# -> Word# -> State# s -> State# s) + -> Char -> ST s Int +utf8EncodeChar write# c = let x = ord c in case () of _ | x > 0 && x <= 0x007f -> do - poke ptr (fromIntegral x) - return (ptr `plusPtr` 1) + write 0 x + return 1 -- NB. '\0' is encoded as '\xC0\x80', not '\0'. This is so that we -- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8). | x <= 0x07ff -> do - poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))) - pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F))) - return (ptr `plusPtr` 2) + write 0 (0xC0 .|. ((x `shiftR` 6) .&. 0x1F)) + write 1 (0x80 .|. (x .&. 0x3F)) + return 2 | x <= 0xffff -> do - poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F)) - pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F)) - pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F))) - return (ptr `plusPtr` 3) + write 0 (0xE0 .|. (x `shiftR` 12) .&. 0x0F) + write 1 (0x80 .|. (x `shiftR` 6) .&. 0x3F) + write 2 (0x80 .|. (x .&. 0x3F)) + return 3 | otherwise -> do - poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18))) - pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F))) - pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F))) - pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F))) - return (ptr `plusPtr` 4) + write 0 (0xF0 .|. (x `shiftR` 18)) + write 1 (0x80 .|. ((x `shiftR` 12) .&. 0x3F)) + write 2 (0x80 .|. ((x `shiftR` 6) .&. 0x3F)) + write 3 (0x80 .|. (x .&. 0x3F)) + return 4 + where + {-# INLINE write #-} + write (I# off#) (I# c#) = ST $ \s -> + case write# off# (int2Word# c#) s of + s -> (# s, () #) utf8EncodeString :: Ptr Word8 -> String -> IO () utf8EncodeString ptr str = go ptr str where go !_ [] = return () go ptr (c:cs) = do - ptr' <- utf8EncodeChar c ptr - go ptr' cs + off <- stToIO $ utf8EncodeChar (writeWord8OffAddr# (unPtr ptr)) c + go (ptr `plusPtr` off) cs + +utf8EncodeShortByteString :: String -> IO ShortByteString +utf8EncodeShortByteString str = stToIO $ ST $ \s -> + let !(I# len#) = utf8EncodedLength str in + case newByteArray# len# s of { (# s, mba# #) -> + let ST f_go = go mba# 0# str in + case f_go s of { (# s, () #) -> + case unsafeFreezeByteArray# mba# s of { (# s, ba# #) -> + (# s, SBS ba# #) }}} + where + go _ _ [] = return () + go mba# i# (c:cs) = do + I# off# <- utf8EncodeChar (\j# -> writeWord8Array# mba# (i# +# j#)) c + go mba# (i# +# off#) cs utf8EncodedLength :: String -> Int utf8EncodedLength str = go 0 str diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs index f3423ecf1c..dde684e81e 100644 --- a/compiler/utils/FastString.hs +++ b/compiler/utils/FastString.hs @@ -32,12 +32,16 @@ module FastString ( -- * ByteString - bytesFS, -- :: FastString -> ByteString - fastStringToByteString, -- = bytesFS (kept for haddock) + bytesFS, + fastStringToByteString, mkFastStringByteString, fastZStringToByteString, unsafeMkByteString, + -- * ShortByteString + fastStringToShortByteString, + mkFastStringShortByteString, + -- * FastZString FastZString, hPutFZS, @@ -52,7 +56,6 @@ module FastString mkFastString, mkFastStringBytes, mkFastStringByteList, - mkFastStringForeignPtr, mkFastString#, -- ** Deconstruction @@ -67,7 +70,6 @@ module FastString nullFS, appendFS, headFS, - tailFS, concatFS, consFS, nilFS, @@ -108,10 +110,12 @@ import Control.Concurrent.MVar import Control.DeepSeq import Control.Monad import Data.ByteString (ByteString) +import Data.ByteString.Short (ShortByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC -import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS +import qualified Data.ByteString.Short as SBS +import qualified Data.ByteString.Short.Internal as SBS import Foreign.C import GHC.Exts import System.IO @@ -121,6 +125,7 @@ import Data.Char import Data.Semigroup as Semi import GHC.IO +import GHC.ST import Foreign @@ -132,12 +137,14 @@ import GHC.Base ( unpackCString#, unpackNBytes# ) -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' -bytesFS :: FastString -> ByteString -bytesFS f = fs_bs f +bytesFS, fastStringToByteString :: FastString -> ByteString +bytesFS = fastStringToByteString {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-} -fastStringToByteString :: FastString -> ByteString -fastStringToByteString = bytesFS +fastStringToByteString f = SBS.fromShort $ fs_sbs f + +fastStringToShortByteString :: FastString -> ShortByteString +fastStringToShortByteString = fs_sbs fastZStringToByteString :: FastZString -> ByteString fastZStringToByteString (FastZString bs) = bs @@ -147,9 +154,7 @@ unsafeMkByteString :: String -> ByteString unsafeMkByteString = BSC.pack hashFastString :: FastString -> Int -hashFastString (FastString _ bs _) - = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> - return $ hashStr (castPtr ptr) len +hashFastString (FastString _ sbs _) = hashStr sbs -- ----------------------------------------------------------------------------- @@ -180,7 +185,7 @@ of this string which is used by the compiler internally. -} data FastString = FastString { uniq :: {-# UNPACK #-} !Int, -- unique id - fs_bs :: {-# UNPACK #-} !ByteString, + fs_sbs :: {-# UNPACK #-} !ShortByteString, fs_zenc :: FastZString -- ^ Lazily computed z-encoding of this string. -- @@ -231,9 +236,6 @@ cmpFS f1@(FastString u1 _ _) f2@(FastString u2 _ _) = if u1 == u2 then EQ else compare (bytesFS f1) (bytesFS f2) -foreign import ccall unsafe "memcmp" - memcmp :: Ptr a -> Ptr b -> Int -> IO Int - -- ----------------------------------------------------------------------------- -- Construction @@ -403,12 +405,12 @@ The procedure goes like this: -} mkFastStringWith - :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString -mkFastStringWith mk_fs !ptr !len = do + :: (Int -> IORef Int-> IO FastString) -> ShortByteString -> IO FastString +mkFastStringWith mk_fs sbs = do FastStringTableSegment lock _ buckets# <- readIORef segmentRef let idx# = hashToIndex# buckets# hash# bucket <- IO $ readArray# buckets# idx# - res <- bucket_match bucket len ptr + res <- bucket_match bucket sbs case res of Just found -> return found Nothing -> do @@ -422,13 +424,13 @@ mkFastStringWith mk_fs !ptr !len = do !(FastStringTable uid n_zencs segments#) = stringTable get_uid = atomicModifyIORef' uid $ \n -> (n+1,n) - !(I# hash#) = hashStr ptr len + !(I# hash#) = hashStr sbs (# segmentRef #) = indexArray# segments# (hashToSegment# hash#) insert fs = do FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef let idx# = hashToIndex# buckets# hash# bucket <- IO $ readArray# buckets# idx# - res <- bucket_match bucket len ptr + res <- bucket_match bucket sbs case res of -- The FastString was added by another thread after previous read and -- before we acquired the write lock. @@ -440,102 +442,74 @@ mkFastStringWith mk_fs !ptr !len = do modifyIORef' counter succ return fs -bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString) -bucket_match [] _ _ = return Nothing -bucket_match (v@(FastString _ bs _):ls) len ptr - | len == BS.length bs = do - b <- BS.unsafeUseAsCString bs $ \buf -> - cmpStringPrefix ptr (castPtr buf) len - if b then return (Just v) - else bucket_match ls len ptr - | otherwise = - bucket_match ls len ptr +bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString) +bucket_match [] _ = return Nothing +bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs + | fs_sbs == sbs = return (Just fs) + | otherwise = bucket_match ls sbs mkFastStringBytes :: Ptr Word8 -> Int -> FastString mkFastStringBytes !ptr !len = -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is -- idempotent. - unsafeDupablePerformIO $ - mkFastStringWith (copyNewFastString ptr len) ptr len - --- | Create a 'FastString' from an existing 'ForeignPtr'; the difference --- between this and 'mkFastStringBytes' is that we don't have to copy --- the bytes if the string is new to the table. -mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString -mkFastStringForeignPtr ptr !fp len - = mkFastStringWith (mkNewFastString fp len) ptr len - --- | Create a 'FastString' from an existing 'ForeignPtr'; the difference --- between this and 'mkFastStringBytes' is that we don't have to copy --- the bytes if the string is new to the table. + unsafeDupablePerformIO $ do + sbs <- newSBSFromPtr ptr len + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs + +newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString +newSBSFromPtr (Ptr src#) (I# len#) = + stToIO $ ST $ \s -> + case newByteArray# len# s of { (# s, dst# #) -> + case copyAddrToByteArray# src# dst# 0# len# s of { s -> + case unsafeFreezeByteArray# dst# s of { (# s, ba# #) -> + (# s, SBS.SBS ba# #) }}} + +-- | Create a 'FastString' by copying an existing 'ByteString' mkFastStringByteString :: ByteString -> FastString mkFastStringByteString bs = - inlinePerformIO $ - BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do - let ptr' = castPtr ptr - mkFastStringWith (mkNewFastStringByteString bs) ptr' len + let sbs = SBS.toShort bs in + inlinePerformIO $ + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs + +-- | Create a 'FastString' from an existing 'ShortByteString' without +-- copying. +mkFastStringShortByteString :: ShortByteString -> FastString +mkFastStringShortByteString sbs = + inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs -- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString mkFastString str = inlinePerformIO $ do - let l = utf8EncodedLength str - buf <- mallocForeignPtrBytes l - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - mkFastStringForeignPtr ptr buf l + sbs <- utf8EncodeShortByteString str + mkFastStringWith (mkNewFastStringShortByteString sbs) sbs -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@ mkFastStringByteList :: [Word8] -> FastString -mkFastStringByteList str = mkFastStringByteString (BS.pack str) +mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str) --- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account --- the number of forced z-strings into the passed 'IORef'. -mkZFastString :: IORef Int -> ByteString -> FastZString -mkZFastString n_zencs bs = unsafePerformIO $ do +-- | Creates a (lazy) Z-encoded 'FastString' from a 'ShortByteString' and +-- account the number of forced z-strings into the passed 'IORef'. +mkZFastString :: IORef Int -> ShortByteString -> FastZString +mkZFastString n_zencs sbs = unsafePerformIO $ do atomicModifyIORef' n_zencs $ \n -> (n+1, ()) - return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs)) - -mkNewFastString :: ForeignPtr Word8 -> Int -> Int - -> IORef Int -> IO FastString -mkNewFastString fp len uid n_zencs = do - let bs = BS.fromForeignPtr fp 0 len - zstr = mkZFastString n_zencs bs - return (FastString uid bs zstr) - -mkNewFastStringByteString :: ByteString -> Int - -> IORef Int -> IO FastString -mkNewFastStringByteString bs uid n_zencs = do - let zstr = mkZFastString n_zencs bs - return (FastString uid bs zstr) - -copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString -copyNewFastString ptr len uid n_zencs = do - fp <- copyBytesToForeignPtr ptr len - let bs = BS.fromForeignPtr fp 0 len - zstr = mkZFastString n_zencs bs - return (FastString uid bs zstr) - -copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8) -copyBytesToForeignPtr ptr len = do - fp <- mallocForeignPtrBytes len - withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len - return fp - -cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool -cmpStringPrefix ptr1 ptr2 len = - do r <- memcmp ptr1 ptr2 len - return (r == 0) - - -hashStr :: Ptr Word8 -> Int -> Int - -- use the Addr to produce a hash value between 0 & m (inclusive) -hashStr (Ptr a#) (I# len#) = loop 0# 0# + return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs)) + +mkNewFastStringShortByteString :: ShortByteString -> Int + -> IORef Int -> IO FastString +mkNewFastStringShortByteString sbs uid n_zencs = do + let zstr = mkZFastString n_zencs sbs + return (FastString uid sbs zstr) + +hashStr :: ShortByteString -> Int + -- produce a hash value between 0 & m (inclusive) +hashStr sbs@(SBS.SBS ba#) = loop 0# 0# where + !(I# len#) = SBS.length sbs loop h n | isTrue# (n ==# len#) = I# h | otherwise = loop h2 (n +# 1#) where - !c = ord# (indexCharOffAddr# a# n) + !c = indexInt8Array# ba# n !h2 = (h *# 16777619#) `xorI#` c -- ----------------------------------------------------------------------------- @@ -543,17 +517,15 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0# -- | Returns the length of the 'FastString' in characters lengthFS :: FastString -> Int -lengthFS (FastString _uid bs _zstr) = inlinePerformIO $ - let (fp, off, len) = BS.toForeignPtr bs in - withForeignPtr fp $ \ptr -> countUTF8Chars (ptr `plusPtr` off) len +lengthFS (FastString _uid sbs _zstr) = inlinePerformIO $ countUTF8Chars sbs -- | Returns @True@ if the 'FastString' is empty nullFS :: FastString -> Bool -nullFS f = BS.null (fs_bs f) +nullFS f = SBS.null (fs_sbs f) -- | Unpacks and decodes the FastString unpackFS :: FastString -> String -unpackFS (FastString _ bs _) = utf8DecodeByteString bs +unpackFS (FastString _ sbs _) = utf8DecodeShortByteString sbs -- | Returns a Z-encoded version of a 'FastString'. This might be the -- original, if it was already Z-encoded. The first time this @@ -568,22 +540,12 @@ appendFS fs1 fs2 = mkFastStringByteString $ BS.append (bytesFS fs1) (bytesFS fs2) concatFS :: [FastString] -> FastString -concatFS = mkFastStringByteString . BS.concat . map fs_bs +concatFS = mkFastStringShortByteString . mconcat . map fs_sbs headFS :: FastString -> Char -headFS (FastString _ bs _) - | BS.null bs = panic "headFS: Empty FastString" -headFS (FastString _ bs _) = - inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> - return (fst (utf8DecodeChar (castPtr ptr))) - -tailFS :: FastString -> FastString -tailFS (FastString _ bs _) - | BS.null bs = panic "tailFS: Empty FastString" -tailFS (FastString _ bs _) = - inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> - do let (_, n) = utf8DecodeChar (castPtr ptr) - return $! mkFastStringByteString (BS.drop n bs) +headFS (FastString _ sbs _) + | SBS.null sbs = panic "headFS: Empty FastString" +headFS fs = head $ unpackFS fs consFS :: Char -> FastString -> FastString consFS c fs = mkFastString (c : unpackFS fs) diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs index 91377cad17..77a3426e74 100644 --- a/compiler/utils/StringBuffer.hs +++ b/compiler/utils/StringBuffer.hs @@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) = -- Getting our fingers dirty a little here, but this is performance-critical inlinePerformIO $ do withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (a# `plusAddr#` cur#) of + case utf8DecodeChar# (indexWord8OffAddr# (a# `plusAddr#` cur#)) of (# c#, nBytes# #) -> let cur' = I# (cur# +# nBytes#) in return (C# c#, StringBuffer buf len cur') |