diff options
author | Ben Gamari <ben@smart-cactus.org> | 2020-10-23 14:16:58 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2020-11-29 18:26:15 -0500 |
commit | b3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25 (patch) | |
tree | 428241e11a6a5d15592964d049344b85d8160cf4 | |
parent | f40b662b9ea555bab6e9729f4165eaca7021d322 (diff) | |
download | haskell-b3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25.tar.gz |
StringBuffer: Rid it of ForeignPtrswip/stringbuffer
Bumps haddock submodule.
-rw-r--r-- | compiler/GHC/Data/ByteArray.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Data/StringBuffer.hs | 252 | ||||
-rw-r--r-- | compiler/GHC/Parser/Header.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Parser/Lexer.x | 4 | ||||
-rw-r--r-- | compiler/GHC/Types/Error.hs | 4 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 1 | ||||
-rw-r--r-- | libraries/ghc-boot/GHC/Utils/Encoding.hs | 38 | ||||
-rw-r--r-- | testsuite/tests/parser/should_run/CountParserDeps.stdout | 3 | ||||
m--------- | utils/haddock | 0 |
9 files changed, 181 insertions, 128 deletions
diff --git a/compiler/GHC/Data/ByteArray.hs b/compiler/GHC/Data/ByteArray.hs index 5121ed4a57..19591092fa 100644 --- a/compiler/GHC/Data/ByteArray.hs +++ b/compiler/GHC/Data/ByteArray.hs @@ -14,6 +14,7 @@ module GHC.Data.ByteArray , MutableByteArray , getMutableByteArray , unsafeMutableByteArrayContents + , sizeofMutableByteArray , newMutableByteArray , newPinnedMutableByteArray , copyByteArray @@ -92,6 +93,10 @@ newPinnedMutableByteArray (I# size) = IO $ \s -> case newPinnedByteArray# size s of (# s', mba #) -> (# s', MutableByteArray mba #) +sizeofMutableByteArray :: MutableByteArray -> Int +sizeofMutableByteArray (MutableByteArray mba) = + I# (sizeofMutableByteArray# mba) + copyByteArray :: ByteArray -- ^ source -> Int -- ^ source offset diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs index 42ab89f8cc..9873975396 100644 --- a/compiler/GHC/Data/StringBuffer.hs +++ b/compiler/GHC/Data/StringBuffer.hs @@ -17,8 +17,7 @@ Buffers for scanning string input stored in external arrays. module GHC.Data.StringBuffer ( - StringBuffer(..), - -- non-abstract for vs\/HaskellService + StringBuffer, -- * Creation\/destruction hGetStringBuffer, @@ -26,8 +25,11 @@ module GHC.Data.StringBuffer hPutStringBuffer, appendStringBuffers, stringToStringBuffer, + byteStringToStringBuffer, + withStringBufferContents, -- * Inspection + lengthStringBuffer, nextChar, currentChar, prevChar, @@ -51,13 +53,18 @@ module GHC.Data.StringBuffer #include "HsVersions.h" import GHC.Prelude +import GHC.Stack import GHC.Utils.Encoding import GHC.Data.FastString +import GHC.Data.ByteArray import GHC.Utils.IO.Unsafe import GHC.Utils.Panic.Plain import GHC.Utils.Misc +import Foreign.C.String +import qualified Data.ByteString as BS +import qualified Data.ByteString.Unsafe as BS import Data.Maybe import Control.Exception import System.IO @@ -65,6 +72,7 @@ import System.IO.Unsafe ( unsafePerformIO ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) ) +import GHC.Word import GHC.Exts import Foreign @@ -72,18 +80,15 @@ import Foreign -- ----------------------------------------------------------------------------- -- The StringBuffer type --- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- | A 'StringBuffer' is an internal pointer to a sized chunk of bytes. -- The bytes are intended to be *immutable*. There are pure --- operations to read the contents of a StringBuffer. --- --- A StringBuffer may have a finalizer, depending on how it was --- obtained. +-- operations to read the contents of a 'StringBuffer'. -- data StringBuffer = StringBuffer { - buf :: {-# UNPACK #-} !(ForeignPtr Word8), - len :: {-# UNPACK #-} !Int, -- length - cur :: {-# UNPACK #-} !Int -- current pos + buf :: {-# UNPACK #-} !ByteArray, + cur :: {-# UNPACK #-} !Int + -- ^ Current position in bytes. } -- The buffer is assumed to be UTF-8 encoded, and furthermore -- we add three @\'\\0\'@ bytes to the end as sentinels so that the @@ -92,9 +97,17 @@ data StringBuffer instance Show StringBuffer where showsPrec _ s = showString "<stringbuffer(" - . shows (len s) . showString "," . shows (cur s) + . shows (cur s) . showString ")>" +isValid :: StringBuffer -> Bool +isValid sb = sizeofByteArray (buf sb) >= cur sb + +checkValid :: HasCallStack => StringBuffer -> StringBuffer +checkValid sb + | not (isValid sb) = error "isValid" + | otherwise = sb + -- ----------------------------------------------------------------------------- -- Creation / Destruction @@ -102,34 +115,35 @@ instance Show StringBuffer where -- managed by the garbage collector. hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do - h <- openBinaryFile fname ReadMode - size_i <- hFileSize h - offset_i <- skipBOM h size_i 0 -- offset is 0 initially - let size = fromIntegral $ size_i - offset_i - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - r <- if size == 0 then return 0 else hGetBuf h ptr size - hClose h - if (r /= size) - then ioError (userError "short read of file") - else newUTF8StringBuffer buf ptr size + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + offset_i <- skipBOM h size_i 0 -- offset is 0 initially + let size = fromIntegral $ size_i - offset_i + buf <- newPinnedMutableByteArray (size+3) + r <- if size == 0 + then return 0 + else hGetBuf h (unsafeMutableByteArrayContents buf) size + hClose h + if r /= size + then ioError (userError "short read of file") + else newUTF8StringBuffer buf size hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer -hGetStringBufferBlock handle wanted - = do size_i <- hFileSize handle - offset_i <- hTell handle >>= skipBOM handle size_i - let size = min wanted (fromIntegral $ size_i-offset_i) - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> - do r <- if size == 0 then return 0 else hGetBuf handle ptr size - if r /= size - then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) - else newUTF8StringBuffer buf ptr size +hGetStringBufferBlock handle wanted = do + size_i <- hFileSize handle + offset_i <- hTell handle >>= skipBOM handle size_i + let size = min wanted (fromIntegral $ size_i-offset_i) + buf <- newPinnedMutableByteArray (size+3) + r <- if size == 0 + then return 0 + else hGetBuf handle (unsafeMutableByteArrayContents buf) size + if r /= size + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf size hPutStringBuffer :: Handle -> StringBuffer -> IO () -hPutStringBuffer hdl (StringBuffer buf len cur) - = withForeignPtr (plusForeignPtr buf cur) $ \ptr -> - hPutBuf hdl ptr len +hPutStringBuffer hdl (StringBuffer buf cur) = do + withByteArrayContents buf $ \ptr -> hPutBuf hdl (ptr `plusPtr` cur) (sizeofByteArray buf) -- | Skip the byte-order mark if there is one (see #1744 and #6016), -- and return the new position of the handle in bytes. @@ -156,39 +170,49 @@ skipBOM h size offset = where safeEncoding = mkUTF8 IgnoreCodingFailure -newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer -newUTF8StringBuffer buf ptr size = do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] +-- | @newUTF8StringBuffer buf size@ creates a 'StringBuffer' from a +-- 'MutableByteArray' of length @size+3@ containing UTF-8 encoded text. A three +-- byte sentinel will be added to the end of the buffer. +newUTF8StringBuffer :: MutableByteArray -> Int -> IO StringBuffer +newUTF8StringBuffer buf size = do + ASSERTM(return $ sizeofMutableByteArray buf == (size + 3)) -- sentinels for UTF-8 decoding - return $ StringBuffer buf size 0 + writeWord8Array buf (size+0) 0 + writeWord8Array buf (size+1) 0 + writeWord8Array buf (size+3) 0 + buf' <- unsafeFreezeByteArray buf + return $ StringBuffer buf' 0 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer -appendStringBuffers sb1 sb2 - = do newBuf <- mallocForeignPtrArray (size+3) - withForeignPtr newBuf $ \ptr -> - withForeignPtr (buf sb1) $ \sb1Ptr -> - withForeignPtr (buf sb2) $ \sb2Ptr -> - do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len - copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len - pokeArray (ptr `advancePtr` size) [0,0,0] - return (StringBuffer newBuf size 0) - where sb1_len = calcLen sb1 - sb2_len = calcLen sb2 - calcLen sb = len sb - cur sb - size = sb1_len + sb2_len +appendStringBuffers sb1 sb2 = do + dst <- newPinnedMutableByteArray (size+3) + copyByteArray (buf sb1) (cur sb1) dst 0 sb1_len + copyByteArray (buf sb2) (cur sb2) dst sb1_len sb2_len + newUTF8StringBuffer dst size + where + sb1_len = lengthStringBuffer sb1 + sb2_len = lengthStringBuffer sb2 + size = sb1_len + sb2_len + +withStringBufferContents :: StringBuffer -> (CStringLen -> IO a) -> IO a +withStringBufferContents sb@(StringBuffer buf cur) action = + withByteArrayContents buf $ \p -> action (p `plusPtr` cur, lengthStringBuffer sb) + +byteStringToStringBuffer :: BS.ByteString -> StringBuffer +byteStringToStringBuffer bs = unsafePerformIO $ do + let size = BS.length bs + buf <- newPinnedMutableByteArray (size+3) + BS.unsafeUseAsCString bs (\p -> copyAddrToMutableByteArray p buf 0 size) + newUTF8StringBuffer buf size -- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer -- is automatically managed by the garbage collector. stringToStringBuffer :: String -> StringBuffer -stringToStringBuffer str = - unsafePerformIO $ do +stringToStringBuffer str = unsafePerformIO $ do let size = utf8EncodedLength str - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) + buf <- newPinnedMutableByteArray (size+3) + utf8EncodeString (unsafeMutableByteArrayContents buf) str + newUTF8StringBuffer buf size -- ----------------------------------------------------------------------------- -- Grab a character @@ -200,14 +224,11 @@ stringToStringBuffer str = -- character cannot be decoded as UTF-8, @\'\\0\'@ is returned. {-# INLINE nextChar #-} nextChar :: StringBuffer -> (Char,StringBuffer) -nextChar (StringBuffer buf len (I# cur#)) = +nextChar sb@(StringBuffer buf (I# cur#)) = -- Getting our fingers dirty a little here, but this is performance-critical - inlinePerformIO $ - withForeignPtr buf $ \(Ptr a#) -> - case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of - (# c#, nBytes# #) -> - let cur' = I# (cur# +# nBytes#) in - return (C# c#, StringBuffer buf len cur') + case utf8DecodeCharByteArray# (getByteArray buf) cur# of + (# c#, nBytes# #) -> + (C# c#, checkValid $ sb { cur = I# (cur# +# nBytes#) }) -- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous -- to 'Data.List.head'). __Warning:__ The behavior is undefined if the @@ -217,12 +238,11 @@ currentChar :: StringBuffer -> Char currentChar = fst . nextChar prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer _ _ 0) deflt = deflt -prevChar (StringBuffer buf _ cur) _ = - inlinePerformIO $ - withForeignPtr buf $ \p -> do - p' <- utf8PrevChar (p `plusPtr` cur) - return (fst (utf8DecodeChar p')) +prevChar (StringBuffer _ 0) deflt = deflt +prevChar (StringBuffer buf cur) _ = + let !(I# p') = utf8PrevChar (getByteArray buf) cur + !(# c, _ #) = utf8DecodeCharByteArray# (getByteArray buf) p' + in C# c -- ----------------------------------------------------------------------------- -- Moving @@ -241,7 +261,7 @@ stepOn s = snd (nextChar s) offsetBytes :: Int -- ^ @n@, the number of bytes -> StringBuffer -> StringBuffer -offsetBytes i s = s { cur = cur s + i } +offsetBytes i s = checkValid $ s { cur = cur (checkValid s) + i } -- | Compute the difference in offset between two 'StringBuffer's that share -- the same buffer. __Warning:__ The behavior is undefined if the @@ -249,33 +269,34 @@ offsetBytes i s = s { cur = cur s + i } byteDiff :: StringBuffer -> StringBuffer -> Int byteDiff s1 s2 = cur s2 - cur s1 +lengthStringBuffer :: StringBuffer -> Int +lengthStringBuffer sb = sizeofByteArray (buf sb) - cur sb - 3 + -- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null'). atEnd :: StringBuffer -> Bool -atEnd (StringBuffer _ l c) = l == c +atEnd sb = lengthStringBuffer sb == 0 -- | Computes a 'StringBuffer' which points to the first character of the -- wanted line. Lines begin at 1. atLine :: Int -> StringBuffer -> Maybe StringBuffer -atLine line sb@(StringBuffer buf len _) = - inlinePerformIO $ - withForeignPtr buf $ \p -> do - p' <- skipToLine line len p - if p' == nullPtr - then return Nothing - else - let - delta = p' `minusPtr` p - in return $ Just (sb { cur = delta - , len = len - delta - }) - +atLine line sb@(StringBuffer buf _) = + inlinePerformIO $ withByteArrayContents buf $ \p -> do + p' <- skipToLine line (lengthStringBuffer sb) p + if p' == nullPtr + then return Nothing + else + let !delta = p' `minusPtr` p + in return $! Just $! checkValid $ sb { cur = delta } + +-- | @skipToLine line len op0@ finds the byte offset to the beginning of +-- the given line number. skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8) skipToLine !line !len !op0 = go 1 op0 where - !opend = op0 `plusPtr` len + !op_end = op0 `plusPtr` len go !i_line !op - | op >= opend = pure nullPtr + | op >= op_end = pure nullPtr | i_line == line = pure op | otherwise = do w <- peek op :: IO Word8 @@ -300,39 +321,46 @@ lexemeToString :: StringBuffer -> Int -- ^ @n@, the number of bytes -> String lexemeToString _ 0 = "" -lexemeToString (StringBuffer buf _ cur) bytes = - utf8DecodeStringLazy buf cur bytes +lexemeToString sb bytes + | lengthStringBuffer sb < bytes = panic "lexemeToString: overflow 1" + | not (isValid sb) = panic "lexemeToString: overflow 2" +lexemeToString (StringBuffer buf (I# cur#)) (I# bytes#) = + utf8DecodeByteArrayLazy# (getByteArray buf) cur# bytes# lexemeToFastString :: StringBuffer -> Int -- ^ @n@, the number of bytes -> FastString lexemeToFastString _ 0 = nilFS -lexemeToFastString (StringBuffer buf _ cur) len = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> - return $! mkFastStringBytes (ptr `plusPtr` cur) len +lexemeToFastString sb len | len > lengthStringBuffer sb = panic "lexemeToFastString" +lexemeToFastString (StringBuffer buf cur) len = + inlinePerformIO $ + withByteArrayContents buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len -- | Return the previous @n@ characters (or fewer if we are less than @n@ -- characters into the buffer. decodePrevNChars :: Int -> StringBuffer -> String -decodePrevNChars n (StringBuffer buf _ cur) = - inlinePerformIO $ withForeignPtr buf $ \p0 -> - go p0 n "" (p0 `plusPtr` (cur - 1)) +decodePrevNChars n (StringBuffer buf0 cur) = + go (getByteArray buf0) (min n (cur - 1)) "" (cur - 1) where - go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String - go buf0 n acc p | n == 0 || buf0 >= p = return acc - go buf0 n acc p = do - p' <- utf8PrevChar p - let (c,_) = utf8DecodeChar p' - go buf0 (n - 1) (c:acc) p' + go :: ByteArray# -> Int -> String -> Int -> String + go buf n acc ofs + | n == 0 = acc + | otherwise = + let !ofs'@(I# ofs'#) = utf8PrevChar buf ofs + !(# c,_ #) = utf8DecodeCharByteArray# buf ofs'# + in go buf (n - 1) (C# c:acc) ofs' -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int - = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let - go i x | i == len = x - | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of - '_' -> go (i + 1) x -- skip "_" (#14473) - char -> go (i + 1) (x * radix + toInteger (char_to_int char)) - in go 0 0 +parseUnsignedInteger (StringBuffer buf (I# cur)) (I# len) radix char_to_int + = go (len +# cur) cur 0 + where + go :: Int# -> Int# -> Integer -> Integer + go end i !acc + | isTrue# (i ==# end) = acc + | otherwise = + case utf8DecodeCharByteArray# (getByteArray buf) i of + (# '_'#, _ #) -> go end (i +# 1#) acc -- skip "_" (#14473) + (# char, _ #) -> go end (i +# 1#) (acc * radix + toInteger (char_to_int (C# char))) diff --git a/compiler/GHC/Parser/Header.hs b/compiler/GHC/Parser/Header.hs index fdf854ad8e..8532cae63f 100644 --- a/compiler/GHC/Parser/Header.hs +++ b/compiler/GHC/Parser/Header.hs @@ -214,7 +214,7 @@ lazyGetToks popts filename handle = do -- counteracts the quadratic slowdown we otherwise get for very -- large module names (#5981) nextbuf <- hGetStringBufferBlock handle new_size - if (len nextbuf == 0) then lazyLexBuf handle state True new_size else do + if lengthStringBuffer nextbuf == 0 then lazyLexBuf handle state True new_size else do newbuf <- appendStringBuffers (buffer state) nextbuf unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False new_size diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x index 4d7b1ab157..2bf254b7c6 100644 --- a/compiler/GHC/Parser/Lexer.x +++ b/compiler/GHC/Parser/Lexer.x @@ -1894,7 +1894,7 @@ lex_string_tok span buf _len = do ITprimstring _ bs -> ITprimstring (SourceText src) bs ITstring _ s -> ITstring (SourceText src) s _ -> panic "lex_string_tok" - src = lexemeToString buf (cur bufEnd - cur buf) + src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd) return (L (mkPsSpan (psSpanStart span) end) tok') lex_string :: String -> P Token @@ -1994,7 +1994,7 @@ finish_char_tok buf loc ch -- We've already seen the closing quote -- Just need to check for trailing # = do magicHash <- getBit MagicHashBit i@(AI end bufEnd) <- getInput - let src = lexemeToString buf (cur bufEnd - cur buf) + let src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd) if magicHash then do case alexGetChar' i of Just ('#',i@(AI end _)) -> do diff --git a/compiler/GHC/Types/Error.hs b/compiler/GHC/Types/Error.hs index cb624c6c99..1510c42160 100644 --- a/compiler/GHC/Types/Error.hs +++ b/compiler/GHC/Types/Error.hs @@ -30,7 +30,7 @@ import GHC.Utils.Outputable as Outputable import qualified GHC.Utils.Ppr.Colour as Col import GHC.Types.SrcLoc as SrcLoc import GHC.Data.FastString (unpackFS) -import GHC.Data.StringBuffer (atLine, hGetStringBuffer, len, lexemeToString) +import GHC.Data.StringBuffer (atLine, hGetStringBuffer, lengthStringBuffer, lexemeToString) import GHC.Utils.Json import System.IO.Error ( catchIOError ) @@ -175,7 +175,7 @@ getCaretDiagnostic severity (RealSrcSpan span _) = content <- hGetStringBuffer fn case atLine i content of Just at_line -> pure $ - case lines (fix <$> lexemeToString at_line (len at_line)) of + case lines (fix <$> lexemeToString at_line (lengthStringBuffer at_line)) of srcLine : _ -> Just srcLine _ -> Nothing _ -> pure Nothing diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 1195fa6cbc..36e18d1865 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -356,6 +356,7 @@ Library GHC.Data.Bag GHC.Data.Bitmap GHC.Data.BooleanFormula + GHC.Data.ByteArray GHC.Data.EnumSet GHC.Data.FastMutInt GHC.Data.FastString diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs index 0f84be189b..38dfc69fbb 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs @@ -17,12 +17,15 @@ module GHC.Utils.Encoding ( -- * UTF-8 utf8DecodeCharAddr#, + utf8DecodeCharByteArray#, utf8PrevChar, utf8CharStart, utf8DecodeChar, utf8DecodeByteString, + utf8DecodeByteArray, utf8DecodeShortByteString, utf8CompareShortByteString, + utf8DecodeByteArrayLazy#, utf8DecodeStringLazy, utf8EncodeChar, utf8EncodeString, @@ -53,6 +56,7 @@ import Data.ByteString (ByteString) import qualified Data.ByteString.Internal as BS import Data.ByteString.Short.Internal (ShortByteString(..)) +import GHC.Word import GHC.Exts -- ----------------------------------------------------------------------------- @@ -131,15 +135,20 @@ utf8DecodeChar !(Ptr a#) = -- the start of the current character is, given any position in a -- stream. This function finds the start of the previous character, -- assuming there *is* a previous character. -utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8) -utf8PrevChar p = utf8CharStart (p `plusPtr` (-1)) +utf8PrevChar :: ByteArray# -> Int -> Int +utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1) -utf8CharStart :: Ptr Word8 -> IO (Ptr Word8) -utf8CharStart p = go p - where go p = do w <- peek p - if w >= 0x80 && w < 0xC0 - then go (p `plusPtr` (-1)) - else return p +utf8CharStart :: ByteArray# -> Int -> Int +utf8CharStart = go + where + go arr ofs@(I# ofs#) + | True + , ofs < 0 || ofs > I# (sizeofByteArray# arr) + = error "utf8CharStart: overflow" + | w >= 0x80 && w < 0xC0 = go arr (ofs - 1) + | otherwise = ofs + where + w = W8# (indexWord8Array# arr ofs#) {-# INLINE utf8DecodeLazy# #-} utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char] @@ -158,6 +167,12 @@ utf8DecodeByteString :: ByteString -> [Char] utf8DecodeByteString (BS.PS fptr offset len) = utf8DecodeStringLazy fptr offset len +utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char] +utf8DecodeByteArrayLazy# a# offset# len# + = unsafeDupablePerformIO $ + let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#) + in utf8DecodeLazy# (return ()) decodeChar len# + utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] utf8DecodeStringLazy fp offset (I# len#) = unsafeDupablePerformIO $ do @@ -200,12 +215,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0# | isTrue# (b1_1 `ltWord#` b2_1) -> LT | otherwise -> go (off1 +# 1#) (off2 +# 1#) -utf8DecodeShortByteString :: ShortByteString -> [Char] -utf8DecodeShortByteString (SBS ba#) +utf8DecodeByteArray :: ByteArray# -> [Char] +utf8DecodeByteArray ba# = unsafeDupablePerformIO $ let len# = sizeofByteArray# ba# in utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len# +utf8DecodeShortByteString :: ShortByteString -> [Char] +utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba# + countUTF8Chars :: ShortByteString -> IO Int countUTF8Chars (SBS ba) = go 0# 0# where diff --git a/testsuite/tests/parser/should_run/CountParserDeps.stdout b/testsuite/tests/parser/should_run/CountParserDeps.stdout index 5fbdf896ee..889f15eef9 100644 --- a/testsuite/tests/parser/should_run/CountParserDeps.stdout +++ b/testsuite/tests/parser/should_run/CountParserDeps.stdout @@ -1,4 +1,4 @@ -Found 235 parser module dependencies +Found 236 parser module dependencies GHC.Builtin.Names GHC.Builtin.PrimOps GHC.Builtin.Types @@ -62,6 +62,7 @@ GHC.Core.Utils GHC.CoreToIface GHC.Data.Bag GHC.Data.BooleanFormula +GHC.Data.ByteArray GHC.Data.EnumSet GHC.Data.FastMutInt GHC.Data.FastString diff --git a/utils/haddock b/utils/haddock -Subproject 25fa8fde84701c010fa466c2648f8f6d10265e8 +Subproject 8850e481da7c65cd023af9b3a37bad02edfb47e |