summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2020-10-23 14:16:58 -0400
committerBen Gamari <ben@smart-cactus.org>2020-11-29 18:26:15 -0500
commitb3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25 (patch)
tree428241e11a6a5d15592964d049344b85d8160cf4
parentf40b662b9ea555bab6e9729f4165eaca7021d322 (diff)
downloadhaskell-wip/stringbuffer.tar.gz
StringBuffer: Rid it of ForeignPtrswip/stringbuffer
Bumps haddock submodule.
-rw-r--r--compiler/GHC/Data/ByteArray.hs5
-rw-r--r--compiler/GHC/Data/StringBuffer.hs252
-rw-r--r--compiler/GHC/Parser/Header.hs2
-rw-r--r--compiler/GHC/Parser/Lexer.x4
-rw-r--r--compiler/GHC/Types/Error.hs4
-rw-r--r--compiler/ghc.cabal.in1
-rw-r--r--libraries/ghc-boot/GHC/Utils/Encoding.hs38
-rw-r--r--testsuite/tests/parser/should_run/CountParserDeps.stdout3
m---------utils/haddock0
9 files changed, 181 insertions, 128 deletions
diff --git a/compiler/GHC/Data/ByteArray.hs b/compiler/GHC/Data/ByteArray.hs
index 5121ed4a57..19591092fa 100644
--- a/compiler/GHC/Data/ByteArray.hs
+++ b/compiler/GHC/Data/ByteArray.hs
@@ -14,6 +14,7 @@ module GHC.Data.ByteArray
, MutableByteArray
, getMutableByteArray
, unsafeMutableByteArrayContents
+ , sizeofMutableByteArray
, newMutableByteArray
, newPinnedMutableByteArray
, copyByteArray
@@ -92,6 +93,10 @@ newPinnedMutableByteArray (I# size) = IO $ \s ->
case newPinnedByteArray# size s of
(# s', mba #) -> (# s', MutableByteArray mba #)
+sizeofMutableByteArray :: MutableByteArray -> Int
+sizeofMutableByteArray (MutableByteArray mba) =
+ I# (sizeofMutableByteArray# mba)
+
copyByteArray
:: ByteArray -- ^ source
-> Int -- ^ source offset
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index 42ab89f8cc..9873975396 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -17,8 +17,7 @@ Buffers for scanning string input stored in external arrays.
module GHC.Data.StringBuffer
(
- StringBuffer(..),
- -- non-abstract for vs\/HaskellService
+ StringBuffer,
-- * Creation\/destruction
hGetStringBuffer,
@@ -26,8 +25,11 @@ module GHC.Data.StringBuffer
hPutStringBuffer,
appendStringBuffers,
stringToStringBuffer,
+ byteStringToStringBuffer,
+ withStringBufferContents,
-- * Inspection
+ lengthStringBuffer,
nextChar,
currentChar,
prevChar,
@@ -51,13 +53,18 @@ module GHC.Data.StringBuffer
#include "HsVersions.h"
import GHC.Prelude
+import GHC.Stack
import GHC.Utils.Encoding
import GHC.Data.FastString
+import GHC.Data.ByteArray
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
import GHC.Utils.Misc
+import Foreign.C.String
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
import Data.Maybe
import Control.Exception
import System.IO
@@ -65,6 +72,7 @@ import System.IO.Unsafe ( unsafePerformIO )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
+import GHC.Word
import GHC.Exts
import Foreign
@@ -72,18 +80,15 @@ import Foreign
-- -----------------------------------------------------------------------------
-- The StringBuffer type
--- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- | A 'StringBuffer' is an internal pointer to a sized chunk of bytes.
-- The bytes are intended to be *immutable*. There are pure
--- operations to read the contents of a StringBuffer.
---
--- A StringBuffer may have a finalizer, depending on how it was
--- obtained.
+-- operations to read the contents of a 'StringBuffer'.
--
data StringBuffer
= StringBuffer {
- buf :: {-# UNPACK #-} !(ForeignPtr Word8),
- len :: {-# UNPACK #-} !Int, -- length
- cur :: {-# UNPACK #-} !Int -- current pos
+ buf :: {-# UNPACK #-} !ByteArray,
+ cur :: {-# UNPACK #-} !Int
+ -- ^ Current position in bytes.
}
-- The buffer is assumed to be UTF-8 encoded, and furthermore
-- we add three @\'\\0\'@ bytes to the end as sentinels so that the
@@ -92,9 +97,17 @@ data StringBuffer
instance Show StringBuffer where
showsPrec _ s = showString "<stringbuffer("
- . shows (len s) . showString "," . shows (cur s)
+ . shows (cur s)
. showString ")>"
+isValid :: StringBuffer -> Bool
+isValid sb = sizeofByteArray (buf sb) >= cur sb
+
+checkValid :: HasCallStack => StringBuffer -> StringBuffer
+checkValid sb
+ | not (isValid sb) = error "isValid"
+ | otherwise = sb
+
-- -----------------------------------------------------------------------------
-- Creation / Destruction
@@ -102,34 +115,35 @@ instance Show StringBuffer where
-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
- h <- openBinaryFile fname ReadMode
- size_i <- hFileSize h
- offset_i <- skipBOM h size_i 0 -- offset is 0 initially
- let size = fromIntegral $ size_i - offset_i
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr -> do
- r <- if size == 0 then return 0 else hGetBuf h ptr size
- hClose h
- if (r /= size)
- then ioError (userError "short read of file")
- else newUTF8StringBuffer buf ptr size
+ h <- openBinaryFile fname ReadMode
+ size_i <- hFileSize h
+ offset_i <- skipBOM h size_i 0 -- offset is 0 initially
+ let size = fromIntegral $ size_i - offset_i
+ buf <- newPinnedMutableByteArray (size+3)
+ r <- if size == 0
+ then return 0
+ else hGetBuf h (unsafeMutableByteArrayContents buf) size
+ hClose h
+ if r /= size
+ then ioError (userError "short read of file")
+ else newUTF8StringBuffer buf size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
-hGetStringBufferBlock handle wanted
- = do size_i <- hFileSize handle
- offset_i <- hTell handle >>= skipBOM handle size_i
- let size = min wanted (fromIntegral $ size_i-offset_i)
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr ->
- do r <- if size == 0 then return 0 else hGetBuf handle ptr size
- if r /= size
- then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
- else newUTF8StringBuffer buf ptr size
+hGetStringBufferBlock handle wanted = do
+ size_i <- hFileSize handle
+ offset_i <- hTell handle >>= skipBOM handle size_i
+ let size = min wanted (fromIntegral $ size_i-offset_i)
+ buf <- newPinnedMutableByteArray (size+3)
+ r <- if size == 0
+ then return 0
+ else hGetBuf handle (unsafeMutableByteArrayContents buf) size
+ if r /= size
+ then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+ else newUTF8StringBuffer buf size
hPutStringBuffer :: Handle -> StringBuffer -> IO ()
-hPutStringBuffer hdl (StringBuffer buf len cur)
- = withForeignPtr (plusForeignPtr buf cur) $ \ptr ->
- hPutBuf hdl ptr len
+hPutStringBuffer hdl (StringBuffer buf cur) = do
+ withByteArrayContents buf $ \ptr -> hPutBuf hdl (ptr `plusPtr` cur) (sizeofByteArray buf)
-- | Skip the byte-order mark if there is one (see #1744 and #6016),
-- and return the new position of the handle in bytes.
@@ -156,39 +170,49 @@ skipBOM h size offset =
where
safeEncoding = mkUTF8 IgnoreCodingFailure
-newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
-newUTF8StringBuffer buf ptr size = do
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+-- | @newUTF8StringBuffer buf size@ creates a 'StringBuffer' from a
+-- 'MutableByteArray' of length @size+3@ containing UTF-8 encoded text. A three
+-- byte sentinel will be added to the end of the buffer.
+newUTF8StringBuffer :: MutableByteArray -> Int -> IO StringBuffer
+newUTF8StringBuffer buf size = do
+ ASSERTM(return $ sizeofMutableByteArray buf == (size + 3))
-- sentinels for UTF-8 decoding
- return $ StringBuffer buf size 0
+ writeWord8Array buf (size+0) 0
+ writeWord8Array buf (size+1) 0
+ writeWord8Array buf (size+3) 0
+ buf' <- unsafeFreezeByteArray buf
+ return $ StringBuffer buf' 0
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
-appendStringBuffers sb1 sb2
- = do newBuf <- mallocForeignPtrArray (size+3)
- withForeignPtr newBuf $ \ptr ->
- withForeignPtr (buf sb1) $ \sb1Ptr ->
- withForeignPtr (buf sb2) $ \sb2Ptr ->
- do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
- copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
- pokeArray (ptr `advancePtr` size) [0,0,0]
- return (StringBuffer newBuf size 0)
- where sb1_len = calcLen sb1
- sb2_len = calcLen sb2
- calcLen sb = len sb - cur sb
- size = sb1_len + sb2_len
+appendStringBuffers sb1 sb2 = do
+ dst <- newPinnedMutableByteArray (size+3)
+ copyByteArray (buf sb1) (cur sb1) dst 0 sb1_len
+ copyByteArray (buf sb2) (cur sb2) dst sb1_len sb2_len
+ newUTF8StringBuffer dst size
+ where
+ sb1_len = lengthStringBuffer sb1
+ sb2_len = lengthStringBuffer sb2
+ size = sb1_len + sb2_len
+
+withStringBufferContents :: StringBuffer -> (CStringLen -> IO a) -> IO a
+withStringBufferContents sb@(StringBuffer buf cur) action =
+ withByteArrayContents buf $ \p -> action (p `plusPtr` cur, lengthStringBuffer sb)
+
+byteStringToStringBuffer :: BS.ByteString -> StringBuffer
+byteStringToStringBuffer bs = unsafePerformIO $ do
+ let size = BS.length bs
+ buf <- newPinnedMutableByteArray (size+3)
+ BS.unsafeUseAsCString bs (\p -> copyAddrToMutableByteArray p buf 0 size)
+ newUTF8StringBuffer buf size
-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer
-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
-stringToStringBuffer str =
- unsafePerformIO $ do
+stringToStringBuffer str = unsafePerformIO $ do
let size = utf8EncodedLength str
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr -> do
- utf8EncodeString ptr str
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- -- sentinels for UTF-8 decoding
- return (StringBuffer buf size 0)
+ buf <- newPinnedMutableByteArray (size+3)
+ utf8EncodeString (unsafeMutableByteArrayContents buf) str
+ newUTF8StringBuffer buf size
-- -----------------------------------------------------------------------------
-- Grab a character
@@ -200,14 +224,11 @@ stringToStringBuffer str =
-- character cannot be decoded as UTF-8, @\'\\0\'@ is returned.
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
-nextChar (StringBuffer buf len (I# cur#)) =
+nextChar sb@(StringBuffer buf (I# cur#)) =
-- Getting our fingers dirty a little here, but this is performance-critical
- inlinePerformIO $
- withForeignPtr buf $ \(Ptr a#) ->
- case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
- (# c#, nBytes# #) ->
- let cur' = I# (cur# +# nBytes#) in
- return (C# c#, StringBuffer buf len cur')
+ case utf8DecodeCharByteArray# (getByteArray buf) cur# of
+ (# c#, nBytes# #) ->
+ (C# c#, checkValid $ sb { cur = I# (cur# +# nBytes#) })
-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the
@@ -217,12 +238,11 @@ currentChar :: StringBuffer -> Char
currentChar = fst . nextChar
prevChar :: StringBuffer -> Char -> Char
-prevChar (StringBuffer _ _ 0) deflt = deflt
-prevChar (StringBuffer buf _ cur) _ =
- inlinePerformIO $
- withForeignPtr buf $ \p -> do
- p' <- utf8PrevChar (p `plusPtr` cur)
- return (fst (utf8DecodeChar p'))
+prevChar (StringBuffer _ 0) deflt = deflt
+prevChar (StringBuffer buf cur) _ =
+ let !(I# p') = utf8PrevChar (getByteArray buf) cur
+ !(# c, _ #) = utf8DecodeCharByteArray# (getByteArray buf) p'
+ in C# c
-- -----------------------------------------------------------------------------
-- Moving
@@ -241,7 +261,7 @@ stepOn s = snd (nextChar s)
offsetBytes :: Int -- ^ @n@, the number of bytes
-> StringBuffer
-> StringBuffer
-offsetBytes i s = s { cur = cur s + i }
+offsetBytes i s = checkValid $ s { cur = cur (checkValid s) + i }
-- | Compute the difference in offset between two 'StringBuffer's that share
-- the same buffer. __Warning:__ The behavior is undefined if the
@@ -249,33 +269,34 @@ offsetBytes i s = s { cur = cur s + i }
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff s1 s2 = cur s2 - cur s1
+lengthStringBuffer :: StringBuffer -> Int
+lengthStringBuffer sb = sizeofByteArray (buf sb) - cur sb - 3
+
-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
atEnd :: StringBuffer -> Bool
-atEnd (StringBuffer _ l c) = l == c
+atEnd sb = lengthStringBuffer sb == 0
-- | Computes a 'StringBuffer' which points to the first character of the
-- wanted line. Lines begin at 1.
atLine :: Int -> StringBuffer -> Maybe StringBuffer
-atLine line sb@(StringBuffer buf len _) =
- inlinePerformIO $
- withForeignPtr buf $ \p -> do
- p' <- skipToLine line len p
- if p' == nullPtr
- then return Nothing
- else
- let
- delta = p' `minusPtr` p
- in return $ Just (sb { cur = delta
- , len = len - delta
- })
-
+atLine line sb@(StringBuffer buf _) =
+ inlinePerformIO $ withByteArrayContents buf $ \p -> do
+ p' <- skipToLine line (lengthStringBuffer sb) p
+ if p' == nullPtr
+ then return Nothing
+ else
+ let !delta = p' `minusPtr` p
+ in return $! Just $! checkValid $ sb { cur = delta }
+
+-- | @skipToLine line len op0@ finds the byte offset to the beginning of
+-- the given line number.
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !line !len !op0 = go 1 op0
where
- !opend = op0 `plusPtr` len
+ !op_end = op0 `plusPtr` len
go !i_line !op
- | op >= opend = pure nullPtr
+ | op >= op_end = pure nullPtr
| i_line == line = pure op
| otherwise = do
w <- peek op :: IO Word8
@@ -300,39 +321,46 @@ lexemeToString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
-> String
lexemeToString _ 0 = ""
-lexemeToString (StringBuffer buf _ cur) bytes =
- utf8DecodeStringLazy buf cur bytes
+lexemeToString sb bytes
+ | lengthStringBuffer sb < bytes = panic "lexemeToString: overflow 1"
+ | not (isValid sb) = panic "lexemeToString: overflow 2"
+lexemeToString (StringBuffer buf (I# cur#)) (I# bytes#) =
+ utf8DecodeByteArrayLazy# (getByteArray buf) cur# bytes#
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
-> FastString
lexemeToFastString _ 0 = nilFS
-lexemeToFastString (StringBuffer buf _ cur) len =
- inlinePerformIO $
- withForeignPtr buf $ \ptr ->
- return $! mkFastStringBytes (ptr `plusPtr` cur) len
+lexemeToFastString sb len | len > lengthStringBuffer sb = panic "lexemeToFastString"
+lexemeToFastString (StringBuffer buf cur) len =
+ inlinePerformIO $
+ withByteArrayContents buf $ \ptr ->
+ return $! mkFastStringBytes (ptr `plusPtr` cur) len
-- | Return the previous @n@ characters (or fewer if we are less than @n@
-- characters into the buffer.
decodePrevNChars :: Int -> StringBuffer -> String
-decodePrevNChars n (StringBuffer buf _ cur) =
- inlinePerformIO $ withForeignPtr buf $ \p0 ->
- go p0 n "" (p0 `plusPtr` (cur - 1))
+decodePrevNChars n (StringBuffer buf0 cur) =
+ go (getByteArray buf0) (min n (cur - 1)) "" (cur - 1)
where
- go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
- go buf0 n acc p | n == 0 || buf0 >= p = return acc
- go buf0 n acc p = do
- p' <- utf8PrevChar p
- let (c,_) = utf8DecodeChar p'
- go buf0 (n - 1) (c:acc) p'
+ go :: ByteArray# -> Int -> String -> Int -> String
+ go buf n acc ofs
+ | n == 0 = acc
+ | otherwise =
+ let !ofs'@(I# ofs'#) = utf8PrevChar buf ofs
+ !(# c,_ #) = utf8DecodeCharByteArray# buf ofs'#
+ in go buf (n - 1) (C# c:acc) ofs'
-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
- = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
- go i x | i == len = x
- | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
- '_' -> go (i + 1) x -- skip "_" (#14473)
- char -> go (i + 1) (x * radix + toInteger (char_to_int char))
- in go 0 0
+parseUnsignedInteger (StringBuffer buf (I# cur)) (I# len) radix char_to_int
+ = go (len +# cur) cur 0
+ where
+ go :: Int# -> Int# -> Integer -> Integer
+ go end i !acc
+ | isTrue# (i ==# end) = acc
+ | otherwise =
+ case utf8DecodeCharByteArray# (getByteArray buf) i of
+ (# '_'#, _ #) -> go end (i +# 1#) acc -- skip "_" (#14473)
+ (# char, _ #) -> go end (i +# 1#) (acc * radix + toInteger (char_to_int (C# char)))
diff --git a/compiler/GHC/Parser/Header.hs b/compiler/GHC/Parser/Header.hs
index fdf854ad8e..8532cae63f 100644
--- a/compiler/GHC/Parser/Header.hs
+++ b/compiler/GHC/Parser/Header.hs
@@ -214,7 +214,7 @@ lazyGetToks popts filename handle = do
-- counteracts the quadratic slowdown we otherwise get for very
-- large module names (#5981)
nextbuf <- hGetStringBufferBlock handle new_size
- if (len nextbuf == 0) then lazyLexBuf handle state True new_size else do
+ if lengthStringBuffer nextbuf == 0 then lazyLexBuf handle state True new_size else do
newbuf <- appendStringBuffers (buffer state) nextbuf
unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False new_size
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index 4d7b1ab157..2bf254b7c6 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -1894,7 +1894,7 @@ lex_string_tok span buf _len = do
ITprimstring _ bs -> ITprimstring (SourceText src) bs
ITstring _ s -> ITstring (SourceText src) s
_ -> panic "lex_string_tok"
- src = lexemeToString buf (cur bufEnd - cur buf)
+ src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd)
return (L (mkPsSpan (psSpanStart span) end) tok')
lex_string :: String -> P Token
@@ -1994,7 +1994,7 @@ finish_char_tok buf loc ch -- We've already seen the closing quote
-- Just need to check for trailing #
= do magicHash <- getBit MagicHashBit
i@(AI end bufEnd) <- getInput
- let src = lexemeToString buf (cur bufEnd - cur buf)
+ let src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd)
if magicHash then do
case alexGetChar' i of
Just ('#',i@(AI end _)) -> do
diff --git a/compiler/GHC/Types/Error.hs b/compiler/GHC/Types/Error.hs
index cb624c6c99..1510c42160 100644
--- a/compiler/GHC/Types/Error.hs
+++ b/compiler/GHC/Types/Error.hs
@@ -30,7 +30,7 @@ import GHC.Utils.Outputable as Outputable
import qualified GHC.Utils.Ppr.Colour as Col
import GHC.Types.SrcLoc as SrcLoc
import GHC.Data.FastString (unpackFS)
-import GHC.Data.StringBuffer (atLine, hGetStringBuffer, len, lexemeToString)
+import GHC.Data.StringBuffer (atLine, hGetStringBuffer, lengthStringBuffer, lexemeToString)
import GHC.Utils.Json
import System.IO.Error ( catchIOError )
@@ -175,7 +175,7 @@ getCaretDiagnostic severity (RealSrcSpan span _) =
content <- hGetStringBuffer fn
case atLine i content of
Just at_line -> pure $
- case lines (fix <$> lexemeToString at_line (len at_line)) of
+ case lines (fix <$> lexemeToString at_line (lengthStringBuffer at_line)) of
srcLine : _ -> Just srcLine
_ -> Nothing
_ -> pure Nothing
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 1195fa6cbc..36e18d1865 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -356,6 +356,7 @@ Library
GHC.Data.Bag
GHC.Data.Bitmap
GHC.Data.BooleanFormula
+ GHC.Data.ByteArray
GHC.Data.EnumSet
GHC.Data.FastMutInt
GHC.Data.FastString
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs
index 0f84be189b..38dfc69fbb 100644
--- a/libraries/ghc-boot/GHC/Utils/Encoding.hs
+++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs
@@ -17,12 +17,15 @@
module GHC.Utils.Encoding (
-- * UTF-8
utf8DecodeCharAddr#,
+ utf8DecodeCharByteArray#,
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
utf8DecodeByteString,
+ utf8DecodeByteArray,
utf8DecodeShortByteString,
utf8CompareShortByteString,
+ utf8DecodeByteArrayLazy#,
utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
@@ -53,6 +56,7 @@ import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import Data.ByteString.Short.Internal (ShortByteString(..))
+import GHC.Word
import GHC.Exts
-- -----------------------------------------------------------------------------
@@ -131,15 +135,20 @@ utf8DecodeChar !(Ptr a#) =
-- the start of the current character is, given any position in a
-- stream. This function finds the start of the previous character,
-- assuming there *is* a previous character.
-utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
-utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+utf8PrevChar :: ByteArray# -> Int -> Int
+utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1)
-utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
-utf8CharStart p = go p
- where go p = do w <- peek p
- if w >= 0x80 && w < 0xC0
- then go (p `plusPtr` (-1))
- else return p
+utf8CharStart :: ByteArray# -> Int -> Int
+utf8CharStart = go
+ where
+ go arr ofs@(I# ofs#)
+ | True
+ , ofs < 0 || ofs > I# (sizeofByteArray# arr)
+ = error "utf8CharStart: overflow"
+ | w >= 0x80 && w < 0xC0 = go arr (ofs - 1)
+ | otherwise = ofs
+ where
+ w = W8# (indexWord8Array# arr ofs#)
{-# INLINE utf8DecodeLazy# #-}
utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
@@ -158,6 +167,12 @@ utf8DecodeByteString :: ByteString -> [Char]
utf8DecodeByteString (BS.PS fptr offset len)
= utf8DecodeStringLazy fptr offset len
+utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char]
+utf8DecodeByteArrayLazy# a# offset# len#
+ = unsafeDupablePerformIO $
+ let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#)
+ in utf8DecodeLazy# (return ()) decodeChar len#
+
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fp offset (I# len#)
= unsafeDupablePerformIO $ do
@@ -200,12 +215,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0#
| isTrue# (b1_1 `ltWord#` b2_1) -> LT
| otherwise -> go (off1 +# 1#) (off2 +# 1#)
-utf8DecodeShortByteString :: ShortByteString -> [Char]
-utf8DecodeShortByteString (SBS ba#)
+utf8DecodeByteArray :: ByteArray# -> [Char]
+utf8DecodeByteArray ba#
= unsafeDupablePerformIO $
let len# = sizeofByteArray# ba# in
utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
+utf8DecodeShortByteString :: ShortByteString -> [Char]
+utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba#
+
countUTF8Chars :: ShortByteString -> IO Int
countUTF8Chars (SBS ba) = go 0# 0#
where
diff --git a/testsuite/tests/parser/should_run/CountParserDeps.stdout b/testsuite/tests/parser/should_run/CountParserDeps.stdout
index 5fbdf896ee..889f15eef9 100644
--- a/testsuite/tests/parser/should_run/CountParserDeps.stdout
+++ b/testsuite/tests/parser/should_run/CountParserDeps.stdout
@@ -1,4 +1,4 @@
-Found 235 parser module dependencies
+Found 236 parser module dependencies
GHC.Builtin.Names
GHC.Builtin.PrimOps
GHC.Builtin.Types
@@ -62,6 +62,7 @@ GHC.Core.Utils
GHC.CoreToIface
GHC.Data.Bag
GHC.Data.BooleanFormula
+GHC.Data.ByteArray
GHC.Data.EnumSet
GHC.Data.FastMutInt
GHC.Data.FastString
diff --git a/utils/haddock b/utils/haddock
-Subproject 25fa8fde84701c010fa466c2648f8f6d10265e8
+Subproject 8850e481da7c65cd023af9b3a37bad02edfb47e