StringBuffer: Rid it of ForeignPtrswip/stringbuffer

Bumps haddock submodule.
author: Ben Gamari <ben@smart-cactus.org> 2020-10-23 14:16:58 -0400
committer: Ben Gamari <ben@smart-cactus.org> 2020-11-29 18:26:15 -0500
commit: b3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25 (patch)
tree: 428241e11a6a5d15592964d049344b85d8160cf4
parent: f40b662b9ea555bab6e9729f4165eaca7021d322 (diff)
download: haskell-wip/stringbuffer.tar.gz
9 files changed, 181 insertions, 128 deletions
diff --git a/compiler/GHC/Data/ByteArray.hs b/compiler/GHC/Data/ByteArray.hs
index 5121ed4a57..19591092fa 100644
--- a/compiler/GHC/Data/ByteArray.hs
+++ b/compiler/GHC/Data/ByteArray.hs
@@ -14,6 +14,7 @@ module GHC.Data.ByteArray
   , MutableByteArray
   , getMutableByteArray
   , unsafeMutableByteArrayContents
+  , sizeofMutableByteArray
   , newMutableByteArray
   , newPinnedMutableByteArray
   , copyByteArray
@@ -92,6 +93,10 @@ newPinnedMutableByteArray (I# size) = IO $ \s ->
   case newPinnedByteArray# size s of
     (# s', mba #) -> (# s', MutableByteArray mba #)
 
+sizeofMutableByteArray :: MutableByteArray -> Int
+sizeofMutableByteArray (MutableByteArray mba) =
+  I# (sizeofMutableByteArray# mba)
+
 copyByteArray
   :: ByteArray          -- ^ source
   -> Int                -- ^ source offset
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index 42ab89f8cc..9873975396 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -17,8 +17,7 @@ Buffers for scanning string input stored in external arrays.
 
 module GHC.Data.StringBuffer
        (
-        StringBuffer(..),
-        -- non-abstract for vs\/HaskellService
+        StringBuffer,
 
          -- * Creation\/destruction
         hGetStringBuffer,
@@ -26,8 +25,11 @@ module GHC.Data.StringBuffer
         hPutStringBuffer,
         appendStringBuffers,
         stringToStringBuffer,
+        byteStringToStringBuffer,
+        withStringBufferContents,
 
         -- * Inspection
+        lengthStringBuffer,
         nextChar,
         currentChar,
         prevChar,
@@ -51,13 +53,18 @@ module GHC.Data.StringBuffer
 #include "HsVersions.h"
 
 import GHC.Prelude
+import GHC.Stack
 
 import GHC.Utils.Encoding
 import GHC.Data.FastString
+import GHC.Data.ByteArray
 import GHC.Utils.IO.Unsafe
 import GHC.Utils.Panic.Plain
 import GHC.Utils.Misc
+import Foreign.C.String
 
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
 import Data.Maybe
 import Control.Exception
 import System.IO
@@ -65,6 +72,7 @@ import System.IO.Unsafe         ( unsafePerformIO )
 import GHC.IO.Encoding.UTF8     ( mkUTF8 )
 import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )
 
+import GHC.Word
 import GHC.Exts
 
 import Foreign
@@ -72,18 +80,15 @@ import Foreign
 -- -----------------------------------------------------------------------------
 -- The StringBuffer type
 
--- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- | A 'StringBuffer' is an internal pointer to a sized chunk of bytes.
 -- The bytes are intended to be *immutable*.  There are pure
--- operations to read the contents of a StringBuffer.
---
--- A StringBuffer may have a finalizer, depending on how it was
--- obtained.
+-- operations to read the contents of a 'StringBuffer'.
 --
 data StringBuffer
  = StringBuffer {
-     buf :: {-# UNPACK #-} !(ForeignPtr Word8),
-     len :: {-# UNPACK #-} !Int,        -- length
-     cur :: {-# UNPACK #-} !Int         -- current pos
+     buf :: {-# UNPACK #-} !ByteArray,
+     cur :: {-# UNPACK #-} !Int
+     -- ^ Current position in bytes.
   }
   -- The buffer is assumed to be UTF-8 encoded, and furthermore
   -- we add three @\'\\0\'@ bytes to the end as sentinels so that the
@@ -92,9 +97,17 @@ data StringBuffer
 
 instance Show StringBuffer where
         showsPrec _ s = showString "<stringbuffer("
-                      . shows (len s) . showString "," . shows (cur s)
+                      . shows (cur s)
                       . showString ")>"
 
+isValid :: StringBuffer -> Bool
+isValid sb = sizeofByteArray (buf sb) >= cur sb
+
+checkValid :: HasCallStack => StringBuffer -> StringBuffer
+checkValid sb
+  | not (isValid sb) = error "isValid"
+  | otherwise = sb
+
 -- -----------------------------------------------------------------------------
 -- Creation / Destruction
 
@@ -102,34 +115,35 @@ instance Show StringBuffer where
 -- managed by the garbage collector.
 hGetStringBuffer :: FilePath -> IO StringBuffer
 hGetStringBuffer fname = do
-   h <- openBinaryFile fname ReadMode
-   size_i <- hFileSize h
-   offset_i <- skipBOM h size_i 0  -- offset is 0 initially
-   let size = fromIntegral $ size_i - offset_i
-   buf <- mallocForeignPtrArray (size+3)
-   withForeignPtr buf $ \ptr -> do
-     r <- if size == 0 then return 0 else hGetBuf h ptr size
-     hClose h
-     if (r /= size)
-        then ioError (userError "short read of file")
-        else newUTF8StringBuffer buf ptr size
+  h <- openBinaryFile fname ReadMode
+  size_i <- hFileSize h
+  offset_i <- skipBOM h size_i 0  -- offset is 0 initially
+  let size = fromIntegral $ size_i - offset_i
+  buf <- newPinnedMutableByteArray (size+3)
+  r <- if size == 0
+          then return 0
+          else hGetBuf h (unsafeMutableByteArrayContents buf) size
+  hClose h
+  if r /= size
+    then ioError (userError "short read of file")
+    else newUTF8StringBuffer buf size
 
 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
-hGetStringBufferBlock handle wanted
-    = do size_i <- hFileSize handle
-         offset_i <- hTell handle >>= skipBOM handle size_i
-         let size = min wanted (fromIntegral $ size_i-offset_i)
-         buf <- mallocForeignPtrArray (size+3)
-         withForeignPtr buf $ \ptr ->
-             do r <- if size == 0 then return 0 else hGetBuf handle ptr size
-                if r /= size
-                   then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
-                   else newUTF8StringBuffer buf ptr size
+hGetStringBufferBlock handle wanted = do
+  size_i <- hFileSize handle
+  offset_i <- hTell handle >>= skipBOM handle size_i
+  let size = min wanted (fromIntegral $ size_i-offset_i)
+  buf <- newPinnedMutableByteArray (size+3)
+  r <- if size == 0
+          then return 0
+          else hGetBuf handle (unsafeMutableByteArrayContents buf) size
+  if r /= size
+    then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+    else newUTF8StringBuffer buf size
 
 hPutStringBuffer :: Handle -> StringBuffer -> IO ()
-hPutStringBuffer hdl (StringBuffer buf len cur)
-    = withForeignPtr (plusForeignPtr buf cur) $ \ptr ->
-          hPutBuf hdl ptr len
+hPutStringBuffer hdl (StringBuffer buf cur) = do
+  withByteArrayContents buf $ \ptr -> hPutBuf hdl (ptr `plusPtr` cur) (sizeofByteArray buf)
 
 -- | Skip the byte-order mark if there is one (see #1744 and #6016),
 -- and return the new position of the handle in bytes.
@@ -156,39 +170,49 @@ skipBOM h size offset =
   where
     safeEncoding = mkUTF8 IgnoreCodingFailure
 
-newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
-newUTF8StringBuffer buf ptr size = do
-  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+-- | @newUTF8StringBuffer buf size@ creates a 'StringBuffer' from a
+-- 'MutableByteArray' of length @size+3@ containing UTF-8 encoded text. A three
+-- byte sentinel will be added to the end of the buffer.
+newUTF8StringBuffer :: MutableByteArray -> Int -> IO StringBuffer
+newUTF8StringBuffer buf size = do
+  ASSERTM(return $ sizeofMutableByteArray buf == (size + 3))
   -- sentinels for UTF-8 decoding
-  return $ StringBuffer buf size 0
+  writeWord8Array buf (size+0) 0
+  writeWord8Array buf (size+1) 0
+  writeWord8Array buf (size+3) 0
+  buf' <- unsafeFreezeByteArray buf
+  return $ StringBuffer buf' 0
 
 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
-appendStringBuffers sb1 sb2
-    = do newBuf <- mallocForeignPtrArray (size+3)
-         withForeignPtr newBuf $ \ptr ->
-          withForeignPtr (buf sb1) $ \sb1Ptr ->
-           withForeignPtr (buf sb2) $ \sb2Ptr ->
-             do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
-                copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
-                pokeArray (ptr `advancePtr` size) [0,0,0]
-                return (StringBuffer newBuf size 0)
-    where sb1_len = calcLen sb1
-          sb2_len = calcLen sb2
-          calcLen sb = len sb - cur sb
-          size =  sb1_len + sb2_len
+appendStringBuffers sb1 sb2 = do
+  dst <- newPinnedMutableByteArray (size+3)
+  copyByteArray (buf sb1) (cur sb1) dst 0 sb1_len
+  copyByteArray (buf sb2) (cur sb2) dst sb1_len sb2_len
+  newUTF8StringBuffer dst size
+  where
+    sb1_len = lengthStringBuffer sb1
+    sb2_len = lengthStringBuffer sb2
+    size =  sb1_len + sb2_len
+
+withStringBufferContents :: StringBuffer -> (CStringLen -> IO a) -> IO a
+withStringBufferContents sb@(StringBuffer buf cur) action =
+  withByteArrayContents buf $ \p -> action (p `plusPtr` cur, lengthStringBuffer sb)
+
+byteStringToStringBuffer :: BS.ByteString -> StringBuffer
+byteStringToStringBuffer bs = unsafePerformIO $ do
+  let size = BS.length bs
+  buf <- newPinnedMutableByteArray (size+3)
+  BS.unsafeUseAsCString bs (\p -> copyAddrToMutableByteArray p buf 0 size)
+  newUTF8StringBuffer buf size
 
 -- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer
 -- is automatically managed by the garbage collector.
 stringToStringBuffer :: String -> StringBuffer
-stringToStringBuffer str =
- unsafePerformIO $ do
+stringToStringBuffer str = unsafePerformIO $ do
   let size = utf8EncodedLength str
-  buf <- mallocForeignPtrArray (size+3)
-  withForeignPtr buf $ \ptr -> do
-    utf8EncodeString ptr str
-    pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-    -- sentinels for UTF-8 decoding
-  return (StringBuffer buf size 0)
+  buf <- newPinnedMutableByteArray (size+3)
+  utf8EncodeString (unsafeMutableByteArrayContents buf) str
+  newUTF8StringBuffer buf size
 
 -- -----------------------------------------------------------------------------
 -- Grab a character
@@ -200,14 +224,11 @@ stringToStringBuffer str =
 -- character cannot be decoded as UTF-8, @\'\\0\'@ is returned.
 {-# INLINE nextChar #-}
 nextChar :: StringBuffer -> (Char,StringBuffer)
-nextChar (StringBuffer buf len (I# cur#)) =
+nextChar sb@(StringBuffer buf (I# cur#)) =
   -- Getting our fingers dirty a little here, but this is performance-critical
-  inlinePerformIO $
-    withForeignPtr buf $ \(Ptr a#) ->
-        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
-          (# c#, nBytes# #) ->
-             let cur' = I# (cur# +# nBytes#) in
-             return (C# c#, StringBuffer buf len cur')
+    case utf8DecodeCharByteArray# (getByteArray buf) cur# of
+      (# c#, nBytes# #) ->
+        (C# c#, checkValid $ sb { cur = I# (cur# +# nBytes#) })
 
 -- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
 -- to 'Data.List.head').  __Warning:__ The behavior is undefined if the
@@ -217,12 +238,11 @@ currentChar :: StringBuffer -> Char
 currentChar = fst . nextChar
 
 prevChar :: StringBuffer -> Char -> Char
-prevChar (StringBuffer _   _   0)   deflt = deflt
-prevChar (StringBuffer buf _   cur) _     =
-  inlinePerformIO $
-    withForeignPtr buf $ \p -> do
-      p' <- utf8PrevChar (p `plusPtr` cur)
-      return (fst (utf8DecodeChar p'))
+prevChar (StringBuffer _   0)   deflt = deflt
+prevChar (StringBuffer buf cur) _     =
+    let !(I# p') = utf8PrevChar (getByteArray buf) cur
+        !(# c, _ #) = utf8DecodeCharByteArray# (getByteArray buf) p'
+    in C# c
 
 -- -----------------------------------------------------------------------------
 -- Moving
@@ -241,7 +261,7 @@ stepOn s = snd (nextChar s)
 offsetBytes :: Int                      -- ^ @n@, the number of bytes
             -> StringBuffer
             -> StringBuffer
-offsetBytes i s = s { cur = cur s + i }
+offsetBytes i s = checkValid $ s { cur = cur (checkValid s) + i }
 
 -- | Compute the difference in offset between two 'StringBuffer's that share
 -- the same buffer.  __Warning:__ The behavior is undefined if the
@@ -249,33 +269,34 @@ offsetBytes i s = s { cur = cur s + i }
 byteDiff :: StringBuffer -> StringBuffer -> Int
 byteDiff s1 s2 = cur s2 - cur s1
 
+lengthStringBuffer :: StringBuffer -> Int
+lengthStringBuffer sb = sizeofByteArray (buf sb) - cur sb - 3
+
 -- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
 atEnd :: StringBuffer -> Bool
-atEnd (StringBuffer _ l c) = l == c
+atEnd sb = lengthStringBuffer sb == 0
 
 -- | Computes a 'StringBuffer' which points to the first character of the
 -- wanted line. Lines begin at 1.
 atLine :: Int -> StringBuffer -> Maybe StringBuffer
-atLine line sb@(StringBuffer buf len _) =
-  inlinePerformIO $
-    withForeignPtr buf $ \p -> do
-      p' <- skipToLine line len p
-      if p' == nullPtr
-        then return Nothing
-        else
-          let
-            delta = p' `minusPtr` p
-          in return $ Just (sb { cur = delta
-                               , len = len - delta
-                               })
-
+atLine line sb@(StringBuffer buf _) =
+  inlinePerformIO $ withByteArrayContents buf $ \p -> do
+    p' <- skipToLine line (lengthStringBuffer sb) p
+    if p' == nullPtr
+      then return Nothing
+      else
+        let !delta = p' `minusPtr` p
+        in return $! Just $! checkValid $ sb { cur = delta }
+
+-- | @skipToLine line len op0@ finds the byte offset to the beginning of
+-- the given line number.
 skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
 skipToLine !line !len !op0 = go 1 op0
   where
-    !opend = op0 `plusPtr` len
+    !op_end = op0 `plusPtr` len
 
     go !i_line !op
-      | op >= opend    = pure nullPtr
+      | op >= op_end   = pure nullPtr
       | i_line == line = pure op
       | otherwise      = do
           w <- peek op :: IO Word8
@@ -300,39 +321,46 @@ lexemeToString :: StringBuffer
                -> Int                   -- ^ @n@, the number of bytes
                -> String
 lexemeToString _ 0 = ""
-lexemeToString (StringBuffer buf _ cur) bytes =
-  utf8DecodeStringLazy buf cur bytes
+lexemeToString sb bytes
+  | lengthStringBuffer sb < bytes = panic "lexemeToString: overflow 1"
+  | not (isValid sb)  = panic "lexemeToString: overflow 2"
+lexemeToString (StringBuffer buf (I# cur#)) (I# bytes#) =
+  utf8DecodeByteArrayLazy# (getByteArray buf) cur# bytes#
 
 lexemeToFastString :: StringBuffer
                    -> Int               -- ^ @n@, the number of bytes
                    -> FastString
 lexemeToFastString _ 0 = nilFS
-lexemeToFastString (StringBuffer buf _ cur) len =
-   inlinePerformIO $
-     withForeignPtr buf $ \ptr ->
-       return $! mkFastStringBytes (ptr `plusPtr` cur) len
+lexemeToFastString sb len | len > lengthStringBuffer sb = panic "lexemeToFastString"
+lexemeToFastString (StringBuffer buf cur) len =
+  inlinePerformIO $
+    withByteArrayContents buf $ \ptr ->
+      return $! mkFastStringBytes (ptr `plusPtr` cur) len
 
 -- | Return the previous @n@ characters (or fewer if we are less than @n@
 -- characters into the buffer.
 decodePrevNChars :: Int -> StringBuffer -> String
-decodePrevNChars n (StringBuffer buf _ cur) =
-    inlinePerformIO $ withForeignPtr buf $ \p0 ->
-      go p0 n "" (p0 `plusPtr` (cur - 1))
+decodePrevNChars n (StringBuffer buf0 cur) =
+    go (getByteArray buf0) (min n (cur - 1)) "" (cur - 1)
   where
-    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
-    go buf0 n acc p | n == 0 || buf0 >= p = return acc
-    go buf0 n acc p = do
-        p' <- utf8PrevChar p
-        let (c,_) = utf8DecodeChar p'
-        go buf0 (n - 1) (c:acc) p'
+    go :: ByteArray# -> Int -> String -> Int -> String
+    go buf n acc ofs
+      | n == 0 = acc
+      | otherwise =
+          let !ofs'@(I# ofs'#) = utf8PrevChar buf ofs
+              !(# c,_ #) = utf8DecodeCharByteArray# buf ofs'#
+          in go buf (n - 1) (C# c:acc) ofs'
 
 -- -----------------------------------------------------------------------------
 -- Parsing integer strings in various bases
 parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
-  = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
-    go i x | i == len  = x
-           | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
-               '_'  -> go (i + 1) x    -- skip "_" (#14473)
-               char -> go (i + 1) (x * radix + toInteger (char_to_int char))
-  in go 0 0
+parseUnsignedInteger (StringBuffer buf (I# cur)) (I# len) radix char_to_int
+  = go (len +# cur) cur 0
+  where
+    go :: Int# -> Int# -> Integer -> Integer
+    go end i !acc
+      | isTrue# (i ==# end) = acc
+      | otherwise =
+        case utf8DecodeCharByteArray# (getByteArray buf) i of
+          (# '_'#, _ #) -> go end (i +# 1#) acc    -- skip "_" (#14473)
+          (# char, _ #) -> go end (i +# 1#) (acc * radix + toInteger (char_to_int (C# char)))
diff --git a/compiler/GHC/Parser/Header.hs b/compiler/GHC/Parser/Header.hs
index fdf854ad8e..8532cae63f 100644
--- a/compiler/GHC/Parser/Header.hs
+++ b/compiler/GHC/Parser/Header.hs
@@ -214,7 +214,7 @@ lazyGetToks popts filename handle = do
        -- counteracts the quadratic slowdown we otherwise get for very
        -- large module names (#5981)
      nextbuf <- hGetStringBufferBlock handle new_size
-     if (len nextbuf == 0) then lazyLexBuf handle state True new_size else do
+     if lengthStringBuffer nextbuf == 0 then lazyLexBuf handle state True new_size else do
        newbuf <- appendStringBuffers (buffer state) nextbuf
        unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False new_size
 
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index 4d7b1ab157..2bf254b7c6 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -1894,7 +1894,7 @@ lex_string_tok span buf _len = do
             ITprimstring _ bs -> ITprimstring (SourceText src) bs
             ITstring _ s -> ITstring (SourceText src) s
             _ -> panic "lex_string_tok"
-    src = lexemeToString buf (cur bufEnd - cur buf)
+    src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd)
   return (L (mkPsSpan (psSpanStart span) end) tok')
 
 lex_string :: String -> P Token
@@ -1994,7 +1994,7 @@ finish_char_tok buf loc ch  -- We've already seen the closing quote
                         -- Just need to check for trailing #
   = do  magicHash <- getBit MagicHashBit
         i@(AI end bufEnd) <- getInput
-        let src = lexemeToString buf (cur bufEnd - cur buf)
+        let src = lexemeToString buf (lengthStringBuffer buf - lengthStringBuffer bufEnd)
         if magicHash then do
             case alexGetChar' i of
               Just ('#',i@(AI end _)) -> do
diff --git a/compiler/GHC/Types/Error.hs b/compiler/GHC/Types/Error.hs
index cb624c6c99..1510c42160 100644
--- a/compiler/GHC/Types/Error.hs
+++ b/compiler/GHC/Types/Error.hs
@@ -30,7 +30,7 @@ import GHC.Utils.Outputable as Outputable
 import qualified GHC.Utils.Ppr.Colour as Col
 import GHC.Types.SrcLoc as SrcLoc
 import GHC.Data.FastString (unpackFS)
-import GHC.Data.StringBuffer (atLine, hGetStringBuffer, len, lexemeToString)
+import GHC.Data.StringBuffer (atLine, hGetStringBuffer, lengthStringBuffer, lexemeToString)
 import GHC.Utils.Json
 
 import System.IO.Error  ( catchIOError )
@@ -175,7 +175,7 @@ getCaretDiagnostic severity (RealSrcSpan span _) =
       content <- hGetStringBuffer fn
       case atLine i content of
         Just at_line -> pure $
-          case lines (fix <$> lexemeToString at_line (len at_line)) of
+          case lines (fix <$> lexemeToString at_line (lengthStringBuffer at_line)) of
             srcLine : _ -> Just srcLine
             _           -> Nothing
         _ -> pure Nothing
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 1195fa6cbc..36e18d1865 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -356,6 +356,7 @@ Library
         GHC.Data.Bag
         GHC.Data.Bitmap
         GHC.Data.BooleanFormula
+        GHC.Data.ByteArray
         GHC.Data.EnumSet
         GHC.Data.FastMutInt
         GHC.Data.FastString
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs
index 0f84be189b..38dfc69fbb 100644
--- a/libraries/ghc-boot/GHC/Utils/Encoding.hs
+++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs
@@ -17,12 +17,15 @@
 module GHC.Utils.Encoding (
         -- * UTF-8
         utf8DecodeCharAddr#,
+        utf8DecodeCharByteArray#,
         utf8PrevChar,
         utf8CharStart,
         utf8DecodeChar,
         utf8DecodeByteString,
+        utf8DecodeByteArray,
         utf8DecodeShortByteString,
         utf8CompareShortByteString,
+        utf8DecodeByteArrayLazy#,
         utf8DecodeStringLazy,
         utf8EncodeChar,
         utf8EncodeString,
@@ -53,6 +56,7 @@ import Data.ByteString (ByteString)
 import qualified Data.ByteString.Internal as BS
 import Data.ByteString.Short.Internal (ShortByteString(..))
 
+import GHC.Word
 import GHC.Exts
 
 -- -----------------------------------------------------------------------------
@@ -131,15 +135,20 @@ utf8DecodeChar !(Ptr a#) =
 -- the start of the current character is, given any position in a
 -- stream.  This function finds the start of the previous character,
 -- assuming there *is* a previous character.
-utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
-utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+utf8PrevChar :: ByteArray# -> Int -> Int
+utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1)
 
-utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
-utf8CharStart p = go p
- where go p = do w <- peek p
-                 if w >= 0x80 && w < 0xC0
-                        then go (p `plusPtr` (-1))
-                        else return p
+utf8CharStart :: ByteArray# -> Int -> Int
+utf8CharStart = go
+  where
+    go arr ofs@(I# ofs#)
+      | True
+      , ofs < 0 || ofs > I# (sizeofByteArray# arr)
+      = error "utf8CharStart: overflow"
+      | w >= 0x80 && w < 0xC0 = go arr (ofs - 1)
+      | otherwise             = ofs
+      where
+        w = W8# (indexWord8Array# arr ofs#)
 
 {-# INLINE utf8DecodeLazy# #-}
 utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
@@ -158,6 +167,12 @@ utf8DecodeByteString :: ByteString -> [Char]
 utf8DecodeByteString (BS.PS fptr offset len)
   = utf8DecodeStringLazy fptr offset len
 
+utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char]
+utf8DecodeByteArrayLazy# a# offset# len#
+  = unsafeDupablePerformIO $
+      let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#)
+      in utf8DecodeLazy# (return ()) decodeChar len#
+
 utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
 utf8DecodeStringLazy fp offset (I# len#)
   = unsafeDupablePerformIO $ do
@@ -200,12 +215,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0#
                          | isTrue# (b1_1 `ltWord#` b2_1) -> LT
                          | otherwise                     -> go (off1 +# 1#) (off2 +# 1#)
 
-utf8DecodeShortByteString :: ShortByteString -> [Char]
-utf8DecodeShortByteString (SBS ba#)
+utf8DecodeByteArray :: ByteArray# -> [Char]
+utf8DecodeByteArray ba#
   = unsafeDupablePerformIO $
       let len# = sizeofByteArray# ba# in
       utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
 
+utf8DecodeShortByteString :: ShortByteString -> [Char]
+utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba#
+
 countUTF8Chars :: ShortByteString -> IO Int
 countUTF8Chars (SBS ba) = go 0# 0#
   where
diff --git a/testsuite/tests/parser/should_run/CountParserDeps.stdout b/testsuite/tests/parser/should_run/CountParserDeps.stdout
index 5fbdf896ee..889f15eef9 100644
--- a/testsuite/tests/parser/should_run/CountParserDeps.stdout
+++ b/testsuite/tests/parser/should_run/CountParserDeps.stdout
@@ -1,4 +1,4 @@
-Found 235 parser module dependencies
+Found 236 parser module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.Types
@@ -62,6 +62,7 @@ GHC.Core.Utils
 GHC.CoreToIface
 GHC.Data.Bag
 GHC.Data.BooleanFormula
+GHC.Data.ByteArray
 GHC.Data.EnumSet
 GHC.Data.FastMutInt
 GHC.Data.FastString
diff --git a/utils/haddock b/utils/haddock
-Subproject 25fa8fde84701c010fa466c2648f8f6d10265e8
+Subproject 8850e481da7c65cd023af9b3a37bad02edfb47e
author	Ben Gamari <ben@smart-cactus.org>	2020-10-23 14:16:58 -0400
committer	Ben Gamari <ben@smart-cactus.org>	2020-11-29 18:26:15 -0500
commit	b3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25 (patch)
tree	428241e11a6a5d15592964d049344b85d8160cf4
parent	f40b662b9ea555bab6e9729f4165eaca7021d322 (diff)
download	haskell-wip/stringbuffer.tar.gz