summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2017-04-17 12:26:56 -0400
committerBen Gamari <ben@smart-cactus.org>2017-04-17 20:34:40 -0400
commit1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1 (patch)
treedae54e5e992c487cc9d2d1a2ea435ef25952c826 /compiler
parent3d3975f2f4caf3af76a7ea27d2882ddaee7db3c9 (diff)
downloadhaskell-1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1.tar.gz
utils: Lazily decode UTF8 strings
Reviewers: austin, hvr Subscribers: rwbarton, thomie GHC Trac Issues: #13527 Differential Revision: https://phabricator.haskell.org/D3442
Diffstat (limited to 'compiler')
-rw-r--r--compiler/utils/Encoding.hs34
-rw-r--r--compiler/utils/FastString.hs4
-rw-r--r--compiler/utils/StringBuffer.hs4
3 files changed, 25 insertions, 17 deletions
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index 712de6ca82..f2b0979995 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -17,7 +17,8 @@ module Encoding (
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
- utf8DecodeString,
+ utf8DecodeByteString,
+ utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
utf8EncodedLength,
@@ -33,9 +34,15 @@ module Encoding (
) where
import Foreign
+import Foreign.ForeignPtr.Unsafe
import Data.Char
import qualified Data.Char as Char
import Numeric
+import GHC.IO
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Internal as BS
+
import GHC.Exts
-- -----------------------------------------------------------------------------
@@ -115,19 +122,24 @@ utf8CharStart p = go p
then go (p `plusPtr` (-1))
else return p
-utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
-utf8DecodeString ptr len
- = unpack ptr
+utf8DecodeByteString :: ByteString -> [Char]
+utf8DecodeByteString (BS.PS ptr offset len)
+ = utf8DecodeStringLazy ptr offset len
+
+utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
+utf8DecodeStringLazy fptr offset len
+ = unsafeDupablePerformIO $ unpack start
where
- !end = ptr `plusPtr` len
+ !start = unsafeForeignPtrToPtr fptr `plusPtr` offset
+ !end = start `plusPtr` len
unpack p
- | p >= end = return []
- | otherwise =
- case utf8DecodeChar# (unPtr p) of
- (# c#, nBytes# #) -> do
- chs <- unpack (p `plusPtr#` nBytes#)
- return (C# c# : chs)
+ | p >= end = touchForeignPtr fptr >> return []
+ | otherwise =
+ case utf8DecodeChar# (unPtr p) of
+ (# c#, nBytes# #) -> do
+ rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#)
+ return (C# c# : rest)
countUTF8Chars :: Ptr Word8 -> Int -> IO Int
countUTF8Chars ptr len = go ptr 0
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 8d1bbb5c67..8653485e0c 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -485,9 +485,7 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) =
- inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
- utf8DecodeString (castPtr ptr) len
+unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index ec5184a1c2..fcc344554b 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -251,9 +251,7 @@ lexemeToString :: StringBuffer
-> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
- inlinePerformIO $
- withForeignPtr buf $ \ptr ->
- utf8DecodeString (ptr `plusPtr` cur) bytes
+ utf8DecodeStringLazy buf cur bytes
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes