summaryrefslogtreecommitdiff
path: root/compiler/GHC/Data
diff options
context:
space:
mode:
author Ben Gamari <ben@smart-cactus.org> 2022-07-16 13:54:57 -0400
committer Marge Bot <ben+marge-bot@smart-cactus.org> 2022-07-22 08:18:41 -0400
commit ae1666353696b5d85938d8a2f5fb11fb66f21678 (patch)
tree 7de167e7c2f27ec2acc300ba815677878c1bd738 /compiler/GHC/Data
parent 2c5991ccaf45cb7e68e54d59a27ee144a4499edb (diff)
download haskell-ae1666353696b5d85938d8a2f5fb11fb66f21678.tar.gz
ghc-boot: Clean up UTF-8 codecs
In preparation for moving the UTF-8 codecs into `base`:
* Move them to GHC.Utils.Encoding.UTF8
* Make names more consistent
* Add some Haddocks
Diffstat (limited to 'compiler/GHC/Data')
-rw-r--r-- compiler/GHC/Data/FastString.hs 6
-rw-r--r-- compiler/GHC/Data/StringBuffer.hs 10
2 files changed, 8 insertions, 8 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 131f174c41..98ca34c249 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -531,13 +531,13 @@ mkFastString :: String -> FastString
{-# NOINLINE[1] mkFastString #-}
mkFastString str =
inlinePerformIO $ do
- sbs <- utf8EncodeShortByteString str
+ let !sbs = utf8EncodeShortByteString str
mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
-- The following rule is used to avoid polluting the non-reclaimable FastString
-- table with transient strings when we only want their encoding.
{-# RULES
-"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x #-}
+"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeByteString x #-}
-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
mkFastStringByteList :: [Word8] -> FastString
@@ -554,7 +554,7 @@ mkNewFastStringShortByteString :: ShortByteString -> Int
-> FastMutInt -> IO FastString
mkNewFastStringShortByteString sbs uid n_zencs = do
let zstr = mkZFastString n_zencs sbs
- chars <- countUTF8Chars sbs
+ chars = utf8CountCharsShortByteString sbs
return (FastString uid chars sbs zstr)
hashStr :: ShortByteString -> Int
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index e6dcb14b6b..1426cf26e3 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -199,7 +199,7 @@ stringToStringBuffer str =
let size = utf8EncodedLength str
buf <- mallocForeignPtrArray (size+3)
unsafeWithForeignPtr buf $ \ptr -> do
- utf8EncodeStringPtr ptr str
+ utf8EncodePtr ptr str
pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-- sentinels for UTF-8 decoding
return (StringBuffer buf size 0)
@@ -297,7 +297,7 @@ prevChar (StringBuffer buf _ cur) _ =
inlinePerformIO $
unsafeWithForeignPtr buf $ \p -> do
p' <- utf8PrevChar (p `plusPtr` cur)
- return (fst (utf8DecodeChar p'))
+ return (fst (utf8DecodeCharPtr p'))
-- -----------------------------------------------------------------------------
-- Moving
@@ -383,7 +383,7 @@ lexemeToString :: StringBuffer
-> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
- utf8DecodeStringLazy buf cur bytes
+ utf8DecodeForeignPtr buf cur bytes
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
@@ -405,7 +405,7 @@ decodePrevNChars n (StringBuffer buf _ cur) =
go buf0 n acc p | n == 0 || buf0 >= p = return acc
go buf0 n acc p = do
p' <- utf8PrevChar p
- let (c,_) = utf8DecodeChar p'
+ let (c,_) = utf8DecodeCharPtr p'
go buf0 (n - 1) (c:acc) p'
-- -----------------------------------------------------------------------------
@@ -414,7 +414,7 @@ parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
= inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
go i x | i == len = x
- | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
+ | otherwise = case fst (utf8DecodeCharPtr (ptr `plusPtr` (cur + i))) of
'_' -> go (i + 1) x -- skip "_" (#14473)
char -> go (i + 1) (x * radix + toInteger (char_to_int char))
in go 0 0