diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-07-16 13:54:57 -0400 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-07-22 08:18:41 -0400 |
commit | ae1666353696b5d85938d8a2f5fb11fb66f21678 (patch) | |
tree | 7de167e7c2f27ec2acc300ba815677878c1bd738 /compiler | |
parent | 2c5991ccaf45cb7e68e54d59a27ee144a4499edb (diff) | |
download | haskell-ae1666353696b5d85938d8a2f5fb11fb66f21678.tar.gz |
ghc-boot: Clean up UTF-8 codecs
In preparation for moving the UTF-8 codecs into `base`:
* Move them to GHC.Utils.Encoding.UTF8
* Make names more consistent
* Add some Haddocks
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/GHC/CmmToAsm/Dwarf/Types.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Data/FastString.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/Data/StringBuffer.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/Hs/DocString.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prof.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/Types/Literal.hs | 2 |
6 files changed, 13 insertions, 13 deletions
```diff
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Types.hs b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
index e29f03e1d6..caa829db21 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Types.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
@@ -599,7 +599,7 @@ pprString str
   = pprString' $ hcat $ map escapeChar $
     if str `lengthIs` utf8EncodedLength str
     then str
-    else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeString str
+    else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeByteString str
 
 -- | Escape a single non-unicode character
 escapeChar :: Char -> SDoc
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 131f174c41..98ca34c249 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -531,13 +531,13 @@ mkFastString :: String -> FastString
 {-# NOINLINE[1] mkFastString #-}
 mkFastString str =
   inlinePerformIO $ do
-    sbs <- utf8EncodeShortByteString str
+    let !sbs = utf8EncodeShortByteString str
     mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 -- The following rule is used to avoid polluting the non-reclaimable FastString
 -- table with transient strings when we only want their encoding.
 {-# RULES
-"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x #-}
+"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeByteString x #-}
 
 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
 mkFastStringByteList :: [Word8] -> FastString
@@ -554,7 +554,7 @@ mkNewFastStringShortByteString :: ShortByteString -> Int
                                -> FastMutInt -> IO FastString
 mkNewFastStringShortByteString sbs uid n_zencs = do
   let zstr = mkZFastString n_zencs sbs
-  chars <- countUTF8Chars sbs
+      chars = utf8CountCharsShortByteString sbs
   return (FastString uid chars sbs zstr)
 
 hashStr :: ShortByteString -> Int
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index e6dcb14b6b..1426cf26e3 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -199,7 +199,7 @@ stringToStringBuffer str =
   let size = utf8EncodedLength str
   buf <- mallocForeignPtrArray (size+3)
   unsafeWithForeignPtr buf $ \ptr -> do
-    utf8EncodeStringPtr ptr str
+    utf8EncodePtr ptr str
     pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
     -- sentinels for UTF-8 decoding
   return (StringBuffer buf size 0)
@@ -297,7 +297,7 @@ prevChar (StringBuffer buf _ cur) _ =
   inlinePerformIO $
     unsafeWithForeignPtr buf $ \p -> do
       p' <- utf8PrevChar (p `plusPtr` cur)
-      return (fst (utf8DecodeChar p'))
+      return (fst (utf8DecodeCharPtr p'))
 
 -- -----------------------------------------------------------------------------
 -- Moving
@@ -383,7 +383,7 @@ lexemeToString :: StringBuffer
                -> String
 lexemeToString _ 0 = ""
 lexemeToString (StringBuffer buf _ cur) bytes =
-  utf8DecodeStringLazy buf cur bytes
+  utf8DecodeForeignPtr buf cur bytes
 
 lexemeToFastString :: StringBuffer
                    -> Int -- ^ @n@, the number of bytes
@@ -405,7 +405,7 @@ decodePrevNChars n (StringBuffer buf _ cur) =
     go buf0 n acc p | n == 0 || buf0 >= p = return acc
     go buf0 n acc p = do
         p' <- utf8PrevChar p
-        let (c,_) = utf8DecodeChar p'
+        let (c,_) = utf8DecodeCharPtr p'
         go buf0 (n - 1) (c:acc) p'
 
 -- -----------------------------------------------------------------------------
@@ -414,7 +414,7 @@ parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
   = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
     go i x | i == len = x
-           | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
+           | otherwise = case fst (utf8DecodeCharPtr (ptr `plusPtr` (cur + i))) of
                '_' -> go (i + 1) x -- skip "_" (#14473)
                char -> go (i + 1) (x * radix + toInteger (char_to_int char))
   in go 0 0
diff --git a/compiler/GHC/Hs/DocString.hs b/compiler/GHC/Hs/DocString.hs
index 3a557ee0e8..7e35004237 100644
--- a/compiler/GHC/Hs/DocString.hs
+++ b/compiler/GHC/Hs/DocString.hs
@@ -137,7 +137,7 @@ instance Outputable HsDocStringChunk where
 
 
 mkHsDocStringChunk :: String -> HsDocStringChunk
-mkHsDocStringChunk s = HsDocStringChunk (utf8EncodeString s)
+mkHsDocStringChunk s = HsDocStringChunk (utf8EncodeByteString s)
 
 -- | Create a 'HsDocString' from a UTF8-encoded 'ByteString'.
 mkHsDocStringChunkUtf8ByteString :: ByteString -> HsDocStringChunk
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index 478925122c..8c8f89dbe9 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -230,7 +230,7 @@ emitCostCentreDecl cc = do
   ; modl <- newByteStringCLit (bytesFS $ moduleNameFS
                                        $ moduleName
                                        $ cc_mod cc)
-  ; loc <- newByteStringCLit $ utf8EncodeString $
+  ; loc <- newByteStringCLit $ utf8EncodeByteString $
                    renderWithContext ctx (ppr $! costCentreSrcSpan cc)
   ; let
      lits = [ zero platform, -- StgInt ccID,
@@ -297,7 +297,7 @@ emitInfoTableProv ip = do
         ctx = stgToCmmContext cfg
         platform = stgToCmmPlatform cfg
   ; let (src, label) = maybe ("", "") (first (renderWithContext ctx . ppr)) (infoTableProv ip)
-        mk_string = newByteStringCLit . utf8EncodeString
+        mk_string = newByteStringCLit . utf8EncodeByteString
   ; label <- mk_string label
   ; modl <- newByteStringCLit (bytesFS $ moduleNameFS
                                        $ moduleName mod)
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index 5b14ecc78d..b525fc94df 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -578,7 +578,7 @@ mkLitChar = LitChar
 mkLitString :: String -> Literal
 -- stored UTF-8 encoded
 mkLitString [] = LitString mempty
-mkLitString s = LitString (utf8EncodeString s)
+mkLitString s = LitString (utf8EncodeByteString s)
 
 mkLitBigNat :: Integer -> Literal
 mkLitBigNat x = assertPpr (x >= 0) (integer x)
```