ghc-boot: Clean up UTF-8 codecs

In preparation for moving the UTF-8 codecs into `base`:

* Move them to GHC.Utils.Encoding.UTF8
* Make names more consistent
* Add some Haddocks
author: Ben Gamari <ben@smart-cactus.org> 2022-07-16 13:54:57 -0400
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2022-07-22 08:18:41 -0400
commit: ae1666353696b5d85938d8a2f5fb11fb66f21678 (patch)
tree: 7de167e7c2f27ec2acc300ba815677878c1bd738 /compiler
parent: 2c5991ccaf45cb7e68e54d59a27ee144a4499edb (diff)
download: haskell-ae1666353696b5d85938d8a2f5fb11fb66f21678.tar.gz
6 files changed, 13 insertions, 13 deletions
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Types.hs b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
index e29f03e1d6..caa829db21 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Types.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
@@ -599,7 +599,7 @@ pprString str
   = pprString' $ hcat $ map escapeChar $
     if str `lengthIs` utf8EncodedLength str
     then str
-    else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeString str
+    else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeByteString str
 
 -- | Escape a single non-unicode character
 escapeChar :: Char -> SDoc
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 131f174c41..98ca34c249 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -531,13 +531,13 @@ mkFastString :: String -> FastString
 {-# NOINLINE[1] mkFastString #-}
 mkFastString str =
   inlinePerformIO $ do
-    sbs <- utf8EncodeShortByteString str
+    let !sbs = utf8EncodeShortByteString str
     mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
 
 -- The following rule is used to avoid polluting the non-reclaimable FastString
 -- table with transient strings when we only want their encoding.
 {-# RULES
-"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x #-}
+"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeByteString x #-}
 
 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
 mkFastStringByteList :: [Word8] -> FastString
@@ -554,7 +554,7 @@ mkNewFastStringShortByteString :: ShortByteString -> Int
                                -> FastMutInt -> IO FastString
 mkNewFastStringShortByteString sbs uid n_zencs = do
   let zstr = mkZFastString n_zencs sbs
-  chars <- countUTF8Chars sbs
+      chars = utf8CountCharsShortByteString sbs
   return (FastString uid chars sbs zstr)
 
 hashStr  :: ShortByteString -> Int
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index e6dcb14b6b..1426cf26e3 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -199,7 +199,7 @@ stringToStringBuffer str =
   let size = utf8EncodedLength str
   buf <- mallocForeignPtrArray (size+3)
   unsafeWithForeignPtr buf $ \ptr -> do
-    utf8EncodeStringPtr ptr str
+    utf8EncodePtr ptr str
     pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
     -- sentinels for UTF-8 decoding
   return (StringBuffer buf size 0)
@@ -297,7 +297,7 @@ prevChar (StringBuffer buf _   cur) _     =
   inlinePerformIO $
     unsafeWithForeignPtr buf $ \p -> do
       p' <- utf8PrevChar (p `plusPtr` cur)
-      return (fst (utf8DecodeChar p'))
+      return (fst (utf8DecodeCharPtr p'))
 
 -- -----------------------------------------------------------------------------
 -- Moving
@@ -383,7 +383,7 @@ lexemeToString :: StringBuffer
                -> String
 lexemeToString _ 0 = ""
 lexemeToString (StringBuffer buf _ cur) bytes =
-  utf8DecodeStringLazy buf cur bytes
+  utf8DecodeForeignPtr buf cur bytes
 
 lexemeToFastString :: StringBuffer
                    -> Int               -- ^ @n@, the number of bytes
@@ -405,7 +405,7 @@ decodePrevNChars n (StringBuffer buf _ cur) =
     go buf0 n acc p | n == 0 || buf0 >= p = return acc
     go buf0 n acc p = do
         p' <- utf8PrevChar p
-        let (c,_) = utf8DecodeChar p'
+        let (c,_) = utf8DecodeCharPtr p'
         go buf0 (n - 1) (c:acc) p'
 
 -- -----------------------------------------------------------------------------
@@ -414,7 +414,7 @@ parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
   = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
     go i x | i == len  = x
-           | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
+           | otherwise = case fst (utf8DecodeCharPtr (ptr `plusPtr` (cur + i))) of
                '_'  -> go (i + 1) x    -- skip "_" (#14473)
                char -> go (i + 1) (x * radix + toInteger (char_to_int char))
   in go 0 0
diff --git a/compiler/GHC/Hs/DocString.hs b/compiler/GHC/Hs/DocString.hs
index 3a557ee0e8..7e35004237 100644
--- a/compiler/GHC/Hs/DocString.hs
+++ b/compiler/GHC/Hs/DocString.hs
@@ -137,7 +137,7 @@ instance Outputable HsDocStringChunk where
 
 
 mkHsDocStringChunk :: String -> HsDocStringChunk
-mkHsDocStringChunk s = HsDocStringChunk (utf8EncodeString s)
+mkHsDocStringChunk s = HsDocStringChunk (utf8EncodeByteString s)
 
 -- | Create a 'HsDocString' from a UTF8-encoded 'ByteString'.
 mkHsDocStringChunkUtf8ByteString :: ByteString -> HsDocStringChunk
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index 478925122c..8c8f89dbe9 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -230,7 +230,7 @@ emitCostCentreDecl cc = do
   ; modl  <- newByteStringCLit (bytesFS $ moduleNameFS
                                         $ moduleName
                                         $ cc_mod cc)
-  ; loc <- newByteStringCLit $ utf8EncodeString $
+  ; loc <- newByteStringCLit $ utf8EncodeByteString $
                    renderWithContext ctx (ppr $! costCentreSrcSpan cc)
   ; let
      lits = [ zero platform,  -- StgInt ccID,
@@ -297,7 +297,7 @@ emitInfoTableProv ip = do
         ctx      = stgToCmmContext  cfg
         platform = stgToCmmPlatform cfg
   ; let (src, label) = maybe ("", "") (first (renderWithContext ctx . ppr)) (infoTableProv ip)
-        mk_string    = newByteStringCLit . utf8EncodeString
+        mk_string    = newByteStringCLit . utf8EncodeByteString
   ; label <- mk_string label
   ; modl  <- newByteStringCLit (bytesFS $ moduleNameFS
                                         $ moduleName mod)
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index 5b14ecc78d..b525fc94df 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -578,7 +578,7 @@ mkLitChar = LitChar
 mkLitString :: String -> Literal
 -- stored UTF-8 encoded
 mkLitString [] = LitString mempty
-mkLitString s  = LitString (utf8EncodeString s)
+mkLitString s  = LitString (utf8EncodeByteString s)
 
 mkLitBigNat :: Integer -> Literal
 mkLitBigNat x = assertPpr (x >= 0) (integer x)
author	Ben Gamari <ben@smart-cactus.org>	2022-07-16 13:54:57 -0400
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2022-07-22 08:18:41 -0400
commit	ae1666353696b5d85938d8a2f5fb11fb66f21678 (patch)
tree	7de167e7c2f27ec2acc300ba815677878c1bd738 /compiler
parent	2c5991ccaf45cb7e68e54d59a27ee144a4499edb (diff)
download	haskell-ae1666353696b5d85938d8a2f5fb11fb66f21678.tar.gz