diff options
author | Matthew Pickering <matthewtpickering@gmail.com> | 2021-01-18 11:53:55 +0000 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2021-02-02 08:57:07 +0000 |
commit | 29720620de92f63a153a384710cc1370acc9852d (patch) | |
tree | 20550a39fbd06ae04108a7de82889d7e7c298dfa | |
parent | 84bb03c18b819a0d9fde369d827a308a3a540bd2 (diff) | |
download | haskell-29720620de92f63a153a384710cc1370acc9852d.tar.gz |
Don't use FastString to convert string to UTF8
-rw-r--r-- | compiler/GHC/Data/StringBuffer.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Hs/Doc.hs | 11 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prof.hs | 21 | ||||
-rw-r--r-- | libraries/ghc-boot/GHC/Utils/Encoding.hs | 14 |
4 files changed, 23 insertions, 25 deletions
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs index 42ab89f8cc..fe4d3529a3 100644 --- a/compiler/GHC/Data/StringBuffer.hs +++ b/compiler/GHC/Data/StringBuffer.hs @@ -185,7 +185,7 @@ stringToStringBuffer str = let size = utf8EncodedLength str buf <- mallocForeignPtrArray (size+3) withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str + utf8EncodeStringPtr ptr str pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] -- sentinels for UTF-8 decoding return (StringBuffer buf size 0) diff --git a/compiler/GHC/Hs/Doc.hs b/compiler/GHC/Hs/Doc.hs index 400df5cdf9..207b65f2fd 100644 --- a/compiler/GHC/Hs/Doc.hs +++ b/compiler/GHC/Hs/Doc.hs @@ -27,7 +27,6 @@ import GHC.Prelude import GHC.Utils.Binary import GHC.Utils.Encoding -import GHC.Utils.IO.Unsafe import GHC.Types.Name import GHC.Utils.Outputable as Outputable import GHC.Types.SrcLoc @@ -35,12 +34,10 @@ import GHC.Types.SrcLoc import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as C8 -import qualified Data.ByteString.Internal as BS import Data.Data import Data.Map (Map) import qualified Data.Map as Map import Data.Maybe -import Foreign -- | Haskell Documentation String -- @@ -68,13 +65,7 @@ isEmptyDocString :: HsDocString -> Bool isEmptyDocString (HsDocString bs) = BS.null bs mkHsDocString :: String -> HsDocString -mkHsDocString s = - inlinePerformIO $ do - let len = utf8EncodedLength s - buf <- mallocForeignPtrBytes len - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr s - pure (HsDocString (BS.fromForeignPtr buf 0 len)) +mkHsDocString s = HsDocString (utf8EncodeString s) -- | Create a 'HsDocString' from a UTF8-encoded 'ByteString'. mkHsDocStringUtf8ByteString :: ByteString -> HsDocString diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index 451d38ec4c..0176c3dbdc 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -51,6 +51,7 @@ import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Driver.CodeOutput ( ipInitCode ) +import GHC.Utils.Encoding import Control.Monad import Data.Char (ord) @@ -230,9 +231,8 @@ emitCostCentreDecl cc = do ; modl <- newByteStringCLit (bytesFS $ moduleNameFS $ moduleName $ cc_mod cc) - ; loc <- newByteStringCLit $ bytesFS $ mkFastString $ + ; loc <- newByteStringCLit $ utf8EncodeString $ showPpr dflags (costCentreSrcSpan cc) - -- XXX going via FastString to get UTF-8 encoding is silly ; let lits = [ zero platform, -- StgInt ccID, label, -- char *label, @@ -296,20 +296,17 @@ emitInfoTableProv ip = do ; let mod = infoProvModule ip ; let (src, label) = maybe ("", "") (\(s, l) -> (showPpr dflags s, l)) (infoTableProv ip) ; platform <- getPlatform - -- NB. bytesFS: we want the UTF-8 bytes here (#5559) - ; label <- newByteStringCLit (bytesFS $ mkFastString label) + ; let mk_string = newByteStringCLit . utf8EncodeString + ; label <- mk_string label ; modl <- newByteStringCLit (bytesFS $ moduleNameFS $ moduleName $ mod) - ; ty_string <- newByteStringCLit (bytesFS (mkFastString (infoTableType ip))) - ; loc <- newByteStringCLit $ bytesFS $ mkFastString $ src - -- XXX going via FastString to get UTF-8 encoding is silly - ; table_name <- newByteStringCLit $ bytesFS $ mkFastString $ - showPpr dflags (pprCLabel platform CStyle (infoTablePtr ip)) - - ; closure_type <- newByteStringCLit $ bytesFS $ mkFastString $ - showPpr dflags (text $ show $ infoProvEntClosureType ip) + ; ty_string <- mk_string (infoTableType ip) + ; loc <- mk_string src + ; table_name <- mk_string (showPpr dflags (pprCLabel platform CStyle (infoTablePtr ip))) + ; closure_type <- mk_string + (showPpr dflags (text $ show $ infoProvEntClosureType ip)) ; let lits = [ CmmLabel (infoTablePtr ip), -- Info table pointer table_name, -- char *table_name diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs index 0f84be189b..88953174e2 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs @@ -26,6 +26,7 @@ module GHC.Utils.Encoding ( utf8DecodeStringLazy, utf8EncodeChar, utf8EncodeString, + utf8EncodeStringPtr, utf8EncodeShortByteString, utf8EncodedLength, countUTF8Chars, @@ -249,8 +250,17 @@ utf8EncodeChar write# c = case write# off# (int2Word# c#) s of s -> (# s, () #) -utf8EncodeString :: Ptr Word8 -> String -> IO () -utf8EncodeString (Ptr a#) str = go a# str +utf8EncodeString :: String -> ByteString +utf8EncodeString s = + unsafePerformIO $ do + let len = utf8EncodedLength s + buf <- mallocForeignPtrBytes len + withForeignPtr buf $ \ptr -> do + utf8EncodeStringPtr ptr s + pure (BS.fromForeignPtr buf 0 len) + +utf8EncodeStringPtr :: Ptr Word8 -> String -> IO () +utf8EncodeStringPtr (Ptr a#) str = go a# str where go !_ [] = return () go a# (c:cs) = do I# off# <- stToIO $ utf8EncodeChar (writeWord8OffAddr# a#) c |