diff options
-rw-r--r-- | compiler/GHC/Core/SimpleOpt.hs | 38 | ||||
-rw-r--r-- | libraries/ghc-boot/GHC/Utils/Encoding.hs | 9 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T9400.hs | 4 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T9400.stderr | 26 |
4 files changed, 50 insertions, 27 deletions
diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs index ffec34ff17..67dc4609c3 100644 --- a/compiler/GHC/Core/SimpleOpt.hs +++ b/compiler/GHC/Core/SimpleOpt.hs @@ -48,12 +48,12 @@ import GHC.Builtin.Types import GHC.Builtin.Names import GHC.Types.Basic import GHC.Unit.Module ( Module ) +import GHC.Utils.Encoding import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain import GHC.Utils.Misc import GHC.Data.Maybe ( orElse ) -import GHC.Data.FastString import Data.List (mapAccumL) import qualified Data.ByteString as BS @@ -874,9 +874,8 @@ calls to unpackCString# and returns: Just (':', [Char], ['a', unpackCString# "bc"]). -We need to be careful about UTF8 strings here. ""# contains a ByteString, so -we must parse it back into a FastString to split off the first character. -That way we can treat unpackCString# and unpackCStringUtf8# in the same way. +We need to be careful about UTF8 strings here. ""# contains an encoded ByteString, so +we call utf8UnconsByteString to correctly deal with the encoding and splitting. We must also be careful about lvl = "foo"# @@ -885,6 +884,8 @@ to ensure that we see through the let-binding for 'lvl'. Hence the (exprIsLiteral_maybe .. arg) in the guard before the call to dealWithStringLiteral. +The tests for this function are in T9400. + Note [Push coercions in exprIsConApp_maybe] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In #13025 I found a case where we had @@ -1237,23 +1238,18 @@ dealWithStringLiteral :: Var -> BS.ByteString -> Coercion -- This is not possible with user-supplied empty literals, GHC.Core.Make.mkStringExprFS -- turns those into [] automatically, but just in case something else in GHC -- generates a string literal directly. -dealWithStringLiteral _ str co - | BS.null str - = pushCoDataCon nilDataCon [Type charTy] co - -dealWithStringLiteral fun str co - = let strFS = mkFastStringByteString str - - char = mkConApp charDataCon [mkCharLit (headFS strFS)] - charTail = BS.tail (bytesFS strFS) - - -- In singleton strings, just add [] instead of unpackCstring# ""#. - rest = if BS.null charTail - then mkConApp nilDataCon [Type charTy] - else App (Var fun) - (Lit (LitString charTail)) - - in pushCoDataCon consDataCon [Type charTy, char, rest] co +dealWithStringLiteral fun str co = + case utf8UnconsByteString str of + Nothing -> pushCoDataCon nilDataCon [Type charTy] co + Just (char, charTail) -> + let char_expr = mkConApp charDataCon [mkCharLit char] + -- In singleton strings, just add [] instead of unpackCstring# ""#. + rest = if BS.null charTail + then mkConApp nilDataCon [Type charTy] + else App (Var fun) + (Lit (LitString charTail)) + + in pushCoDataCon consDataCon [Type charTy, char_expr, rest] co {- Note [Unfolding DFuns] diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs index ba07784b0d..5eb3779b3b 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs @@ -22,6 +22,7 @@ module GHC.Utils.Encoding ( utf8CharStart, utf8DecodeChar, utf8DecodeByteString, + utf8UnconsByteString, utf8DecodeShortByteString, utf8CompareShortByteString, utf8DecodeStringLazy, @@ -169,6 +170,14 @@ utf8DecodeByteString :: ByteString -> [Char] utf8DecodeByteString (BS.PS fptr offset len) = utf8DecodeStringLazy fptr offset len +utf8UnconsByteString :: ByteString -> Maybe (Char, ByteString) +utf8UnconsByteString (BS.PS _ _ 0) = Nothing +utf8UnconsByteString (BS.PS fptr offset len) + = unsafeDupablePerformIO $ + withForeignPtr fptr $ \ptr -> do + let (c,n) = utf8DecodeChar (ptr `plusPtr` offset) + return $ Just (c, BS.PS fptr (offset + n) (len - n)) + utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] utf8DecodeStringLazy fp offset (I# len#) = unsafeDupablePerformIO $ do diff --git a/testsuite/tests/simplCore/should_compile/T9400.hs b/testsuite/tests/simplCore/should_compile/T9400.hs index 4e9cb72cb9..85aad51564 100644 --- a/testsuite/tests/simplCore/should_compile/T9400.hs +++ b/testsuite/tests/simplCore/should_compile/T9400.hs @@ -16,3 +16,7 @@ main = do (x:xs) -> putStrLn xs case "ab" of "" -> putStrLn "fail" + case "\0abc" of + (x:xs) -> putStrLn xs + case "zażółćz" of + (x:xs) -> putStrLn xs diff --git a/testsuite/tests/simplCore/should_compile/T9400.stderr b/testsuite/tests/simplCore/should_compile/T9400.stderr index c4584f460e..511a481d96 100644 --- a/testsuite/tests/simplCore/should_compile/T9400.stderr +++ b/testsuite/tests/simplCore/should_compile/T9400.stderr @@ -9,7 +9,7 @@ T9400.hs:18:9: warning: [-Woverlapping-patterns (in -Wdefault)] ==================== Tidy Core ==================== Result size of Tidy Core - = {terms: 38, types: 22, coercions: 0, joins: 0/0} + = {terms: 48, types: 28, coercions: 0, joins: 0/0} -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} $trModule1 :: Addr# @@ -36,7 +36,7 @@ T9400.$trModule :: Module [GblId, Unf=OtherCon []] T9400.$trModule = GHC.Types.Module $trModule2 $trModule4 --- RHS size: {terms: 23, types: 15, coercions: 0, joins: 0/0} +-- RHS size: {terms: 33, types: 21, coercions: 0, joins: 0/0} main :: IO () [GblId] main @@ -64,10 +64,24 @@ main @() @() (putStrLn (unpackCString# "efg"#)) - (case Control.Exception.Base.patError - @LiftedRep @() "T9400.hs:(17,5)-(18,29)|case"# - of wild { - })))) + (>> + @IO + GHC.Base.$fMonadIO + @() + @() + (case Control.Exception.Base.patError + @LiftedRep @() "T9400.hs:(17,5)-(18,29)|case"# + of wild { + }) + (>> + @IO + GHC.Base.$fMonadIO + @() + @() + (putStrLn (unpackCStringUtf8# "abc"#)) + (putStrLn + (unpackCStringUtf8# + "a\\197\\188\\195\\179\\197\\130\\196\\135z"#))))))) |