summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/Core/SimpleOpt.hs38
-rw-r--r--libraries/ghc-boot/GHC/Utils/Encoding.hs9
-rw-r--r--testsuite/tests/simplCore/should_compile/T9400.hs4
-rw-r--r--testsuite/tests/simplCore/should_compile/T9400.stderr26
4 files changed, 50 insertions, 27 deletions
diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs
index ffec34ff17..67dc4609c3 100644
--- a/compiler/GHC/Core/SimpleOpt.hs
+++ b/compiler/GHC/Core/SimpleOpt.hs
@@ -48,12 +48,12 @@ import GHC.Builtin.Types
import GHC.Builtin.Names
import GHC.Types.Basic
import GHC.Unit.Module ( Module )
+import GHC.Utils.Encoding
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.Misc
import GHC.Data.Maybe ( orElse )
-import GHC.Data.FastString
import Data.List (mapAccumL)
import qualified Data.ByteString as BS
@@ -874,9 +874,8 @@ calls to unpackCString# and returns:
Just (':', [Char], ['a', unpackCString# "bc"]).
-We need to be careful about UTF8 strings here. ""# contains a ByteString, so
-we must parse it back into a FastString to split off the first character.
-That way we can treat unpackCString# and unpackCStringUtf8# in the same way.
+We need to be careful about UTF8 strings here. ""# contains an encoded ByteString, so
+we call utf8UnconsByteString to correctly deal with the encoding and splitting.
We must also be careful about
lvl = "foo"#
@@ -885,6 +884,8 @@ to ensure that we see through the let-binding for 'lvl'. Hence the
(exprIsLiteral_maybe .. arg) in the guard before the call to
dealWithStringLiteral.
+The tests for this function are in T9400.
+
Note [Push coercions in exprIsConApp_maybe]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In #13025 I found a case where we had
@@ -1237,23 +1238,18 @@ dealWithStringLiteral :: Var -> BS.ByteString -> Coercion
-- This is not possible with user-supplied empty literals, GHC.Core.Make.mkStringExprFS
-- turns those into [] automatically, but just in case something else in GHC
-- generates a string literal directly.
-dealWithStringLiteral _ str co
- | BS.null str
- = pushCoDataCon nilDataCon [Type charTy] co
-
-dealWithStringLiteral fun str co
- = let strFS = mkFastStringByteString str
-
- char = mkConApp charDataCon [mkCharLit (headFS strFS)]
- charTail = BS.tail (bytesFS strFS)
-
- -- In singleton strings, just add [] instead of unpackCstring# ""#.
- rest = if BS.null charTail
- then mkConApp nilDataCon [Type charTy]
- else App (Var fun)
- (Lit (LitString charTail))
-
- in pushCoDataCon consDataCon [Type charTy, char, rest] co
+dealWithStringLiteral fun str co =
+ case utf8UnconsByteString str of
+ Nothing -> pushCoDataCon nilDataCon [Type charTy] co
+ Just (char, charTail) ->
+ let char_expr = mkConApp charDataCon [mkCharLit char]
+ -- In singleton strings, just add [] instead of unpackCstring# ""#.
+ rest = if BS.null charTail
+ then mkConApp nilDataCon [Type charTy]
+ else App (Var fun)
+ (Lit (LitString charTail))
+
+ in pushCoDataCon consDataCon [Type charTy, char_expr, rest] co
{-
Note [Unfolding DFuns]
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs
index ba07784b0d..5eb3779b3b 100644
--- a/libraries/ghc-boot/GHC/Utils/Encoding.hs
+++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs
@@ -22,6 +22,7 @@ module GHC.Utils.Encoding (
utf8CharStart,
utf8DecodeChar,
utf8DecodeByteString,
+ utf8UnconsByteString,
utf8DecodeShortByteString,
utf8CompareShortByteString,
utf8DecodeStringLazy,
@@ -169,6 +170,14 @@ utf8DecodeByteString :: ByteString -> [Char]
utf8DecodeByteString (BS.PS fptr offset len)
= utf8DecodeStringLazy fptr offset len
+utf8UnconsByteString :: ByteString -> Maybe (Char, ByteString)
+utf8UnconsByteString (BS.PS _ _ 0) = Nothing
+utf8UnconsByteString (BS.PS fptr offset len)
+ = unsafeDupablePerformIO $
+ withForeignPtr fptr $ \ptr -> do
+ let (c,n) = utf8DecodeChar (ptr `plusPtr` offset)
+ return $ Just (c, BS.PS fptr (offset + n) (len - n))
+
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fp offset (I# len#)
= unsafeDupablePerformIO $ do
diff --git a/testsuite/tests/simplCore/should_compile/T9400.hs b/testsuite/tests/simplCore/should_compile/T9400.hs
index 4e9cb72cb9..85aad51564 100644
--- a/testsuite/tests/simplCore/should_compile/T9400.hs
+++ b/testsuite/tests/simplCore/should_compile/T9400.hs
@@ -16,3 +16,7 @@ main = do
(x:xs) -> putStrLn xs
case "ab" of
"" -> putStrLn "fail"
+ case "\0abc" of
+ (x:xs) -> putStrLn xs
+ case "zażółćz" of
+ (x:xs) -> putStrLn xs
diff --git a/testsuite/tests/simplCore/should_compile/T9400.stderr b/testsuite/tests/simplCore/should_compile/T9400.stderr
index c4584f460e..511a481d96 100644
--- a/testsuite/tests/simplCore/should_compile/T9400.stderr
+++ b/testsuite/tests/simplCore/should_compile/T9400.stderr
@@ -9,7 +9,7 @@ T9400.hs:18:9: warning: [-Woverlapping-patterns (in -Wdefault)]
==================== Tidy Core ====================
Result size of Tidy Core
- = {terms: 38, types: 22, coercions: 0, joins: 0/0}
+ = {terms: 48, types: 28, coercions: 0, joins: 0/0}
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$trModule1 :: Addr#
@@ -36,7 +36,7 @@ T9400.$trModule :: Module
[GblId, Unf=OtherCon []]
T9400.$trModule = GHC.Types.Module $trModule2 $trModule4
--- RHS size: {terms: 23, types: 15, coercions: 0, joins: 0/0}
+-- RHS size: {terms: 33, types: 21, coercions: 0, joins: 0/0}
main :: IO ()
[GblId]
main
@@ -64,10 +64,24 @@ main
@()
@()
(putStrLn (unpackCString# "efg"#))
- (case Control.Exception.Base.patError
- @LiftedRep @() "T9400.hs:(17,5)-(18,29)|case"#
- of wild {
- }))))
+ (>>
+ @IO
+ GHC.Base.$fMonadIO
+ @()
+ @()
+ (case Control.Exception.Base.patError
+ @LiftedRep @() "T9400.hs:(17,5)-(18,29)|case"#
+ of wild {
+ })
+ (>>
+ @IO
+ GHC.Base.$fMonadIO
+ @()
+ @()
+ (putStrLn (unpackCStringUtf8# "abc"#))
+ (putStrLn
+ (unpackCStringUtf8#
+ "a\\197\\188\\195\\179\\197\\130\\196\\135z"#)))))))