summaryrefslogtreecommitdiff
path: root/libraries/ghc-boot
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2020-12-03 18:54:54 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-03-03 08:12:29 -0500
commitd8dc0f96237fe6fe7081c04727c7c2573477e5cb (patch)
treedbc4e8d25cf5a085e979df98bacad5999bf78aee /libraries/ghc-boot
parenteea96042f1e8682605ae68db10f2bcdd7dab923e (diff)
downloadhaskell-d8dc0f96237fe6fe7081c04727c7c2573477e5cb.tar.gz
Fix array and cleanup conversion primops (#19026)
The first change makes the array ones use the proper fixed-size types, which also means that just like before, they can be used without explicit conversions with the boxed sized types. (Before, it was Int# / Word# on both sides, now it is fixed sized on both sides). For the second change, don't use "extend" or "narrow" in some of the user-facing primops names for conversions. - Names like `narrowInt32#` are misleading when `Int` is 32-bits. - Names like `extendInt64#` are flat-out wrong when `Int is 32-bits. - `narrow{Int,Word}<N>#` however map a type to itself, and so don't suffer from this problem. They are left as-is. These changes are batched together because Alex happend to use the array ops. We can only use released versions of Alex at this time, sadly, and I don't want to have to have a release thatwon't work for the final GHC 9.2. So by combining these we get all the changes for Alex done at once. Bump hackage state in a few places, and also make that workflow slightly easier for the future. Bump minimum Alex version Bump Cabal, array, bytestring, containers, text, and binary submodules
Diffstat (limited to 'libraries/ghc-boot')
-rw-r--r--libraries/ghc-boot/GHC/Utils/Encoding.hs45
1 files changed, 41 insertions, 4 deletions
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs
index 0f84be189b..519b607425 100644
--- a/libraries/ghc-boot/GHC/Utils/Encoding.hs
+++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}
-- We always optimise this, otherwise performance of a non-optimised
@@ -116,11 +117,20 @@ utf8DecodeChar# indexWord8# =
utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# a# off# =
+#if !MIN_VERSION_base(4,16,0)
utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
+#else
+ utf8DecodeChar# (\i# -> word8ToWord# (indexWord8OffAddr# a# (i# +# off#)))
+#endif
utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
utf8DecodeCharByteArray# ba# off# =
+#if !MIN_VERSION_base(4,16,0)
utf8DecodeChar# (\i# -> indexWord8Array# ba# (i# +# off#))
+#else
+ utf8DecodeChar# (\i# -> word8ToWord# (indexWord8Array# ba# (i# +# off#)))
+#endif
+
utf8DecodeChar :: Ptr Word8 -> (Char, Int)
utf8DecodeChar !(Ptr a#) =
@@ -184,16 +194,29 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0#
| isTrue# (off1 >=# sz1) = LT
| isTrue# (off2 >=# sz2) = GT
| otherwise =
+#if !MIN_VERSION_base(4,16,0)
let !b1_1 = indexWord8Array# a1 off1
!b2_1 = indexWord8Array# a2 off2
+#else
+ let !b1_1 = word8ToWord# (indexWord8Array# a1 off1)
+ !b2_1 = word8ToWord# (indexWord8Array# a2 off2)
+#endif
in case b1_1 of
0xC0## -> case b2_1 of
0xC0## -> go (off1 +# 1#) (off2 +# 1#)
+#if !MIN_VERSION_base(4,16,0)
_ -> case indexWord8Array# a1 (off1 +# 1#) of
+#else
+ _ -> case word8ToWord# (indexWord8Array# a1 (off1 +# 1#)) of
+#endif
0x80## -> LT
_ -> go (off1 +# 1#) (off2 +# 1#)
_ -> case b2_1 of
+#if !MIN_VERSION_base(4,16,0)
0xC0## -> case indexWord8Array# a2 (off2 +# 1#) of
+#else
+ 0xC0## -> case word8ToWord# (indexWord8Array# a2 (off2 +# 1#)) of
+#endif
0x80## -> GT
_ -> go (off1 +# 1#) (off2 +# 1#)
_ | isTrue# (b1_1 `gtWord#` b2_1) -> GT
@@ -218,10 +241,10 @@ countUTF8Chars (SBS ba) = go 0# 0#
(# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#)
{-# INLINE utf8EncodeChar #-}
-utf8EncodeChar :: (Int# -> Word# -> State# s -> State# s)
+utf8EncodeChar :: (Int# -> Word8# -> State# s -> State# s)
-> Char -> ST s Int
utf8EncodeChar write# c =
- let x = ord c in
+ let x = fromIntegral (ord c) in
case () of
_ | x > 0 && x <= 0x007f -> do
write 0 x
@@ -245,15 +268,24 @@ utf8EncodeChar write# c =
return 4
where
{-# INLINE write #-}
- write (I# off#) (I# c#) = ST $ \s ->
- case write# off# (int2Word# c#) s of
+ write (I# off#) (W# c#) = ST $ \s ->
+#if !MIN_VERSION_base(4,16,0)
+ case write# off# (narrowWord8# c#) s of
+#else
+ case write# off# (wordToWord8# c#) s of
+#endif
s -> (# s, () #)
utf8EncodeString :: Ptr Word8 -> String -> IO ()
utf8EncodeString (Ptr a#) str = go a# str
where go !_ [] = return ()
go a# (c:cs) = do
+#if !MIN_VERSION_base(4,16,0)
+ -- writeWord8OffAddr# was taking a Word#
+ I# off# <- stToIO $ utf8EncodeChar (\i w -> writeWord8OffAddr# a# i (extendWord8# w)) c
+#else
I# off# <- stToIO $ utf8EncodeChar (writeWord8OffAddr# a#) c
+#endif
go (a# `plusAddr#` off#) cs
utf8EncodeShortByteString :: String -> IO ShortByteString
@@ -267,7 +299,12 @@ utf8EncodeShortByteString str = IO $ \s ->
where
go _ _ [] = return ()
go mba# i# (c:cs) = do
+#if !MIN_VERSION_base(4,16,0)
+ -- writeWord8Array# was taking a Word#
+ I# off# <- utf8EncodeChar (\j# w -> writeWord8Array# mba# (i# +# j#) (extendWord8# w)) c
+#else
I# off# <- utf8EncodeChar (\j# -> writeWord8Array# mba# (i# +# j#)) c
+#endif
go mba# (i# +# off#) cs
utf8EncodedLength :: String -> Int