diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-12-03 18:54:54 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-03-03 08:12:29 -0500 |
commit | d8dc0f96237fe6fe7081c04727c7c2573477e5cb (patch) | |
tree | dbc4e8d25cf5a085e979df98bacad5999bf78aee /libraries/ghc-boot | |
parent | eea96042f1e8682605ae68db10f2bcdd7dab923e (diff) | |
download | haskell-d8dc0f96237fe6fe7081c04727c7c2573477e5cb.tar.gz |
Fix array and cleanup conversion primops (#19026)
The first change makes the array ones use the proper fixed-size types,
which also means that just like before, they can be used without
explicit conversions with the boxed sized types. (Before, it was Int# /
Word# on both sides, now it is fixed sized on both sides).
For the second change, don't use "extend" or "narrow" in some of the
user-facing primops names for conversions.
- Names like `narrowInt32#` are misleading when `Int` is 32-bits.
- Names like `extendInt64#` are flat-out wrong when `Int` is
32-bits.
- `narrow{Int,Word}<N>#` however map a type to itself, and so don't
suffer from this problem. They are left as-is.
These changes are batched together because Alex happened to use the array
ops. We can only use released versions of Alex at this time, sadly, and
I don't want to have to have a release that won't work for the final GHC
9.2. So by combining these we get all the changes for Alex done at once.
Bump hackage state in a few places, and also make that workflow slightly
easier for the future.
Bump minimum Alex version
Bump Cabal, array, bytestring, containers, text, and binary submodules
Diffstat (limited to 'libraries/ghc-boot')
-rw-r--r-- | libraries/ghc-boot/GHC/Utils/Encoding.hs | 45 |
1 files changed, 41 insertions, 4 deletions
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs index 0f84be189b..519b607425 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-} {-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-} -- We always optimise this, otherwise performance of a non-optimised @@ -116,11 +117,20 @@ utf8DecodeChar# indexWord8# = utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #) utf8DecodeCharAddr# a# off# = +#if !MIN_VERSION_base(4,16,0) utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#)) +#else + utf8DecodeChar# (\i# -> word8ToWord# (indexWord8OffAddr# a# (i# +# off#))) +#endif utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #) utf8DecodeCharByteArray# ba# off# = +#if !MIN_VERSION_base(4,16,0) utf8DecodeChar# (\i# -> indexWord8Array# ba# (i# +# off#)) +#else + utf8DecodeChar# (\i# -> word8ToWord# (indexWord8Array# ba# (i# +# off#))) +#endif + utf8DecodeChar :: Ptr Word8 -> (Char, Int) utf8DecodeChar !(Ptr a#) = @@ -184,16 +194,29 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0# | isTrue# (off1 >=# sz1) = LT | isTrue# (off2 >=# sz2) = GT | otherwise = +#if !MIN_VERSION_base(4,16,0) let !b1_1 = indexWord8Array# a1 off1 !b2_1 = indexWord8Array# a2 off2 +#else + let !b1_1 = word8ToWord# (indexWord8Array# a1 off1) + !b2_1 = word8ToWord# (indexWord8Array# a2 off2) +#endif in case b1_1 of 0xC0## -> case b2_1 of 0xC0## -> go (off1 +# 1#) (off2 +# 1#) +#if !MIN_VERSION_base(4,16,0) _ -> case indexWord8Array# a1 (off1 +# 1#) of +#else + _ -> case word8ToWord# (indexWord8Array# a1 (off1 +# 1#)) of +#endif 0x80## -> LT _ -> go (off1 +# 1#) (off2 +# 1#) _ -> case b2_1 of +#if !MIN_VERSION_base(4,16,0) 0xC0## -> case indexWord8Array# a2 (off2 +# 1#) of +#else + 0xC0## -> case word8ToWord# (indexWord8Array# a2 (off2 +# 1#)) of +#endif 0x80## -> GT _ -> go (off1 +# 1#) (off2 +# 
1#) _ | isTrue# (b1_1 `gtWord#` b2_1) -> GT @@ -218,10 +241,10 @@ countUTF8Chars (SBS ba) = go 0# 0# (# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#) {-# INLINE utf8EncodeChar #-} -utf8EncodeChar :: (Int# -> Word# -> State# s -> State# s) +utf8EncodeChar :: (Int# -> Word8# -> State# s -> State# s) -> Char -> ST s Int utf8EncodeChar write# c = - let x = ord c in + let x = fromIntegral (ord c) in case () of _ | x > 0 && x <= 0x007f -> do write 0 x @@ -245,15 +268,24 @@ utf8EncodeChar write# c = return 4 where {-# INLINE write #-} - write (I# off#) (I# c#) = ST $ \s -> - case write# off# (int2Word# c#) s of + write (I# off#) (W# c#) = ST $ \s -> +#if !MIN_VERSION_base(4,16,0) + case write# off# (narrowWord8# c#) s of +#else + case write# off# (wordToWord8# c#) s of +#endif s -> (# s, () #) utf8EncodeString :: Ptr Word8 -> String -> IO () utf8EncodeString (Ptr a#) str = go a# str where go !_ [] = return () go a# (c:cs) = do +#if !MIN_VERSION_base(4,16,0) + -- writeWord8OffAddr# was taking a Word# + I# off# <- stToIO $ utf8EncodeChar (\i w -> writeWord8OffAddr# a# i (extendWord8# w)) c +#else I# off# <- stToIO $ utf8EncodeChar (writeWord8OffAddr# a#) c +#endif go (a# `plusAddr#` off#) cs utf8EncodeShortByteString :: String -> IO ShortByteString @@ -267,7 +299,12 @@ utf8EncodeShortByteString str = IO $ \s -> where go _ _ [] = return () go mba# i# (c:cs) = do +#if !MIN_VERSION_base(4,16,0) + -- writeWord8Array# was taking a Word# + I# off# <- utf8EncodeChar (\j# w -> writeWord8Array# mba# (i# +# j#) (extendWord8# w)) c +#else I# off# <- utf8EncodeChar (\j# -> writeWord8Array# mba# (i# +# j#)) c +#endif go mba# (i# +# off#) cs utf8EncodedLength :: String -> Int |