diff options
author | Matthew Pickering <matthewtpickering@gmail.com> | 2019-10-20 21:04:03 +0100 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2019-10-22 08:34:51 +0100 |
commit | 6a8451516afcf12c839a8968bcd310824939e4d1 (patch) | |
tree | 756dce4a160eeb338dfe40f8745d084784905a51 | |
parent | 6f638e7ad13011fca9f4dd0990504744d49e7072 (diff) | |
download | haskell-6a8451516afcf12c839a8968bcd310824939e4d1.tar.gz |
Refactoring as suggested by AndreasK
-rw-r--r-- | compiler/utils/Encoding.hs | 41 | ||||
-rw-r--r-- | compiler/utils/StringBuffer.hs | 2 |
2 files changed, 28 insertions, 15 deletions
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs index 13a2739be1..6349ef3230 100644 --- a/compiler/utils/Encoding.hs +++ b/compiler/utils/Encoding.hs @@ -13,7 +13,8 @@ module Encoding ( -- * UTF-8 - utf8DecodeChar#, + utf8DecodeCharArr#, + utf8DecodeCharAddr#, utf8PrevChar, utf8CharStart, utf8DecodeChar, @@ -62,9 +63,9 @@ import GHC.Exts -- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences -- before decoding them (see StringBuffer.hs). -{-# INLINE utf8DecodeChar# #-} -utf8DecodeChar# :: (Int# -> Word#) -> (# Char#, Int# #) -utf8DecodeChar# indexWord8# = +{-# INLINE utf8DecodeCharX# #-} +utf8DecodeCharX# :: (Int# -> Word#) -> (# Char#, Int# #) +utf8DecodeCharX# indexWord8# = let !ch0 = word2Int# (indexWord8# 0#) in case () of _ | isTrue# (ch0 <=# 0x7F#) -> (# chr# ch0, 1# #) @@ -109,9 +110,24 @@ utf8DecodeChar# indexWord8# = -- confusing parse error later on. Instead we use '\0' which -- will signal a lexer error immediately. + +-- | Wrapper for things with a fixed addr, making use of utf8DecodeCharX# +utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #) +utf8DecodeCharAddr# addr# off = + utf8DecodeCharX# (indexWord8OffAddr# (addr# `plusAddr#` off)) +{-# NOINLINE[0] utf8DecodeCharAddr# #-} +{-# NOINLINE[0] utf8DecodeCharArr# #-} + + +-- | Wrapper for unpinned things, making use of utf8DecodeCharX# +utf8DecodeCharArr# :: ByteArray# -> Int# -> (# Char#, Int# #) +utf8DecodeCharArr# arr byteOffset = + utf8DecodeCharX# (\j -> indexWord8Array# arr (j +# byteOffset)) + + utf8DecodeChar :: Ptr Word8 -> (Char, Int) utf8DecodeChar (Ptr a#) = - case utf8DecodeChar# (indexWord8OffAddr# a#) of + case utf8DecodeCharAddr# a# 0# of (# c#, nBytes# #) -> ( C# c#, I# nBytes# ) -- UTF-8 is cleverly designed so that we can always figure out where @@ -133,25 +149,23 @@ utf8DecodeByteString (BS.PS fptr offset (I# len#)) = unsafeDupablePerformIO $ do withForeignPtr fptr $ \ptr -> let addr# = unPtr (ptr `plusPtr` offset) - index# = indexWord8OffAddr# addr# in - utf8DecodeLazy# (touchForeignPtr fptr) index# len# + in utf8DecodeLazy# (touchForeignPtr fptr) (utf8DecodeCharAddr# addr#) len# utf8DecodeShortByteString :: ShortByteString -> [Char] utf8DecodeShortByteString (SBS ba#) - = unsafeDupablePerformIO $ utf8DecodeLazy# (return ()) index# len# + = unsafeDupablePerformIO $ utf8DecodeLazy# (return ()) (utf8DecodeCharArr# ba#) len# where - index# = indexWord8Array# ba# len# = sizeofByteArray# ba# {-# INLINE utf8DecodeLazy# #-} -utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char] +utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char] utf8DecodeLazy# retain indexWord8# len# = unpack 0# where unpack i# | isTrue# (i# >=# len#) = retain >> return [] | otherwise = - case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of + case indexWord8# i# of (# c#, nBytes# #) -> do rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#) return (C# c# : rest) @@ -160,8 +174,7 @@ utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] utf8DecodeStringLazy fp offset (I# len#) = unsafeDupablePerformIO $ withForeignPtr fp $ \ptr -> let ptr# = unPtr (ptr `plusPtr` offset) - index# = indexWord8OffAddr# ptr# in - utf8DecodeLazy# (touchForeignPtr fp) index# len# + in utf8DecodeLazy# (touchForeignPtr fp) (utf8DecodeCharAddr# ptr#) len# countUTF8Chars :: ShortByteString -> IO Int countUTF8Chars (SBS ba) = go 0# 0# @@ -171,7 +184,7 @@ countUTF8Chars (SBS ba) = go 0# 0# | isTrue# (i# >=# len#) = return (I# n#) | otherwise = do - case utf8DecodeChar# (\j# -> indexWord8Array# ba (i# +# j#)) of + case utf8DecodeCharArr# ba i# of (# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#) unPtr :: Ptr a -> Addr# diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs index 77a3426e74..96712bdc5c 100644 --- a/compiler/utils/StringBuffer.hs +++ b/compiler/utils/StringBuffer.hs @@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) = -- Getting our fingers dirty a little here, but this is performance-critical inlinePerformIO $ do withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (indexWord8OffAddr# (a# `plusAddr#` cur#)) of + case utf8DecodeCharAddr# a# cur# of (# c#, nBytes# #) -> let cur' = I# (cur# +# nBytes#) in return (C# c#, StringBuffer buf len cur') |