summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gröber <dxld@darkboxed.org>2020-02-10 14:19:21 +0100
committerBen Gamari <ben@smart-cactus.org>2020-03-18 14:59:58 +0000
commit2af183ffb5b78658004233c6c061acdb82a9cd9b (patch)
treeb0e54f7ea734afa8a15cf08e87a1314d8573149b
parentbc780f9aa151fa6ef64b30e17b77c4f2100312b2 (diff)
downloadhaskell-2af183ffb5b78658004233c6c061acdb82a9cd9b.tar.gz
Pass specialised utf8DecodeChar# to utf8DecodeLazy# for performance
Currently we're passing a indexWord8OffAddr# type function to utf8DecodeLazy# which then passes it on to utf8DecodeChar#. By passing one of utf8DecodeCharAddr# or utf8DecodeCharByteArray# instead we benefit from the inlining and specialization already done for those.
-rw-r--r--compiler/utils/Encoding.hs24
-rw-r--r--compiler/utils/StringBuffer.hs2
2 files changed, 12 insertions, 14 deletions
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index 63524e1d15..2ddb767c2f 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -109,9 +109,9 @@ utf8DecodeChar# indexWord8# =
-- confusing parse error later on. Instead we use '\0' which
-- will signal a lexer error immediately.
-utf8DecodeCharAddr# :: Addr# -> (# Char#, Int# #)
-utf8DecodeCharAddr# a# =
- utf8DecodeChar# (indexWord8OffAddr# a#)
+utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
+utf8DecodeCharAddr# a# off# =
+ utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
utf8DecodeCharByteArray# ba# off# =
@@ -119,7 +119,7 @@ utf8DecodeCharByteArray# ba# off# =
utf8DecodeChar :: Ptr Word8 -> (Char, Int)
utf8DecodeChar !(Ptr a#) =
- case utf8DecodeCharAddr# a# of
+ case utf8DecodeCharAddr# a# 0# of
(# c#, nBytes# #) -> ( C# c#, I# nBytes# )
-- UTF-8 is cleverly designed so that we can always figure out where
@@ -137,14 +137,14 @@ utf8CharStart p = go p
else return p
{-# INLINE utf8DecodeLazy# #-}
-utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char]
-utf8DecodeLazy# retain indexWord8# len#
+utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
+utf8DecodeLazy# retain decodeChar# len#
= unpack 0#
where
unpack i#
| isTrue# (i# >=# len#) = retain >> return []
| otherwise =
- case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of
+ case decodeChar# i# of
(# c#, nBytes# #) -> do
rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
return (C# c# : rest)
@@ -156,16 +156,14 @@ utf8DecodeByteString (BS.PS fptr offset len)
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fp offset (I# len#)
= unsafeDupablePerformIO $ withForeignPtr fp $ \ptr ->
- let !(Ptr a#) = ptr `plusPtr` offset
- index# = indexWord8OffAddr# a# in
- utf8DecodeLazy# (touchForeignPtr fp) index# len#
+ let !(Ptr a#) = ptr `plusPtr` offset in
+ utf8DecodeLazy# (touchForeignPtr fp) (utf8DecodeCharAddr# a#) len#
utf8DecodeShortByteString :: ShortByteString -> [Char]
utf8DecodeShortByteString (SBS ba#)
= unsafeDupablePerformIO $
- let index# = indexWord8Array# ba#
- len# = sizeofByteArray# ba# in
- utf8DecodeLazy# (return ()) index# len#
+ let len# = sizeofByteArray# ba# in
+ utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
countUTF8Chars :: ShortByteString -> IO Int
countUTF8Chars (SBS ba) = go 0# 0#
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 2cdf56af93..df07f5c408 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) =
-- Getting our fingers dirty a little here, but this is performance-critical
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
- case utf8DecodeCharAddr# (a# `plusAddr#` cur#) of
+ case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
(# c#, nBytes# #) ->
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')