summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gröber <dxld@darkboxed.org>2020-02-10 14:19:21 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-07-22 20:18:11 -0400
commit8336ba78e00ec42521ba8314bc65ec766e6bcc7d (patch)
tree6afd917e80c2efface0e561a1f3673d9fda47f5d
parent1010c33bb8704fa55a82bc2601d5cae2e6ecc21f (diff)
downloadhaskell-8336ba78e00ec42521ba8314bc65ec766e6bcc7d.tar.gz
Pass specialised utf8DecodeChar# to utf8DecodeLazy# for performance
Currently we're passing a indexWord8OffAddr# type function to utf8DecodeLazy# which then passes it on to utf8DecodeChar#. By passing one of utf8DecodeCharAddr# or utf8DecodeCharByteArray# instead we benefit from the inlining and specialization already done for those.
-rw-r--r--compiler/GHC/Data/StringBuffer.hs2
-rw-r--r--compiler/GHC/Utils/Encoding.hs24
2 files changed, 12 insertions, 14 deletions
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index daf599d5e7..11ddfe47bc 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) =
-- Getting our fingers dirty a little here, but this is performance-critical
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
- case utf8DecodeCharAddr# (a# `plusAddr#` cur#) of
+ case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
(# c#, nBytes# #) ->
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')
diff --git a/compiler/GHC/Utils/Encoding.hs b/compiler/GHC/Utils/Encoding.hs
index d0db1bde77..96531e7cd1 100644
--- a/compiler/GHC/Utils/Encoding.hs
+++ b/compiler/GHC/Utils/Encoding.hs
@@ -109,9 +109,9 @@ utf8DecodeChar# indexWord8# =
-- confusing parse error later on. Instead we use '\0' which
-- will signal a lexer error immediately.
-utf8DecodeCharAddr# :: Addr# -> (# Char#, Int# #)
-utf8DecodeCharAddr# a# =
- utf8DecodeChar# (indexWord8OffAddr# a#)
+utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
+utf8DecodeCharAddr# a# off# =
+ utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
utf8DecodeCharByteArray# ba# off# =
@@ -119,7 +119,7 @@ utf8DecodeCharByteArray# ba# off# =
utf8DecodeChar :: Ptr Word8 -> (Char, Int)
utf8DecodeChar !(Ptr a#) =
- case utf8DecodeCharAddr# a# of
+ case utf8DecodeCharAddr# a# 0# of
(# c#, nBytes# #) -> ( C# c#, I# nBytes# )
-- UTF-8 is cleverly designed so that we can always figure out where
@@ -137,14 +137,14 @@ utf8CharStart p = go p
else return p
{-# INLINE utf8DecodeLazy# #-}
-utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char]
-utf8DecodeLazy# retain indexWord8# len#
+utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
+utf8DecodeLazy# retain decodeChar# len#
= unpack 0#
where
unpack i#
| isTrue# (i# >=# len#) = retain >> return []
| otherwise =
- case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of
+ case decodeChar# i# of
(# c#, nBytes# #) -> do
rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
return (C# c# : rest)
@@ -156,16 +156,14 @@ utf8DecodeByteString (BS.PS fptr offset len)
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fp offset (I# len#)
= unsafeDupablePerformIO $ withForeignPtr fp $ \ptr ->
- let !(Ptr a#) = ptr `plusPtr` offset
- index# = indexWord8OffAddr# a# in
- utf8DecodeLazy# (touchForeignPtr fp) index# len#
+ let !(Ptr a#) = ptr `plusPtr` offset in
+ utf8DecodeLazy# (touchForeignPtr fp) (utf8DecodeCharAddr# a#) len#
utf8DecodeShortByteString :: ShortByteString -> [Char]
utf8DecodeShortByteString (SBS ba#)
= unsafeDupablePerformIO $
- let index# = indexWord8Array# ba#
- len# = sizeofByteArray# ba# in
- utf8DecodeLazy# (return ()) index# len#
+ let len# = sizeofByteArray# ba# in
+ utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
countUTF8Chars :: ShortByteString -> IO Int
countUTF8Chars (SBS ba) = go 0# 0#