Pass specialised utf8DecodeChar# to utf8DecodeLazy# for performance

Currently we're passing a indexWord8OffAddr# type function to utf8DecodeLazy# which then passes it on to utf8DecodeChar#. By passing one of utf8DecodeCharAddr# or utf8DecodeCharByteArray# instead we benefit from the inlining and specialization already done for those.
author: Daniel Gröber <dxld@darkboxed.org> 2020-02-10 14:19:21 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-07-22 20:18:11 -0400
commit: 8336ba78e00ec42521ba8314bc65ec766e6bcc7d (patch)
tree: 6afd917e80c2efface0e561a1f3673d9fda47f5d
parent: 1010c33bb8704fa55a82bc2601d5cae2e6ecc21f (diff)
download: haskell-8336ba78e00ec42521ba8314bc65ec766e6bcc7d.tar.gz
2 files changed, 12 insertions, 14 deletions
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index daf599d5e7..11ddfe47bc 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) =
   -- Getting our fingers dirty a little here, but this is performance-critical
   inlinePerformIO $ do
     withForeignPtr buf $ \(Ptr a#) -> do
-        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) of
+        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
           (# c#, nBytes# #) ->
              let cur' = I# (cur# +# nBytes#) in
              return (C# c#, StringBuffer buf len cur')
diff --git a/compiler/GHC/Utils/Encoding.hs b/compiler/GHC/Utils/Encoding.hs
index d0db1bde77..96531e7cd1 100644
--- a/compiler/GHC/Utils/Encoding.hs
+++ b/compiler/GHC/Utils/Encoding.hs
@@ -109,9 +109,9 @@ utf8DecodeChar# indexWord8# =
         -- confusing parse error later on.  Instead we use '\0' which
         -- will signal a lexer error immediately.
 
-utf8DecodeCharAddr# :: Addr# -> (# Char#, Int# #)
-utf8DecodeCharAddr# a# =
-    utf8DecodeChar# (indexWord8OffAddr# a#)
+utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
+utf8DecodeCharAddr# a# off# =
+    utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
 
 utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
 utf8DecodeCharByteArray# ba# off# =
@@ -119,7 +119,7 @@ utf8DecodeCharByteArray# ba# off# =
 
 utf8DecodeChar :: Ptr Word8 -> (Char, Int)
 utf8DecodeChar !(Ptr a#) =
-  case utf8DecodeCharAddr# a# of
+  case utf8DecodeCharAddr# a# 0# of
     (# c#, nBytes# #) -> ( C# c#, I# nBytes# )
 
 -- UTF-8 is cleverly designed so that we can always figure out where
@@ -137,14 +137,14 @@ utf8CharStart p = go p
                         else return p
 
 {-# INLINE utf8DecodeLazy# #-}
-utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char]
-utf8DecodeLazy# retain indexWord8# len#
+utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
+utf8DecodeLazy# retain decodeChar# len#
   = unpack 0#
   where
     unpack i#
         | isTrue# (i# >=# len#) = retain >> return []
         | otherwise =
-            case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of
+            case decodeChar# i# of
               (# c#, nBytes# #) -> do
                 rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
                 return (C# c# : rest)
@@ -156,16 +156,14 @@ utf8DecodeByteString (BS.PS fptr offset len)
 utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
 utf8DecodeStringLazy fp offset (I# len#)
   = unsafeDupablePerformIO $ withForeignPtr fp $ \ptr ->
-      let !(Ptr a#) = ptr `plusPtr` offset
-          index# = indexWord8OffAddr# a# in
-      utf8DecodeLazy# (touchForeignPtr fp) index# len#
+      let !(Ptr a#) = ptr `plusPtr` offset in
+      utf8DecodeLazy# (touchForeignPtr fp) (utf8DecodeCharAddr# a#) len#
 
 utf8DecodeShortByteString :: ShortByteString -> [Char]
 utf8DecodeShortByteString (SBS ba#)
   = unsafeDupablePerformIO $
-      let index# = indexWord8Array# ba#
-          len# = sizeofByteArray# ba# in
-      utf8DecodeLazy# (return ()) index# len#
+      let len# = sizeofByteArray# ba# in
+      utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
 
 countUTF8Chars :: ShortByteString -> IO Int
 countUTF8Chars (SBS ba) = go 0# 0#
author	Daniel Gröber <dxld@darkboxed.org>	2020-02-10 14:19:21 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-07-22 20:18:11 -0400
commit	8336ba78e00ec42521ba8314bc65ec766e6bcc7d (patch)
tree	6afd917e80c2efface0e561a1f3673d9fda47f5d
parent	1010c33bb8704fa55a82bc2601d5cae2e6ecc21f (diff)
download	haskell-8336ba78e00ec42521ba8314bc65ec766e6bcc7d.tar.gz