Pass specialised utf8DecodeChar# to utf8DecodeLazy# for performance

Currently we're passing a indexWord8OffAddr# type function to utf8DecodeLazy# which then passes it on to utf8DecodeChar#. By passing one of utf8DecodeCharAddr# or utf8DecodeCharByteArray# instead we benefit from the inlining and specialization already done for those.
author: Daniel Gröber <dxld@darkboxed.org> 2020-02-10 14:19:21 +0100
committer: Ben Gamari <ben@smart-cactus.org> 2020-03-18 14:59:58 +0000
commit: 2af183ffb5b78658004233c6c061acdb82a9cd9b (patch)
tree: b0e54f7ea734afa8a15cf08e87a1314d8573149b
parent: bc780f9aa151fa6ef64b30e17b77c4f2100312b2 (diff)
download: haskell-2af183ffb5b78658004233c6c061acdb82a9cd9b.tar.gz
2 files changed, 12 insertions, 14 deletions
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index 63524e1d15..2ddb767c2f 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -109,9 +109,9 @@ utf8DecodeChar# indexWord8# =
         -- confusing parse error later on.  Instead we use '\0' which
         -- will signal a lexer error immediately.
 
-utf8DecodeCharAddr# :: Addr# -> (# Char#, Int# #)
-utf8DecodeCharAddr# a# =
-    utf8DecodeChar# (indexWord8OffAddr# a#)
+utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
+utf8DecodeCharAddr# a# off# =
+    utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
 
 utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
 utf8DecodeCharByteArray# ba# off# =
@@ -119,7 +119,7 @@ utf8DecodeCharByteArray# ba# off# =
 
 utf8DecodeChar :: Ptr Word8 -> (Char, Int)
 utf8DecodeChar !(Ptr a#) =
-  case utf8DecodeCharAddr# a# of
+  case utf8DecodeCharAddr# a# 0# of
     (# c#, nBytes# #) -> ( C# c#, I# nBytes# )
 
 -- UTF-8 is cleverly designed so that we can always figure out where
@@ -137,14 +137,14 @@ utf8CharStart p = go p
                         else return p
 
 {-# INLINE utf8DecodeLazy# #-}
-utf8DecodeLazy# :: (IO ()) -> (Int# -> Word#) -> Int# -> IO [Char]
-utf8DecodeLazy# retain indexWord8# len#
+utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
+utf8DecodeLazy# retain decodeChar# len#
   = unpack 0#
   where
     unpack i#
         | isTrue# (i# >=# len#) = retain >> return []
         | otherwise =
-            case utf8DecodeChar# (\j# -> indexWord8# (i# +# j#)) of
+            case decodeChar# i# of
               (# c#, nBytes# #) -> do
                 rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
                 return (C# c# : rest)
@@ -156,16 +156,14 @@ utf8DecodeByteString (BS.PS fptr offset len)
 utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
 utf8DecodeStringLazy fp offset (I# len#)
   = unsafeDupablePerformIO $ withForeignPtr fp $ \ptr ->
-      let !(Ptr a#) = ptr `plusPtr` offset
-          index# = indexWord8OffAddr# a# in
-      utf8DecodeLazy# (touchForeignPtr fp) index# len#
+      let !(Ptr a#) = ptr `plusPtr` offset in
+      utf8DecodeLazy# (touchForeignPtr fp) (utf8DecodeCharAddr# a#) len#
 
 utf8DecodeShortByteString :: ShortByteString -> [Char]
 utf8DecodeShortByteString (SBS ba#)
   = unsafeDupablePerformIO $
-      let index# = indexWord8Array# ba#
-          len# = sizeofByteArray# ba# in
-      utf8DecodeLazy# (return ()) index# len#
+      let len# = sizeofByteArray# ba# in
+      utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
 
 countUTF8Chars :: ShortByteString -> IO Int
 countUTF8Chars (SBS ba) = go 0# 0#
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 2cdf56af93..df07f5c408 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -200,7 +200,7 @@ nextChar (StringBuffer buf len (I# cur#)) =
   -- Getting our fingers dirty a little here, but this is performance-critical
   inlinePerformIO $ do
     withForeignPtr buf $ \(Ptr a#) -> do
-        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) of
+        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
           (# c#, nBytes# #) ->
              let cur' = I# (cur# +# nBytes#) in
              return (C# c#, StringBuffer buf len cur')
author	Daniel Gröber <dxld@darkboxed.org>	2020-02-10 14:19:21 +0100
committer	Ben Gamari <ben@smart-cactus.org>	2020-03-18 14:59:58 +0000
commit	2af183ffb5b78658004233c6c061acdb82a9cd9b (patch)
tree	b0e54f7ea734afa8a15cf08e87a1314d8573149b
parent	bc780f9aa151fa6ef64b30e17b77c4f2100312b2 (diff)
download	haskell-2af183ffb5b78658004233c6c061acdb82a9cd9b.tar.gz