summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@well-typed.com>2020-03-27 18:05:14 +0300
committerAndreas Klebinger <klebinger.andreas@gmx.at>2022-10-18 08:41:49 +0000
commite4da2f7395f08d415fde5cf8757d5d8b01ac3abf (patch)
tree005de99db58500cf1b8da833e663df88e5adbea7
parentba4bd4a48223bc9b215cfda138a5de9f99c87cdf (diff)
downloadhaskell-wip/chunked-unpackCString.tar.gz
ghc-prim: Strictly in chunks of 32 characterswip/chunked-unpackCString
-rw-r--r--libraries/ghc-prim/GHC/CString.hs48
1 files changed, 35 insertions, 13 deletions
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index 003bd9af65..04dbd61e0e 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -178,23 +178,45 @@ increment will happen in the loop.
unpackCString# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCString# #-}
-unpackCString# addr
- | isTrue# (ch `eqChar#` '\0'#) = []
- | True = C# ch : unpackCString# (addr `plusAddr#` 1#)
- where
- -- See Note [unpackCString# iterating over addr]
- !ch = indexCharOffAddr# addr 0#
-
+ -- See the NOINLINE note on unpackCString#
+unpackCString# addr = unpackAppendCString'# [] addr
unpackAppendCString# :: Addr# -> [Char] -> [Char]
{-# NOINLINE unpackAppendCString# #-}
-- See the NOINLINE note on unpackCString#
-unpackAppendCString# addr rest
- | isTrue# (ch `eqChar#` '\0'#) = rest
- | True = C# ch : unpackAppendCString# (addr `plusAddr#` 1#) rest
- where
- -- See Note [unpackCString# iterating over addr]
- !ch = indexCharOffAddr# addr 0#
+unpackAppendCString# addr rest = unpackAppendCString'# rest addr
+
+-- | This is a local helper to reduce duplication between
+-- 'unpackCString#' and 'unpackAppendCString#'. Because it is inlined,
+-- this gets specialised to @rest = []@ in the former case.
+unpackAppendCString'# :: [Char] -> Addr# -> [Char]
+{-# INLINE unpackAppendCString'# #-}
+unpackAppendCString'# rest0 addr0 = goStrict addr0 unpackChunkLen
+ where
+ -- Laziness is expensive: it involves allocating a thunk, then an indirect
+ -- jump, perhaps some cache misses, etc. However, in practice we find that
+ -- most applications tend to use at least *some* of their unpacked string.
+ -- Consequently we unpack eagerly in chunks of this many characters (plus
+ -- one, since goStrict counts down to 0 inclusive). Compared to fully-lazy
+ -- unpacking this improves runtime of GHC by about 0.5%.
+ unpackChunkLen = 32#
+
+ unpackChunk :: Addr# -> [Char]
+ unpackChunk addr = goStrict addr unpackChunkLen
+
+ goStrict :: Addr# -> Int# -> [Char]
+ goStrict addr n
+ | isTrue# (ch `eqChar#` '\0'#) = rest0
+ | isTrue# (n ==# 0#) =
+ -- We've reached the end of our chunk, lazily unpack the next chunk
+ let rest = unpackChunk (addr `plusAddr#` 1#)
+ in C# ch : rest
+ | True =
+ let !rest = goStrict (addr `plusAddr#` 1#) (n -# 1#)
+ in C# ch : rest
+ where
+ -- See Note [unpackCString# iterating over addr]
+ !ch = indexCharOffAddr# addr 0#
-- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#.
-- See Note [String literals in GHC] for more details.