summaryrefslogtreecommitdiff
path: root/libraries/ghc-prim
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2017-07-24 19:01:58 -0400
committerBen Gamari <ben@smart-cactus.org>2017-07-24 19:12:55 -0400
commit85a295d5607b5f8015bb3517601ced0d1adc29ef (patch)
tree6067fafa5e22c965911a8f24696c28d6620ee9b8 /libraries/ghc-prim
parent897366a012de053fd3558ffb665337287c3df926 (diff)
downloadhaskell-85a295d5607b5f8015bb3517601ced0d1adc29ef.tar.gz
ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
While debugging #14005 I noticed that unpackCStringUtf8# was allocating a thunk for each Unicode character that it unpacked. This seems hardly worthwhile given that the thunk's closure will be at least three words, whereas the Char itself will be only two and requires only a bit of bit twiddling to construct. Test Plan: Validate Reviewers: simonmar, austin Subscribers: dfeuer, rwbarton, thomie Differential Revision: https://phabricator.haskell.org/D3769
Diffstat (limited to 'libraries/ghc-prim')
-rw-r--r--libraries/ghc-prim/GHC/CString.hs28
1 files changed, 16 insertions, 12 deletions
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db3ab..0e6199f30f 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -125,24 +125,28 @@ unpackCStringUtf8# :: Addr# -> [Char]
unpackCStringUtf8# addr
= unpack 0#
where
+ -- We take care to strictly evaluate the character decoding as
+ -- indexCharOffAddr# is marked with the can_fail flag and
+ -- consequently GHC won't evaluate the expression unless it is absolutely
+ -- needed.
unpack nh
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
- C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
- unpack (nh +# 2#)
+ let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+ in c : unpack (nh +# 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
- C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
- unpack (nh +# 3#)
+ let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+ in c : unpack (nh +# 3#)
| True =
- C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
- unpack (nh +# 4#)
+ let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+ in c : unpack (nh +# 4#)
where
!ch = indexCharOffAddr# addr nh