summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Gamari <ben@smart-cactus.org> 2017-07-21 12:00:48 -0400
committer Ben Gamari <ben@smart-cactus.org> 2017-07-21 12:12:33 -0400
commit 60c306369c8344f378707894039c3901788dafb4 (patch)
tree 515b1ce0ccb5b8f10f88b47dc932e66f0bcbadee
parent 806c39855db0e5bd6d929b82d2a70c43b2b9a39f (diff)
download haskell-60c306369c8344f378707894039c3901788dafb4.tar.gz
ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
While debugging #14005 I noticed that unpackCStringUtf8# was allocating a thunk for each Unicode character that it unpacked. This seems hardly worthwhile, given that the thunk's closure will be at least three words, whereas the Char itself is only two words and requires only a little bit-twiddling to construct. Each decoded multi-byte character is therefore now bound strictly (with a bang pattern) before being consed onto the result list.
-rw-r--r-- libraries/ghc-prim/GHC/CString.hs 24
1 file changed, 12 insertions, 12 deletions
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db3ab..e739af79e1 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -129,20 +129,20 @@ unpackCStringUtf8# addr
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
- C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
- unpack (nh +# 2#)
+ let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+ in c : unpack (nh +# 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
- C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
- unpack (nh +# 3#)
+ let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+ in c : unpack (nh +# 3#)
| True =
- C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
- unpack (nh +# 4#)
+ let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+ in c : unpack (nh +# 4#)
where
!ch = indexCharOffAddr# addr nh