diff options
author | Ben Gamari <ben@smart-cactus.org> | 2017-07-21 12:00:48 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2017-07-21 12:12:33 -0400 |
commit | 60c306369c8344f378707894039c3901788dafb4 (patch) | |
tree | 515b1ce0ccb5b8f10f88b47dc932e66f0bcbadee | |
parent | 806c39855db0e5bd6d929b82d2a70c43b2b9a39f (diff) | |
download | haskell-60c306369c8344f378707894039c3901788dafb4.tar.gz |
ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
While debugging #14005 I noticed that unpackCStringUtf8# was allocating a thunk
for each Unicode character that it unpacked. This seems hardly worthwhile given
that the thunk's closure will be at least three words, whereas the Char itself
will be only two and requires only a bit of bit twiddling to construct.
-rw-r--r-- | libraries/ghc-prim/GHC/CString.hs | 24 |
1 files changed, 12 insertions, 12 deletions
```diff
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db3ab..e739af79e1 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -129,20 +129,20 @@ unpackCStringUtf8# addr
       | isTrue# (ch `eqChar#` '\0'# ) = []
       | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
       | isTrue# (ch `leChar#` '\xDF'#) =
-          C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
-          unpack (nh +# 2#)
+          let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+          in c : unpack (nh +# 2#)
       | isTrue# (ch `leChar#` '\xEF'#) =
-          C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
-          unpack (nh +# 3#)
+          let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+          in c : unpack (nh +# 3#)
       | True =
-          C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
-          unpack (nh +# 4#)
+          let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+          in c : unpack (nh +# 4#)
       where
         !ch = indexCharOffAddr# addr nh
```