summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-02-17 15:25:21 -0700
committer Karl Williamson <khw@cpan.org> 2015-02-18 14:03:23 -0700
commit e23c50db6337fb5f27e78e4d7e72f052a886113d (patch)
tree e37faf9cafae1214269f0d29513636589bf13ad6
parent ce4fe27b699be446d76ea7ae21b2dce87c97165d (diff)
download perl-e23c50db6337fb5f27e78e4d7e72f052a886113d.tar.gz
utf8.c: Slight refactor of UTF-16 code
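This eliminates a branch in the usual case (a code unit that is not a surrogate), at the expense of an extra one in the rarer case (a surrogate), which allows us to collapse some error condition code. It also adds UNLIKELY() branch hints to the surrogate test and to the error tests.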
-rw-r--r-- utf8.c 23
1 files changed, 15 insertions, 8 deletions
diff --git a/utf8.c b/utf8.c
index bf5a36e232..179a96988e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1278,19 +1278,26 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
#define LAST_HIGH_SURROGATE 0xDBFF
#define FIRST_LOW_SURROGATE 0xDC00
#define LAST_LOW_SURROGATE UNICODE_SURROGATE_LAST
- if (uv >= FIRST_HIGH_SURROGATE && uv <= LAST_HIGH_SURROGATE) {
- if (p >= pend) {
- Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
- } else {
+
+ /* This assumes that most uses will be in the first Unicode plane, not
+ * needing surrogates */
+ if (UNLIKELY(uv >= UNICODE_SURROGATE_FIRST
+ && uv <= UNICODE_SURROGATE_LAST))
+ {
+ if (UNLIKELY(p >= pend) || UNLIKELY(uv > LAST_HIGH_SURROGATE)) {
+ Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+ }
+ else {
UV low = (p[0] << 8) + p[1];
- p += 2;
- if (low < FIRST_LOW_SURROGATE || low > LAST_LOW_SURROGATE)
+ if ( UNLIKELY(low < FIRST_LOW_SURROGATE)
+ || UNLIKELY(low > LAST_LOW_SURROGATE))
+ {
Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+ }
+ p += 2;
uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
+ (low - FIRST_LOW_SURROGATE) + 0x10000;
}
- } else if (uv >= FIRST_LOW_SURROGATE && uv <= LAST_LOW_SURROGATE) {
- Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
}
#ifdef EBCDIC
d = uvoffuni_to_utf8_flags(d, uv, 0);