diff options
author | Karl Williamson <khw@cpan.org> | 2015-02-17 15:25:21 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-02-18 14:03:23 -0700 |
commit | e23c50db6337fb5f27e78e4d7e72f052a886113d (patch) | |
tree | e37faf9cafae1214269f0d29513636589bf13ad6 /utf8.c | |
parent | ce4fe27b699be446d76ea7ae21b2dce87c97165d (diff) | |
download | perl-e23c50db6337fb5f27e78e4d7e72f052a886113d.tar.gz |
utf8.c: Slight refactor of UTF-16 code
This eliminates a branch in the usual (non-surrogate) case, at the expense
of an extra one in the rarer surrogate case, which allows us to collapse
some error-condition code. It also adds UNLIKELY() hints to the rarely
taken branches.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 23 |
1 file changed, 15 insertions, 8 deletions
@@ -1278,19 +1278,26 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
 #define LAST_HIGH_SURROGATE  0xDBFF
 #define FIRST_LOW_SURROGATE  0xDC00
 #define LAST_LOW_SURROGATE  UNICODE_SURROGATE_LAST
-        if (uv >= FIRST_HIGH_SURROGATE && uv <= LAST_HIGH_SURROGATE) {
-            if (p >= pend) {
-                Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
-            } else {
+
+        /* This assumes that most uses will be in the first Unicode plane, not
+         * needing surrogates */
+        if (UNLIKELY(uv >= UNICODE_SURROGATE_FIRST
+                  && uv <= UNICODE_SURROGATE_LAST))
+        {
+            if (UNLIKELY(p >= pend) || UNLIKELY(uv > LAST_HIGH_SURROGATE)) {
+                Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+            }
+            else {
                 UV low = (p[0] << 8) + p[1];
-                p += 2;
-                if (low < FIRST_LOW_SURROGATE || low > LAST_LOW_SURROGATE)
+                if (   UNLIKELY(low < FIRST_LOW_SURROGATE)
+                    || UNLIKELY(low > LAST_LOW_SURROGATE))
+                {
                     Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+                }
+                p += 2;
                 uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
                                 + (low - FIRST_LOW_SURROGATE) + 0x10000;
             }
-        } else if (uv >= FIRST_LOW_SURROGATE && uv <= LAST_LOW_SURROGATE) {
-            Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
         }
 #ifdef EBCDIC
         d = uvoffuni_to_utf8_flags(d, uv, 0);