diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2005-06-03 00:52:46 +0300 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2005-06-03 08:08:25 +0000 |
commit | 3b0fc154d4e77cfb1d426144cb362eb2fa6018f1 (patch) | |
tree | 148a122517822ca9924e52a4b360d78095134af6 /utf8.c | |
parent | b432a67249666bce4aa3385263660dc667d150d7 (diff) | |
download | perl-3b0fc154d4e77cfb1d426144cb362eb2fa6018f1.tar.gz |
speed up is_utf8_char()
Message-ID: <429F557E.3090007@gmail.com>
p4raw-id: //depot/perl@24687
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 62 |
1 files changed, 35 insertions, 27 deletions
```diff
@@ -186,40 +186,48 @@ will be returned if it is valid, otherwise 0.
 STRLEN
 Perl_is_utf8_char(pTHX_ const U8 *s)
 {
-    U8 u = *s;
-    STRLEN slen, len;
-    UV uv, ouv;
+    STRLEN len;
+#ifdef IS_UTF8_CHAR
+    len = UTF8SKIP(s);
+    if (len <= 4)
+        return IS_UTF8_CHAR(s, len) ? len : 0;
+#endif /* #ifdef IS_UTF8_CHAR */
+    {
+        U8 u = *s;
+        STRLEN slen;
+        UV uv, ouv;
 
-    if (UTF8_IS_INVARIANT(u))
-        return 1;
+        if (UTF8_IS_INVARIANT(u))
+            return 1;
 
-    if (!UTF8_IS_START(u))
-        return 0;
+        if (!UTF8_IS_START(u))
+            return 0;
 
-    len = UTF8SKIP(s);
+        len = UTF8SKIP(s);
 
-    if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
-        return 0;
+        if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
+            return 0;
 
-    slen = len - 1;
-    s++;
-    u &= UTF_START_MASK(len);
-    uv = u;
-    ouv = uv;
-    while (slen--) {
-        if (!UTF8_IS_CONTINUATION(*s))
-            return 0;
-        uv = UTF8_ACCUMULATE(uv, *s);
-        if (uv < ouv)
-            return 0;
-        ouv = uv;
-        s++;
-    }
+        slen = len - 1;
+        s++;
+        u &= UTF_START_MASK(len);
+        uv = u;
+        ouv = uv;
+        while (slen--) {
+            if (!UTF8_IS_CONTINUATION(*s))
+                return 0;
+            uv = UTF8_ACCUMULATE(uv, *s);
+            if (uv < ouv)
+                return 0;
+            ouv = uv;
+            s++;
+        }
 
-    if ((STRLEN)UNISKIP(uv) < len)
-        return 0;
+        if ((STRLEN)UNISKIP(uv) < len)
+            return 0;
 
-    return len;
+        return len;
+    }
 }
 
 /*
```