summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2005-06-03 00:52:46 +0300
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2005-06-03 08:08:25 +0000
commit: 3b0fc154d4e77cfb1d426144cb362eb2fa6018f1 (patch)
tree: 148a122517822ca9924e52a4b360d78095134af6 /utf8.c
parent: b432a67249666bce4aa3385263660dc667d150d7 (diff)
download: perl-3b0fc154d4e77cfb1d426144cb362eb2fa6018f1.tar.gz
speed up is_utf8_char()
Message-ID: <429F557E.3090007@gmail.com>
p4raw-id: //depot/perl@24687
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 62
1 files changed, 35 insertions, 27 deletions
diff --git a/utf8.c b/utf8.c
index be75891a00..ecc77c009e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -186,40 +186,48 @@ will be returned if it is valid, otherwise 0.
STRLEN
Perl_is_utf8_char(pTHX_ const U8 *s)
{
- U8 u = *s;
- STRLEN slen, len;
- UV uv, ouv;
+ STRLEN len;
+#ifdef IS_UTF8_CHAR
+ len = UTF8SKIP(s);
+ if (len <= 4)
+ return IS_UTF8_CHAR(s, len) ? len : 0;
+#endif /* #ifdef IS_UTF8_CHAR */
+ {
+ U8 u = *s;
+ STRLEN slen;
+ UV uv, ouv;
- if (UTF8_IS_INVARIANT(u))
- return 1;
+ if (UTF8_IS_INVARIANT(u))
+ return 1;
- if (!UTF8_IS_START(u))
- return 0;
+ if (!UTF8_IS_START(u))
+ return 0;
- len = UTF8SKIP(s);
+ len = UTF8SKIP(s);
- if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
- return 0;
+ if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
+ return 0;
- slen = len - 1;
- s++;
- u &= UTF_START_MASK(len);
- uv = u;
- ouv = uv;
- while (slen--) {
- if (!UTF8_IS_CONTINUATION(*s))
- return 0;
- uv = UTF8_ACCUMULATE(uv, *s);
- if (uv < ouv)
- return 0;
- ouv = uv;
- s++;
- }
+ slen = len - 1;
+ s++;
+ u &= UTF_START_MASK(len);
+ uv = u;
+ ouv = uv;
+ while (slen--) {
+ if (!UTF8_IS_CONTINUATION(*s))
+ return 0;
+ uv = UTF8_ACCUMULATE(uv, *s);
+ if (uv < ouv)
+ return 0;
+ ouv = uv;
+ s++;
+ }
- if ((STRLEN)UNISKIP(uv) < len)
- return 0;
+ if ((STRLEN)UNISKIP(uv) < len)
+ return 0;
- return len;
+ return len;
+ }
}
/*