summary refs log tree commit diff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 62
1 files changed, 35 insertions, 27 deletions
diff --git a/utf8.c b/utf8.c
index be75891a00..ecc77c009e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -186,40 +186,48 @@ will be returned if it is valid, otherwise 0.
STRLEN
Perl_is_utf8_char(pTHX_ const U8 *s)
{
- U8 u = *s;
- STRLEN slen, len;
- UV uv, ouv;
+ STRLEN len;
+#ifdef IS_UTF8_CHAR
+ len = UTF8SKIP(s);
+ if (len <= 4)
+ return IS_UTF8_CHAR(s, len) ? len : 0;
+#endif /* #ifdef IS_UTF8_CHAR */
+ {
+ U8 u = *s;
+ STRLEN slen;
+ UV uv, ouv;
- if (UTF8_IS_INVARIANT(u))
- return 1;
+ if (UTF8_IS_INVARIANT(u))
+ return 1;
- if (!UTF8_IS_START(u))
- return 0;
+ if (!UTF8_IS_START(u))
+ return 0;
- len = UTF8SKIP(s);
+ len = UTF8SKIP(s);
- if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
- return 0;
+ if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
+ return 0;
- slen = len - 1;
- s++;
- u &= UTF_START_MASK(len);
- uv = u;
- ouv = uv;
- while (slen--) {
- if (!UTF8_IS_CONTINUATION(*s))
- return 0;
- uv = UTF8_ACCUMULATE(uv, *s);
- if (uv < ouv)
- return 0;
- ouv = uv;
- s++;
- }
+ slen = len - 1;
+ s++;
+ u &= UTF_START_MASK(len);
+ uv = u;
+ ouv = uv;
+ while (slen--) {
+ if (!UTF8_IS_CONTINUATION(*s))
+ return 0;
+ uv = UTF8_ACCUMULATE(uv, *s);
+ if (uv < ouv)
+ return 0;
+ ouv = uv;
+ s++;
+ }
- if ((STRLEN)UNISKIP(uv) < len)
- return 0;
+ if ((STRLEN)UNISKIP(uv) < len)
+ return 0;
- return len;
+ return len;
+ }
}
/*