summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-04-18 22:14:15 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-04-26 11:58:57 -0600
commit: cd7e6c884f038d4463b1c4768533b484e5c5c919 (patch)
tree: 39ec6bd1365e0b22b518f91f3b5b6edc3d90867d /utf8.c
parent: 524080c4d32ea2975130ce2ce31f3b3d508bf140 (diff)
download: perl-cd7e6c884f038d4463b1c4768533b484e5c5c919.tar.gz
is_utf8_char_slow(): Avoid accepting overlongs
There are possible overlong sequences that this function blindly accepts. Instead of developing new code to detect them, turn this function into a wrapper for utf8n_to_uvuni(), which already has this check.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 38
1 files changed, 5 insertions, 33 deletions
diff --git a/utf8.c b/utf8.c
index 04e8b97737..c01ea4b5b9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -277,43 +277,15 @@ five bytes or more.
STATIC STRLEN
S_is_utf8_char_slow(const U8 *s, const STRLEN len)
{
- U8 u = *s;
- STRLEN slen;
- UV uv, ouv;
-
- PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
-
- if (UTF8_IS_INVARIANT(u))
- return len == 1;
+ dTHX; /* The function called below requires thread context */
- if (!UTF8_IS_START(u))
- return 0;
-
- if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
- return 0;
+ STRLEN actual_len;
- slen = len - 1;
- s++;
-#ifdef EBCDIC
- u = NATIVE_TO_UTF(u);
-#endif
- u &= UTF_START_MASK(len);
- uv = u;
- ouv = uv;
- while (slen--) {
- if (!UTF8_IS_CONTINUATION(*s))
- return 0;
- uv = UTF8_ACCUMULATE(uv, *s);
- if (uv < ouv)
- return 0;
- ouv = uv;
- s++;
- }
+ PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
- if ((STRLEN)UNISKIP(uv) < len)
- return 0;
+ utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY);
- return len;
+ return (actual_len == (STRLEN) -1) ? 0 : actual_len;
}
/*