diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-04-18 22:14:15 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-04-26 11:58:57 -0600 |
commit | cd7e6c884f038d4463b1c4768533b484e5c5c919 (patch) | |
tree | 39ec6bd1365e0b22b518f91f3b5b6edc3d90867d /utf8.c | |
parent | 524080c4d32ea2975130ce2ce31f3b3d508bf140 (diff) | |
download | perl-cd7e6c884f038d4463b1c4768533b484e5c5c919.tar.gz |
is_utf8_char_slow(): Avoid accepting overlongs
There are possible overlong sequences that this function blindly
accepts. Instead of developing the code to figure this out, turn this
function into a wrapper for utf8n_to_uvuni() which already has this
check.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 38 |
1 files changed, 5 insertions, 33 deletions
```diff
@@ -277,43 +277,15 @@ five bytes or more.
 STATIC STRLEN
 S_is_utf8_char_slow(const U8 *s, const STRLEN len)
 {
-    U8 u = *s;
-    STRLEN slen;
-    UV uv, ouv;
-
-    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
-
-    if (UTF8_IS_INVARIANT(u))
-        return len == 1;
+    dTHX;   /* The function called below requires thread context */
 
-    if (!UTF8_IS_START(u))
-        return 0;
-
-    if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
-        return 0;
+    STRLEN actual_len;
 
-    slen = len - 1;
-    s++;
-#ifdef EBCDIC
-    u = NATIVE_TO_UTF(u);
-#endif
-    u &= UTF_START_MASK(len);
-    uv = u;
-    ouv = uv;
-    while (slen--) {
-        if (!UTF8_IS_CONTINUATION(*s))
-            return 0;
-        uv = UTF8_ACCUMULATE(uv, *s);
-        if (uv < ouv)
-            return 0;
-        ouv = uv;
-        s++;
-    }
+    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
 
-    if ((STRLEN)UNISKIP(uv) < len)
-        return 0;
+    utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY);
 
-    return len;
+    return (actual_len == (STRLEN) -1) ? 0 : actual_len;
 }
 
 /*
```