diff options
author | Karl Williamson <khw@cpan.org> | 2016-10-19 21:20:48 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-10-19 21:32:13 -0600 |
commit | 3cc6a05eedade6f51526feb18c12356b0589d77a (patch) | |
tree | fd4a087fd5513b9149a6f3bfe1627c75e7e41737 | |
parent | 14c482b0d8b7402f1b2b28d2918a55c83577d6ff (diff) | |
download | perl-3cc6a05eedade6f51526feb18c12356b0589d77a.tar.gz |
utf8n_to_uvchr(): Reduce chances of reading beyond buffer
utf8n_to_uvchr() can be called incorrectly, leading it to believe the
buffer is longer than it actually is. But often, it will be called with
NUL-terminated strings, so it can reduce its chances of being fooled by
refusing to read beyond a NUL. The NUL will terminate any UTF-8 byte
sequence, and the only reason to read beyond it would be to print all
the expected bytes in the sequence.
This commit is not the final word, but it is an easy fix for a common
case.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | utf8.c | 15 |
3 files changed, 16 insertions, 3 deletions
@@ -1685,7 +1685,7 @@ inRP |bool |does_utf8_overflow|NN const U8 * const s|NN const U8 * e inRP |bool |is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len sMR |char * |unexpected_non_continuation_text \ |NN const U8 * const s \ - |const STRLEN print_len \ + |STRLEN print_len \ |const STRLEN non_cont_byte_pos \ |const STRLEN expect_len sM |char * |_byte_dump_string|NN const U8 * s|const STRLEN len @@ -5635,7 +5635,7 @@ STATIC SV* S_swatch_get(pTHX_ SV* swash, UV start, UV span) STATIC U8 S_to_lower_latin1(const U8 c, U8 *p, STRLEN *lenp) __attribute__warn_unused_result__; -STATIC char * S_unexpected_non_continuation_text(pTHX_ const U8 * const s, const STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len) +STATIC char * S_unexpected_non_continuation_text(pTHX_ const U8 * const s, STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT \ assert(s) @@ -735,7 +735,7 @@ PERL_STATIC_INLINE char * S_unexpected_non_continuation_text(pTHX_ const U8 * const s, /* How many bytes to print */ - const STRLEN print_len, + STRLEN print_len, /* Which one is the non-continuation */ const STRLEN non_cont_byte_pos, @@ -750,6 +750,7 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s, ? "immediately" : Perl_form(aTHX_ "%d bytes", (int) non_cont_byte_pos); + unsigned int i; PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT; @@ -757,6 +758,18 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s, * calculated, it's likely faster to pass it; verify under DEBUGGING */ assert(expect_len == UTF8SKIP(s)); + /* It is possible that utf8n_to_uvchr() was called incorrectly, with a + * length that is larger than is actually available in the buffer. If we + * print all the bytes based on that length, we will read past the buffer + * end. 
Often, the strings are NUL terminated, so to lower the chances of + * this happening, print the malformed bytes only up through any NUL. */ + for (i = 1; i < print_len; i++) { + if (*(s + i) == '\0') { + print_len = i + 1; /* +1 gets the NUL printed */ + break; + } + } + return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x," " %s after start byte 0x%02x; need %d bytes, got %d)", malformed_text, |