summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2016-12-11 20:53:54 -0700
committer: Karl Williamson <khw@cpan.org> 2016-12-23 16:48:36 -0700
commit: 99a765e9e37afa8c2519ed155d6ce30fe0b6994c (patch)
tree: c44e2a960b33a3f95177b21372ae0682f05a3c7f /utf8.c
parent: 34aeb2e92066dd41c16797e63eb0496735b5dfe4 (diff)
download: perl-99a765e9e37afa8c2519ed155d6ce30fe0b6994c.tar.gz
utf8.c: Add flag to indicate unsure as to end of string to print
When decoding a UTF-8 encoded string, the length passed in may be only a guess. This adds a flag that tells the base-level decode routine that the length is a guess, so that it limits what gets printed in the malformation message, rather than printing the normal full information, in order to minimize reading past the end of the string.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 24
1 files changed, 9 insertions, 15 deletions
diff --git a/utf8.c b/utf8.c
index de7a2e68f2..5fca6f7248 100644
--- a/utf8.c
+++ b/utf8.c
@@ -817,7 +817,6 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
? "immediately"
: Perl_form(aTHX_ "%d bytes",
(int) non_cont_byte_pos);
- unsigned int i;
PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
@@ -825,18 +824,6 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
* calculated, it's likely faster to pass it; verify under DEBUGGING */
assert(expect_len == UTF8SKIP(s));
- /* It is possible that utf8n_to_uvchr() was called incorrectly, with a
- * length that is larger than is actually available in the buffer. If we
- * print all the bytes based on that length, we will read past the buffer
- * end. Often, the strings are NUL terminated, so to lower the chances of
- * this happening, print the malformed bytes only up through any NUL. */
- for (i = 1; i < print_len; i++) {
- if (*(s + i) == '\0') {
- print_len = i + 1; /* +1 gets the NUL printed */
- break;
- }
- }
-
return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
" %s after start byte 0x%02x; need %d bytes, got %d)",
malformed_text,
@@ -1480,10 +1467,17 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
disallowed = TRUE;
if (ckWARN_d(WARN_UTF8) && ! (flags & UTF8_CHECK_ONLY)) {
+
+ /* If we don't know for sure that the input length is
+ * valid, avoid as much as possible reading past the
+ * end of the buffer */
+ int printlen = (flags & _UTF8_NO_CONFIDENCE_IN_CURLEN)
+ ? s - s0
+ : send - s0;
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_ "%s",
unexpected_non_continuation_text(s0,
- send - s0,
+ printlen,
s - s0,
(int) expectlen));
}
@@ -2470,7 +2464,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, SV **swash,
* validating routine */
if (! isUTF8_CHAR(p, p + UTF8SKIP(p))) {
_force_out_malformed_utf8_message(p, p + UTF8SKIP(p),
- 0,
+ _UTF8_NO_CONFIDENCE_IN_CURLEN,
1 /* Die */ );
NOT_REACHED; /* NOTREACHED */
}