summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-02-13 07:03:43 -0700
committerKarl Williamson <khw@cpan.org>2018-04-15 12:50:55 -0600
commit421da25c4318861925129cd1b17263289db3443c (patch)
tree1ead3b1514b85d10986f6dd32f11881148a6644f
parente144fcd2682920c543c2cbd19a9b439f888ee70c (diff)
downloadperl-421da25c4318861925129cd1b17263289db3443c.tar.gz
utf8.c: Don't dump malformation past first NUL
When a UTF-8 string contains a malformation, the bytes are dumped out as a debugging aid. One should exercise caution, however, and not dump out bytes that are actually past the end of the string. Commit 99a765e9e37 from 2016 added the capability to signal to the dumping routines that we're not sure where the string ends, and to dump the minimal possible. It occurred to me that an additional safety measure can easily be added, which this commit does: in the dumping routines, stop at the first NUL. All PVs in SVs automatically get a trailing NUL added, even if they contain embedded NULs. A NUL can never be part of a malformation, and so its presence likely signals the end of the string.
-rw-r--r--utf8.c16
1 files changed, 14 insertions, 2 deletions
diff --git a/utf8.c b/utf8.c
index e3e17ff102..540980d9dd 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1123,7 +1123,7 @@ Perl__byte_dump_string(pTHX_ const U8 * const start, const STRLEN len, const boo
PERL_STATIC_INLINE char *
S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
- /* How many bytes to print */
+ /* Max number of bytes to print */
STRLEN print_len,
/* Which one is the non-continuation */
@@ -1139,6 +1139,8 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
? "immediately"
: Perl_form(aTHX_ "%d bytes",
(int) non_cont_byte_pos);
+ const U8 * x = s + non_cont_byte_pos;
+ const U8 * e = s + print_len;
PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
@@ -1146,10 +1148,20 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
* calculated, it's likely faster to pass it; verify under DEBUGGING */
assert(expect_len == UTF8SKIP(s));
+ /* As a defensive coding measure, don't output anything past a NUL. Such
+ * bytes shouldn't be in the middle of a malformation, and could mark the
+ * end of the allocated string, and what comes after is undefined */
+ for (; x < e; x++) {
+ if (*x == '\0') {
+ x++; /* Output this particular NUL */
+ break;
+ }
+ }
+
return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
" %s after start byte 0x%02x; need %d bytes, got %d)",
malformed_text,
- _byte_dump_string(s, print_len, 0),
+ _byte_dump_string(s, x - s, 0),
*(s + non_cont_byte_pos),
where,
*s,