diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-12-18 15:24:50 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-12-18 15:24:50 +0000 |
commit | 097fb8e2acde8522bd4ee4e5e00d3d2b810e2e56 (patch) | |
tree | 120af15df280bf1c52905c88cd22cd17a10fcd88 | |
parent | 8218ea5d24f5de0a37393f19671aa5631c088be7 (diff) | |
download | perl-097fb8e2acde8522bd4ee4e5e00d3d2b810e2e56.tar.gz |
Make the utf8 malformedness messages more verbose.
p4raw-id: //depot/perl@13757
-rw-r--r-- | lib/utf8.t | 2 |
-rw-r--r-- | utf8.c | 25 |
2 files changed, 17 insertions, 10 deletions
diff --git a/lib/utf8.t b/lib/utf8.t index ee3c258cf5..aaa06853f1 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -159,7 +159,7 @@ plan tests => 94; use utf8; %a = ("\xE1\xA0"=>"sterling"); print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n"; BANG - qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s + qr/^Malformed UTF-8 character \(2 bytes, need 3.+\).*start\d+,end$/s ], ); foreach (@tests) { @@ -251,9 +251,11 @@ Most code should use utf8_to_uvchr() rather than call this directly. UV Perl_utf8n_to_uvuni(pTHX_ U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) { + U8 *s0 = s; UV uv = *s, ouv = 0; STRLEN len = 1; bool dowarn = ckWARN_d(WARN_UTF8); + U8 startbyte = *s; STRLEN expectlen = 0; U32 warning = 0; @@ -396,23 +398,28 @@ malformed: Perl_sv_catpvf(aTHX_ sv, "(empty string)"); break; case UTF8_WARN_CONTINUATION: - Perl_sv_catpvf(aTHX_ sv, "(unexpected continuation byte 0x%02"UVxf")", uv); + Perl_sv_catpvf(aTHX_ sv, "(unexpected continuation byte 0x%02"UVxf", with no preceding start byte)", uv); break; case UTF8_WARN_NON_CONTINUATION: - Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")", - (UV)s[1], uv); + if (s == s0) + Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf", immediately after start byte 0x%02"UVxf")", + (UV)s[1], startbyte); + else + Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf", %d byte%s after start byte 0x%02"UVxf", expected %d bytes)", + (UV)s[1], s - s0, s - s0 > 1 ? "s" : "", startbyte, expectlen); + break; case UTF8_WARN_FE_FF: Perl_sv_catpvf(aTHX_ sv, "(byte 0x%02"UVxf")", uv); break; case UTF8_WARN_SHORT: - Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d)", - curlen, curlen == 1 ? "" : "s", expectlen); + Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d, after start byte 0x%02"UVxf")", + curlen, curlen == 1 ? 
"" : "s", expectlen, startbyte); expectlen = curlen; /* distance for caller to skip */ break; case UTF8_WARN_OVERFLOW: - Perl_sv_catpvf(aTHX_ sv, "(overflow at 0x%"UVxf", byte 0x%02x)", - ouv, *s); + Perl_sv_catpvf(aTHX_ sv, "(overflow at 0x%"UVxf", byte 0x%02x, after start byte 0x%02"UVxf")", + ouv, *s, startbyte); break; case UTF8_WARN_SURROGATE: Perl_sv_catpvf(aTHX_ sv, "(UTF-16 surrogate 0x%04"UVxf")", uv); @@ -421,8 +428,8 @@ malformed: Perl_sv_catpvf(aTHX_ sv, "(byte order mark 0x%04"UVxf")", uv); break; case UTF8_WARN_LONG: - Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d)", - expectlen, expectlen == 1 ? "": "s", UNISKIP(uv)); + Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d, after start byte 0x%02"UVxf")", + expectlen, expectlen == 1 ? "": "s", UNISKIP(uv), startbyte); break; case UTF8_WARN_FFFF: Perl_sv_catpvf(aTHX_ sv, "(character 0x%04"UVxf")", uv); |