summaryrefslogtreecommitdiff
path: root/t/op/utf8decode.t
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-10-15 11:02:04 -0600
committerKarl Williamson <khw@cpan.org>2016-10-16 10:47:00 -0600
commitaadb82e0ec97e7fc243dcacb81423f82c41eb512 (patch)
tree805dce8f65ff81681db19bdb7353ea9d648d88db /t/op/utf8decode.t
parent5ec712b17f589b0efc75ccd871d07947dd474a85 (diff)
downloadperl-aadb82e0ec97e7fc243dcacb81423f82c41eb512.tar.gz
PATCH: [perl #129891] t/op/utf8decode.t failing
This bug is a result of 32-bit vs 64-bit words, and is a problem in the test file and not the underlying code. The blamed commit changed things so that if a UTF-8 sequence has multiple malformations, a diagnostic is generated for each. Some of the tests in utf8decode.t overflow on 32-bit words, but not on 64-bit ones. The solution is to change the .t to also look for the extra overflow warnings on 32-bit machines.
Diffstat (limited to 't/op/utf8decode.t')
-rw-r--r--t/op/utf8decode.t44
1 files changed, 35 insertions, 9 deletions
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index 8de9154f70..90c233aeb5 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -14,6 +14,8 @@ $|=1;
my $ordwide = ord($wide);
printf "# under use bytes ord(v256) = 0x%02x\n", $ordwide;
skip_all('UTF-8-centric tests (not valid for UTF-EBCDIC)') if $ordwide == 140;
+ # This could be ported to EBCDIC, but a lot of trouble.
+ # ext/XS-APItest/t/utf8.t contains comprehensive tests for both platforms
if ($ordwide != 196) {
printf "# v256 starts with 0x%02x\n", $ordwide;
@@ -22,12 +24,22 @@ $|=1;
no utf8;
+my $is64bit = length sprintf("%x", ~0) > 8;
+
foreach (<DATA>) {
if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) {
# print "# $_\n";
} elsif (my ($id, $okay, $Unicode, $byteslen, $hex, $charslen, $experr)
= /^(\d+\.\d+\.\d+[bu]?) # ID
- \s+(y|n|N-?\d+) # expect to pass or fail
+ \s+(y|n|N-?\d+(?:,\d+)?) # expect to pass or fail
+ # 'n' means expect one diagnostic
+ # 'N\d+' means expect this
+ # number of diagnostics
+ # 'N\d+,\d+' means expect the first
+ # number of diagnostics
+ # on a 32-bit system; the
+ # second number on a
+ # 64-bit one
\s+([0-9a-f]{1,8}(?:,[0-9a-f]{1,8})*|-) # Unicode characters
\s+(\d+) # number of octets
\s+([0-9a-f]{2}(?::[0-9a-f]{2})*) # octets in hex
@@ -49,10 +61,12 @@ foreach (<DATA>) {
isnt($experr, '', "Expected warning for $id provided");
warnings_like(sub {unpack 'C0U*', $octets}, [qr/$experr/],
"Only expected warning for $id");
- } elsif ($okay !~ /^N(-?\d+)/) {
+ } elsif ($okay !~ /^N-?(\d+)(?:,(\d+))?/) {
is($okay, 'n', "Confused test description for $id");
} else {
- my $expect = $1;
+ my $expect32 = $1;
+ my $expect64 = $2 // $expect32;
+ my $expect = ($is64bit) ? $expect64 : $expect32;
my @warnings;
{
@@ -63,16 +77,26 @@ foreach (<DATA>) {
unpack 'C0U*', $octets;
}
+ unless (is(scalar @warnings, $expect, "Expected number of warnings for $id seen")) {
+ note(join "", "Got:\n", @warnings);
+ }
isnt($experr, '', "Expected first warning for $id provided");
- like($warnings[0], qr/$experr/, "Expected first warning for $id seen");
+
+ my $message;
+ if ($expect64 != $expect32 && ! $is64bit) {
+ like($warnings[0], qr/overflow/, "overflow warning for $id seen");
+ shift @warnings;
+ $message = "Expected first warning after overflow for $id seen";
+ }
+ else {
+ $message = "Expected first warning for $id seen";
+ }
+ like($warnings[0], qr/$experr/, $message);
local $::TODO;
if ($expect < 0) {
$expect = -$expect;
$::TODO = "Markus Kuhn states that $expect invalid sequences should be signalled";
}
- unless (is(scalar @warnings, $expect, "Expected number of warnings for $id seen")) {
- note(join "", "Got:\n", @warnings);
- }
}
} else {
@@ -85,6 +109,8 @@ done_testing();
# This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
# version dated 2015-08-28.
+#
+# See the code that parses these lines for comments as to the column meanings
__DATA__
1 Correct UTF-8
@@ -143,8 +169,8 @@ __DATA__
3.4.1 N15 - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0, immediately after start byte 0xc0
3.5 Impossible bytes (but not with Perl's extended UTF-8)
3.5.1 n - 1 fe - 1 byte, need 7
-3.5.2 n - 1 ff - 1 byte, need 13
-3.5.3 N5 - 4 fe:fe:ff:ff - byte 0xfe
+3.5.2 N2,1 - 1 ff - 1 byte, need 13
+3.5.3 N8,5 - 4 fe:fe:ff:ff - byte 0xfe
4 Overlong sequences
4.1 Examples of an overlong ASCII character
4.1.1 n - 2 c0:af - overlong