diff options
author | Karl Williamson <khw@cpan.org> | 2016-10-15 11:02:04 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-10-16 10:47:00 -0600 |
commit | aadb82e0ec97e7fc243dcacb81423f82c41eb512 (patch) | |
tree | 805dce8f65ff81681db19bdb7353ea9d648d88db /t/op/utf8decode.t | |
parent | 5ec712b17f589b0efc75ccd871d07947dd474a85 (diff) | |
download | perl-aadb82e0ec97e7fc243dcacb81423f82c41eb512.tar.gz |
PATCH: [perl #129891] t/op/utf8decode.t failing
This bug is a result of 32-bit vs 64-bit words, and is a problem in the
test file and not the underlying code.
The blamed commit changed things so that if a UTF-8 sequence has
multiple malformations, a diagnostic is generated for each. Some of the
tests in utf8decode.t overflow on 32-bit words, but not on 64-bit ones. The
solution is to change the .t to also look for the extra overflow
warnings on 32-bit machines.
Diffstat (limited to 't/op/utf8decode.t')
-rw-r--r-- | t/op/utf8decode.t | 44 |
1 files changed, 35 insertions, 9 deletions
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index 8de9154f70..90c233aeb5 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -14,6 +14,8 @@ $|=1; my $ordwide = ord($wide); printf "# under use bytes ord(v256) = 0x%02x\n", $ordwide; skip_all('UTF-8-centric tests (not valid for UTF-EBCDIC)') if $ordwide == 140; + # This could be ported to EBCDIC, but a lot of trouble. + # ext/XS-APItest/t/utf8.t contains comprehensive tests for both platforms if ($ordwide != 196) { printf "# v256 starts with 0x%02x\n", $ordwide; @@ -22,12 +24,22 @@ $|=1; no utf8; +my $is64bit = length sprintf("%x", ~0) > 8; + foreach (<DATA>) { if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) { # print "# $_\n"; } elsif (my ($id, $okay, $Unicode, $byteslen, $hex, $charslen, $experr) = /^(\d+\.\d+\.\d+[bu]?) # ID - \s+(y|n|N-?\d+) # expect to pass or fail + \s+(y|n|N-?\d+(?:,\d+)?) # expect to pass or fail + # 'n' means expect one diagnostic + # 'N\d+' means expect this + # number of diagnostics + # 'N\d+,\d+' means expect the first + # number of diagnostics + # on a 32-bit system; the + # second number on a + # 64-bit one \s+([0-9a-f]{1,8}(?:,[0-9a-f]{1,8})*|-) # Unicode characters \s+(\d+) # number of octets \s+([0-9a-f]{2}(?::[0-9a-f]{2})*) # octets in hex @@ -49,10 +61,12 @@ foreach (<DATA>) { isnt($experr, '', "Expected warning for $id provided"); warnings_like(sub {unpack 'C0U*', $octets}, [qr/$experr/], "Only expected warning for $id"); - } elsif ($okay !~ /^N(-?\d+)/) { + } elsif ($okay !~ /^N-?(\d+)(?:,(\d+))?/) { is($okay, 'n', "Confused test description for $id"); } else { - my $expect = $1; + my $expect32 = $1; + my $expect64 = $2 // $expect32; + my $expect = ($is64bit) ? 
$expect64 : $expect32; my @warnings; { @@ -63,16 +77,26 @@ foreach (<DATA>) { unpack 'C0U*', $octets; } + unless (is(scalar @warnings, $expect, "Expected number of warnings for $id seen")) { + note(join "", "Got:\n", @warnings); + } isnt($experr, '', "Expected first warning for $id provided"); - like($warnings[0], qr/$experr/, "Expected first warning for $id seen"); + + my $message; + if ($expect64 != $expect32 && ! $is64bit) { + like($warnings[0], qr/overflow/, "overflow warning for $id seen"); + shift @warnings; + $message = "Expected first warning after overflow for $id seen"; + } + else { + $message = "Expected first warning for $id seen"; + } + like($warnings[0], qr/$experr/, $message); local $::TODO; if ($expect < 0) { $expect = -$expect; $::TODO = "Markus Kuhn states that $expect invalid sequences should be signalled"; } - unless (is(scalar @warnings, $expect, "Expected number of warnings for $id seen")) { - note(join "", "Got:\n", @warnings); - } } } else { @@ -85,6 +109,8 @@ done_testing(); # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester, # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt, # version dated 2015-08-28. +# +# See the code that parses these lines for comments as to the column meanings __DATA__ 1 Correct UTF-8 @@ -143,8 +169,8 @@ __DATA__ 3.4.1 N15 - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0, immediately after start byte 0xc0 3.5 Impossible bytes (but not with Perl's extended UTF-8) 3.5.1 n - 1 fe - 1 byte, need 7 -3.5.2 n - 1 ff - 1 byte, need 13 -3.5.3 N5 - 4 fe:fe:ff:ff - byte 0xfe +3.5.2 N2,1 - 1 ff - 1 byte, need 13 +3.5.3 N8,5 - 4 fe:fe:ff:ff - byte 0xfe 4 Overlong sequences 4.1 Examples of an overlong ASCII character 4.1.1 n - 2 c0:af - overlong |