diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-04-27 11:09:14 -0600 |
---|---|---|
committer | Ricardo Signes <rjbs@cpan.org> | 2012-05-01 19:08:57 -0400 |
commit | 2f8f112e03b73a49c60674d3b5e00b4463f1d5b7 (patch) | |
tree | abb87cf258b6151db2e57d7147562ab893ea0372 /ext/XS-APItest | |
parent | 1530a57dfaff29c214be6c42259309d263abc973 (diff) | |
download | perl-2f8f112e03b73a49c60674d3b5e00b4463f1d5b7.tar.gz |
utf8n_to_uvuni(): Fix broken malformation interactions
All code points whose UTF-8 representations start with the byte \xFE or
\xFF are considered problematic because they are not portable. Many such
code points are too large to represent on a 32-bit or even a 64-bit
platform. Commit
eb83ed87110e41de6a4cd4463f75df60798a9243 failed to properly catch
overflow when the input flags to this function say to warn on, but
otherwise accept, FE and FF sequences. Now overflow is checked for
unconditionally.
Diffstat (limited to 'ext/XS-APItest')
-rw-r--r-- | ext/XS-APItest/t/utf8.t | 75 |
1 files changed, 58 insertions, 17 deletions
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index b59fb69212..8bafd89dab 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -148,7 +148,7 @@ else { # The above overflows unless a quad platform # Now test the cases where a legal code point is generated, but may or may not # be allowed/warned on. -foreach my $test ( +my @tests = ( [ "surrogate", "\xed\xa4\x8d", $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, 'surrogate', 0xD90D, 3, qr/surrogate/ @@ -164,19 +164,39 @@ foreach my $test ( [ "begins with FE", "\xfe\x82\x80\x80\x80\x80\x80", # This code point is chosen so that it is representable in a UV on - # 32-bit machines, otherwise we would have to handle it like the FF - # ones + # 32-bit machines $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', 0x80000000, 7, qr/Code point beginning with byte .* is not Unicode, and not portable/ ], - [ "begins with FF", "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", - $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', $FF_ret, 13, + [ "overflow with FE/FF", + # This tests the interaction of WARN_FE_FF/DISALLOW_FE_FF with + # overflow. The overflow malformation is never allowed, so preventing + # it takes precedence if the FE_FF options would otherwise allow in an + # overflowing value. These two code points (1 for 32-bits; 1 for 64) + # were chosen because the old overflow detection algorithm did not + # catch them; this means this test also checks for that fix. + ($has_quad) + ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : "\xfe\x86\x80\x80\x80\x80\x80", + $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', 0, + ($has_quad) ? 
13 : 7, qr/Code point beginning with byte .* is not Unicode, and not portable/ ], -) { +); + +if ($has_quad) { # All FF's will overflow on 32 bit + push @tests, + [ "begins with FF", "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", + $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', $FF_ret, 13, + qr/Code point beginning with byte .* is not Unicode, and not portable/ + ]; +} + +foreach my $test (@tests) { my ($testname, $bytes, $warn_flags, $disallow_flags, $category, $allowed_uv, $expected_len, $message ) = @$test; my $length = length $bytes; + my $will_overflow = $testname =~ /overflow/; # This is more complicated than the malformations tested earlier, as there # are several orthogonal variables involved. We test all the subclasses @@ -186,16 +206,19 @@ foreach my $test ( foreach my $warn_flag (0, $warn_flags) { foreach my $disallow_flag (0, $disallow_flags) { - # On 32-bit machines, anything beginning with \xff is not - # representable, and would overflow even if we were to allow - # them in this test. - next if ! $has_quad - && ! $disallow_flag - && substr($bytes, 0, 1) eq "\xff"; - no warnings 'utf8'; my $eval_warn = $warning eq 0 ? "no warnings" : "use warnings '$warning'"; - my $this_name = "$testname: " . (($disallow_flag) ? 'disallowed' : 'allowed'); + + # is effectively disallowed if will overflow, even if the flag + # indicates it is allowed, fix up test name to indicate this + # as well + my $disallowed = $disallow_flag || $will_overflow; + + my $this_name = "$testname: " . (($disallow_flag) + ? 'disallowed' + : ($disallowed) + ? 'FE_FF allowed' + : 'allowed'); $this_name .= ", $eval_warn"; $this_name .= ", " . (($warn_flag) ? 
'with warning flag' : 'no warning flag'); @@ -208,7 +231,7 @@ foreach my $test ( note "\$!='$!'; eval'd=\"$eval_text\""; next; } - if ($disallow_flag) { + if ($disallowed) { is($ret_ref->[0], 0, "$this_name: Returns 0"); } else { @@ -216,7 +239,22 @@ foreach my $test ( } is($ret_ref->[1], $expected_len, "$this_name: Returns expected length"); - if ($warn_flag && ($warning eq 'utf8' || $warning eq $category)) { + if ($will_overflow && ! $disallow_flag && $warning eq 'utf8') { + + # Will get the overflow message instead of the expected + # message under these circumstances, as they would + # otherwise accept an overflowed value, which the code + # should not allow, so falls back to overflow. + if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { + like($warnings[0], qr/overflow/, "$this_name: Got overflow warning"); + } + else { + if (scalar @warnings) { + note "The warnings were: " . join(", ", @warnings); + } + } + } + elsif ($warn_flag && ($warning eq 'utf8' || $warning eq $category)) { if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { like($warnings[0], $message, "$this_name: Got expected warning"); } @@ -233,7 +271,10 @@ foreach my $test ( } } - if ($disallow_flag) { + # Check CHECK_ONLY results when the input is disallowed. Do + # this when actually disallowed, not just when the + # $disallow_flag is set + if ($disallowed) { undef @warnings; $ret_ref = test_utf8n_to_uvuni($bytes, $length, $disallow_flag|$UTF8_CHECK_ONLY); is($ret_ref->[0], 0, "$this_name, CHECK_ONLY: Returns 0"); |