diff options
-rw-r--r-- | ext/XS-APItest/t/utf8.t | 3 | ||||
-rw-r--r-- | ext/XS-APItest/t/utf8_warn_base.pl | 124 | ||||
-rw-r--r-- | t/comp/parser.t | 2 | ||||
-rw-r--r-- | t/lib/warnings/utf8 | 7 | ||||
-rw-r--r-- | t/op/chop.t | 20 | ||||
-rw-r--r-- | t/op/index.t | 11 | ||||
-rw-r--r-- | t/opbasic/qq.t | 7 | ||||
-rw-r--r-- | t/re/pat_advanced.t | 3 | ||||
-rw-r--r-- | t/uni/parser.t | 3 |
9 files changed, 79 insertions, 101 deletions
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 788d564188..95e2628f57 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -869,6 +869,9 @@ my $p = (isASCII) ? "\xe1\x80" : I8_to_native("\xE4\xA0"); # partial for my $restriction (sort keys %restriction_types) { use bytes; + next if $restriction eq 'fits_in_31_bits' + && !defined $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'}; + for my $use_flags ("", "_flags") { # For each restriction, we test it in both the is_foo_flags functions diff --git a/ext/XS-APItest/t/utf8_warn_base.pl b/ext/XS-APItest/t/utf8_warn_base.pl index 66f6f3df6a..a3f4052b55 100644 --- a/ext/XS-APItest/t/utf8_warn_base.pl +++ b/ext/XS-APItest/t/utf8_warn_base.pl @@ -377,70 +377,6 @@ my @tests = ( (isASCII) ? 4 : 5, qr/Unicode non-character.*is not recommended for open interchange/ ], - [ "requires at least 32 bits", - (isASCII) - ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), - # This code point is chosen so that it is representable in a UV on - # 32-bit machines - $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, - $::UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x80000000, - (isASCII) ? 7 : $::max_bytes, - (isASCII) ? 1 : 8, - nonportable_regex(0x80000000) - ], - [ "highest 32 bit code point", - (isASCII) - ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), - $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, - $::UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0xFFFFFFFF, - (isASCII) ? 7 : $::max_bytes, - (isASCII) ? 1 : 8, - nonportable_regex(0xffffffff) - ], - [ "requires at least 32 bits, and use SUPER-type flags, instead of" - . " ABOVE_31_BIT", - (isASCII) - ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), - $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, - 'utf8', 0x80000000, - (isASCII) ? 7 : $::max_bytes, - 1, - nonportable_regex(0x80000000) - ], - [ "overflow with warnings/disallow for more than 31 bits", - # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT - # with overflow. The overflow malformation is never allowed, so - # preventing it takes precedence if the ABOVE_31_BIT options would - # otherwise allow in an overflowing value. The ASCII code points (1 - # for 32-bits; 1 for 64) were chosen because the old overflow - # detection algorithm did not catch them; this means this test also - # checks for that fix. The EBCDIC are arbitrary overflowing ones - # since we have no reports of failures with it. - (($::is64bit) - ? ((isASCII) - ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native( - "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) - : ((isASCII) - ? "\xfe\x86\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), - $::UTF8_WARN_ABOVE_31_BIT, - $::UTF8_DISALLOW_ABOVE_31_BIT, - $::UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0, - (! isASCII || $::is64bit) ? $::max_bytes : 7, - (isASCII || $::is64bit) ? 2 : 8, - qr/overflows/ - ], ); if (! $::is64bit) { @@ -471,6 +407,66 @@ else { $::max_bytes, (isASCII) ? 1 : 7, qr/and( is)? not portable/ ]; + [ "requires at least 32 bits", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + # This code point is chosen so that it is representable in a UV on + # 32-bit machines + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x80000000, + (isASCII) ? 7 : $::max_bytes, + (isASCII) ? 1 : 8, + nonportable_regex(0x80000000) + ], + [ "highest 32 bit code point", + (isASCII) + ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0xFFFFFFFF, + (isASCII) ? 7 : $::max_bytes, + (isASCII) ? 1 : 8, + nonportable_regex(0xffffffff) + ], + [ "requires at least 32 bits, and use SUPER-type flags, instead of" + . " ABOVE_31_BIT", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, + 'utf8', 0x80000000, + (isASCII) ? 7 : $::max_bytes, + 1, + nonportable_regex(0x80000000) + ], + [ "overflow with warnings/disallow for more than 31 bits", + # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT + # with overflow. The overflow malformation is never allowed, so + # preventing it takes precedence if the ABOVE_31_BIT options would + # otherwise allow in an overflowing value. The ASCII code points (1 + # for 32-bits; 1 for 64) were chosen because the old overflow + # detection algorithm did not catch them; this means this test also + # checks for that fix. The EBCDIC are arbitrary overflowing ones + # since we have no reports of failures with it. + ((isASCII) + ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")), + $::UTF8_WARN_ABOVE_31_BIT, + $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0, + (! isASCII || $::is64bit) ? $::max_bytes : 7, + (isASCII || $::is64bit) ? 2 : 8, + qr/overflows/ + ]; + if (! isASCII) { push @tests, # These could falsely show wrongly in a naive # implementation diff --git a/t/comp/parser.t b/t/comp/parser.t index 6fd5ad0aa0..9b0f3a710a 100644 --- a/t/comp/parser.t +++ b/t/comp/parser.t @@ -586,7 +586,7 @@ is $@, "", 'substr keys assignment'; { no warnings; - eval "q" . chr(100000000064); + eval "q" . chr(0x7fffffff); like $@, qr/Can't find string terminator "." anywhere before EOF/, 'RT 128952'; } diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 index a10174a633..a4dfb12698 100644 --- a/t/lib/warnings/utf8 +++ b/t/lib/warnings/utf8 @@ -99,12 +99,11 @@ Operation "uc" returns its argument for non-Unicode code point 0x110000 at - lin Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5. ######## use warnings 'utf8'; -no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines -my $big_nonUnicode = uc(chr(0x8000_0000)); +my $big_nonUnicode = uc(chr(0x7fff_ffff)); no warnings 'non_unicode'; -my $big_nonUnicode = uc(chr(0x8000_0000)); +my $big_nonUnicode = uc(chr(0x7fff_ffff)); EXPECT -Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3. +Operation "uc" returns its argument for non-Unicode code point 0x7FFFFFFF at - line 2. ######## use warnings 'utf8'; my $d7ff = lc pack("U", 0xD7FF); diff --git a/t/op/chop.t b/t/op/chop.t index f12332ae7c..8afc546113 100644 --- a/t/op/chop.t +++ b/t/op/chop.t @@ -7,7 +7,7 @@ BEGIN { require './charset_tools.pl'; } -my $tests_count = 148; +my $tests_count = 146; plan tests => $tests_count; $_ = 'abc'; @@ -253,22 +253,10 @@ foreach my $start (@chars) { # [perl #73246] chop doesn't support utf8 # the problem was UTF8_IS_START() didn't handle perl's extended UTF8 - no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines - my $utf = "\x{80000001}\x{80000000}"; + my $utf = "\x{7fffffff}\x{7ffffffe}"; my $result = chop($utf); - is($utf, "\x{80000001}", "chopping high 'unicode'- remnant"); - is($result, "\x{80000000}", "chopping high 'unicode' - result"); - - SKIP: { - no warnings 'overflow'; # avoid compile-time warnings below on 32-bit architectures - use Config; - $Config{ivsize} >= 8 - or skip("this build can't handle very large characters", 2); - my $utf = "\x{7fffffffffffffff}\x{7ffffffffffffffe}"; - my $result = chop $utf; - is($utf, "\x{7fffffffffffffff}", "chop even higher 'unicode' - remnant"); - is($result, "\x{7ffffffffffffffe}", "chop even higher 'unicode' - result"); - } + is($utf, "\x{7fffffff}", "chopping high 'unicode'- remnant"); + is($result, "\x{7ffffffe}", "chopping high 'unicode' - result"); } $/ = "\n"; diff --git a/t/op/index.t b/t/op/index.t index d1e46dc648..f043ef81e2 100644 --- a/t/op/index.t +++ b/t/op/index.t @@ -130,17 +130,16 @@ is(rindex($a, "foo", ), 0); } { - no warnings 'deprecated'; # These are above IV_MAX on 32 bit machines - my $a = eval q{"\x{80000000}"}; + my $a = eval q{"\x{7fffffff}"}; my $s = $a.'defxyz'; - is(index($s, 'def'), 1, "0x80000000 is a single character"); + is(index($s, 'def'), 1, "0x7fffffff is a single character"); - my $b = eval q{"\x{fffffffd}"}; + my $b = eval q{"\x{7ffffffd}"}; my $t = $b.'pqrxyz'; - is(index($t, 'pqr'), 1, "0xfffffffd is a single character"); + is(index($t, 'pqr'), 1, "0x7ffffffd is a single character"); local ${^UTF8CACHE} = -1; - is(index($t, 'xyz'), 4, "0xfffffffd and utf8cache"); + is(index($t, 'xyz'), 4, "0x7ffffffd and utf8cache"); } diff --git a/t/opbasic/qq.t b/t/opbasic/qq.t index 5d6908cef1..e633783df2 100644 --- a/t/opbasic/qq.t +++ b/t/opbasic/qq.t @@ -8,7 +8,7 @@ BEGIN { # This file uses a specially crafted is() function rather than that found in # t/test.pl or Test::More. Hence, we place this file in directory t/opbasic. -print q(1..29 +print q(1..28 ); # This is() function is written to avoid "" @@ -71,11 +71,6 @@ is ("a\o{120}b", "a" . chr(0x50) . "b"); is ("a\o{400}b", "a" . chr(0x100) . "b"); is ("a\o{1000}b", "a" . chr(0x200) . "b"); -# This caused a memory fault -no warnings "utf8"; -no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines -is ("abc", eval qq[qq\x{8000_0000}abc\x{8000_0000}]); - # Maybe \x{} should be an error, but if not it should certainly mean \x{0} # rather than anything else. is ("\x{}", chr(0)); diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index f2d9c74da9..a2ff68169b 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -2402,8 +2402,7 @@ EOF $Config{uvsize} == 8 or skip("need large code-points for this test", 1); - # This is above IV_MAX on 32 bit machines, so turn off those warnings - fresh_perl_is('no warnings "deprecated"; /\x{E000000000}|/ and print qq(ok\n)', "ok\n", {}, + fresh_perl_is('/\x{E000000000}|/ and print qq(ok\n)', "ok\n", {}, "buffer overflow in TRIE_STORE_REVCHAR"); } diff --git a/t/uni/parser.t b/t/uni/parser.t index 2c68fb0473..c5cf21c7ca 100644 --- a/t/uni/parser.t +++ b/t/uni/parser.t @@ -237,9 +237,8 @@ SKIP: { # [perl #128738] skip("test is only valid on 64-bit ints", 2); } else { - no warnings 'deprecated'; my $a; - eval "\$a = q \x{ffffffff}Hello, \\\\whirled!\x{ffffffff}"; + eval "\$a = q \x{7fffffff}Hello, \\\\whirled!\x{7fffffff}"; is $@, "", "No errors in eval'ing a string with large code point delimiter"; is $a, 'Hello, \whirled!', |