diff options
Diffstat (limited to 't/pragma/utf8.t')
-rwxr-xr-x | t/pragma/utf8.t | 407 |
1 files changed, 245 insertions, 162 deletions
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index 6986720aab..89416dcfab 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -10,7 +10,7 @@ BEGIN { } } -print "1..90\n"; +print "1..104\n"; my $test = 1; @@ -42,6 +42,7 @@ sub nok_bytes { { use utf8; + $_ = ">\x{263A}<"; s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; ok $_, '>☺<'; @@ -106,212 +107,191 @@ sub nok_bytes { } { - use utf8; - - $_ = "\x{263A}>\x{263A}\x{263A}"; - - ok length, 4; - $test++; # 13 - - ok length((m/>(.)/)[0]), 1; - $test++; # 14 - - ok length($&), 2; - $test++; # 15 + # no use utf8 needed + $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; + + ok length($_), 6; # 13 + $test++; - ok length($'), 1; - $test++; # 16 + ($a) = m/x(.)/; - ok length($`), 1; - $test++; # 17 + ok length($a), 1; # 14 + $test++; - ok length($1), 1; - $test++; # 18 + ok length($`), 2; # 15 + $test++; + ok length($&), 2; # 16 + $test++; + ok length($'), 2; # 17 + $test++; - ok length($tmp=$&), 2; - $test++; # 19 + ok length($1), 1; # 18 + $test++; - ok length($tmp=$'), 1; - $test++; # 20 + ok length($b=$`), 2; # 19 + $test++; - ok length($tmp=$`), 1; - $test++; # 21 + ok length($b=$&), 2; # 20 + $test++; - ok length($tmp=$1), 1; - $test++; # 22 + ok length($b=$'), 2; # 21 + $test++; - { - use bytes; + ok length($b=$1), 1; # 22 + $test++; - my $tmp = $&; - ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 23 + ok $a, "\x{263A}"; # 23 + $test++; - $tmp = $'; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 24 + ok $`, "\x{263A}\x{263A}"; # 24 + $test++; - $tmp = $`; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 25 + ok $&, "x\x{263A}"; # 25 + $test++; - $tmp = $1; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 26 - } + ok $', "y\x{263A}"; # 26 + $test++; - ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 27 + ok $1, "\x{263A}"; # 27 + $test++; - ok_bytes $', pack("C*", 0342, 0230, 0272); - $test++; # 28 + ok_bytes $a, "\342\230\272"; # 28 + $test++; - ok_bytes $`, pack("C*", 0342, 0230, 0272); - $test++; # 29 + ok_bytes $1, "\342\230\272"; # 29 + $test++; - ok_bytes $1, pack("C*", 0342, 0230, 0272); - $test++; # 30 + ok_bytes $&, "x\342\230\272"; # 30 + $test++; { - use bytes; - no utf8; - - ok length, 10; - $test++; # 31 + use utf8; # required + $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A); + } - ok length((m/>(.)/)[0]), 1; - $test++; # 32 + ok length($_), 6; # 31 + $test++; - ok length($&), 2; - $test++; # 33 + ($a) = m/x(.)/; - ok length($'), 5; - $test++; # 34 + ok length($a), 1; # 32 + $test++; - ok length($`), 3; - $test++; # 35 + ok length($`), 2; # 33 + $test++; - ok length($1), 1; - $test++; # 36 + ok length($&), 2; # 34 + $test++; - ok $&, pack("C*", ord(">"), 0342); - $test++; # 37 + ok length($'), 2; # 35 + $test++; - ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); - $test++; # 38 + ok length($1), 1; # 36 + $test++; - ok $`, pack("C*", 0342, 0230, 0272); - $test++; # 39 + ok length($b=$`), 2; # 37 + $test++; - ok $1, pack("C*", 0342); - $test++; # 40 - } + ok length($b=$&), 2; # 38 + $test++; - { - no utf8; - $_="\342\230\272>\342\230\272\342\230\272"; - } + ok length($b=$'), 2; # 39 + $test++; - ok length, 10; - $test++; # 41 + ok length($b=$1), 1; # 40 + $test++; - ok length((m/>(.)/)[0]), 1; - $test++; # 42 + ok $a, "\x{263A}"; # 41 + $test++; - ok length($&), 2; - $test++; # 43 + ok $`, "\x{263A}\x{263A}"; # 42 + $test++; - ok length($'), 1; - $test++; # 44 + ok $&, "x\x{263A}"; # 43 + $test++; - ok length($`), 1; - $test++; # 45 + ok $', "y\x{263A}"; # 44 + $test++; - ok length($1), 1; - $test++; # 46 + ok $1, "\x{263A}"; # 45 + $test++; - ok length($tmp=$&), 2; - $test++; # 47 + ok_bytes $a, "\342\230\272"; # 46 + $test++; - ok length($tmp=$'), 1; - $test++; # 48 + ok_bytes $1, "\342\230\272"; # 47 + $test++; - ok length($tmp=$`), 1; - $test++; # 49 + ok_bytes $&, "x\342\230\272"; # 48 + $test++; - ok length($tmp=$1), 1; - $test++; # 50 + $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272"; - { - use bytes; + ok length($_), 14; # 49 + $test++; - my $tmp = $&; - ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 51 + ($a) = m/x(.)/; - $tmp = $'; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 52 + ok length($a), 1; # 50 + $test++; - $tmp = $`; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 53 + ok length($`), 6; # 51 + $test++; - $tmp = $1; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 54 - } + ok length($&), 2; # 52 + $test++; - { - use bytes; - no utf8; + ok length($'), 6; # 53 + $test++; - ok length, 10; - $test++; # 55 + ok length($1), 1; # 54 + $test++; - ok length((m/>(.)/)[0]), 1; - $test++; # 56 + ok length($b=$`), 6; # 55 + $test++; - ok length($&), 2; - $test++; # 57 + ok length($b=$&), 2; # 56 + $test++; - ok length($'), 5; - $test++; # 58 + ok length($b=$'), 6; # 57 + $test++; - ok length($`), 3; - $test++; # 59 + ok length($b=$1), 1; # 58 + $test++; - ok length($1), 1; - $test++; # 60 + ok $a, "\342"; # 59 + $test++; - ok $&, pack("C*", ord(">"), 0342); - $test++; # 61 + ok $`, "\342\230\272\342\230\272"; # 60 + $test++; - ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); - $test++; # 62 + ok $&, "x\342"; # 61 + $test++; - ok $`, pack("C*", 0342, 0230, 0272); - $test++; # 63 + ok $', "\230\272y\342\230\272"; # 62 + $test++; - ok $1, pack("C*", 0342); - $test++; # 64 - } + ok $1, "\342"; # 63 + $test++; +} +{ + use utf8; ok "\x{ab}" =~ /^\x{ab}$/, 1; - $test++; # 65 + $test++; # 64 } { use utf8; ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); - $test++; # 66 + $test++; # 65 } { use utf8; my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); ok "@a", "1234 123 2345"; - $test++; # 67 + $test++; # 66 } { @@ -319,7 +299,7 @@ sub nok_bytes { my $x = chr(123); my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); ok "@a", "1234 2345"; - $test++; # 68 + $test++; # 67 } { @@ -331,10 +311,10 @@ sub nok_bytes { { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 print "not " if $a eq $b; - print "ok $test\n"; $test++; + print "ok $test\n"; $test++; # 68 { use utf8; print "not " if $a eq $b; } - print "ok $test\n"; $test++; + print "ok $test\n"; $test++; # 69 } { @@ -344,7 +324,7 @@ sub nok_bytes { for (@x) { s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; my($latin) = /^(.+)(?:\s+\d)/; - print $latin eq "stra\337e" ? "ok $test\n" : + print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71 "#latin[$latin]\nnot ok $test\n"; $test++; $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a @@ -369,7 +349,7 @@ sub nok_bytes { } print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; - print "ok $test\n"; + print "ok $test\n"; # 72 $test++; } @@ -384,27 +364,27 @@ sub nok_bytes { print "not " unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; print "ok $test\n"; - $test++; + $test++; # 73 my ($a, $b) = split(/\x{100}/, $s); print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 74 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 75 my ($a, $b) = split(/\x40\x{80}/, $s); print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 76 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; print "ok $test\n"; - $test++; + $test++; # 77 } { @@ -414,14 +394,14 @@ sub nok_bytes { my $smiley = "\x{263a}"; - for my $s ("\x{263a}", # 1 - $smiley, # 2 + for my $s ("\x{263a}", # 78 + $smiley, # 79 - "" . $smiley, # 3 - "" . "\x{263a}", # 4 + "" . $smiley, # 80 + "" . "\x{263a}", # 81 - $smiley . "", # 5 - "\x{263a}" . "", # 6 + $smiley . "", # 82 + "\x{263a}" . "", # 83 ) { my $length_chars = length($s); my $length_bytes; @@ -437,14 +417,14 @@ sub nok_bytes { $test++; } - for my $s ("\x{263a}" . "\x{263a}", # 7 - $smiley . $smiley, # 8 + for my $s ("\x{263a}" . "\x{263a}", # 84 + $smiley . $smiley, # 85 - "\x{263a}\x{263a}", # 9 - "$smiley$smiley", # 10 + "\x{263a}\x{263a}", # 86 + "$smiley$smiley", # 87 - "\x{263a}" x 2, # 11 - $smiley x 2, # 12 + "\x{263a}" x 2, # 88 + $smiley x 2, # 89 ) { my $length_chars = length($s); my $length_bytes; @@ -460,3 +440,106 @@ sub nok_bytes { $test++; } } + +{ + use utf8; + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 90 + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 91 + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 92 + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 93 + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 94 + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 95 + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 96 + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 97 +} + +{ + # the first half of 20001028.003 + + my $X = chr(1448); + my ($Y) = $X =~ /(.*)/; + print "not " unless length $Y == 1; + print "ok $test\n"; + $test++; # 98 +} + +{ + # 20001108.001 + + use utf8; + my $X = "Szab\x{f3},Bal\x{e1}zs"; + my $Y = $X; + $Y =~ s/(B)/$1/ for 0..3; + print "not " unless $Y eq $X; + print "ok $test\n"; + $test++; # 99 +} + +{ + # 20001114.001 + + use utf8; + use charnames ':full'; + my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; + print "not " unless ord($text) == 0xc4; + print "ok $test\n"; + $test++; # 100 +} + +{ + # 20001205.014 + + use utf8; + + my $a = "ABC\x{263A}"; + + my @b = split( //, $a ); + + print "not " unless @b == 4; + print "ok $test\n"; + $test++; # 101 + + print "not " unless length($b[3]) == 1; + print "ok $test\n"; + $test++; # 102 + + $a =~ s/^A/Z/; + print "not " unless length($a) == 4; + print "ok $test\n"; + $test++; # 103 +} + +{ + # the second half of 20001028.003 + + use utf8; + $X =~ s/^/chr(1488)/e; + print "not " unless length $X == 1; + print "ok $test\n"; + $test++; # 104 +} + |