summaryrefslogtreecommitdiff
path: root/t/pragma/utf8.t
diff options
context:
space:
mode:
Diffstat (limited to 't/pragma/utf8.t')
-rwxr-xr-xt/pragma/utf8.t407
1 files changed, 245 insertions, 162 deletions
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index 6986720aab..89416dcfab 100755
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
}
}
-print "1..90\n";
+print "1..104\n";
my $test = 1;
@@ -42,6 +42,7 @@ sub nok_bytes {
{
use utf8;
+
$_ = ">\x{263A}<";
s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
ok $_, '>&#9786;<';
@@ -106,212 +107,191 @@ sub nok_bytes {
}
{
- use utf8;
-
- $_ = "\x{263A}>\x{263A}\x{263A}";
-
- ok length, 4;
- $test++; # 13
-
- ok length((m/>(.)/)[0]), 1;
- $test++; # 14
-
- ok length($&), 2;
- $test++; # 15
+ # no use utf8 needed
+ $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
+
+ ok length($_), 6; # 13
+ $test++;
- ok length($'), 1;
- $test++; # 16
+ ($a) = m/x(.)/;
- ok length($`), 1;
- $test++; # 17
+ ok length($a), 1; # 14
+ $test++;
- ok length($1), 1;
- $test++; # 18
+ ok length($`), 2; # 15
+ $test++;
+ ok length($&), 2; # 16
+ $test++;
+ ok length($'), 2; # 17
+ $test++;
- ok length($tmp=$&), 2;
- $test++; # 19
+ ok length($1), 1; # 18
+ $test++;
- ok length($tmp=$'), 1;
- $test++; # 20
+ ok length($b=$`), 2; # 19
+ $test++;
- ok length($tmp=$`), 1;
- $test++; # 21
+ ok length($b=$&), 2; # 20
+ $test++;
- ok length($tmp=$1), 1;
- $test++; # 22
+ ok length($b=$'), 2; # 21
+ $test++;
- {
- use bytes;
+ ok length($b=$1), 1; # 22
+ $test++;
- my $tmp = $&;
- ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
- $test++; # 23
+ ok $a, "\x{263A}"; # 23
+ $test++;
- $tmp = $';
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 24
+ ok $`, "\x{263A}\x{263A}"; # 24
+ $test++;
- $tmp = $`;
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 25
+ ok $&, "x\x{263A}"; # 25
+ $test++;
- $tmp = $1;
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 26
- }
+ ok $', "y\x{263A}"; # 26
+ $test++;
- ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
- $test++; # 27
+ ok $1, "\x{263A}"; # 27
+ $test++;
- ok_bytes $', pack("C*", 0342, 0230, 0272);
- $test++; # 28
+ ok_bytes $a, "\342\230\272"; # 28
+ $test++;
- ok_bytes $`, pack("C*", 0342, 0230, 0272);
- $test++; # 29
+ ok_bytes $1, "\342\230\272"; # 29
+ $test++;
- ok_bytes $1, pack("C*", 0342, 0230, 0272);
- $test++; # 30
+ ok_bytes $&, "x\342\230\272"; # 30
+ $test++;
{
- use bytes;
- no utf8;
-
- ok length, 10;
- $test++; # 31
+ use utf8; # required
+ $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
+ }
- ok length((m/>(.)/)[0]), 1;
- $test++; # 32
+ ok length($_), 6; # 31
+ $test++;
- ok length($&), 2;
- $test++; # 33
+ ($a) = m/x(.)/;
- ok length($'), 5;
- $test++; # 34
+ ok length($a), 1; # 32
+ $test++;
- ok length($`), 3;
- $test++; # 35
+ ok length($`), 2; # 33
+ $test++;
- ok length($1), 1;
- $test++; # 36
+ ok length($&), 2; # 34
+ $test++;
- ok $&, pack("C*", ord(">"), 0342);
- $test++; # 37
+ ok length($'), 2; # 35
+ $test++;
- ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
- $test++; # 38
+ ok length($1), 1; # 36
+ $test++;
- ok $`, pack("C*", 0342, 0230, 0272);
- $test++; # 39
+ ok length($b=$`), 2; # 37
+ $test++;
- ok $1, pack("C*", 0342);
- $test++; # 40
- }
+ ok length($b=$&), 2; # 38
+ $test++;
- {
- no utf8;
- $_="\342\230\272>\342\230\272\342\230\272";
- }
+ ok length($b=$'), 2; # 39
+ $test++;
- ok length, 10;
- $test++; # 41
+ ok length($b=$1), 1; # 40
+ $test++;
- ok length((m/>(.)/)[0]), 1;
- $test++; # 42
+ ok $a, "\x{263A}"; # 41
+ $test++;
- ok length($&), 2;
- $test++; # 43
+ ok $`, "\x{263A}\x{263A}"; # 42
+ $test++;
- ok length($'), 1;
- $test++; # 44
+ ok $&, "x\x{263A}"; # 43
+ $test++;
- ok length($`), 1;
- $test++; # 45
+ ok $', "y\x{263A}"; # 44
+ $test++;
- ok length($1), 1;
- $test++; # 46
+ ok $1, "\x{263A}"; # 45
+ $test++;
- ok length($tmp=$&), 2;
- $test++; # 47
+ ok_bytes $a, "\342\230\272"; # 46
+ $test++;
- ok length($tmp=$'), 1;
- $test++; # 48
+ ok_bytes $1, "\342\230\272"; # 47
+ $test++;
- ok length($tmp=$`), 1;
- $test++; # 49
+ ok_bytes $&, "x\342\230\272"; # 48
+ $test++;
- ok length($tmp=$1), 1;
- $test++; # 50
+ $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
- {
- use bytes;
+ ok length($_), 14; # 49
+ $test++;
- my $tmp = $&;
- ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
- $test++; # 51
+ ($a) = m/x(.)/;
- $tmp = $';
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 52
+ ok length($a), 1; # 50
+ $test++;
- $tmp = $`;
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 53
+ ok length($`), 6; # 51
+ $test++;
- $tmp = $1;
- ok $tmp, pack("C*", 0342, 0230, 0272);
- $test++; # 54
- }
+ ok length($&), 2; # 52
+ $test++;
- {
- use bytes;
- no utf8;
+ ok length($'), 6; # 53
+ $test++;
- ok length, 10;
- $test++; # 55
+ ok length($1), 1; # 54
+ $test++;
- ok length((m/>(.)/)[0]), 1;
- $test++; # 56
+ ok length($b=$`), 6; # 55
+ $test++;
- ok length($&), 2;
- $test++; # 57
+ ok length($b=$&), 2; # 56
+ $test++;
- ok length($'), 5;
- $test++; # 58
+ ok length($b=$'), 6; # 57
+ $test++;
- ok length($`), 3;
- $test++; # 59
+ ok length($b=$1), 1; # 58
+ $test++;
- ok length($1), 1;
- $test++; # 60
+ ok $a, "\342"; # 59
+ $test++;
- ok $&, pack("C*", ord(">"), 0342);
- $test++; # 61
+ ok $`, "\342\230\272\342\230\272"; # 60
+ $test++;
- ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
- $test++; # 62
+ ok $&, "x\342"; # 61
+ $test++;
- ok $`, pack("C*", 0342, 0230, 0272);
- $test++; # 63
+ ok $', "\230\272y\342\230\272"; # 62
+ $test++;
- ok $1, pack("C*", 0342);
- $test++; # 64
- }
+ ok $1, "\342"; # 63
+ $test++;
+}
+{
+ use utf8;
ok "\x{ab}" =~ /^\x{ab}$/, 1;
- $test++; # 65
+ $test++; # 64
}
{
use utf8;
ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
- $test++; # 66
+ $test++; # 65
}
{
use utf8;
my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
ok "@a", "1234 123 2345";
- $test++; # 67
+ $test++; # 66
}
{
@@ -319,7 +299,7 @@ sub nok_bytes {
my $x = chr(123);
my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
ok "@a", "1234 2345";
- $test++; # 68
+ $test++; # 67
}
{
@@ -331,10 +311,10 @@ sub nok_bytes {
{ use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
print "not " if $a eq $b;
- print "ok $test\n"; $test++;
+ print "ok $test\n"; $test++; # 68
{ use utf8; print "not " if $a eq $b; }
- print "ok $test\n"; $test++;
+ print "ok $test\n"; $test++; # 69
}
{
@@ -344,7 +324,7 @@ sub nok_bytes {
for (@x) {
s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
my($latin) = /^(.+)(?:\s+\d)/;
- print $latin eq "stra\337e" ? "ok $test\n" :
+ print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71
"#latin[$latin]\nnot ok $test\n";
$test++;
$latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
@@ -369,7 +349,7 @@ sub nok_bytes {
}
print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
- print "ok $test\n";
+ print "ok $test\n"; # 72
$test++;
}
@@ -384,27 +364,27 @@ sub nok_bytes {
print "not "
unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
print "ok $test\n";
- $test++;
+ $test++; # 73
my ($a, $b) = split(/\x{100}/, $s);
print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
print "ok $test\n";
- $test++;
+ $test++; # 74
my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
print "ok $test\n";
- $test++;
+ $test++; # 75
my ($a, $b) = split(/\x40\x{80}/, $s);
print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
print "ok $test\n";
- $test++;
+ $test++; # 76
my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
print "ok $test\n";
- $test++;
+ $test++; # 77
}
{
@@ -414,14 +394,14 @@ sub nok_bytes {
my $smiley = "\x{263a}";
- for my $s ("\x{263a}", # 1
- $smiley, # 2
+ for my $s ("\x{263a}", # 78
+ $smiley, # 79
- "" . $smiley, # 3
- "" . "\x{263a}", # 4
+ "" . $smiley, # 80
+ "" . "\x{263a}", # 81
- $smiley . "", # 5
- "\x{263a}" . "", # 6
+ $smiley . "", # 82
+ "\x{263a}" . "", # 83
) {
my $length_chars = length($s);
my $length_bytes;
@@ -437,14 +417,14 @@ sub nok_bytes {
$test++;
}
- for my $s ("\x{263a}" . "\x{263a}", # 7
- $smiley . $smiley, # 8
+ for my $s ("\x{263a}" . "\x{263a}", # 84
+ $smiley . $smiley, # 85
- "\x{263a}\x{263a}", # 9
- "$smiley$smiley", # 10
+ "\x{263a}\x{263a}", # 86
+ "$smiley$smiley", # 87
- "\x{263a}" x 2, # 11
- $smiley x 2, # 12
+ "\x{263a}" x 2, # 88
+ $smiley x 2, # 89
) {
my $length_chars = length($s);
my $length_bytes;
@@ -460,3 +440,106 @@ sub nok_bytes {
$test++;
}
}
+
+{
+ use utf8;
+
+ print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+ print "ok $test\n";
+ $test++; # 90
+
+ print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+ print "ok $test\n";
+ $test++; # 91
+
+ print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+ print "ok $test\n";
+ $test++; # 92
+
+ print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+ print "ok $test\n";
+ $test++; # 93
+
+ print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+ print "ok $test\n";
+ $test++; # 94
+
+ print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+ print "ok $test\n";
+ $test++; # 95
+
+ print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+ print "ok $test\n";
+ $test++; # 96
+
+ print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+ print "ok $test\n";
+ $test++; # 97
+}
+
+{
+ # the first half of 20001028.003
+
+ my $X = chr(1448);
+ my ($Y) = $X =~ /(.*)/;
+ print "not " unless length $Y == 1;
+ print "ok $test\n";
+ $test++; # 98
+}
+
+{
+ # 20001108.001
+
+ use utf8;
+ my $X = "Szab\x{f3},Bal\x{e1}zs";
+ my $Y = $X;
+ $Y =~ s/(B)/$1/ for 0..3;
+ print "not " unless $Y eq $X;
+ print "ok $test\n";
+ $test++; # 99
+}
+
+{
+ # 20001114.001
+
+ use utf8;
+ use charnames ':full';
+ my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
+ print "not " unless ord($text) == 0xc4;
+ print "ok $test\n";
+ $test++; # 100
+}
+
+{
+ # 20001205.014
+
+ use utf8;
+
+ my $a = "ABC\x{263A}";
+
+ my @b = split( //, $a );
+
+ print "not " unless @b == 4;
+ print "ok $test\n";
+ $test++; # 101
+
+ print "not " unless length($b[3]) == 1;
+ print "ok $test\n";
+ $test++; # 102
+
+ $a =~ s/^A/Z/;
+ print "not " unless length($a) == 4;
+ print "ok $test\n";
+ $test++; # 103
+}
+
+{
+ # the second half of 20001028.003
+
+ use utf8;
+ $X =~ s/^/chr(1488)/e;
+ print "not " unless length $X == 1;
+ print "ok $test\n";
+ $test++; # 104
+}
+