summaryrefslogtreecommitdiff
path: root/lib/encoding.t
blob: 6a50c03fc6347d6002d5d3be5ed3460ad8c346fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
BEGIN {
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
}

print "1..23\n";

use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)

# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)

$a = "\xDF";
$b = "\x{100}";

print "not " unless ord($a) == 0x3af;
print "ok 1\n";

print "not " unless ord($b) == 0x100;
print "ok 2\n";

my $c;

$c = $a . $b;

print "not " unless ord($c) == 0x3af;
print "ok 3\n";

print "not " unless length($c) == 2;
print "ok 4\n";

print "not " unless ord(substr($c, 1, 1)) == 0x100;
print "ok 5\n";

print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
print "ok 6\n";

print "not " unless ord(pack("C", 0xdf)) == 0x3af;
print "ok 7\n";

# we didn't break pack/unpack, I hope

print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
print "ok 8\n";

# the first octet of UTF-8 encoded 0x3af 
print "not " unless unpack("C", chr(0xdf)) == 0xce;
print "ok 9\n";

print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
print "ok 10\n";

print "not " unless unpack("U", chr(0xdf)) == 0x3af;
print "ok 11\n";

# charnames must still work
use charnames ':full';
print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
print "ok 12\n";

# combine

$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);

print "not " unless ord($c) == 0x3af;
print "ok 13\n";

print "not " unless ord(substr($c, 1, 1)) == 0xdf;
print "ok 14\n";

print "not " unless ord(substr($c, 2, 1)) == 0x3af;
print "ok 15\n";

# regex literals

print "not " unless "\xDF"    =~ /\x{3AF}/;
print "ok 16\n";

print "not " unless "\x{3AF}" =~ /\xDF/;
print "ok 17\n";

print "not " unless "\xDF"    =~ /\xDF/;
print "ok 18\n";

print "not " unless "\x{3AF}" =~ /\x{3AF}/;
print "ok 19\n";

# eq, cmp

my $byte=pack("C*", 0xDF);

print "not " unless pack("U*", 0x3AF) eq $byte;
print "ok 20\n";

print "not " if chr(0xDF) cmp $byte;
print "ok 21\n";

print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
                    ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
                    ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
	            ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
print "ok 22\n";

# Used to core dump in 5.7.3
print ord undef == 0 ? "ok 23\n" : "not ok 23\n";