summaryrefslogtreecommitdiff
path: root/ext/PerlIO/t/encoding.t
blob: 06a0ba7e7294992a053b0f5418e475f78aca28d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!./perl -w

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    no warnings; # Need global -w flag for later tests, but don't want this
    # to warn here:
    push @INC, "::lib:$MacPerl::Architecture:" if $^O eq 'MacOS';
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: not perlio\n";
	exit 0;
    }
    unless (eval { require Encode } ) {
	print "1..0 # Skip: not Encode\n";
	exit 0;
    }
}

print "1..14\n";

my $grk = "grk$$";
my $utf = "utf$$";
my $fail1 = "fa$$";
my $fail2 = "fb$$";
my $russki = "koi8r$$";
my $threebyte = "3byte$$";

if (open(GRK, ">$grk")) {
    binmode(GRK, ":bytes");
    # alpha beta gamma in ISO 8859-7
    print GRK "\xe1\xe2\xe3";
    close GRK or die "Could not close: $!";
}

{
    open(my $i,'<:encoding(iso-8859-7)',$grk);
    print "ok 1\n";
    open(my $o,'>:utf8',$utf);
    print "ok 2\n";
    print $o readline($i);
    print "ok 3\n";
    close($o) or die "Could not close: $!";
    close($i);
}

if (open(UTF, "<$utf")) {
    binmode(UTF, ":bytes");
    if (ord('A') == 193) { # EBCDIC
	# alpha beta gamma in UTF-EBCDIC Unicode (0x3b1 0x3b2 0x3b3)
	print "not " unless <UTF> eq "\xb4\x58\xb4\x59\xb4\x62";
    } else {
	# alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
	print "not " unless <UTF> eq "\xce\xb1\xce\xb2\xce\xb3";
    }
    print "ok 4\n";
    close UTF;
}

{
    use Encode;
    open(my $i,'<:utf8',$utf);
    print "ok 5\n";
    open(my $o,'>:encoding(iso-8859-7)',$grk);
    print "ok 6\n";
    print $o readline($i);
    print "ok 7\n";
    close($o) or die "Could not close: $!";
    close($i);
}

if (open(GRK, "<$grk")) {
    binmode(GRK, ":bytes");
    print "not " unless <GRK> eq "\xe1\xe2\xe3";
    print "ok 8\n";
    close GRK;
}

$SIG{__WARN__} = sub {$warn .= $_[0]};

if (open(FAIL, ">:encoding(NoneSuch)", $fail1)) {
    print "not ok 9 # Open should fail\n";
} else {
    print "ok 9\n";
}
if (!defined $warn) {
    print "not ok 10 # warning is undef\n";
} elsif ($warn =~ /^Cannot find encoding "NoneSuch" at/) {
    print "ok 10\n";
} else {
    print "not ok 10 # warning is '$warn'";
}

if (open(RUSSKI, ">$russki")) {
    print RUSSKI "\x3c\x3f\x78";
    close RUSSKI or die "Could not close: $!";
    open(RUSSKI, "$russki");
    binmode(RUSSKI, ":raw");
    my $buf1;
    read(RUSSKI, $buf1, 1);
    # eof(RUSSKI);
    binmode(RUSSKI, ":encoding(koi8-r)");
    my $buf2;
    read(RUSSKI, $buf2, 1);
    my $offset = tell(RUSSKI);
    if (ord($buf1) == 0x3c &&
	ord($buf2) == (ord('A') == 193) ? 0x6f : 0x3f &&
	$offset == 2) {
	print "ok 11\n";
    } else {
	printf "not ok 11 # [%s] [%s] %d\n",
	       join(" ", unpack("H*", $buf1)),
	       join(" ", unpack("H*", $buf2)), $offset;
    }
    close(RUSSKI);
} else {
    print "not ok 11 # open failed: $!\n";
}

undef $warn;

# Check there is no Use of uninitialized value in concatenation (.) warning
# due to the way @latin2iso_num was used to make aliases.
if (open(FAIL, ">:encoding(latin42)", $fail2)) {
    print "not ok 12 # Open should fail\n";
} else {
    print "ok 12\n";
}
if (!defined $warn) {
    print "not ok 13 # warning is undef\n";
} elsif ($warn =~ /^Cannot find encoding "latin42" at.*line \d+\.$/) {
    print "ok 13\n";
} else {
    print "not ok 13 # warning is: \n";
    $warn =~ s/^/# /mg;
    print "$warn";
}

# Create a string of chars that are 3 bytes in UTF-8 
my $str = "\x{1f80}" x 2048;

# Write them to a file
open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
print F $str;
close(F);

# Read file back as UTF-8 
open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
my $dstr = <F>;
close(F);
print "not " unless ($dstr eq $str);
print "ok 14\n";

END {
    unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
}