1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
utf8.c AOK
[utf8_to_uv]
Malformed UTF-8 character
my $a = ord "\x80" ;
Malformed UTF-8 character
my $a = ord "\xf080" ;
<<<<<< this warning can't be easily triggered from perl anymore
[utf16_to_utf8]
Malformed UTF-16 surrogate
<<<<<< Add a test when something actually calls utf16_to_utf8
__END__
# utf8.c [utf8_to_uv] -W
BEGIN {  # skip guard: executed as soon as it is parsed, before the rest of the program
if (ord('A') == 193) {  # ord('A') == 193 is used as the EBCDIC check (see the skip message)
print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
exit 0;  # stop here so none of the statements below are reached
}
}
use utf8 ;  # source is read as UTF-8 from here on, so a bad byte sequence in a literal is reportable
my $a = "snøstorm" ;  # line 9: EXPECT lists a malformed-character warning (0x73 right after start byte 0xf8); NOTE(review): needs a raw 0xF8 byte in this literal - confirm the file encoding
{
no warnings 'utf8' ;  # utf8 category off for the next statement
my $a = "snøstorm";  # line 12: same literal, no warning listed under EXPECT
use warnings 'utf8' ;  # utf8 category back on within this block
my $a = "snøstorm";  # line 14: EXPECT lists the same malformed-character warning again
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
use warnings 'utf8';  # first half: chr() on boundary code points with the utf8 category enabled
my $d7ff = chr(0xD7FF);  # no warning listed under EXPECT
my $d800 = chr(0xD800);  # line 3: EXPECT lists "UTF-16 surrogate 0xd800"
my $dfff = chr(0xDFFF);  # line 4: EXPECT lists "UTF-16 surrogate 0xdfff"
my $e000 = chr(0xE000);  # no warning listed under EXPECT
my $fffd = chr(0xFFFD);  # no warning listed under EXPECT
my $ffff = chr(0xFFFF);  # line 7: EXPECT lists "Unicode character 0xffff is illegal"
my $hex4 = chr(0x10000);  # no warning listed under EXPECT
my $hex5 = chr(0x100000);  # no warning listed under EXPECT
my $max = chr(0x10FFFF);  # line 10: EXPECT lists "Unicode character 0x10ffff is illegal"
no warnings 'utf8';  # second half: same calls with the category disabled; EXPECT lists nothing for lines 12-20
my $d7ff = chr(0xD7FF);
my $d800 = chr(0xD800);
my $dfff = chr(0xDFFF);
my $e000 = chr(0xE000);
my $fffd = chr(0xFFFD);
my $ffff = chr(0xFFFF);
my $hex4 = chr(0x10000);
my $hex5 = chr(0x100000);
my $max = chr(0x10FFFF);
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xffff is illegal at - line 7.
Unicode character 0x10ffff is illegal at - line 10.
########
use warnings 'utf8';  # first half: pack("U", ...) on boundary code points with the utf8 category enabled
my $d7ff = pack("U", 0xD7FF);  # no warning listed under EXPECT
my $d800 = pack("U", 0xD800);  # line 3: EXPECT lists "UTF-16 surrogate 0xd800"
my $dfff = pack("U", 0xDFFF);  # line 4: EXPECT lists "UTF-16 surrogate 0xdfff"
my $e000 = pack("U", 0xE000);  # no warning listed under EXPECT
my $fffd = pack("U", 0xFFFD);  # no warning listed under EXPECT
my $ffff = pack("U", 0xFFFF);  # line 7: EXPECT lists "Unicode character 0xffff is illegal"
my $hex4 = pack("U", 0x10000);  # no warning listed under EXPECT
my $hex5 = pack("U", 0x100000);  # no warning listed under EXPECT
my $max = pack("U", 0x10FFFF);  # line 10: EXPECT lists "Unicode character 0x10ffff is illegal"
no warnings 'utf8';  # second half: same calls with the category disabled; EXPECT lists nothing for lines 12-20
my $d7ff = pack("U", 0xD7FF);
my $d800 = pack("U", 0xD800);
my $dfff = pack("U", 0xDFFF);
my $e000 = pack("U", 0xE000);
my $fffd = pack("U", 0xFFFD);
my $ffff = pack("U", 0xFFFF);
my $hex4 = pack("U", 0x10000);
my $hex5 = pack("U", 0x100000);
my $max = pack("U", 0x10FFFF);
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xffff is illegal at - line 7.
Unicode character 0x10ffff is illegal at - line 10.
########
use warnings 'utf8';  # first half: "\x{...}" string escapes for boundary code points with the utf8 category enabled
my $d7ff = "\x{D7FF}";  # no warning listed under EXPECT
my $d800 = "\x{D800}";  # line 3: EXPECT lists "UTF-16 surrogate 0xd800"
my $dfff = "\x{DFFF}";  # line 4: EXPECT lists "UTF-16 surrogate 0xdfff"
my $e000 = "\x{E000}";  # no warning listed under EXPECT
my $fffd = "\x{FFFD}";  # no warning listed under EXPECT
my $ffff = "\x{FFFF}";  # line 7: EXPECT lists "Unicode character 0xffff is illegal"
my $hex4 = "\x{10000}";  # no warning listed under EXPECT
my $hex5 = "\x{100000}";  # no warning listed under EXPECT
my $max = "\x{10FFFF}";  # line 10: EXPECT lists "Unicode character 0x10ffff is illegal"
no warnings 'utf8';  # second half: same literals with the category disabled; EXPECT lists nothing for lines 12-20
my $d7ff = "\x{D7FF}";
my $d800 = "\x{D800}";
my $dfff = "\x{DFFF}";
my $e000 = "\x{E000}";
my $fffd = "\x{FFFD}";
my $ffff = "\x{FFFF}";
my $hex4 = "\x{10000}";
my $hex5 = "\x{100000}";
my $max = "\x{10FFFF}";
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xffff is illegal at - line 7.
Unicode character 0x10ffff is illegal at - line 10.
|