1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
#!./perl

# Round-trip tests for ord() and chr().  Literal arguments are checked
# first, then the same operations on values held in a variable, and
# finally a ladder of code points that sit on encoding-length boundaries.

BEGIN {
    # Work from inside t/ when that directory is present.
    if (-d 't') {
        chdir 't';
    }
    @INC = ('.', '../lib');    # ../lib needed for test.deparse
    require "test.pl";         # presumably the source of plan/ok/is,
                               # none of which are defined in this file
}

# 7 standalone checks followed by 28 boundary checks.
plan(tests => 35);

# ---------------------------------------------------------------------
# compile time evaluation: every operand is a literal
#
#   'A' is  65 in ASCII
#   'A' is 193 in EBCDIC
# ---------------------------------------------------------------------
ok(
    ord('A') == 65 || ord('A') == 193,
    "ord('A') is " . ord('A')
);
is( ord(chr(500)), 500, "compile time chr 500" );

# ---------------------------------------------------------------------
# run time evaluation: the operand comes from $x
# ---------------------------------------------------------------------
$x = 'ABC';
ok(
    ord($x) == 65 || ord($x) == 193,    # ord() of a longer string
    "ord('$x') is " . ord($x)
);
ok(
    chr(65) eq 'A' || chr(193) eq 'A',  # either platform's code for 'A'
    "chr can produce 'A'"
);

$x = 500;
is( ord(chr($x)), $x, "runtime chr $x" );

# The same code point written with the \x{...} escape, first as a
# literal and then via the variable.
is( ord("\x{1234}"), 0x1234, 'compile time ord \x{....}' );

$x = "\x{1234}";
is( ord($x), 0x1234, 'runtime ord \x{....}' );

{
    no warnings 'utf8';    # avoid Unicode warnings

    # The following code points are some interesting steps.  Each one
    # goes through chr() and must come back unchanged from ord(); the
    # description says which boundary (if any) the value marks.

    # Up to the end of the two-byte UTF-8 range.
    is( ord(chr(   0x100)),    0x100, '0x0100' );
    is( ord(chr(   0x3FF)),    0x3FF, 'last two-byte char in UTF-EBCDIC' );
    is( ord(chr(   0x400)),    0x400, 'first three-byte char in UTF-EBCDIC' );
    is( ord(chr(   0x7FF)),    0x7FF, 'last two-byte char in UTF-8' );

    # Three-byte UTF-8 range below the surrogates.
    is( ord(chr(   0x800)),    0x800, 'first three-byte char in UTF-8' );
    is( ord(chr(   0xFFF)),    0xFFF, '0x0FFF' );
    is( ord(chr(  0x1000)),   0x1000, '0x1000' );
    is( ord(chr(  0x3FFF)),   0x3FFF, 'last three-byte char in UTF-EBCDIC' );
    is( ord(chr(  0x4000)),   0x4000, 'first four-byte char in UTF-EBCDIC' );
    is( ord(chr(  0xCFFF)),   0xCFFF, '0xCFFF' );
    is( ord(chr(  0xD000)),   0xD000, '0xD000' );
    is( ord(chr(  0xD7FF)),   0xD7FF, '0xD7FF' );

    # Surrogates and the code point right after them.
    is( ord(chr(  0xD800)),   0xD800, 'surrogate begin (not strict utf-8)' );
    is( ord(chr(  0xDFFF)),   0xDFFF, 'surrogate end (not strict utf-8)' );
    is( ord(chr(  0xE000)),   0xE000, '0xE000' );

    # Noncharacters and the top of the three-byte UTF-8 range.
    is( ord(chr(  0xFDD0)),   0xFDD0, 'first additional noncharacter in BMP' );
    is( ord(chr(  0xFDEF)),   0xFDEF, 'last additional noncharacter in BMP' );
    is( ord(chr(  0xFFFE)),   0xFFFE, '0xFFFE' );
    is( ord(chr(  0xFFFF)),   0xFFFF, 'last three-byte char in UTF-8' );

    # Four-byte UTF-8 range and beyond.
    is( ord(chr( 0x10000)),  0x10000, 'first four-byte char in UTF-8' );
    is( ord(chr( 0x3FFFF)),  0x3FFFF, 'last four-byte char in UTF-EBCDIC' );
    is( ord(chr( 0x40000)),  0x40000, 'first five-byte char in UTF-EBCDIC' );
    is( ord(chr( 0xFFFFF)),  0xFFFFF, '0xFFFFF' );
    is( ord(chr(0x100000)), 0x100000, '0x100000' );
    is( ord(chr(0x10FFFF)), 0x10FFFF, 'Unicode last code point' );
    is( ord(chr(0x110000)), 0x110000, '0x110000' );
    is( ord(chr(0x1FFFFF)), 0x1FFFFF, 'last four-byte char in UTF-8' );
    is( ord(chr(0x200000)), 0x200000, 'first five-byte char in UTF-8' );
}
|