summaryrefslogtreecommitdiff
path: root/dist/Unicode-Normalize/t/norm.t
blob: ffb7449984c95c9f244bba83450a8a7d139a2d85 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use strict;
use warnings;
BEGIN { $| = 1; print "1..64\n"; }
my $count = 0;
sub ok { Unicode::Normalize::ok(\$count, @_) }

use Unicode::Normalize qw(normalize);

ok(1);

sub _pack_U   { Unicode::Normalize::dot_t_pack_U(@_) }
sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) }

#########################

ok(normalize('D', ""), "");
ok(normalize('C', ""), "");
ok(normalize('KD',""), "");
ok(normalize('KC',""), "");

ok(normalize('D', "A"), "A");
ok(normalize('C', "A"), "A");
ok(normalize('KD',"A"), "A");
ok(normalize('KC',"A"), "A");

ok(normalize('NFD', ""), "");
ok(normalize('NFC', ""), "");
ok(normalize('NFKD',""), "");
ok(normalize('NFKC',""), "");

ok(normalize('NFD', "A"), "A");
ok(normalize('NFC', "A"), "A");
ok(normalize('NFKD',"A"), "A");
ok(normalize('NFKC',"A"), "A");

# 17

# don't modify the source
my $sNFD = "\x{FA19}";
ok(normalize('NFD', $sNFD), "\x{795E}");
ok($sNFD, "\x{FA19}");

my $sNFC = "\x{FA1B}";
ok(normalize('NFC', $sNFC), "\x{798F}");
ok($sNFC, "\x{FA1B}");

my $sNFKD = "\x{FA1E}";
ok(normalize('NFKD', $sNFKD), "\x{7FBD}");
ok($sNFKD, "\x{FA1E}");

my $sNFKC = "\x{FA26}";
ok(normalize('NFKC', $sNFKC), "\x{90FD}");
ok($sNFKC, "\x{FA26}");

# 25

sub hexNFC {
  join " ", map sprintf("%04X", $_),
  _unpack_U normalize 'C', _pack_U map hex, split ' ', shift;
}
sub hexNFD {
  join " ", map sprintf("%04X", $_),
  _unpack_U normalize 'D', _pack_U map hex, split ' ', shift;
}

ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");

ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"),      "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");

ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"),      "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");

ok(hexNFC("AC00 11A7"), "AC00 11A7");
ok(hexNFC("AC00 11A8"), "AC01");
ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");

# 47

# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
ok(hexNFC("1100 1161 0300"), "AC00 0300");
ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");

ok(hexNFC("0300 0041"), "0300 0041");
ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");

ok(hexNFC("0327 0061 0300"), "0327 00E0");
ok(hexNFC("0301 0061 0300"), "0301 00E0");
ok(hexNFC("0315 0061 0300"), "0315 00E0");
ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");

# 64