summaryrefslogtreecommitdiff
path: root/ext/Unicode/Normalize/t/test.t
blob: d02bcc0304a469eda95425b158c7dc082300174c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/test.t'.

#########################

# Test.pm provides plan(), ok() and skip(), which are the only test
# primitives this script uses.
use Test;
use strict;
use warnings;
# The plan is declared inside BEGIN, so it is in place at compile time,
# before Unicode::Normalize is loaded by the next line.  The number given
# here must equal the total count of ok()/skip() results in this file.
BEGIN { plan tests => 20 };
use Unicode::Normalize;
ok(1); # If we made it this far, we're ok.

#########################

# Normalizing the empty string must return the empty string in both forms.
# With two arguments, ok() compares the first (got) against the second
# (expected).
ok(NFC(""), "");
ok(NFD(""), "");

# Apply NFC to a string described as whitespace-separated hexadecimal
# code points.
#
# Takes one argument, e.g. "0061 0300".  Returns the normalized string in
# the same notation: every code point formatted with "%04X", joined by
# single spaces.
sub hexNFC {
  my $hex_list    = shift;
  my @code_points = map { hex $_ } split ' ', $hex_list;
  my $normalized  = NFC(pack 'U*', @code_points);
  return join ' ', map { sprintf '%04X', $_ } unpack 'U*', $normalized;
}
# Apply NFD to a string described as whitespace-separated hexadecimal
# code points.
#
# Takes one argument, e.g. "00E0".  Returns the normalized string in the
# same notation: every code point formatted with "%04X", joined by single
# spaces.
sub hexNFD {
  my $hex_list    = shift;
  my @code_points = map { hex $_ } split ' ', $hex_list;
  my $normalized  = NFD(pack 'U*', @code_points);
  return join ' ', map { sprintf '%04X', $_ } unpack 'U*', $normalized;
}

# Classify the native character set by the code of "A": 0x41 marks an
# ASCII-based platform, 0xc1 an EBCDIC-based one.  The flags are read by
# the platform-dependent tests further down.
my $ordA = ord("A");
my ($ASCII, $EBCDIC) = ($ordA == 0x41, $ordA == 0xc1);

# Three inputs that must all yield the same NFC result.  The expected
# string depends on the platform; when the platform is neither ASCII nor
# EBCDIC based, each of the three tests is skipped instead.
{
  my $composed =
      $ASCII  ? "00E0 05AE 05C4 0315 0062"
      # A WITH GRAVE  is 0044 in EBCDIC, not 00E0
      # SMALL LATIN B is 0082 in EBCDIC, not 0062
    : $EBCDIC ? "0044 05AE 05C4 0315 0082"
    :           undef;

  for my $source (
    "0061 0315 0300 05AE 05C4 0062",
    "00E0 05AE 05C4 0315 0062",
    "0061 05AE 0300 05C4 0315 0062",
  ) {
    if (defined $composed) {
      ok(hexNFC($source), $composed);
    } else {
      skip("Neither ASCII nor EBCDIC based");
    }
  }
}

# NFC cases that carry no platform guard.  Each entry is
# [ input, expected ]; the last input is expected to come back unchanged.
for my $case (
  [ "0045 0304 0300 AC00 11A8", "1E14 AC01" ],
  [ "1100 1161 1100 1173 11AF", "AC00 AE00" ],
  [ "1100 0300 1161 1173 11AF", "1100 0300 1161 1173 11AF" ],
) {
  my ($source, $expected) = @$case;
  ok(hexNFC($source), $expected);
}

# NFD must map all three of these inputs to one and the same sequence.
for my $source (
  "0061 0315 0300 05AE 05C4 0062",
  "00E0 05AE 05C4 0315 0062",
  "0061 05AE 0300 05C4 0315 0062",
) {
  ok(hexNFD($source), "0061 05AE 0300 05C4 0315 0062");
}

# Two inputs that NFC and NFD are both expected to turn into the same
# sequence.  Only the NFC expectation is platform-dependent; the NFD
# checks run unconditionally.
{
  my @sources = (
    "0061 05C4 0315 0300 05AE 0062",
    "0061 05AE 05C4 0300 0315 0062",
  );
  my $reordered = "0061 05AE 05C4 0300 0315 0062";

  my $nfc_expected =
      $ASCII  ? $reordered
      # SMALL LATIN A is 0081 in EBCDIC, not 0061
      # SMALL LATIN B is 0082 in EBCDIC, not 0062
    : $EBCDIC ? "0081 05AE 05C4 0300 0315 0082"
    :           undef;

  for my $source (@sources) {
    if (defined $nfc_expected) {
      ok(hexNFC($source), $nfc_expected);
    } else {
      skip("Neither ASCII nor EBCDIC based");
    }
  }

  for my $source (@sources) {
    ok(hexNFD($source), $reordered);
  }
}

# Input containing U+0000 before and after a letter: the code points must
# survive normalization.  The NFC expectation is platform-dependent; the
# NFD check runs unconditionally.
{
  my $source = "0000 0041 0000 0000";

  my $nfc_expected =
      $ASCII  ? $source
      # CAPITAL LATIN A is 00C1 in EBCDIC, not 0041
    : $EBCDIC ? "0000 00C1 0000 0000"
    :           undef;

  if (defined $nfc_expected) {
    ok(hexNFC($source), $nfc_expected);
  } else {
    skip("Neither ASCII nor EBCDIC based");
  }

  ok(hexNFD($source), $source);
}

# should be unary.
# NFC and NFD are deliberately called WITHOUT parentheses here.  Each test
# can only pass if the call is parsed as a unary operator binding tighter
# than `eq', i.e. as NFC(STRING) eq STRING; ok() then receives the result
# of that comparison as its single argument.  Do not add parentheses
# around the argument: that would stop the test from checking the parse.
if ($ASCII) {
  ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x62");
} elsif ($EBCDIC) {
  # Same input as the ASCII branch, but the expected string ends in \x82.
  # NOTE(review): the input still ends in \x62 and starts with \x{41} --
  # confirm on an EBCDIC build that this is the intended comparison.
  ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x82");
} else {
  skip("Neither ASCII nor EBCDIC based");
}
# Paren-less NFD call, checked the same way.
# NOTE(review): unlike the NFC check above, this one has no platform guard.
ok(NFD "\x{E0}\x{AC00}" eq "\x{61}\x{0300}\x{1100}\x{1161}");