summaryrefslogtreecommitdiff
path: root/lib/Unicode/Collate/t
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Unicode/Collate/t')
-rw-r--r--lib/Unicode/Collate/t/altern.t7
-rw-r--r--lib/Unicode/Collate/t/cjkrange.t94
-rw-r--r--lib/Unicode/Collate/t/contract.t10
-rw-r--r--lib/Unicode/Collate/t/hangtype.t12
-rw-r--r--lib/Unicode/Collate/t/hangul.t10
-rw-r--r--lib/Unicode/Collate/t/ignor.t158
-rw-r--r--lib/Unicode/Collate/t/illegal.t7
-rw-r--r--lib/Unicode/Collate/t/illegalp.t11
-rw-r--r--lib/Unicode/Collate/t/index.t14
-rw-r--r--lib/Unicode/Collate/t/normal.t9
-rw-r--r--lib/Unicode/Collate/t/override.t187
-rw-r--r--lib/Unicode/Collate/t/rearrang.t10
-rw-r--r--lib/Unicode/Collate/t/test.t294
-rw-r--r--lib/Unicode/Collate/t/trailwt.t7
-rw-r--r--lib/Unicode/Collate/t/variable.t7
-rw-r--r--lib/Unicode/Collate/t/version.t11
-rw-r--r--lib/Unicode/Collate/t/view.t7
17 files changed, 521 insertions, 334 deletions
diff --git a/lib/Unicode/Collate/t/altern.t b/lib/Unicode/Collate/t/altern.t
index d48e168b69..c958f4b58f 100644
--- a/lib/Unicode/Collate/t/altern.t
+++ b/lib/Unicode/Collate/t/altern.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
diff --git a/lib/Unicode/Collate/t/cjkrange.t b/lib/Unicode/Collate/t/cjkrange.t
new file mode 100644
index 0000000000..5a39bb8a57
--- /dev/null
+++ b/lib/Unicode/Collate/t/cjkrange.t
@@ -0,0 +1,94 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 51 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+);
+
+# U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0.
+# U+3400 is CJK UI ExtA, then greater than any CJK UI.
+
+##### 2..11
+ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
+ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
+ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
+ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
+ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
+ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
+ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
+ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
+
+##### 12..21
+$Collator->change(UCA_Version => 11);
+ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
+ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
+ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
+ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
+ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
+ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
+
+##### 22..31
+$Collator->change(UCA_Version => 9);
+ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
+ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
+ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
+ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
+ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
+ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
+
+##### 32..41
+$Collator->change(UCA_Version => 8);
+ok($Collator->gt("\x{9FA5}", "\x{3400}")); # UI > ExtA
+ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
+ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < Unassigned(ExtB)
+ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < Unassigned(ExtB)
+ok($Collator->lt("\x{9FFF}","\x{20000}")); # Unassigned < Unassigned(ExtB)
+ok($Collator->lt("\x{9FFF}","\x{2A6D6}")); # Unassigned < Unassigned(ExtB)
+
+##### 42..51
+$Collator->change(UCA_Version => 14);
+ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
+ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
+ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
+ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
+ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
+ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
+ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
+ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
+ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
+
diff --git a/lib/Unicode/Collate/t/contract.t b/lib/Unicode/Collate/t/contract.t
index 18a0cfbdc9..9c55ecdc8b 100644
--- a/lib/Unicode/Collate/t/contract.t
+++ b/lib/Unicode/Collate/t/contract.t
@@ -4,12 +4,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -20,9 +17,6 @@ use strict;
use warnings;
use Unicode::Collate;
-use vars qw($IsEBCDIC);
-$IsEBCDIC = ord("A") != 0x41;
-
our $kjeEntry = <<'ENTRIES';
0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
diff --git a/lib/Unicode/Collate/t/hangtype.t b/lib/Unicode/Collate/t/hangtype.t
index b6a46691aa..d8ea74673e 100644
--- a/lib/Unicode/Collate/t/hangtype.t
+++ b/lib/Unicode/Collate/t/hangtype.t
@@ -4,17 +4,14 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
-BEGIN { plan tests => 30 };
+BEGIN { plan tests => 33 };
use strict;
use warnings;
@@ -48,9 +45,12 @@ ok(Unicode::Collate::getHST(0x11FA), '');
ok(Unicode::Collate::getHST(0x11FF), '');
ok(Unicode::Collate::getHST(0x3011), '');
ok(Unicode::Collate::getHST(0x11A7), '');
+ok(Unicode::Collate::getHST(0xABFF), '');
ok(Unicode::Collate::getHST(0xAC00), 'LV');
ok(Unicode::Collate::getHST(0xAC01), 'LVT');
ok(Unicode::Collate::getHST(0xAC1B), 'LVT');
ok(Unicode::Collate::getHST(0xAC1C), 'LV');
ok(Unicode::Collate::getHST(0xD7A3), 'LVT');
+ok(Unicode::Collate::getHST(0xD7A4), '');
+ok(Unicode::Collate::getHST(0xFFFF), '');
diff --git a/lib/Unicode/Collate/t/hangul.t b/lib/Unicode/Collate/t/hangul.t
index fd6cc69394..d9f7db9b10 100644
--- a/lib/Unicode/Collate/t/hangul.t
+++ b/lib/Unicode/Collate/t/hangul.t
@@ -4,12 +4,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -20,9 +17,6 @@ use strict;
use warnings;
use Unicode::Collate;
-use vars qw($IsEBCDIC);
-$IsEBCDIC = ord("A") != 0x41;
-
#########################
ok(1);
diff --git a/lib/Unicode/Collate/t/ignor.t b/lib/Unicode/Collate/t/ignor.t
new file mode 100644
index 0000000000..4ee47c6f12
--- /dev/null
+++ b/lib/Unicode/Collate/t/ignor.t
@@ -0,0 +1,158 @@
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 41 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+my $trad = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+ ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
+ level => 3,
+ entry => << 'ENTRIES',
+ 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
+ 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
+ 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
+ENTRIES
+);
+# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
+# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
+
+##### 2..3
+
+ok(
+ join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
+ join(':', qw/ aca acia acka acha ada / ),
+);
+
+ok(
+ join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
+ join(':', qw/ ACA ACIA ACKA ACHA ADA / ),
+);
+
+##### 4..7
+
+ok($trad->gt("ocho", "oc\cAho")); # UCA v14
+ok($trad->gt("ocho", "oc\0\cA\0\cBho")); # UCA v14
+ok($trad->eq("-", ""));
+ok($trad->gt("ocho", "oc-ho"));
+
+##### 8..11
+
+$trad->change(UCA_Version => 9);
+
+ok($trad->eq("ocho", "oc\cAho")); # UCA v9
+ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
+ok($trad->eq("-", ""));
+ok($trad->gt("ocho", "oc-ho"));
+
+##### 12..15
+
+$trad->change(UCA_Version => 8);
+
+ok($trad->gt("ocho", "oc\cAho"));
+ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
+ok($trad->eq("-", ""));
+ok($trad->gt("ocho", "oc-ho"));
+
+
+##### 16..19
+
+$trad->change(UCA_Version => 9);
+
+my $hiragana = "\x{3042}\x{3044}";
+my $katakana = "\x{30A2}\x{30A4}";
+
+# HIRAGANA and KATAKANA are ignorable via ignoreName
+ok($trad->eq($hiragana, ""));
+ok($trad->eq("", $katakana));
+ok($trad->eq($hiragana, $katakana));
+ok($trad->eq($katakana, $hiragana));
+
+
+##### 20..31
+
+# According to Conformance Test (UCA_Version == 9 or 11),
+# a L3-ignorable is treated as a completely ignorable.
+
+my $L3ignorable = Unicode::Collate->new(
+ alternate => 'Non-ignorable',
+ level => 3,
+ table => undef,
+ normalization => undef,
+ UCA_Version => 9,
+ entry => <<'ENTRIES',
+0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
+0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
+0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
+1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
+0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
+09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
+09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
+09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
+09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
+1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
+1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
+1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
+1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
+ENTRIES
+);
+
+ok($L3ignorable->lt("\cA", "!"));
+ok($L3ignorable->lt("\x{591}", "!"));
+ok($L3ignorable->eq("\cA", "\x{591}"));
+ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
+ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
+ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
+ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));
+ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
+ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
+ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
+ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
+ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));
+
+##### 32..41
+
+my $c = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+ level => 1,
+ UCA_Version => 14,
+ entry => << 'ENTRIES',
+034F ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
+0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
+0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
+0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
+ENTRIES
+);
+# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
+# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
+
+ok($c->gt("ocho", "oc\x00\x00ho"));
+ok($c->gt("ocho", "oc\cAho"));
+ok($c->gt("ocho", "oc\x{034F}ho"));
+ok($c->gt("ocio", "oc\x{034F}ho"));
+ok($c->lt("ocgo", "oc\x{034F}ho"));
+ok($c->lt("oceo", "oc\x{034F}ho"));
+
+ok($c->viewSortKey("ocho"), "[0B4B 0A3F 0B4B | | |]");
+ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
+ok($c->viewSortKey("oc\cAho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
+ok($c->viewSortKey("oc\x{034F}ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
+
+
diff --git a/lib/Unicode/Collate/t/illegal.t b/lib/Unicode/Collate/t/illegal.t
index 803e2f6739..825177c283 100644
--- a/lib/Unicode/Collate/t/illegal.t
+++ b/lib/Unicode/Collate/t/illegal.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
diff --git a/lib/Unicode/Collate/t/illegalp.t b/lib/Unicode/Collate/t/illegalp.t
index 690c88d0bb..ff1936d353 100644
--- a/lib/Unicode/Collate/t/illegalp.t
+++ b/lib/Unicode/Collate/t/illegalp.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -26,9 +23,9 @@ ok(1);
# No test for Unicode::Collate is included in this .t file.
#
# UCA conformance test requires completely ignorable characters
-# (including noncharacters) must be able to be ordered in code point order;
+# (including noncharacters) must be able to be sorted in code point order.
# If not so, Unicode::Collate must not be compliant with UCA.
-#
+#
# ~~~ CollationTest_SHIFTED.txt in CollationTest-4.0.0
#
# 206F 0021; # ! NOMINAL DIGIT SHAPES [| | | 0251]
diff --git a/lib/Unicode/Collate/t/index.t b/lib/Unicode/Collate/t/index.t
index a1d67d5346..5b6c78d968 100644
--- a/lib/Unicode/Collate/t/index.t
+++ b/lib/Unicode/Collate/t/index.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -339,10 +336,10 @@ $ret = $Collator->gsubst($str, "perl", \&strreverse);
ok($ret, 2);
ok($str, "lr\cB\x{300}e\cBP and LREP.");
-$str = "Camel ass came\x{301}l CAMEL horse cAm\0E\0L...";
+$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
-ok($str,
-"<b>Camel</b> ass <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
+ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
+ . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
$Collator->change(level => 3);
@@ -401,3 +398,4 @@ $Collator->change(alternate => 'Non-ignorable');
($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
ok($ret, undef);
+
diff --git a/lib/Unicode/Collate/t/normal.t b/lib/Unicode/Collate/t/normal.t
index 026240d6fa..57ea03368b 100644
--- a/lib/Unicode/Collate/t/normal.t
+++ b/lib/Unicode/Collate/t/normal.t
@@ -4,12 +4,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -44,7 +41,7 @@ FF21; [.0A87.0020.0008] # LATIN CAPITAL LETTER A
00C5; [.0AC5.0020.0008] # LATIN CAPITAL LETTER A WITH RING ABOVE
ENTRIES
-# Aong < A+ring < Z < fullA+ring < A-ring
+# Aong < A+ring < Z < fullA+ring < A-ring
#########################
diff --git a/lib/Unicode/Collate/t/override.t b/lib/Unicode/Collate/t/override.t
new file mode 100644
index 0000000000..3e48e15f29
--- /dev/null
+++ b/lib/Unicode/Collate/t/override.t
@@ -0,0 +1,187 @@
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 76 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+##### 2..6
+
+my $all_undef_8 = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => undef,
+ overrideHangul => undef,
+ UCA_Version => 8,
+);
+
+# All in the Unicode code point order.
+# No hangul decomposition.
+
+ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
+ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
+ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
+ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
+ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
+
+
+##### 7..11
+
+my $all_undef_9 = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => undef,
+ overrideHangul => undef,
+ UCA_Version => 9,
+);
+
+# CJK Ideo. < CJK ext A/B < Others.
+# No hangul decomposition.
+
+ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
+ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
+ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
+ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
+ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
+
+##### 12..16
+
+my $ignoreHangul = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideHangul => sub {()},
+ entry => <<'ENTRIES',
+AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
+ENTRIES
+);
+
+# All Hangul Syllables except U+AE00 are ignored.
+
+ok($ignoreHangul->eq("\x{AC00}", ""));
+ok($ignoreHangul->lt("\x{AC00}", "\0"));
+ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
+ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
+ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
+
+
+my $ignoreCJK = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => sub {()},
+ entry => <<'ENTRIES',
+5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
+ENTRIES
+);
+
+# All CJK Unified Ideographs except U+5B57 are ignored.
+
+##### 17..21
+ok($ignoreCJK->eq("\x{4E00}", ""));
+ok($ignoreCJK->lt("\x{4E00}", "\0"));
+ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
+ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
+ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
+
+##### 22..29
+ok($ignoreCJK->eq("\x{3400}", ""));
+ok($ignoreCJK->eq("\x{4DB5}", ""));
+ok($ignoreCJK->eq("\x{9FA5}", ""));
+ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
+ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
+ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
+ok($ignoreCJK->eq("\x{20000}", ""));
+ok($ignoreCJK->eq("\x{2A6D6}", ""));
+
+##### 30..37
+$ignoreCJK->change(UCA_Version => 9);
+ok($ignoreCJK->eq("\x{3400}", ""));
+ok($ignoreCJK->eq("\x{4DB5}", ""));
+ok($ignoreCJK->eq("\x{9FA5}", ""));
+ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
+ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
+ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
+ok($ignoreCJK->eq("\x{20000}", ""));
+ok($ignoreCJK->eq("\x{2A6D6}", ""));
+
+##### 38..45
+$ignoreCJK->change(UCA_Version => 8);
+ok($ignoreCJK->eq("\x{3400}", ""));
+ok($ignoreCJK->eq("\x{4DB5}", ""));
+ok($ignoreCJK->eq("\x{9FA5}", ""));
+ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
+ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
+ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
+ok($ignoreCJK->eq("\x{20000}", ""));
+ok($ignoreCJK->eq("\x{2A6D6}", ""));
+
+##### 46..53
+$ignoreCJK->change(UCA_Version => 14);
+ok($ignoreCJK->eq("\x{3400}", ""));
+ok($ignoreCJK->eq("\x{4DB5}", ""));
+ok($ignoreCJK->eq("\x{9FA5}", ""));
+ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
+ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
+ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
+ok($ignoreCJK->eq("\x{20000}", ""));
+ok($ignoreCJK->eq("\x{2A6D6}", ""));
+
+##### 54..76
+my $overCJK = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ entry => <<'ENTRIES',
+0061 ; [.0101.0020.0002.0061] # latin a
+0041 ; [.0101.0020.0008.0041] # LATIN A
+4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
+ENTRIES
+ overrideCJK => sub {
+ my $u = 0xFFFF - $_[0]; # reversed
+ [$u, 0x20, 0x2, $u];
+ },
+);
+
+ok($overCJK->lt("a", "A")); # diff. at level 3.
+ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
+ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
+ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
+ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
+ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
+ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
+
+$overCJK->change(UCA_Version => 9);
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
+ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
+ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
+
+$overCJK->change(UCA_Version => 14);
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
+ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
+ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
+
diff --git a/lib/Unicode/Collate/t/rearrang.t b/lib/Unicode/Collate/t/rearrang.t
index cc02fa9f79..0977db9e48 100644
--- a/lib/Unicode/Collate/t/rearrang.t
+++ b/lib/Unicode/Collate/t/rearrang.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -28,6 +25,7 @@ ok(1);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
+ UCA_Version => 9,
);
# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)
@@ -73,6 +71,7 @@ my $no_rearrange = Unicode::Collate->new(
table => undef,
normalization => undef,
rearrange => [],
+ UCA_Version => 9,
);
ok($no_rearrange->lt("A", "B"));
@@ -87,6 +86,7 @@ my $undef_rearrange = Unicode::Collate->new(
table => undef,
normalization => undef,
rearrange => undef,
+ UCA_Version => 9,
);
ok($undef_rearrange->lt("A", "B"));
diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t
index 53fa7ca879..a5337a014f 100644
--- a/lib/Unicode/Collate/t/test.t
+++ b/lib/Unicode/Collate/t/test.t
@@ -5,17 +5,14 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
-BEGIN { plan tests => 160 };
+BEGIN { plan tests => 107 };
use strict;
use warnings;
@@ -23,7 +20,17 @@ use Unicode::Collate;
ok(1);
-##### 2..6
+sub _pack_U { Unicode::Collate::pack_U(@_) }
+sub _unpack_U { Unicode::Collate::unpack_U(@_) }
+
+my $A_acute = _pack_U(0xC1);
+my $a_acute = _pack_U(0xE1);
+my $acute = _pack_U(0x0301);
+
+my $hiragana = "\x{3042}\x{3044}";
+my $katakana = "\x{30A2}\x{30A4}";
+
+##### 2..7
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
@@ -32,28 +39,21 @@ my $Collator = Unicode::Collate->new(
ok(ref $Collator, "Unicode::Collate");
-
-ok(
- join(':', $Collator->sort(
- qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /
- ) ),
- join(':',
- qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /
- ),
-);
-
ok($Collator->cmp("", ""), 0);
ok($Collator->eq("", ""));
ok($Collator->cmp("", "perl"), -1);
-##### 7..17
+ok(
+ join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
+ join(':', qw/ aca acha acia acka ada / ),
+);
-sub _pack_U { Unicode::Collate::pack_U(@_) }
-sub _unpack_U { Unicode::Collate::unpack_U(@_) }
+ok(
+ join(':', $Collator->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
+ join(':', qw/ ACA ACHA ACIA ACKA ADA / ),
+);
-my $A_acute = _pack_U(0xC1);
-my $a_acute = _pack_U(0xE1);
-my $acute = _pack_U(0x0301);
+##### 8..18
ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
@@ -71,84 +71,7 @@ ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));
-##### 18..20
-
-eval { require Unicode::Normalize };
-if (!$@) {
- my $NFD = Unicode::Collate->new(
- table => 'keys.txt',
- level => 1,
- entry => <<'ENTRIES',
-0430 ; [.0CB5.0020.0002.0430] # CYRILLIC SMALL LETTER A
-0410 ; [.0CB5.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
-04D3 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
-0430 0308 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
-04D2 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
-0410 0308 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
-0430 3099 ; [.0CBE.0020.0002.04D3] # A WITH KATAKANA VOICED
-0430 3099 0308 ; [.0CBF.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
-ENTRIES
- );
- ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
- ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
- ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
-}
-else {
- ok(1);
- ok(1);
- ok(1);
-}
-
-##### 21..34
-
-my $trad = Unicode::Collate->new(
- table => 'keys.txt',
- normalization => undef,
- ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
- level => 3,
- entry => << 'ENTRIES',
- 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
- 0043 0068 ; [.0A3F.0020.0008.0043] # "Ch" in traditional Spanish
-ENTRIES
-);
-# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
-# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
-# Deutsch sz is included in 'keys.txt';
-
-ok(
- join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
- join(':', qw/ aca acia acka acha ada / ),
-);
-
-ok(
- join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
- join(':', qw/ aca acha acia acka ada / ),
-);
-
-ok($trad->eq("ocho", "oc\cAho")); # UCA v9
-ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
-ok($trad->eq("-", ""));
-ok($trad->gt("ocho", "oc-ho"));
-
-$trad->change(UCA_Version => 8);
-
-ok($trad->gt("ocho", "oc\cAho"));
-ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
-ok($trad->eq("-", ""));
-ok($trad->gt("ocho", "oc-ho"));
-
-$trad->change(UCA_Version => 9);
-
-my $hiragana = "\x{3042}\x{3044}";
-my $katakana = "\x{30A2}\x{30A4}";
-
-# HIRAGANA and KATAKANA are ignorable via ignoreName
-ok($trad->eq($hiragana, ""));
-ok($trad->eq("", $katakana));
-ok($trad->eq($hiragana, $katakana));
-ok($trad->eq($katakana, $hiragana));
-
-##### 35..41
+##### 19..25
$Collator->change(level => 2);
@@ -161,7 +84,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 42..47
+##### 26..31
# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
@@ -171,7 +94,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana
-##### 48..56
+##### 32..40
$Collator->change(%old_level, katakana_before_hiragana => 1);
@@ -186,7 +109,7 @@ ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 57..62
+##### 41..46
$Collator->change(upper_before_lower => 1);
@@ -197,13 +120,15 @@ ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);
-##### 63..68
+##### 47..48
$Collator->change(katakana_before_hiragana => 0);
ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);
+##### 49..52
+
$Collator->change(upper_before_lower => 0);
ok( $Collator->cmp("abc", "ABC"), -1);
@@ -211,7 +136,7 @@ ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );
-##### 69..70
+##### 53..54
my $ignoreAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -222,7 +147,7 @@ my $ignoreAE = Unicode::Collate->new(
ok($ignoreAE->eq("element","lament"));
ok($ignoreAE->eq("Perl","ePrl"));
-##### 71
+##### 55
my $onlyABC = Unicode::Collate->new(
table => undef,
@@ -242,7 +167,7 @@ ok(
join(':', qw/ A aB Ab ABA BAC cAc cc / ),
);
-##### 72..75
+##### 56..59
my $undefAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -255,7 +180,7 @@ ok($Collator->lt("edge","fog"));
ok($undefAE ->gt("lake","like"));
ok($Collator->lt("lake","like"));
-##### 76..85
+##### 60..69
# Table is undefined, then no entry is defined.
@@ -270,7 +195,6 @@ ok($undef_table->lt('', 'A'));
ok($undef_table->lt('ABC', 'B'));
# Hangul should be decomposed (even w/o Unicode::Normalize).
-
ok($undef_table->lt("Perl", "\x{AC00}"));
ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
@@ -280,7 +204,6 @@ ok($undef_table->lt("\x{AE00}", "\x{3042}"));
# U+3042: Hiragana A
# Weight for CJK Ideographs is defined, though.
-
ok($undef_table->lt("", "\x{4E00}"));
ok($undef_table->lt("\x{4E8C}","ABC"));
ok($undef_table->lt("\x{4E00}","\x{3042}"));
@@ -289,7 +212,7 @@ ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
# U+4E8C: Ideograph "TWO"
-##### 86..90
+##### 70..74
my $few_entries = Unicode::Collate->new(
entry => <<'ENTRIES',
@@ -320,105 +243,7 @@ ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
-##### 91..95
-
-my $all_undef_8 = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- overrideCJK => undef,
- overrideHangul => undef,
- UCA_Version => 8,
-);
-
-# All in the Unicode code point order.
-# No hangul decomposition.
-
-ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
-ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
-ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
-ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
-ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
-
-##### 96..100
-
-my $all_undef_9 = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- overrideCJK => undef,
- overrideHangul => undef,
- UCA_Version => 9,
-);
-
-# CJK Ideo. < CJK ext A/B < Others.
-# No hangul decomposition.
-
-ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
-ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
-ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
-ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
-ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
-
-##### 101..105
-
-my $ignoreCJK = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- overrideCJK => sub {()},
- entry => <<'ENTRIES',
-5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
-ENTRIES
-);
-
-# All CJK Unified Ideographs except U+5B57 are ignored.
-
-ok($ignoreCJK->eq("\x{4E00}", ""));
-ok($ignoreCJK->lt("\x{4E00}", "\0"));
-ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
-ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
-ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
-
-##### 106..110
-
-my $ignoreHangul = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- overrideHangul => sub {()},
- entry => <<'ENTRIES',
-AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
-ENTRIES
-);
-
-# All Hangul Syllables except U+AE00 are ignored.
-
-ok($ignoreHangul->eq("\x{AC00}", ""));
-ok($ignoreHangul->lt("\x{AC00}", "\0"));
-ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
-ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
-ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
-
-##### 111..115
-
-my $overCJK = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- entry => <<'ENTRIES',
-0061 ; [.0101.0020.0002.0061] # latin a
-0041 ; [.0101.0020.0008.0041] # LATIN A
-4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
-ENTRIES
- overrideCJK => sub {
- my $u = 0xFFFF - $_[0]; # reversed
- [$u, 0x20, 0x2, $u];
- },
-);
-
-ok($overCJK->lt("a", "A")); # diff. at level 3.
-ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
-ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
-ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
-ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
-
-##### 116..120
+##### 75..79
my $dropArticles = Unicode::Collate->new(
table => "keys.txt",
@@ -436,7 +261,7 @@ ok($dropArticles->lt("the pen", "a pencil"));
ok($Collator->lt("Perl", "The Perl"));
ok($Collator->gt("the pen", "a pencil"));
-##### 121..122
+##### 80..81
my $backLevel1 = Unicode::Collate->new(
table => undef,
@@ -449,7 +274,7 @@ my $backLevel1 = Unicode::Collate->new(
ok($backLevel1->gt("AB", "BA"));
ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));
-##### 123..130
+##### 82..89
my $backLevel2 = Unicode::Collate->new(
table => "keys.txt",
@@ -471,47 +296,8 @@ ok($backLevel2->lt("\x{4E03}", $katakana));
ok($Collator ->gt("\x{4E00}", $hiragana));
ok($Collator ->gt("\x{4E03}", $katakana));
-##### 131..142
-
-# According to Conformance Test,
-# a L3-ignorable is treated as a completely ignorable.
-
-my $L3ignorable = Unicode::Collate->new(
- alternate => 'Non-ignorable',
- level => 3,
- table => undef,
- normalization => undef,
- entry => <<'ENTRIES',
-0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
-0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
-0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
-1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
-0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
-09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
-09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
-09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
-09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
-1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
-1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
-1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
-1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
-ENTRIES
-);
-ok($L3ignorable->lt("\cA", "!"));
-ok($L3ignorable->lt("\x{591}", "!"));
-ok($L3ignorable->eq("\cA", "\x{591}"));
-ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
-ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
-ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
-ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));
-ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
-ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
-ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
-ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
-ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));
-
-##### 143..149
+##### 90..96
my $O_str = Unicode::Collate->new(
table => "keys.txt",
@@ -545,7 +331,7 @@ ok($Collator->eq("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "A"));
-##### 150..159
+##### 97..107
my %origVer = $Collator->change(UCA_Version => 8);
diff --git a/lib/Unicode/Collate/t/trailwt.t b/lib/Unicode/Collate/t/trailwt.t
index 463252cf1c..e987f8f509 100644
--- a/lib/Unicode/Collate/t/trailwt.t
+++ b/lib/Unicode/Collate/t/trailwt.t
@@ -4,12 +4,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
diff --git a/lib/Unicode/Collate/t/variable.t b/lib/Unicode/Collate/t/variable.t
index 880327a6bd..1a6bd6495b 100644
--- a/lib/Unicode/Collate/t/variable.t
+++ b/lib/Unicode/Collate/t/variable.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
diff --git a/lib/Unicode/Collate/t/version.t b/lib/Unicode/Collate/t/version.t
index fec144c9d7..17adf539ca 100644
--- a/lib/Unicode/Collate/t/version.t
+++ b/lib/Unicode/Collate/t/version.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -26,8 +23,8 @@ ok(1);
#########################
# Fix me when UCA and/or keys.txt is upgraded.
-my $UCA_Version = "11";
-my $Base_Unicode_Version = "4.0";
+my $UCA_Version = "14";
+my $Base_Unicode_Version = "4.1.0";
my $Key_Version = "3.1.1";
ok(Unicode::Collate::UCA_Version, $UCA_Version);
diff --git a/lib/Unicode/Collate/t/view.t b/lib/Unicode/Collate/t/view.t
index 578d4843e5..44963f4189 100644
--- a/lib/Unicode/Collate/t/view.t
+++ b/lib/Unicode/Collate/t/view.t
@@ -5,12 +5,9 @@ BEGIN {
"cannot stringify a Unicode code point\n";
exit 0;
}
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}