summary | refs | log | tree | commit | diff
path: root/cpan/Unicode-Collate/t
diff options
context:
space:
mode:
author: Chris 'BinGOs' Williams <chris@bingosnet.co.uk>, 2011-05-19 00:41:54 +0100
committer: Chris 'BinGOs' Williams <chris@bingosnet.co.uk>, 2011-06-09 12:17:12 +0100
commit: 192652842bbfd287493bef71f834c57b6dfd08e7 (patch)
tree: 7573937045310233970d448bd35a97bdf8ef1558 /cpan/Unicode-Collate/t
parent: e0a65de5f30351e0d7a451c03e62f80a5793c3b4 (diff)
download: perl-192652842bbfd287493bef71f834c57b6dfd08e7.tar.gz
Updated Unicode-Collate to CPAN version 0.76
[DELTA]

0.76 Sun May 15 10:06:59 2011
  - updated CJK/Pinyin.pm and CJK/Stroke.pm according to CLDR 1.9.1
    using type='pinyin' alt='short' and type='stroke' alt='short' respectively.

0.75 Sat May 7 21:07:38 2011
  - supported ignore_level2 and rewrite.
  - Added iglevel2.t and rewrite.t in t.

0.74 Mon Mar 21 19:07:38 2011
  - removed sw (Swahili) collation according to CLDR 1.9.
    (removed files: Collate/Locale/sw.pl and data/sw.txt)
  - shifted primary weights of letters > Z for some languages.
    (affected locales: da, fi, fo, kl, nb, nn, sv)
Diffstat (limited to 'cpan/Unicode-Collate/t')
-rw-r--r-- cpan/Unicode-Collate/t/cjk_b5.t   |  19
-rw-r--r-- cpan/Unicode-Collate/t/cjk_gb.t   |  19
-rw-r--r-- cpan/Unicode-Collate/t/cjk_ja.t   |  23
-rw-r--r-- cpan/Unicode-Collate/t/cjk_ko.t   |  15
-rw-r--r-- cpan/Unicode-Collate/t/cjk_py.t   |  51
-rw-r--r-- cpan/Unicode-Collate/t/cjk_st.t   |  25
-rw-r--r-- cpan/Unicode-Collate/t/iglevel2.t | 218
-rw-r--r-- cpan/Unicode-Collate/t/loc_da.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_es.t   |   9
-rw-r--r-- cpan/Unicode-Collate/t/loc_fi.t   |  19
-rw-r--r-- cpan/Unicode-Collate/t/loc_fo.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_kl.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_nb.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_nn.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_sv.t   |   2
-rw-r--r-- cpan/Unicode-Collate/t/loc_sw.t   |  94
-rw-r--r-- cpan/Unicode-Collate/t/loc_zhpy.t |  50
-rw-r--r-- cpan/Unicode-Collate/t/loc_zhst.t |   6
-rw-r--r-- cpan/Unicode-Collate/t/rewrite.t  |  92
19 files changed, 484 insertions, 168 deletions
diff --git a/cpan/Unicode-Collate/t/cjk_b5.t b/cpan/Unicode-Collate/t/cjk_b5.t
index 7da07ea7df..a973a8455d 100644
--- a/cpan/Unicode-Collate/t/cjk_b5.t
+++ b/cpan/Unicode-Collate/t/cjk_b5.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 26 };
+BEGIN { plan tests => 28 };
use strict;
use warnings;
@@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::Big5::weightBig5
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
ok($collator->lt("\x{5159}", "\x{515B}"));
@@ -61,3 +69,12 @@ ok($collator->lt("\x{20002}", "\x{20003}"));
ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
+# 26
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '4E00 4E03 4E5D 4E8C 516B 5341 4E09 4E94 516D 56DB');
+
+ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'),
+ '5140 4F5C 5750 963F 57C3 5EA7 554A 9F3E 4E0C 4E8D 9F3D 9F44');
+
+# 28
diff --git a/cpan/Unicode-Collate/t/cjk_gb.t b/cpan/Unicode-Collate/t/cjk_gb.t
index 389a4e8421..f8fed99df2 100644
--- a/cpan/Unicode-Collate/t/cjk_gb.t
+++ b/cpan/Unicode-Collate/t/cjk_gb.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 23 };
+BEGIN { plan tests => 25 };
use strict;
use warnings;
@@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::GB2312::weightGB2312
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
ok($collator->lt("\x{554A}", "\x{963F}"));
@@ -59,3 +67,12 @@ ok($collator->lt("\x{20002}", "\x{20003}"));
ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
+# 23
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '516B 4E8C 4E5D 516D 4E03 4E09 5341 56DB 4E94 4E00');
+
+ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'),
+ '554A 963F 57C3 4F5C 5750 5EA7 4E8D 4E0C 5140 9F3D 9F3E 9F44');
+
+# 25
diff --git a/cpan/Unicode-Collate/t/cjk_ja.t b/cpan/Unicode-Collate/t/cjk_ja.t
index cc6853670d..cf78472879 100644
--- a/cpan/Unicode-Collate/t/cjk_ja.t
+++ b/cpan/Unicode-Collate/t/cjk_ja.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 31 };
+BEGIN { plan tests => 33 };
use strict;
use warnings;
@@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::JISX0208::weightJISX0208
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
# first ten kanji
@@ -58,6 +66,13 @@ ok($collator->lt("\x{20002}", "\x{20003}"));
ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
+# 21
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '4E00 4E5D 4E94 4E09 56DB 4E03 5341 4E8C 516B 516D');
+
+# 22
+
$collator->change(overrideCJK => undef);
ok($collator->lt("\x{4E00}", "\x{4E01}"));
@@ -72,3 +87,9 @@ ok($collator->lt("\x{9F9D}", "\x{9F9E}"));
ok($collator->lt("\x{9F9E}", "\x{9F9F}"));
ok($collator->lt("\x{9F9F}", "\x{9FA0}"));
+# 32
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '4E00 4E03 4E09 4E5D 4E8C 4E94 516B 516D 5341 56DB');
+
+# 33
diff --git a/cpan/Unicode-Collate/t/cjk_ko.t b/cpan/Unicode-Collate/t/cjk_ko.t
index 69f4c7cc4c..8be7106594 100644
--- a/cpan/Unicode-Collate/t/cjk_ko.t
+++ b/cpan/Unicode-Collate/t/cjk_ko.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 51 };
+BEGIN { plan tests => 52 };
use strict;
use warnings;
@@ -29,6 +29,14 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::Korean::weightKorean
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
ok($collator->eq("\x{AC00}", "\x{4F3D}"));
@@ -98,3 +106,8 @@ ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
# 51
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '4E5D 516D 56DB 4E09 5341 4E94 4E8C 4E00 4E03 516B');
+
+# 52
diff --git a/cpan/Unicode-Collate/t/cjk_py.t b/cpan/Unicode-Collate/t/cjk_py.t
index ec800abbc1..04f3eb3b04 100644
--- a/cpan/Unicode-Collate/t/cjk_py.t
+++ b/cpan/Unicode-Collate/t/cjk_py.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 25 };
+BEGIN { plan tests => 19 };
use strict;
use warnings;
@@ -30,29 +30,27 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::Pinyin::weightPinyin
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
-ok($collator->lt("\x{5416}", "\x{963F}"));
-ok($collator->lt("\x{963F}", "\x{554A}"));
-ok($collator->lt("\x{554A}", "\x{9515}"));
-ok($collator->lt("\x{9515}", "\x{9312}"));
-ok($collator->lt("\x{9312}", "\x{55C4}"));
-ok($collator->lt("\x{55C4}", "\x{5391}"));
-ok($collator->lt("\x{5391}", "\x{54CE}"));
-ok($collator->lt("\x{54CE}", "\x{54C0}"));
-ok($collator->lt("\x{54C0}", "\x{5509}"));
-ok($collator->lt("\x{5509}", "\x{57C3}"));
-
-ok($collator->lt("\x{57C3}", "\x{4E00}"));
-ok($collator->lt("\x{4E00}", "\x{8331}"));
-
-ok($collator->lt("\x{5EA7}", "\x{888F}"));
-ok($collator->lt("\x{888F}", "\x{505A}"));
-ok($collator->lt("\x{505A}", "\x{8444}"));
-ok($collator->lt("\x{8444}", "\x{84D9}"));
-ok($collator->lt("\x{84D9}", "\x{98F5}"));
-ok($collator->lt("\x{98F5}", "\x{7CF3}"));
-ok($collator->lt("\x{7CF3}", "\x{5497}"));
+ok($collator->lt("\x{963F}", "\x{5730}"));
+ok($collator->lt("\x{5730}", "\x{7ACB}"));
+ok($collator->lt("\x{7ACB}", "\x{4EBA}"));
+ok($collator->lt("\x{4EBA}", "\x{65E5}"));
+ok($collator->lt("\x{65E5}", "\x{4E0A}"));
+ok($collator->lt("\x{4E0A}", "\x{5929}"));
+ok($collator->lt("\x{5929}", "\x{4E0B}"));
+ok($collator->lt("\x{4E0B}", "\x{65BC}"));
+ok($collator->lt("\x{65BC}", "\x{4E2D}"));
+ok($collator->lt("\x{4E2D}", "\x{7AFA}"));
+ok($collator->lt("\x{7AFA}", "\x{5750}"));
# Ext.B
ok($collator->lt("\x{20000}", "\x{20001}"));
@@ -61,3 +59,12 @@ ok($collator->lt("\x{20002}", "\x{20003}"));
ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
+# 17
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '516B 4E8C 4E5D 516D 4E03 4E09 5341 56DB 4E94 4E00');
+
+ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'),
+ '963F 554A 57C3 4E8D 9F3E 4E0C 9F3D 5140 9F44 4F5C 5750 5EA7');
+
+# 19
diff --git a/cpan/Unicode-Collate/t/cjk_st.t b/cpan/Unicode-Collate/t/cjk_st.t
index 4ebbdec2d3..1528dac7c5 100644
--- a/cpan/Unicode-Collate/t/cjk_st.t
+++ b/cpan/Unicode-Collate/t/cjk_st.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 24 };
+BEGIN { plan tests => 26 };
use strict;
use warnings;
@@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new(
overrideCJK => \&Unicode::Collate::CJK::Stroke::weightStroke
);
+sub hex_sort {
+ my @source = map pack('U', hex $_), split ' ', shift;
+ my @sorted = $collator->sort(@source);
+ return join " ", map sprintf("%04X", unpack 'U', $_), @sorted;
+}
+
+# 1
+
$collator->change(level => 1);
ok($collator->lt("\x{4E00}", "\x{4E59}"));
@@ -44,9 +52,9 @@ ok($collator->lt("\x{4EBA}", "\x{513F}"));
ok($collator->lt("\x{513F}", "\x{5165}"));
ok($collator->lt("\x{9E1D}", "\x{7069}"));
-ok($collator->lt("\x{7069}", "\x{7C72}"));
-ok($collator->lt("\x{7C72}", "\x{706A}"));
-ok($collator->lt("\x{706A}", "\x{7229}"));
+ok($collator->lt("\x{7069}", "\x{706A}"));
+ok($collator->lt("\x{706A}", "\x{7C72}"));
+ok($collator->lt("\x{7C72}", "\x{7229}"));
ok($collator->lt("\x{7229}", "\x{9EA4}"));
ok($collator->lt("\x{9EA4}", "\x{9F7E}"));
ok($collator->lt("\x{9F7E}", "\x{9F49}"));
@@ -59,3 +67,12 @@ ok($collator->lt("\x{20002}", "\x{20003}"));
ok($collator->lt("\x{20003}", "\x{20004}"));
ok($collator->lt("\x{20004}", "\x{20005}"));
+# 24
+
+ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'),
+ '4E00 4E03 4E5D 4E8C 516B 5341 4E09 4E94 516D 56DB');
+
+ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'),
+ '4E0C 4E8D 5140 4F5C 5750 963F 57C3 5EA7 554A 9F3D 9F3E 9F44');
+
+# 26
diff --git a/cpan/Unicode-Collate/t/iglevel2.t b/cpan/Unicode-Collate/t/iglevel2.t
new file mode 100644
index 0000000000..f9fdbe0658
--- /dev/null
+++ b/cpan/Unicode-Collate/t/iglevel2.t
@@ -0,0 +1,218 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 42 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+#########################
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+ ignore_level2 => 1,
+ entry => << 'ENTRIES',
+1B00 ; [.0000.00FF.0002.1B00] # BALINESE SIGN ULU RICEM
+1B01 ; [.0000.0100.0002.1B01] # BALINESE SIGN ULU CANDRA
+1B02 ; [.0000.0101.0002.1B02] # BALINESE SIGN CECEK
+03C6 ; [.1900.0020.0002.03C6] # GREEK SMALL LETTER PHI
+03D5 ; [.1900.0020.0004.03D5] # GREEK PHI SYMBOL; QQK
+03A6 ; [.1900.0020.0008.03A6] # GREEK CAPITAL LETTER PHI
+ENTRIES
+);
+
+ok($Collator->eq("camel", "came\x{300}l"));
+ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->lt("camel", "Camel"));
+
+# 4
+
+$Collator->change(ignore_level2 => 0);
+
+ok($Collator->lt("camel", "came\x{300}l"));
+ok($Collator->lt("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->lt("camel", "Camel"));
+
+$Collator->change(level => 1);
+
+ok($Collator->eq("camel", "came\x{300}l"));
+ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->eq("camel", "Camel"));
+
+$Collator->change(level => 2);
+
+ok($Collator->lt("camel", "came\x{300}l"));
+ok($Collator->lt("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->eq("camel", "Camel"));
+
+# 13
+
+$Collator->change(ignore_level2 => 1);
+
+ok($Collator->eq("camel", "came\x{300}l"));
+ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->eq("camel", "Camel"));
+
+$Collator->change(level => 3);
+
+ok($Collator->eq("camel", "came\x{300}l"));
+ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->lt("camel", "Camel"));
+
+# secondary: neither 00FF nor 0100 is zero
+ok($Collator->eq("camel", "came\x{1B00}l"));
+ok($Collator->eq("camel", "came\x{1B01}l"));
+ok($Collator->eq("camel", "came\x{1B02}l"));
+
+# primary: 1900 isn't zero
+ok($Collator->lt("\x{03C6}", "\x{03D5}"));
+ok($Collator->lt("\x{03D5}", "\x{03A6}"));
+
+# 24
+
+{
+ my $s;
+ my $txt = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L.";
+
+ $Collator->change(ignore_level2 => 0, level => 1);
+
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=.");
+
+ $Collator->change(level => 2);
+
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "=Camel= donkey zebra came\x{301}l =CAMEL= horse =cAm\0E\0L=.");
+
+ $Collator->change(level => 3);
+
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L.");
+
+ $Collator->change(ignore_level2 => 1);
+
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse cAm\0E\0L.");
+
+ $Collator->change(level => 2);
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=.");
+
+ $Collator->change(level => 1);
+ $s = $txt;
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=.");
+
+}
+
+# 30
+
+{
+ my $c = Unicode::Collate->new(
+ table => 'keys.txt', normalization => undef, level => 1,
+ );
+ my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l...";
+ $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
+ ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cam\0e\0l</b>...");
+}
+
+{
+ my $c = Unicode::Collate->new(
+ table => 'keys.txt', normalization => undef, level => 2,
+ );
+ my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l...";
+ $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
+ ok($str, "<b>Camel</b> donkey zebra came\x{301}l <b>CAMEL</b> horse <b>cam\0e\0l</b>...");
+}
+
+{
+ my $c = Unicode::Collate->new(
+ table => 'keys.txt', normalization => undef, ignore_level2 => 1,
+ );
+ my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l...";
+ $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
+ ok($str, "Camel donkey zebra <b>came\x{301}l</b> CAMEL horse <b>cam\0e\0l</b>...");
+}
+
+{
+ my $c = Unicode::Collate->new(
+ table => 'keys.txt', normalization => undef, level => 3,
+ );
+ my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l...";
+ $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
+ ok($str, "Camel donkey zebra came\x{301}l CAMEL horse <b>cam\0e\0l</b>...");
+}
+
+# 34
+
+{
+ my $str;
+ my $camel = "camel Camel came\x{301}l c-a-m-e-l cam\0e\0l";
+
+ $Collator->change(ignore_level2 => 0);
+
+ $Collator->change(level => 1);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 2);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= =Camel= came\x{301}l =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 3);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= Camel came\x{301}l =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 4);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= Camel came\x{301}l c-a-m-e-l =cam\0e\0l=");
+
+ $Collator->change(ignore_level2 => 1);
+
+ $Collator->change(level => 1);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 2);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 3);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= Camel =came\x{301}l= =c-a-m-e-l= =cam\0e\0l=");
+
+ $Collator->change(level => 4);
+ $str = $camel;
+ $Collator->gsubst($str, "camel", sub { "=$_[0]=" });
+ ok($str, "=camel= Camel =came\x{301}l= c-a-m-e-l =cam\0e\0l=");
+}
+
+# 42
+
diff --git a/cpan/Unicode-Collate/t/loc_da.t b/cpan/Unicode-Collate/t/loc_da.t
index e21afec813..f8ca07dbd2 100644
--- a/cpan/Unicode-Collate/t/loc_da.t
+++ b/cpan/Unicode-Collate/t/loc_da.t
@@ -49,7 +49,7 @@ $objDa->change(level => 1);
ok($objDa->lt('z', $ae));
ok($objDa->lt($ae, $ostk));
ok($objDa->lt($ostk, $arng));
-ok($objDa->lt($arng, "\x{292}"));
+ok($objDa->lt($arng, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_es.t b/cpan/Unicode-Collate/t/loc_es.t
index 2491b38a30..ae7d054a78 100644
--- a/cpan/Unicode-Collate/t/loc_es.t
+++ b/cpan/Unicode-Collate/t/loc_es.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 26 };
+BEGIN { plan tests => 28 };
use strict;
use warnings;
@@ -69,3 +69,10 @@ ok($objEs->eq("n\x{303}", pack('U', 0xF1)));
ok($objEs->eq("N\x{303}", pack('U', 0xD1)));
# 26
+
+$objEs->change(level => 2, ignore_level2 => 1);
+
+ok($objEs->lt("n", "n\x{303}"));
+ok($objEs->eq("a", "a\x{303}"));
+
+# 28
diff --git a/cpan/Unicode-Collate/t/loc_fi.t b/cpan/Unicode-Collate/t/loc_fi.t
index b9bedc3a96..f555c19bef 100644
--- a/cpan/Unicode-Collate/t/loc_fi.t
+++ b/cpan/Unicode-Collate/t/loc_fi.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 82 };
+BEGIN { plan tests => 83 };
use strict;
use warnings;
@@ -42,11 +42,12 @@ ok($objFi->getlocale, 'fi');
$objFi->change(level => 1);
-ok($objFi->lt('z', $arng));
+ok($objFi->lt('z', $arng));
ok($objFi->lt($arng, $auml));
ok($objFi->lt($auml, $ouml));
+ok($objFi->lt($ouml, "\x{1C0}"));
-# 5
+# 6
ok($objFi->eq("d\x{335}", "\x{111}"));
ok($objFi->eq("g\x{335}", "\x{1E5}"));
@@ -58,7 +59,7 @@ ok($objFi->eq('y', $uuml));
ok($objFi->eq($auml, $ae));
ok($objFi->eq($ouml, $ostk));
-# 14
+# 15
$objFi->change(level => 2);
@@ -72,7 +73,7 @@ ok($objFi->lt('y', $uuml));
ok($objFi->lt($auml, $ae));
ok($objFi->lt($ouml, $ostk));
-# 23
+# 24
ok($objFi->eq("\x{111}", "\x{110}"));
ok($objFi->eq("\x{1E5}", "\x{1E4}"));
@@ -88,7 +89,7 @@ ok($objFi->eq($AE, "\x{1D2D}"));
ok($objFi->eq($ouml, $Ouml));
ok($objFi->eq($ostk, $Ostk));
-# 36
+# 37
$objFi->change(level => 3);
@@ -106,7 +107,7 @@ ok($objFi->lt($AE, "\x{1D2D}"));
ok($objFi->lt($ouml, $Ouml));
ok($objFi->lt($ostk, $Ostk));
-# 49
+# 50
ok($objFi->eq("u\x{308}", $uuml));
ok($objFi->eq("U\x{308}", $Uuml));
@@ -121,7 +122,7 @@ ok($objFi->eq("O\x{308}", $Ouml));
ok($objFi->eq("o\x{338}", $ostk));
ok($objFi->eq("O\x{338}", $Ostk));
-# 61
+# 62
ok($objFi->eq("u\x{308}\x{300}", "\x{1DC}"));
ok($objFi->eq("U\x{308}\x{300}", "\x{1DB}"));
@@ -145,4 +146,4 @@ ok($objFi->eq("O\x{308}\x{304}", "\x{22A}"));
ok($objFi->eq("o\x{338}\x{301}", "\x{1FF}"));
ok($objFi->eq("O\x{338}\x{301}", "\x{1FE}"));
-# 82
+# 83
diff --git a/cpan/Unicode-Collate/t/loc_fo.t b/cpan/Unicode-Collate/t/loc_fo.t
index 6678b942d0..0a75f71d5d 100644
--- a/cpan/Unicode-Collate/t/loc_fo.t
+++ b/cpan/Unicode-Collate/t/loc_fo.t
@@ -49,7 +49,7 @@ $objFo->change(level => 1);
ok($objFo->lt('z', $ae));
ok($objFo->lt($ae, $ostk));
ok($objFo->lt($ostk, $arng));
-ok($objFo->lt($arng, "\x{292}"));
+ok($objFo->lt($arng, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_kl.t b/cpan/Unicode-Collate/t/loc_kl.t
index b123ed7d42..a8a07d3e02 100644
--- a/cpan/Unicode-Collate/t/loc_kl.t
+++ b/cpan/Unicode-Collate/t/loc_kl.t
@@ -49,7 +49,7 @@ $objKl->change(level => 1);
ok($objKl->lt('z', $ae));
ok($objKl->lt($ae, $ostk));
ok($objKl->lt($ostk, $arng));
-ok($objKl->lt($arng, "\x{292}"));
+ok($objKl->lt($arng, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_nb.t b/cpan/Unicode-Collate/t/loc_nb.t
index 8ca61cbad4..4de166a644 100644
--- a/cpan/Unicode-Collate/t/loc_nb.t
+++ b/cpan/Unicode-Collate/t/loc_nb.t
@@ -49,7 +49,7 @@ $objNb->change(level => 1);
ok($objNb->lt('z', $ae));
ok($objNb->lt($ae, $ostk));
ok($objNb->lt($ostk, $arng));
-ok($objNb->lt($arng, "\x{292}"));
+ok($objNb->lt($arng, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_nn.t b/cpan/Unicode-Collate/t/loc_nn.t
index 50503a32af..6af4447d93 100644
--- a/cpan/Unicode-Collate/t/loc_nn.t
+++ b/cpan/Unicode-Collate/t/loc_nn.t
@@ -49,7 +49,7 @@ $objNn->change(level => 1);
ok($objNn->lt('z', $ae));
ok($objNn->lt($ae, $ostk));
ok($objNn->lt($ostk, $arng));
-ok($objNn->lt($arng, "\x{292}"));
+ok($objNn->lt($arng, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_sv.t b/cpan/Unicode-Collate/t/loc_sv.t
index c905e14717..c28632ea06 100644
--- a/cpan/Unicode-Collate/t/loc_sv.t
+++ b/cpan/Unicode-Collate/t/loc_sv.t
@@ -51,7 +51,7 @@ $objSv->change(level => 1);
ok($objSv->lt('z', $arng));
ok($objSv->lt($arng, $auml));
ok($objSv->lt($auml, $ouml));
-ok($objSv->lt($ouml, "\x{292}"));
+ok($objSv->lt($ouml, "\x{1C0}"));
# 6
diff --git a/cpan/Unicode-Collate/t/loc_sw.t b/cpan/Unicode-Collate/t/loc_sw.t
index 1805c1b497..9b1e78685c 100644
--- a/cpan/Unicode-Collate/t/loc_sw.t
+++ b/cpan/Unicode-Collate/t/loc_sw.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 72 };
+BEGIN { plan tests => 17 };
use strict;
use warnings;
@@ -25,94 +25,24 @@ ok(1);
my $objSw = Unicode::Collate::Locale->
new(locale => 'SW', normalization => undef);
-ok($objSw->getlocale, 'sw');
+ok($objSw->getlocale, "default"); # no tailoring since 0.74
$objSw->change(level => 1);
-ok($objSw->lt("b", "ch"));
-ok($objSw->lt("bz","ch"));
-ok($objSw->gt("c", "ch"));
+ok($objSw->lt("c", "ch"));
+ok($objSw->gt("cz","ch"));
ok($objSw->lt("d", "dh"));
-ok($objSw->lt("dz","dh"));
-ok($objSw->gt("e", "dh"));
+ok($objSw->gt("dz","dh"));
ok($objSw->lt("g", "gh"));
-ok($objSw->lt("gz","gh"));
-ok($objSw->gt("h", "gh"));
+ok($objSw->gt("gz","gh"));
ok($objSw->lt("k", "kh"));
-ok($objSw->lt("kz","kh"));
-ok($objSw->gt("l", "kh"));
+ok($objSw->gt("kz","kh"));
ok($objSw->lt("n", "ng'"));
-ok($objSw->lt("nz","ng'"));
-ok($objSw->lt("ng'","ny"));
-ok($objSw->gt("o", "ny"));
+ok($objSw->gt("ny","ng'"));
+ok($objSw->gt("nz","ny"));
ok($objSw->lt("s", "sh"));
-ok($objSw->lt("sz","sh"));
-ok($objSw->gt("t", "sh"));
+ok($objSw->gt("sz","sh"));
ok($objSw->lt("t", "th"));
-ok($objSw->lt("tz","th"));
-ok($objSw->gt("u", "th"));
+ok($objSw->gt("tz","th"));
-# 24
-
-$objSw->change(level => 2);
-
-ok($objSw->eq("ch", "Ch"));
-ok($objSw->eq("Ch", "CH"));
-ok($objSw->eq("dh", "Dh"));
-ok($objSw->eq("Dh", "DH"));
-ok($objSw->eq("gh", "Gh"));
-ok($objSw->eq("Gh", "GH"));
-ok($objSw->eq("kh", "Kh"));
-ok($objSw->eq("Kh", "KH"));
-ok($objSw->eq("ng'","Ng'"));
-ok($objSw->eq("Ng'","NG'"));
-ok($objSw->eq("ny", "Ny"));
-ok($objSw->eq("Ny", "NY"));
-ok($objSw->eq("sh", "Sh"));
-ok($objSw->eq("Sh", "SH"));
-ok($objSw->eq("th", "Th"));
-ok($objSw->eq("Th", "TH"));
-
-# 40
-
-$objSw->change(level => 3);
-
-ok($objSw->lt("ch", "Ch"));
-ok($objSw->lt("Ch", "CH"));
-ok($objSw->lt("dh", "Dh"));
-ok($objSw->lt("Dh", "DH"));
-ok($objSw->lt("gh", "Gh"));
-ok($objSw->lt("Gh", "GH"));
-ok($objSw->lt("kh", "Kh"));
-ok($objSw->lt("Kh", "KH"));
-ok($objSw->lt("ng'","Ng'"));
-ok($objSw->lt("Ng'","NG'"));
-ok($objSw->lt("ny", "Ny"));
-ok($objSw->lt("Ny", "NY"));
-ok($objSw->lt("sh", "Sh"));
-ok($objSw->lt("Sh", "SH"));
-ok($objSw->lt("th", "Th"));
-ok($objSw->lt("Th", "TH"));
-
-# 56
-
-$objSw->change(upper_before_lower => 1);
-
-ok($objSw->gt("ch", "Ch"));
-ok($objSw->gt("Ch", "CH"));
-ok($objSw->gt("dh", "Dh"));
-ok($objSw->gt("Dh", "DH"));
-ok($objSw->gt("gh", "Gh"));
-ok($objSw->gt("Gh", "GH"));
-ok($objSw->gt("kh", "Kh"));
-ok($objSw->gt("Kh", "KH"));
-ok($objSw->gt("ng'","Ng'"));
-ok($objSw->gt("Ng'","NG'"));
-ok($objSw->gt("ny", "Ny"));
-ok($objSw->gt("Ny", "NY"));
-ok($objSw->gt("sh", "Sh"));
-ok($objSw->gt("Sh", "SH"));
-ok($objSw->gt("th", "Th"));
-ok($objSw->gt("Th", "TH"));
-
-# 72
+# 17
diff --git a/cpan/Unicode-Collate/t/loc_zhpy.t b/cpan/Unicode-Collate/t/loc_zhpy.t
index 193d158e7f..1d376ec996 100644
--- a/cpan/Unicode-Collate/t/loc_zhpy.t
+++ b/cpan/Unicode-Collate/t/loc_zhpy.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 302 };
+BEGIN { plan tests => 283 };
use strict;
use warnings;
@@ -323,40 +323,16 @@ ok($objZhP->eq("E\x{302}\x{323}", "\x{1EC6}"));
$objZhP->change(level => 1);
-ok($objZhP->lt("\x{A000}", "\x{3105}"));
+ok($objZhP->lt("\x{963F}", "\x{5730}"));
+ok($objZhP->lt("\x{5730}", "\x{7ACB}"));
+ok($objZhP->lt("\x{7ACB}", "\x{4EBA}"));
+ok($objZhP->lt("\x{4EBA}", "\x{65E5}"));
+ok($objZhP->lt("\x{65E5}", "\x{4E0A}"));
+ok($objZhP->lt("\x{4E0A}", "\x{5929}"));
+ok($objZhP->lt("\x{5929}", "\x{4E0B}"));
+ok($objZhP->lt("\x{4E0B}", "\x{65BC}"));
+ok($objZhP->lt("\x{65BC}", "\x{4E2D}"));
+ok($objZhP->lt("\x{4E2D}", "\x{7AFA}"));
+ok($objZhP->lt("\x{7AFA}", "\x{5750}"));
-ok($objZhP->lt("\x{3105}", "\x{3106}"));
-ok($objZhP->lt("\x{3106}", "\x{3128}"));
-ok($objZhP->lt("\x{3128}", "\x{3129}"));
-ok($objZhP->lt("\x{3129}", "\x{5416}"));
-ok($objZhP->lt("\x{5416}", "\x{963F}"));
-ok($objZhP->lt("\x{963F}", "\x{554A}"));
-ok($objZhP->lt("\x{554A}", "\x{9515}"));
-ok($objZhP->lt("\x{9515}", "\x{9312}"));
-ok($objZhP->lt("\x{9312}", "\x{55C4}"));
-ok($objZhP->lt("\x{55C4}", "\x{5391}"));
-ok($objZhP->lt("\x{5391}", "\x{54CE}"));
-ok($objZhP->lt("\x{54CE}", "\x{54C0}"));
-ok($objZhP->lt("\x{54C0}", "\x{5509}"));
-ok($objZhP->lt("\x{5509}", "\x{57C3}"));
-
-ok($objZhP->lt("\x{6FED}", "\x{FA1F}"));
-ok($objZhP->lt("\x{FA1F}", "\x{85F9}"));
-
-ok($objZhP->lt("\x{57C3}", "\x{4E00}"));
-ok($objZhP->lt("\x{4E00}", "\x{8331}"));
-
-ok($objZhP->lt("\x{8331}", "\x{682A}"));
-ok($objZhP->lt("\x{682A}", "\x{3231}"));
-ok($objZhP->lt("\x{3231}", "\x{73E0}"));
-ok($objZhP->lt("\x{73E0}", "\x{8BF8}"));
-
-ok($objZhP->lt("\x{5EA7}", "\x{888F}"));
-ok($objZhP->lt("\x{888F}", "\x{505A}"));
-ok($objZhP->lt("\x{505A}", "\x{8444}"));
-ok($objZhP->lt("\x{8444}", "\x{84D9}"));
-ok($objZhP->lt("\x{84D9}", "\x{98F5}"));
-ok($objZhP->lt("\x{98F5}", "\x{7CF3}"));
-ok($objZhP->lt("\x{7CF3}", "\x{5497}"));
-
-# 302
+# 283
diff --git a/cpan/Unicode-Collate/t/loc_zhst.t b/cpan/Unicode-Collate/t/loc_zhst.t
index 8f60ed69f0..87f7976f8b 100644
--- a/cpan/Unicode-Collate/t/loc_zhst.t
+++ b/cpan/Unicode-Collate/t/loc_zhst.t
@@ -337,9 +337,9 @@ ok($objZhS->lt("\x{4EBA}", "\x{513F}"));
ok($objZhS->lt("\x{513F}", "\x{5165}"));
ok($objZhS->lt("\x{9E1D}", "\x{7069}"));
-ok($objZhS->lt("\x{7069}", "\x{7C72}"));
-ok($objZhS->lt("\x{7C72}", "\x{706A}"));
-ok($objZhS->lt("\x{706A}", "\x{7229}"));
+ok($objZhS->lt("\x{7069}", "\x{706A}"));
+ok($objZhS->lt("\x{706A}", "\x{7C72}"));
+ok($objZhS->lt("\x{7C72}", "\x{7229}"));
ok($objZhS->lt("\x{7229}", "\x{9EA4}"));
ok($objZhS->lt("\x{9EA4}", "\x{9F7E}"));
ok($objZhS->lt("\x{9F7E}", "\x{9F49}"));
diff --git a/cpan/Unicode-Collate/t/rewrite.t b/cpan/Unicode-Collate/t/rewrite.t
new file mode 100644
index 0000000000..004a0c9647
--- /dev/null
+++ b/cpan/Unicode-Collate/t/rewrite.t
@@ -0,0 +1,92 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 17 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+#########################
+
+my $code = sub {
+ my $line = shift;
+ $line =~ s/\[\.0000\..{4}\..{4}\./[.0000.0000.0000./g;
+ return $line;
+ };
+
+#####
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt', normalization => undef, rewrite => $code,
+);
+
+ok($Collator->eq("camel", "came\x{300}l"));
+ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
+ok($Collator->lt("camel", "Camel"));
+{
+ my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
+ $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
+}
+
+# 5
+
+my $rewriteDUCET = Unicode::Collate->new(
+ normalization => undef, rewrite => $code,
+);
+
+ok($rewriteDUCET->eq("camel", "came\x{300}l"));
+ok($rewriteDUCET->eq("camel", "ca\x{300}me\x{301}l"));
+ok($rewriteDUCET->lt("camel", "Camel"));
+{
+ my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
+ $rewriteDUCET->gsubst($s, "camel", sub { "=$_[0]=" });
+ ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
+}
+
+# 9
+
+my $undef_hira = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+ level => 1,
+ rewrite => sub {
+ my $line = shift;
+ return '' if $line =~ /HIRAGANA/;
+ return $line;
+ },
+);
+
+my $hiragana = "\x{3042}\x{3044}";
+my $katakana = "\x{30A2}\x{30A4}";
+my $cjkkanji = "\x{4E00}";
+
+# HIRAGANA are undefined via rewrite
+# So they are after CJK Unified Ideographs.
+
+ok($undef_hira->lt("abc", "perl"));
+ok($undef_hira->lt("", "ABC"));
+ok($undef_hira->lt($katakana, $hiragana));
+ok($undef_hira->lt($katakana, $cjkkanji));
+ok($undef_hira->lt($cjkkanji, $hiragana));
+
+$Collator->change(level => 1);
+ok($Collator->eq($katakana, $hiragana));
+ok($Collator->lt($katakana, $cjkkanji));
+ok($Collator->gt($cjkkanji, $hiragana));
+
+# 17