diff options
author | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2011-05-19 00:41:54 +0100 |
---|---|---|
committer | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2011-06-09 12:17:12 +0100 |
commit | 192652842bbfd287493bef71f834c57b6dfd08e7 (patch) | |
tree | 7573937045310233970d448bd35a97bdf8ef1558 /cpan/Unicode-Collate/t | |
parent | e0a65de5f30351e0d7a451c03e62f80a5793c3b4 (diff) | |
download | perl-192652842bbfd287493bef71f834c57b6dfd08e7.tar.gz |
Updated Unicode-Collate to CPAN version 0.76
[DELTA]
0.76 Sun May 15 10:06:59 2011
- updated CJK/Pinyin.pm and CJK/Stroke.pm according to CLDR 1.9.1 using
type='pinyin' alt='short' and type='stroke' alt='short' respectively.
0.75 Sat May 7 21:07:38 2011
- supported ignore_level2 and rewrite.
- Added iglevel2.t and rewrite.t in t.
0.74 Mon Mar 21 19:07:38 2011
- removed sw (Swahili) collation according to CLDR 1.9.
(removed files: Collate/Locale/sw.pl and data/sw.txt)
- shifted primary weights of letters > Z for some languages.
(affected locales: da, fi, fo, kl, nb, nn, sv)
Diffstat (limited to 'cpan/Unicode-Collate/t')
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_b5.t | 19 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_gb.t | 19 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_ja.t | 23 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_ko.t | 15 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_py.t | 51 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/cjk_st.t | 25 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/iglevel2.t | 218 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_da.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_es.t | 9 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_fi.t | 19 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_fo.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_kl.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_nb.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_nn.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_sv.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_sw.t | 94 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_zhpy.t | 50 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_zhst.t | 6 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/rewrite.t | 92 |
19 files changed, 484 insertions, 168 deletions
diff --git a/cpan/Unicode-Collate/t/cjk_b5.t b/cpan/Unicode-Collate/t/cjk_b5.t index 7da07ea7df..a973a8455d 100644 --- a/cpan/Unicode-Collate/t/cjk_b5.t +++ b/cpan/Unicode-Collate/t/cjk_b5.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 26 }; +BEGIN { plan tests => 28 }; use strict; use warnings; @@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::Big5::weightBig5 ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); ok($collator->lt("\x{5159}", "\x{515B}")); @@ -61,3 +69,12 @@ ok($collator->lt("\x{20002}", "\x{20003}")); ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); +# 26 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '4E00 4E03 4E5D 4E8C 516B 5341 4E09 4E94 516D 56DB'); + +ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'), + '5140 4F5C 5750 963F 57C3 5EA7 554A 9F3E 4E0C 4E8D 9F3D 9F44'); + +# 28 diff --git a/cpan/Unicode-Collate/t/cjk_gb.t b/cpan/Unicode-Collate/t/cjk_gb.t index 389a4e8421..f8fed99df2 100644 --- a/cpan/Unicode-Collate/t/cjk_gb.t +++ b/cpan/Unicode-Collate/t/cjk_gb.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 23 }; +BEGIN { plan tests => 25 }; use strict; use warnings; @@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::GB2312::weightGB2312 ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); ok($collator->lt("\x{554A}", "\x{963F}")); @@ -59,3 +67,12 @@ ok($collator->lt("\x{20002}", "\x{20003}")); ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); +# 23 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '516B 4E8C 4E5D 516D 4E03 4E09 5341 56DB 4E94 4E00'); + +ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'), + '554A 963F 57C3 4F5C 5750 5EA7 4E8D 4E0C 5140 9F3D 9F3E 9F44'); + +# 25 diff --git a/cpan/Unicode-Collate/t/cjk_ja.t b/cpan/Unicode-Collate/t/cjk_ja.t index cc6853670d..cf78472879 100644 --- a/cpan/Unicode-Collate/t/cjk_ja.t +++ b/cpan/Unicode-Collate/t/cjk_ja.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 31 }; +BEGIN { plan tests => 33 }; use strict; use warnings; @@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::JISX0208::weightJISX0208 ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); # first ten kanji @@ -58,6 +66,13 @@ ok($collator->lt("\x{20002}", "\x{20003}")); ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); +# 21 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '4E00 4E5D 4E94 4E09 56DB 4E03 5341 4E8C 516B 516D'); + +# 22 + $collator->change(overrideCJK => undef); ok($collator->lt("\x{4E00}", "\x{4E01}")); @@ -72,3 +87,9 @@ ok($collator->lt("\x{9F9D}", "\x{9F9E}")); ok($collator->lt("\x{9F9E}", "\x{9F9F}")); ok($collator->lt("\x{9F9F}", "\x{9FA0}")); +# 32 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '4E00 4E03 4E09 4E5D 4E8C 4E94 516B 516D 5341 56DB'); + +# 33 diff --git a/cpan/Unicode-Collate/t/cjk_ko.t b/cpan/Unicode-Collate/t/cjk_ko.t index 69f4c7cc4c..8be7106594 100644 --- a/cpan/Unicode-Collate/t/cjk_ko.t +++ b/cpan/Unicode-Collate/t/cjk_ko.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 51 }; +BEGIN { plan tests => 52 }; use strict; use warnings; @@ -29,6 +29,14 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::Korean::weightKorean ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); ok($collator->eq("\x{AC00}", "\x{4F3D}")); @@ -98,3 +106,8 @@ ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); # 51 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '4E5D 516D 56DB 4E09 5341 4E94 4E8C 4E00 4E03 516B'); + +# 52 diff --git a/cpan/Unicode-Collate/t/cjk_py.t b/cpan/Unicode-Collate/t/cjk_py.t index ec800abbc1..04f3eb3b04 100644 --- a/cpan/Unicode-Collate/t/cjk_py.t +++ b/cpan/Unicode-Collate/t/cjk_py.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 25 }; +BEGIN { plan tests => 19 }; use strict; use warnings; @@ -30,29 +30,27 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::Pinyin::weightPinyin ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); -ok($collator->lt("\x{5416}", "\x{963F}")); -ok($collator->lt("\x{963F}", "\x{554A}")); -ok($collator->lt("\x{554A}", "\x{9515}")); -ok($collator->lt("\x{9515}", "\x{9312}")); -ok($collator->lt("\x{9312}", "\x{55C4}")); -ok($collator->lt("\x{55C4}", "\x{5391}")); -ok($collator->lt("\x{5391}", "\x{54CE}")); -ok($collator->lt("\x{54CE}", "\x{54C0}")); -ok($collator->lt("\x{54C0}", "\x{5509}")); -ok($collator->lt("\x{5509}", "\x{57C3}")); - -ok($collator->lt("\x{57C3}", "\x{4E00}")); -ok($collator->lt("\x{4E00}", "\x{8331}")); - -ok($collator->lt("\x{5EA7}", "\x{888F}")); -ok($collator->lt("\x{888F}", "\x{505A}")); -ok($collator->lt("\x{505A}", "\x{8444}")); -ok($collator->lt("\x{8444}", "\x{84D9}")); -ok($collator->lt("\x{84D9}", "\x{98F5}")); -ok($collator->lt("\x{98F5}", "\x{7CF3}")); -ok($collator->lt("\x{7CF3}", "\x{5497}")); +ok($collator->lt("\x{963F}", "\x{5730}")); +ok($collator->lt("\x{5730}", "\x{7ACB}")); +ok($collator->lt("\x{7ACB}", "\x{4EBA}")); +ok($collator->lt("\x{4EBA}", "\x{65E5}")); +ok($collator->lt("\x{65E5}", "\x{4E0A}")); +ok($collator->lt("\x{4E0A}", "\x{5929}")); +ok($collator->lt("\x{5929}", "\x{4E0B}")); +ok($collator->lt("\x{4E0B}", "\x{65BC}")); +ok($collator->lt("\x{65BC}", "\x{4E2D}")); +ok($collator->lt("\x{4E2D}", "\x{7AFA}")); +ok($collator->lt("\x{7AFA}", "\x{5750}")); # Ext.B ok($collator->lt("\x{20000}", "\x{20001}")); @@ -61,3 +59,12 @@ ok($collator->lt("\x{20002}", "\x{20003}")); ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); +# 17 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '516B 4E8C 4E5D 516D 4E03 4E09 5341 56DB 4E94 4E00'); + +ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'), + '963F 554A 57C3 4E8D 9F3E 4E0C 9F3D 5140 9F44 4F5C 5750 5EA7'); + +# 19 diff --git a/cpan/Unicode-Collate/t/cjk_st.t b/cpan/Unicode-Collate/t/cjk_st.t index 4ebbdec2d3..1528dac7c5 100644 --- a/cpan/Unicode-Collate/t/cjk_st.t +++ b/cpan/Unicode-Collate/t/cjk_st.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 24 }; +BEGIN { plan tests => 26 }; use strict; use warnings; @@ -30,6 +30,14 @@ my $collator = Unicode::Collate->new( overrideCJK => \&Unicode::Collate::CJK::Stroke::weightStroke ); +sub hex_sort { + my @source = map pack('U', hex $_), split ' ', shift; + my @sorted = $collator->sort(@source); + return join " ", map sprintf("%04X", unpack 'U', $_), @sorted; +} + +# 1 + $collator->change(level => 1); ok($collator->lt("\x{4E00}", "\x{4E59}")); @@ -44,9 +52,9 @@ ok($collator->lt("\x{4EBA}", "\x{513F}")); ok($collator->lt("\x{513F}", "\x{5165}")); ok($collator->lt("\x{9E1D}", "\x{7069}")); -ok($collator->lt("\x{7069}", "\x{7C72}")); -ok($collator->lt("\x{7C72}", "\x{706A}")); -ok($collator->lt("\x{706A}", "\x{7229}")); +ok($collator->lt("\x{7069}", "\x{706A}")); +ok($collator->lt("\x{706A}", "\x{7C72}")); +ok($collator->lt("\x{7C72}", "\x{7229}")); ok($collator->lt("\x{7229}", "\x{9EA4}")); ok($collator->lt("\x{9EA4}", "\x{9F7E}")); ok($collator->lt("\x{9F7E}", "\x{9F49}")); @@ -59,3 +67,12 @@ ok($collator->lt("\x{20002}", "\x{20003}")); ok($collator->lt("\x{20003}", "\x{20004}")); ok($collator->lt("\x{20004}", "\x{20005}")); +# 24 + +ok(hex_sort('4E00 4E8C 4E09 56DB 4E94 516D 4E03 516B 4E5D 5341'), + '4E00 4E03 4E5D 4E8C 516B 5341 4E09 4E94 516D 56DB'); + +ok(hex_sort('4E0C 4E8D 4F5C 5140 554A 5750 57C3 5EA7 963F 9F3D 9F3E 9F44'), + '4E0C 4E8D 5140 4F5C 5750 963F 57C3 5EA7 554A 9F3D 9F3E 9F44'); + +# 26 diff --git a/cpan/Unicode-Collate/t/iglevel2.t b/cpan/Unicode-Collate/t/iglevel2.t new file mode 100644 index 0000000000..f9fdbe0658 --- /dev/null +++ b/cpan/Unicode-Collate/t/iglevel2.t @@ -0,0 +1,218 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 42 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +######################### + +my $Collator = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignore_level2 => 1, + entry => << 'ENTRIES', +1B00 ; [.0000.00FF.0002.1B00] # BALINESE SIGN ULU RICEM +1B01 ; [.0000.0100.0002.1B01] # BALINESE SIGN ULU CANDRA +1B02 ; [.0000.0101.0002.1B02] # BALINESE SIGN CECEK +03C6 ; [.1900.0020.0002.03C6] # GREEK SMALL LETTER PHI +03D5 ; [.1900.0020.0004.03D5] # GREEK PHI SYMBOL; QQK +03A6 ; [.1900.0020.0008.03A6] # GREEK CAPITAL LETTER PHI +ENTRIES +); + +ok($Collator->eq("camel", "came\x{300}l")); +ok($Collator->eq("camel", "ca\x{300}me\x{301}l")); +ok($Collator->lt("camel", "Camel")); + +# 4 + +$Collator->change(ignore_level2 => 0); + +ok($Collator->lt("camel", "came\x{300}l")); +ok($Collator->lt("camel", "ca\x{300}me\x{301}l")); +ok($Collator->lt("camel", "Camel")); + +$Collator->change(level => 1); + +ok($Collator->eq("camel", "came\x{300}l")); +ok($Collator->eq("camel", "ca\x{300}me\x{301}l")); +ok($Collator->eq("camel", "Camel")); + +$Collator->change(level => 2); + +ok($Collator->lt("camel", "came\x{300}l")); +ok($Collator->lt("camel", "ca\x{300}me\x{301}l")); +ok($Collator->eq("camel", "Camel")); + +# 13 + +$Collator->change(ignore_level2 => 1); + +ok($Collator->eq("camel", "came\x{300}l")); +ok($Collator->eq("camel", "ca\x{300}me\x{301}l")); +ok($Collator->eq("camel", "Camel")); + +$Collator->change(level => 3); + +ok($Collator->eq("camel", "came\x{300}l")); +ok($Collator->eq("camel", "ca\x{300}me\x{301}l")); +ok($Collator->lt("camel", "Camel")); + +# secondary: neither 00FF nor 0100 is zero +ok($Collator->eq("camel", "came\x{1B00}l")); +ok($Collator->eq("camel", "came\x{1B01}l")); +ok($Collator->eq("camel", "came\x{1B02}l")); + +# primary: 1900 isn't zero +ok($Collator->lt("\x{03C6}", "\x{03D5}")); +ok($Collator->lt("\x{03D5}", "\x{03A6}")); + +# 24 + +{ + my $s; + my $txt = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L."; + + $Collator->change(ignore_level2 => 0, level => 1); + + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=."); + + $Collator->change(level => 2); + + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "=Camel= donkey zebra came\x{301}l =CAMEL= horse =cAm\0E\0L=."); + + $Collator->change(level => 3); + + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L."); + + $Collator->change(ignore_level2 => 1); + + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse cAm\0E\0L."); + + $Collator->change(level => 2); + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=."); + + $Collator->change(level => 1); + $s = $txt; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "=Camel= donkey zebra =came\x{301}l= =CAMEL= horse =cAm\0E\0L=."); + +} + +# 30 + +{ + my $c = Unicode::Collate->new( + table => 'keys.txt', normalization => undef, level => 1, + ); + my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l..."; + $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); + ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cam\0e\0l</b>..."); +} + +{ + my $c = Unicode::Collate->new( + table => 'keys.txt', normalization => undef, level => 2, + ); + my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l..."; + $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); + ok($str, "<b>Camel</b> donkey zebra came\x{301}l <b>CAMEL</b> horse <b>cam\0e\0l</b>..."); +} + +{ + my $c = Unicode::Collate->new( + table => 'keys.txt', normalization => undef, ignore_level2 => 1, + ); + my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l..."; + $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); + ok($str, "Camel donkey zebra <b>came\x{301}l</b> CAMEL horse <b>cam\0e\0l</b>..."); +} + +{ + my $c = Unicode::Collate->new( + table => 'keys.txt', normalization => undef, level => 3, + ); + my $str = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l..."; + $c->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); + ok($str, "Camel donkey zebra came\x{301}l CAMEL horse <b>cam\0e\0l</b>..."); +} + +# 34 + +{ + my $str; + my $camel = "camel Camel came\x{301}l c-a-m-e-l cam\0e\0l"; + + $Collator->change(ignore_level2 => 0); + + $Collator->change(level => 1); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 2); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= =Camel= came\x{301}l =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 3); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= Camel came\x{301}l =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 4); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= Camel came\x{301}l c-a-m-e-l =cam\0e\0l="); + + $Collator->change(ignore_level2 => 1); + + $Collator->change(level => 1); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 2); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= =Camel= =came\x{301}l= =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 3); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= Camel =came\x{301}l= =c-a-m-e-l= =cam\0e\0l="); + + $Collator->change(level => 4); + $str = $camel; + $Collator->gsubst($str, "camel", sub { "=$_[0]=" }); + ok($str, "=camel= Camel =came\x{301}l= c-a-m-e-l =cam\0e\0l="); +} + +# 42 + diff --git a/cpan/Unicode-Collate/t/loc_da.t b/cpan/Unicode-Collate/t/loc_da.t index e21afec813..f8ca07dbd2 100644 --- a/cpan/Unicode-Collate/t/loc_da.t +++ b/cpan/Unicode-Collate/t/loc_da.t @@ -49,7 +49,7 @@ $objDa->change(level => 1); ok($objDa->lt('z', $ae)); ok($objDa->lt($ae, $ostk)); ok($objDa->lt($ostk, $arng)); -ok($objDa->lt($arng, "\x{292}")); +ok($objDa->lt($arng, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_es.t b/cpan/Unicode-Collate/t/loc_es.t index 2491b38a30..ae7d054a78 100644 --- a/cpan/Unicode-Collate/t/loc_es.t +++ b/cpan/Unicode-Collate/t/loc_es.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 26 }; +BEGIN { plan tests => 28 }; use strict; use warnings; @@ -69,3 +69,10 @@ ok($objEs->eq("n\x{303}", pack('U', 0xF1))); ok($objEs->eq("N\x{303}", pack('U', 0xD1))); # 26 + +$objEs->change(level => 2, ignore_level2 => 1); + +ok($objEs->lt("n", "n\x{303}")); +ok($objEs->eq("a", "a\x{303}")); + +# 28 diff --git a/cpan/Unicode-Collate/t/loc_fi.t b/cpan/Unicode-Collate/t/loc_fi.t index b9bedc3a96..f555c19bef 100644 --- a/cpan/Unicode-Collate/t/loc_fi.t +++ b/cpan/Unicode-Collate/t/loc_fi.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 82 }; +BEGIN { plan tests => 83 }; use strict; use warnings; @@ -42,11 +42,12 @@ ok($objFi->getlocale, 'fi'); $objFi->change(level => 1); -ok($objFi->lt('z', $arng)); +ok($objFi->lt('z', $arng)); ok($objFi->lt($arng, $auml)); ok($objFi->lt($auml, $ouml)); +ok($objFi->lt($ouml, "\x{1C0}")); -# 5 +# 6 ok($objFi->eq("d\x{335}", "\x{111}")); ok($objFi->eq("g\x{335}", "\x{1E5}")); @@ -58,7 +59,7 @@ ok($objFi->eq('y', $uuml)); ok($objFi->eq($auml, $ae)); ok($objFi->eq($ouml, $ostk)); -# 14 +# 15 $objFi->change(level => 2); @@ -72,7 +73,7 @@ ok($objFi->lt('y', $uuml)); ok($objFi->lt($auml, $ae)); ok($objFi->lt($ouml, $ostk)); -# 23 +# 24 ok($objFi->eq("\x{111}", "\x{110}")); ok($objFi->eq("\x{1E5}", "\x{1E4}")); @@ -88,7 +89,7 @@ ok($objFi->eq($AE, "\x{1D2D}")); ok($objFi->eq($ouml, $Ouml)); ok($objFi->eq($ostk, $Ostk)); -# 36 +# 37 $objFi->change(level => 3); @@ -106,7 +107,7 @@ ok($objFi->lt($AE, "\x{1D2D}")); ok($objFi->lt($ouml, $Ouml)); ok($objFi->lt($ostk, $Ostk)); -# 49 +# 50 ok($objFi->eq("u\x{308}", $uuml)); ok($objFi->eq("U\x{308}", $Uuml)); @@ -121,7 +122,7 @@ ok($objFi->eq("O\x{308}", $Ouml)); ok($objFi->eq("o\x{338}", $ostk)); ok($objFi->eq("O\x{338}", $Ostk)); -# 61 +# 62 ok($objFi->eq("u\x{308}\x{300}", "\x{1DC}")); ok($objFi->eq("U\x{308}\x{300}", "\x{1DB}")); @@ -145,4 +146,4 @@ ok($objFi->eq("O\x{308}\x{304}", "\x{22A}")); ok($objFi->eq("o\x{338}\x{301}", "\x{1FF}")); ok($objFi->eq("O\x{338}\x{301}", "\x{1FE}")); -# 82 +# 83 diff --git a/cpan/Unicode-Collate/t/loc_fo.t b/cpan/Unicode-Collate/t/loc_fo.t index 6678b942d0..0a75f71d5d 100644 --- a/cpan/Unicode-Collate/t/loc_fo.t +++ b/cpan/Unicode-Collate/t/loc_fo.t @@ -49,7 +49,7 @@ $objFo->change(level => 1); ok($objFo->lt('z', $ae)); ok($objFo->lt($ae, $ostk)); ok($objFo->lt($ostk, $arng)); -ok($objFo->lt($arng, "\x{292}")); +ok($objFo->lt($arng, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_kl.t b/cpan/Unicode-Collate/t/loc_kl.t index b123ed7d42..a8a07d3e02 100644 --- a/cpan/Unicode-Collate/t/loc_kl.t +++ b/cpan/Unicode-Collate/t/loc_kl.t @@ -49,7 +49,7 @@ $objKl->change(level => 1); ok($objKl->lt('z', $ae)); ok($objKl->lt($ae, $ostk)); ok($objKl->lt($ostk, $arng)); -ok($objKl->lt($arng, "\x{292}")); +ok($objKl->lt($arng, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_nb.t b/cpan/Unicode-Collate/t/loc_nb.t index 8ca61cbad4..4de166a644 100644 --- a/cpan/Unicode-Collate/t/loc_nb.t +++ b/cpan/Unicode-Collate/t/loc_nb.t @@ -49,7 +49,7 @@ $objNb->change(level => 1); ok($objNb->lt('z', $ae)); ok($objNb->lt($ae, $ostk)); ok($objNb->lt($ostk, $arng)); -ok($objNb->lt($arng, "\x{292}")); +ok($objNb->lt($arng, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_nn.t b/cpan/Unicode-Collate/t/loc_nn.t index 50503a32af..6af4447d93 100644 --- a/cpan/Unicode-Collate/t/loc_nn.t +++ b/cpan/Unicode-Collate/t/loc_nn.t @@ -49,7 +49,7 @@ $objNn->change(level => 1); ok($objNn->lt('z', $ae)); ok($objNn->lt($ae, $ostk)); ok($objNn->lt($ostk, $arng)); -ok($objNn->lt($arng, "\x{292}")); +ok($objNn->lt($arng, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_sv.t b/cpan/Unicode-Collate/t/loc_sv.t index c905e14717..c28632ea06 100644 --- a/cpan/Unicode-Collate/t/loc_sv.t +++ b/cpan/Unicode-Collate/t/loc_sv.t @@ -51,7 +51,7 @@ $objSv->change(level => 1); ok($objSv->lt('z', $arng)); ok($objSv->lt($arng, $auml)); ok($objSv->lt($auml, $ouml)); -ok($objSv->lt($ouml, "\x{292}")); +ok($objSv->lt($ouml, "\x{1C0}")); # 6 diff --git a/cpan/Unicode-Collate/t/loc_sw.t b/cpan/Unicode-Collate/t/loc_sw.t index 1805c1b497..9b1e78685c 100644 --- a/cpan/Unicode-Collate/t/loc_sw.t +++ b/cpan/Unicode-Collate/t/loc_sw.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 72 }; +BEGIN { plan tests => 17 }; use strict; use warnings; @@ -25,94 +25,24 @@ ok(1); my $objSw = Unicode::Collate::Locale-> new(locale => 'SW', normalization => undef); -ok($objSw->getlocale, 'sw'); +ok($objSw->getlocale, "default"); # no tailoring since 0.74 $objSw->change(level => 1); -ok($objSw->lt("b", "ch")); -ok($objSw->lt("bz","ch")); -ok($objSw->gt("c", "ch")); +ok($objSw->lt("c", "ch")); +ok($objSw->gt("cz","ch")); ok($objSw->lt("d", "dh")); -ok($objSw->lt("dz","dh")); -ok($objSw->gt("e", "dh")); +ok($objSw->gt("dz","dh")); ok($objSw->lt("g", "gh")); -ok($objSw->lt("gz","gh")); -ok($objSw->gt("h", "gh")); +ok($objSw->gt("gz","gh")); ok($objSw->lt("k", "kh")); -ok($objSw->lt("kz","kh")); -ok($objSw->gt("l", "kh")); +ok($objSw->gt("kz","kh")); ok($objSw->lt("n", "ng'")); -ok($objSw->lt("nz","ng'")); -ok($objSw->lt("ng'","ny")); -ok($objSw->gt("o", "ny")); +ok($objSw->gt("ny","ng'")); +ok($objSw->gt("nz","ny")); ok($objSw->lt("s", "sh")); -ok($objSw->lt("sz","sh")); -ok($objSw->gt("t", "sh")); +ok($objSw->gt("sz","sh")); ok($objSw->lt("t", "th")); -ok($objSw->lt("tz","th")); -ok($objSw->gt("u", "th")); +ok($objSw->gt("tz","th")); -# 24 - -$objSw->change(level => 2); - -ok($objSw->eq("ch", "Ch")); -ok($objSw->eq("Ch", "CH")); -ok($objSw->eq("dh", "Dh")); -ok($objSw->eq("Dh", "DH")); -ok($objSw->eq("gh", "Gh")); -ok($objSw->eq("Gh", "GH")); -ok($objSw->eq("kh", "Kh")); -ok($objSw->eq("Kh", "KH")); -ok($objSw->eq("ng'","Ng'")); -ok($objSw->eq("Ng'","NG'")); -ok($objSw->eq("ny", "Ny")); -ok($objSw->eq("Ny", "NY")); -ok($objSw->eq("sh", "Sh")); -ok($objSw->eq("Sh", "SH")); -ok($objSw->eq("th", "Th")); -ok($objSw->eq("Th", "TH")); - -# 40 - -$objSw->change(level => 3); - -ok($objSw->lt("ch", "Ch")); -ok($objSw->lt("Ch", "CH")); -ok($objSw->lt("dh", "Dh")); -ok($objSw->lt("Dh", "DH")); -ok($objSw->lt("gh", "Gh")); -ok($objSw->lt("Gh", "GH")); -ok($objSw->lt("kh", "Kh")); -ok($objSw->lt("Kh", "KH")); -ok($objSw->lt("ng'","Ng'")); -ok($objSw->lt("Ng'","NG'")); -ok($objSw->lt("ny", "Ny")); -ok($objSw->lt("Ny", "NY")); -ok($objSw->lt("sh", "Sh")); -ok($objSw->lt("Sh", "SH")); -ok($objSw->lt("th", "Th")); -ok($objSw->lt("Th", "TH")); - -# 56 - -$objSw->change(upper_before_lower => 1); - -ok($objSw->gt("ch", "Ch")); -ok($objSw->gt("Ch", "CH")); -ok($objSw->gt("dh", "Dh")); -ok($objSw->gt("Dh", "DH")); -ok($objSw->gt("gh", "Gh")); -ok($objSw->gt("Gh", "GH")); -ok($objSw->gt("kh", "Kh")); -ok($objSw->gt("Kh", "KH")); -ok($objSw->gt("ng'","Ng'")); -ok($objSw->gt("Ng'","NG'")); -ok($objSw->gt("ny", "Ny")); -ok($objSw->gt("Ny", "NY")); -ok($objSw->gt("sh", "Sh")); -ok($objSw->gt("Sh", "SH")); -ok($objSw->gt("th", "Th")); -ok($objSw->gt("Th", "TH")); - -# 72 +# 17 diff --git a/cpan/Unicode-Collate/t/loc_zhpy.t b/cpan/Unicode-Collate/t/loc_zhpy.t index 193d158e7f..1d376ec996 100644 --- a/cpan/Unicode-Collate/t/loc_zhpy.t +++ b/cpan/Unicode-Collate/t/loc_zhpy.t @@ -12,7 +12,7 @@ BEGIN { } use Test; -BEGIN { plan tests => 302 }; +BEGIN { plan tests => 283 }; use strict; use warnings; @@ -323,40 +323,16 @@ ok($objZhP->eq("E\x{302}\x{323}", "\x{1EC6}")); $objZhP->change(level => 1); -ok($objZhP->lt("\x{A000}", "\x{3105}")); +ok($objZhP->lt("\x{963F}", "\x{5730}")); +ok($objZhP->lt("\x{5730}", "\x{7ACB}")); +ok($objZhP->lt("\x{7ACB}", "\x{4EBA}")); +ok($objZhP->lt("\x{4EBA}", "\x{65E5}")); +ok($objZhP->lt("\x{65E5}", "\x{4E0A}")); +ok($objZhP->lt("\x{4E0A}", "\x{5929}")); +ok($objZhP->lt("\x{5929}", "\x{4E0B}")); +ok($objZhP->lt("\x{4E0B}", "\x{65BC}")); +ok($objZhP->lt("\x{65BC}", "\x{4E2D}")); +ok($objZhP->lt("\x{4E2D}", "\x{7AFA}")); +ok($objZhP->lt("\x{7AFA}", "\x{5750}")); -ok($objZhP->lt("\x{3105}", "\x{3106}")); -ok($objZhP->lt("\x{3106}", "\x{3128}")); -ok($objZhP->lt("\x{3128}", "\x{3129}")); -ok($objZhP->lt("\x{3129}", "\x{5416}")); -ok($objZhP->lt("\x{5416}", "\x{963F}")); -ok($objZhP->lt("\x{963F}", "\x{554A}")); -ok($objZhP->lt("\x{554A}", "\x{9515}")); -ok($objZhP->lt("\x{9515}", "\x{9312}")); -ok($objZhP->lt("\x{9312}", "\x{55C4}")); -ok($objZhP->lt("\x{55C4}", "\x{5391}")); -ok($objZhP->lt("\x{5391}", "\x{54CE}")); -ok($objZhP->lt("\x{54CE}", "\x{54C0}")); -ok($objZhP->lt("\x{54C0}", "\x{5509}")); -ok($objZhP->lt("\x{5509}", "\x{57C3}")); - -ok($objZhP->lt("\x{6FED}", "\x{FA1F}")); -ok($objZhP->lt("\x{FA1F}", "\x{85F9}")); - -ok($objZhP->lt("\x{57C3}", "\x{4E00}")); -ok($objZhP->lt("\x{4E00}", "\x{8331}")); - -ok($objZhP->lt("\x{8331}", "\x{682A}")); -ok($objZhP->lt("\x{682A}", "\x{3231}")); -ok($objZhP->lt("\x{3231}", "\x{73E0}")); -ok($objZhP->lt("\x{73E0}", "\x{8BF8}")); - -ok($objZhP->lt("\x{5EA7}", "\x{888F}")); -ok($objZhP->lt("\x{888F}", "\x{505A}")); -ok($objZhP->lt("\x{505A}", "\x{8444}")); -ok($objZhP->lt("\x{8444}", "\x{84D9}")); -ok($objZhP->lt("\x{84D9}", "\x{98F5}")); -ok($objZhP->lt("\x{98F5}", "\x{7CF3}")); -ok($objZhP->lt("\x{7CF3}", "\x{5497}")); - -# 302 +# 283 diff --git a/cpan/Unicode-Collate/t/loc_zhst.t b/cpan/Unicode-Collate/t/loc_zhst.t index 8f60ed69f0..87f7976f8b 100644 --- a/cpan/Unicode-Collate/t/loc_zhst.t +++ b/cpan/Unicode-Collate/t/loc_zhst.t @@ -337,9 +337,9 @@ ok($objZhS->lt("\x{4EBA}", "\x{513F}")); ok($objZhS->lt("\x{513F}", "\x{5165}")); ok($objZhS->lt("\x{9E1D}", "\x{7069}")); -ok($objZhS->lt("\x{7069}", "\x{7C72}")); -ok($objZhS->lt("\x{7C72}", "\x{706A}")); -ok($objZhS->lt("\x{706A}", "\x{7229}")); +ok($objZhS->lt("\x{7069}", "\x{706A}")); +ok($objZhS->lt("\x{706A}", "\x{7C72}")); +ok($objZhS->lt("\x{7C72}", "\x{7229}")); ok($objZhS->lt("\x{7229}", "\x{9EA4}")); ok($objZhS->lt("\x{9EA4}", "\x{9F7E}")); ok($objZhS->lt("\x{9F7E}", "\x{9F49}")); diff --git a/cpan/Unicode-Collate/t/rewrite.t b/cpan/Unicode-Collate/t/rewrite.t new file mode 100644 index 0000000000..004a0c9647 --- /dev/null +++ b/cpan/Unicode-Collate/t/rewrite.t @@ -0,0 +1,92 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 17 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +######################### + +my $code = sub { + my $line = shift; + $line =~ s/\[\.0000\..{4}\..{4}\./[.0000.0000.0000./g; + return $line; + }; + +##### + +my $Collator = Unicode::Collate->new( + table => 'keys.txt', normalization => undef, rewrite => $code, +); + +ok($Collator->eq("camel", "came\x{300}l")); +ok($Collator->eq("camel", "ca\x{300}me\x{301}l")); +ok($Collator->lt("camel", "Camel")); +{ + my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l."; + $Collator->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=."); +} + +# 5 + +my $rewriteDUCET = Unicode::Collate->new( + normalization => undef, rewrite => $code, +); + +ok($rewriteDUCET->eq("camel", "came\x{300}l")); +ok($rewriteDUCET->eq("camel", "ca\x{300}me\x{301}l")); +ok($rewriteDUCET->lt("camel", "Camel")); +{ + my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l."; + $rewriteDUCET->gsubst($s, "camel", sub { "=$_[0]=" }); + ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=."); +} + +# 9 + +my $undef_hira = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + level => 1, + rewrite => sub { + my $line = shift; + return '' if $line =~ /HIRAGANA/; + return $line; + }, +); + +my $hiragana = "\x{3042}\x{3044}"; +my $katakana = "\x{30A2}\x{30A4}"; +my $cjkkanji = "\x{4E00}"; + +# HIRAGANA are undefined via rewrite +# So they are after CJK Unified Ideographs. + +ok($undef_hira->lt("abc", "perl")); +ok($undef_hira->lt("", "ABC")); +ok($undef_hira->lt($katakana, $hiragana)); +ok($undef_hira->lt($katakana, $cjkkanji)); +ok($undef_hira->lt($cjkkanji, $hiragana)); + +$Collator->change(level => 1); +ok($Collator->eq($katakana, $hiragana)); +ok($Collator->lt($katakana, $cjkkanji)); +ok($Collator->gt($cjkkanji, $hiragana)); + +# 17 |