diff options
author | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2010-11-07 13:31:59 +0000 |
---|---|---|
committer | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2010-11-07 13:32:48 +0000 |
commit | 584e761d155b7f9055c1f0566f42a2ef083716d8 (patch) | |
tree | 65f48ccaaf4e2d8400cf29e903cce0c9ea8d8822 /cpan/Unicode-Collate/t | |
parent | 90e83bc9e824bf1896bb96af89ec7c40ed9eebf2 (diff) | |
download | perl-584e761d155b7f9055c1f0566f42a2ef083716d8.tar.gz |
Update Unicode-Collate to CPAN version 0.66
[DELTA]
0.66 Sun Nov 7 10:47:30 2010
- U::C::Locale newly supports locale: ko.
- added Unicode::Collate::CJK::Korean for ko.
- added t/loc_ko.t.
- 12 compat. ideographs (e.g. U+FA0E) are treated as unified ideographs.
(though DUCET also does it, now Unicode::Collate does it without DUCET.)
- added t/compatui.t.
! Ideographs Ext.B (U+20000..U+2A6D6) can be overridden with UCA_Version 8.
This is a long-standing behavior from Unicode::Collate 0.11 to 0.63.
A wrong fix at 0.64 should be abandoned.
Diffstat (limited to 'cpan/Unicode-Collate/t')
-rw-r--r-- | cpan/Unicode-Collate/t/cjkrange.t | 6 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/compatui.t | 115 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_ja.t | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_ko.t | 69 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/loc_zhpy.t | 7 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/overcjk0.t | 16 |
6 files changed, 201 insertions, 14 deletions
diff --git a/cpan/Unicode-Collate/t/cjkrange.t b/cpan/Unicode-Collate/t/cjkrange.t index d9178b473c..a6118d8e6d 100644 --- a/cpan/Unicode-Collate/t/cjkrange.t +++ b/cpan/Unicode-Collate/t/cjkrange.t @@ -29,9 +29,9 @@ my $Collator = Unicode::Collate->new( # U+9FBC..U+9FC3 are CJK UI since Unicode 5.1.0. # U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0. # CJK UI Ext are greater than any CJK UI. -# U+3400 ..U+4DB5 are CJK UI ExtA since Unicode 3.0.0. -# U+20000..U+2A6D6 are CJK UI ExtB since Unicode 3.1.0. -# U+2A700..U+2B734 are CJK UI ExtC since Unicode 5.2.0. +# U+3400 ..U+4DB5 are CJK UI Ext.A since Unicode 3.0.0. +# U+20000..U+2A6D6 are CJK UI Ext.B since Unicode 3.1.0. +# U+2A700..U+2B734 are CJK UI Ext.C since Unicode 5.2.0. ##### 2..13 $Collator->change(UCA_Version => 8); diff --git a/cpan/Unicode-Collate/t/compatui.t b/cpan/Unicode-Collate/t/compatui.t new file mode 100644 index 0000000000..7169fa3225 --- /dev/null +++ b/cpan/Unicode-Collate/t/compatui.t @@ -0,0 +1,115 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 491 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +my @Versions = (8, 9, 11, 14, 16, 18, 20); + +# 12 compatibility ideographs are treated as unified ideographs: +# FA0E, FA0F, FA11, FA13, FA14, FA1F, FA21, FA23, FA24, FA27, FA28, FA29. 
+ +my $Collator = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, +); + +for my $v (@Versions) { +$Collator->change(UCA_Version => $v); +ok($Collator->lt("\x{4E00}", "\x{1FFF}")); +ok($Collator->lt("\x{9FA5}", "\x{1FFF}")); +ok($Collator->gt("\x{FA00}", "\x{1FFF}")); +ok($Collator->gt("\x{FA0D}", "\x{1FFF}")); +ok($Collator->lt("\x{FA0E}", "\x{1FFF}")); +ok($Collator->lt("\x{FA0F}", "\x{1FFF}")); +ok($Collator->gt("\x{FA10}", "\x{1FFF}")); +ok($Collator->lt("\x{FA11}", "\x{1FFF}")); +ok($Collator->gt("\x{FA12}", "\x{1FFF}")); +ok($Collator->lt("\x{FA13}", "\x{1FFF}")); +ok($Collator->lt("\x{FA14}", "\x{1FFF}")); +ok($Collator->gt("\x{FA15}", "\x{1FFF}")); +ok($Collator->gt("\x{FA16}", "\x{1FFF}")); +ok($Collator->gt("\x{FA17}", "\x{1FFF}")); +ok($Collator->gt("\x{FA18}", "\x{1FFF}")); +ok($Collator->gt("\x{FA19}", "\x{1FFF}")); +ok($Collator->gt("\x{FA1A}", "\x{1FFF}")); +ok($Collator->gt("\x{FA1B}", "\x{1FFF}")); +ok($Collator->gt("\x{FA1C}", "\x{1FFF}")); +ok($Collator->gt("\x{FA1D}", "\x{1FFF}")); +ok($Collator->gt("\x{FA1E}", "\x{1FFF}")); +ok($Collator->lt("\x{FA1F}", "\x{1FFF}")); +ok($Collator->gt("\x{FA20}", "\x{1FFF}")); +ok($Collator->lt("\x{FA21}", "\x{1FFF}")); +ok($Collator->gt("\x{FA22}", "\x{1FFF}")); +ok($Collator->lt("\x{FA23}", "\x{1FFF}")); +ok($Collator->lt("\x{FA24}", "\x{1FFF}")); +ok($Collator->gt("\x{FA25}", "\x{1FFF}")); +ok($Collator->gt("\x{FA26}", "\x{1FFF}")); +ok($Collator->lt("\x{FA27}", "\x{1FFF}")); +ok($Collator->lt("\x{FA28}", "\x{1FFF}")); +ok($Collator->lt("\x{FA29}", "\x{1FFF}")); +ok($Collator->gt("\x{FA2A}", "\x{1FFF}")); +ok($Collator->gt("\x{FA30}", "\x{1FFF}")); +ok($Collator->gt("\x{FAFF}", "\x{1FFF}")); +} + +my $IgnoreCJK = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + overrideCJK => sub {()}, +); + +for my $v (@Versions) { +$IgnoreCJK->change(UCA_Version => $v); +ok($IgnoreCJK->eq("\x{4E00}", "")); +ok($IgnoreCJK->eq("\x{9FA5}", "")); +ok($IgnoreCJK->gt("\x{FA00}", 
"\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA0D}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA0E}", "")); +ok($IgnoreCJK->eq("\x{FA0F}", "")); +ok($IgnoreCJK->gt("\x{FA10}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA11}", "")); +ok($IgnoreCJK->gt("\x{FA12}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA13}", "")); +ok($IgnoreCJK->eq("\x{FA14}", "")); +ok($IgnoreCJK->gt("\x{FA15}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA16}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA17}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA18}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA19}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA1A}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA1B}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA1C}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA1D}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA1E}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA1F}", "")); +ok($IgnoreCJK->gt("\x{FA20}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA21}", "")); +ok($IgnoreCJK->gt("\x{FA22}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA23}", "")); +ok($IgnoreCJK->eq("\x{FA24}", "")); +ok($IgnoreCJK->gt("\x{FA25}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA26}", "\x{1FFF}")); +ok($IgnoreCJK->eq("\x{FA27}", "")); +ok($IgnoreCJK->eq("\x{FA28}", "")); +ok($IgnoreCJK->eq("\x{FA29}", "")); +ok($IgnoreCJK->gt("\x{FA2A}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FA30}", "\x{1FFF}")); +ok($IgnoreCJK->gt("\x{FAFF}", "\x{1FFF}")); +} diff --git a/cpan/Unicode-Collate/t/loc_ja.t b/cpan/Unicode-Collate/t/loc_ja.t index 8dbffc030e..3e02520f97 100644 --- a/cpan/Unicode-Collate/t/loc_ja.t +++ b/cpan/Unicode-Collate/t/loc_ja.t @@ -191,7 +191,7 @@ ok($objJa->eq("\x{FF3B}", "\[")); ok($objJa->eq("\x{FF3C}", "\'")); ok($objJa->eq("\x{FF3D}", "\]")); ok($objJa->eq("\x{FF3E}", "\^")); -ok($objJa->eq("\x{FF3F}", "\_")); +ok($objJa->eq("\x{FF3F}", "_")); ok($objJa->eq("\x{FF40}", "\`")); ok($objJa->eq("\x{FF41}", "a")); ok($objJa->eq("\x{FF42}", "b")); diff --git a/cpan/Unicode-Collate/t/loc_ko.t b/cpan/Unicode-Collate/t/loc_ko.t new file mode 100644 index 
0000000000..268eb3bd8c --- /dev/null +++ b/cpan/Unicode-Collate/t/loc_ko.t @@ -0,0 +1,69 @@ +#!perl +use strict; +use warnings; +use Unicode::Collate::Locale; + +use Test; +plan tests => 42; + +my $objKo = Unicode::Collate::Locale-> + new(locale => 'KO', normalization => undef); + +ok(1); +ok($objKo->getlocale, 'ko'); + +$objKo->change(level => 1); + +ok($objKo->eq("\x{AC00}", "\x{4F3D}")); +ok($objKo->eq("\x{4F3D}", "\x{4F73}")); +ok($objKo->eq("\x{4F73}", "\x{5047}")); +ok($objKo->eq("\x{5047}", "\x{50F9}")); +ok($objKo->eq("\x{50F9}", "\x{52A0}")); +ok($objKo->eq("\x{52A0}", "\x{53EF}")); +ok($objKo->lt("\x{53EF}", "\x{AC01}")); + +ok($objKo->eq("\x{AC1D}", "\x{5580}")); +ok($objKo->eq("\x{5580}", "\x{5BA2}")); +ok($objKo->lt("\x{5BA2}", "\x{AC31}")); + +ok($objKo->eq("\x{C77C}", "\x{4E00}")); +ok($objKo->eq("\x{4E00}", "\x{4F5A}")); +ok($objKo->eq("\x{4F5A}", "\x{4F7E}")); + +ok($objKo->lt("\x{993C}", "\x{D790}")); +ok($objKo->eq("\x{D790}", "\x{8A70}")); +ok($objKo->eq("\x{8A70}", "\x{72B5}")); +ok($objKo->eq("\x{72B5}", "\x{7E88}")); +ok($objKo->eq("\x{7E88}", "\x{896D}")); +ok($objKo->eq("\x{896D}", "\x{9821}")); +ok($objKo->eq("\x{9821}", "\x{9EE0}")); + +# 22 + +$objKo->change(level => 2); + +ok($objKo->lt("\x{AC00}", "\x{4F3D}")); +ok($objKo->lt("\x{4F3D}", "\x{4F73}")); +ok($objKo->lt("\x{4F73}", "\x{5047}")); +ok($objKo->lt("\x{5047}", "\x{50F9}")); +ok($objKo->lt("\x{50F9}", "\x{52A0}")); +ok($objKo->lt("\x{52A0}", "\x{53EF}")); +ok($objKo->lt("\x{53EF}", "\x{AC01}")); + +ok($objKo->lt("\x{AC1D}", "\x{5580}")); +ok($objKo->lt("\x{5580}", "\x{5BA2}")); +ok($objKo->lt("\x{5BA2}", "\x{AC31}")); + +ok($objKo->lt("\x{C77C}", "\x{4E00}")); +ok($objKo->lt("\x{4E00}", "\x{4F5A}")); +ok($objKo->lt("\x{4F5A}", "\x{4F7E}")); + +ok($objKo->lt("\x{993C}", "\x{D790}")); +ok($objKo->lt("\x{D790}", "\x{8A70}")); +ok($objKo->lt("\x{8A70}", "\x{72B5}")); +ok($objKo->lt("\x{72B5}", "\x{7E88}")); +ok($objKo->lt("\x{7E88}", "\x{896D}")); +ok($objKo->lt("\x{896D}", 
"\x{9821}")); +ok($objKo->lt("\x{9821}", "\x{9EE0}")); + +#42 diff --git a/cpan/Unicode-Collate/t/loc_zhpy.t b/cpan/Unicode-Collate/t/loc_zhpy.t index 81c0fb2f43..b65ae52282 100644 --- a/cpan/Unicode-Collate/t/loc_zhpy.t +++ b/cpan/Unicode-Collate/t/loc_zhpy.t @@ -4,7 +4,7 @@ use warnings; use Unicode::Collate::Locale; use Test; -plan tests => 300; +plan tests => 302; my $objZhP = Unicode::Collate::Locale-> new(locale => 'ZH__pinyin', normalization => undef); @@ -325,6 +325,9 @@ ok($objZhP->lt("\x{54CE}", "\x{54C0}")); ok($objZhP->lt("\x{54C0}", "\x{5509}")); ok($objZhP->lt("\x{5509}", "\x{57C3}")); +ok($objZhP->lt("\x{6FED}", "\x{FA1F}")); +ok($objZhP->lt("\x{FA1F}", "\x{85F9}")); + ok($objZhP->lt("\x{57C3}", "\x{4E00}")); ok($objZhP->lt("\x{4E00}", "\x{8331}")); @@ -341,4 +344,4 @@ ok($objZhP->lt("\x{84D9}", "\x{98F5}")); ok($objZhP->lt("\x{98F5}", "\x{7CF3}")); ok($objZhP->lt("\x{7CF3}", "\x{5497}")); -# 300 +# 302 diff --git a/cpan/Unicode-Collate/t/overcjk0.t b/cpan/Unicode-Collate/t/overcjk0.t index b1c92af1fa..2eec339bb0 100644 --- a/cpan/Unicode-Collate/t/overcjk0.t +++ b/cpan/Unicode-Collate/t/overcjk0.t @@ -49,8 +49,8 @@ ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0 ok($ignoreCJK->eq("\x{9FC4}", "")); # UI since Unicode 5.2.0 ok($ignoreCJK->eq("\x{9FCB}", "")); # UI since Unicode 5.2.0 ok($ignoreCJK->gt("\x{9FCC}", "Perl")); -ok($ignoreCJK->eq("\x{20000}", "")); -ok($ignoreCJK->eq("\x{2A6D6}", "")); +ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0 +ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0 ok($ignoreCJK->eq("\x{2A700}", "")); # ExtC since Unicode 5.2.0 ok($ignoreCJK->eq("\x{2B734}", "")); # ExtC since Unicode 5.2.0 @@ -64,8 +64,8 @@ ok($ignoreCJK->gt("\x{9FBB}", "Perl")); ok($ignoreCJK->gt("\x{9FBC}", "Perl")); ok($ignoreCJK->gt("\x{9FC3}", "Perl")); ok($ignoreCJK->gt("\x{9FC4}", "Perl")); -ok($ignoreCJK->gt("\x{20000}","Perl")); -ok($ignoreCJK->gt("\x{2A6D6}","Perl")); +ok($ignoreCJK->eq("\x{20000}", 
"")); # ExtB since Unicode 3.1.0 +ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0 ##### 31..40 $ignoreCJK->change(UCA_Version => 9); @@ -90,8 +90,8 @@ ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0 ok($ignoreCJK->gt("\x{9FBC}", "Perl")); ok($ignoreCJK->gt("\x{9FC3}", "Perl")); ok($ignoreCJK->gt("\x{9FC4}", "Perl")); -ok($ignoreCJK->eq("\x{20000}", "")); -ok($ignoreCJK->eq("\x{2A6D6}", "")); +ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0 +ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0 ok($ignoreCJK->gt("\x{2A700}", "Perl")); ok($ignoreCJK->gt("\x{2B734}", "Perl")); @@ -107,8 +107,8 @@ ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0 ok($ignoreCJK->gt("\x{9FC4}", "Perl")); ok($ignoreCJK->gt("\x{9FCB}", "Perl")); ok($ignoreCJK->gt("\x{9FCC}", "Perl")); -ok($ignoreCJK->eq("\x{20000}", "")); -ok($ignoreCJK->eq("\x{2A6D6}", "")); +ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0 +ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0 ok($ignoreCJK->gt("\x{2A700}", "Perl")); ok($ignoreCJK->gt("\x{2B734}", "Perl")); |