diff options
author | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2012-12-22 10:08:22 +0000 |
---|---|---|
committer | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2012-12-22 10:08:22 +0000 |
commit | f443a335db5cfba183476a57f32385ab3f111b42 (patch) | |
tree | 431e01e512e50debb5fa8b480b1b56a5f577a9a0 /cpan/Unicode-Collate | |
parent | e4979e1455b839a5b17acf4031c45d8610c246b1 (diff) | |
download | perl-f443a335db5cfba183476a57f32385ab3f111b42.tar.gz |
Update Unicode-Collate to CPAN version 0.97
[DELTA]
0.97 Sat Dec 22 14:25:50 2012
- bug fix: XS of 0.96 (if UCA_Version is 9 to 11) wrongly referred to
DUCET for completely ignorable characters, even though the collator
doesn't use DUCET.
- separated t/notable.t from t/test.t.
Diffstat (limited to 'cpan/Unicode-Collate')
-rw-r--r-- | cpan/Unicode-Collate/Changes | 6 | ||||
-rw-r--r-- | cpan/Unicode-Collate/Collate.pm | 8 | ||||
-rw-r--r-- | cpan/Unicode-Collate/README | 2 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/notable.t | 140 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/test.t | 122 |
5 files changed, 174 insertions, 104 deletions
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes index faaa985361..9fc2dc26c4 100644 --- a/cpan/Unicode-Collate/Changes +++ b/cpan/Unicode-Collate/Changes @@ -1,5 +1,11 @@ Revision history for Perl module Unicode::Collate. +0.97 Sat Dec 22 14:25:50 2012 + - bug fix: XS of 0.96 (if UCA_Version is 9 to 11) wrongly referred to + DUCET for completely ignorable characters, even though the collator + don't use DUCET. + - separated t/notable.t from t/test.t. + 0.96 Sat Dec 15 19:43:10 2012 - special noncharancter tailorings ('highestFFFF' and 'minimalFFFE') * some locales are modified for 'highestFFFF': as, bn, fa, gu, hi, hy, diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm index 3085094472..4a1c837ab6 100644 --- a/cpan/Unicode-Collate/Collate.pm +++ b/cpan/Unicode-Collate/Collate.pm @@ -14,7 +14,7 @@ use File::Spec; no warnings 'utf8'; -our $VERSION = '0.96'; +our $VERSION = '0.97'; our $PACKAGE = __PACKAGE__; ### begin XS only ### @@ -502,7 +502,7 @@ sub splitEnt } elsif ($ver9) { $src[$i] = undef if $map->{ $src[$i] } ? @{ $map->{ $src[$i] } } == 0 - : _ignorable_simple($src[$i]); ### XS only + : $uXS && _ignorable_simple($src[$i]); ### XS only } } @@ -1217,7 +1217,7 @@ next to C<c>. For a certain language where C<ch> as the next letter, C<"abch"> is greater than C<"abc\x{FFFF}">, but lesser than C<"abd">. Note: This is equivalent to C<entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]'>. -C<entry> allows tailoring of any other character than U+FFFF. +Any other character than C<U+FFFF> can be tailored by C<entry>. =item identical @@ -1326,7 +1326,7 @@ then C<$a2> and C<$b2> at level 1, as followed. "bbb\x{FFFE}a" Note: This is equivalent to C<entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]'>. -C<entry> allows tailoring of any other character than U+FFFE. +Any other character than C<U+FFFE> can be tailored by C<entry>. 
=item normalization diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README index 3ec9dbc50b..fdd837b983 100644 --- a/cpan/Unicode-Collate/README +++ b/cpan/Unicode-Collate/README @@ -1,4 +1,4 @@ -Unicode/Collate version 0.96 +Unicode/Collate version 0.97 =============================== NAME diff --git a/cpan/Unicode-Collate/t/notable.t b/cpan/Unicode-Collate/t/notable.t new file mode 100644 index 0000000000..124351760f --- /dev/null +++ b/cpan/Unicode-Collate/t/notable.t @@ -0,0 +1,140 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use strict; +use warnings; +BEGIN { $| = 1; print "1..32\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +use Unicode::Collate; + +ok(1); + +######################### + +{ + # Table is undefined, then no entry is defined. + my $undef_table = Unicode::Collate->new( + table => undef, + normalization => undef, + level => 1, + ); + + # in the Unicode code point order + ok($undef_table->lt('', 'A')); + ok($undef_table->lt('ABC', 'B')); + + # Hangul should be decomposed (even w/o Unicode::Normalize). + ok($undef_table->lt("Perl", "\x{AC00}")); + ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); + ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); + ok($undef_table->lt("\x{AE00}", "\x{3042}")); + + # U+AC00: Hangul GA + # U+AE00: Hangul GEUL + # U+3042: Hiragana A + + # Weight for CJK Ideographs is defined, though. 
+ ok($undef_table->lt("", "\x{4E00}")); + ok($undef_table->lt("\x{4E8C}","ABC")); + ok($undef_table->lt("\x{4E00}","\x{3042}")); + ok($undef_table->lt("\x{4E00}","\x{4E8C}")); + +# 11 + + # U+4E00: Ideograph "ONE" + # U+4E8C: Ideograph "TWO" + + for my $v ('', 8, 9, 11, 14) { + $undef_table->change(UCA_Version => $v) if $v; + ok($undef_table->lt("\x{4E00}","\0")); + } +} + +# 16 + +{ + my $onlyABC = Unicode::Collate->new( + table => undef, + normalization => undef, + entry => << 'ENTRIES', +0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A +0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B +0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C +ENTRIES + ); + ok( + join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), + join(':', qw/ A aB Ab ABA BAC cAc cc / ), + ); +} + +# 17 + +{ + my $few_entries = Unicode::Collate->new( + entry => <<'ENTRIES', +0050 ; [.0101.0020.0002.0050] # P +0045 ; [.0102.0020.0002.0045] # E +0052 ; [.0103.0020.0002.0052] # R +004C ; [.0104.0020.0002.004C] # L +1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G +1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I +5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" +ENTRIES + table => undef, + normalization => undef, + ); + # defined before undefined + my $sortABC = join '', + $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); + + ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); + + ok($few_entries->lt('E', 'D')); + ok($few_entries->lt("\x{5B57}", "\x{4E00}")); + ok($few_entries->lt("\x{AE30}", "\x{AC00}")); + + # Hangul must be decomposed. 
+ ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); +} + +# 22 + +{ + my $highestNUL = Unicode::Collate->new( + table => undef, + normalization => undef, + level => 1, + entry => '0000 ; [.FFFE.0020.0005.0000]', + ); + + for my $v ('', 8, 9, 11, 14) { + $highestNUL->change(UCA_Version => $v) if $v; + ok($highestNUL->lt("abc\x{4E00}", "abc\0")); + ok($highestNUL->lt("abc\x{E0000}","abc\0")); + } +} + +# 32 diff --git a/cpan/Unicode-Collate/t/test.t b/cpan/Unicode-Collate/t/test.t index 552440f919..00feeadad9 100644 --- a/cpan/Unicode-Collate/t/test.t +++ b/cpan/Unicode-Collate/t/test.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..112\n"; } +BEGIN { $| = 1; print "1..96\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -40,7 +40,7 @@ my $acute = _pack_U(0x0301); my $hiragana = "\x{3042}\x{3044}"; my $katakana = "\x{30A2}\x{30A4}"; -##### 2..7 +# 1 my $Collator = Unicode::Collate->new( table => 'keys.txt', @@ -63,7 +63,7 @@ ok( join(':', qw/ ACA ACHA ACIA ACKA ADA / ), ); -##### 8..18 +# 7 ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1) ok($Collator->cmp($a_acute, $A_acute), -1); @@ -81,7 +81,7 @@ ok($Collator->lt("A", $A_acute)); ok($Collator->lt("A", $a_acute)); ok($Collator->lt($a_acute, $A_acute)); -##### 19..25 +# 18 $Collator->change(level => 2); @@ -94,7 +94,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0); ok( $Collator->eq($hiragana, $katakana) ); ok( $Collator->ge($hiragana, $katakana) ); -##### 26..31 +# 25 # hangul ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); @@ -104,7 +104,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana -##### 32..40 +# 31 $Collator->change(%old_level, katakana_before_hiragana => 1); @@ -119,7 +119,7 @@ ok( $Collator->ne($hiragana, $katakana) ); ok( $Collator->gt($hiragana, $katakana) ); ok( $Collator->ge($hiragana, $katakana) ); 
-##### 41..46 +# 40 $Collator->change(upper_before_lower => 1); @@ -130,14 +130,14 @@ ok( $Collator->cmp($hiragana, $katakana), 1); ok( $Collator->ge($hiragana, $katakana), 1); ok( $Collator->gt($hiragana, $katakana), 1); -##### 47..48 +# 46 $Collator->change(katakana_before_hiragana => 0); ok( $Collator->cmp("abc", "ABC"), 1); ok( $Collator->cmp($hiragana, $katakana), -1); -##### 49..52 +# 48 $Collator->change(upper_before_lower => 0); @@ -146,7 +146,8 @@ ok( $Collator->le("abc", "ABC") ); ok( $Collator->cmp($hiragana, $katakana), -1); ok( $Collator->lt($hiragana, $katakana) ); -##### 53..54 +# 52 + { my $ignoreAE = Unicode::Collate->new( table => 'keys.txt', @@ -157,27 +158,8 @@ ok( $Collator->lt($hiragana, $katakana) ); ok($ignoreAE->eq("Perl","ePrl")); } -##### 55 -{ - my $onlyABC = Unicode::Collate->new( - table => undef, - normalization => undef, - entry => << 'ENTRIES', -0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A -0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A -0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B -0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B -0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C -0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C -ENTRIES - ); - ok( - join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), - join(':', qw/ A aB Ab ABA BAC cAc cc / ), - ); -} +# 54 -##### 56..59 { my $undefAE = Unicode::Collate->new( table => 'keys.txt', @@ -190,69 +172,8 @@ ENTRIES ok($Collator->lt("lake","like")); } -##### 60..69 -{ - # Table is undefined, then no entry is defined. - my $undef_table = Unicode::Collate->new( - table => undef, - normalization => undef, - level => 1, - ); - - # in the Unicode code point order - ok($undef_table->lt('', 'A')); - ok($undef_table->lt('ABC', 'B')); - - # Hangul should be decomposed (even w/o Unicode::Normalize). 
- ok($undef_table->lt("Perl", "\x{AC00}")); - ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); - ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); - ok($undef_table->lt("\x{AE00}", "\x{3042}")); +# 58 - # U+AC00: Hangul GA - # U+AE00: Hangul GEUL - # U+3042: Hiragana A - - # Weight for CJK Ideographs is defined, though. - ok($undef_table->lt("", "\x{4E00}")); - ok($undef_table->lt("\x{4E8C}","ABC")); - ok($undef_table->lt("\x{4E00}","\x{3042}")); - ok($undef_table->lt("\x{4E00}","\x{4E8C}")); - - # U+4E00: Ideograph "ONE" - # U+4E8C: Ideograph "TWO" -} - -##### 70..74 -{ - my $few_entries = Unicode::Collate->new( - entry => <<'ENTRIES', -0050 ; [.0101.0020.0002.0050] # P -0045 ; [.0102.0020.0002.0045] # E -0052 ; [.0103.0020.0002.0052] # R -004C ; [.0104.0020.0002.004C] # L -1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G -1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I -5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" -ENTRIES - table => undef, - normalization => undef, - ); - # defined before undefined - my $sortABC = join '', - $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); - - ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); - - ok($few_entries->lt('E', 'D')); - ok($few_entries->lt("\x{5B57}", "\x{4E00}")); - ok($few_entries->lt("\x{AE30}", "\x{AC00}")); - - # Hangul must be decomposed. 
- ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); -} - -##### 75..79 { my $dropArticles = Unicode::Collate->new( table => "keys.txt", @@ -270,7 +191,8 @@ ENTRIES ok($Collator->gt("the pen", "a pencil")); } -##### 80..83 +# 63 + { my $undefName = Unicode::Collate->new( table => "keys.txt", @@ -286,7 +208,8 @@ ENTRIES ok($Collator ->gt("\x{4E03}", $katakana)); } -##### 84..90 +# 67 + { my $O_str = Unicode::Collate->new( table => "keys.txt", @@ -321,7 +244,7 @@ ENTRIES ok($O_str ->gt("\x{200B}", "A")); } -##### 91..101 +# 74 my %origVer = $Collator->change(UCA_Version => 8); @@ -351,7 +274,7 @@ $Collator->change(level => 4); ok($Collator->gt("!\x{300}", "")); ok($Collator->eq("!\x{300}", "!")); -##### 102..107 +# 85 $_ = 'Foo'; @@ -385,7 +308,7 @@ $_ = 'Foo'; @temp = $c->index("perl5", "LR"); ok($_, 'Foo'); -##### 108..109 +# 91 { my $caseless = Unicode::Collate->new( @@ -397,7 +320,7 @@ ok($_, 'Foo'); ok( $caseless->eq("ABC","abc") ); } -##### 110..112 +# 93 { eval { require Unicode::Normalize; }; @@ -417,4 +340,5 @@ ok($_, 'Foo'); } } -##### +# 96 + |