summaryrefslogtreecommitdiff
path: root/cpan/Unicode-Collate
diff options
context:
space:
mode:
authorChris 'BinGOs' Williams <chris@bingosnet.co.uk>2012-12-22 10:08:22 +0000
committerChris 'BinGOs' Williams <chris@bingosnet.co.uk>2012-12-22 10:08:22 +0000
commitf443a335db5cfba183476a57f32385ab3f111b42 (patch)
tree431e01e512e50debb5fa8b480b1b56a5f577a9a0 /cpan/Unicode-Collate
parente4979e1455b839a5b17acf4031c45d8610c246b1 (diff)
downloadperl-f443a335db5cfba183476a57f32385ab3f111b42.tar.gz
Update Unicode-Collate to CPAN version 0.97
[DELTA] 0.97 Sat Dec 22 14:25:50 2012 - bug fix: XS of 0.96 (if UCA_Version is 9 to 11) wrongly referred to DUCET for completely ignorable characters, even though the collator doesn't use DUCET. - separated t/notable.t from t/test.t.
Diffstat (limited to 'cpan/Unicode-Collate')
-rw-r--r--cpan/Unicode-Collate/Changes6
-rw-r--r--cpan/Unicode-Collate/Collate.pm8
-rw-r--r--cpan/Unicode-Collate/README2
-rw-r--r--cpan/Unicode-Collate/t/notable.t140
-rw-r--r--cpan/Unicode-Collate/t/test.t122
5 files changed, 174 insertions, 104 deletions
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes
index faaa985361..9fc2dc26c4 100644
--- a/cpan/Unicode-Collate/Changes
+++ b/cpan/Unicode-Collate/Changes
@@ -1,5 +1,11 @@
Revision history for Perl module Unicode::Collate.
+0.97 Sat Dec 22 14:25:50 2012
+ - bug fix: XS of 0.96 (if UCA_Version is 9 to 11) wrongly referred to
+ DUCET for completely ignorable characters, even though the collator
+ don't use DUCET.
+ - separated t/notable.t from t/test.t.
+
0.96 Sat Dec 15 19:43:10 2012
- special noncharancter tailorings ('highestFFFF' and 'minimalFFFE')
* some locales are modified for 'highestFFFF': as, bn, fa, gu, hi, hy,
diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm
index 3085094472..4a1c837ab6 100644
--- a/cpan/Unicode-Collate/Collate.pm
+++ b/cpan/Unicode-Collate/Collate.pm
@@ -14,7 +14,7 @@ use File::Spec;
no warnings 'utf8';
-our $VERSION = '0.96';
+our $VERSION = '0.97';
our $PACKAGE = __PACKAGE__;
### begin XS only ###
@@ -502,7 +502,7 @@ sub splitEnt
} elsif ($ver9) {
$src[$i] = undef if $map->{ $src[$i] }
? @{ $map->{ $src[$i] } } == 0
- : _ignorable_simple($src[$i]); ### XS only
+ : $uXS && _ignorable_simple($src[$i]); ### XS only
}
}
@@ -1217,7 +1217,7 @@ next to C<c>. For a certain language where C<ch> as the next letter,
C<"abch"> is greater than C<"abc\x{FFFF}">, but lesser than C<"abd">.
Note: This is equivalent to C<entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]'>.
-C<entry> allows tailoring of any other character than U+FFFF.
+Any other character than C<U+FFFF> can be tailored by C<entry>.
=item identical
@@ -1326,7 +1326,7 @@ then C<$a2> and C<$b2> at level 1, as followed.
"bbb\x{FFFE}a"
Note: This is equivalent to C<entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]'>.
-C<entry> allows tailoring of any other character than U+FFFE.
+Any other character than C<U+FFFE> can be tailored by C<entry>.
=item normalization
diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README
index 3ec9dbc50b..fdd837b983 100644
--- a/cpan/Unicode-Collate/README
+++ b/cpan/Unicode-Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.96
+Unicode/Collate version 0.97
===============================
NAME
diff --git a/cpan/Unicode-Collate/t/notable.t b/cpan/Unicode-Collate/t/notable.t
new file mode 100644
index 0000000000..124351760f
--- /dev/null
+++ b/cpan/Unicode-Collate/t/notable.t
@@ -0,0 +1,140 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..32\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Collate;
+
+ok(1);
+
+#########################
+
+{
+ # Table is undefined, then no entry is defined.
+ my $undef_table = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ level => 1,
+ );
+
+ # in the Unicode code point order
+ ok($undef_table->lt('', 'A'));
+ ok($undef_table->lt('ABC', 'B'));
+
+ # Hangul should be decomposed (even w/o Unicode::Normalize).
+ ok($undef_table->lt("Perl", "\x{AC00}"));
+ ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
+ ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
+ ok($undef_table->lt("\x{AE00}", "\x{3042}"));
+
+ # U+AC00: Hangul GA
+ # U+AE00: Hangul GEUL
+ # U+3042: Hiragana A
+
+ # Weight for CJK Ideographs is defined, though.
+ ok($undef_table->lt("", "\x{4E00}"));
+ ok($undef_table->lt("\x{4E8C}","ABC"));
+ ok($undef_table->lt("\x{4E00}","\x{3042}"));
+ ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
+
+# 11
+
+ # U+4E00: Ideograph "ONE"
+ # U+4E8C: Ideograph "TWO"
+
+ for my $v ('', 8, 9, 11, 14) {
+ $undef_table->change(UCA_Version => $v) if $v;
+ ok($undef_table->lt("\x{4E00}","\0"));
+ }
+}
+
+# 16
+
+{
+ my $onlyABC = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ entry => << 'ENTRIES',
+0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
+0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
+0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
+0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
+0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
+0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
+ENTRIES
+ );
+ ok(
+ join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
+ join(':', qw/ A aB Ab ABA BAC cAc cc / ),
+ );
+}
+
+# 17
+
+{
+ my $few_entries = Unicode::Collate->new(
+ entry => <<'ENTRIES',
+0050 ; [.0101.0020.0002.0050] # P
+0045 ; [.0102.0020.0002.0045] # E
+0052 ; [.0103.0020.0002.0052] # R
+004C ; [.0104.0020.0002.004C] # L
+1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
+1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
+5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
+ENTRIES
+ table => undef,
+ normalization => undef,
+ );
+ # defined before undefined
+ my $sortABC = join '',
+ $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
+
+ ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
+
+ ok($few_entries->lt('E', 'D'));
+ ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
+ ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
+
+ # Hangul must be decomposed.
+ ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
+}
+
+# 22
+
+{
+ my $highestNUL = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ level => 1,
+ entry => '0000 ; [.FFFE.0020.0005.0000]',
+ );
+
+ for my $v ('', 8, 9, 11, 14) {
+ $highestNUL->change(UCA_Version => $v) if $v;
+ ok($highestNUL->lt("abc\x{4E00}", "abc\0"));
+ ok($highestNUL->lt("abc\x{E0000}","abc\0"));
+ }
+}
+
+# 32
diff --git a/cpan/Unicode-Collate/t/test.t b/cpan/Unicode-Collate/t/test.t
index 552440f919..00feeadad9 100644
--- a/cpan/Unicode-Collate/t/test.t
+++ b/cpan/Unicode-Collate/t/test.t
@@ -13,7 +13,7 @@ BEGIN {
use strict;
use warnings;
-BEGIN { $| = 1; print "1..112\n"; }
+BEGIN { $| = 1; print "1..96\n"; }
my $count = 0;
sub ok ($;$) {
my $p = my $r = shift;
@@ -40,7 +40,7 @@ my $acute = _pack_U(0x0301);
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
-##### 2..7
+# 1
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
@@ -63,7 +63,7 @@ ok(
join(':', qw/ ACA ACHA ACIA ACKA ADA / ),
);
-##### 8..18
+# 7
ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
@@ -81,7 +81,7 @@ ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));
-##### 19..25
+# 18
$Collator->change(level => 2);
@@ -94,7 +94,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 26..31
+# 25
# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
@@ -104,7 +104,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana
-##### 32..40
+# 31
$Collator->change(%old_level, katakana_before_hiragana => 1);
@@ -119,7 +119,7 @@ ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 41..46
+# 40
$Collator->change(upper_before_lower => 1);
@@ -130,14 +130,14 @@ ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);
-##### 47..48
+# 46
$Collator->change(katakana_before_hiragana => 0);
ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);
-##### 49..52
+# 48
$Collator->change(upper_before_lower => 0);
@@ -146,7 +146,8 @@ ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );
-##### 53..54
+# 52
+
{
my $ignoreAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -157,27 +158,8 @@ ok( $Collator->lt($hiragana, $katakana) );
ok($ignoreAE->eq("Perl","ePrl"));
}
-##### 55
-{
- my $onlyABC = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- entry => << 'ENTRIES',
-0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
-0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
-0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
-0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
-0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
-0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
-ENTRIES
- );
- ok(
- join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
- join(':', qw/ A aB Ab ABA BAC cAc cc / ),
- );
-}
+# 54
-##### 56..59
{
my $undefAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -190,69 +172,8 @@ ENTRIES
ok($Collator->lt("lake","like"));
}
-##### 60..69
-{
- # Table is undefined, then no entry is defined.
- my $undef_table = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- level => 1,
- );
-
- # in the Unicode code point order
- ok($undef_table->lt('', 'A'));
- ok($undef_table->lt('ABC', 'B'));
-
- # Hangul should be decomposed (even w/o Unicode::Normalize).
- ok($undef_table->lt("Perl", "\x{AC00}"));
- ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
- ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
- ok($undef_table->lt("\x{AE00}", "\x{3042}"));
+# 58
- # U+AC00: Hangul GA
- # U+AE00: Hangul GEUL
- # U+3042: Hiragana A
-
- # Weight for CJK Ideographs is defined, though.
- ok($undef_table->lt("", "\x{4E00}"));
- ok($undef_table->lt("\x{4E8C}","ABC"));
- ok($undef_table->lt("\x{4E00}","\x{3042}"));
- ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
-
- # U+4E00: Ideograph "ONE"
- # U+4E8C: Ideograph "TWO"
-}
-
-##### 70..74
-{
- my $few_entries = Unicode::Collate->new(
- entry => <<'ENTRIES',
-0050 ; [.0101.0020.0002.0050] # P
-0045 ; [.0102.0020.0002.0045] # E
-0052 ; [.0103.0020.0002.0052] # R
-004C ; [.0104.0020.0002.004C] # L
-1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
-1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
-5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
-ENTRIES
- table => undef,
- normalization => undef,
- );
- # defined before undefined
- my $sortABC = join '',
- $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
-
- ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
-
- ok($few_entries->lt('E', 'D'));
- ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
- ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
-
- # Hangul must be decomposed.
- ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
-}
-
-##### 75..79
{
my $dropArticles = Unicode::Collate->new(
table => "keys.txt",
@@ -270,7 +191,8 @@ ENTRIES
ok($Collator->gt("the pen", "a pencil"));
}
-##### 80..83
+# 63
+
{
my $undefName = Unicode::Collate->new(
table => "keys.txt",
@@ -286,7 +208,8 @@ ENTRIES
ok($Collator ->gt("\x{4E03}", $katakana));
}
-##### 84..90
+# 67
+
{
my $O_str = Unicode::Collate->new(
table => "keys.txt",
@@ -321,7 +244,7 @@ ENTRIES
ok($O_str ->gt("\x{200B}", "A"));
}
-##### 91..101
+# 74
my %origVer = $Collator->change(UCA_Version => 8);
@@ -351,7 +274,7 @@ $Collator->change(level => 4);
ok($Collator->gt("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
-##### 102..107
+# 85
$_ = 'Foo';
@@ -385,7 +308,7 @@ $_ = 'Foo';
@temp = $c->index("perl5", "LR");
ok($_, 'Foo');
-##### 108..109
+# 91
{
my $caseless = Unicode::Collate->new(
@@ -397,7 +320,7 @@ ok($_, 'Foo');
ok( $caseless->eq("ABC","abc") );
}
-##### 110..112
+# 93
{
eval { require Unicode::Normalize; };
@@ -417,4 +340,5 @@ ok($_, 'Foo');
}
}
-#####
+# 96
+