diff options
Diffstat (limited to 'cpan/Unicode-Collate/t')
-rw-r--r-- | cpan/Unicode-Collate/t/cjkrange.t | 4 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/compatui.t | 4 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/hangtype.t | 4 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/ident.t | 161 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/index.t | 122 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/overcjk0.t | 4 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/overcjk1.t | 4 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/test.t | 35 | ||||
-rw-r--r-- | cpan/Unicode-Collate/t/view.t | 43 |
9 files changed, 348 insertions, 33 deletions
diff --git a/cpan/Unicode-Collate/t/cjkrange.t b/cpan/Unicode-Collate/t/cjkrange.t index 37fb9fd279..e3d4f38d73 100644 --- a/cpan/Unicode-Collate/t/cjkrange.t +++ b/cpan/Unicode-Collate/t/cjkrange.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..379\n"; } # 1 + 42 x @Versions +BEGIN { $| = 1; print "1..421\n"; } # 1 + 42 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -49,7 +49,7 @@ my $coll = Unicode::Collate->new( # 2A700..2B734 are CJK UI Ext.C since UCA_Version 20 (Unicode 5.2). # 2B740..2B81D are CJK UI Ext.D since UCA_Version 22 (Unicode 6.0). -my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24); +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); for my $v (@Versions) { $coll->change(UCA_Version => $v); diff --git a/cpan/Unicode-Collate/t/compatui.t b/cpan/Unicode-Collate/t/compatui.t index 6fb01b892c..822743ed8f 100644 --- a/cpan/Unicode-Collate/t/compatui.t +++ b/cpan/Unicode-Collate/t/compatui.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..631\n"; } # 1 + 70 x @Versions +BEGIN { $| = 1; print "1..701\n"; } # 1 + 70 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -30,7 +30,7 @@ ok(1); ######################### -my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24); +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); # 12 compatibility ideographs are treated as unified ideographs: # FA0E, FA0F, FA11, FA13, FA14, FA1F, FA21, FA23, FA24, FA27, FA28, FA29. diff --git a/cpan/Unicode-Collate/t/hangtype.t b/cpan/Unicode-Collate/t/hangtype.t index b85a308f12..5aa7d49153 100644 --- a/cpan/Unicode-Collate/t/hangtype.t +++ b/cpan/Unicode-Collate/t/hangtype.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..451\n"; } # 1 + 50 x @Versions +BEGIN { $| = 1; print "1..501\n"; } # 1 + 50 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -30,7 +30,7 @@ ok(1); ######################### -my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24); +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); for my $v (@Versions) { ok(Unicode::Collate::getHST(0x0000, $v), ''); diff --git a/cpan/Unicode-Collate/t/ident.t b/cpan/Unicode-Collate/t/ident.t new file mode 100644 index 0000000000..4f132d4ec1 --- /dev/null +++ b/cpan/Unicode-Collate/t/ident.t @@ -0,0 +1,161 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use strict; +use warnings; +BEGIN { $| = 1; print "1..45\n"; } +my $count = 0; +sub ok ($;$) { + my $p = my $r = shift; + if (@_) { + my $x = shift; + $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; + } + print $p ? "ok" : "not ok", ' ', ++$count, "\n"; +} + +use Unicode::Collate; + +ok(1); + +######################### + +my $Collator = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, +); + +# [001F] UNIT SEPARATOR +{ + ok($Collator->eq("\0", "\x1F")); + ok($Collator->eq("\x1F", "\x{200B}")); + ok($Collator->eq("\0", "\x{200B}")); + ok($Collator->eq("\x{313}", "\x{343}")); + ok($Collator->eq("\x{2000}", "\x{2001}")); + ok($Collator->eq("\x{200B}", "\x{200C}")); + ok($Collator->eq("\x{304C}", "\x{304B}\x{3099}")); + + $Collator->change(identical => 1); + + ok($Collator->lt("\0", "\x1F")); + ok($Collator->lt("\x1F", "\x{200B}")); + ok($Collator->lt("\0", "\x{200B}")); + ok($Collator->lt("\x{313}", "\x{343}")); + ok($Collator->lt("\x{2000}", "\x{2001}")); + ok($Collator->lt("\x{200B}", "\x{200C}")); + ok($Collator->gt("\x{304C}", "\x{304B}\x{3099}")); + + $Collator->change(identical => 0); + + ok($Collator->eq("\0", "\x1F")); + ok($Collator->eq("\x1F", "\x{200B}")); + ok($Collator->eq("\0", "\x{200B}")); + ok($Collator->eq("\x{313}", "\x{343}")); + ok($Collator->eq("\x{2000}", "\x{2001}")); + ok($Collator->eq("\x{200B}", "\x{200C}")); + ok($Collator->eq("\x{304C}", "\x{304B}\x{3099}")); +} + +#### 22 + +eval { require Unicode::Normalize }; +if (!$@) { + $Collator->change(normalization => "NFD"); + + $Collator->change(identical => 1); + + ok($Collator->lt("\0", "\x{200B}")); + ok($Collator->eq("\x{313}", "\x{343}")); + ok($Collator->lt("\x{2000}", "\x{2001}")); + ok($Collator->lt("\x{200B}", "\x{200C}")); + ok($Collator->eq("\x{304C}", "\x{304B}\x{3099}")); + + $Collator->change(identical => 0); + + ok($Collator->eq("\0", "\x{200B}")); + ok($Collator->eq("\x{313}", "\x{343}")); + ok($Collator->eq("\x{2000}", "\x{2001}")); + ok($Collator->eq("\x{200B}", "\x{200C}")); + ok($Collator->eq("\x{304C}", "\x{304B}\x{3099}")); +} else { + ok(1) for 1..10; +} + +$Collator->change(normalization => undef, identical => 1); + +##### 32 + +ok($Collator->viewSortKey("\0"), '[| | | | 0000 0000]'); +ok($Collator->viewSortKey("\x{200B}"), '[| | | | 0000 200B]'); + +ok($Collator->viewSortKey('a'), + '[0A15 | 0020 | 0002 | FFFF | 0000 0061]'); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926 | 0020 013D | 000E 0002 | FFFF FFFF | 0000 304C]'); + +ok($Collator->viewSortKey("\x{100000}"), + '[FBE0 8000 | 0020 | 0002 | FFFF FFFF | 0010 0000]'); + +eval { require Unicode::Normalize }; +if (!$@) { + $Collator->change(normalization => "NFD"); + + ok($Collator->viewSortKey("\x{304C}"), + '[1926 | 0020 013D | 000E 0002 | FFFF FFFF | 0000 304B 0000 3099]'); +} else { + ok(1); +} + +$Collator->change(normalization => undef); + +##### 38 + +$Collator->change(level => 3); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926 | 0020 013D | 000E 0002 | | 0000 304C]'); + +$Collator->change(level => 2); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926 | 0020 013D | | | 0000 304C]'); + +$Collator->change(level => 1); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926 | | | | 0000 304C]'); + +##### 41 + +$Collator->change(UCA_Version => 8); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926||||0000 304C]'); + +$Collator->change(level => 2); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926|0020 013D|||0000 304C]'); + +$Collator->change(level => 3); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926|0020 013D|000E 0002||0000 304C]'); + +$Collator->change(level => 4); + +ok($Collator->viewSortKey("\x{304C}"), + '[1926|0020 013D|000E 0002|FFFF FFFF|0000 304C]'); + +##### 45 diff --git a/cpan/Unicode-Collate/t/index.t b/cpan/Unicode-Collate/t/index.t index b3433a9e5f..11cf618f5a 100644 --- a/cpan/Unicode-Collate/t/index.t +++ b/cpan/Unicode-Collate/t/index.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..71\n"; } +BEGIN { $| = 1; print "1..91\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -28,8 +28,6 @@ use Unicode::Collate; ok(1); -######################### - our $IsEBCDIC = ord("A") != 0x41; my $Collator = Unicode::Collate->new( @@ -37,7 +35,7 @@ my $Collator = Unicode::Collate->new( normalization => undef, ); -############## +##### 1 my %old_level = $Collator->change(level => 2); @@ -64,7 +62,7 @@ if (my($pos,$len) = $Collator->index($str, $sub)) { ok($str, $orig); -############## +##### 3 my $match; @@ -126,7 +124,7 @@ if (my($pos, $len) = $Collator->index($str, $sub)) { } ok($match, $ret); -############## +##### 9 $Collator->change(level => 1); @@ -165,7 +163,7 @@ if (my($pos,$len) = $Collator->index("", "abc")) { } ok($match, undef); -############## +##### 13 $Collator->change(level => 1); @@ -201,7 +199,7 @@ if (my($pos, $len) = $Collator->index($str, $sub)) { } ok($match, $ret); -############## +##### 16 $Collator->change(level => 1); @@ -246,7 +244,7 @@ ok($match, undef); $Collator->change(%old_level); -############## +##### 22 my @ret; @@ -318,7 +316,7 @@ ok($ret, undef); $Collator->change(%old_level); -############## +##### 38 $Collator->change(level => 1); @@ -349,6 +347,8 @@ $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> " . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); +##### 47 + # http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html # when the substring includes an ignorable element like a space... @@ -376,6 +376,8 @@ $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; $Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" }); ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); +##### 53 + $Collator->change(level => 3); $str = "P\cBe\x{300}\cBrl and PERL."; @@ -400,7 +402,7 @@ ok($str, "P\cBe\x{300}\cBrl and PERL."); $Collator->change(%old_level); -############## +##### 61 $str = "Perl and Camel"; $ret = $Collator->gsubst($str, "\cA\cA\0", "AB"); @@ -422,7 +424,7 @@ $ret = $Collator->gsubst($str, 'PP', "ABC"); ok($ret, 2); ok($str, "ABCABCP"); -############## +##### 69 # Shifted; ignorable after variable @@ -434,3 +436,99 @@ $Collator->change(alternate => 'Non-ignorable'); ($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!"); ok($ret, undef); +##### 71 + +# Now preprocess is defined. + +$Collator->change(preprocess => sub {''}); + +eval { $Collator->index("", "") }; +ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + +eval { $Collator->index("a", "a") }; +ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + +eval { $Collator->match("", "") }; +ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); + +eval { $Collator->match("a", "a") }; +ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); + +$Collator->change(preprocess => sub { uc shift }); + +eval { $Collator->index("", "") }; +ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + +eval { $Collator->index("a", "a") }; +ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + +eval { $Collator->match("", "") }; +ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); + +eval { $Collator->match("a", "a") }; +ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); + +##### 79 + +eval { require Unicode::Normalize }; +my $has_norm = !$@; + +if ($has_norm) { + # Now preprocess and normalization are defined. + + $Collator->change(normalization => 'NFD'); + + eval { $Collator->index("", "") }; + ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + + eval { $Collator->index("a", "a") }; + ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); + + eval { $Collator->match("", "") }; + ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); + + eval { $Collator->match("a", "a") }; + ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); +} else { + ok(1) for 1..4; +} + +$Collator->change(preprocess => undef); + +if ($has_norm) { + # Now only normalization is defined. + + eval { $Collator->index("", "") }; + ok($@ && $@ =~ /Don't use Normalization with index\(\)/); + + eval { $Collator->index("a", "a") }; + ok($@ && $@ =~ /Don't use Normalization with index\(\)/); + + eval { $Collator->match("", "") }; + ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); + + eval { $Collator->match("a", "a") }; + ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); + + $Collator->change(normalization => undef); +} else { + ok(1) for 1..4; +} + +##### 87 + +# Now preprocess and normalization are undef. + +eval { $Collator->index("", "") }; +ok(!$@); + +eval { $Collator->index("a", "a") }; +ok(!$@); + +eval { $Collator->match("", "") }; +ok(!$@); + +eval { $Collator->match("a", "a") }; +ok(!$@); + +##### 91 diff --git a/cpan/Unicode-Collate/t/overcjk0.t b/cpan/Unicode-Collate/t/overcjk0.t index 588e8a8c02..081f57b158 100644 --- a/cpan/Unicode-Collate/t/overcjk0.t +++ b/cpan/Unicode-Collate/t/overcjk0.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..285\n"; } # 6 + 31 x @Versions +BEGIN { $| = 1; print "1..316\n"; } # 6 + 31 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -62,7 +62,7 @@ ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned. # 2A700..2B734 are CJK UI Ext.C since UCA_Version 20 (Unicode 5.2). # 2B740..2B81D are CJK UI Ext.D since UCA_Version 22 (Unicode 6.0). -my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24); +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); for my $v (@Versions) { $ignoreCJK->change(UCA_Version => $v); diff --git a/cpan/Unicode-Collate/t/overcjk1.t b/cpan/Unicode-Collate/t/overcjk1.t index dc3ae8f23b..7bee17658a 100644 --- a/cpan/Unicode-Collate/t/overcjk1.t +++ b/cpan/Unicode-Collate/t/overcjk1.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..155\n"; } # 11 + 16 x @Versions +BEGIN { $| = 1; print "1..171\n"; } # 11 + 16 x @Versions my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -62,7 +62,7 @@ ok($overCJK->lt("a\x{4E03}", "A\x{4E01}")); # 9FC4..9FCB are CJK UI since UCA_Version 20 (Unicode 5.2). # 9FCC is CJK UI since UCA_Version 24 (Unicode 6.1). -my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24); +my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26); for my $v (@Versions) { $overCJK->change(UCA_Version => $v); diff --git a/cpan/Unicode-Collate/t/test.t b/cpan/Unicode-Collate/t/test.t index 440c3a9da1..552440f919 100644 --- a/cpan/Unicode-Collate/t/test.t +++ b/cpan/Unicode-Collate/t/test.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..107\n"; } +BEGIN { $| = 1; print "1..112\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -385,5 +385,36 @@ $_ = 'Foo'; @temp = $c->index("perl5", "LR"); ok($_, 'Foo'); -##### +##### 108..109 + +{ + my $caseless = Unicode::Collate->new( + table => "keys.txt", + normalization => undef, + preprocess => sub { uc shift }, + ); + ok( $Collator->gt("ABC","abc") ); + ok( $caseless->eq("ABC","abc") ); +} +##### 110..112 + +{ + eval { require Unicode::Normalize; }; + if ($@) { + eval { my $n1 = Unicode::Collate->new(table => "keys.txt"); }; + ok($@ =~ /Unicode::Normalize is required/); + + eval { my $n2 = Unicode::Collate->new + (table => "keys.txt", normalization => undef); }; + ok(!$@); + + eval { my $n3 = Unicode::Collate->new + (table => "keys.txt", normalization => 'prenormalized'); }; + ok($@ =~ /Unicode::Normalize is required/); + } else { + ok(1) for 1..3; + } +} + +##### diff --git a/cpan/Unicode-Collate/t/view.t b/cpan/Unicode-Collate/t/view.t index 6f7c0fb7ae..4759533823 100644 --- a/cpan/Unicode-Collate/t/view.t +++ b/cpan/Unicode-Collate/t/view.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..53\n"; } +BEGIN { $| = 1; print "1..89\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -28,15 +28,14 @@ use Unicode::Collate; ok(1); -######################### +##### 1 my $Collator = Unicode::Collate->new( table => 'keys.txt', normalization => undef, + UCA_Version => 24, ); -############## - ok($Collator->viewSortKey(""), "[| | |]"); ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 | FFFF]"); @@ -60,7 +59,7 @@ ok($Collator->viewSortKey("A"), "[0A15 | 0020 | |]"); $Collator->change(level => 1); ok($Collator->viewSortKey("A"), "[0A15 | | |]"); -### Version 8 +##### 10 $Collator->change(level => 4, UCA_Version => 8); @@ -87,7 +86,7 @@ ok($Collator->viewSortKey("A"), "[0A15|0020||]"); $Collator->change(level => 1); ok($Collator->viewSortKey("A"), "[0A15|||]"); -# Version 9 +##### 19 $Collator->change(level => 3, UCA_Version => 9); ok($Collator->viewSortKey("A\x{300}z\x{301}"), @@ -156,7 +155,7 @@ ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]'); $Collator->change(%origVar); -##### +##### 37 # Level 3 weight @@ -197,7 +196,7 @@ ok($Collator->viewSortKey("a\x{3042}"), ok($Collator->viewSortKey("A\x{30A2}"), '[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]'); -##### +##### 47 our $el = Unicode::Collate->new( entry => <<'ENTRY', @@ -214,6 +213,7 @@ FF2C ; [.0B03.0020.0009.FF2C] # FULLWIDTH LATIN CAPITAL LETTER L; QQK ENTRY table => undef, normalization => undef, + UCA_Version => 24, ); our $el12 = '0B03 0B03 0B03 0B03 0B03 | 0020 0020 0020 0020 0020'; @@ -240,5 +240,30 @@ ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"), ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"), "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]"); -##### +##### 53 + +my @Versions = (9, 11, 14, 16, 18, 20, 22, 24, 26); + +for my $v (@Versions) { + $Collator->change(UCA_Version => $v); + my $app = $v >= 26 ? ' |]' : ']'; + + $Collator->change(variable => 'Shifted', level => 4); + ok($Collator->viewSortKey("1+2"), + '[0A0C 0A0D | 0020 0020 | 0002 0002 | FFFF 039F FFFF'.$app); + + $Collator->change(variable => 'Shift-Trimmed'); + ok($Collator->viewSortKey("1+2"), + '[0A0C 0A0D | 0020 0020 | 0002 0002 | 039F'.$app); + + $Collator->change(variable => 'Non-ignorable', level => 3); + ok($Collator->viewSortKey("1+2"), + '[0A0C 039F 0A0D | 0020 0020 0020 | 0002 0002 0002 |]'); + + $Collator->change(variable => 'Blanked'); + ok($Collator->viewSortKey("1+2"), + '[0A0C 0A0D | 0020 0020 | 0002 0002 |]'); +} + +##### 89 |