summary | refs | log | tree | commit | diff
path: root/cpan/Unicode-Collate/t/override.t
diff options
context:
space:
mode:
Diffstat (limited to 'cpan/Unicode-Collate/t/override.t')
-rw-r--r-- cpan/Unicode-Collate/t/override.t 187
1 files changed, 187 insertions, 0 deletions
diff --git a/cpan/Unicode-Collate/t/override.t b/cpan/Unicode-Collate/t/override.t
new file mode 100644
index 0000000000..3e48e15f29
--- /dev/null
+++ b/cpan/Unicode-Collate/t/override.t
@@ -0,0 +1,187 @@
+BEGIN { # Pre-flight setup; a BEGIN block runs as soon as it has been compiled.
+ unless ("A" eq pack('U', 0x41)) { # pack('U', 0x41) must produce the string "A"
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0; # a "1..0" plan with the reason was printed; no test is run
+ }
+ if ($ENV{PERL_CORE}) { # only when PERL_CORE is set in the environment
+ chdir('t') if -d 't'; # move into t/ when that directory exists
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); # replace @INC with one relative lib directory
+ }
+}
+
+use Test;
+BEGIN { plan tests => 76 }; # must equal the number of ok() calls in this file
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1); # test 1: only reached if the use lines above succeeded
+
+##### 2..6: UCA_Version 8, no table and no overrides
+
+my $all_undef_8 = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => undef,
+ overrideHangul => undef,
+ UCA_Version => 8,
+);
+
+# Expected: all characters sort in Unicode code point order,
+# and Hangul syllables are not decomposed into Jamo.
+
+ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); # U+3402 before U+4E00
+ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); # U+4DFF before U+4E00
+ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); # U+4E00 before U+AC00
+ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); # syllable after the Jamo pair U+1100 U+1161
+ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); # U+AC00 after U+ABFF
+
+
+##### 7..11: UCA_Version 9, no table and no overrides
+
+my $all_undef_9 = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => undef,
+ overrideHangul => undef,
+ UCA_Version => 9,
+);
+
+# Expected order: CJK Unified Ideographs < CJK Extension A/B < others.
+# Hangul syllables are not decomposed into Jamo.
+
+ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); # U+4E00 before U+3402 (not code point order)
+ok($all_undef_9->lt("\x{3402}", "\x{20000}")); # U+3402 before U+20000
+ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); # U+20000 before the Hangul syllable U+AC00
+ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); # syllable after the Jamo pair U+1100 U+1161
+ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
+
+##### 12..16: overrideHangul that makes Hangul syllables ignorable
+
+my $ignoreHangul = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideHangul => sub {()}, # always returns an empty list
+ entry => <<'ENTRIES',
+AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
+ENTRIES
+);
+
+# All Hangul Syllables except U+AE00 (which has the entry above) are ignored.
+
+ok($ignoreHangul->eq("\x{AC00}", "")); # an ignored syllable equals the empty string
+ok($ignoreHangul->lt("\x{AC00}", "\0")); # and sorts before U+0000
+ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); # U+AE00 is not ignored
+ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
+ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned (table => undef).
+
+
+my $ignoreCJK = Unicode::Collate->new( # note: no UCA_Version is passed here
+ table => undef,
+ normalization => undef,
+ overrideCJK => sub {()}, # always returns an empty list
+ entry => <<'ENTRIES',
+5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
+ENTRIES
+);
+
+# All CJK Unified Ideographs except U+5B57 (which has the entry above) are ignored.
+
+##### 17..21: overrideCJK that makes CJK Unified Ideographs ignorable
+ok($ignoreCJK->eq("\x{4E00}", "")); # an ignored ideograph equals the empty string
+ok($ignoreCJK->lt("\x{4E00}", "\0")); # and sorts before U+0000
+ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK ideograph, so it is ignored.
+ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK, so it is not ignored.
+ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned (table => undef).
+
+##### 22..29: range boundaries; no UCA_Version has been set on $ignoreCJK yet
+ok($ignoreCJK->eq("\x{3400}", "")); # ignored
+ok($ignoreCJK->eq("\x{4DB5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA6}", "")); # Unified Ideograph since Unicode 4.1.0
+ok($ignoreCJK->eq("\x{9FBB}", "")); # Unified Ideograph since Unicode 4.1.0
+ok($ignoreCJK->gt("\x{9FBC}", "Perl")); # U+9FBC is expected not to be ignored
+ok($ignoreCJK->eq("\x{20000}", "")); # ignored
+ok($ignoreCJK->eq("\x{2A6D6}", "")); # ignored
+
+##### 30..37: the same code points under UCA_Version 9
+$ignoreCJK->change(UCA_Version => 9);
+ok($ignoreCJK->eq("\x{3400}", "")); # ignored
+ok($ignoreCJK->eq("\x{4DB5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA5}", "")); # ignored
+ok($ignoreCJK->gt("\x{9FA6}", "Perl")); # expected not to be ignored under version 9
+ok($ignoreCJK->gt("\x{9FBB}", "Perl")); # expected not to be ignored under version 9
+ok($ignoreCJK->gt("\x{9FBC}", "Perl")); # expected not to be ignored
+ok($ignoreCJK->eq("\x{20000}", "")); # ignored
+ok($ignoreCJK->eq("\x{2A6D6}", "")); # ignored
+
+##### 38..45: the same code points under UCA_Version 8
+$ignoreCJK->change(UCA_Version => 8);
+ok($ignoreCJK->eq("\x{3400}", "")); # ignored
+ok($ignoreCJK->eq("\x{4DB5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA5}", "")); # ignored
+ok($ignoreCJK->gt("\x{9FA6}", "Perl")); # expected not to be ignored under version 8
+ok($ignoreCJK->gt("\x{9FBB}", "Perl")); # expected not to be ignored under version 8
+ok($ignoreCJK->gt("\x{9FBC}", "Perl")); # expected not to be ignored
+ok($ignoreCJK->eq("\x{20000}", "")); # ignored
+ok($ignoreCJK->eq("\x{2A6D6}", "")); # ignored
+
+##### 46..53: the same code points under UCA_Version 14
+$ignoreCJK->change(UCA_Version => 14);
+ok($ignoreCJK->eq("\x{3400}", "")); # ignored
+ok($ignoreCJK->eq("\x{4DB5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA5}", "")); # ignored
+ok($ignoreCJK->eq("\x{9FA6}", "")); # Unified Ideograph since Unicode 4.1.0
+ok($ignoreCJK->eq("\x{9FBB}", "")); # Unified Ideograph since Unicode 4.1.0
+ok($ignoreCJK->gt("\x{9FBC}", "Perl")); # U+9FBC is expected not to be ignored
+ok($ignoreCJK->eq("\x{20000}", "")); # ignored
+ok($ignoreCJK->eq("\x{2A6D6}", "")); # ignored
+
+##### 54..76: overrideCJK that supplies its own values instead of ignoring
+my $overCJK = Unicode::Collate->new( # note: no UCA_Version is passed here
+ table => undef,
+ normalization => undef,
+ entry => <<'ENTRIES',
+0061 ; [.0101.0020.0002.0061] # latin a
+0041 ; [.0101.0020.0008.0041] # LATIN A
+4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
+ENTRIES
+ overrideCJK => sub {
+ my $u = 0xFFFF - $_[0]; # reversed: a larger argument gives a smaller $u
+ [$u, 0x20, 0x2, $u]; # returned array ref; presumably one value per weight level -- confirm in the module docs
+ },
+);
+
+ok($overCJK->lt("a", "A")); # differ only at level 3 (0002 vs. 0008 in the entries).
+ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # differ at level 2 (0x20 from the sub vs. 0030 in the entry).
+ok($overCJK->lt("A\x{4E03}", "A\x{4E00}")); # same prefix; the ideographs decide
+ok($overCJK->lt("A\x{4E03}", "a\x{4E00}")); # expected lt although "A" is not lt "a": level 2 decides first
+ok($overCJK->lt("a\x{4E03}", "A\x{4E00}")); # both the prefix and the ideographs give lt
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); # tests 59..64: gt means the reversed order from the sub applies
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); # reversed order expected
+ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # reversed order expected
+ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # reversed order expected
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); # expectation flips to lt from U+9FBC on
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); # lt expected
+
+$overCJK->change(UCA_Version => 9); # tests 65..70: the same pairs under UCA_Version 9
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); # reversed order expected
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); # reversed order expected
+ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}")); # under version 9 the expectation flips to lt from U+9FA6 on
+ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}")); # lt expected
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); # lt expected
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); # lt expected
+
+$overCJK->change(UCA_Version => 14); # tests 71..76: the same pairs under UCA_Version 14
+
+ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); # reversed order expected
+ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); # reversed order expected
+ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # reversed order expected again under version 14
+ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # reversed order expected again under version 14
+ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); # expectation flips to lt from U+9FBC on
+ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); # lt expected
+