summary refs log tree commit diff
path: root/cpan/Unicode-Collate/t/override.t
diff options
context:
space:
mode:
Diffstat (limited to 'cpan/Unicode-Collate/t/override.t')
-rw-r--r-- cpan/Unicode-Collate/t/override.t 210
1 files changed, 49 insertions, 161 deletions
diff --git a/cpan/Unicode-Collate/t/override.t b/cpan/Unicode-Collate/t/override.t
index 0149f16f1f..032db15bb1 100644
--- a/cpan/Unicode-Collate/t/override.t
+++ b/cpan/Unicode-Collate/t/override.t
@@ -11,7 +11,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 137 };
+BEGIN { plan tests => 35 };
use strict;
use warnings;
@@ -77,183 +77,71 @@ ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
+##### 17..21
-my $ignoreCJK = Unicode::Collate->new(
+my $undefHangul = Unicode::Collate->new(
table => undef,
normalization => undef,
- overrideCJK => sub {()},
- entry => <<'ENTRIES',
-5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
-ENTRIES
+ overrideHangul => sub {
+ my $u = shift;
+ return $u == 0xAE00 ? 0x100 : undef;
+ }
);
-# All CJK Unified Ideographs except U+5B57 are ignored.
-
-##### 17..21
-ok($ignoreCJK->eq("\x{4E00}", ""));
-ok($ignoreCJK->lt("\x{4E00}", "\0"));
-ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
-ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
-ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
+# All Hangul Syllables except U+AE00 are undefined.
-##### 22..35
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBC}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC4}", "")); # UI since Unicode 5.2.0
-ok($ignoreCJK->eq("\x{9FCB}", "")); # UI since Unicode 5.2.0
-ok($ignoreCJK->gt("\x{9FCC}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", ""));
-ok($ignoreCJK->eq("\x{2A6D6}", ""));
-ok($ignoreCJK->eq("\x{2A700}", "")); # ExtC since Unicode 5.2.0
-ok($ignoreCJK->eq("\x{2B734}", "")); # ExtC since Unicode 5.2.0
+ok($undefHangul->lt("\x{AE00}", "r"));
+ok($undefHangul->gt("\x{AC00}", "r"));
+ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
+ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
+ok($undefHangul->lt("\x{AC00}", "\x{B000}"));
-##### 36..45
-$ignoreCJK->change(UCA_Version => 8);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", ""));
-ok($ignoreCJK->eq("\x{2A6D6}", ""));
+##### 22..25
-##### 46..55
-$ignoreCJK->change(UCA_Version => 9);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", ""));
-ok($ignoreCJK->eq("\x{2A6D6}", ""));
+my $undefCJK = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideCJK => sub {
+ my $u = shift;
+ return $u == 0x4E00 ? 0x100 : undef;
+ }
+);
-##### 56..67
-$ignoreCJK->change(UCA_Version => 14);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", ""));
-ok($ignoreCJK->eq("\x{2A6D6}", ""));
-ok($ignoreCJK->gt("\x{2A700}", "Perl"));
-ok($ignoreCJK->gt("\x{2B734}", "Perl"));
+# All CJK Ideographs except U+4E00 are undefined.
-##### 68..81
-$ignoreCJK->change(UCA_Version => 18);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBC}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->gt("\x{9FCB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FCC}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", ""));
-ok($ignoreCJK->eq("\x{2A6D6}", ""));
-ok($ignoreCJK->gt("\x{2A700}", "Perl"));
-ok($ignoreCJK->gt("\x{2B734}", "Perl"));
+ok($undefCJK->lt("\x{4E00}", "r"));
+ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
+ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); # 'r' is unassigned.
+ok($undefCJK->lt("\x{5000}", "\x{6000}"));
-#####
+##### 26..30
-my $overCJK = Unicode::Collate->new(
+my $cpHangul = Unicode::Collate->new(
table => undef,
normalization => undef,
- entry => <<'ENTRIES',
-0061 ; [.0101.0020.0002.0061] # latin a
-0041 ; [.0101.0020.0008.0041] # LATIN A
-4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
-ENTRIES
- overrideCJK => sub {
- my $u = 0xFFFF - $_[0]; # reversed
- [$u, 0x20, 0x2, $u];
- },
+ overrideHangul => sub { shift }
);
-##### 82..86
-ok($overCJK->lt("a", "A")); # diff. at level 3.
-ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
-ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
-ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
-ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
+ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
+ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
+ok($cpHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
+ok($cpHangul->lt("r", "\x{D7A4}"));
+ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));
-##### 87..97
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FC3}", "A\x{9FC4}")); # UI since Unicode 5.2.0
-ok($overCJK->gt("a\x{9FC4}", "A\x{9FCB}")); # UI since Unicode 5.2.0
-ok($overCJK->lt("a\x{9FCB}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+##### 31..35
-##### 98..106
-$overCJK->change(UCA_Version => 9);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
-ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
-ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
-ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
-ok($overCJK->lt("a\x{9FBF}", "A\x{9FC3}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
-
-##### 107..115
-$overCJK->change(UCA_Version => 14);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
-ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
-ok($overCJK->lt("a\x{9FBF}", "A\x{9FC3}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+my $arrayHangul = Unicode::Collate->new(
+ table => undef,
+ normalization => undef,
+ overrideHangul => sub {
+ my $u = shift;
+ return [$u, 0x20, 0x2, $u];
+ }
+);
-##### 116..126
-$overCJK->change(UCA_Version => 18);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FCB}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
+ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
+ok($arrayHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
+ok($arrayHangul->lt("r", "\x{D7A4}"));
+ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));
-##### 127..137
-$overCJK->change(UCA_Version => 20);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FC3}", "A\x{9FC4}")); # UI since Unicode 5.2.0
-ok($overCJK->gt("a\x{9FC4}", "A\x{9FCB}")); # UI since Unicode 5.2.0
-ok($overCJK->lt("a\x{9FCB}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));