summary refs log tree commit diff
path: root/lib/Unicode/Collate
diff options
context:
space:
mode:
author Nicholas Clark <nick@ccl4.org> 2003-12-27 18:28:37 +0000
committer Nicholas Clark <nick@ccl4.org> 2003-12-27 18:28:37 +0000
commit abd1ec547ebff81b71080b2060165663ba730934 (patch)
tree dd6d5f6b63ebae1de557d61b55b277f7467458a1 /lib/Unicode/Collate
parent aaa799f9f94038e03b6fe1069e18029f5c7fdb32 (diff)
download perl-abd1ec547ebff81b71080b2060165663ba730934.tar.gz
Assimilate Unicode::Collate 0.33
p4raw-id: //depot/perl@21976
Diffstat (limited to 'lib/Unicode/Collate')
-rw-r--r-- lib/Unicode/Collate/Changes 17
-rw-r--r-- lib/Unicode/Collate/README 3
-rw-r--r-- lib/Unicode/Collate/t/altern.t 108
-rw-r--r-- lib/Unicode/Collate/t/contract.t 2
-rw-r--r-- lib/Unicode/Collate/t/illegal.t 113
-rw-r--r-- lib/Unicode/Collate/t/test.t 223
-rw-r--r-- lib/Unicode/Collate/t/version.t 2
-rw-r--r-- lib/Unicode/Collate/t/view.t 239
8 files changed, 536 insertions, 171 deletions
diff --git a/lib/Unicode/Collate/Changes b/lib/Unicode/Collate/Changes
index df60b97c7f..a59ffa0e55 100644
--- a/lib/Unicode/Collate/Changes
+++ b/lib/Unicode/Collate/Changes
@@ -1,5 +1,16 @@
Revision history for Perl module Unicode::Collate.
+0.33 Sat Dec 13 14:07:27 2003
+ - documentation improvement: in "entry", "overrideHangul", etc.
+
+0.32 Wed Dec 3 23:38:18 2003
+ - A matching part from index(), match() etc. will include illegal
+ code points (as well as ignorable characters) following a grapheme.
+ - Contraction with illegal code point will be invalid.
+ - Added some tests in illegal.t; added view.t.
+ - Some tests are separated from test.t into altern.t and rearrang.t.
+ - modified XSUB internals.
+
0.31 Sun Nov 16 15:40:15 2003
- Illegal code points (surrogate and noncharacter; they are definitely
ignorable) will be distinguished from NULL ("\0");
@@ -7,8 +18,8 @@ Revision history for Perl module Unicode::Collate.
(Perl 5.7.3 or before)). If perl 5.6.X is used, XSUB may help it
in place of broken CORE::unpack('U*') in older perl.
- added illegal.t and illegalp.t.
- - added XSUB edition (EXPERIMENTAL) where some functions are implemented
- in XSUB (Pure Perl edition is also supported.)
+ - added XSUB (EXPERIMENTAL!) where some functions are implemented
+ in XSUB. Pure Perl is also supported.
0.30 Mon Oct 13 21:26:37 2003
- fix: Completely ignorable in table should be able to be overridden
@@ -19,7 +30,7 @@ Revision history for Perl module Unicode::Collate.
- some doc fixes
0.29 Mon Oct 13 12:18:23 2003
- - now UCA Version 11.
+ - now UCA Version 11 (but no functionality is different from Version 9).
- supported hangul_terminator.
- fix: Base_Unicode_Version falsely returns Perl's Unicode version.
C4 in UTS #10 requires UTS's Unicode version.
diff --git a/lib/Unicode/Collate/README b/lib/Unicode/Collate/README
index 2fc4e5fcc6..376a0c2c13 100644
--- a/lib/Unicode/Collate/README
+++ b/lib/Unicode/Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.31
+Unicode/Collate version 0.33
===============================
NAME
@@ -47,6 +47,7 @@ To install this module type the following:
make test
make install
+(!! XSUB support for Unicode::Collate is EXPERIMENTAL !!)
If you have a C compiler and want to use XSUB edition,
type the following (!! "enableXS" must run before "Makefile.PL" !!):
diff --git a/lib/Unicode/Collate/t/altern.t b/lib/Unicode/Collate/t/altern.t
new file mode 100644
index 0000000000..d48e168b69
--- /dev/null
+++ b/lib/Unicode/Collate/t/altern.t
@@ -0,0 +1,108 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 37 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+ok(1);
+
+#########################
+
+sub _pack_U { Unicode::Collate::pack_U(@_) }
+sub _unpack_U { Unicode::Collate::unpack_U(@_) }
+
+my $A_acute = _pack_U(0xC1);
+my $acute = _pack_U(0x0301);
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+);
+
+my %origAlt = $Collator->change(alternate => 'Blanked');
+
+ok($Collator->lt("death", "de luge"));
+ok($Collator->lt("de luge", "de-luge"));
+ok($Collator->lt("de-luge", "deluge"));
+ok($Collator->lt("deluge", "de\x{2010}luge"));
+ok($Collator->lt("deluge", "de Luge"));
+
+$Collator->change(alternate => 'Non-ignorable');
+
+ok($Collator->lt("de luge", "de Luge"));
+ok($Collator->lt("de Luge", "de-luge"));
+ok($Collator->lt("de-Luge", "de\x{2010}luge"));
+ok($Collator->lt("de-luge", "death"));
+ok($Collator->lt("death", "deluge"));
+
+$Collator->change(alternate => 'Shifted');
+
+ok($Collator->lt("death", "de luge"));
+ok($Collator->lt("de luge", "de-luge"));
+ok($Collator->lt("de-luge", "deluge"));
+ok($Collator->lt("deluge", "de Luge"));
+ok($Collator->lt("de Luge", "deLuge"));
+
+$Collator->change(alternate => 'Shift-Trimmed');
+
+ok($Collator->lt("death", "deluge"));
+ok($Collator->lt("deluge", "de luge"));
+ok($Collator->lt("de luge", "de-luge"));
+ok($Collator->lt("de-luge", "deLuge"));
+ok($Collator->lt("deLuge", "de Luge"));
+
+$Collator->change(%origAlt);
+
+ok($Collator->{alternate}, 'shifted');
+
+##############
+
+# ignorable after alternate
+
+# Shifted;
+ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
+ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
+ok($Collator->eq("?\x{300}", "?"));
+ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
+
+$Collator->change(level => 3);
+ok($Collator->eq("\cA", "?"));
+
+$Collator->change(alternate => 'blanked', level => 4);
+ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
+ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
+ok($Collator->eq("?\x{300}", "?"));
+ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
+
+$Collator->change(level => 3);
+ok($Collator->eq("\cA", "?"));
+
+$Collator->change(alternate => 'Non-ignorable', level => 4);
+
+ok($Collator->lt("?\x{300}", "?!"));
+ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
+ok($Collator->gt("?\x{300}", "?"));
+ok($Collator->gt("?\x{344}", "?"));
+
+$Collator->change(level => 3);
+ok($Collator->lt("\cA", "?"));
+
+$Collator->change(alternate => 'Shifted', level => 4);
+
diff --git a/lib/Unicode/Collate/t/contract.t b/lib/Unicode/Collate/t/contract.t
index 1c6658d572..18a0cfbdc9 100644
--- a/lib/Unicode/Collate/t/contract.t
+++ b/lib/Unicode/Collate/t/contract.t
@@ -72,7 +72,7 @@ $sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{043A}\x{0334}\x{0301}");
$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{043A}\x{0301}\x{0334}");
eval { require Unicode::Normalize };
-if (!$@ && !$IsEBCDIC) {
+if (!$@) {
my $kjeNFD = Unicode::Collate->new(
level => 1,
table => undef,
diff --git a/lib/Unicode/Collate/t/illegal.t b/lib/Unicode/Collate/t/illegal.t
index b9961b6981..803e2f6739 100644
--- a/lib/Unicode/Collate/t/illegal.t
+++ b/lib/Unicode/Collate/t/illegal.t
@@ -29,7 +29,7 @@ BEGIN {
}
}
-BEGIN { plan tests => 22 };
+BEGIN { plan tests => 40 };
ok(1);
@@ -45,21 +45,25 @@ my $illeg = Unicode::Collate->new(
entry => <<'ENTRIES',
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
-FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE>
-FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF>
-D800 ; [.0024.0000.0000.D800] # <surrogate-D800>
-DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF>
-FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0>
-FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF>
+FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
+FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
+D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
+DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
+FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
+FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
-10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF>
-110000; [.0041.0000.0000.110000] # <out-of-range 110000>
+10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
+110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
+0041 ; [.1000.0020.0008.0041] # latin A
+0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
+0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
level => 1,
table => undef,
normalization => undef,
);
+# 2..12
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
@@ -72,6 +76,7 @@ ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));
+# 13..22
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
@@ -83,3 +88,93 @@ ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));
+# 23..26
+ok($illeg->eq("A", "A\x{FFFF}"));
+ok($illeg->gt("A\0", "A\x{FFFF}"));
+ok($illeg->lt("A", "A\0"));
+ok($illeg->lt("AA", "A\0"));
+
+##################
+
+my($match, $str, $sub, $ret);
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ level => 1,
+ normalization => undef,
+);
+
+$sub = "pe";
+
+
+$str = "Pe\x{300}\x{301}rl";
+$ret = "Pe\x{300}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\0\0\x{301}rl";
+$ret = "Pe\x{300}\0\0\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
+$ret = "Pe\x{DA00}\x{301}\x{DFFF}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{FFFF}\x{301}rl";
+$ret = "Pe\x{FFFF}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{110000}\x{301}rl";
+$ret = "Pe\x{110000}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{d801}\x{301}rl";
+$ret = "Pe\x{300}\x{d801}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{ffff}\x{301}rl";
+$ret = "Pe\x{300}\x{ffff}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{110000}\x{301}rl";
+$ret = "Pe\x{300}\x{110000}\x{301}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{D9ab}\x{DFFF}rl";
+$ret = "Pe\x{D9ab}\x{DFFF}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{FFFF}rl";
+$ret = "Pe\x{FFFF}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{110000}rl";
+$ret = "Pe\x{110000}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{D800}\x{DFFF}rl";
+$ret = "Pe\x{300}\x{D800}\x{DFFF}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{FFFF}rl";
+$ret = "Pe\x{300}\x{FFFF}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+$str = "Pe\x{300}\x{110000}rl";
+$ret = "Pe\x{300}\x{110000}";
+($match) = $Collator->match($str, $sub);
+ok($match, $ret);
+
+
diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t
index 8a7eb8b59f..53fa7ca879 100644
--- a/lib/Unicode/Collate/t/test.t
+++ b/lib/Unicode/Collate/t/test.t
@@ -15,14 +15,12 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 203 };
+BEGIN { plan tests => 160 };
use strict;
use warnings;
use Unicode::Collate;
-our $IsEBCDIC = ord("A") != 0x41;
-
ok(1);
##### 2..6
@@ -73,11 +71,10 @@ ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));
-##### 17..20
+##### 18..20
eval { require Unicode::Normalize };
-
-if (!$@ && !$IsEBCDIC) {
+if (!$@) {
my $NFD = Unicode::Collate->new(
table => 'keys.txt',
level => 1,
@@ -102,7 +99,7 @@ else {
ok(1);
}
-##### 21..30
+##### 21..34
my $trad = Unicode::Collate->new(
table => 'keys.txt',
@@ -127,10 +124,20 @@ ok(
join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
join(':', qw/ aca acha acia acka ada / ),
);
+
ok($trad->eq("ocho", "oc\cAho")); # UCA v9
ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
-ok($trad->eq("-", "")); # also UCA v8
-ok($trad->lt("oc-ho", "ocho")); # also UCA v8
+ok($trad->eq("-", ""));
+ok($trad->gt("ocho", "oc-ho"));
+
+$trad->change(UCA_Version => 8);
+
+ok($trad->gt("ocho", "oc\cAho"));
+ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
+ok($trad->eq("-", ""));
+ok($trad->gt("ocho", "oc-ho"));
+
+$trad->change(UCA_Version => 9);
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
@@ -141,7 +148,7 @@ ok($trad->eq("", $katakana));
ok($trad->eq($hiragana, $katakana));
ok($trad->eq($katakana, $hiragana));
-##### 31..37
+##### 35..41
$Collator->change(level => 2);
@@ -154,7 +161,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 38..43
+##### 42..47
# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
@@ -164,7 +171,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana
-##### 44..52
+##### 48..56
$Collator->change(%old_level, katakana_before_hiragana => 1);
@@ -179,7 +186,7 @@ ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
-##### 53..58
+##### 57..62
$Collator->change(upper_before_lower => 1);
@@ -190,7 +197,7 @@ ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);
-##### 59..64
+##### 63..68
$Collator->change(katakana_before_hiragana => 0);
@@ -204,7 +211,7 @@ ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );
-##### 65..66
+##### 69..70
my $ignoreAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -215,7 +222,7 @@ my $ignoreAE = Unicode::Collate->new(
ok($ignoreAE->eq("element","lament"));
ok($ignoreAE->eq("Perl","ePrl"));
-##### 67
+##### 71
my $onlyABC = Unicode::Collate->new(
table => undef,
@@ -235,7 +242,7 @@ ok(
join(':', qw/ A aB Ab ABA BAC cAc cc / ),
);
-##### 68..71
+##### 72..75
my $undefAE = Unicode::Collate->new(
table => 'keys.txt',
@@ -248,7 +255,7 @@ ok($Collator->lt("edge","fog"));
ok($undefAE ->gt("lake","like"));
ok($Collator->lt("lake","like"));
-##### 72..81
+##### 76..85
# Table is undefined, then no entry is defined.
@@ -282,7 +289,7 @@ ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
# U+4E8C: Ideograph "TWO"
-##### 82..86
+##### 86..90
my $few_entries = Unicode::Collate->new(
entry => <<'ENTRIES',
@@ -313,7 +320,7 @@ ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
-##### 87..91
+##### 91..95
my $all_undef_8 = Unicode::Collate->new(
table => undef,
@@ -332,7 +339,7 @@ ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
-##### 92..96
+##### 96..100
my $all_undef_9 = Unicode::Collate->new(
table => undef,
@@ -351,7 +358,7 @@ ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
-##### 97..101
+##### 101..105
my $ignoreCJK = Unicode::Collate->new(
table => undef,
@@ -370,7 +377,7 @@ ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
-##### 102..106
+##### 106..110
my $ignoreHangul = Unicode::Collate->new(
table => undef,
@@ -389,45 +396,7 @@ ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
-##### 107..127
-
-my %origAlter = $Collator->change(alternate => 'Blanked');
-
-ok($Collator->lt("death", "de luge"));
-ok($Collator->lt("de luge", "de-luge"));
-ok($Collator->lt("de-luge", "deluge"));
-ok($Collator->lt("deluge", "de\x{2010}luge"));
-ok($Collator->lt("deluge", "de Luge"));
-
-$Collator->change(alternate => 'Non-ignorable');
-
-ok($Collator->lt("de luge", "de Luge"));
-ok($Collator->lt("de Luge", "de-luge"));
-ok($Collator->lt("de-Luge", "de\x{2010}luge"));
-ok($Collator->lt("de-luge", "death"));
-ok($Collator->lt("death", "deluge"));
-
-$Collator->change(alternate => 'Shifted');
-
-ok($Collator->lt("death", "de luge"));
-ok($Collator->lt("de luge", "de-luge"));
-ok($Collator->lt("de-luge", "deluge"));
-ok($Collator->lt("deluge", "de Luge"));
-ok($Collator->lt("de Luge", "deLuge"));
-
-$Collator->change(alternate => 'Shift-Trimmed');
-
-ok($Collator->lt("death", "deluge"));
-ok($Collator->lt("deluge", "de luge"));
-ok($Collator->lt("de luge", "de-luge"));
-ok($Collator->lt("de-luge", "deLuge"));
-ok($Collator->lt("deLuge", "de Luge"));
-
-$Collator->change(%origAlter);
-
-ok($Collator->{alternate}, 'shifted');
-
-##### 128..132
+##### 111..115
my $overCJK = Unicode::Collate->new(
table => undef,
@@ -449,62 +418,7 @@ ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
-##### 133..144
-
-# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)
-
-my %old_rearrange = $Collator->change(rearrange => undef);
-
-ok($Collator->gt("\x{0E41}A", "\x{0E40}B"));
-ok($Collator->gt("A\x{0E41}A", "A\x{0E40}B"));
-
-$Collator->change(rearrange => [ 0x61 ]);
- # U+0061, 'a': This is a Unicode value, never a native value.
-
-ok($Collator->gt("ab", "AB")); # as 'ba' > 'AB'
-
-$Collator->change(%old_rearrange);
-
-ok($Collator->lt("ab", "AB"));
-ok($Collator->lt("\x{0E40}", "\x{0E41}"));
-ok($Collator->lt("\x{0E40}A", "\x{0E41}B"));
-ok($Collator->lt("\x{0E41}A", "\x{0E40}B"));
-ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B"));
-
-ok($all_undef_8->lt("\x{0E40}", "\x{0E41}"));
-ok($all_undef_8->lt("\x{0E40}A", "\x{0E41}B"));
-ok($all_undef_8->lt("\x{0E41}A", "\x{0E40}B"));
-ok($all_undef_8->lt("A\x{0E41}A", "A\x{0E40}B"));
-
-##### 145..149
-
-my $no_rearrange = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- rearrange => [],
-);
-
-ok($no_rearrange->lt("A", "B"));
-ok($no_rearrange->lt("\x{0E40}", "\x{0E41}"));
-ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
-ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
-ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
-
-##### 150..154
-
-my $undef_rearrange = Unicode::Collate->new(
- table => undef,
- normalization => undef,
- rearrange => undef,
-);
-
-ok($undef_rearrange->lt("A", "B"));
-ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}"));
-ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
-ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
-ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
-
-##### 155..159
+##### 116..120
my $dropArticles = Unicode::Collate->new(
table => "keys.txt",
@@ -522,7 +436,7 @@ ok($dropArticles->lt("the pen", "a pencil"));
ok($Collator->lt("Perl", "The Perl"));
ok($Collator->gt("the pen", "a pencil"));
-##### 160..161
+##### 121..122
my $backLevel1 = Unicode::Collate->new(
table => undef,
@@ -535,7 +449,7 @@ my $backLevel1 = Unicode::Collate->new(
ok($backLevel1->gt("AB", "BA"));
ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));
-##### 162..169
+##### 123..130
my $backLevel2 = Unicode::Collate->new(
table => "keys.txt",
@@ -557,41 +471,7 @@ ok($backLevel2->lt("\x{4E03}", $katakana));
ok($Collator ->gt("\x{4E00}", $hiragana));
ok($Collator ->gt("\x{4E03}", $katakana));
-##### 170..184
-
-# ignorable after variable
-
-# Shifted;
-ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
-ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
-ok($Collator->eq("?\x{300}", "?"));
-ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
-
-$Collator->change(level => 3);
-ok($Collator->eq("\cA", "?"));
-
-$Collator->change(alternate => 'blanked', level => 4);
-ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
-ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
-ok($Collator->eq("?\x{300}", "?"));
-ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
-
-$Collator->change(level => 3);
-ok($Collator->eq("\cA", "?"));
-
-$Collator->change(alternate => 'Non-ignorable', level => 4);
-
-ok($Collator->lt("?\x{300}", "?!"));
-ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
-ok($Collator->gt("?\x{300}", "?"));
-ok($Collator->gt("?\x{344}", "?"));
-
-$Collator->change(level => 3);
-ok($Collator->lt("\cA", "?"));
-
-$Collator->change(alternate => 'Shifted', level => 4);
-
-##### 185..196
+##### 131..142
# According to Conformance Test,
# a L3-ignorable is treated as a completely ignorable.
@@ -631,7 +511,7 @@ ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));
-##### 197..203
+##### 143..149
my $O_str = Unicode::Collate->new(
table => "keys.txt",
@@ -665,4 +545,35 @@ ok($Collator->eq("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "A"));
+##### 150..160
+
+my %origVer = $Collator->change(UCA_Version => 8);
+
+$Collator->change(level => 3);
+
+ok($Collator->gt("!\x{300}", ""));
+ok($Collator->gt("!\x{300}", "!"));
+ok($Collator->eq("!\x{300}", "\x{300}"));
+
+$Collator->change(level => 2);
+
+ok($Collator->eq("!\x{300}", "\x{300}"));
+
+$Collator->change(level => 4);
+
+ok($Collator->gt("!\x{300}", "!"));
+ok($Collator->lt("!\x{300}", "\x{300}"));
+
+$Collator->change(%origVer, level => 3);
+
+ok($Collator->eq("!\x{300}", ""));
+ok($Collator->eq("!\x{300}", "!"));
+ok($Collator->lt("!\x{300}", "\x{300}"));
+
+$Collator->change(level => 4);
+
+ok($Collator->gt("!\x{300}", ""));
+ok($Collator->eq("!\x{300}", "!"));
+
#####
+
diff --git a/lib/Unicode/Collate/t/version.t b/lib/Unicode/Collate/t/version.t
index 0a6d448e1e..fec144c9d7 100644
--- a/lib/Unicode/Collate/t/version.t
+++ b/lib/Unicode/Collate/t/version.t
@@ -25,7 +25,7 @@ ok(1);
#########################
-# Fix me when UCA and/or key.txt is upgraded.
+# Fix me when UCA and/or keys.txt is upgraded.
my $UCA_Version = "11";
my $Base_Unicode_Version = "4.0";
my $Key_Version = "3.1.1";
diff --git a/lib/Unicode/Collate/t/view.t b/lib/Unicode/Collate/t/view.t
new file mode 100644
index 0000000000..578d4843e5
--- /dev/null
+++ b/lib/Unicode/Collate/t/view.t
@@ -0,0 +1,239 @@
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+BEGIN { plan tests => 53 };
+
+use strict;
+use warnings;
+use Unicode::Collate;
+
+#########################
+
+ok(1);
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+);
+
+##############
+
+ok($Collator->viewSortKey(""), "[| | |]");
+
+ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 | FFFF]");
+
+ok($Collator->viewSortKey("ABC"),
+ "[0A15 0A29 0A3D | 0020 0020 0020 | 0008 0008 0008 | FFFF FFFF FFFF]");
+
+ok($Collator->viewSortKey("(12)"),
+ "[0A0C 0A0D | 0020 0020 | 0002 0002 | 027A FFFF FFFF 027B]");
+
+ok($Collator->viewSortKey("!\x{300}"), "[| | | 024B]");
+
+ok($Collator->viewSortKey("\x{300}"), "[| 0035 | 0002 | FFFF]");
+
+$Collator->change(level => 3);
+ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 |]");
+
+$Collator->change(level => 2);
+ok($Collator->viewSortKey("A"), "[0A15 | 0020 | |]");
+
+$Collator->change(level => 1);
+ok($Collator->viewSortKey("A"), "[0A15 | | |]");
+
+### Version 8
+
+$Collator->change(level => 4, UCA_Version => 8);
+
+ok($Collator->viewSortKey(""), "[|||]");
+
+ok($Collator->viewSortKey("A"), "[0A15|0020|0008|FFFF]");
+
+ok($Collator->viewSortKey("ABC"),
+ "[0A15 0A29 0A3D|0020 0020 0020|0008 0008 0008|FFFF FFFF FFFF]");
+
+ok($Collator->viewSortKey("(12)"),
+ "[0A0C 0A0D|0020 0020|0002 0002|027A FFFF FFFF 027B]");
+
+ok($Collator->viewSortKey("!\x{300}"), "[|0035|0002|024B FFFF]");
+
+ok($Collator->viewSortKey("\x{300}"), "[|0035|0002|FFFF]");
+
+$Collator->change(level => 3);
+ok($Collator->viewSortKey("A"), "[0A15|0020|0008|]");
+
+$Collator->change(level => 2);
+ok($Collator->viewSortKey("A"), "[0A15|0020||]");
+
+$Collator->change(level => 1);
+ok($Collator->viewSortKey("A"), "[0A15|||]");
+
+# Version 9
+
+$Collator->change(level => 3, UCA_Version => 9);
+ok($Collator->viewSortKey("A\x{300}z\x{301}"),
+ "[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
+
+$Collator->change(backwards => 1);
+ok($Collator->viewSortKey("A\x{300}z\x{301}"),
+ "[0C13 0A15 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
+
+$Collator->change(backwards => 2);
+ok($Collator->viewSortKey("A\x{300}z\x{301}"),
+ "[0A15 0C13 | 0032 0020 0035 0020 | 0008 0002 0002 0002 |]");
+
+$Collator->change(backwards => [1,3]);
+ok($Collator->viewSortKey("A\x{300}z\x{301}"),
+ "[0C13 0A15 | 0020 0035 0020 0032 | 0002 0002 0002 0008 |]");
+
+$Collator->change(backwards => [2]);
+ok($Collator->viewSortKey("\x{300}\x{301}\x{302}\x{303}"),
+ "[| 004E 003C 0032 0035 | 0002 0002 0002 0002 |]");
+
+$Collator->change(backwards => []);
+ok($Collator->viewSortKey("A\x{300}z\x{301}"),
+ "[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
+
+$Collator->change(level => 4);
+
+# Variable
+
+our %origVar = $Collator->change(variable => 'Blanked');
+ok($Collator->viewSortKey("1+2"),
+ '[0A0C 0A0D | 0020 0020 | 0002 0002 | 0031 002B 0032]');
+
+ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
+ '[| | | 003F 0021 002E]');
+
+ok($Collator->viewSortKey("?!."), '[| | | 003F 0021 002E]');
+
+$Collator->change(variable => 'Non-ignorable');
+ok($Collator->viewSortKey("1+2"),
+ '[0A0C 039F 0A0D | 0020 0020 0020 | 0002 0002 0002 | 0031 002B 0032]');
+
+ok($Collator->viewSortKey("?\x{300}!"),
+ '[024E 024B | 0020 0035 0020 | 0002 0002 0002 | 003F 0300 0021]');
+
+ok($Collator->viewSortKey("?!."),
+ '[024E 024B 0255 | 0020 0020 0020 | 0002 0002 0002 | 003F 0021 002E]');
+
+$Collator->change(variable => 'Shifted');
+ok($Collator->viewSortKey("1+2"),
+ '[0A0C 0A0D | 0020 0020 | 0002 0002 | FFFF 039F FFFF]');
+
+ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
+ '[| | | 024E 024B 0255]');
+
+ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');
+
+$Collator->change(variable => 'Shift-Trimmed');
+ok($Collator->viewSortKey("1+2"),
+ '[0A0C 0A0D | 0020 0020 | 0002 0002 | 039F]');
+
+ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
+ '[| | | 024E 024B 0255]');
+
+ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');
+
+$Collator->change(%origVar);
+
+#####
+
+# Level 3 weight
+
+ok($Collator->viewSortKey("a\x{3042}"),
+ '[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');
+
+ok($Collator->viewSortKey("A\x{30A2}"),
+ '[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');
+
+$Collator->change(upper_before_lower => 1);
+
+ok($Collator->viewSortKey("a\x{3042}"),
+ '[0A15 1921 | 0020 0020 | 0008 000E | FFFF FFFF]');
+
+ok($Collator->viewSortKey("A\x{30A2}"),
+ '[0A15 1921 | 0020 0020 | 0002 0011 | FFFF FFFF]');
+
+$Collator->change(katakana_before_hiragana => 1);
+
+ok($Collator->viewSortKey("a\x{3042}"),
+ '[0A15 1921 | 0020 0020 | 0008 0013 | FFFF FFFF]');
+ok($Collator->viewSortKey("A\x{30A2}"),
+ '[0A15 1921 | 0020 0020 | 0002 000F | FFFF FFFF]');
+
+$Collator->change(upper_before_lower => 0);
+
+ok($Collator->viewSortKey("a\x{3042}"),
+ '[0A15 1921 | 0020 0020 | 0002 0013 | FFFF FFFF]');
+
+ok($Collator->viewSortKey("A\x{30A2}"),
+ '[0A15 1921 | 0020 0020 | 0008 000F | FFFF FFFF]');
+
+$Collator->change(katakana_before_hiragana => 0);
+
+ok($Collator->viewSortKey("a\x{3042}"),
+ '[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');
+
+ok($Collator->viewSortKey("A\x{30A2}"),
+ '[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');
+
+#####
+
+our $el = Unicode::Collate->new(
+ entry => <<'ENTRY',
+006C ; [.0B03.0020.0002.006C] # LATIN SMALL LETTER L
+FF4C ; [.0B03.0020.0003.FF4C] # FULLWIDTH LATIN SMALL LETTER L; QQK
+217C ; [.0B03.0020.0004.217C] # SMALL ROMAN NUMERAL FIFTY; QQK
+2113 ; [.0B03.0020.0005.2113] # SCRIPT SMALL L; QQK
+24DB ; [.0B03.0020.0006.24DB] # CIRCLED LATIN SMALL LETTER L; QQK
+004C ; [.0B03.0020.0008.004C] # LATIN CAPITAL LETTER L
+FF2C ; [.0B03.0020.0009.FF2C] # FULLWIDTH LATIN CAPITAL LETTER L; QQK
+216C ; [.0B03.0020.000A.216C] # ROMAN NUMERAL FIFTY; QQK
+2112 ; [.0B03.0020.000B.2112] # SCRIPT CAPITAL L; QQK
+24C1 ; [.0B03.0020.000C.24C1] # CIRCLED LATIN CAPITAL LETTER L; QQK
+ENTRY
+ table => undef,
+ normalization => undef,
+);
+
+our $el12 = '0B03 0B03 0B03 0B03 0B03 | 0020 0020 0020 0020 0020';
+
+ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
+ "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
+
+ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
+ "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
+
+$el->change(upper_before_lower => 1);
+
+ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
+ "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
+
+ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
+ "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
+
+$el->change(upper_before_lower => 0);
+
+ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
+ "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
+
+ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
+ "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
+
+#####
+