diff options
Diffstat (limited to 'lib/Unicode/Collate/t')
-rw-r--r-- | lib/Unicode/Collate/t/altern.t | 7 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/cjkrange.t | 94 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/contract.t | 10 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/hangtype.t | 12 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/hangul.t | 10 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/ignor.t | 158 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/illegal.t | 7 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/illegalp.t | 11 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/index.t | 14 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/normal.t | 9 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/override.t | 187 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/rearrang.t | 10 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/test.t | 294 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/trailwt.t | 7 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/variable.t | 7 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/version.t | 11 | ||||
-rw-r--r-- | lib/Unicode/Collate/t/view.t | 7 |
17 files changed, 521 insertions, 334 deletions
diff --git a/lib/Unicode/Collate/t/altern.t b/lib/Unicode/Collate/t/altern.t index d48e168b69..c958f4b58f 100644 --- a/lib/Unicode/Collate/t/altern.t +++ b/lib/Unicode/Collate/t/altern.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } diff --git a/lib/Unicode/Collate/t/cjkrange.t b/lib/Unicode/Collate/t/cjkrange.t new file mode 100644 index 0000000000..5a39bb8a57 --- /dev/null +++ b/lib/Unicode/Collate/t/cjkrange.t @@ -0,0 +1,94 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 51 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +my $Collator = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, +); + +# U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0. +# U+3400 is CJK UI ExtA, then greater than any CJK UI. 
+ +##### 2..11 +ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA +ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA +ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA +ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI < new UI +ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB +ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB +ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB +ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB + +##### 12..21 +$Collator->change(UCA_Version => 11); +ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA +ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned < Unassigned +ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB +ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB +ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB +ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB + +##### 22..31 +$Collator->change(UCA_Version => 9); +ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA +ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned < Unassigned +ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB +ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB +ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB +ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB + +##### 
32..41 +$Collator->change(UCA_Version => 8); +ok($Collator->gt("\x{9FA5}", "\x{3400}")); # UI > ExtA +ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned < Unassigned +ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < Unassigned(ExtB) +ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < Unassigned(ExtB) +ok($Collator->lt("\x{9FFF}","\x{20000}")); # Unassigned < Unassigned(ExtB) +ok($Collator->lt("\x{9FFF}","\x{2A6D6}")); # Unassigned < Unassigned(ExtB) + +##### 42..51 +$Collator->change(UCA_Version => 14); +ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA +ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA +ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA +ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA +ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI < new UI +ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB +ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB +ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB +ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB + diff --git a/lib/Unicode/Collate/t/contract.t b/lib/Unicode/Collate/t/contract.t index 18a0cfbdc9..9c55ecdc8b 100644 --- a/lib/Unicode/Collate/t/contract.t +++ b/lib/Unicode/Collate/t/contract.t @@ -4,12 +4,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); } } @@ -20,9 +17,6 @@ use strict; use warnings; use Unicode::Collate; -use vars qw($IsEBCDIC); -$IsEBCDIC = ord("A") != 0x41; - our $kjeEntry = <<'ENTRIES'; 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY diff --git a/lib/Unicode/Collate/t/hangtype.t b/lib/Unicode/Collate/t/hangtype.t index b6a46691aa..d8ea74673e 100644 --- a/lib/Unicode/Collate/t/hangtype.t +++ b/lib/Unicode/Collate/t/hangtype.t @@ -4,17 +4,14 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } use Test; -BEGIN { plan tests => 30 }; +BEGIN { plan tests => 33 }; use strict; use warnings; @@ -48,9 +45,12 @@ ok(Unicode::Collate::getHST(0x11FA), ''); ok(Unicode::Collate::getHST(0x11FF), ''); ok(Unicode::Collate::getHST(0x3011), ''); ok(Unicode::Collate::getHST(0x11A7), ''); +ok(Unicode::Collate::getHST(0xABFF), ''); ok(Unicode::Collate::getHST(0xAC00), 'LV'); ok(Unicode::Collate::getHST(0xAC01), 'LVT'); ok(Unicode::Collate::getHST(0xAC1B), 'LVT'); ok(Unicode::Collate::getHST(0xAC1C), 'LV'); ok(Unicode::Collate::getHST(0xD7A3), 'LVT'); +ok(Unicode::Collate::getHST(0xD7A4), ''); +ok(Unicode::Collate::getHST(0xFFFF), ''); diff --git a/lib/Unicode/Collate/t/hangul.t b/lib/Unicode/Collate/t/hangul.t index fd6cc69394..d9f7db9b10 100644 --- a/lib/Unicode/Collate/t/hangul.t +++ b/lib/Unicode/Collate/t/hangul.t @@ -4,12 +4,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); } } @@ -20,9 +17,6 @@ use strict; use warnings; use Unicode::Collate; -use vars qw($IsEBCDIC); -$IsEBCDIC = ord("A") != 0x41; - ######################### ok(1); diff --git a/lib/Unicode/Collate/t/ignor.t b/lib/Unicode/Collate/t/ignor.t new file mode 100644 index 0000000000..4ee47c6f12 --- /dev/null +++ b/lib/Unicode/Collate/t/ignor.t @@ -0,0 +1,158 @@ +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 41 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +my $trad = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, + level => 3, + entry => << 'ENTRIES', + 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish + 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish + 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish +ENTRIES +); +# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C +# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D + +##### 2..3 + +ok( + join(':', $trad->sort( qw/ acha aca ada acia acka / ) ), + join(':', qw/ aca acia acka acha ada / ), +); + +ok( + join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ), + join(':', qw/ ACA ACIA ACKA ACHA ADA / ), +); + +##### 4..7 + +ok($trad->gt("ocho", "oc\cAho")); # UCA v14 +ok($trad->gt("ocho", "oc\0\cA\0\cBho")); # UCA v14 +ok($trad->eq("-", "")); +ok($trad->gt("ocho", "oc-ho")); + +##### 8..11 + +$trad->change(UCA_Version => 9); + +ok($trad->eq("ocho", "oc\cAho")); # UCA v9 +ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9 +ok($trad->eq("-", "")); +ok($trad->gt("ocho", "oc-ho")); + +##### 12..15 + +$trad->change(UCA_Version => 8); + +ok($trad->gt("ocho", "oc\cAho")); +ok($trad->gt("ocho", "oc\0\cA\0\cBho")); 
+ok($trad->eq("-", "")); +ok($trad->gt("ocho", "oc-ho")); + + +##### 16..19 + +$trad->change(UCA_Version => 9); + +my $hiragana = "\x{3042}\x{3044}"; +my $katakana = "\x{30A2}\x{30A4}"; + +# HIRAGANA and KATAKANA are ignorable via ignoreName +ok($trad->eq($hiragana, "")); +ok($trad->eq("", $katakana)); +ok($trad->eq($hiragana, $katakana)); +ok($trad->eq($katakana, $hiragana)); + + +##### 20..31 + +# According to Conformance Test (UCA_Version == 9 or 11), +# a L3-ignorable is treated as a completely ignorable. + +my $L3ignorable = Unicode::Collate->new( + alternate => 'Non-ignorable', + level => 3, + table => undef, + normalization => undef, + UCA_Version => 9, + entry => <<'ENTRIES', +0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) +0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429) +0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA +1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM +0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK +09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA +09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E +09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O +09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O +1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE +1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK +1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA +1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. 
MINIMA BLACK +ENTRIES +); + +ok($L3ignorable->lt("\cA", "!")); +ok($L3ignorable->lt("\x{591}", "!")); +ok($L3ignorable->eq("\cA", "\x{591}")); +ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A")); +ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A")); +ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A")); +ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A")); +ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}")); +ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}")); +ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}")); +ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}")); +ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}")); + +##### 32..41 + +my $c = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + level => 1, + UCA_Version => 14, + entry => << 'ENTRIES', +034F ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER +0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish +0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish +0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish +ENTRIES +); +# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C +# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D + +ok($c->gt("ocho", "oc\x00\x00ho")); +ok($c->gt("ocho", "oc\cAho")); +ok($c->gt("ocho", "oc\x{034F}ho")); +ok($c->gt("ocio", "oc\x{034F}ho")); +ok($c->lt("ocgo", "oc\x{034F}ho")); +ok($c->lt("oceo", "oc\x{034F}ho")); + +ok($c->viewSortKey("ocho"), "[0B4B 0A3F 0B4B | | |]"); +ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); +ok($c->viewSortKey("oc\cAho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); +ok($c->viewSortKey("oc\x{034F}ho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); + + diff --git a/lib/Unicode/Collate/t/illegal.t b/lib/Unicode/Collate/t/illegal.t index 803e2f6739..825177c283 100644 --- a/lib/Unicode/Collate/t/illegal.t +++ b/lib/Unicode/Collate/t/illegal.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if 
($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } diff --git a/lib/Unicode/Collate/t/illegalp.t b/lib/Unicode/Collate/t/illegalp.t index 690c88d0bb..ff1936d353 100644 --- a/lib/Unicode/Collate/t/illegalp.t +++ b/lib/Unicode/Collate/t/illegalp.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -26,9 +23,9 @@ ok(1); # No test for Unicode::Collate is included in this .t file. # # UCA conformance test requires completely ignorable characters -# (including noncharacters) must be able to be ordered in code point order; +# (including noncharacters) must be able to be sorted in code point order. # If not so, Unicode::Collate must not be compliant with UCA. -# +# # ~~~ CollationTest_SHIFTED.txt in CollationTest-4.0.0 # # 206F 0021; # ! NOMINAL DIGIT SHAPES [| | | 0251] diff --git a/lib/Unicode/Collate/t/index.t b/lib/Unicode/Collate/t/index.t index a1d67d5346..5b6c78d968 100644 --- a/lib/Unicode/Collate/t/index.t +++ b/lib/Unicode/Collate/t/index.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); } } @@ -339,10 +336,10 @@ $ret = $Collator->gsubst($str, "perl", \&strreverse); ok($ret, 2); ok($str, "lr\cB\x{300}e\cBP and LREP."); -$str = "Camel ass came\x{301}l CAMEL horse cAm\0E\0L..."; +$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); -ok($str, -"<b>Camel</b> ass <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); +ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> " + . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); $Collator->change(level => 3); @@ -401,3 +398,4 @@ $Collator->change(alternate => 'Non-ignorable'); ($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!"); ok($ret, undef); + diff --git a/lib/Unicode/Collate/t/normal.t b/lib/Unicode/Collate/t/normal.t index 026240d6fa..57ea03368b 100644 --- a/lib/Unicode/Collate/t/normal.t +++ b/lib/Unicode/Collate/t/normal.t @@ -4,12 +4,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -44,7 +41,7 @@ FF21; [.0A87.0020.0008] # LATIN CAPITAL LETTER A 00C5; [.0AC5.0020.0008] # LATIN CAPITAL LETTER A WITH RING ABOVE ENTRIES -# Aong < A+ring < Z < fullA+ring < A-ring +# Aong < A+ring < Z < fullA+ring < A-ring ######################### diff --git a/lib/Unicode/Collate/t/override.t b/lib/Unicode/Collate/t/override.t new file mode 100644 index 0000000000..3e48e15f29 --- /dev/null +++ b/lib/Unicode/Collate/t/override.t @@ -0,0 +1,187 @@ +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; + exit 0; + } + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); + } +} + +use Test; +BEGIN { plan tests => 76 }; + +use strict; +use warnings; +use Unicode::Collate; + +ok(1); + +##### 2..6 + +my $all_undef_8 = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => undef, + overrideHangul => undef, + UCA_Version => 8, +); + +# All in the Unicode code point order. +# No hangul decomposition. + +ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); +ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); +ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); +ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); +ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); + + +##### 7..11 + +my $all_undef_9 = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => undef, + overrideHangul => undef, + UCA_Version => 9, +); + +# CJK Ideo. < CJK ext A/B < Others. +# No hangul decomposition. + +ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); +ok($all_undef_9->lt("\x{3402}", "\x{20000}")); +ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); +ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); +ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned + +##### 12..16 + +my $ignoreHangul = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideHangul => sub {()}, + entry => <<'ENTRIES', +AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL +ENTRIES +); + +# All Hangul Syllables except U+AE00 are ignored. + +ok($ignoreHangul->eq("\x{AC00}", "")); +ok($ignoreHangul->lt("\x{AC00}", "\0")); +ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); +ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. +ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. + + +my $ignoreCJK = Unicode::Collate->new( + table => undef, + normalization => undef, + overrideCJK => sub {()}, + entry => <<'ENTRIES', +5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" +ENTRIES +); + +# All CJK Unified Ideographs except U+5B57 are ignored. 
+ +##### 17..21 +ok($ignoreCJK->eq("\x{4E00}", "")); +ok($ignoreCJK->lt("\x{4E00}", "\0")); +ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK. +ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK. +ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned. + +##### 22..29 +ok($ignoreCJK->eq("\x{3400}", "")); +ok($ignoreCJK->eq("\x{4DB5}", "")); +ok($ignoreCJK->eq("\x{9FA5}", "")); +ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0 +ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0 +ok($ignoreCJK->gt("\x{9FBC}", "Perl")); +ok($ignoreCJK->eq("\x{20000}", "")); +ok($ignoreCJK->eq("\x{2A6D6}", "")); + +##### 30..37 +$ignoreCJK->change(UCA_Version => 9); +ok($ignoreCJK->eq("\x{3400}", "")); +ok($ignoreCJK->eq("\x{4DB5}", "")); +ok($ignoreCJK->eq("\x{9FA5}", "")); +ok($ignoreCJK->gt("\x{9FA6}", "Perl")); +ok($ignoreCJK->gt("\x{9FBB}", "Perl")); +ok($ignoreCJK->gt("\x{9FBC}", "Perl")); +ok($ignoreCJK->eq("\x{20000}", "")); +ok($ignoreCJK->eq("\x{2A6D6}", "")); + +##### 38..45 +$ignoreCJK->change(UCA_Version => 8); +ok($ignoreCJK->eq("\x{3400}", "")); +ok($ignoreCJK->eq("\x{4DB5}", "")); +ok($ignoreCJK->eq("\x{9FA5}", "")); +ok($ignoreCJK->gt("\x{9FA6}", "Perl")); +ok($ignoreCJK->gt("\x{9FBB}", "Perl")); +ok($ignoreCJK->gt("\x{9FBC}", "Perl")); +ok($ignoreCJK->eq("\x{20000}", "")); +ok($ignoreCJK->eq("\x{2A6D6}", "")); + +##### 46..53 +$ignoreCJK->change(UCA_Version => 14); +ok($ignoreCJK->eq("\x{3400}", "")); +ok($ignoreCJK->eq("\x{4DB5}", "")); +ok($ignoreCJK->eq("\x{9FA5}", "")); +ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0 +ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0 +ok($ignoreCJK->gt("\x{9FBC}", "Perl")); +ok($ignoreCJK->eq("\x{20000}", "")); +ok($ignoreCJK->eq("\x{2A6D6}", "")); + +##### 54..76 +my $overCJK = Unicode::Collate->new( + table => undef, + normalization => undef, + entry => <<'ENTRIES', +0061 ; [.0101.0020.0002.0061] # latin a +0041 ; [.0101.0020.0008.0041] # LATIN A 
+4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03. +ENTRIES + overrideCJK => sub { + my $u = 0xFFFF - $_[0]; # reversed + [$u, 0x20, 0x2, $u]; + }, +); + +ok($overCJK->lt("a", "A")); # diff. at level 3. +ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2. +ok($overCJK->lt("A\x{4E03}", "A\x{4E00}")); +ok($overCJK->lt("A\x{4E03}", "a\x{4E00}")); +ok($overCJK->lt("a\x{4E03}", "A\x{4E00}")); + +ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); +ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); +ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); +ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); +ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); +ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); + +$overCJK->change(UCA_Version => 9); + +ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); +ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); +ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}")); +ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}")); +ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); +ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); + +$overCJK->change(UCA_Version => 14); + +ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); +ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); +ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); +ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); +ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); +ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); + diff --git a/lib/Unicode/Collate/t/rearrang.t b/lib/Unicode/Collate/t/rearrang.t index cc02fa9f79..0977db9e48 100644 --- a/lib/Unicode/Collate/t/rearrang.t +++ b/lib/Unicode/Collate/t/rearrang.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); } } @@ -28,6 +25,7 @@ ok(1); my $Collator = Unicode::Collate->new( table => 'keys.txt', normalization => undef, + UCA_Version => 9, ); # rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default) @@ -73,6 +71,7 @@ my $no_rearrange = Unicode::Collate->new( table => undef, normalization => undef, rearrange => [], + UCA_Version => 9, ); ok($no_rearrange->lt("A", "B")); @@ -87,6 +86,7 @@ my $undef_rearrange = Unicode::Collate->new( table => undef, normalization => undef, rearrange => undef, + UCA_Version => 9, ); ok($undef_rearrange->lt("A", "B")); diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t index 53fa7ca879..a5337a014f 100644 --- a/lib/Unicode/Collate/t/test.t +++ b/lib/Unicode/Collate/t/test.t @@ -5,17 +5,14 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } use Test; -BEGIN { plan tests => 160 }; +BEGIN { plan tests => 107 }; use strict; use warnings; @@ -23,7 +20,17 @@ use Unicode::Collate; ok(1); -##### 2..6 +sub _pack_U { Unicode::Collate::pack_U(@_) } +sub _unpack_U { Unicode::Collate::unpack_U(@_) } + +my $A_acute = _pack_U(0xC1); +my $a_acute = _pack_U(0xE1); +my $acute = _pack_U(0x0301); + +my $hiragana = "\x{3042}\x{3044}"; +my $katakana = "\x{30A2}\x{30A4}"; + +##### 2..7 my $Collator = Unicode::Collate->new( table => 'keys.txt', @@ -32,28 +39,21 @@ my $Collator = Unicode::Collate->new( ok(ref $Collator, "Unicode::Collate"); - -ok( - join(':', $Collator->sort( - qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN / - ) ), - join(':', - qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings / - ), -); - ok($Collator->cmp("", ""), 0); ok($Collator->eq("", "")); ok($Collator->cmp("", "perl"), -1); -##### 7..17 +ok( + join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ), + 
join(':', qw/ aca acha acia acka ada / ), +); -sub _pack_U { Unicode::Collate::pack_U(@_) } -sub _unpack_U { Unicode::Collate::unpack_U(@_) } +ok( + join(':', $Collator->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ), + join(':', qw/ ACA ACHA ACIA ACKA ADA / ), +); -my $A_acute = _pack_U(0xC1); -my $a_acute = _pack_U(0xE1); -my $acute = _pack_U(0x0301); +##### 8..18 ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1) ok($Collator->cmp($a_acute, $A_acute), -1); @@ -71,84 +71,7 @@ ok($Collator->lt("A", $A_acute)); ok($Collator->lt("A", $a_acute)); ok($Collator->lt($a_acute, $A_acute)); -##### 18..20 - -eval { require Unicode::Normalize }; -if (!$@) { - my $NFD = Unicode::Collate->new( - table => 'keys.txt', - level => 1, - entry => <<'ENTRIES', -0430 ; [.0CB5.0020.0002.0430] # CYRILLIC SMALL LETTER A -0410 ; [.0CB5.0020.0008.0410] # CYRILLIC CAPITAL LETTER A -04D3 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS -0430 0308 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS -04D2 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS -0410 0308 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS -0430 3099 ; [.0CBE.0020.0002.04D3] # A WITH KATAKANA VOICED -0430 3099 0308 ; [.0CBF.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS -ENTRIES - ); - ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}")); - ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B")); - ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A")); -} -else { - ok(1); - ok(1); - ok(1); -} - -##### 21..34 - -my $trad = Unicode::Collate->new( - table => 'keys.txt', - normalization => undef, - ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, - level => 3, - entry => << 'ENTRIES', - 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish - 0043 0068 ; [.0A3F.0020.0008.0043] # "Ch" in traditional Spanish -ENTRIES -); -# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C -# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL 
LETTER D -# Deutsch sz is included in 'keys.txt'; - -ok( - join(':', $trad->sort( qw/ acha aca ada acia acka / ) ), - join(':', qw/ aca acia acka acha ada / ), -); - -ok( - join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ), - join(':', qw/ aca acha acia acka ada / ), -); - -ok($trad->eq("ocho", "oc\cAho")); # UCA v9 -ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9 -ok($trad->eq("-", "")); -ok($trad->gt("ocho", "oc-ho")); - -$trad->change(UCA_Version => 8); - -ok($trad->gt("ocho", "oc\cAho")); -ok($trad->gt("ocho", "oc\0\cA\0\cBho")); -ok($trad->eq("-", "")); -ok($trad->gt("ocho", "oc-ho")); - -$trad->change(UCA_Version => 9); - -my $hiragana = "\x{3042}\x{3044}"; -my $katakana = "\x{30A2}\x{30A4}"; - -# HIRAGANA and KATAKANA are ignorable via ignoreName -ok($trad->eq($hiragana, "")); -ok($trad->eq("", $katakana)); -ok($trad->eq($hiragana, $katakana)); -ok($trad->eq($katakana, $hiragana)); - -##### 35..41 +##### 19..25 $Collator->change(level => 2); @@ -161,7 +84,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0); ok( $Collator->eq($hiragana, $katakana) ); ok( $Collator->ge($hiragana, $katakana) ); -##### 42..47 +##### 26..31 # hangul ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); @@ -171,7 +94,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana -##### 48..56 +##### 32..40 $Collator->change(%old_level, katakana_before_hiragana => 1); @@ -186,7 +109,7 @@ ok( $Collator->ne($hiragana, $katakana) ); ok( $Collator->gt($hiragana, $katakana) ); ok( $Collator->ge($hiragana, $katakana) ); -##### 57..62 +##### 41..46 $Collator->change(upper_before_lower => 1); @@ -197,13 +120,15 @@ ok( $Collator->cmp($hiragana, $katakana), 1); ok( $Collator->ge($hiragana, $katakana), 1); ok( $Collator->gt($hiragana, $katakana), 1); -##### 63..68 +##### 47..48 $Collator->change(katakana_before_hiragana => 0); ok( $Collator->cmp("abc", "ABC"), 1); ok( 
$Collator->cmp($hiragana, $katakana), -1); +##### 49..52 + $Collator->change(upper_before_lower => 0); ok( $Collator->cmp("abc", "ABC"), -1); @@ -211,7 +136,7 @@ ok( $Collator->le("abc", "ABC") ); ok( $Collator->cmp($hiragana, $katakana), -1); ok( $Collator->lt($hiragana, $katakana) ); -##### 69..70 +##### 53..54 my $ignoreAE = Unicode::Collate->new( table => 'keys.txt', @@ -222,7 +147,7 @@ my $ignoreAE = Unicode::Collate->new( ok($ignoreAE->eq("element","lament")); ok($ignoreAE->eq("Perl","ePrl")); -##### 71 +##### 55 my $onlyABC = Unicode::Collate->new( table => undef, @@ -242,7 +167,7 @@ ok( join(':', qw/ A aB Ab ABA BAC cAc cc / ), ); -##### 72..75 +##### 56..59 my $undefAE = Unicode::Collate->new( table => 'keys.txt', @@ -255,7 +180,7 @@ ok($Collator->lt("edge","fog")); ok($undefAE ->gt("lake","like")); ok($Collator->lt("lake","like")); -##### 76..85 +##### 60..69 # Table is undefined, then no entry is defined. @@ -270,7 +195,6 @@ ok($undef_table->lt('', 'A')); ok($undef_table->lt('ABC', 'B')); # Hangul should be decomposed (even w/o Unicode::Normalize). - ok($undef_table->lt("Perl", "\x{AC00}")); ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); @@ -280,7 +204,6 @@ ok($undef_table->lt("\x{AE00}", "\x{3042}")); # U+3042: Hiragana A # Weight for CJK Ideographs is defined, though. 
- ok($undef_table->lt("", "\x{4E00}")); ok($undef_table->lt("\x{4E8C}","ABC")); ok($undef_table->lt("\x{4E00}","\x{3042}")); @@ -289,7 +212,7 @@ ok($undef_table->lt("\x{4E00}","\x{4E8C}")); # U+4E8C: Ideograph "TWO" -##### 86..90 +##### 70..74 my $few_entries = Unicode::Collate->new( entry => <<'ENTRIES', @@ -320,105 +243,7 @@ ok($few_entries->lt("\x{AE30}", "\x{AC00}")); ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); -##### 91..95 - -my $all_undef_8 = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => undef, - overrideHangul => undef, - UCA_Version => 8, -); - -# All in the Unicode code point order. -# No hangul decomposition. - -ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); -ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); -ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); -ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); -ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); - -##### 96..100 - -my $all_undef_9 = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => undef, - overrideHangul => undef, - UCA_Version => 9, -); - -# CJK Ideo. < CJK ext A/B < Others. -# No hangul decomposition. - -ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); -ok($all_undef_9->lt("\x{3402}", "\x{20000}")); -ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); -ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); -ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned - -##### 101..105 - -my $ignoreCJK = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideCJK => sub {()}, - entry => <<'ENTRIES', -5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" -ENTRIES -); - -# All CJK Unified Ideographs except U+5B57 are ignored. - -ok($ignoreCJK->eq("\x{4E00}", "")); -ok($ignoreCJK->lt("\x{4E00}", "\0")); -ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK. -ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK. -ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned. 
- -##### 106..110 - -my $ignoreHangul = Unicode::Collate->new( - table => undef, - normalization => undef, - overrideHangul => sub {()}, - entry => <<'ENTRIES', -AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL -ENTRIES -); - -# All Hangul Syllables except U+AE00 are ignored. - -ok($ignoreHangul->eq("\x{AC00}", "")); -ok($ignoreHangul->lt("\x{AC00}", "\0")); -ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); -ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. -ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. - -##### 111..115 - -my $overCJK = Unicode::Collate->new( - table => undef, - normalization => undef, - entry => <<'ENTRIES', -0061 ; [.0101.0020.0002.0061] # latin a -0041 ; [.0101.0020.0008.0041] # LATIN A -4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03. -ENTRIES - overrideCJK => sub { - my $u = 0xFFFF - $_[0]; # reversed - [$u, 0x20, 0x2, $u]; - }, -); - -ok($overCJK->lt("a", "A")); # diff. at level 3. -ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2. 
-ok($overCJK->lt("A\x{4E03}", "A\x{4E00}")); -ok($overCJK->lt("A\x{4E03}", "a\x{4E00}")); -ok($overCJK->lt("a\x{4E03}", "A\x{4E00}")); - -##### 116..120 +##### 75..79 my $dropArticles = Unicode::Collate->new( table => "keys.txt", @@ -436,7 +261,7 @@ ok($dropArticles->lt("the pen", "a pencil")); ok($Collator->lt("Perl", "The Perl")); ok($Collator->gt("the pen", "a pencil")); -##### 121..122 +##### 80..81 my $backLevel1 = Unicode::Collate->new( table => undef, @@ -449,7 +274,7 @@ my $backLevel1 = Unicode::Collate->new( ok($backLevel1->gt("AB", "BA")); ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}")); -##### 123..130 +##### 82..89 my $backLevel2 = Unicode::Collate->new( table => "keys.txt", @@ -471,47 +296,8 @@ ok($backLevel2->lt("\x{4E03}", $katakana)); ok($Collator ->gt("\x{4E00}", $hiragana)); ok($Collator ->gt("\x{4E03}", $katakana)); -##### 131..142 - -# According to Conformance Test, -# a L3-ignorable is treated as a completely ignorable. - -my $L3ignorable = Unicode::Collate->new( - alternate => 'Non-ignorable', - level => 3, - table => undef, - normalization => undef, - entry => <<'ENTRIES', -0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) -0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429) -0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA -1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM -0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK -09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA -09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E -09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O -09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O -1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE -1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK -1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA -1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. 
MINIMA BLACK -ENTRIES -); -ok($L3ignorable->lt("\cA", "!")); -ok($L3ignorable->lt("\x{591}", "!")); -ok($L3ignorable->eq("\cA", "\x{591}")); -ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A")); -ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A")); -ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A")); -ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A")); -ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}")); -ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}")); -ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}")); -ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}")); -ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}")); - -##### 143..149 +##### 90..96 my $O_str = Unicode::Collate->new( table => "keys.txt", @@ -545,7 +331,7 @@ ok($Collator->eq("\x{200B}", "\0")); ok($O_str ->gt("\x{200B}", "\0")); ok($O_str ->gt("\x{200B}", "A")); -##### 150..159 +##### 97..107 my %origVer = $Collator->change(UCA_Version => 8); diff --git a/lib/Unicode/Collate/t/trailwt.t b/lib/Unicode/Collate/t/trailwt.t index 463252cf1c..e987f8f509 100644 --- a/lib/Unicode/Collate/t/trailwt.t +++ b/lib/Unicode/Collate/t/trailwt.t @@ -4,12 +4,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } diff --git a/lib/Unicode/Collate/t/variable.t b/lib/Unicode/Collate/t/variable.t index 880327a6bd..1a6bd6495b 100644 --- a/lib/Unicode/Collate/t/variable.t +++ b/lib/Unicode/Collate/t/variable.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? 
qw(::lib) : qw(../lib); } } diff --git a/lib/Unicode/Collate/t/version.t b/lib/Unicode/Collate/t/version.t index fec144c9d7..17adf539ca 100644 --- a/lib/Unicode/Collate/t/version.t +++ b/lib/Unicode/Collate/t/version.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -26,8 +23,8 @@ ok(1); ######################### # Fix me when UCA and/or keys.txt is upgraded. -my $UCA_Version = "11"; -my $Base_Unicode_Version = "4.0"; +my $UCA_Version = "14"; +my $Base_Unicode_Version = "4.1.0"; my $Key_Version = "3.1.1"; ok(Unicode::Collate::UCA_Version, $UCA_Version); diff --git a/lib/Unicode/Collate/t/view.t b/lib/Unicode/Collate/t/view.t index 578d4843e5..44963f4189 100644 --- a/lib/Unicode/Collate/t/view.t +++ b/lib/Unicode/Collate/t/view.t @@ -5,12 +5,9 @@ BEGIN { "cannot stringify a Unicode code point\n"; exit 0; } -} - -BEGIN { if ($ENV{PERL_CORE}) { - chdir('t') if -d 't'; - @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } |