diff options
author | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2012-12-15 13:29:13 +0000 |
---|---|---|
committer | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2012-12-15 13:29:13 +0000 |
commit | 6d3c12b51eab17f76fe9a1afc3d7023d3cdb6a6b (patch) | |
tree | 399169b454d31a1358abae400a3783451138159c /cpan/Unicode-Collate | |
parent | fc826e38a4eb6efa4cfc92de662b40a6d5333e5e (diff) | |
download | perl-6d3c12b51eab17f76fe9a1afc3d7023d3cdb6a6b.tar.gz |
Update Unicode-Collate to CPAN version 0.96
[DELTA]
0.96 Sat Dec 15 19:43:10 2012
- special noncharacter tailorings ('highestFFFF' and 'minimalFFFE')
* some locales are modified for 'highestFFFF': as, bn, fa, gu, hi, hy,
kn, kok, mr, or, sa, si, si_dict, ta, te, th, ur.
- U::C::Locale now allows 'entry' to add or override mappings.
- bug fix: using DUCET through XS wrongly prevented completely ignorable
characters from tailoring.
- modified tests: default.t, loc_as.t, loc_bn.t, loc_fa.t, loc_gu.t,
loc_hi.t, loc_hy.t, loc_kn.t, loc_kok.t, loc_mr.t, loc_or.t, loc_sa.t,
loc_si.t, loc_sidt.t, loc_ta.t, loc_te.t, loc_test.t, loc_th.t,
loc_ur.t, nonchar.t in t.
Diffstat (limited to 'cpan/Unicode-Collate')
42 files changed, 858 insertions, 494 deletions
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes index 4eef25291a..faaa985361 100644 --- a/cpan/Unicode-Collate/Changes +++ b/cpan/Unicode-Collate/Changes @@ -1,5 +1,17 @@ Revision history for Perl module Unicode::Collate. +0.96 Sat Dec 15 19:43:10 2012 + - special noncharancter tailorings ('highestFFFF' and 'minimalFFFE') + * some locales are modified for 'highestFFFF': as, bn, fa, gu, hi, hy, + kn, kok, mr, or, sa, si, si_dict, ta, te, th, ur. + - U::C::Locale now allows 'entry' to add or override mappings. + - bug fix: using DUCET through XS wrongly prevented completely ignorable + characters from tailoring. + - modified tests: default.t, loc_as.t, loc_bn.t, loc_fa.t, loc_gu.t, + loc_hi.t, loc_hy.t, loc_kn.t, loc_kok.t, loc_mr.t, loc_or.t, loc_sa.t, + loc_si.t, loc_sidt.t, loc_ta.t, loc_te.t, loc_test.t, loc_th.t, + loc_ur.t, nonchar.t in t. + 0.95 Sat Dec 8 15:11:09 2012 - U::C::Locale newly supports locales: bs_Cyrl, ee. - updated to CLDR 21: uk. diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm index f3901ad940..3085094472 100644 --- a/cpan/Unicode-Collate/Collate.pm +++ b/cpan/Unicode-Collate/Collate.pm @@ -14,7 +14,7 @@ use File::Spec; no warnings 'utf8'; -our $VERSION = '0.95'; +our $VERSION = '0.96'; our $PACKAGE = __PACKAGE__; ### begin XS only ### @@ -82,6 +82,10 @@ use constant Hangul_SFin => 0xD7A3; # Logical_Order_Exception in PropList.txt my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ]; +# for highestFFFF and minimalFFFE +my $HighestVCE = pack(VCE_TEMPLATE, 0, 0xFFFE, 0x20, 0x5, 0xFFFF); +my $minimalVCE = pack(VCE_TEMPLATE, 0, 1, 0x20, 0x5, 0xFFFE); + sub UCA_Version { "26" } sub Base_Unicode_Version { "6.2.0" } @@ -103,7 +107,7 @@ our @ChangeOK = qw/ alternate backwards level normalization rearrange katakana_before_hiragana upper_before_lower ignore_level2 overrideHangul overrideCJK preprocess UCA_Version - hangul_terminator variable identical + hangul_terminator variable identical 
highestFFFF minimalFFFE /; our @ChangeNG = qw/ @@ -496,13 +500,9 @@ sub splitEnt if (_isIllegal($src[$i]) || $vers <= 20 && _isNonchar($src[$i])) { $src[$i] = undef; } elsif ($ver9) { - $src[$i] = undef if $map->{ $src[$i] } && - @{ $map->{ $src[$i] } } == 0; -### begin XS only ### - if ($uXS) { - $src[$i] = undef if _ignorable_simple($src[$i]); - } -### end XS only ### + $src[$i] = undef if $map->{ $src[$i] } + ? @{ $map->{ $src[$i] } } == 0 + : _ignorable_simple($src[$i]); ### XS only } } @@ -582,8 +582,8 @@ sub splitEnt } # skip completely ignorable - if ($uXS && $jcps !~ /;/ && _ignorable_simple($jcps) || ### XS only - $map->{$jcps} && @{ $map->{$jcps} } == 0) { + if ($map->{$jcps} ? @{ $map->{$jcps} } == 0 : + $uXS && $jcps !~ /;/ && _ignorable_simple($jcps)) { ### XS only if ($wLen && @buf) { $buf[-1][2] = $i + 1; } @@ -624,8 +624,9 @@ sub getWt my $uXS = $self->{__useXS}; ### XS only return if !defined $u; - return map($self->varCE($_), @{ $map->{$u} }) - if $map->{$u}; + return $self->varCE($HighestVCE) if $u eq 0xFFFF && $self->{highestFFFF}; + return $self->varCE($minimalVCE) if $u eq 0xFFFE && $self->{minimalFFFE}; + return map($self->varCE($_), @{ $map->{$u} }) if $map->{$u}; ### begin XS only ### return map($self->varCE($_), _fetch_simple($u)) if $uXS && _exists_simple($u); @@ -1046,12 +1047,14 @@ with no parameters, the collator should do the default collation. backwards => $levelNumber, # or \@levelNumbers entry => $element, hangul_terminator => $term_primary_weight, + highestFFFF => $bool, identical => $bool, ignoreName => qr/$ignoreName/, ignoreChar => qr/$ignoreChar/, ignore_level2 => $bool, katakana_before_hiragana => $bool, level => $collationLevel, + minimalFFFE => $bool, normalization => $normalization_form, overrideCJK => \&overrideCJK, overrideHangul => \&overrideHangul, @@ -1197,6 +1200,25 @@ automatically terminated with a terminator primary weight. These characters may need terminator included in a collation element table beforehand. 
+=item highestFFFF + +-- see 5.14 Collation Elements, UTS #35. + +If the parameter is made true, C<U+FFFF> has a highest primary weight. +When a boolean of C<$coll-E<gt>ge($str, "abc")> and +C<$coll-E<gt>le($str, "abc\x{FFFF}")> is true, it is expected that C<$str> +begins with C<"abc">, or another primary equivalent. +C<$str> may be C<"abcd">, C<"abc012">, but should not include C<U+FFFF> +such as C<"abc\x{FFFF}xyz">. + +C<$coll-E<gt>le($str, "abc\x{FFFF}")> works almost like C<$coll-E<gt>lt($str, "abd")>, +but the latter has the problem that you must know which letter is +next to C<c>. For a certain language where C<ch> is the next letter, +C<"abch"> is greater than C<"abc\x{FFFF}">, but less than C<"abd">. + +Note: This is equivalent to C<entry =E<gt> 'FFFF ; [.FFFE.0020.0005.FFFF]'>. +C<entry> allows tailoring of any other character than U+FFFF. + =item identical -- see A.3 Deterministic Comparison, UTS #10. @@ -1281,6 +1303,31 @@ and 'shift-trimmed'), the level 4 may be unreliable. See also C<identical>. +=item minimalFFFE + +-- see 5.14 Collation Elements, UTS #35. + +If the parameter is made true, C<U+FFFE> has a minimal primary weight. +The comparison between C<"$a1\x{FFFE}$a2"> and C<"$b1\x{FFFE}$b2"> +first compares C<$a1> and C<$b1> at level 1, and +then C<$a2> and C<$b2> at level 1, as follows. + + "ab\x{FFFE}a" + "Ab\x{FFFE}a" + "ab\x{FFFE}c" + "Ab\x{FFFE}c" + "ab\x{FFFE}xyz" + "abc\x{FFFE}def" + "abc\x{FFFE}xYz" + "aBc\x{FFFE}xyz" + "abcX\x{FFFE}def" + "abcx\x{FFFE}xyz" + "b\x{FFFE}aaa" + "bbb\x{FFFE}a" + +Note: This is equivalent to C<entry =E<gt> 'FFFE ; [.0001.0020.0005.FFFE]'>. +C<entry> allows tailoring of any other character than U+FFFE. + =item normalization -- see 4.1 Normalize, UTS #10. 
diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs index 4d4ecca3ee..27920ed0ea 100644 --- a/cpan/Unicode-Collate/Collate.xs +++ b/cpan/Unicode-Collate/Collate.xs @@ -624,7 +624,7 @@ varCE (self, vce) d[7] = (U8)(Shift4Wt >> 8); d[8] = (U8)(Shift4Wt & 0xFF); } - } else { /* shift-trimmed */ + } else { /* shift-trimmed or completely ignorable */ d[7] = d[8] = '\0'; } } diff --git a/cpan/Unicode-Collate/Collate/Locale.pm b/cpan/Unicode-Collate/Collate/Locale.pm index b7b3528c1b..fcabc9e2fa 100644 --- a/cpan/Unicode-Collate/Collate/Locale.pm +++ b/cpan/Unicode-Collate/Collate/Locale.pm @@ -4,7 +4,7 @@ use strict; use Carp; use base qw(Unicode::Collate); -our $VERSION = '0.95'; +our $VERSION = '0.96'; my $PL_EXT = '.pl'; @@ -110,10 +110,13 @@ sub new { my $href = _fetchpl($hash{accepted_locale}); while (my($k,$v) = each %$href) { - if (exists $hash{$k}) { + if (!exists $hash{$k}) { + $hash{$k} = $v; + } elsif ($k eq 'entry') { + $hash{$k} = $v.$hash{$k}; + } else { croak "$k is reserved by $hash{locale}, can't be overwritten"; } - $hash{$k} = $v; } return $class->SUPER::new(%hash); } @@ -271,6 +274,9 @@ Tailoring tags provided by C<Unicode::Collate> are allowed as long as they are not used for C<locale> support. Esp. the C<table> tag is always untailorable, since it is reserved for DUCET. +However C<entry> is allowed, even if it is used for C<locale> support, +to add or override mappings. + E.g. a collator for French, which ignores diacritics and case difference (i.e. level 1), with reversed case ordering and no normalization. 
diff --git a/cpan/Unicode-Collate/Collate/Locale/as.pl b/cpan/Unicode-Collate/Collate/Locale/as.pl index e0956221a1..1aeca849fd 100644 --- a/cpan/Unicode-Collate/Collate/Locale/as.pl +++ b/cpan/Unicode-Collate/Collate/Locale/as.pl @@ -1,10 +1,13 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0982 ; [.204B.0020.0002.0982][.FFF1.0000.0000.0000] # BENGALI SIGN ANUSVARA 0981 ; [.204B.0020.0002.0981][.FFF2.0000.0000.0000] # BENGALI SIGN CANDRABINDU 0983 ; [.204B.0020.0002.0983][.FFF3.0000.0000.0000] # BENGALI SIGN VISARGA +0994 ; [.204B.0020.0002.0994][.FFF0.0000.0000.0000] # BENGALI LETTER AU 09A4 09CD 200D ; [.205A.0020.0002.09A4][.FFF1.0000.0000.0000] # <BENGALI LETTER TA, BENGALI SIGN VIRAMA, ZERO WIDTH JOINER> +09A3 ; [.205A.0020.0002.09A3][.FFF0.0000.0000.0000] # BENGALI LETTER NNA 0995 09CD 09B7 ; [.206D.0020.0002.0995][.FFF1.0000.0000.0000] # <BENGALI LETTER KA, BENGALI SIGN VIRAMA, BENGALI LETTER SSA> +09B9 ; [.206D.0020.0002.09B9][.FFF0.0000.0000.0000] # BENGALI LETTER HA ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/bn.pl b/cpan/Unicode-Collate/Collate/Locale/bn.pl index 8cbca75776..b64452ab73 100644 --- a/cpan/Unicode-Collate/Collate/Locale/bn.pl +++ b/cpan/Unicode-Collate/Collate/Locale/bn.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0982 ; [.204B.0020.0002.0982][.FFF1.0000.0000.0000] # BENGALI SIGN ANUSVARA 0983 ; [.204B.0020.0002.0983][.FFF2.0000.0000.0000] # BENGALI SIGN VISARGA 0981 ; [.204B.0020.0002.0981][.FFF3.0000.0000.0000] # BENGALI SIGN CANDRABINDU +0994 ; [.204B.0020.0002.0994][.FFF0.0000.0000.0000] # BENGALI LETTER AU ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/fa.pl b/cpan/Unicode-Collate/Collate/Locale/fa.pl index 735ff8b401..415d3e540a 100644 --- a/cpan/Unicode-Collate/Collate/Locale/fa.pl +++ b/cpan/Unicode-Collate/Collate/Locale/fa.pl @@ -1,28 +1,28 @@ +{ - locale_version => 0.93, + locale_version 
=> 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 -064E ; [.0000.00A8.0002.064E] # ARABIC FATHA -0650 ; [.0000.00A9.0002.0650] # ARABIC KASRA -064F ; [.0000.00AA.0002.064F] # ARABIC DAMMA -064B ; [.0000.00AB.0002.064B] # ARABIC FATHATAN -064D ; [.0000.00AC.0002.064D] # ARABIC KASRATAN -064C ; [.0000.00AD.0002.064C] # ARABIC DAMMATAN +0650 ; [.0000.00AE.0002.0650] # ARABIC KASRA +064B ; [.0000.00B3.0002.064B] # ARABIC FATHATAN +064D ; [.0000.00B4.0002.064D] # ARABIC KASRATAN +064C ; [.0000.00B5.0002.064C] # ARABIC DAMMATAN 0622 ; [.1C9B.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE 0627 0653 ; [.1C9B.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +08AC ; [.1C9B.0020.0002.08AC][.FFF0.0000.0000.0000] # ARABIC LETTER ROHINGYA YEH 0671 ; [.1C9C.0021.0002.0671] # ARABIC LETTER ALEF WASLA -0621 ; [.1C9C.0020.0002.0621][.FFF1.0000.0000.0000] # ARABIC LETTER HAMZA -0623 ; [.1C9C.0021.0002.0623][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE -0627 0654 ; [.1C9C.0021.0002.0623][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE -0672 ; [.1C9C.0022.0002.0672][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE -0625 ; [.1C9C.0023.0002.0625][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW -0627 0655 ; [.1C9C.0023.0002.0625][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW -0673 ; [.1C9C.0024.0002.0673][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW -0624 ; [.1C9C.0025.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE -0648 0654 ; [.1C9C.0025.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE -06CC 0654 ; [.1C9C.0026.0002.06CC][.FFF1.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC HAMZA ABOVE> -0649 0654 ; [.1C9C.0026.0003.0649][.FFF1.0000.0000.0000] # <ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE> -0626 ; [.1C9C.0026.0004.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE -064A 0654 ; 
[.1C9C.0026.0004.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +0621 ; [.1C9D.0020.0002.0621][.FFF0.0000.0000.0000] # ARABIC LETTER HAMZA +0623 ; [.1C9D.0021.0002.0623][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0627 0654 ; [.1C9D.0021.0002.0623][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0672 ; [.1C9D.0022.0002.0672][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE +0625 ; [.1C9D.0023.0002.0625][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW +0627 0655 ; [.1C9D.0023.0002.0625][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH HAMZA BELOW +0673 ; [.1C9D.0024.0002.0673][.FFF0.0000.0000.0000] # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0624 ; [.1C9D.0025.0002.0624][.FFF0.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 0654 ; [.1C9D.0025.0002.0624][.FFF0.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +06CC 0654 ; [.1C9D.0026.0002.06CC][.FFF0.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC HAMZA ABOVE> +0649 0654 ; [.1C9D.0026.0003.0649][.FFF0.0000.0000.0000] # <ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE> +0626 ; [.1C9D.0026.0004.0626][.FFF0.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +064A 0654 ; [.1C9D.0026.0004.0626][.FFF0.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +066E ; [.1C9D.0020.0002.066E][.FFF1.0000.0000.0000] # ARABIC LETTER DOTLESS BEH 06AA ; [.1D0E.0021.0002.06AA] # ARABIC LETTER SWASH KAF 06AB ; [.1D0E.0022.0002.06AB] # ARABIC LETTER KAF WITH RING 0643 ; [.1D0E.0023.0002.0643] # ARABIC LETTER KAF @@ -38,6 +38,7 @@ 06C0 ; [.1D43.0025.0002.06C0][.FFF1.0000.0000.0000] # ARABIC LETTER HEH WITH YEH ABOVE 06D5 0654 ; [.1D43.0025.0002.06C0][.FFF1.0000.0000.0000] # ARABIC LETTER HEH WITH YEH ABOVE 06BE ; [.1D43.0026.0002.06BE][.FFF1.0000.0000.0000] # ARABIC LETTER HEH DOACHASHMEE +06CF ; [.1D43.0020.0002.06CF][.FFF0.0000.0000.0000] # ARABIC LETTER WAW WITH DOT ABOVE 0649 ; [.1D49.0021.0002.0649] # ARABIC LETTER ALEF MAKSURA 06D2 ; 
[.1D49.0022.0002.06D2] # ARABIC LETTER YEH BARREE 06D3 ; [.1D49.0022.0002.06D2][.0000.00BE.0002.0654] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE diff --git a/cpan/Unicode-Collate/Collate/Locale/gu.pl b/cpan/Unicode-Collate/Collate/Locale/gu.pl index cdb1f5f3c0..b009f638f9 100644 --- a/cpan/Unicode-Collate/Collate/Locale/gu.pl +++ b/cpan/Unicode-Collate/Collate/Locale/gu.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0A82 ; [.20B7.0020.0002.0A82][.FFF1.0000.0000.0000] # GUJARATI SIGN ANUSVARA 0A81 ; [.20B7.0021.0002.0A81][.FFF1.0000.0000.0A81] # GUJARATI SIGN CANDRABINDU 0A83 ; [.20B7.0020.0002.0A83][.FFF2.0000.0000.0000] # GUJARATI SIGN VISARGA +0AD0 ; [.20B7.0020.0002.0AD0][.FFF0.0000.0000.0000] # GUJARATI OM ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/hi.pl b/cpan/Unicode-Collate/Collate/Locale/hi.pl index 36fedf1b10..c58fdfb770 100644 --- a/cpan/Unicode-Collate/Collate/Locale/hi.pl +++ b/cpan/Unicode-Collate/Collate/Locale/hi.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0902 ; [.1FDC.0020.0002.0902][.FFF1.0000.0000.0000] # DEVANAGARI SIGN ANUSVARA 0901 ; [.1FDC.0021.0002.0901][.FFF1.0000.0000.0901] # DEVANAGARI SIGN CANDRABINDU 0903 ; [.1FDC.0020.0002.0903][.FFF2.0000.0000.0000] # DEVANAGARI SIGN VISARGA +0950 ; [.1FDC.0020.0002.0950][.FFF0.0000.0000.0000] # DEVANAGARI OM ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/hy.pl b/cpan/Unicode-Collate/Collate/Locale/hy.pl index 16bf4eeee4..3125576911 100644 --- a/cpan/Unicode-Collate/Collate/Locale/hy.pl +++ b/cpan/Unicode-Collate/Collate/Locale/hy.pl @@ -1,7 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0587 ; [.1C44.0020.0002.0587][.FFF1.0000.0000.0000] # ARMENIAN SMALL LIGATURE ECH YIWN +0584 ; [.1C44.0020.0002.0584][.FFF0.0000.0000.0000] # ARMENIAN SMALL LETTER KEH 0535 0582 ; 
[.1C44.0020.0008.0535][.FFF1.0000.0000.0000] # <ARMENIAN CAPITAL LETTER ECH, ARMENIAN SMALL LETTER YIWN> +0554 ; [.1C44.0020.0008.0554][.FFF0.0000.0000.0000] # ARMENIAN CAPITAL LETTER KEH ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/kn.pl b/cpan/Unicode-Collate/Collate/Locale/kn.pl index 66c760f7e0..3527fb2e7d 100644 --- a/cpan/Unicode-Collate/Collate/Locale/kn.pl +++ b/cpan/Unicode-Collate/Collate/Locale/kn.pl @@ -1,9 +1,10 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0C82 ; [.21C7.0020.0002.0C82][.FFF1.0000.0000.0000] # KANNADA SIGN ANUSVARA 0C83 ; [.21C7.0020.0002.0C83][.FFF2.0000.0000.0000] # KANNADA SIGN VISARGA 0CF1 ; [.21C7.0020.0002.0CF1][.FFF3.0000.0000.0000] # KANNADA SIGN JIHVAMULIYA 0CF2 ; [.21C7.0020.0002.0CF2][.FFF4.0000.0000.0000] # KANNADA SIGN UPADHMANIYA +0C94 ; [.21C7.0020.0002.0C94][.FFF0.0000.0000.0000] # KANNADA LETTER AU ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/kok.pl b/cpan/Unicode-Collate/Collate/Locale/kok.pl index 3c23e8ee87..e9218a427a 100644 --- a/cpan/Unicode-Collate/Collate/Locale/kok.pl +++ b/cpan/Unicode-Collate/Collate/Locale/kok.pl @@ -1,11 +1,13 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0902 ; [.1FDC.0020.0002.0902][.FFF1.0000.0000.0000] # DEVANAGARI SIGN ANUSVARA 0901 ; [.1FDC.0021.0002.0901][.FFF1.0000.0000.0901] # DEVANAGARI SIGN CANDRABINDU 0903 ; [.1FDC.0020.0002.0903][.FFF2.0000.0000.0000] # DEVANAGARI SIGN VISARGA +0950 ; [.1FDC.0020.0002.0950][.FFF0.0000.0000.0000] # DEVANAGARI OM 0933 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000] # DEVANAGARI LETTER LLA 0934 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000][.0000.00F1.0002.093C] # DEVANAGARI LETTER LLLA 0915 094D 0937 ; [.201D.0020.0002.0915][.FFF2.0000.0000.0000] # <DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA> +0939 ; [.201D.0020.0002.0939][.FFF0.0000.0000.0000] # DEVANAGARI LETTER HA ENTRY }; diff --git 
a/cpan/Unicode-Collate/Collate/Locale/mr.pl b/cpan/Unicode-Collate/Collate/Locale/mr.pl index 7029ea4897..7e72266bd2 100644 --- a/cpan/Unicode-Collate/Collate/Locale/mr.pl +++ b/cpan/Unicode-Collate/Collate/Locale/mr.pl @@ -1,12 +1,14 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0902 ; [.1FDC.0020.0002.0902][.FFF1.0000.0000.0000] # DEVANAGARI SIGN ANUSVARA 0901 ; [.1FDC.0021.0002.0901][.FFF1.0000.0000.0901] # DEVANAGARI SIGN CANDRABINDU 0903 ; [.1FDC.0020.0002.0903][.FFF2.0000.0000.0000] # DEVANAGARI SIGN VISARGA +0950 ; [.1FDC.0020.0002.0950][.FFF0.0000.0000.0000] # DEVANAGARI OM 0933 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000] # DEVANAGARI LETTER LLA 0934 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000][.0000.00F1.0002.093C] # DEVANAGARI LETTER LLLA 0915 094D 0937 ; [.201D.0020.0002.0915][.FFF2.0000.0000.0000] # <DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA> 091C 094D 091E ; [.201D.0020.0002.091C][.FFF3.0000.0000.0000] # <DEVANAGARI LETTER JA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER NYA> +0939 ; [.201D.0020.0002.0939][.FFF0.0000.0000.0000] # DEVANAGARI LETTER HA ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/or.pl b/cpan/Unicode-Collate/Collate/Locale/or.pl index 6a23696f04..774613e165 100644 --- a/cpan/Unicode-Collate/Collate/Locale/or.pl +++ b/cpan/Unicode-Collate/Collate/Locale/or.pl @@ -1,10 +1,12 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0B01 ; [.2108.0020.0002.0B01][.FFF1.0000.0000.0000] # ORIYA SIGN CANDRABINDU 0B02 ; [.2108.0020.0002.0B02][.FFF2.0000.0000.0000] # ORIYA SIGN ANUSVARA 0B03 ; [.2108.0020.0002.0B03][.FFF3.0000.0000.0000] # ORIYA SIGN VISARGA +0B14 ; [.2108.0020.0002.0B14][.FFF0.0000.0000.0000] # ORIYA LETTER AU 0B15 0B4D 0B37 ; [.212C.0020.0002.0B15][.FFF1.0000.0000.0000] # <ORIYA LETTER KA, ORIYA SIGN VIRAMA, ORIYA LETTER SSA> +0B39 ; [.212C.0020.0002.0B39][.FFF0.0000.0000.0000] # ORIYA LETTER HA 
0B5F ; [.2122.0021.0002.0B5F] # ORIYA LETTER YYA ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/sa.pl b/cpan/Unicode-Collate/Collate/Locale/sa.pl index 7029ea4897..7e72266bd2 100644 --- a/cpan/Unicode-Collate/Collate/Locale/sa.pl +++ b/cpan/Unicode-Collate/Collate/Locale/sa.pl @@ -1,12 +1,14 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0902 ; [.1FDC.0020.0002.0902][.FFF1.0000.0000.0000] # DEVANAGARI SIGN ANUSVARA 0901 ; [.1FDC.0021.0002.0901][.FFF1.0000.0000.0901] # DEVANAGARI SIGN CANDRABINDU 0903 ; [.1FDC.0020.0002.0903][.FFF2.0000.0000.0000] # DEVANAGARI SIGN VISARGA +0950 ; [.1FDC.0020.0002.0950][.FFF0.0000.0000.0000] # DEVANAGARI OM 0933 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000] # DEVANAGARI LETTER LLA 0934 ; [.201D.0020.0002.0933][.FFF1.0000.0000.0000][.0000.00F1.0002.093C] # DEVANAGARI LETTER LLLA 0915 094D 0937 ; [.201D.0020.0002.0915][.FFF2.0000.0000.0000] # <DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA> 091C 094D 091E ; [.201D.0020.0002.091C][.FFF3.0000.0000.0000] # <DEVANAGARI LETTER JA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER NYA> +0939 ; [.201D.0020.0002.0939][.FFF0.0000.0000.0000] # DEVANAGARI LETTER HA ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/si.pl b/cpan/Unicode-Collate/Collate/Locale/si.pl index 97133e4da0..80f06273f4 100644 --- a/cpan/Unicode-Collate/Collate/Locale/si.pl +++ b/cpan/Unicode-Collate/Collate/Locale/si.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0D82 ; [.225A.0020.0002.0D82][.FFF1.0000.0000.0000] # SINHALA SIGN ANUSVARAYA 0D83 ; [.225A.0020.0002.0D83][.FFF2.0000.0000.0000] # SINHALA SIGN VISARGAYA +0D96 ; [.225A.0020.0002.0D96][.FFF0.0000.0000.0000] # SINHALA LETTER AUYANNA 0DA5 ; [.2265.0020.0002.0DA5] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA 0DA4 ; [.2266.0020.0002.0DA4] # SINHALA LETTER TAALUJA NAASIKYAYA ENTRY diff --git 
a/cpan/Unicode-Collate/Collate/Locale/si_dict.pl b/cpan/Unicode-Collate/Collate/Locale/si_dict.pl index 621db1e720..a76d0b2bef 100644 --- a/cpan/Unicode-Collate/Collate/Locale/si_dict.pl +++ b/cpan/Unicode-Collate/Collate/Locale/si_dict.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0D82 ; [.225A.0020.0002.0D82][.FFF1.0000.0000.0000] # SINHALA SIGN ANUSVARAYA 0D83 ; [.225A.0020.0002.0D83][.FFF2.0000.0000.0000] # SINHALA SIGN VISARGAYA +0D96 ; [.225A.0020.0002.0D96][.FFF0.0000.0000.0000] # SINHALA LETTER AUYANNA 0DA5 ; [.2263.0020.0002.0DA2][.2295.0020.0002.0DCA][.2265.0021.0002.0DA5] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/ta.pl b/cpan/Unicode-Collate/Collate/Locale/ta.pl index f3079bef7f..4dae5f1d32 100644 --- a/cpan/Unicode-Collate/Collate/Locale/ta.pl +++ b/cpan/Unicode-Collate/Collate/Locale/ta.pl @@ -1,31 +1,55 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 -0B82 ; [.214A.0020.0002.0B82][.FFF1.0000.0000.0000] # TAMIL SIGN ANUSVARA -0B95 0BCD ; [.214B.0020.0002.0B95][.FFF1.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA> -0B99 0BCD ; [.214C.0020.0002.0B99][.FFF1.0000.0000.0000] # <TAMIL LETTER NGA, TAMIL SIGN VIRAMA> -0B9A 0BCD ; [.214D.0020.0002.0B9A][.FFF1.0000.0000.0000] # <TAMIL LETTER CA, TAMIL SIGN VIRAMA> -0B9E 0BCD ; [.214E.0020.0002.0B9E][.FFF1.0000.0000.0000] # <TAMIL LETTER NYA, TAMIL SIGN VIRAMA> -0B9F 0BCD ; [.214F.0020.0002.0B9F][.FFF1.0000.0000.0000] # <TAMIL LETTER TTA, TAMIL SIGN VIRAMA> -0BA3 0BCD ; [.2150.0020.0002.0BA3][.FFF1.0000.0000.0000] # <TAMIL LETTER NNA, TAMIL SIGN VIRAMA> -0BA4 0BCD ; [.2151.0020.0002.0BA4][.FFF1.0000.0000.0000] # <TAMIL LETTER TA, TAMIL SIGN VIRAMA> -0BA8 0BCD ; [.2152.0020.0002.0BA8][.FFF1.0000.0000.0000] # <TAMIL LETTER NA, TAMIL SIGN VIRAMA> -0BAA 0BCD ; [.2153.0020.0002.0BAA][.FFF1.0000.0000.0000] # <TAMIL LETTER PA, TAMIL SIGN 
VIRAMA> -0BAE 0BCD ; [.2154.0020.0002.0BAE][.FFF1.0000.0000.0000] # <TAMIL LETTER MA, TAMIL SIGN VIRAMA> -0BAF 0BCD ; [.2155.0020.0002.0BAF][.FFF1.0000.0000.0000] # <TAMIL LETTER YA, TAMIL SIGN VIRAMA> -0BB0 0BCD ; [.2156.0020.0002.0BB0][.FFF1.0000.0000.0000] # <TAMIL LETTER RA, TAMIL SIGN VIRAMA> -0BB2 0BCD ; [.2157.0020.0002.0BB2][.FFF1.0000.0000.0000] # <TAMIL LETTER LA, TAMIL SIGN VIRAMA> -0BB5 0BCD ; [.2158.0020.0002.0BB5][.FFF1.0000.0000.0000] # <TAMIL LETTER VA, TAMIL SIGN VIRAMA> -0BB4 0BCD ; [.2159.0020.0002.0BB4][.FFF1.0000.0000.0000] # <TAMIL LETTER LLLA, TAMIL SIGN VIRAMA> -0BB3 0BCD ; [.215A.0020.0002.0BB3][.FFF1.0000.0000.0000] # <TAMIL LETTER LLA, TAMIL SIGN VIRAMA> -0BB1 0BCD ; [.215B.0020.0002.0BB1][.FFF1.0000.0000.0000] # <TAMIL LETTER RRA, TAMIL SIGN VIRAMA> -0BA9 0BCD ; [.215C.0020.0002.0BA9][.FFF1.0000.0000.0000] # <TAMIL LETTER NNNA, TAMIL SIGN VIRAMA> -0B9C 0BCD ; [.215D.0020.0002.0B9C][.FFF1.0000.0000.0000] # <TAMIL LETTER JA, TAMIL SIGN VIRAMA> -0BB6 0BCD ; [.215E.0020.0002.0BB6][.FFF1.0000.0000.0000] # <TAMIL LETTER SHA, TAMIL SIGN VIRAMA> -0BB7 0BCD ; [.215F.0020.0002.0BB7][.FFF1.0000.0000.0000] # <TAMIL LETTER SSA, TAMIL SIGN VIRAMA> -0BB8 0BCD ; [.2160.0020.0002.0BB8][.FFF1.0000.0000.0000] # <TAMIL LETTER SA, TAMIL SIGN VIRAMA> -0BB9 0BCD ; [.2161.0020.0002.0BB9][.FFF1.0000.0000.0000] # <TAMIL LETTER HA, TAMIL SIGN VIRAMA> -0B95 0BCD 0BB7 0BCD ; [.2162.0020.0002.0B95][.FFF1.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA, TAMIL SIGN VIRAMA> -0B95 0BCD 0BB7 ; [.2162.0020.0002.0B95][.FFF2.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA> +0B82 ; [.214B.0020.0002.0B82][.FFF0.0000.0000.0000] # TAMIL SIGN ANUSVARA +0B83 ; [.214B.0020.0002.0B83][.FFF1.0000.0000.0000] # TAMIL SIGN VISARGA +0B95 0BCD 0BB7 0BCD ; [.2162.0020.0002.0B95][.FFF2.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA, TAMIL SIGN VIRAMA> +0B95 0BCD 0BB7 ; [.2162.0020.0002.0B95][.FFF3.0000.0000.0000] # 
<TAMIL LETTER KA, TAMIL SIGN VIRAMA, TAMIL LETTER SSA> +0B95 0BCD ; [.214C.0020.0002.0B95][.FFF0.0000.0000.0000] # <TAMIL LETTER KA, TAMIL SIGN VIRAMA> +0B95 ; [.214C.0020.0002.0B95][.FFF1.0000.0000.0000] # TAMIL LETTER KA +0B99 0BCD ; [.214D.0020.0002.0B99][.FFF0.0000.0000.0000] # <TAMIL LETTER NGA, TAMIL SIGN VIRAMA> +0B99 ; [.214D.0020.0002.0B99][.FFF1.0000.0000.0000] # TAMIL LETTER NGA +0B9A 0BCD ; [.214E.0020.0002.0B9A][.FFF0.0000.0000.0000] # <TAMIL LETTER CA, TAMIL SIGN VIRAMA> +0B9A ; [.214E.0020.0002.0B9A][.FFF1.0000.0000.0000] # TAMIL LETTER CA +0B9E 0BCD ; [.214F.0020.0002.0B9E][.FFF0.0000.0000.0000] # <TAMIL LETTER NYA, TAMIL SIGN VIRAMA> +0B9E ; [.214F.0020.0002.0B9E][.FFF1.0000.0000.0000] # TAMIL LETTER NYA +0B9F 0BCD ; [.2150.0020.0002.0B9F][.FFF0.0000.0000.0000] # <TAMIL LETTER TTA, TAMIL SIGN VIRAMA> +0B9F ; [.2150.0020.0002.0B9F][.FFF1.0000.0000.0000] # TAMIL LETTER TTA +0BA3 0BCD ; [.2151.0020.0002.0BA3][.FFF0.0000.0000.0000] # <TAMIL LETTER NNA, TAMIL SIGN VIRAMA> +0BA3 ; [.2151.0020.0002.0BA3][.FFF1.0000.0000.0000] # TAMIL LETTER NNA +0BA4 0BCD ; [.2152.0020.0002.0BA4][.FFF0.0000.0000.0000] # <TAMIL LETTER TA, TAMIL SIGN VIRAMA> +0BA4 ; [.2152.0020.0002.0BA4][.FFF1.0000.0000.0000] # TAMIL LETTER TA +0BA8 0BCD ; [.2153.0020.0002.0BA8][.FFF0.0000.0000.0000] # <TAMIL LETTER NA, TAMIL SIGN VIRAMA> +0BA8 ; [.2153.0020.0002.0BA8][.FFF1.0000.0000.0000] # TAMIL LETTER NA +0BAA 0BCD ; [.2154.0020.0002.0BAA][.FFF0.0000.0000.0000] # <TAMIL LETTER PA, TAMIL SIGN VIRAMA> +0BAA ; [.2154.0020.0002.0BAA][.FFF1.0000.0000.0000] # TAMIL LETTER PA +0BAE 0BCD ; [.2155.0020.0002.0BAE][.FFF0.0000.0000.0000] # <TAMIL LETTER MA, TAMIL SIGN VIRAMA> +0BAE ; [.2155.0020.0002.0BAE][.FFF1.0000.0000.0000] # TAMIL LETTER MA +0BAF 0BCD ; [.2156.0020.0002.0BAF][.FFF0.0000.0000.0000] # <TAMIL LETTER YA, TAMIL SIGN VIRAMA> +0BAF ; [.2156.0020.0002.0BAF][.FFF1.0000.0000.0000] # TAMIL LETTER YA +0BB0 0BCD ; [.2157.0020.0002.0BB0][.FFF0.0000.0000.0000] # <TAMIL LETTER RA, TAMIL SIGN 
VIRAMA> +0BB0 ; [.2157.0020.0002.0BB0][.FFF1.0000.0000.0000] # TAMIL LETTER RA +0BB2 0BCD ; [.2158.0020.0002.0BB2][.FFF0.0000.0000.0000] # <TAMIL LETTER LA, TAMIL SIGN VIRAMA> +0BB2 ; [.2158.0020.0002.0BB2][.FFF1.0000.0000.0000] # TAMIL LETTER LA +0BB5 0BCD ; [.2159.0020.0002.0BB5][.FFF0.0000.0000.0000] # <TAMIL LETTER VA, TAMIL SIGN VIRAMA> +0BB5 ; [.2159.0020.0002.0BB5][.FFF1.0000.0000.0000] # TAMIL LETTER VA +0BB4 0BCD ; [.215A.0020.0002.0BB4][.FFF0.0000.0000.0000] # <TAMIL LETTER LLLA, TAMIL SIGN VIRAMA> +0BB4 ; [.215A.0020.0002.0BB4][.FFF1.0000.0000.0000] # TAMIL LETTER LLLA +0BB3 0BCD ; [.215B.0020.0002.0BB3][.FFF0.0000.0000.0000] # <TAMIL LETTER LLA, TAMIL SIGN VIRAMA> +0BB3 ; [.215B.0020.0002.0BB3][.FFF1.0000.0000.0000] # TAMIL LETTER LLA +0BB1 0BCD ; [.215C.0020.0002.0BB1][.FFF0.0000.0000.0000] # <TAMIL LETTER RRA, TAMIL SIGN VIRAMA> +0BB1 ; [.215C.0020.0002.0BB1][.FFF1.0000.0000.0000] # TAMIL LETTER RRA +0BA9 0BCD ; [.215D.0020.0002.0BA9][.FFF0.0000.0000.0000] # <TAMIL LETTER NNNA, TAMIL SIGN VIRAMA> +0BA9 ; [.215D.0020.0002.0BA9][.FFF1.0000.0000.0000] # TAMIL LETTER NNNA +0B9C 0BCD ; [.215E.0020.0002.0B9C][.FFF0.0000.0000.0000] # <TAMIL LETTER JA, TAMIL SIGN VIRAMA> +0B9C ; [.215E.0020.0002.0B9C][.FFF1.0000.0000.0000] # TAMIL LETTER JA +0BB6 0BCD ; [.215F.0020.0002.0BB6][.FFF0.0000.0000.0000] # <TAMIL LETTER SHA, TAMIL SIGN VIRAMA> +0BB6 ; [.215F.0020.0002.0BB6][.FFF1.0000.0000.0000] # TAMIL LETTER SHA +0BB7 0BCD ; [.2160.0020.0002.0BB7][.FFF0.0000.0000.0000] # <TAMIL LETTER SSA, TAMIL SIGN VIRAMA> +0BB7 ; [.2160.0020.0002.0BB7][.FFF1.0000.0000.0000] # TAMIL LETTER SSA +0BB8 0BCD ; [.2161.0020.0002.0BB8][.FFF0.0000.0000.0000] # <TAMIL LETTER SA, TAMIL SIGN VIRAMA> +0BB8 ; [.2161.0020.0002.0BB8][.FFF1.0000.0000.0000] # TAMIL LETTER SA +0BB9 0BCD ; [.2162.0020.0002.0BB9][.FFF0.0000.0000.0000] # <TAMIL LETTER HA, TAMIL SIGN VIRAMA> +0BB9 ; [.2162.0020.0002.0BB9][.FFF1.0000.0000.0000] # TAMIL LETTER HA ENTRY }; diff --git 
a/cpan/Unicode-Collate/Collate/Locale/te.pl b/cpan/Unicode-Collate/Collate/Locale/te.pl index 44528cffc1..eb63a7ffbc 100644 --- a/cpan/Unicode-Collate/Collate/Locale/te.pl +++ b/cpan/Unicode-Collate/Collate/Locale/te.pl @@ -1,8 +1,9 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 0C01 ; [.217F.0020.0002.0C01][.FFF1.0000.0000.0000] # TELUGU SIGN CANDRABINDU 0C02 ; [.217F.0020.0002.0C02][.FFF2.0000.0000.0000] # TELUGU SIGN ANUSVARA 0C03 ; [.217F.0020.0002.0C03][.FFF3.0000.0000.0000] # TELUGU SIGN VISARGA +0C14 ; [.217F.0020.0002.0C14][.FFF0.0000.0000.0000] # TELUGU LETTER AU ENTRY }; diff --git a/cpan/Unicode-Collate/Collate/Locale/th.pl b/cpan/Unicode-Collate/Collate/Locale/th.pl index d83b6f1a62..27f266fc5d 100644 --- a/cpan/Unicode-Collate/Collate/Locale/th.pl +++ b/cpan/Unicode-Collate/Collate/Locale/th.pl @@ -1,10 +1,12 @@ +{ - locale_version => 0.95, + locale_version => 0.96, variable => 'shifted', alternate => 'shifted', entry => <<'ENTRY', # for DUCET v6.2.0 -0E2F ; [*03BD.0020.0002.0E2F][*FFF1.0000.0000.0000] # THAI CHARACTER PAIYANNOI +0E2F ; [*03BE.0020.0002.0E2F][*FFF0.0000.0000.0000] # THAI CHARACTER PAIYANNOI +0E5A ; [*03BE.0020.0002.0E5A][*FFF1.0000.0000.0000] # THAI CHARACTER ANGKHANKHU 0E46 ; [*03BF.0020.0002.0E46][*FFF1.0000.0000.0000] # THAI CHARACTER MAIYAMOK +0E5B ; [*03BF.0020.0002.0E5B][*FFF0.0000.0000.0000] # THAI CHARACTER KHOMUT 0E4C ; [.0000.00FF.0002.0E4C] # THAI CHARACTER THANTHAKHAT 0E47 ; [.0000.0100.0002.0E47] # THAI CHARACTER MAITAIKHU 0E48 ; [.0000.0101.0002.0E48] # THAI CHARACTER MAI EK diff --git a/cpan/Unicode-Collate/Collate/Locale/ur.pl b/cpan/Unicode-Collate/Collate/Locale/ur.pl index 34ecc4e495..560df6bc7e 100644 --- a/cpan/Unicode-Collate/Collate/Locale/ur.pl +++ b/cpan/Unicode-Collate/Collate/Locale/ur.pl @@ -1,76 +1,76 @@ +{ - locale_version => 0.93, + locale_version => 0.96, entry => <<'ENTRY', # for DUCET v6.2.0 -0627 ; [.1C99.0020.0002.0627] # ARABIC LETTER ALEF -0623 ; 
[.1C99.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE -0627 0654 ; [.1C99.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE -0622 ; [.1C99.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE -0627 0653 ; [.1C99.0020.0002.0622][.FFF1.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE -0628 ; [.1C99.0020.0002.0628][.FFF2.0000.0000.0000] # ARABIC LETTER BEH -0628 06BE ; [.1C99.0020.0002.0628][.FFF3.0000.0000.0000] # <ARABIC LETTER BEH, ARABIC LETTER HEH DOACHASHMEE> -067E ; [.1C99.0020.0002.067E][.FFF4.0000.0000.0000] # ARABIC LETTER PEH -067E 06BE ; [.1C99.0020.0002.067E][.FFF5.0000.0000.0000] # <ARABIC LETTER PEH, ARABIC LETTER HEH DOACHASHMEE> -062A ; [.1C99.0020.0002.062A][.FFF6.0000.0000.0000] # ARABIC LETTER TEH -062A 06BE ; [.1C99.0020.0002.062A][.FFF7.0000.0000.0000] # <ARABIC LETTER TEH, ARABIC LETTER HEH DOACHASHMEE> -0679 ; [.1C99.0020.0002.0679][.FFF8.0000.0000.0000] # ARABIC LETTER TTEH -0679 06BE ; [.1C99.0020.0002.0679][.FFF9.0000.0000.0000] # <ARABIC LETTER TTEH, ARABIC LETTER HEH DOACHASHMEE> -062B ; [.1C99.0020.0002.062B][.FFFA.0000.0000.0000] # ARABIC LETTER THEH -062C ; [.1C99.0020.0002.062C][.FFFB.0000.0000.0000] # ARABIC LETTER JEEM -062C 06BE ; [.1C9A.0020.0002.062C][.FFF1.0000.0000.0000] # <ARABIC LETTER JEEM, ARABIC LETTER HEH DOACHASHMEE> -0686 ; [.1C9A.0020.0002.0686][.FFF2.0000.0000.0000] # ARABIC LETTER TCHEH -0686 06BE ; [.1C9A.0020.0002.0686][.FFF3.0000.0000.0000] # <ARABIC LETTER TCHEH, ARABIC LETTER HEH DOACHASHMEE> -062D ; [.1C9A.0020.0002.062D][.FFF4.0000.0000.0000] # ARABIC LETTER HAH -062E ; [.1C9A.0020.0002.062E][.FFF5.0000.0000.0000] # ARABIC LETTER KHAH -062F ; [.1C9A.0020.0002.062F][.FFF6.0000.0000.0000] # ARABIC LETTER DAL -062F 06BE ; [.1C9A.0020.0002.062F][.FFF7.0000.0000.0000] # <ARABIC LETTER DAL, ARABIC LETTER HEH DOACHASHMEE> -0688 ; [.1C9A.0020.0002.0688][.FFF8.0000.0000.0000] # ARABIC LETTER DDAL -0688 06BE ; [.1C9A.0020.0002.0688][.FFF9.0000.0000.0000] # <ARABIC LETTER DDAL, 
ARABIC LETTER HEH DOACHASHMEE> -0630 ; [.1C9A.0020.0002.0630][.FFFA.0000.0000.0000] # ARABIC LETTER THAL -0631 ; [.1C9A.0020.0002.0631][.FFFB.0000.0000.0000] # ARABIC LETTER REH -0631 06BE ; [.1C9B.0020.0002.0631][.FFF1.0000.0000.0000] # <ARABIC LETTER REH, ARABIC LETTER HEH DOACHASHMEE> -0691 ; [.1C9B.0020.0002.0691][.FFF2.0000.0000.0000] # ARABIC LETTER RREH -0691 06BE ; [.1C9B.0020.0002.0691][.FFF3.0000.0000.0000] # <ARABIC LETTER RREH, ARABIC LETTER HEH DOACHASHMEE> -0632 ; [.1C9B.0020.0002.0632][.FFF4.0000.0000.0000] # ARABIC LETTER ZAIN -0698 ; [.1C9B.0020.0002.0698][.FFF5.0000.0000.0000] # ARABIC LETTER JEH -0633 ; [.1C9B.0020.0002.0633][.FFF6.0000.0000.0000] # ARABIC LETTER SEEN -0634 ; [.1C9B.0020.0002.0634][.FFF7.0000.0000.0000] # ARABIC LETTER SHEEN -0635 ; [.1C9B.0020.0002.0635][.FFF8.0000.0000.0000] # ARABIC LETTER SAD -0636 ; [.1C9B.0020.0002.0636][.FFF9.0000.0000.0000] # ARABIC LETTER DAD -0637 ; [.1C9B.0020.0002.0637][.FFFA.0000.0000.0000] # ARABIC LETTER TAH -0638 ; [.1C9B.0020.0002.0638][.FFFB.0000.0000.0000] # ARABIC LETTER ZAH -0639 ; [.1C9C.0020.0002.0639][.FFF1.0000.0000.0000] # ARABIC LETTER AIN -063A ; [.1C9C.0020.0002.063A][.FFF2.0000.0000.0000] # ARABIC LETTER GHAIN -0641 ; [.1C9C.0020.0002.0641][.FFF3.0000.0000.0000] # ARABIC LETTER FEH -0642 ; [.1C9C.0020.0002.0642][.FFF4.0000.0000.0000] # ARABIC LETTER QAF -06A9 ; [.1C9C.0020.0002.06A9][.FFF5.0000.0000.0000] # ARABIC LETTER KEHEH -06A9 06BE ; [.1C9C.0020.0002.06A9][.FFF6.0000.0000.0000] # <ARABIC LETTER KEHEH, ARABIC LETTER HEH DOACHASHMEE> -06AF ; [.1C9C.0020.0002.06AF][.FFF7.0000.0000.0000] # ARABIC LETTER GAF -06AF 06BE ; [.1C9C.0020.0002.06AF][.FFF8.0000.0000.0000] # <ARABIC LETTER GAF, ARABIC LETTER HEH DOACHASHMEE> -0644 ; [.1C9C.0020.0002.0644][.FFF9.0000.0000.0000] # ARABIC LETTER LAM -0644 06BE ; [.1C9C.0020.0002.0644][.FFFA.0000.0000.0000] # <ARABIC LETTER LAM, ARABIC LETTER HEH DOACHASHMEE> -0645 ; [.1C9C.0020.0002.0645][.FFFB.0000.0000.0000] # ARABIC LETTER MEEM -0645 06BE ; 
[.1C9D.0020.0002.0645][.FFF1.0000.0000.0000] # <ARABIC LETTER MEEM, ARABIC LETTER HEH DOACHASHMEE> -0646 ; [.1C9D.0020.0002.0646][.FFF2.0000.0000.0000] # ARABIC LETTER NOON -0646 06BE ; [.1C9D.0020.0002.0646][.FFF3.0000.0000.0000] # <ARABIC LETTER NOON, ARABIC LETTER HEH DOACHASHMEE> -06BA ; [.1C9D.0020.0002.06BA][.FFF4.0000.0000.0000] # ARABIC LETTER NOON GHUNNA -06BA 06BE ; [.1C9D.0020.0002.06BA][.FFF5.0000.0000.0000] # <ARABIC LETTER NOON GHUNNA, ARABIC LETTER HEH DOACHASHMEE> -0648 ; [.1C9D.0020.0002.0648][.FFF6.0000.0000.0000] # ARABIC LETTER WAW -0624 ; [.1C9D.0021.0002.0624][.FFF6.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE -0648 0654 ; [.1C9D.0021.0002.0624][.FFF6.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE -0648 06BE ; [.1C9D.0020.0002.0648][.FFF7.0000.0000.0000] # <ARABIC LETTER WAW, ARABIC LETTER HEH DOACHASHMEE> -06C1 ; [.1C9D.0020.0002.06C1][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL -06C2 ; [.1C9D.0021.0002.06C2][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE -06C1 0654 ; [.1C9D.0021.0002.06C2][.FFF8.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE -06BE ; [.1C9D.0020.0002.06BE][.FFF9.0000.0000.0000] # ARABIC LETTER HEH DOACHASHMEE -06C3 ; [.1C9D.0020.0002.06C3][.FFFA.0000.0000.0000] # ARABIC LETTER TEH MARBUTA GOAL -0621 ; [.1C9D.0020.0002.0621][.FFFB.0000.0000.0000] # ARABIC LETTER HAMZA -06CC ; [.1C9E.0020.0002.06CC][.FFF1.0000.0000.0000] # ARABIC LETTER FARSI YEH -0626 ; [.1C9E.0021.0002.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE -064A 0654 ; [.1C9E.0021.0002.0626][.FFF1.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE -06CC 06BE ; [.1C9E.0020.0002.06CC][.FFF2.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC LETTER HEH DOACHASHMEE> -06D2 ; [.1C9E.0020.0002.06D2][.FFF3.0000.0000.0000] # ARABIC LETTER YEH BARREE -06D3 ; [.1C9E.0021.0002.06D3][.FFF3.0000.0000.0000] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE -06D2 0654 ; [.1C9E.0021.0002.06D3][.FFF3.0000.0000.0000] # ARABIC 
LETTER YEH BARREE WITH HAMZA ABOVE +0623 ; [.1C9C.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0627 0654 ; [.1C9C.0021.0002.0623] # ARABIC LETTER ALEF WITH HAMZA ABOVE +0622 ; [.1C9D.0020.0002.0622][.FFC0.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0627 0653 ; [.1C9D.0020.0002.0622][.FFC0.0000.0000.0000] # ARABIC LETTER ALEF WITH MADDA ABOVE +0628 ; [.1C9D.0020.0002.0628][.FFC1.0000.0000.0000] # ARABIC LETTER BEH +0628 06BE ; [.1C9D.0020.0002.0628][.FFC2.0000.0000.0000] # <ARABIC LETTER BEH, ARABIC LETTER HEH DOACHASHMEE> +067E ; [.1C9D.0020.0002.067E][.FFC3.0000.0000.0000] # ARABIC LETTER PEH +067E 06BE ; [.1C9D.0020.0002.067E][.FFC4.0000.0000.0000] # <ARABIC LETTER PEH, ARABIC LETTER HEH DOACHASHMEE> +062A ; [.1C9D.0020.0002.062A][.FFC5.0000.0000.0000] # ARABIC LETTER TEH +062A 06BE ; [.1C9D.0020.0002.062A][.FFC6.0000.0000.0000] # <ARABIC LETTER TEH, ARABIC LETTER HEH DOACHASHMEE> +0679 ; [.1C9D.0020.0002.0679][.FFC7.0000.0000.0000] # ARABIC LETTER TTEH +0679 06BE ; [.1C9D.0020.0002.0679][.FFC8.0000.0000.0000] # <ARABIC LETTER TTEH, ARABIC LETTER HEH DOACHASHMEE> +062B ; [.1C9D.0020.0002.062B][.FFC9.0000.0000.0000] # ARABIC LETTER THEH +062C ; [.1C9D.0020.0002.062C][.FFCA.0000.0000.0000] # ARABIC LETTER JEEM +062C 06BE ; [.1C9D.0020.0002.062C][.FFCB.0000.0000.0000] # <ARABIC LETTER JEEM, ARABIC LETTER HEH DOACHASHMEE> +0686 ; [.1C9D.0020.0002.0686][.FFCC.0000.0000.0000] # ARABIC LETTER TCHEH +0686 06BE ; [.1C9D.0020.0002.0686][.FFCD.0000.0000.0000] # <ARABIC LETTER TCHEH, ARABIC LETTER HEH DOACHASHMEE> +062D ; [.1C9D.0020.0002.062D][.FFCE.0000.0000.0000] # ARABIC LETTER HAH +062E ; [.1C9D.0020.0002.062E][.FFCF.0000.0000.0000] # ARABIC LETTER KHAH +062F ; [.1C9D.0020.0002.062F][.FFD0.0000.0000.0000] # ARABIC LETTER DAL +062F 06BE ; [.1C9D.0020.0002.062F][.FFD1.0000.0000.0000] # <ARABIC LETTER DAL, ARABIC LETTER HEH DOACHASHMEE> +0688 ; [.1C9D.0020.0002.0688][.FFD2.0000.0000.0000] # ARABIC LETTER DDAL +0688 06BE ; 
[.1C9D.0020.0002.0688][.FFD3.0000.0000.0000] # <ARABIC LETTER DDAL, ARABIC LETTER HEH DOACHASHMEE> +0630 ; [.1C9D.0020.0002.0630][.FFD4.0000.0000.0000] # ARABIC LETTER THAL +0631 ; [.1C9D.0020.0002.0631][.FFD5.0000.0000.0000] # ARABIC LETTER REH +0631 06BE ; [.1C9D.0020.0002.0631][.FFD6.0000.0000.0000] # <ARABIC LETTER REH, ARABIC LETTER HEH DOACHASHMEE> +0691 ; [.1C9D.0020.0002.0691][.FFD7.0000.0000.0000] # ARABIC LETTER RREH +0691 06BE ; [.1C9D.0020.0002.0691][.FFD8.0000.0000.0000] # <ARABIC LETTER RREH, ARABIC LETTER HEH DOACHASHMEE> +0632 ; [.1C9D.0020.0002.0632][.FFD9.0000.0000.0000] # ARABIC LETTER ZAIN +0698 ; [.1C9D.0020.0002.0698][.FFDA.0000.0000.0000] # ARABIC LETTER JEH +0633 ; [.1C9D.0020.0002.0633][.FFDB.0000.0000.0000] # ARABIC LETTER SEEN +0634 ; [.1C9D.0020.0002.0634][.FFDC.0000.0000.0000] # ARABIC LETTER SHEEN +0635 ; [.1C9D.0020.0002.0635][.FFDD.0000.0000.0000] # ARABIC LETTER SAD +0636 ; [.1C9D.0020.0002.0636][.FFDE.0000.0000.0000] # ARABIC LETTER DAD +0637 ; [.1C9D.0020.0002.0637][.FFDF.0000.0000.0000] # ARABIC LETTER TAH +0638 ; [.1C9D.0020.0002.0638][.FFE0.0000.0000.0000] # ARABIC LETTER ZAH +0639 ; [.1C9D.0020.0002.0639][.FFE1.0000.0000.0000] # ARABIC LETTER AIN +063A ; [.1C9D.0020.0002.063A][.FFE2.0000.0000.0000] # ARABIC LETTER GHAIN +0641 ; [.1C9D.0020.0002.0641][.FFE3.0000.0000.0000] # ARABIC LETTER FEH +0642 ; [.1C9D.0020.0002.0642][.FFE4.0000.0000.0000] # ARABIC LETTER QAF +06A9 ; [.1C9D.0020.0002.06A9][.FFE5.0000.0000.0000] # ARABIC LETTER KEHEH +06A9 06BE ; [.1C9D.0020.0002.06A9][.FFE6.0000.0000.0000] # <ARABIC LETTER KEHEH, ARABIC LETTER HEH DOACHASHMEE> +06AF ; [.1C9D.0020.0002.06AF][.FFE7.0000.0000.0000] # ARABIC LETTER GAF +06AF 06BE ; [.1C9D.0020.0002.06AF][.FFE8.0000.0000.0000] # <ARABIC LETTER GAF, ARABIC LETTER HEH DOACHASHMEE> +0644 ; [.1C9D.0020.0002.0644][.FFE9.0000.0000.0000] # ARABIC LETTER LAM +0644 06BE ; [.1C9D.0020.0002.0644][.FFEA.0000.0000.0000] # <ARABIC LETTER LAM, ARABIC LETTER HEH DOACHASHMEE> +0645 ; 
[.1C9D.0020.0002.0645][.FFEB.0000.0000.0000] # ARABIC LETTER MEEM +0645 06BE ; [.1C9D.0020.0002.0645][.FFEC.0000.0000.0000] # <ARABIC LETTER MEEM, ARABIC LETTER HEH DOACHASHMEE> +0646 ; [.1C9D.0020.0002.0646][.FFED.0000.0000.0000] # ARABIC LETTER NOON +0646 06BE ; [.1C9D.0020.0002.0646][.FFEE.0000.0000.0000] # <ARABIC LETTER NOON, ARABIC LETTER HEH DOACHASHMEE> +06BA ; [.1C9D.0020.0002.06BA][.FFEF.0000.0000.0000] # ARABIC LETTER NOON GHUNNA +06BA 06BE ; [.1C9D.0020.0002.06BA][.FFF0.0000.0000.0000] # <ARABIC LETTER NOON GHUNNA, ARABIC LETTER HEH DOACHASHMEE> +0648 ; [.1C9D.0020.0002.0648][.FFF1.0000.0000.0000] # ARABIC LETTER WAW +0624 ; [.1C9D.0021.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 0654 ; [.1C9D.0021.0002.0624][.FFF1.0000.0000.0000] # ARABIC LETTER WAW WITH HAMZA ABOVE +0648 06BE ; [.1C9D.0020.0002.0648][.FFF2.0000.0000.0000] # <ARABIC LETTER WAW, ARABIC LETTER HEH DOACHASHMEE> +06C1 ; [.1C9D.0020.0002.06C1][.FFF3.0000.0000.0000] # ARABIC LETTER HEH GOAL +06C2 ; [.1C9D.0021.0002.06C2][.FFF3.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +06C1 0654 ; [.1C9D.0021.0002.06C2][.FFF3.0000.0000.0000] # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +06BE ; [.1C9D.0020.0002.06BE][.FFF4.0000.0000.0000] # ARABIC LETTER HEH DOACHASHMEE +06C3 ; [.1C9D.0020.0002.06C3][.FFF5.0000.0000.0000] # ARABIC LETTER TEH MARBUTA GOAL +0621 ; [.1C9D.0020.0002.0621][.FFF6.0000.0000.0000] # ARABIC LETTER HAMZA +06CC ; [.1C9D.0020.0002.06CC][.FFF7.0000.0000.0000] # ARABIC LETTER FARSI YEH +0626 ; [.1C9D.0021.0002.0626][.FFF7.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +064A 0654 ; [.1C9D.0021.0002.0626][.FFF7.0000.0000.0000] # ARABIC LETTER YEH WITH HAMZA ABOVE +06CC 06BE ; [.1C9D.0020.0002.06CC][.FFF8.0000.0000.0000] # <ARABIC LETTER FARSI YEH, ARABIC LETTER HEH DOACHASHMEE> +06D2 ; [.1C9D.0020.0002.06D2][.FFF9.0000.0000.0000] # ARABIC LETTER YEH BARREE +06D3 ; [.1C9D.0021.0002.06D3][.FFF9.0000.0000.0000] # ARABIC LETTER YEH BARREE WITH 
HAMZA ABOVE +06D2 0654 ; [.1C9D.0021.0002.06D3][.FFF9.0000.0000.0000] # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +066E ; [.1C9D.0020.0002.066E][.FFFA.0000.0000.0000] # ARABIC LETTER DOTLESS BEH 0652 ; [.0000.00AF.0002.0652] # ARABIC SUKUN 064E ; [.0000.00B0.0002.064E] # ARABIC FATHA 0650 ; [.0000.00B1.0002.0650] # ARABIC KASRA diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README index 5b257a8820..3ec9dbc50b 100644 --- a/cpan/Unicode-Collate/README +++ b/cpan/Unicode-Collate/README @@ -1,4 +1,4 @@ -Unicode/Collate version 0.95 +Unicode/Collate version 0.96 =============================== NAME diff --git a/cpan/Unicode-Collate/t/default.t b/cpan/Unicode-Collate/t/default.t index bec9c294fd..14a96937e6 100644 --- a/cpan/Unicode-Collate/t/default.t +++ b/cpan/Unicode-Collate/t/default.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..60\n"; } +BEGIN { $| = 1; print "1..70\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -40,8 +40,6 @@ my $acute = _pack_U(0x0301); my $hiragana = "\x{3042}\x{3044}"; my $katakana = "\x{30A2}\x{30A4}"; -##### 2..11 - my $Collator = Unicode::Collate->new( normalization => undef, ); @@ -53,6 +51,8 @@ ok($Collator->version(), Unicode::Collate->Base_Unicode_Version); ok($Collator->version, $Collator->Base_Unicode_Version); ok($Collator->version(), $Collator->Base_Unicode_Version()); +# 6 + ok($Collator->cmp("", ""), 0); ok($Collator->eq("", "")); ok($Collator->cmp("", "perl"), -1); @@ -67,7 +67,7 @@ ok( join(':', qw/ ACA ACHA ACIA ACKA ADA / ), ); -##### 12..22 +# 11 ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1) ok($Collator->cmp($a_acute, $A_acute), -1); @@ -85,7 +85,7 @@ ok($Collator->lt("A", $A_acute)); ok($Collator->lt("A", $a_acute)); ok($Collator->lt($a_acute, $A_acute)); -##### 23..29 +# 22 $Collator->change(level => 2); @@ -98,7 +98,7 @@ ok( $Collator->cmp($hiragana, $katakana), 0); ok( $Collator->eq($hiragana, $katakana) ); ok( 
$Collator->ge($hiragana, $katakana) ); -##### 30..35 +# 29 # hangul ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); @@ -108,7 +108,7 @@ ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana -##### 36..44 +# 35 $Collator->change(%old_level, katakana_before_hiragana => 1); @@ -123,7 +123,7 @@ ok( $Collator->ne($hiragana, $katakana) ); ok( $Collator->gt($hiragana, $katakana) ); ok( $Collator->ge($hiragana, $katakana) ); -##### 45..50 +# 44 $Collator->change(upper_before_lower => 1); @@ -134,14 +134,14 @@ ok( $Collator->cmp($hiragana, $katakana), 1); ok( $Collator->ge($hiragana, $katakana), 1); ok( $Collator->gt($hiragana, $katakana), 1); -##### 51..52 +# 50 $Collator->change(katakana_before_hiragana => 0); ok( $Collator->cmp("abc", "ABC"), 1); ok( $Collator->cmp($hiragana, $katakana), -1); -##### 53..54 +# 52 $Collator->change(upper_before_lower => 0); @@ -150,19 +150,44 @@ ok( $Collator->le("abc", "ABC") ); ok( $Collator->cmp($hiragana, $katakana), -1); ok( $Collator->lt($hiragana, $katakana) ); -##### 55..60 +# 56 $Collator->change(level => 1); -my $SupCyril = Unicode::Collate->new( +my $Tailored = Unicode::Collate->new( normalization => undef, suppress => [0x400..0x4FF], level => 1, + entry => '0000 ; [.FFFE.0020.0005.0000]', ); # Ka vs Kje ok($Collator->gt("\x{45C}", "\x{43A}")); ok($Collator->gt("\x{40C}", "\x{41A}")); -ok($SupCyril->gt("\x{45C}", "\x{43A}")); -ok($SupCyril->gt("\x{40C}", "\x{41A}")); +ok($Tailored->gt("\x{45C}", "\x{43A}")); +ok($Tailored->gt("\x{40C}", "\x{41A}")); + +# 60 + +ok($Collator->eq("abc\0", "abc")); +ok($Tailored->gt("abc\0", "abc\x{4E00}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); + +# 64 + +$Tailored->change(UCA_Version => 9); + +ok($Tailored->gt("abc\0", "abc\x{4E00}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); + +# 
67 + +$Tailored->change(UCA_Version => 8); + +ok($Tailored->gt("abc\0", "abc\x{4E00}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); +ok($Tailored->gt("abc\0", "abc\x{FFFD}")); +# 70 diff --git a/cpan/Unicode-Collate/t/loc_as.t b/cpan/Unicode-Collate/t/loc_as.t index 5fc9527571..e188f64078 100644 --- a/cpan/Unicode-Collate/t/loc_as.t +++ b/cpan/Unicode-Collate/t/loc_as.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..10\n"; } +BEGIN { $| = 1; print "1..24\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -37,13 +37,22 @@ ok($objAs->getlocale, 'as'); $objAs->change(level => 1); -ok($objAs->lt("\x{994}", "\x{982}")); -ok($objAs->lt("\x{982}", "\x{981}")); -ok($objAs->lt("\x{981}", "\x{983}")); -ok($objAs->lt("\x{983}", "\x{995}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? pack('U', 0xFFFF) : ""; + $objAs->change(highestFFFF => 1) if $h; -ok($objAs->lt("\x{9A3}","\x{9A4}\x{9CD}\x{200D}")); -ok($objAs->lt("\x{9A4}\x{9CD}\x{200D}","\x{9A4}")); + ok($objAs->lt("\x{993}$t", "\x{994}")); + ok($objAs->lt("\x{994}$t", "\x{982}")); + ok($objAs->lt("\x{982}$t", "\x{981}")); + ok($objAs->lt("\x{981}$t", "\x{983}")); + ok($objAs->lt("\x{983}$t", "\x{995}")); -ok($objAs->lt("\x{9B9}", "\x{995}\x{9CD}\x{9B7}")); -ok($objAs->lt("\x{995}\x{9CD}\x{9B7}", "\x{9BD}")); + ok($objAs->lt("\x{9A2}$t", "\x{9A3}")); + ok($objAs->lt("\x{9A3}$t", "\x{9A4}\x{9CD}\x{200D}")); + ok($objAs->lt("\x{9A4}\x{9CD}\x{200D}$t", "\x{9A4}")); + + ok($objAs->lt("\x{9B8}$t", "\x{9B9}")); + ok($objAs->lt("\x{9B9}$t", "\x{995}\x{9CD}\x{9B7}")); + ok($objAs->lt("\x{995}\x{9CD}\x{9B7}$t", "\x{9BD}")); +} diff --git a/cpan/Unicode-Collate/t/loc_bn.t b/cpan/Unicode-Collate/t/loc_bn.t index 50c6abc75c..f0fd826829 100644 --- a/cpan/Unicode-Collate/t/loc_bn.t +++ b/cpan/Unicode-Collate/t/loc_bn.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..6\n"; } +BEGIN { $| = 1; print "1..12\n"; } my $count = 0; sub ok ($;$) { my $p = my 
$r = shift; @@ -37,8 +37,14 @@ ok($objBn->getlocale, 'bn'); $objBn->change(level => 1); -ok($objBn->lt("\x{994}", "\x{982}")); -ok($objBn->lt("\x{982}", "\x{983}")); -ok($objBn->lt("\x{983}", "\x{981}")); -ok($objBn->lt("\x{981}", "\x{995}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? pack('U', 0xFFFF) : ""; + $objBn->change(highestFFFF => 1) if $h; + ok($objBn->lt("\x{993}$t", "\x{994}")); + ok($objBn->lt("\x{994}$t", "\x{982}")); + ok($objBn->lt("\x{982}$t", "\x{983}")); + ok($objBn->lt("\x{983}$t", "\x{981}")); + ok($objBn->lt("\x{981}$t", "\x{995}")); +} diff --git a/cpan/Unicode-Collate/t/loc_fa.t b/cpan/Unicode-Collate/t/loc_fa.t index 1ac1b044c3..94bcede172 100644 --- a/cpan/Unicode-Collate/t/loc_fa.t +++ b/cpan/Unicode-Collate/t/loc_fa.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..81\n"; } +BEGIN { $| = 1; print "1..90\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,57 +35,28 @@ my $objFa = Unicode::Collate::Locale-> ok($objFa->getlocale, 'fa'); -$objFa->change(level => 1); - -ok($objFa->lt("\x{622}", "\x{627}")); -ok($objFa->lt("\x{627}", "\x{621}")); -ok($objFa->lt("\x{621}", "\x{66E}")); - -ok($objFa->lt("\x{6CF}", "\x{647}")); -ok($objFa->lt("\x{647}", "\x{778}")); - -# 7 - -ok($objFa->eq("\x{64E}", "\x{650}")); -ok($objFa->eq("\x{650}", "\x{64F}")); -ok($objFa->eq("\x{64F}", "\x{64B}")); -ok($objFa->eq("\x{64B}", "\x{64D}")); -ok($objFa->eq("\x{64D}", "\x{64C}")); - -ok($objFa->eq("\x{627}", "\x{671}")); +$objFa->change(level => 3); -ok($objFa->eq("\x{621}", "\x{623}")); -ok($objFa->eq("\x{623}", "\x{672}")); -ok($objFa->eq("\x{672}", "\x{625}")); -ok($objFa->eq("\x{625}", "\x{673}")); -ok($objFa->eq("\x{673}", "\x{624}")); -ok($objFa->eq("\x{624}", "\x{6CC}\x{654}")); +ok($objFa->eq("\x{622}", "\x{627}\x{653}")); +ok($objFa->eq("\x{623}", "\x{627}\x{654}")); +ok($objFa->eq("\x{625}", "\x{627}\x{655}")); +ok($objFa->eq("\x{624}", "\x{648}\x{654}")); +ok($objFa->eq("\x{626}", 
"\x{64A}\x{654}")); +ok($objFa->eq("\x{6C2}", "\x{6C1}\x{654}")); +ok($objFa->eq("\x{6C0}", "\x{6D5}\x{654}")); +ok($objFa->eq("\x{6D3}", "\x{6D2}\x{654}")); -ok($objFa->eq("\x{6A9}", "\x{6AA}")); -ok($objFa->eq("\x{6AA}", "\x{6AB}")); -ok($objFa->eq("\x{6AB}", "\x{643}")); -ok($objFa->eq("\x{643}", "\x{6AC}")); -ok($objFa->eq("\x{6AC}", "\x{6AD}")); -ok($objFa->eq("\x{6AD}", "\x{6AE}")); +# 10 -ok($objFa->eq("\x{647}", "\x{6D5}")); -ok($objFa->eq("\x{6D5}", "\x{6C1}")); -ok($objFa->eq("\x{6C1}", "\x{629}")); -ok($objFa->eq("\x{629}", "\x{6C3}")); -ok($objFa->eq("\x{6C3}", "\x{6C0}")); -ok($objFa->eq("\x{6C0}", "\x{6BE}")); +ok($objFa->lt("\x{6CC}\x{654}", "\x{649}\x{654}")); +ok($objFa->lt("\x{649}\x{654}", "\x{626}")); -ok($objFa->eq("\x{6CC}", "\x{649}")); -ok($objFa->eq("\x{649}", "\x{6D2}")); -ok($objFa->eq("\x{6D2}", "\x{64A}")); -ok($objFa->eq("\x{64A}", "\x{6D0}")); -ok($objFa->eq("\x{6D0}", "\x{6D1}")); -ok($objFa->eq("\x{6D1}", "\x{6CD}")); -ok($objFa->eq("\x{6CD}", "\x{6CE}")); +$objFa->change(level => 2); -# 38 +ok($objFa->eq("\x{6CC}\x{654}", "\x{649}\x{654}")); +ok($objFa->eq("\x{649}\x{654}", "\x{626}")); -$objFa->change(level => 2); +# 14 ok($objFa->lt("\x{64E}", "\x{650}")); ok($objFa->lt("\x{650}", "\x{64F}")); @@ -124,27 +95,61 @@ ok($objFa->lt("\x{6D0}", "\x{6D1}")); ok($objFa->lt("\x{6D1}", "\x{6CD}")); ok($objFa->lt("\x{6CD}", "\x{6CE}")); -# 69 +# 45 -ok($objFa->eq("\x{6CC}\x{654}", "\x{649}\x{654}")); -ok($objFa->eq("\x{649}\x{654}", "\x{626}")); +$objFa->change(level => 1); -# 71 +ok($objFa->eq("\x{64E}", "\x{650}")); +ok($objFa->eq("\x{650}", "\x{64F}")); +ok($objFa->eq("\x{64F}", "\x{64B}")); +ok($objFa->eq("\x{64B}", "\x{64D}")); +ok($objFa->eq("\x{64D}", "\x{64C}")); -$objFa->change(level => 3); +ok($objFa->eq("\x{627}", "\x{671}")); -ok($objFa->lt("\x{6CC}\x{654}", "\x{649}\x{654}")); -ok($objFa->lt("\x{649}\x{654}", "\x{626}")); +ok($objFa->eq("\x{621}", "\x{623}")); +ok($objFa->eq("\x{623}", "\x{672}")); +ok($objFa->eq("\x{672}", 
"\x{625}")); +ok($objFa->eq("\x{625}", "\x{673}")); +ok($objFa->eq("\x{673}", "\x{624}")); +ok($objFa->eq("\x{624}", "\x{6CC}\x{654}")); -# 73 +ok($objFa->eq("\x{6A9}", "\x{6AA}")); +ok($objFa->eq("\x{6AA}", "\x{6AB}")); +ok($objFa->eq("\x{6AB}", "\x{643}")); +ok($objFa->eq("\x{643}", "\x{6AC}")); +ok($objFa->eq("\x{6AC}", "\x{6AD}")); +ok($objFa->eq("\x{6AD}", "\x{6AE}")); -ok($objFa->eq("\x{622}", "\x{627}\x{653}")); -ok($objFa->eq("\x{623}", "\x{627}\x{654}")); -ok($objFa->eq("\x{625}", "\x{627}\x{655}")); -ok($objFa->eq("\x{624}", "\x{648}\x{654}")); -ok($objFa->eq("\x{626}", "\x{64A}\x{654}")); -ok($objFa->eq("\x{6C2}", "\x{6C1}\x{654}")); -ok($objFa->eq("\x{6C0}", "\x{6D5}\x{654}")); -ok($objFa->eq("\x{6D3}", "\x{6D2}\x{654}")); +ok($objFa->eq("\x{647}", "\x{6D5}")); +ok($objFa->eq("\x{6D5}", "\x{6C1}")); +ok($objFa->eq("\x{6C1}", "\x{629}")); +ok($objFa->eq("\x{629}", "\x{6C3}")); +ok($objFa->eq("\x{6C3}", "\x{6C0}")); +ok($objFa->eq("\x{6C0}", "\x{6BE}")); -# 81 +ok($objFa->eq("\x{6CC}", "\x{649}")); +ok($objFa->eq("\x{649}", "\x{6D2}")); +ok($objFa->eq("\x{6D2}", "\x{64A}")); +ok($objFa->eq("\x{64A}", "\x{6D0}")); +ok($objFa->eq("\x{6D0}", "\x{6D1}")); +ok($objFa->eq("\x{6D1}", "\x{6CD}")); +ok($objFa->eq("\x{6CD}", "\x{6CE}")); + +# 76 + +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objFa->change(highestFFFF => 1) if $h; + + ok($objFa->lt("\x{8AC}$t", "\x{622}")); + ok($objFa->lt("\x{622}$t", "\x{627}")); + ok($objFa->lt("\x{627}$t", "\x{621}")); + ok($objFa->lt("\x{621}$t", "\x{66E}")); + ok($objFa->lt("\x{66E}$t", "\x{628}")); + + ok($objFa->lt("\x{6CF}$t", "\x{647}")); + ok($objFa->lt("\x{647}$t", "\x{778}")); +} +# 90 diff --git a/cpan/Unicode-Collate/t/loc_gu.t b/cpan/Unicode-Collate/t/loc_gu.t index 7b986620a9..4b8732ab64 100644 --- a/cpan/Unicode-Collate/t/loc_gu.t +++ b/cpan/Unicode-Collate/t/loc_gu.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..7\n"; } +BEGIN { $| = 1; print "1..10\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,15 +35,24 @@ my $objGu = Unicode::Collate::Locale-> ok($objGu->getlocale, 'gu'); -$objGu->change(level => 1); +$objGu->change(level => 2); -ok($objGu->lt("\x{AD0}", "\x{A82}")); -ok($objGu->lt("\x{A82}", "\x{A83}")); -ok($objGu->lt("\x{A83}", "\x{A85}")); +ok($objGu->lt("\x{A82}", "\x{A81}")); + +$objGu->change(level => 1); ok($objGu->eq("\x{A82}", "\x{A81}")); -$objGu->change(level => 2); +# 4 -ok($objGu->lt("\x{A82}", "\x{A81}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objGu->change(highestFFFF => 1) if $h; + + ok($objGu->lt("\x{AD0}$t", "\x{A82}")); + ok($objGu->lt("\x{A82}$t", "\x{A83}")); + ok($objGu->lt("\x{A83}$t", "\x{A85}")); +} +# 10 diff --git a/cpan/Unicode-Collate/t/loc_hi.t b/cpan/Unicode-Collate/t/loc_hi.t index eadca6fb35..847722d77e 100644 --- a/cpan/Unicode-Collate/t/loc_hi.t +++ b/cpan/Unicode-Collate/t/loc_hi.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..7\n"; } +BEGIN { $| = 1; print "1..10\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,15 +35,24 @@ my $objHi = Unicode::Collate::Locale-> ok($objHi->getlocale, 'hi'); -$objHi->change(level => 1); +$objHi->change(level => 2); -ok($objHi->lt("\x{950}", "\x{902}")); -ok($objHi->lt("\x{902}", "\x{903}")); -ok($objHi->lt("\x{903}", "\x{972}")); +ok($objHi->lt("\x{902}", "\x{901}")); + +$objHi->change(level => 1); ok($objHi->eq("\x{902}", "\x{901}")); -$objHi->change(level => 2); +# 4 -ok($objHi->lt("\x{902}", "\x{901}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objHi->change(highestFFFF => 1) if $h; + + ok($objHi->lt("\x{950}$t", "\x{902}")); + ok($objHi->lt("\x{902}$t", "\x{903}")); + ok($objHi->lt("\x{903}$t", "\x{972}")); +} +# 10 diff --git a/cpan/Unicode-Collate/t/loc_hy.t b/cpan/Unicode-Collate/t/loc_hy.t index 8719fa5d94..3ff93e336c 100644 --- a/cpan/Unicode-Collate/t/loc_hy.t +++ b/cpan/Unicode-Collate/t/loc_hy.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..13\n"; } +BEGIN { $| = 1; print "1..25\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -60,6 +60,8 @@ ok($objHy->gt("\x{587}", "\x{535}\x{582}")); # 10 +$objHy->change(level => 1); + $objHy->change(UCA_Version => 8); ok($objHy->lt("\x{584}\x{4E00}", "\x{587}")); @@ -67,3 +69,21 @@ ok($objHy->lt("\x{584}\x{20000}", "\x{587}")); ok($objHy->lt("\x{584}\x{10FFFD}","\x{587}")); # 13 + +$objHy->change(UCA_Version => 22); + +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? pack('U', 0xFFFF) : ""; + $objHy->change(highestFFFF => 1) if $h; + + ok($objHy->lt("\x{583}$t", "\x{584}")); + ok($objHy->lt("\x{584}$t", "\x{587}")); + ok($objHy->lt("\x{587}$t", "\x{585}")); + + ok($objHy->lt("\x{553}$t", "\x{554}")); + ok($objHy->lt("\x{554}$t", "\x{535}\x{582}")); + ok($objHy->lt("\x{535}\x{582}$t", "\x{555}")); +} + +# 25 diff --git a/cpan/Unicode-Collate/t/loc_kn.t b/cpan/Unicode-Collate/t/loc_kn.t index 6ac2f52a46..8d2f39eea3 100644 --- a/cpan/Unicode-Collate/t/loc_kn.t +++ b/cpan/Unicode-Collate/t/loc_kn.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..7\n"; } +BEGIN { $| = 1; print "1..14\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -37,9 +37,16 @@ ok($objKn->getlocale, 'kn'); $objKn->change(level => 1); -ok($objKn->lt("\x{C94}", "\x{C82}")); -ok($objKn->lt("\x{C82}", "\x{C83}")); -ok($objKn->lt("\x{C83}", "\x{CF1}")); -ok($objKn->lt("\x{CF1}", "\x{CF2}")); -ok($objKn->lt("\x{CF2}", "\x{C95}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t 
= $h ? pack('U', 0xFFFF) : ""; + $objKn->change(highestFFFF => 1) if $h; + + ok($objKn->lt("\x{C93}$t", "\x{C94}")); + ok($objKn->lt("\x{C94}$t", "\x{C82}")); + ok($objKn->lt("\x{C82}$t", "\x{C83}")); + ok($objKn->lt("\x{C83}$t", "\x{CF1}")); + ok($objKn->lt("\x{CF1}$t", "\x{CF2}")); + ok($objKn->lt("\x{CF2}$t", "\x{C95}")); +} diff --git a/cpan/Unicode-Collate/t/loc_kok.t b/cpan/Unicode-Collate/t/loc_kok.t index b2f23a18d3..4581bdccf1 100644 --- a/cpan/Unicode-Collate/t/loc_kok.t +++ b/cpan/Unicode-Collate/t/loc_kok.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..13\n"; } +BEGIN { $| = 1; print "1..21\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,29 +35,35 @@ my $objKok = Unicode::Collate::Locale-> ok($objKok->getlocale, 'kok'); -$objKok->change(level => 1); +$objKok->change(level => 2); -ok($objKok->lt("\x{950}", "\x{902}")); -ok($objKok->lt("\x{902}", "\x{903}")); -ok($objKok->lt("\x{903}", "\x{972}")); +ok($objKok->lt("\x{902}", "\x{901}")); +ok($objKok->lt("\x{933}", "\x{934}")); -ok($objKok->eq("\x{902}", "\x{901}")); +$objKok->change(level => 3); -ok($objKok->lt("\x{939}", "\x{933}")); -ok($objKok->lt("\x{933}", "\x{915}\x{94D}\x{937}")); -ok($objKok->lt("\x{915}\x{94D}\x{937}", "\x{93D}")); +ok($objKok->eq("\x{933}\x{93C}", "\x{934}")); -ok($objKok->eq("\x{933}", "\x{934}")); +$objKok->change(level => 1); -# 10 +ok($objKok->eq("\x{902}", "\x{901}")); +ok($objKok->eq("\x{933}", "\x{934}")); -$objKok->change(level => 2); +# 7 -ok($objKok->lt("\x{902}", "\x{901}")); -ok($objKok->lt("\x{933}", "\x{934}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objKok->change(highestFFFF => 1) if $h; -$objKok->change(level => 3); + ok($objKok->lt("\x{950}$t", "\x{902}")); + ok($objKok->lt("\x{902}$t", "\x{903}")); + ok($objKok->lt("\x{903}$t", "\x{972}")); -ok($objKok->eq("\x{933}\x{93C}", "\x{934}")); + ok($objKok->lt("\x{938}$t", "\x{939}")); + ok($objKok->lt("\x{939}$t", "\x{933}")); + ok($objKok->lt("\x{933}$t", "\x{915}\x{94D}\x{937}")); + ok($objKok->lt("\x{915}\x{94D}\x{937}$t", "\x{93D}")); +} -# 13 +# 21 diff --git a/cpan/Unicode-Collate/t/loc_mr.t b/cpan/Unicode-Collate/t/loc_mr.t index dd66b4ee85..8c601f0027 100644 --- a/cpan/Unicode-Collate/t/loc_mr.t +++ b/cpan/Unicode-Collate/t/loc_mr.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..14\n"; } +BEGIN { $| = 1; print "1..23\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,30 +35,36 @@ my $objMr = Unicode::Collate::Locale-> ok($objMr->getlocale, 'mr'); -$objMr->change(level => 1); +$objMr->change(level => 2); -ok($objMr->lt("\x{950}", "\x{902}")); -ok($objMr->lt("\x{902}", "\x{903}")); -ok($objMr->lt("\x{903}", "\x{972}")); +ok($objMr->lt("\x{902}", "\x{901}")); +ok($objMr->lt("\x{933}", "\x{934}")); -ok($objMr->eq("\x{902}", "\x{901}")); +$objMr->change(level => 3); -ok($objMr->lt("\x{939}", "\x{933}")); -ok($objMr->lt("\x{933}", "\x{915}\x{94D}\x{937}")); -ok($objMr->lt("\x{915}\x{94D}\x{937}", "\x{91C}\x{94D}\x{91E}")); -ok($objMr->lt("\x{91C}\x{94D}\x{91E}", "\x{93D}")); +ok($objMr->eq("\x{933}\x{93C}", "\x{934}")); -ok($objMr->eq("\x{933}", "\x{934}")); +$objMr->change(level => 1); -# 11 +ok($objMr->eq("\x{902}", "\x{901}")); +ok($objMr->eq("\x{933}", "\x{934}")); -$objMr->change(level => 2); +# 7 -ok($objMr->lt("\x{902}", "\x{901}")); -ok($objMr->lt("\x{933}", "\x{934}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objMr->change(highestFFFF => 1) if $h; -$objMr->change(level => 3); + ok($objMr->lt("\x{950}$t", "\x{902}")); + ok($objMr->lt("\x{902}$t", "\x{903}")); + ok($objMr->lt("\x{903}$t", "\x{972}")); -ok($objMr->eq("\x{933}\x{93C}", "\x{934}")); + ok($objMr->lt("\x{938}$t", "\x{939}")); + ok($objMr->lt("\x{939}$t", "\x{933}")); + ok($objMr->lt("\x{933}$t", "\x{915}\x{94D}\x{937}")); + ok($objMr->lt("\x{915}\x{94D}\x{937}$t", "\x{91C}\x{94D}\x{91E}")); + ok($objMr->lt("\x{91C}\x{94D}\x{91E}$t", "\x{93D}")); +} -# 14 +# 23 diff --git a/cpan/Unicode-Collate/t/loc_or.t b/cpan/Unicode-Collate/t/loc_or.t index ef5bf6a6a8..f29e615eed 100644 --- a/cpan/Unicode-Collate/t/loc_or.t +++ b/cpan/Unicode-Collate/t/loc_or.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..10\n"; } +BEGIN { $| = 1; print "1..20\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,19 +35,30 @@ my $objOr = Unicode::Collate::Locale-> ok($objOr->getlocale, 'or'); -$objOr->change(level => 1); +$objOr->change(level => 2); -ok($objOr->lt("\x{B14}", "\x{B01}")); -ok($objOr->lt("\x{B01}", "\x{B02}")); -ok($objOr->lt("\x{B02}", "\x{B03}")); -ok($objOr->lt("\x{B03}", "\x{B15}")); +ok($objOr->lt("\x{B2F}", "\x{B5F}")); -ok($objOr->lt("\x{B39}", "\x{B15}\x{B4D}\x{B37}")); -ok($objOr->gt("\x{B3D}", "\x{B15}\x{B4D}\x{B37}")); +$objOr->change(level => 1); ok($objOr->eq("\x{B2F}", "\x{B5F}")); -$objOr->change(level => 2); +# 4 -ok($objOr->lt("\x{B2F}", "\x{B5F}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objOr->change(highestFFFF => 1) if $h; + + ok($objOr->lt("\x{B13}$t", "\x{B14}")); + ok($objOr->lt("\x{B14}$t", "\x{B01}")); + ok($objOr->lt("\x{B01}$t", "\x{B02}")); + ok($objOr->lt("\x{B02}$t", "\x{B03}")); + ok($objOr->lt("\x{B03}$t", "\x{B15}")); + + ok($objOr->lt("\x{B38}$t", "\x{B39}")); + ok($objOr->lt("\x{B39}$t", "\x{B15}\x{B4D}\x{B37}")); + ok($objOr->lt("\x{B15}\x{B4D}\x{B37}$t", "\x{B3D}")); +} +# 20 diff --git a/cpan/Unicode-Collate/t/loc_sa.t b/cpan/Unicode-Collate/t/loc_sa.t index 64e714bcbe..41f344eace 100644 --- a/cpan/Unicode-Collate/t/loc_sa.t +++ b/cpan/Unicode-Collate/t/loc_sa.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..14\n"; } +BEGIN { $| = 1; print "1..23\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,30 +35,36 @@ my $objSa = Unicode::Collate::Locale-> ok($objSa->getlocale, 'sa'); -$objSa->change(level => 1); +$objSa->change(level => 2); -ok($objSa->lt("\x{950}", "\x{902}")); -ok($objSa->lt("\x{902}", "\x{903}")); -ok($objSa->lt("\x{903}", "\x{972}")); +ok($objSa->lt("\x{902}", "\x{901}")); +ok($objSa->lt("\x{933}", "\x{934}")); -ok($objSa->eq("\x{902}", "\x{901}")); +$objSa->change(level => 3); -ok($objSa->lt("\x{939}", "\x{933}")); -ok($objSa->lt("\x{933}", "\x{915}\x{94D}\x{937}")); -ok($objSa->lt("\x{915}\x{94D}\x{937}", "\x{91C}\x{94D}\x{91E}")); -ok($objSa->lt("\x{91C}\x{94D}\x{91E}", "\x{93D}")); +ok($objSa->eq("\x{933}\x{93C}", "\x{934}")); -ok($objSa->eq("\x{933}", "\x{934}")); +$objSa->change(level => 1); -# 11 +ok($objSa->eq("\x{902}", "\x{901}")); +ok($objSa->eq("\x{933}", "\x{934}")); -$objSa->change(level => 2); +# 7 -ok($objSa->lt("\x{902}", "\x{901}")); -ok($objSa->lt("\x{933}", "\x{934}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objSa->change(highestFFFF => 1) if $h; -$objSa->change(level => 3); + ok($objSa->lt("\x{950}$t", "\x{902}")); + ok($objSa->lt("\x{902}$t", "\x{903}")); + ok($objSa->lt("\x{903}$t", "\x{972}")); -ok($objSa->eq("\x{933}\x{93C}", "\x{934}")); + ok($objSa->lt("\x{938}$t", "\x{939}")); + ok($objSa->lt("\x{939}$t", "\x{933}")); + ok($objSa->lt("\x{933}$t", "\x{915}\x{94D}\x{937}")); + ok($objSa->lt("\x{915}\x{94D}\x{937}$t", "\x{91C}\x{94D}\x{91E}")); + ok($objSa->lt("\x{91C}\x{94D}\x{91E}$t", "\x{93D}")); +} -# 14 +# 23 diff --git a/cpan/Unicode-Collate/t/loc_si.t b/cpan/Unicode-Collate/t/loc_si.t index 0a9d3bb507..adbb40169b 100644 --- a/cpan/Unicode-Collate/t/loc_si.t +++ b/cpan/Unicode-Collate/t/loc_si.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..8\n"; } +BEGIN { $| = 1; print "1..16\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -37,11 +37,18 @@ ok($objSi->getlocale, 'si'); $objSi->change(level => 1); -ok($objSi->lt("\x{D96}", "\x{D82}")); -ok($objSi->lt("\x{D82}", "\x{D83}")); -ok($objSi->lt("\x{D83}", "\x{D9A}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objSi->change(highestFFFF => 1) if $h; -ok($objSi->lt("\x{DA3}", "\x{DA5}")); -ok($objSi->lt("\x{DA5}", "\x{DA4}")); -ok($objSi->lt("\x{DA4}", "\x{DA6}")); + ok($objSi->lt("\x{D95}$t", "\x{D96}")); + ok($objSi->lt("\x{D96}$t", "\x{D82}")); + ok($objSi->lt("\x{D82}$t", "\x{D83}")); + ok($objSi->lt("\x{D83}$t", "\x{D9A}")); + + ok($objSi->lt("\x{DA3}$t", "\x{DA5}")); + ok($objSi->lt("\x{DA5}$t", "\x{DA4}")); + ok($objSi->lt("\x{DA4}$t", "\x{DA6}")); +} diff --git a/cpan/Unicode-Collate/t/loc_sidt.t b/cpan/Unicode-Collate/t/loc_sidt.t index 980edd894a..f5cbc54680 100644 --- a/cpan/Unicode-Collate/t/loc_sidt.t +++ b/cpan/Unicode-Collate/t/loc_sidt.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..9\n"; } +BEGIN { $| = 1; print "1..14\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,17 +35,28 @@ my $objSiDict = Unicode::Collate::Locale-> ok($objSiDict->getlocale, 'si__dictionary'); -$objSiDict->change(level => 1); +$objSiDict->change(level => 2); -ok($objSiDict->lt("\x{D96}", "\x{D82}")); -ok($objSiDict->lt("\x{D82}", "\x{D83}")); -ok($objSiDict->lt("\x{D83}", "\x{D9A}")); +ok($objSiDict->gt("\x{DA5}", "\x{DA2}\x{DCA}\x{DA4}")); + +$objSiDict->change(level => 1); -ok($objSiDict->gt("\x{DA5}", "\x{DA2}")); ok($objSiDict->eq("\x{DA5}", "\x{DA2}\x{DCA}\x{DA4}")); + +ok($objSiDict->lt("\x{DA2}", "\x{DA5}")); ok($objSiDict->lt("\x{DA5}", "\x{DA3}")); -$objSiDict->change(level => 2); +# 6 -ok($objSiDict->gt("\x{DA5}", "\x{DA2}\x{DCA}\x{DA4}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objSiDict->change(highestFFFF => 1) if $h; + + ok($objSiDict->lt("\x{D95}$t", "\x{D96}")); + ok($objSiDict->lt("\x{D96}$t", "\x{D82}")); + ok($objSiDict->lt("\x{D82}$t", "\x{D83}")); + ok($objSiDict->lt("\x{D83}$t", "\x{D9A}")); +} +# 14 diff --git a/cpan/Unicode-Collate/t/loc_ta.t b/cpan/Unicode-Collate/t/loc_ta.t index 6341829038..fa84bd89fc 100644 --- a/cpan/Unicode-Collate/t/loc_ta.t +++ b/cpan/Unicode-Collate/t/loc_ta.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..52\n"; } +BEGIN { $| = 1; print "1..104\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -31,6 +31,7 @@ ok(1); ######################### my $Kssa = "\x{B95}\x{BCD}\x{BB7}"; +my $v = "\x{BCD}"; my $objTa = Unicode::Collate::Locale-> new(locale => 'TA', normalization => undef); @@ -39,55 +40,60 @@ ok($objTa->getlocale, 'ta'); $objTa->change(level => 1); -ok($objTa->lt("\x{B94}", "\x{B82}")); -ok($objTa->gt("\x{B83}", "\x{B82}")); -ok($objTa->lt("\x{B83}", "\x{B95}\x{BCD}")); -ok($objTa->gt("\x{B95}", "\x{B95}\x{BCD}")); -ok($objTa->lt("\x{B95}", "\x{B99}\x{BCD}")); -ok($objTa->gt("\x{B99}", "\x{B99}\x{BCD}")); -ok($objTa->lt("\x{B99}", "\x{B9A}\x{BCD}")); -ok($objTa->gt("\x{B9A}", "\x{B9A}\x{BCD}")); -ok($objTa->lt("\x{B9A}", "\x{B9E}\x{BCD}")); -ok($objTa->gt("\x{B9E}", "\x{B9E}\x{BCD}")); -ok($objTa->lt("\x{B9E}", "\x{B9F}\x{BCD}")); -ok($objTa->gt("\x{B9F}", "\x{B9F}\x{BCD}")); -ok($objTa->lt("\x{B9F}", "\x{BA3}\x{BCD}")); -ok($objTa->gt("\x{BA3}", "\x{BA3}\x{BCD}")); -ok($objTa->lt("\x{BA3}", "\x{BA4}\x{BCD}")); -ok($objTa->gt("\x{BA4}", "\x{BA4}\x{BCD}")); -ok($objTa->lt("\x{BA4}", "\x{BA8}\x{BCD}")); -ok($objTa->gt("\x{BA8}", "\x{BA8}\x{BCD}")); -ok($objTa->lt("\x{BA8}", "\x{BAA}\x{BCD}")); -ok($objTa->gt("\x{BAA}", "\x{BAA}\x{BCD}")); -ok($objTa->lt("\x{BAA}", "\x{BAE}\x{BCD}")); -ok($objTa->gt("\x{BAE}", "\x{BAE}\x{BCD}")); -ok($objTa->lt("\x{BAE}", "\x{BAF}\x{BCD}")); -ok($objTa->gt("\x{BAF}", "\x{BAF}\x{BCD}")); 
-ok($objTa->lt("\x{BAF}", "\x{BB0}\x{BCD}")); -ok($objTa->gt("\x{BB0}", "\x{BB0}\x{BCD}")); -ok($objTa->lt("\x{BB0}", "\x{BB2}\x{BCD}")); -ok($objTa->gt("\x{BB2}", "\x{BB2}\x{BCD}")); -ok($objTa->lt("\x{BB2}", "\x{BB5}\x{BCD}")); -ok($objTa->gt("\x{BB5}", "\x{BB5}\x{BCD}")); -ok($objTa->lt("\x{BB5}", "\x{BB4}\x{BCD}")); -ok($objTa->gt("\x{BB4}", "\x{BB4}\x{BCD}")); -ok($objTa->lt("\x{BB4}", "\x{BB3}\x{BCD}")); -ok($objTa->gt("\x{BB3}", "\x{BB3}\x{BCD}")); -ok($objTa->lt("\x{BB3}", "\x{BB1}\x{BCD}")); -ok($objTa->gt("\x{BB1}", "\x{BB1}\x{BCD}")); -ok($objTa->lt("\x{BB1}", "\x{BA9}\x{BCD}")); -ok($objTa->gt("\x{BA9}", "\x{BA9}\x{BCD}")); -ok($objTa->lt("\x{BA9}", "\x{B9C}\x{BCD}")); -ok($objTa->gt("\x{B9C}", "\x{B9C}\x{BCD}")); -ok($objTa->lt("\x{B9C}", "\x{BB6}\x{BCD}")); -ok($objTa->gt("\x{BB6}", "\x{BB6}\x{BCD}")); -ok($objTa->lt("\x{BB6}", "\x{BB7}\x{BCD}")); -ok($objTa->gt("\x{BB7}", "\x{BB7}\x{BCD}")); -ok($objTa->lt("\x{BB7}", "\x{BB8}\x{BCD}")); -ok($objTa->gt("\x{BB8}", "\x{BB8}\x{BCD}")); -ok($objTa->lt("\x{BB8}", "\x{BB9}\x{BCD}")); -ok($objTa->gt("\x{BB9}", "\x{BB9}\x{BCD}")); -ok($objTa->lt("\x{BB9}", "${Kssa}\x{BCD}")); -ok($objTa->gt("${Kssa}", "${Kssa}\x{BCD}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objTa->change(highestFFFF => 1) if $h; -# 52 + ok($objTa->lt("\x{B94}$t", "\x{B82}")); + ok($objTa->lt("\x{B82}$t", "\x{B83}")); + ok($objTa->lt("\x{B83}$t", "\x{B95}$v")); + ok($objTa->lt("\x{B95}$v$t", "\x{B95}")); + ok($objTa->lt("\x{B95}$t", "\x{B99}$v")); + ok($objTa->lt("\x{B99}$v$t", "\x{B99}")); + ok($objTa->lt("\x{B99}$t", "\x{B9A}$v")); + ok($objTa->lt("\x{B9A}$v$t", "\x{B9A}")); + ok($objTa->lt("\x{B9A}$t", "\x{B9E}$v")); + ok($objTa->lt("\x{B9E}$v$t", "\x{B9E}")); + ok($objTa->lt("\x{B9E}$t", "\x{B9F}$v")); + ok($objTa->lt("\x{B9F}$v$t", "\x{B9F}")); + ok($objTa->lt("\x{B9F}$t", "\x{BA3}$v")); + ok($objTa->lt("\x{BA3}$v$t", "\x{BA3}")); + ok($objTa->lt("\x{BA3}$t", "\x{BA4}$v")); + ok($objTa->lt("\x{BA4}$v$t", "\x{BA4}")); + ok($objTa->lt("\x{BA4}$t", "\x{BA8}$v")); + ok($objTa->lt("\x{BA8}$v$t", "\x{BA8}")); + ok($objTa->lt("\x{BA8}$t", "\x{BAA}$v")); + ok($objTa->lt("\x{BAA}$v$t", "\x{BAA}")); + ok($objTa->lt("\x{BAA}$t", "\x{BAE}$v")); + ok($objTa->lt("\x{BAE}$v$t", "\x{BAE}")); + ok($objTa->lt("\x{BAE}$t", "\x{BAF}$v")); + ok($objTa->lt("\x{BAF}$v$t", "\x{BAF}")); + ok($objTa->lt("\x{BAF}$t", "\x{BB0}$v")); + ok($objTa->lt("\x{BB0}$v$t", "\x{BB0}")); + ok($objTa->lt("\x{BB0}$t", "\x{BB2}$v")); + ok($objTa->lt("\x{BB2}$v$t", "\x{BB2}")); + ok($objTa->lt("\x{BB2}$t", "\x{BB5}$v")); + ok($objTa->lt("\x{BB5}$v$t", "\x{BB5}")); + ok($objTa->lt("\x{BB5}$t", "\x{BB4}$v")); + ok($objTa->lt("\x{BB4}$v$t", "\x{BB4}")); + ok($objTa->lt("\x{BB4}$t", "\x{BB3}$v")); + ok($objTa->lt("\x{BB3}$v$t", "\x{BB3}")); + ok($objTa->lt("\x{BB3}$t", "\x{BB1}$v")); + ok($objTa->lt("\x{BB1}$v$t", "\x{BB1}")); + ok($objTa->lt("\x{BB1}$t", "\x{BA9}$v")); + ok($objTa->lt("\x{BA9}$v$t", "\x{BA9}")); + ok($objTa->lt("\x{BA9}$t", "\x{B9C}$v")); + ok($objTa->lt("\x{B9C}$v$t", "\x{B9C}")); + ok($objTa->lt("\x{B9C}$t", "\x{BB6}$v")); + ok($objTa->lt("\x{BB6}$v$t", "\x{BB6}")); + ok($objTa->lt("\x{BB6}$t", "\x{BB7}$v")); + ok($objTa->lt("\x{BB7}$v$t", 
"\x{BB7}")); + ok($objTa->lt("\x{BB7}$t", "\x{BB8}$v")); + ok($objTa->lt("\x{BB8}$v$t", "\x{BB8}")); + ok($objTa->lt("\x{BB8}$t", "\x{BB9}$v")); + ok($objTa->lt("\x{BB9}$v$t", "\x{BB9}")); + ok($objTa->lt("\x{BB9}$t", "${Kssa}$v")); + ok($objTa->lt("${Kssa}$v$t", "${Kssa}")); + ok($objTa->lt("${Kssa}$t", "\x{BBE}")); +} diff --git a/cpan/Unicode-Collate/t/loc_te.t b/cpan/Unicode-Collate/t/loc_te.t index 133cabfd31..be11514fbb 100644 --- a/cpan/Unicode-Collate/t/loc_te.t +++ b/cpan/Unicode-Collate/t/loc_te.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..6\n"; } +BEGIN { $| = 1; print "1..12\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -37,8 +37,14 @@ ok($objTe->getlocale, 'te'); $objTe->change(level => 1); -ok($objTe->lt("\x{C14}", "\x{C01}")); -ok($objTe->lt("\x{C01}", "\x{C02}")); -ok($objTe->lt("\x{C02}", "\x{C03}")); -ok($objTe->lt("\x{C03}", "\x{C15}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? pack('U', 0xFFFF) : ""; + $objTe->change(highestFFFF => 1) if $h; + ok($objTe->lt("\x{C13}$t", "\x{C14}")); + ok($objTe->lt("\x{C14}$t", "\x{C01}")); + ok($objTe->lt("\x{C01}$t", "\x{C02}")); + ok($objTe->lt("\x{C02}$t", "\x{C03}")); + ok($objTe->lt("\x{C03}$t", "\x{C15}")); +} diff --git a/cpan/Unicode-Collate/t/loc_test.t b/cpan/Unicode-Collate/t/loc_test.t index ba643083c3..191334402e 100644 --- a/cpan/Unicode-Collate/t/loc_test.t +++ b/cpan/Unicode-Collate/t/loc_test.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..130\n"; } +BEGIN { $| = 1; print "1..134\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -159,3 +159,21 @@ ok(Unicode::Collate::Locale::_locale('de-phonebk'), 'de__phonebook'); ok(Unicode::Collate::Locale::_locale('de--phonebk'), 'de__phonebook'); # 130 + +my $objEs2 = Unicode::Collate::Locale->new + (normalization => undef, locale => 'ES', + level => 1, + entry => << 'ENTRIES', +0000 ; [.FFFE.0020.0005.0000] +00F1 ; [.0010.0020.0002.00F1] # 
LATIN SMALL LETTER N WITH TILDE +006E 0303 ; [.0010.0020.0002.00F1] # LATIN SMALL LETTER N WITH TILDE +ENTRIES +); + +ok($objEs2->lt("abc\x{4E00}", "abc\0")); +ok($objEs2->lt("abc\x{FFFD}", "abc\0")); +ok($objEs2->lt("abc\x{FFFD}", "abc\0")); +ok($objEs2->lt("n\x{303}", "N\x{303}")); + +# 134 + diff --git a/cpan/Unicode-Collate/t/loc_th.t b/cpan/Unicode-Collate/t/loc_th.t index 2dd3994091..727b70649a 100644 --- a/cpan/Unicode-Collate/t/loc_th.t +++ b/cpan/Unicode-Collate/t/loc_th.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..47\n"; } +BEGIN { $| = 1; print "1..55\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -97,12 +97,14 @@ ok($objTh->eq("\x{E5B}", "\x{E46}")); $objTh->change(level => 4); -ok($objTh->lt("\x{E4F}", "\x{E2F}")); -ok($objTh->lt("\x{E2F}", "\x{E5A}")); -ok($objTh->lt("\x{E5A}", "\x{E5B}")); -ok($objTh->lt("\x{E5B}", "\x{E46}")); +for my $t ("", "\x{E01}") { + ok($objTh->lt("\x{E4F}$t", "\x{E2F}$t")); + ok($objTh->lt("\x{E2F}$t", "\x{E5A}$t")); + ok($objTh->lt("\x{E5A}$t", "\x{E5B}$t")); + ok($objTh->lt("\x{E5B}$t", "\x{E46}$t")); +} -# 39 +# 43 $objTh->change(level => 1); @@ -111,13 +113,19 @@ ok($objTh->eq("\x{E2F}", "\x{E5A}")); ok($objTh->eq("\x{E5A}", "\x{E5B}")); ok($objTh->eq("\x{E5B}", "\x{E46}")); -# 43 +# 47 $objTh->change(variable => "non-ignorable"); -ok($objTh->lt("\x{E4F}", "\x{E2F}")); -ok($objTh->lt("\x{E2F}", "\x{E5A}")); -ok($objTh->lt("\x{E5A}", "\x{E5B}")); -ok($objTh->lt("\x{E5B}", "\x{E46}")); +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objTh->change(highestFFFF => 1) if $h; -# 47 + ok($objTh->lt("\x{E4F}$t", "\x{E2F}")); + ok($objTh->lt("\x{E2F}$t", "\x{E5A}")); + ok($objTh->lt("\x{E5A}$t", "\x{E5B}")); + ok($objTh->lt("\x{E5B}$t", "\x{E46}")); +} + +# 55 diff --git a/cpan/Unicode-Collate/t/loc_ur.t b/cpan/Unicode-Collate/t/loc_ur.t index d614cd1e3c..e9093e5993 100644 --- a/cpan/Unicode-Collate/t/loc_ur.t +++ b/cpan/Unicode-Collate/t/loc_ur.t @@ -13,7 +13,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..91\n"; } +BEGIN { $| = 1; print "1..166\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -35,77 +35,16 @@ my $objUr = Unicode::Collate::Locale-> ok($objUr->getlocale, 'ur'); -$objUr->change(level => 1); - -ok($objUr->lt("\x{627}", "\x{622}")); -ok($objUr->lt("\x{622}", "\x{628}")); -ok($objUr->lt("\x{628}", "\x{628}\x{6BE}")); -ok($objUr->lt("\x{628}\x{6BE}", "\x{67E}")); -ok($objUr->lt("\x{67E}", "\x{67E}\x{6BE}")); -ok($objUr->lt("\x{67E}\x{6BE}", "\x{62A}")); -ok($objUr->lt("\x{62A}", "\x{62A}\x{6BE}")); -ok($objUr->lt("\x{62A}\x{6BE}", "\x{679}")); -ok($objUr->lt("\x{679}", "\x{679}\x{6BE}")); -ok($objUr->lt("\x{679}\x{6BE}", "\x{62B}")); -ok($objUr->lt("\x{62B}", "\x{62C}")); -ok($objUr->lt("\x{62C}", "\x{62C}\x{6BE}")); -ok($objUr->lt("\x{62C}\x{6BE}", "\x{686}")); -ok($objUr->lt("\x{686}", "\x{686}\x{6BE}")); -ok($objUr->lt("\x{686}\x{6BE}", "\x{62D}")); -ok($objUr->lt("\x{62D}", "\x{62E}")); -ok($objUr->lt("\x{62E}", "\x{62F}")); -ok($objUr->lt("\x{62F}", "\x{62F}\x{6BE}")); -ok($objUr->lt("\x{62F}\x{6BE}", "\x{688}")); -ok($objUr->lt("\x{688}", "\x{688}\x{6BE}")); -ok($objUr->lt("\x{688}\x{6BE}", "\x{630}")); -ok($objUr->lt("\x{630}", "\x{631}")); -ok($objUr->lt("\x{631}", "\x{631}\x{6BE}")); -ok($objUr->lt("\x{631}\x{6BE}", "\x{691}")); -ok($objUr->lt("\x{691}", "\x{691}\x{6BE}")); -ok($objUr->lt("\x{691}\x{6BE}", "\x{632}")); -ok($objUr->lt("\x{632}", "\x{698}")); -ok($objUr->lt("\x{698}", "\x{633}")); -ok($objUr->lt("\x{633}", 
"\x{634}")); -ok($objUr->lt("\x{634}", "\x{635}")); -ok($objUr->lt("\x{635}", "\x{636}")); -ok($objUr->lt("\x{636}", "\x{637}")); -ok($objUr->lt("\x{637}", "\x{638}")); -ok($objUr->lt("\x{638}", "\x{639}")); -ok($objUr->lt("\x{639}", "\x{63A}")); -ok($objUr->lt("\x{63A}", "\x{641}")); -ok($objUr->lt("\x{641}", "\x{642}")); -ok($objUr->lt("\x{642}", "\x{6A9}")); -ok($objUr->lt("\x{6A9}", "\x{6A9}\x{6BE}")); -ok($objUr->lt("\x{6A9}\x{6BE}", "\x{6AF}")); -ok($objUr->lt("\x{6AF}", "\x{6AF}\x{6BE}")); -ok($objUr->lt("\x{6AF}\x{6BE}", "\x{644}")); -ok($objUr->lt("\x{644}", "\x{644}\x{6BE}")); -ok($objUr->lt("\x{644}\x{6BE}", "\x{645}")); -ok($objUr->lt("\x{645}", "\x{645}\x{6BE}")); -ok($objUr->lt("\x{645}\x{6BE}", "\x{646}")); -ok($objUr->lt("\x{646}", "\x{646}\x{6BE}")); -ok($objUr->lt("\x{646}\x{6BE}", "\x{6BA}")); -ok($objUr->lt("\x{6BA}", "\x{6BA}\x{6BE}")); -ok($objUr->lt("\x{6BA}\x{6BE}", "\x{648}")); -ok($objUr->lt("\x{648}", "\x{648}\x{6BE}")); -ok($objUr->lt("\x{648}\x{6BE}", "\x{6C1}")); -ok($objUr->lt("\x{6C1}", "\x{6BE}")); -ok($objUr->lt("\x{6BE}", "\x{6C3}")); -ok($objUr->lt("\x{6C3}", "\x{621}")); -ok($objUr->lt("\x{621}", "\x{6CC}")); -ok($objUr->lt("\x{6CC}", "\x{6CC}\x{6BE}")); -ok($objUr->lt("\x{6CC}\x{6BE}", "\x{6D2}")); -ok($objUr->lt("\x{6D2}", "\x{67B}")); - -# 61 +$objUr->change(level => 3); -ok($objUr->eq("\x{627}", "\x{623}")); -ok($objUr->eq("\x{648}", "\x{624}")); -ok($objUr->eq("\x{6C1}", "\x{6C2}")); -ok($objUr->eq("\x{6CC}", "\x{626}")); -ok($objUr->eq("\x{6D2}", "\x{6D3}")); +ok($objUr->eq("\x{623}", "\x{627}\x{654}")); +ok($objUr->eq("\x{622}", "\x{627}\x{653}")); +ok($objUr->eq("\x{624}", "\x{648}\x{654}")); +ok($objUr->eq("\x{6C2}", "\x{6C1}\x{654}")); +ok($objUr->eq("\x{626}", "\x{64A}\x{654}")); +ok($objUr->eq("\x{6D3}", "\x{6D2}\x{654}")); -# 66 +# 8 $objUr->change(level => 2); @@ -115,7 +54,7 @@ ok($objUr->lt("\x{6C1}", "\x{6C2}")); ok($objUr->lt("\x{6CC}", "\x{626}")); ok($objUr->lt("\x{6D2}", "\x{6D3}")); -# 71 +# 13 
ok($objUr->lt("\x{652}", "\x{64E}")); ok($objUr->lt("\x{64E}", "\x{650}")); @@ -132,13 +71,101 @@ ok($objUr->lt("\x{651}", "\x{658}")); ok($objUr->lt("\x{658}", "\x{653}")); ok($objUr->lt("\x{653}", "\x{655}")); -# 85 +# 27 -ok($objUr->eq("\x{623}", "\x{627}\x{654}")); -ok($objUr->eq("\x{622}", "\x{627}\x{653}")); -ok($objUr->eq("\x{624}", "\x{648}\x{654}")); -ok($objUr->eq("\x{6C2}", "\x{6C1}\x{654}")); -ok($objUr->eq("\x{626}", "\x{64A}\x{654}")); -ok($objUr->eq("\x{6D3}", "\x{6D2}\x{654}")); +$objUr->change(level => 1); + +ok($objUr->eq("\x{627}", "\x{623}")); +ok($objUr->eq("\x{648}", "\x{624}")); +ok($objUr->eq("\x{6C1}", "\x{6C2}")); +ok($objUr->eq("\x{6CC}", "\x{626}")); +ok($objUr->eq("\x{6D2}", "\x{6D3}")); + +# 32 + +ok($objUr->eq("\x{652}", "\x{64E}")); +ok($objUr->eq("\x{64E}", "\x{650}")); +ok($objUr->eq("\x{650}", "\x{64F}")); +ok($objUr->eq("\x{64F}", "\x{670}")); +ok($objUr->eq("\x{670}", "\x{656}")); +ok($objUr->eq("\x{656}", "\x{657}")); +ok($objUr->eq("\x{657}", "\x{64B}")); +ok($objUr->eq("\x{64B}", "\x{64D}")); +ok($objUr->eq("\x{64D}", "\x{64C}")); +ok($objUr->eq("\x{64C}", "\x{654}")); +ok($objUr->eq("\x{654}", "\x{651}")); +ok($objUr->eq("\x{651}", "\x{658}")); +ok($objUr->eq("\x{658}", "\x{653}")); +ok($objUr->eq("\x{653}", "\x{655}")); + +# 46 + +for my $h (0, 1) { + no warnings 'utf8'; + my $t = $h ? 
pack('U', 0xFFFF) : ""; + $objUr->change(highestFFFF => 1) if $h; + + ok($objUr->lt("\x{627}$t", "\x{622}")); + ok($objUr->lt("\x{622}$t", "\x{628}")); + ok($objUr->lt("\x{628}$t", "\x{628}\x{6BE}")); + ok($objUr->lt("\x{628}\x{6BE}$t", "\x{67E}")); + ok($objUr->lt("\x{67E}$t", "\x{67E}\x{6BE}")); + ok($objUr->lt("\x{67E}\x{6BE}$t", "\x{62A}")); + ok($objUr->lt("\x{62A}$t", "\x{62A}\x{6BE}")); + ok($objUr->lt("\x{62A}\x{6BE}$t", "\x{679}")); + ok($objUr->lt("\x{679}$t", "\x{679}\x{6BE}")); + ok($objUr->lt("\x{679}\x{6BE}$t", "\x{62B}")); + ok($objUr->lt("\x{62B}$t", "\x{62C}")); + ok($objUr->lt("\x{62C}$t", "\x{62C}\x{6BE}")); + ok($objUr->lt("\x{62C}\x{6BE}$t", "\x{686}")); + ok($objUr->lt("\x{686}$t", "\x{686}\x{6BE}")); + ok($objUr->lt("\x{686}\x{6BE}$t", "\x{62D}")); + ok($objUr->lt("\x{62D}$t", "\x{62E}")); + ok($objUr->lt("\x{62E}$t", "\x{62F}")); + ok($objUr->lt("\x{62F}$t", "\x{62F}\x{6BE}")); + ok($objUr->lt("\x{62F}\x{6BE}$t", "\x{688}")); + ok($objUr->lt("\x{688}$t", "\x{688}\x{6BE}")); + ok($objUr->lt("\x{688}\x{6BE}$t", "\x{630}")); + ok($objUr->lt("\x{630}$t", "\x{631}")); + ok($objUr->lt("\x{631}$t", "\x{631}\x{6BE}")); + ok($objUr->lt("\x{631}\x{6BE}$t", "\x{691}")); + ok($objUr->lt("\x{691}$t", "\x{691}\x{6BE}")); + ok($objUr->lt("\x{691}\x{6BE}$t", "\x{632}")); + ok($objUr->lt("\x{632}$t", "\x{698}")); + ok($objUr->lt("\x{698}$t", "\x{633}")); + ok($objUr->lt("\x{633}$t", "\x{634}")); + ok($objUr->lt("\x{634}$t", "\x{635}")); + ok($objUr->lt("\x{635}$t", "\x{636}")); + ok($objUr->lt("\x{636}$t", "\x{637}")); + ok($objUr->lt("\x{637}$t", "\x{638}")); + ok($objUr->lt("\x{638}$t", "\x{639}")); + ok($objUr->lt("\x{639}$t", "\x{63A}")); + ok($objUr->lt("\x{63A}$t", "\x{641}")); + ok($objUr->lt("\x{641}$t", "\x{642}")); + ok($objUr->lt("\x{642}$t", "\x{6A9}")); + ok($objUr->lt("\x{6A9}$t", "\x{6A9}\x{6BE}")); + ok($objUr->lt("\x{6A9}\x{6BE}$t", "\x{6AF}")); + ok($objUr->lt("\x{6AF}$t", "\x{6AF}\x{6BE}")); + ok($objUr->lt("\x{6AF}\x{6BE}$t", "\x{644}")); 
+ ok($objUr->lt("\x{644}$t", "\x{644}\x{6BE}")); + ok($objUr->lt("\x{644}\x{6BE}$t", "\x{645}")); + ok($objUr->lt("\x{645}$t", "\x{645}\x{6BE}")); + ok($objUr->lt("\x{645}\x{6BE}$t", "\x{646}")); + ok($objUr->lt("\x{646}$t", "\x{646}\x{6BE}")); + ok($objUr->lt("\x{646}\x{6BE}$t", "\x{6BA}")); + ok($objUr->lt("\x{6BA}$t", "\x{6BA}\x{6BE}")); + ok($objUr->lt("\x{6BA}\x{6BE}$t", "\x{648}")); + ok($objUr->lt("\x{648}$t", "\x{648}\x{6BE}")); + ok($objUr->lt("\x{648}\x{6BE}$t", "\x{6C1}")); + ok($objUr->lt("\x{6C1}$t", "\x{6BE}")); + ok($objUr->lt("\x{6BE}$t", "\x{6C3}")); + ok($objUr->lt("\x{6C3}$t", "\x{621}")); + ok($objUr->lt("\x{621}$t", "\x{6CC}")); + ok($objUr->lt("\x{6CC}$t", "\x{6CC}\x{6BE}")); + ok($objUr->lt("\x{6CC}\x{6BE}$t", "\x{6D2}")); + ok($objUr->lt("\x{6D2}$t", "\x{66E}")); + ok($objUr->lt("\x{66E}$t", "\x{67B}")); +} + +# 166 -# 91 diff --git a/cpan/Unicode-Collate/t/nonchar.t b/cpan/Unicode-Collate/t/nonchar.t index 26a50e2b9a..2b47b28043 100644 --- a/cpan/Unicode-Collate/t/nonchar.t +++ b/cpan/Unicode-Collate/t/nonchar.t @@ -25,7 +25,7 @@ BEGIN { use strict; use warnings; -BEGIN { $| = 1; print "1..61\n"; } # 1 + 30 * 2 +BEGIN { $| = 1; print "1..90\n"; } my $count = 0; sub ok ($;$) { my $p = my $r = shift; @@ -54,7 +54,7 @@ no warnings 'utf8'; # allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John". my $entry = <<'ENTRIES'; -FFFE ; [*0001.0020.0005.FFFE] # <noncharacter-FFFE> +FFFE ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE> FFFF ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF> ENTRIES @@ -67,7 +67,7 @@ for my $norm (undef, 'NFD') { if (defined $norm) { eval { require Unicode::Normalize }; if ($@) { - ok(1) for 1..30; # silent skip + ok(1) for 1..34; # silent skip next; } } @@ -114,9 +114,59 @@ for my $norm (undef, 'NFD') { # 26 ok($coll->lt($dsf[-1], $dsj[0])); - # 27..30 + $coll->change(level => 1); + + # 27..34 for my $i (0 .. 
$#disilva) { + ok($coll->lt($dsf[$i], $dsJ[$i])); ok($coll->lt($dsj[$i], $dsJ[$i])); } } +# 69 + +{ + my $coll = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + highestFFFF => 1, + minimalFFFE => 1, + ); + + $coll->change(level => 1); + ok($coll->lt("perl\x{FFFD}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{1FFFD}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{1FFFE}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{1FFFF}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{2FFFD}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{2FFFE}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{2FFFF}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}")); + ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}")); + +# 79 + + $coll->change(level => 3); + my @list = ( + "ab\x{FFFE}a", + "Ab\x{FFFE}a", + "ab\x{FFFE}c", + "Ab\x{FFFE}c", + "ab\x{FFFE}xyz", + "abc\x{FFFE}def", + "abc\x{FFFE}xYz", + "aBc\x{FFFE}xyz", + "abcX\x{FFFE}def", + "abcx\x{FFFE}xyz", + "b\x{FFFE}aaa", + "bbb\x{FFFE}a", + ); + my $p = shift @list; + for my $c (@list) { + ok($coll->lt($p, $c)); + $p = $c; + } +} + +# 90 |