diff options
author | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2011-05-19 00:34:57 +0100 |
---|---|---|
committer | Chris 'BinGOs' Williams <chris@bingosnet.co.uk> | 2011-06-09 12:17:11 +0100 |
commit | e0a65de5f30351e0d7a451c03e62f80a5793c3b4 (patch) | |
tree | 5c3e6936f1afc4a8ed4bffd90f8f2f389c896ee2 /cpan | |
parent | 06fd9d7a579f00bca05b6ba920373565ea10c138 (diff) | |
download | perl-e0a65de5f30351e0d7a451c03e62f80a5793c3b4.tar.gz |
Updated Unicode-Normalize to CPAN version 1.12
[DELTA]
1.12 Mon May 16 23:36:07 2011
- removed Normalize/CompExcl.pl; the Composition Exclusions are now hard-coded in mkheader.
The way CompExcl.pl was loaded did not seem good, though I'm not sure...
1.11 Sun May 15 20:31:09 2011
- As perl 5.14.0 has removed unicore/CompositionExclusions.txt
from the installation, the Normalize/CompExcl.pl file in this distribution
is used instead. (see [rt.cpan.org #68106])
Diffstat (limited to 'cpan')
-rw-r--r-- | cpan/Unicode-Normalize/Changes | 9 | ||||
-rw-r--r-- | cpan/Unicode-Normalize/Normalize.pm | 6 | ||||
-rw-r--r-- | cpan/Unicode-Normalize/README | 3 | ||||
-rw-r--r-- | cpan/Unicode-Normalize/mkheader | 74 | ||||
-rw-r--r-- | cpan/Unicode-Normalize/t/func.t | 39 |
5 files changed, 87 insertions, 44 deletions
diff --git a/cpan/Unicode-Normalize/Changes b/cpan/Unicode-Normalize/Changes index f872619dbc..881fdf6257 100644 --- a/cpan/Unicode-Normalize/Changes +++ b/cpan/Unicode-Normalize/Changes @@ -1,5 +1,14 @@ Revision history for Perl extension Unicode::Normalize. +1.12 Mon May 16 23:36:07 2011 + - removed Normalize/CompExcl.pl and Composition Exclusions are coded; + how to load CompExcl.pl seems not good, but I'm not sure... + +1.11 Sun May 15 20:31:09 2011 + - As perl 5.14.0 has removed unicore/CompositionExclusions.txt + in the installation, Normalize/CompExcl.pl in this distribution + is used instead. (see [rt.cpan.org #68106]) + 1.10 Sun Jan 16 21:00:34 2011 - XSUB: reorder() and compose() treat with growing the string. - XSUB: provision against UTF8_ALLOW_* flags to be undefined in future. diff --git a/cpan/Unicode-Normalize/Normalize.pm b/cpan/Unicode-Normalize/Normalize.pm index f4bd7a4f1f..3ca56adca5 100644 --- a/cpan/Unicode-Normalize/Normalize.pm +++ b/cpan/Unicode-Normalize/Normalize.pm @@ -13,7 +13,7 @@ use Carp; no warnings 'utf8'; -our $VERSION = '1.10'; +our $VERSION = '1.12'; our $PACKAGE = __PACKAGE__; our @EXPORT = qw( NFC NFD NFKC NFKD ); @@ -548,8 +548,8 @@ normalization implemented by this module depends on your perl's version. 5.8.7-5.8.8 4.1.0 5.10.0 5.0.0 5.8.9, 5.10.1 5.1.0 - 5.12.0-5.12.2 5.2.0 - (5.13.7-5.13.11) 6.0.0 + 5.12.0-5.12.3 5.2.0 + 5.14.0 6.0.0 =item Correction of decomposition mapping diff --git a/cpan/Unicode-Normalize/README b/cpan/Unicode-Normalize/README index 8cddf34869..3fc6c75d18 100644 --- a/cpan/Unicode-Normalize/README +++ b/cpan/Unicode-Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 1.10 +Unicode/Normalize version 1.12 =================================== Unicode::Normalize - Unicode Normalization Forms @@ -62,7 +62,6 @@ which are included in recent perl core distributions. 
- unicore/CombiningClass.pl (or unicode/CombiningClass.pl) - unicore/Decomposition.pl (or unicode/Decomposition.pl) -- unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) NOTES diff --git a/cpan/Unicode-Normalize/mkheader b/cpan/Unicode-Normalize/mkheader index acc7eaff77..c694b7e9eb 100644 --- a/cpan/Unicode-Normalize/mkheader +++ b/cpan/Unicode-Normalize/mkheader @@ -9,7 +9,6 @@ # Input files: # unicore/CombiningClass.pl (or unicode/CombiningClass.pl) # unicore/Decomposition.pl (or unicode/Decomposition.pl) -# unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) # # Output files: # unfcan.h @@ -54,7 +53,7 @@ our %Single; # $codepoint => 1 : singletons our %NonStD; # $codepoint => 1 : non-starter decompositions our %Comp2nd; # $codepoint => 1 : may be composed with a prev char. -# from Unicode database +# from core Unicode database our $Combin = do "unicore/CombiningClass.pl" || do "unicode/CombiningClass.pl" || croak "$PACKAGE: CombiningClass.pl not found"; @@ -62,6 +61,17 @@ our $Decomp = do "unicore/Decomposition.pl" || do "unicode/Decomposition.pl" || croak "$PACKAGE: Decomposition.pl not found"; +# CompositionExclusions.txt since Unicode 3.2.0 +our @CompEx = qw( + 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36 + 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76 + 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D + FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B + FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C + FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB + 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0 +); + # definition of Hangul constants use constant SBase => 0xAC00; use constant SFinal => 0xD7A3; # SBase -1 + SCount @@ -91,27 +101,6 @@ sub decomposeHangul { } ########## getting full decomposition ########## -{ - my($f, $fh); - foreach my $d (@INC) { - $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt"); - last if open($fh, $f); - $f = 
File::Spec->catfile($d, "unicore", "CompExcl.txt"); - last if open($fh, $f); - $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); - last if open($fh, $f); - $f = undef; - } - croak "$PACKAGE: neither unicore/CompositionExclusions.txt " - . "nor unicode/CompExcl.txt is found in @INC" unless defined $f; - - while (<$fh>) { - next if /^#/ or /^$/; - s/#.*//; - $Exclus{ hex($1) } = 1 if /([0-9A-Fa-f]+)/; - } - close $fh; -} ## converts string "hhhh hhhh hhhh" to a numeric list ## (hex digits separated by spaces) @@ -137,23 +126,32 @@ while ($Decomp =~ /(.+)/g) { foreach my $u ($ini .. $end) { $Compat{$u} = $dec; + $Canon{$u} = $dec if ! $compat; + } +} - if (! $compat) { - $Canon{$u} = $dec; - - if (@$dec == 2) { - if ($Combin{ $dec->[0] }) { - $NonStD{$u} = 1; - } else { - $Compos{ $dec->[0] }{ $dec->[1] } = $u; - $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u}; - } - } elsif (@$dec == 1) { - $Single{$u} = 1; - } else { - croak("Weird Canonical Decomposition of U+$tab[0]"); - } +for my $s (@CompEx) { + my $u = hex $s; + next if !$Canon{$u}; # not assigned + next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2 + $Exclus{$u} = 1; +} + +foreach my $u (keys %Canon) { + my $dec = $Canon{$u}; + + if (@$dec == 2) { + if ($Combin{ $dec->[0] }) { + $NonStD{$u} = 1; + } else { + $Compos{ $dec->[0] }{ $dec->[1] } = $u; + $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u}; } + } elsif (@$dec == 1) { + $Single{$u} = 1; + } else { + my $h = sprintf '%04X', $u; + croak("Weird Canonical Decomposition of U+$h"); } } diff --git a/cpan/Unicode-Normalize/t/func.t b/cpan/Unicode-Normalize/t/func.t index 76ced03ea1..f18835ee8a 100644 --- a/cpan/Unicode-Normalize/t/func.t +++ b/cpan/Unicode-Normalize/t/func.t @@ -19,7 +19,7 @@ BEGIN { use Test; use strict; use warnings; -BEGIN { plan tests => 211 }; +BEGIN { plan tests => 217 }; use Unicode::Normalize qw(:all); ok(1); # If we made it this far, we're ok. 
@@ -49,6 +49,8 @@ ok(getCanon(0x212C), undef); ok(getCanon(0x3243), undef); ok(getCanon(0xFA2D), _pack_U(0x9DB4)); +# 20 + ok(getCompat( 0), undef); ok(getCompat(0x29), undef); ok(getCompat(0x41), undef); @@ -84,6 +86,8 @@ ok(getComposite(0xAC00, 0x11A7), undef); ok(getComposite(0xAC00, 0x11A8), 0xAC01); ok(getComposite(0xADF8, 0x11AF), 0xAE00); +# 53 + sub uprops { my $uv = shift; my $r = ""; @@ -120,6 +124,8 @@ ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900 ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A +# 71 + ok(decompose(""), ""); ok(decompose("A"), "A"); ok(decompose("", 1), ""); @@ -133,6 +139,8 @@ ok(decompose(hexU("1E14 AC01"), 1), hexU("0045 0304 0300 1100 1161 11A8")); ok(decompose(hexU("AC00 AE00"), 1), hexU("1100 1161 1100 1173 11AF")); ok(decompose(hexU("304C FF76"), 1), hexU("304B 3099 30AB")); +# 81 + # don't modify the source my $sDec = "\x{FA19}"; ok(decompose($sDec), "\x{795E}"); @@ -165,6 +173,8 @@ my $sCom = "\x{304B}\x{3099}"; ok(compose($sCom), "\x{304C}"); ok($sCom, "\x{304B}\x{3099}"); +# 100 + ok(composeContiguous(""), ""); ok(composeContiguous("A"), "A"); ok(composeContiguous(hexU("0061 0300")), hexU("00E0")); @@ -180,6 +190,8 @@ my $sCtg = "\x{30DB}\x{309A}"; ok(composeContiguous($sCtg), "\x{30DD}"); ok($sCtg, "\x{30DB}\x{309A}"); +# 111 + sub answer { defined $_[0] ? $_[0] ? 
"YES" : "NO" : "MAYBE" } ok(answer(checkNFD("")), "YES"); @@ -220,6 +232,8 @@ ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO"); +# 145 + "012ABC" =~ /(\d+)(\w+)/; ok("012" eq NFC $1 && "ABC" eq NFC $2); @@ -240,6 +254,8 @@ ok(getComposite("065", "0768"), 192); ok(isNFD_NO ("0192")); ok(isNFKD_NO("0192")); +# 156 + # DEVANAGARI LETTER QA ok(isExclusion("02392")); ok(isComp_Ex ("02392")); @@ -276,6 +292,8 @@ ok(getCanon("044032"), _pack_U(0x1100, 0x1161)); ok(getCompat("044032"), _pack_U(0x1100, 0x1161)); ok(getComposite("04352", "04449"), 0xAC00); +# 182 + # string with 22 combining characters: (0x300..0x315) my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042); ok(decompose($str_cc22), $str_cc22); @@ -302,6 +320,8 @@ ok(NFKC($str_cc40), $str_cc40); ok(FCD($str_cc40), $str_cc40); ok(FCC($str_cc40), $str_cc40); +# 202 + my $precomp = hexU("304C 304E 3050 3052 3054"); my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099"); ok(decompose($precomp x 5), $combseq x 5); @@ -319,4 +339,21 @@ ok(decompose($precomp . $notcomp), $combseq . $notcomp); ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5); ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10); +# 211 + +my $preUnicode3_1 = !defined getCanon(0x1D15E); +my $preUnicode3_2 = !defined getCanon(0x2ADC); + +# HEBREW LETTER YOD WITH HIRIQ +ok($preUnicode3_1 xor isExclusion(0xFB1D)); +ok($preUnicode3_1 xor isComp_Ex (0xFB1D)); + +# MUSICAL SYMBOL HALF NOTE +ok($preUnicode3_1 xor isExclusion(0x1D15E)); +ok($preUnicode3_1 xor isComp_Ex (0x1D15E)); + +# FORKING +ok($preUnicode3_2 xor isExclusion(0x2ADC)); +ok($preUnicode3_2 xor isComp_Ex (0x2ADC)); +# 217 |