diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-14 14:59:32 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-14 14:59:32 +0000 |
commit | e904f99525ffc0cd5f09346758a1931019c2f0b0 (patch) | |
tree | c7e1fdc85ff4b89160808dda5292c4f59542ab9b /lib/unicore | |
parent | 137352a2af7440ba507c46800e6906b0f4e09e61 (diff) | |
download | perl-e904f99525ffc0cd5f09346758a1931019c2f0b0.tar.gz |
The First, Last ranges in the Unicode data weren't
getting their general categories added properly;
noticed by Jeffrey Friedl.
p4raw-id: //depot/perl@12994
Diffstat (limited to 'lib/unicore')
-rw-r--r-- | lib/unicore/Category.pl | 8 | ||||
-rw-r--r-- | lib/unicore/In/0.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/1.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/164.pl | 12 | ||||
-rw-r--r-- | lib/unicore/In/169.pl | 12 | ||||
-rw-r--r-- | lib/unicore/In/170.pl | 12 | ||||
-rw-r--r-- | lib/unicore/In/2.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/3.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/4.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/5.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/6.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/7.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/8.pl | 2 | ||||
-rw-r--r-- | lib/unicore/In/9.pl | 2 | ||||
-rw-r--r-- | lib/unicore/Is/Alnum.pl | 4 | ||||
-rw-r--r-- | lib/unicore/Is/Alpha.pl | 4 | ||||
-rw-r--r-- | lib/unicore/Is/C.pl | 12 | ||||
-rw-r--r-- | lib/unicore/Is/Cntrl.pl | 3 | ||||
-rw-r--r-- | lib/unicore/Is/Co.pl | 9 | ||||
-rw-r--r-- | lib/unicore/Is/Cs.pl | 5 | ||||
-rw-r--r-- | lib/unicore/Is/Graph.pl | 8 | ||||
-rw-r--r-- | lib/unicore/Is/L.pl | 12 | ||||
-rw-r--r-- | lib/unicore/Is/Lo.pl | 12 | ||||
-rw-r--r-- | lib/unicore/Is/Print.pl | 8 | ||||
-rw-r--r-- | lib/unicore/Is/Word.pl | 4 | ||||
-rw-r--r-- | lib/unicore/Name.pl | 10 | ||||
-rw-r--r-- | lib/unicore/mktables | 108 |
27 files changed, 141 insertions, 122 deletions
diff --git a/lib/unicore/Category.pl b/lib/unicore/Category.pl index 6f0979ddc8..e8f676c88f 100644 --- a/lib/unicore/Category.pl +++ b/lib/unicore/Category.pl @@ -1381,12 +1381,17 @@ return <<'END'; 3300 3376 So 337B 33DD So 33E0 33FE So +3400 4DB5 Lo +4E00 9FA5 Lo A000 A48C Lo A490 A4A1 So A4A4 A4B3 So A4B5 A4C0 So A4C2 A4C4 So A4C6 So +AC00 D7A3 Lo +D800 DFFF Cs +E000 F8FF Co F900 FA2D Lo FB00 FB06 Ll FB13 FB17 Ll @@ -1587,7 +1592,10 @@ FFFC FFFD So 1D7C3 Sm 1D7C4 1D7C9 Ll 1D7CE 1D7FF Nd +20000 2A6D6 Lo 2F800 2FA1D Lo E0001 Cf E0020 E007F Cf +F0000 FFFFD Co +100000 10FFFD Co END diff --git a/lib/unicore/In/0.pl b/lib/unicore/In/0.pl index 6b95de3dd5..db52684f0b 100644 --- a/lib/unicore/In/0.pl +++ b/lib/unicore/In/0.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3400 4DB5 CJK Ideograph Extension A +3400 4DB5 Lo END diff --git a/lib/unicore/In/1.pl b/lib/unicore/In/1.pl index 3ef31669c8..e1894b86f4 100644 --- a/lib/unicore/In/1.pl +++ b/lib/unicore/In/1.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -4E00 9FA5 CJK Ideograph +4E00 9FA5 Lo END diff --git a/lib/unicore/In/164.pl b/lib/unicore/In/164.pl index afa40c35c5..6a1e2c38c7 100644 --- a/lib/unicore/In/164.pl +++ b/lib/unicore/In/164.pl @@ -309,13 +309,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 -3400 -4DB5 -4E00 -9FA5 +3400 4DB5 +4E00 9FA5 A000 A48C -AC00 -D7A3 +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -378,7 +375,6 @@ FFDA FFDC 1D78A 1D7A8 1D7AA 1D7C2 1D7C4 1D7C9 -20000 -2A6D6 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/In/169.pl b/lib/unicore/In/169.pl index 570636ec4c..b41f21d77a 100644 --- a/lib/unicore/In/169.pl +++ b/lib/unicore/In/169.pl @@ -935,13 +935,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 -3400 -4DB5 -4E00 -9FA5 +3400 4DB5 +4E00 9FA5 A000 A48C -AC00 -D7A3 +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -1034,7 +1031,6 @@ FFDA FFDC 1D790 1D7A8 1D7AA 1D7C2 1D7C4 1D7C9 -20000 -2A6D6 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/In/170.pl b/lib/unicore/In/170.pl index a97c18fe1e..30cbfe93cd 100644 --- a/lib/unicore/In/170.pl +++ b/lib/unicore/In/170.pl @@ -1099,13 +1099,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 -3400 -4DB5 -4E00 -9FA5 +3400 4DB5 +4E00 9FA5 A000 A48C -AC00 -D7A3 +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -1212,7 +1209,6 @@ FFDA FFDC 1D7AA 1D7C2 1D7C4 1D7C9 1D7CE 1D7FF -20000 -2A6D6 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/In/2.pl b/lib/unicore/In/2.pl index eec928f290..c16f7d16cf 100644 --- a/lib/unicore/In/2.pl +++ b/lib/unicore/In/2.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -AC00 D7A3 Hangul Syllable +AC00 D7A3 Lo END diff --git a/lib/unicore/In/3.pl b/lib/unicore/In/3.pl index 5df4d5423a..2ca13f4112 100644 --- a/lib/unicore/In/3.pl +++ b/lib/unicore/In/3.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -D800 DB7F Non Private Use High Surrogate +D800 DB7F Cs END diff --git a/lib/unicore/In/4.pl b/lib/unicore/In/4.pl index f33e5c3871..acf09cc12e 100644 --- a/lib/unicore/In/4.pl +++ b/lib/unicore/In/4.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -DB80 DBFF Private Use High Surrogate +DB80 DBFF Cs END diff --git a/lib/unicore/In/5.pl b/lib/unicore/In/5.pl index fd896ff56b..15c3f92539 100644 --- a/lib/unicore/In/5.pl +++ b/lib/unicore/In/5.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -DC00 DFFF Low Surrogate +DC00 DFFF Cs END diff --git a/lib/unicore/In/6.pl b/lib/unicore/In/6.pl index 1404dba687..fc31fb8b26 100644 --- a/lib/unicore/In/6.pl +++ b/lib/unicore/In/6.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -E000 F8FF Private Use +E000 F8FF Co END diff --git a/lib/unicore/In/7.pl b/lib/unicore/In/7.pl index f5481cc91d..8eb0eee398 100644 --- a/lib/unicore/In/7.pl +++ b/lib/unicore/In/7.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -20000 2A6D6 CJK Ideograph Extension B +20000 2A6D6 Lo END diff --git a/lib/unicore/In/8.pl b/lib/unicore/In/8.pl index be01ceb30c..5c82bcd465 100644 --- a/lib/unicore/In/8.pl +++ b/lib/unicore/In/8.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -F0000 FFFFD Plane 15 Private Use +F0000 FFFFD Co END diff --git a/lib/unicore/In/9.pl b/lib/unicore/In/9.pl index 8eb12d1e3b..ec7132addf 100644 --- a/lib/unicore/In/9.pl +++ b/lib/unicore/In/9.pl @@ -2,5 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -100000 10FFFD Plane 16 Private Use +100000 10FFFD Co END diff --git a/lib/unicore/Is/Alnum.pl b/lib/unicore/Is/Alnum.pl index eb97eb86bb..97858ab55a 100644 --- a/lib/unicore/Is/Alnum.pl +++ b/lib/unicore/Is/Alnum.pl @@ -325,7 +325,10 @@ return <<'END'; 31A0 31B7 3220 3229 3280 3289 +3400 4DB5 +4E00 9FA5 A000 A48C +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -394,5 +397,6 @@ FFDA FFDC 1D7AA 1D7C2 1D7C4 1D7C9 1D7CE 1D7FF +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/Is/Alpha.pl b/lib/unicore/Is/Alpha.pl index cbd65d0f83..b8dc6c48c4 100644 --- a/lib/unicore/Is/Alpha.pl +++ b/lib/unicore/Is/Alpha.pl @@ -295,7 +295,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 +3400 4DB5 +4E00 9FA5 A000 A48C +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -361,5 +364,6 @@ FFDA FFDC 1D78A 1D7A8 1D7AA 1D7C2 1D7C4 1D7C9 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/Is/C.pl b/lib/unicore/Is/C.pl index 199094f200..b58d48d427 100644 --- a/lib/unicore/Is/C.pl +++ b/lib/unicore/Is/C.pl @@ -9,18 +9,12 @@ return <<'END'; 200C 200F 202A 202E 206A 206F -D800 -DB7F DB80 -DBFF DC00 -DFFF E000 -F8FF +D800 F8FF FEFF FFF9 FFFB 1D173 1D17A E0001 E0020 E007F -F0000 -FFFFD -100000 -10FFFD +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/Is/Cntrl.pl b/lib/unicore/Is/Cntrl.pl index 818cbc0267..b58d48d427 100644 --- a/lib/unicore/Is/Cntrl.pl +++ b/lib/unicore/Is/Cntrl.pl @@ -9,9 +9,12 @@ return <<'END'; 200C 200F 202A 202E 206A 206F +D800 F8FF FEFF FFF9 FFFB 1D173 1D17A E0001 E0020 E007F +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/Is/Co.pl b/lib/unicore/Is/Co.pl index b7ee129425..04f3129f6f 100644 --- a/lib/unicore/Is/Co.pl +++ b/lib/unicore/Is/Co.pl @@ -2,10 +2,7 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -E000 -F8FF -F0000 -FFFFD -100000 -10FFFD +E000 F8FF +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/Is/Cs.pl b/lib/unicore/Is/Cs.pl index 79faceca08..bd71bd18ea 100644 --- a/lib/unicore/Is/Cs.pl +++ b/lib/unicore/Is/Cs.pl @@ -2,8 +2,5 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -D800 -DB7F DB80 -DBFF DC00 -DFFF +D800 DFFF END diff --git a/lib/unicore/Is/Graph.pl b/lib/unicore/Is/Graph.pl index 15c9f1fc36..5c13624d65 100644 --- a/lib/unicore/Is/Graph.pl +++ b/lib/unicore/Is/Graph.pl @@ -319,13 +319,16 @@ return <<'END'; 3300 3376 337B 33DD 33E0 33FE +3400 4DB5 +4E00 9FA5 A000 A48C A490 A4A1 A4A4 A4B3 A4B5 A4C0 A4C2 A4C4 A4C6 -F900 FA2D +AC00 D7A3 +E000 FA2D FB00 FB06 FB13 FB17 FB1D FB36 @@ -386,5 +389,8 @@ FFFC FFFD 1D552 1D6A3 1D6A8 1D7C9 1D7CE 1D7FF +20000 2A6D6 2F800 2FA1D +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/Is/L.pl b/lib/unicore/Is/L.pl index bb341269f3..811603b4a0 100644 --- a/lib/unicore/Is/L.pl +++ b/lib/unicore/Is/L.pl @@ -228,13 +228,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 -3400 -4DB5 -4E00 -9FA5 +3400 4DB5 +4E00 9FA5 A000 A48C -AC00 -D7A3 +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -295,7 +292,6 @@ FFDA FFDC 1D78A 1D7A8 1D7AA 1D7C2 1D7C4 1D7C9 -20000 -2A6D6 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/Is/Lo.pl b/lib/unicore/Is/Lo.pl index ff84f2b996..726bbf7761 100644 --- a/lib/unicore/Is/Lo.pl +++ b/lib/unicore/Is/Lo.pl @@ -161,13 +161,10 @@ return <<'END'; 3105 312C 3131 318E 31A0 31B7 -3400 -4DB5 -4E00 -9FA5 +3400 4DB5 +4E00 9FA5 A000 A48C -AC00 -D7A3 +AC00 D7A3 F900 FA2D FB1D FB1F FB28 @@ -193,7 +190,6 @@ FFD2 FFD7 FFDA FFDC 10300 1031E 10330 10349 -20000 -2A6D6 +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/Is/Print.pl b/lib/unicore/Is/Print.pl index 27eb0566fe..0b947221fe 100644 --- a/lib/unicore/Is/Print.pl +++ b/lib/unicore/Is/Print.pl @@ -320,13 +320,16 @@ return <<'END'; 3300 3376 337B 33DD 33E0 33FE +3400 4DB5 +4E00 9FA5 A000 A48C A490 A4A1 A4A4 A4B3 A4B5 A4C0 A4C2 A4C4 A4C6 -F900 FA2D +AC00 D7A3 +E000 FA2D FB00 FB06 FB13 FB17 FB1D FB36 @@ -387,5 +390,8 @@ FFFC FFFD 1D552 1D6A3 1D6A8 1D7C9 1D7CE 1D7FF +20000 2A6D6 2F800 2FA1D +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/Is/Word.pl b/lib/unicore/Is/Word.pl index 437c067730..baba914391 100644 --- a/lib/unicore/Is/Word.pl +++ b/lib/unicore/Is/Word.pl @@ -326,7 +326,10 @@ return <<'END'; 31A0 31B7 3220 3229 3280 3289 +3400 4DB5 +4E00 9FA5 A000 A48C +AC00 D7A3 F900 FA2D FB00 FB06 FB13 FB17 @@ -395,5 +398,6 @@ FFDA FFDC 1D7AA 1D7C2 1D7C4 1D7C9 1D7CE 1D7FF +20000 2A6D6 2F800 2FA1D END diff --git a/lib/unicore/Name.pl b/lib/unicore/Name.pl index de76f40bd4..860f087605 100644 --- a/lib/unicore/Name.pl +++ b/lib/unicore/Name.pl @@ -7950,6 +7950,8 @@ return <<'END'; 33FC IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE 33FD IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY 33FE IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +3400 4DB5 CJK Ideograph Extension A +4E00 9FA5 CJK Ideograph A000 YI SYLLABLE IT A001 YI SYLLABLE IX A002 YI SYLLABLE I @@ -9165,6 +9167,11 @@ A4C2 YI RADICAL SHOP A4C3 YI RADICAL CHE A4C4 YI RADICAL ZZIET A4C6 YI RADICAL KE +AC00 D7A3 Hangul Syllable +D800 DB7F Non Private Use High Surrogate +DB80 DBFF Private Use High Surrogate +DC00 DFFF Low Surrogate +E000 F8FF Private Use F900 CJK COMPATIBILITY IDEOGRAPH-F900 F901 CJK COMPATIBILITY IDEOGRAPH-F901 F902 CJK COMPATIBILITY IDEOGRAPH-F902 @@ -12138,6 +12145,7 @@ FFFD REPLACEMENT CHARACTER 1D7FD MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF MATHEMATICAL MONOSPACE DIGIT NINE +20000 2A6D6 CJK Ideograph Extension B 2F800 CJK COMPATIBILITY IDEOGRAPH-2F800 2F801 CJK COMPATIBILITY IDEOGRAPH-2F801 2F802 CJK COMPATIBILITY IDEOGRAPH-2F802 @@ -12777,4 +12785,6 @@ E007C TAG VERTICAL LINE E007D TAG RIGHT CURLY BRACKET E007E TAG TILDE E007F CANCEL TAG +F0000 FFFFD Plane 15 Private Use +100000 10FFFD Plane 16 Private Use END diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 676e189527..5615aeeb4a 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -114,6 +114,55 @@ my %Cat; my %General; my @General; +sub gencat { + my ($Name, $GeneralH, $GeneralA, $Cat, + $name, $cat, $code, $op) = @_; + + $op->($Name, $code, $name); + $op->($GeneralA, $code, $cat); + + $op->($GeneralH->{$name} ||= [], $code, $name); + + $op->($Cat->{$cat} ||= [], $code); + $op->($Cat->{substr($cat, 0, 1)} + ||= [], $code); + # 005F: SPACING UNDERSCORE + $op->($Cat->{Word} ||= [], $code) + if $cat =~ /^[LMN]/ or $code eq "005F"; + $op->($Cat->{Alnum} ||= [], $code) + if $cat =~ /^[LMN]/; + $op->($Cat->{Alpha} ||= [], $code) + if $cat =~ /^[LM]/; + # 0009: HORIZONTAL TABULATION + # 000A: LINE FEED + # 000B: VERTICAL TABULATION + # 000C: FORM FEED + # 000D: CARRIAGE RETURN + # 0020: SPACE + $op->($Cat->{Space} ||= [], $code) + if $cat =~ /^Z/ || + $code =~ /^(0009|000A|000B|000C|000D)$/; + $op->($Cat->{SpacePerl} ||= [], $code) + if $cat =~ /^Z/ || + $code =~ /^(0009|000A|000C|000D)$/; + $op->($Cat->{Blank} ||= [], $code) + if $code =~ /^(0020|0009)$/ || + $cat =~ /^Z[^lp]$/; + $op->($Cat->{Digit} ||= [], $code) if $cat eq "Nd"; + $op->($Cat->{Upper} ||= [], $code) if $cat eq "Lu"; + $op->($Cat->{Lower} ||= [], $code) if $cat eq "Ll"; + $op->($Cat->{Title} ||= [], $code) if $cat eq "Lt"; + $op->($Cat->{ASCII} ||= [], $code) if $code le "007F"; + $op->($Cat->{Cntrl} ||= [], $code) if $cat =~ /^C/; + $op->($Cat->{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/; + $op->($Cat->{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/; + $op->($Cat->{Punct} ||= [], $code) if $cat =~ /^P/; + # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f + $op->($Cat->{XDigit} ||= [], $code) + if $code =~ /^00(3[0-9]|[46][1-6])$/; + +} + if (open(my $Unicode, "Unicode.txt")) { my @Name; my @Bidi; @@ -136,61 +185,18 @@ if (open(my $Unicode, "Unicode.txt")) { if ($name =~ /^<(.+), (First|Last)>$/) { $name = $1; - if ($2 eq 'First') { - append($General{$name} ||= [], $code, $name); - } else { - extend($General{$name} , $code); - } + gencat(\@Name, \%General, \@General, \%Cat, + $name, $cat, $code, + $2 eq 'First' ? \&append : \&extend); unless (defined $In{$name}) { $In{$name} = $InId++; $InIn{$name} = $General{$name}; } - append($Cat{$cat} ||= [], $code); - append($Cat{substr($cat, 0, 1)} - ||= [], $code); } else { - append(\@Name, $code, $name); - - append(\@General, $code, $cat); - - append($Cat{$cat} ||= [], $code); - append($Cat{substr($cat, 0, 1)} - ||= [], $code); - # 005F: SPACING UNDERSCORE - append($Cat{Word} ||= [], $code) - if $cat =~ /^[LMN]/ or $code eq "005F"; - append($Cat{Alnum} ||= [], $code) - if $cat =~ /^[LMN]/; - append($Cat{Alpha} ||= [], $code) - if $cat =~ /^[LM]/; - # 0009: HORIZONTAL TABULATION - # 000A: LINE FEED - # 000B: VERTICAL TABULATION - # 000C: FORM FEED - # 000D: CARRIAGE RETURN - # 0020: SPACE - append($Cat{Space} ||= [], $code) - if $cat =~ /^Z/ || - $code =~ /^(0009|000A|000B|000C|000D)$/; - append($Cat{SpacePerl} ||= [], $code) - if $cat =~ /^Z/ || - $code =~ /^(0009|000A|000C|000D)$/; - append($Cat{Blank} ||= [], $code) - if $code =~ /^(0020|0009)$/ || - $cat =~ /^Z[^lp]$/; - append($Cat{Digit} ||= [], $code) if $cat eq "Nd"; - append($Cat{Upper} ||= [], $code) if $cat eq "Lu"; - append($Cat{Lower} ||= [], $code) if $cat eq "Ll"; - append($Cat{Title} ||= [], $code) if $cat eq "Lt"; - append($Cat{ASCII} ||= [], $code) if $code le "007F"; - append($Cat{Cntrl} ||= [], $code) if $cat =~ /^C/; - append($Cat{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/; - append($Cat{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/; - append($Cat{Punct} ||= [], $code) if $cat =~ /^P/; - # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f - append($Cat{XDigit} ||= [], $code) - if $code =~ /^00(3[0-9]|[46][1-6])$/; - + + gencat(\@Name, \%General, \@General, \%Cat, + $name, $cat, $code, \&append); + append($To{Upper} ||= [], $code, $upper) if $upper; append($To{Lower} ||= [], $code, $lower) if $lower; append($To{Title} ||= [], $code, $title) if $title; @@ -653,7 +659,7 @@ foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) { # # The mapping from General Category long forms to short forms is # currently hardwired here since no simple data file in the UCD -# seems to do that. +# seems to do that. Unicode 3.2 will assumedly correct this. # my %Is = ( |