summary | refs | log | tree | commit | diff
path: root/lib/unicore
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-11-14 14:59:32 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-11-14 14:59:32 +0000
commit e904f99525ffc0cd5f09346758a1931019c2f0b0 (patch)
tree c7e1fdc85ff4b89160808dda5292c4f59542ab9b /lib/unicore
parent 137352a2af7440ba507c46800e6906b0f4e09e61 (diff)
download perl-e904f99525ffc0cd5f09346758a1931019c2f0b0.tar.gz
The First, Last ranges in the Unicode data weren't
getting their general categories added properly; noticed by Jeffrey Friedl.

p4raw-id: //depot/perl@12994
Diffstat (limited to 'lib/unicore')
-rw-r--r-- lib/unicore/Category.pl 8
-rw-r--r-- lib/unicore/In/0.pl 2
-rw-r--r-- lib/unicore/In/1.pl 2
-rw-r--r-- lib/unicore/In/164.pl 12
-rw-r--r-- lib/unicore/In/169.pl 12
-rw-r--r-- lib/unicore/In/170.pl 12
-rw-r--r-- lib/unicore/In/2.pl 2
-rw-r--r-- lib/unicore/In/3.pl 2
-rw-r--r-- lib/unicore/In/4.pl 2
-rw-r--r-- lib/unicore/In/5.pl 2
-rw-r--r-- lib/unicore/In/6.pl 2
-rw-r--r-- lib/unicore/In/7.pl 2
-rw-r--r-- lib/unicore/In/8.pl 2
-rw-r--r-- lib/unicore/In/9.pl 2
-rw-r--r-- lib/unicore/Is/Alnum.pl 4
-rw-r--r-- lib/unicore/Is/Alpha.pl 4
-rw-r--r-- lib/unicore/Is/C.pl 12
-rw-r--r-- lib/unicore/Is/Cntrl.pl 3
-rw-r--r-- lib/unicore/Is/Co.pl 9
-rw-r--r-- lib/unicore/Is/Cs.pl 5
-rw-r--r-- lib/unicore/Is/Graph.pl 8
-rw-r--r-- lib/unicore/Is/L.pl 12
-rw-r--r-- lib/unicore/Is/Lo.pl 12
-rw-r--r-- lib/unicore/Is/Print.pl 8
-rw-r--r-- lib/unicore/Is/Word.pl 4
-rw-r--r-- lib/unicore/Name.pl 10
-rw-r--r-- lib/unicore/mktables 108
27 files changed, 141 insertions, 122 deletions
diff --git a/lib/unicore/Category.pl b/lib/unicore/Category.pl
index 6f0979ddc8..e8f676c88f 100644
--- a/lib/unicore/Category.pl
+++ b/lib/unicore/Category.pl
@@ -1381,12 +1381,17 @@ return <<'END';
3300 3376 So
337B 33DD So
33E0 33FE So
+3400 4DB5 Lo
+4E00 9FA5 Lo
A000 A48C Lo
A490 A4A1 So
A4A4 A4B3 So
A4B5 A4C0 So
A4C2 A4C4 So
A4C6 So
+AC00 D7A3 Lo
+D800 DFFF Cs
+E000 F8FF Co
F900 FA2D Lo
FB00 FB06 Ll
FB13 FB17 Ll
@@ -1587,7 +1592,10 @@ FFFC FFFD So
1D7C3 Sm
1D7C4 1D7C9 Ll
1D7CE 1D7FF Nd
+20000 2A6D6 Lo
2F800 2FA1D Lo
E0001 Cf
E0020 E007F Cf
+F0000 FFFFD Co
+100000 10FFFD Co
END
diff --git a/lib/unicore/In/0.pl b/lib/unicore/In/0.pl
index 6b95de3dd5..db52684f0b 100644
--- a/lib/unicore/In/0.pl
+++ b/lib/unicore/In/0.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+3400 4DB5 Lo
END
diff --git a/lib/unicore/In/1.pl b/lib/unicore/In/1.pl
index 3ef31669c8..e1894b86f4 100644
--- a/lib/unicore/In/1.pl
+++ b/lib/unicore/In/1.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-4E00 9FA5 CJK Ideograph
+4E00 9FA5 Lo
END
diff --git a/lib/unicore/In/164.pl b/lib/unicore/In/164.pl
index afa40c35c5..6a1e2c38c7 100644
--- a/lib/unicore/In/164.pl
+++ b/lib/unicore/In/164.pl
@@ -309,13 +309,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
-3400
-4DB5
-4E00
-9FA5
+3400 4DB5
+4E00 9FA5
A000 A48C
-AC00
-D7A3
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -378,7 +375,6 @@ FFDA FFDC
1D78A 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
-20000
-2A6D6
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/In/169.pl b/lib/unicore/In/169.pl
index 570636ec4c..b41f21d77a 100644
--- a/lib/unicore/In/169.pl
+++ b/lib/unicore/In/169.pl
@@ -935,13 +935,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
-3400
-4DB5
-4E00
-9FA5
+3400 4DB5
+4E00 9FA5
A000 A48C
-AC00
-D7A3
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -1034,7 +1031,6 @@ FFDA FFDC
1D790 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
-20000
-2A6D6
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/In/170.pl b/lib/unicore/In/170.pl
index a97c18fe1e..30cbfe93cd 100644
--- a/lib/unicore/In/170.pl
+++ b/lib/unicore/In/170.pl
@@ -1099,13 +1099,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
-3400
-4DB5
-4E00
-9FA5
+3400 4DB5
+4E00 9FA5
A000 A48C
-AC00
-D7A3
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -1212,7 +1209,6 @@ FFDA FFDC
1D7AA 1D7C2
1D7C4 1D7C9
1D7CE 1D7FF
-20000
-2A6D6
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/In/2.pl b/lib/unicore/In/2.pl
index eec928f290..c16f7d16cf 100644
--- a/lib/unicore/In/2.pl
+++ b/lib/unicore/In/2.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-AC00 D7A3 Hangul Syllable
+AC00 D7A3 Lo
END
diff --git a/lib/unicore/In/3.pl b/lib/unicore/In/3.pl
index 5df4d5423a..2ca13f4112 100644
--- a/lib/unicore/In/3.pl
+++ b/lib/unicore/In/3.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-D800 DB7F Non Private Use High Surrogate
+D800 DB7F Cs
END
diff --git a/lib/unicore/In/4.pl b/lib/unicore/In/4.pl
index f33e5c3871..acf09cc12e 100644
--- a/lib/unicore/In/4.pl
+++ b/lib/unicore/In/4.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-DB80 DBFF Private Use High Surrogate
+DB80 DBFF Cs
END
diff --git a/lib/unicore/In/5.pl b/lib/unicore/In/5.pl
index fd896ff56b..15c3f92539 100644
--- a/lib/unicore/In/5.pl
+++ b/lib/unicore/In/5.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-DC00 DFFF Low Surrogate
+DC00 DFFF Cs
END
diff --git a/lib/unicore/In/6.pl b/lib/unicore/In/6.pl
index 1404dba687..fc31fb8b26 100644
--- a/lib/unicore/In/6.pl
+++ b/lib/unicore/In/6.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-E000 F8FF Private Use
+E000 F8FF Co
END
diff --git a/lib/unicore/In/7.pl b/lib/unicore/In/7.pl
index f5481cc91d..8eb0eee398 100644
--- a/lib/unicore/In/7.pl
+++ b/lib/unicore/In/7.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-20000 2A6D6 CJK Ideograph Extension B
+20000 2A6D6 Lo
END
diff --git a/lib/unicore/In/8.pl b/lib/unicore/In/8.pl
index be01ceb30c..5c82bcd465 100644
--- a/lib/unicore/In/8.pl
+++ b/lib/unicore/In/8.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-F0000 FFFFD Plane 15 Private Use
+F0000 FFFFD Co
END
diff --git a/lib/unicore/In/9.pl b/lib/unicore/In/9.pl
index 8eb12d1e3b..ec7132addf 100644
--- a/lib/unicore/In/9.pl
+++ b/lib/unicore/In/9.pl
@@ -2,5 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-100000 10FFFD Plane 16 Private Use
+100000 10FFFD Co
END
diff --git a/lib/unicore/Is/Alnum.pl b/lib/unicore/Is/Alnum.pl
index eb97eb86bb..97858ab55a 100644
--- a/lib/unicore/Is/Alnum.pl
+++ b/lib/unicore/Is/Alnum.pl
@@ -325,7 +325,10 @@ return <<'END';
31A0 31B7
3220 3229
3280 3289
+3400 4DB5
+4E00 9FA5
A000 A48C
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -394,5 +397,6 @@ FFDA FFDC
1D7AA 1D7C2
1D7C4 1D7C9
1D7CE 1D7FF
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/Is/Alpha.pl b/lib/unicore/Is/Alpha.pl
index cbd65d0f83..b8dc6c48c4 100644
--- a/lib/unicore/Is/Alpha.pl
+++ b/lib/unicore/Is/Alpha.pl
@@ -295,7 +295,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
+3400 4DB5
+4E00 9FA5
A000 A48C
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -361,5 +364,6 @@ FFDA FFDC
1D78A 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/Is/C.pl b/lib/unicore/Is/C.pl
index 199094f200..b58d48d427 100644
--- a/lib/unicore/Is/C.pl
+++ b/lib/unicore/Is/C.pl
@@ -9,18 +9,12 @@ return <<'END';
200C 200F
202A 202E
206A 206F
-D800
-DB7F DB80
-DBFF DC00
-DFFF E000
-F8FF
+D800 F8FF
FEFF
FFF9 FFFB
1D173 1D17A
E0001
E0020 E007F
-F0000
-FFFFD
-100000
-10FFFD
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/Is/Cntrl.pl b/lib/unicore/Is/Cntrl.pl
index 818cbc0267..b58d48d427 100644
--- a/lib/unicore/Is/Cntrl.pl
+++ b/lib/unicore/Is/Cntrl.pl
@@ -9,9 +9,12 @@ return <<'END';
200C 200F
202A 202E
206A 206F
+D800 F8FF
FEFF
FFF9 FFFB
1D173 1D17A
E0001
E0020 E007F
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/Is/Co.pl b/lib/unicore/Is/Co.pl
index b7ee129425..04f3129f6f 100644
--- a/lib/unicore/Is/Co.pl
+++ b/lib/unicore/Is/Co.pl
@@ -2,10 +2,7 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-E000
-F8FF
-F0000
-FFFFD
-100000
-10FFFD
+E000 F8FF
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/Is/Cs.pl b/lib/unicore/Is/Cs.pl
index 79faceca08..bd71bd18ea 100644
--- a/lib/unicore/Is/Cs.pl
+++ b/lib/unicore/Is/Cs.pl
@@ -2,8 +2,5 @@
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-D800
-DB7F DB80
-DBFF DC00
-DFFF
+D800 DFFF
END
diff --git a/lib/unicore/Is/Graph.pl b/lib/unicore/Is/Graph.pl
index 15c9f1fc36..5c13624d65 100644
--- a/lib/unicore/Is/Graph.pl
+++ b/lib/unicore/Is/Graph.pl
@@ -319,13 +319,16 @@ return <<'END';
3300 3376
337B 33DD
33E0 33FE
+3400 4DB5
+4E00 9FA5
A000 A48C
A490 A4A1
A4A4 A4B3
A4B5 A4C0
A4C2 A4C4
A4C6
-F900 FA2D
+AC00 D7A3
+E000 FA2D
FB00 FB06
FB13 FB17
FB1D FB36
@@ -386,5 +389,8 @@ FFFC FFFD
1D552 1D6A3
1D6A8 1D7C9
1D7CE 1D7FF
+20000 2A6D6
2F800 2FA1D
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/Is/L.pl b/lib/unicore/Is/L.pl
index bb341269f3..811603b4a0 100644
--- a/lib/unicore/Is/L.pl
+++ b/lib/unicore/Is/L.pl
@@ -228,13 +228,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
-3400
-4DB5
-4E00
-9FA5
+3400 4DB5
+4E00 9FA5
A000 A48C
-AC00
-D7A3
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -295,7 +292,6 @@ FFDA FFDC
1D78A 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
-20000
-2A6D6
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/Is/Lo.pl b/lib/unicore/Is/Lo.pl
index ff84f2b996..726bbf7761 100644
--- a/lib/unicore/Is/Lo.pl
+++ b/lib/unicore/Is/Lo.pl
@@ -161,13 +161,10 @@ return <<'END';
3105 312C
3131 318E
31A0 31B7
-3400
-4DB5
-4E00
-9FA5
+3400 4DB5
+4E00 9FA5
A000 A48C
-AC00
-D7A3
+AC00 D7A3
F900 FA2D
FB1D
FB1F FB28
@@ -193,7 +190,6 @@ FFD2 FFD7
FFDA FFDC
10300 1031E
10330 10349
-20000
-2A6D6
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/Is/Print.pl b/lib/unicore/Is/Print.pl
index 27eb0566fe..0b947221fe 100644
--- a/lib/unicore/Is/Print.pl
+++ b/lib/unicore/Is/Print.pl
@@ -320,13 +320,16 @@ return <<'END';
3300 3376
337B 33DD
33E0 33FE
+3400 4DB5
+4E00 9FA5
A000 A48C
A490 A4A1
A4A4 A4B3
A4B5 A4C0
A4C2 A4C4
A4C6
-F900 FA2D
+AC00 D7A3
+E000 FA2D
FB00 FB06
FB13 FB17
FB1D FB36
@@ -387,5 +390,8 @@ FFFC FFFD
1D552 1D6A3
1D6A8 1D7C9
1D7CE 1D7FF
+20000 2A6D6
2F800 2FA1D
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/Is/Word.pl b/lib/unicore/Is/Word.pl
index 437c067730..baba914391 100644
--- a/lib/unicore/Is/Word.pl
+++ b/lib/unicore/Is/Word.pl
@@ -326,7 +326,10 @@ return <<'END';
31A0 31B7
3220 3229
3280 3289
+3400 4DB5
+4E00 9FA5
A000 A48C
+AC00 D7A3
F900 FA2D
FB00 FB06
FB13 FB17
@@ -395,5 +398,6 @@ FFDA FFDC
1D7AA 1D7C2
1D7C4 1D7C9
1D7CE 1D7FF
+20000 2A6D6
2F800 2FA1D
END
diff --git a/lib/unicore/Name.pl b/lib/unicore/Name.pl
index de76f40bd4..860f087605 100644
--- a/lib/unicore/Name.pl
+++ b/lib/unicore/Name.pl
@@ -7950,6 +7950,8 @@ return <<'END';
33FC IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE
33FD IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY
33FE IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
+3400 4DB5 CJK Ideograph Extension A
+4E00 9FA5 CJK Ideograph
A000 YI SYLLABLE IT
A001 YI SYLLABLE IX
A002 YI SYLLABLE I
@@ -9165,6 +9167,11 @@ A4C2 YI RADICAL SHOP
A4C3 YI RADICAL CHE
A4C4 YI RADICAL ZZIET
A4C6 YI RADICAL KE
+AC00 D7A3 Hangul Syllable
+D800 DB7F Non Private Use High Surrogate
+DB80 DBFF Private Use High Surrogate
+DC00 DFFF Low Surrogate
+E000 F8FF Private Use
F900 CJK COMPATIBILITY IDEOGRAPH-F900
F901 CJK COMPATIBILITY IDEOGRAPH-F901
F902 CJK COMPATIBILITY IDEOGRAPH-F902
@@ -12138,6 +12145,7 @@ FFFD REPLACEMENT CHARACTER
1D7FD MATHEMATICAL MONOSPACE DIGIT SEVEN
1D7FE MATHEMATICAL MONOSPACE DIGIT EIGHT
1D7FF MATHEMATICAL MONOSPACE DIGIT NINE
+20000 2A6D6 CJK Ideograph Extension B
2F800 CJK COMPATIBILITY IDEOGRAPH-2F800
2F801 CJK COMPATIBILITY IDEOGRAPH-2F801
2F802 CJK COMPATIBILITY IDEOGRAPH-2F802
@@ -12777,4 +12785,6 @@ E007C TAG VERTICAL LINE
E007D TAG RIGHT CURLY BRACKET
E007E TAG TILDE
E007F CANCEL TAG
+F0000 FFFFD Plane 15 Private Use
+100000 10FFFD Plane 16 Private Use
END
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 676e189527..5615aeeb4a 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -114,6 +114,55 @@ my %Cat;
my %General;
my @General;
+sub gencat { # Record one Unicode.txt entry in the name, general-category and property tables.
+ my ($Name, $GeneralH, $GeneralA, $Cat, # tables to update: name list, per-name hash, category list, property hash
+ $name, $cat, $code, $op) = @_; # the entry's name, general category and code (hex string), plus the updater
+ # $op is a code ref; the callers pass \&append, or \&extend for the Last line of a First/Last pair.
+ $op->($Name, $code, $name); # code -> name
+ $op->($GeneralA, $code, $cat); # code -> general category
+
+ $op->($GeneralH->{$name} ||= [], $code, $name); # per-name list, created on first use
+
+ $op->($Cat->{$cat} ||= [], $code); # full category, e.g. "Lu"
+ $op->($Cat->{substr($cat, 0, 1)} # first letter of the category, e.g. "L"
+ ||= [], $code);
+ # Word is L*, M*, N* plus 005F (SPACING UNDERSCORE).
+ $op->($Cat->{Word} ||= [], $code)
+ if $cat =~ /^[LMN]/ or $code eq "005F";
+ $op->($Cat->{Alnum} ||= [], $code)
+ if $cat =~ /^[LMN]/;
+ $op->($Cat->{Alpha} ||= [], $code)
+ if $cat =~ /^[LM]/;
+ # 0009: HORIZONTAL TABULATION
+ # 000A: LINE FEED
+ # 000B: VERTICAL TABULATION (in Space, but not in SpacePerl)
+ # 000C: FORM FEED
+ # 000D: CARRIAGE RETURN
+ # 0020: SPACE (named explicitly only for Blank; presumably reaches Space/SpacePerl via its Z* category)
+ $op->($Cat->{Space} ||= [], $code)
+ if $cat =~ /^Z/ ||
+ $code =~ /^(0009|000A|000B|000C|000D)$/;
+ $op->($Cat->{SpacePerl} ||= [], $code)
+ if $cat =~ /^Z/ ||
+ $code =~ /^(0009|000A|000C|000D)$/;
+ $op->($Cat->{Blank} ||= [], $code)
+ if $code =~ /^(0020|0009)$/ ||
+ $cat =~ /^Z[^lp]$/; # two-letter Z categories other than Zl and Zp
+ $op->($Cat->{Digit} ||= [], $code) if $cat eq "Nd";
+ $op->($Cat->{Upper} ||= [], $code) if $cat eq "Lu";
+ $op->($Cat->{Lower} ||= [], $code) if $cat eq "Ll";
+ $op->($Cat->{Title} ||= [], $code) if $cat eq "Lt";
+ $op->($Cat->{ASCII} ||= [], $code) if $code le "007F"; # string (not numeric) comparison of the hex code
+ $op->($Cat->{Cntrl} ||= [], $code) if $cat =~ /^C/; # matches every C* category, not only Cc
+ $op->($Cat->{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
+ $op->($Cat->{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/; # same test as Graph, plus Zs
+ $op->($Cat->{Punct} ||= [], $code) if $cat =~ /^P/;
+ # XDigit is chosen by code alone: 0030..0039 (DIGIT ZERO..NINE), 0041..0046 (A..F), 0061..0066 (a..f)
+ $op->($Cat->{XDigit} ||= [], $code)
+ if $code =~ /^00(3[0-9]|[46][1-6])$/;
+
+}
+
if (open(my $Unicode, "Unicode.txt")) {
my @Name;
my @Bidi;
@@ -136,61 +185,18 @@ if (open(my $Unicode, "Unicode.txt")) {
if ($name =~ /^<(.+), (First|Last)>$/) {
$name = $1;
- if ($2 eq 'First') {
- append($General{$name} ||= [], $code, $name);
- } else {
- extend($General{$name} , $code);
- }
+ gencat(\@Name, \%General, \@General, \%Cat,
+ $name, $cat, $code,
+ $2 eq 'First' ? \&append : \&extend);
unless (defined $In{$name}) {
$In{$name} = $InId++;
$InIn{$name} = $General{$name};
}
- append($Cat{$cat} ||= [], $code);
- append($Cat{substr($cat, 0, 1)}
- ||= [], $code);
} else {
- append(\@Name, $code, $name);
-
- append(\@General, $code, $cat);
-
- append($Cat{$cat} ||= [], $code);
- append($Cat{substr($cat, 0, 1)}
- ||= [], $code);
- # 005F: SPACING UNDERSCORE
- append($Cat{Word} ||= [], $code)
- if $cat =~ /^[LMN]/ or $code eq "005F";
- append($Cat{Alnum} ||= [], $code)
- if $cat =~ /^[LMN]/;
- append($Cat{Alpha} ||= [], $code)
- if $cat =~ /^[LM]/;
- # 0009: HORIZONTAL TABULATION
- # 000A: LINE FEED
- # 000B: VERTICAL TABULATION
- # 000C: FORM FEED
- # 000D: CARRIAGE RETURN
- # 0020: SPACE
- append($Cat{Space} ||= [], $code)
- if $cat =~ /^Z/ ||
- $code =~ /^(0009|000A|000B|000C|000D)$/;
- append($Cat{SpacePerl} ||= [], $code)
- if $cat =~ /^Z/ ||
- $code =~ /^(0009|000A|000C|000D)$/;
- append($Cat{Blank} ||= [], $code)
- if $code =~ /^(0020|0009)$/ ||
- $cat =~ /^Z[^lp]$/;
- append($Cat{Digit} ||= [], $code) if $cat eq "Nd";
- append($Cat{Upper} ||= [], $code) if $cat eq "Lu";
- append($Cat{Lower} ||= [], $code) if $cat eq "Ll";
- append($Cat{Title} ||= [], $code) if $cat eq "Lt";
- append($Cat{ASCII} ||= [], $code) if $code le "007F";
- append($Cat{Cntrl} ||= [], $code) if $cat =~ /^C/;
- append($Cat{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
- append($Cat{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;
- append($Cat{Punct} ||= [], $code) if $cat =~ /^P/;
- # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
- append($Cat{XDigit} ||= [], $code)
- if $code =~ /^00(3[0-9]|[46][1-6])$/;
-
+
+ gencat(\@Name, \%General, \@General, \%Cat,
+ $name, $cat, $code, \&append);
+
append($To{Upper} ||= [], $code, $upper) if $upper;
append($To{Lower} ||= [], $code, $lower) if $lower;
append($To{Title} ||= [], $code, $title) if $title;
@@ -653,7 +659,7 @@ foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
#
# The mapping from General Category long forms to short forms is
# currently hardwired here since no simple data file in the UCD
-# seems to do that.
+# seems to do that. Unicode 3.2 will assumedly correct this.
#
my %Is = (