summaryrefslogtreecommitdiff
path: root/lib/unicore/mktables
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-11-14 14:59:32 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-11-14 14:59:32 +0000
commit: e904f99525ffc0cd5f09346758a1931019c2f0b0 (patch)
tree: c7e1fdc85ff4b89160808dda5292c4f59542ab9b /lib/unicore/mktables
parent: 137352a2af7440ba507c46800e6906b0f4e09e61 (diff)
download: perl-e904f99525ffc0cd5f09346758a1931019c2f0b0.tar.gz
The First, Last ranges in the Unicode data weren't
getting their general categories added properly; noticed by Jeffrey Friedl.
p4raw-id: //depot/perl@12994
Diffstat (limited to 'lib/unicore/mktables')
-rw-r--r-- lib/unicore/mktables 108
1 files changed, 57 insertions, 51 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 676e189527..5615aeeb4a 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -114,6 +114,55 @@ my %Cat;
my %General;
my @General;
+sub gencat { # File one code point into the name, general-category and derived-class tables via $op.
+ my ($Name, $GeneralH, $GeneralA, $Cat, # table refs; callers pass \@Name, \%General, \@General, \%Cat
+ $name, $cat, $code, $op) = @_; # $op is a code ref; callers pass \&append or \&extend
+ # $code is tested as a string such as "005F"; what $op does to a table is defined outside this sub.
+ $op->($Name, $code, $name);
+ $op->($GeneralA, $code, $cat);
+
+ $op->($GeneralH->{$name} ||= [], $code, $name); # per-name table, created empty on first use
+
+ $op->($Cat->{$cat} ||= [], $code); # table for the full category string, e.g. "Lu"
+ $op->($Cat->{substr($cat, 0, 1)} # table for the first letter of the category
+ ||= [], $code);
+ # 005F: SPACING UNDERSCORE
+ $op->($Cat->{Word} ||= [], $code)
+ if $cat =~ /^[LMN]/ or $code eq "005F"; # L*, M*, N* categories plus the underscore
+ $op->($Cat->{Alnum} ||= [], $code)
+ if $cat =~ /^[LMN]/; # same test as Word, without the underscore
+ $op->($Cat->{Alpha} ||= [], $code)
+ if $cat =~ /^[LM]/;
+ # 0009: HORIZONTAL TABULATION
+ # 000A: LINE FEED
+ # 000B: VERTICAL TABULATION
+ # 000C: FORM FEED
+ # 000D: CARRIAGE RETURN
+ # 0020: SPACE (not in the code lists below; presumably picked up by the /^Z/ test -- verify)
+ $op->($Cat->{Space} ||= [], $code)
+ if $cat =~ /^Z/ ||
+ $code =~ /^(0009|000A|000B|000C|000D)$/;
+ $op->($Cat->{SpacePerl} ||= [], $code)
+ if $cat =~ /^Z/ ||
+ $code =~ /^(0009|000A|000C|000D)$/; # same list as Space minus 000B
+ $op->($Cat->{Blank} ||= [], $code)
+ if $code =~ /^(0020|0009)$/ ||
+ $cat =~ /^Z[^lp]$/; # two-character Z categories other than Zl and Zp
+ $op->($Cat->{Digit} ||= [], $code) if $cat eq "Nd";
+ $op->($Cat->{Upper} ||= [], $code) if $cat eq "Lu";
+ $op->($Cat->{Lower} ||= [], $code) if $cat eq "Ll";
+ $op->($Cat->{Title} ||= [], $code) if $cat eq "Lt";
+ $op->($Cat->{ASCII} ||= [], $code) if $code le "007F"; # string (not numeric) comparison
+ $op->($Cat->{Cntrl} ||= [], $code) if $cat =~ /^C/;
+ $op->($Cat->{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
+ $op->($Cat->{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/; # Graph's test plus Zs
+ $op->($Cat->{Punct} ||= [], $code) if $cat =~ /^P/;
+ # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
+ $op->($Cat->{XDigit} ||= [], $code)
+ if $code =~ /^00(3[0-9]|[46][1-6])$/;
+
+}
+
if (open(my $Unicode, "Unicode.txt")) {
my @Name;
my @Bidi;
@@ -136,61 +185,18 @@ if (open(my $Unicode, "Unicode.txt")) {
if ($name =~ /^<(.+), (First|Last)>$/) {
$name = $1;
- if ($2 eq 'First') {
- append($General{$name} ||= [], $code, $name);
- } else {
- extend($General{$name} , $code);
- }
+ gencat(\@Name, \%General, \@General, \%Cat,
+ $name, $cat, $code,
+ $2 eq 'First' ? \&append : \&extend);
unless (defined $In{$name}) {
$In{$name} = $InId++;
$InIn{$name} = $General{$name};
}
- append($Cat{$cat} ||= [], $code);
- append($Cat{substr($cat, 0, 1)}
- ||= [], $code);
} else {
- append(\@Name, $code, $name);
-
- append(\@General, $code, $cat);
-
- append($Cat{$cat} ||= [], $code);
- append($Cat{substr($cat, 0, 1)}
- ||= [], $code);
- # 005F: SPACING UNDERSCORE
- append($Cat{Word} ||= [], $code)
- if $cat =~ /^[LMN]/ or $code eq "005F";
- append($Cat{Alnum} ||= [], $code)
- if $cat =~ /^[LMN]/;
- append($Cat{Alpha} ||= [], $code)
- if $cat =~ /^[LM]/;
- # 0009: HORIZONTAL TABULATION
- # 000A: LINE FEED
- # 000B: VERTICAL TABULATION
- # 000C: FORM FEED
- # 000D: CARRIAGE RETURN
- # 0020: SPACE
- append($Cat{Space} ||= [], $code)
- if $cat =~ /^Z/ ||
- $code =~ /^(0009|000A|000B|000C|000D)$/;
- append($Cat{SpacePerl} ||= [], $code)
- if $cat =~ /^Z/ ||
- $code =~ /^(0009|000A|000C|000D)$/;
- append($Cat{Blank} ||= [], $code)
- if $code =~ /^(0020|0009)$/ ||
- $cat =~ /^Z[^lp]$/;
- append($Cat{Digit} ||= [], $code) if $cat eq "Nd";
- append($Cat{Upper} ||= [], $code) if $cat eq "Lu";
- append($Cat{Lower} ||= [], $code) if $cat eq "Ll";
- append($Cat{Title} ||= [], $code) if $cat eq "Lt";
- append($Cat{ASCII} ||= [], $code) if $code le "007F";
- append($Cat{Cntrl} ||= [], $code) if $cat =~ /^C/;
- append($Cat{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
- append($Cat{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;
- append($Cat{Punct} ||= [], $code) if $cat =~ /^P/;
- # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
- append($Cat{XDigit} ||= [], $code)
- if $code =~ /^00(3[0-9]|[46][1-6])$/;
-
+
+ gencat(\@Name, \%General, \@General, \%Cat,
+ $name, $cat, $code, \&append);
+
append($To{Upper} ||= [], $code, $upper) if $upper;
append($To{Lower} ||= [], $code, $lower) if $lower;
append($To{Title} ||= [], $code, $title) if $title;
@@ -653,7 +659,7 @@ foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
#
# The mapping from General Category long forms to short forms is
# currently hardwired here since no simple data file in the UCD
-# seems to do that.
+# seems to do that. Unicode 3.2 will assumedly correct this.
#
my %Is = (