summaryrefslogtreecommitdiff
path: root/lib/unicore/mktables.PL
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-09-29 04:57:42 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-09-29 04:57:42 +0000
commitd9efae67d76cc4acd8980b711b5bebc7142b5319 (patch)
tree85511ac1926809c78e0399fa9cde4033552336cb /lib/unicore/mktables.PL
parente8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff)
downloadperl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz
Allow for more flexibility in the \p{In...} names, now
case doesn't matter, and any space or dash can be matched by any space, dash, underbar, or empty. (may be going too far on leniency) p4raw-id: //depot/perl@12264
Diffstat (limited to 'lib/unicore/mktables.PL')
-rwxr-xr-xlib/unicore/mktables.PL88
1 files changed, 64 insertions, 24 deletions
diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL
index f86ff696d1..642c66fc72 100755
--- a/lib/unicore/mktables.PL
+++ b/lib/unicore/mktables.PL
@@ -231,7 +231,8 @@ mkdir "To", 0755;
# This is not written for speed...
-my %InId;
+my %InIdScript;
+my %InIdBlock;
my $InId = 0;
foreach $file (@todo) {
@@ -258,9 +259,6 @@ END
close OUT;
}
-# Do Scripts before Blocks so that in case of naming conflicts
-# the more natural one (Script) wins over the artificial one (Block).
-
print "Scripts\n";
open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
@@ -281,13 +279,11 @@ while (<UD>) {
chomp;
($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i;
if ($name) {
- my $InName = lc($name);
- $InName =~ s/\b(\w)/uc($1)/ge;
- $InName =~ s/\W+//g;
+ my $InName = $name;
my $id;
- unless (exists $InId{$InName}) {
+ unless (exists $InIdScript{$InName}) {
print "\t$InName\n";
- $id = $Scripts{$InName} = $InId{$InName} = $InId++;
+ $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++;
open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
print SCRIPT <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
@@ -297,10 +293,10 @@ return <<'END';
EOH
close(SCRIPT);
} else {
- $id = $InId{$InName};
+ $id = $InIdScript{$InName};
}
$last = "" unless defined $last;
- print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+ print OUT "$code\t$last\t$name\t# In/$id.pl\n";
open(SCRIPT, ">>In/$id.pl");
print SCRIPT <<END;
$code $last
@@ -309,7 +305,7 @@ END
}
}
-for my $id (values %InId) {
+for my $id (values %InIdScript) {
open(SCRIPT, ">>In/$id.pl");
print SCRIPT <<END2;
END
@@ -339,22 +335,18 @@ while (<UD>) {
next if /^#/;
next if /^$/;
chomp;
- ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i;
if ($name) {
my $InName = $name;
- $InName =~ s/\W+//g;
print "\t$InName\n";
my $id;
# TODO: only the first one of Private Use blocks qualifies
- unless (exists $InId{$InName}) {
- $InId{$InName} = $InId++;
- } elsif (exists $Scripts{$InName}) {
- $InName .= 'Block';
- $InId{$InName} = $InId++;
+ unless (exists $InIdBlock{$InName}) {
+ $InIdBlock{$InName} = $InId++;
}
- $id = $InId{$InName};
+ $id = $InIdBlock{$InName};
open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
- print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+ print OUT "$code\t$last\t$name\t# In/$id.pl\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -381,9 +373,57 @@ print INID <<EOH;
%utf8::In = (
EOH
-# Order doesn't matter but let's prettyprint anyway.
-foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
- printf INID "%-40s => %3d,\n", "'$in'", $InId{$in};
+my %InIdScriptById = reverse %InIdScript;
+my %InIdBlockById = reverse %InIdBlock;
+
+my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById;
+my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById;
+
+my %InId;
+my %IdIdLcName;
+
+for my $id (@InIdScriptById) {
+ my $name = $InIdScriptById{$id};
+ my $lcname = lc($name);
+ $InId{$name} = $id;
+ $IdIdLcName{$lcname} = $id;
+}
+
+for my $id (@InIdBlockById) {
+ my $name = $InIdBlockById{$id};
+ my $lcname = lc($name);
+ if (exists $IdIdLcName{$lcname}) {
+ $InId{"$name Block"} = $id;
+ } else {
+ $InId{$name} = $id;
+ }
+ $IdIdLcName{$lcname} = $id;
+}
+
+my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId;
+
+my %InIdPrefix;
+
+foreach my $in (@InId) {
+ my $inpat = $in;
+ $inpat =~ s/([- ])/[- _]?/g;
+ push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ];
+ printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
+}
+
+print INID ");\n";
+
+print INID <<EOH;
+%utf8::InPat = (
+EOH
+
+foreach my $prefix (sort keys %InIdPrefix) {
+ printf INID "'$prefix' => {\n";
+ foreach my $ininpat (@{$InIdPrefix{$prefix}}) {
+ my ($in, $inpat) = @$ininpat;
+ printf INID "\t'$inpat' => '$in',\n";
+ }
+ printf INID "},\n";
}
print INID ");\n";