From d9efae67d76cc4acd8980b711b5bebc7142b5319 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sat, 29 Sep 2001 04:57:42 +0000 Subject: Allow for more flexibility in the \p{In...} names, now case doesn't matter, and any space or dash can be matched by any space, dash, underbar, or empty. (may be going too far on leniency) p4raw-id: //depot/perl@12264 --- lib/unicore/mktables.PL | 88 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 24 deletions(-) (limited to 'lib/unicore/mktables.PL') diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL index f86ff696d1..642c66fc72 100755 --- a/lib/unicore/mktables.PL +++ b/lib/unicore/mktables.PL @@ -231,7 +231,8 @@ mkdir "To", 0755; # This is not written for speed... -my %InId; +my %InIdScript; +my %InIdBlock; my $InId = 0; foreach $file (@todo) { @@ -258,9 +259,6 @@ END close OUT; } -# Do Scripts before Blocks so that in case of naming conflicts -# the more natural one (Script) wins over the artificial one (Block). - print "Scripts\n"; open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n"; open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n"; @@ -281,13 +279,11 @@ while () { chomp; ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i; if ($name) { - my $InName = lc($name); - $InName =~ s/\b(\w)/uc($1)/ge; - $InName =~ s/\W+//g; + my $InName = $name; my $id; - unless (exists $InId{$InName}) { + unless (exists $InIdScript{$InName}) { print "\t$InName\n"; - $id = $Scripts{$InName} = $InId{$InName} = $InId++; + $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++; open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n"; print SCRIPT <>In/$id.pl"); print SCRIPT <>In/$id.pl"); print SCRIPT <) { next if /^#/; next if /^$/; chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i; if ($name) { my $InName = $name; - $InName =~ s/\W+//g; print "\t$InName\n"; my $id; # TODO: only the first one of Private Use blocks qualifies - unless (exists $InId{$InName}) { - $InId{$InName} = $InId++; - } elsif (exists $Scripts{$InName}) { - $InName .= 'Block'; - $InId{$InName} = $InId++; + unless (exists $InIdBlock{$InName}) { + $InIdBlock{$InName} = $InId++; } - $id = $InId{$InName}; + $id = $InIdBlock{$InName}; open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; + print OUT "$code\t$last\t$name\t# In/$id.pl\n"; print BLOCK < $InId{$b} } keys %InId) { - printf INID "%-40s => %3d,\n", "'$in'", $InId{$in}; +my %InIdScriptById = reverse %InIdScript; +my %InIdBlockById = reverse %InIdBlock; + +my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById; +my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById; + +my %InId; +my %IdIdLcName; + +for my $id (@InIdScriptById) { + my $name = $InIdScriptById{$id}; + my $lcname = lc($name); + $InId{$name} = $id; + $IdIdLcName{$lcname} = $id; +} + +for my $id (@InIdBlockById) { + my $name = $InIdBlockById{$id}; + my $lcname = lc($name); + if (exists $IdIdLcName{$lcname}) { + $InId{"$name Block"} = $id; + } else { + $InId{$name} = $id; + } + $IdIdLcName{$lcname} = $id; +} + +my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId; + +my %InIdPrefix; + +foreach my $in (@InId) { + my $inpat = $in; + $inpat =~ s/([- ])/[- _]?/g; + push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ]; + printf INID "%-45s => %3d,\n", "'$in'", $InId{$in}; +} + +print INID ");\n"; + +print INID < {\n"; + foreach my $ininpat (@{$InIdPrefix{$prefix}}) { + my ($in, $inpat) = @$ininpat; + printf INID "\t'$inpat' => '$in',\n"; + } + printf INID "},\n"; } print INID ");\n"; -- cgit v1.2.1