diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-29 04:57:42 +0000 |
commit | d9efae67d76cc4acd8980b711b5bebc7142b5319 (patch) | |
tree | 85511ac1926809c78e0399fa9cde4033552336cb /lib/unicore/mktables.PL | |
parent | e8c9ad1b2aea45573ad656f23dcb17204fe59851 (diff) | |
download | perl-d9efae67d76cc4acd8980b711b5bebc7142b5319.tar.gz |
Allow for more flexibility in the \p{In...} names, now
case doesn't matter, and any space or dash can be
matched by any space, dash, underbar, or empty.
(may be going too far on leniency)
p4raw-id: //depot/perl@12264
Diffstat (limited to 'lib/unicore/mktables.PL')
-rwxr-xr-x | lib/unicore/mktables.PL | 88 |
1 files changed, 64 insertions, 24 deletions
diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL index f86ff696d1..642c66fc72 100755 --- a/lib/unicore/mktables.PL +++ b/lib/unicore/mktables.PL @@ -231,7 +231,8 @@ mkdir "To", 0755; # This is not written for speed... -my %InId; +my %InIdScript; +my %InIdBlock; my $InId = 0; foreach $file (@todo) { @@ -258,9 +259,6 @@ END close OUT; } -# Do Scripts before Blocks so that in case of naming conflicts -# the more natural one (Script) wins over the artificial one (Block). - print "Scripts\n"; open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n"; open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n"; @@ -281,13 +279,11 @@ while (<UD>) { chomp; ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i; if ($name) { - my $InName = lc($name); - $InName =~ s/\b(\w)/uc($1)/ge; - $InName =~ s/\W+//g; + my $InName = $name; my $id; - unless (exists $InId{$InName}) { + unless (exists $InIdScript{$InName}) { print "\t$InName\n"; - $id = $Scripts{$InName} = $InId{$InName} = $InId++; + $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++; open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n"; print SCRIPT <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! @@ -297,10 +293,10 @@ return <<'END'; EOH close(SCRIPT); } else { - $id = $InId{$InName}; + $id = $InIdScript{$InName}; } $last = "" unless defined $last; - print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; + print OUT "$code\t$last\t$name\t# In/$id.pl\n"; open(SCRIPT, ">>In/$id.pl"); print SCRIPT <<END; $code $last @@ -309,7 +305,7 @@ END } } -for my $id (values %InId) { +for my $id (values %InIdScript) { open(SCRIPT, ">>In/$id.pl"); print SCRIPT <<END2; END @@ -339,22 +335,18 @@ while (<UD>) { next if /^#/; next if /^$/; chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i; if ($name) { my $InName = $name; - $InName =~ s/\W+//g; print "\t$InName\n"; my $id; # TODO: only the first one of Private Use blocks qualifies - unless (exists $InId{$InName}) { - $InId{$InName} = $InId++; - } elsif (exists $Scripts{$InName}) { - $InName .= 'Block'; - $InId{$InName} = $InId++; + unless (exists $InIdBlock{$InName}) { + $InIdBlock{$InName} = $InId++; } - $id = $InId{$InName}; + $id = $InIdBlock{$InName}; open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; + print OUT "$code\t$last\t$name\t# In/$id.pl\n"; print BLOCK <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by $0 from e.g. $UnicodeData. @@ -381,9 +373,57 @@ print INID <<EOH; %utf8::In = ( EOH -# Order doesn't matter but let's prettyprint anyway. -foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) { - printf INID "%-40s => %3d,\n", "'$in'", $InId{$in}; +my %InIdScriptById = reverse %InIdScript; +my %InIdBlockById = reverse %InIdBlock; + +my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById; +my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById; + +my %InId; +my %IdIdLcName; + +for my $id (@InIdScriptById) { + my $name = $InIdScriptById{$id}; + my $lcname = lc($name); + $InId{$name} = $id; + $IdIdLcName{$lcname} = $id; +} + +for my $id (@InIdBlockById) { + my $name = $InIdBlockById{$id}; + my $lcname = lc($name); + if (exists $IdIdLcName{$lcname}) { + $InId{"$name Block"} = $id; + } else { + $InId{$name} = $id; + } + $IdIdLcName{$lcname} = $id; +} + +my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId; + +my %InIdPrefix; + +foreach my $in (@InId) { + my $inpat = $in; + $inpat =~ s/([- ])/[- _]?/g; + push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ]; + printf INID "%-45s => %3d,\n", "'$in'", $InId{$in}; +} + +print INID ");\n"; + +print INID <<EOH; +%utf8::InPat = ( +EOH + +foreach my $prefix (sort keys %InIdPrefix) { + printf INID "'$prefix' => {\n"; + foreach my $ininpat (@{$InIdPrefix{$prefix}}) { + my ($in, $inpat) = @$ininpat; + printf INID "\t'$inpat' => '$in',\n"; + } + printf INID "},\n"; } print INID ");\n"; |