diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-02-28 18:18:01 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-02-28 18:18:01 +0000 |
commit | f39fb8cc9d88ca007aab35d5a7373417b639dd74 (patch) | |
tree | 4d5de0c9837989c46c54a897104a346f6fda2828 /ext/Encode/compile | |
parent | f970e606dd5249e7f15150b8baa7b8abd0904e24 (diff) | |
download | perl-f39fb8cc9d88ca007aab35d5a7373417b639dd74.tar.gz |
Add charname comments to .ucm files written by 'compile',
also order charmaps in those files by source encoding.
regenerate distributed .ucm files
p4raw-id: //depot/perlio@8969
Diffstat (limited to 'ext/Encode/compile')
-rwxr-xr-x | ext/Encode/compile | 40 |
1 files changed, 33 insertions, 7 deletions
diff --git a/ext/Encode/compile b/ext/Encode/compile index a688c23962..5e3e645502 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -542,9 +542,28 @@ sub decode_U my $s = shift; } +my @uname; +sub char_names +{ + my $s = do "unicode/Name.pl"; + pos($s) = 0; + while ($s =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc) + { + my $name = $3; + my $s = hex($1); + last if $s >= 0x10000; + my $e = length($2) ? hex($2) : $s; + for (my $i = $s; $i <= $e; $i++) + { + $uname[$i] = $name; +# print sprintf("U%04X $name\n",$i); + } + } +} + sub output_ucm_page { - my ($fh,$a,$t,$pre) = @_; + my ($cmap,$a,$t,$pre) = @_; # warn sprintf("Page %x\n",$pre); foreach my $b (sort keys %$t) { @@ -553,7 +572,7 @@ sub output_ucm_page my $u = ord($s); if ($n != $a && $n != $t) { - output_ucm_page($fh,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF); + output_ucm_page($cmap,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF); } elsif (length($out)) { @@ -561,12 +580,13 @@ sub output_ucm_page { $u = $pre|($u &0x3f); } - printf $fh "<U%04X> ",$u; + my $s = sprintf "<U%04X> ",$u; foreach my $c (split(//,$out)) { - printf $fh "\\x%02X",ord($c); + $s .= sprintf "\\x%02X",ord($c); } - printf $fh " |%d\n",($fb ? 1 : 0); + $s .= sprintf " |%d # %s\n",($fb ? 1 : 0),$uname[$u]; + push(@$cmap,$s); } else { @@ -577,9 +597,10 @@ sub output_ucm_page sub output_ucm { - my ($fh,$name,$a,$rep,$min_el,$max_el) = @_; + my ($fh,$name,$h,$rep,$min_el,$max_el) = @_; print $fh "# Written $perforce\n# $0 @orig_ARGV\n" unless $opt{'q'}; print $fh "<code_set_name> \"$name\"\n"; + char_names(); if (defined $min_el) { print $fh "<mb_cur_min> $min_el\n"; @@ -597,8 +618,13 @@ sub output_ucm } print $fh "\n"; } + my @cmap; + output_ucm_page(\@cmap,$h,$h,0); print $fh "#\nCHARMAP\n"; - output_ucm_page($fh,$a,$a,0); + foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap) + { + print $fh $line; + } print $fh "END CHARMAP\n"; } |