summary | refs | log | tree | commit | diff
path: root/ext/Unicode
diff options
context:
space:
mode:
author SADAHIRO Tomoyuki <BQW10602@nifty.com> 2002-04-29 21:58:01 +0900
committer Jarkko Hietaniemi <jhi@iki.fi> 2002-04-29 12:12:37 +0000
commit 48287974ec38b5e5a0fdeb938d98a7826686f752 (patch)
tree bc6e44b3719316ba35d02155bc3f0a9481df71f5 /ext/Unicode
parent f027f50205c813d86c890c5f2f60eb2d68bf2fad (diff)
download perl-48287974ec38b5e5a0fdeb938d98a7826686f752.tar.gz
[Unicode::Normalize] mkheader tweak
Message-Id: <20020429125617.AA35.BQW10602@nifty.com> p4raw-id: //depot/perl@16262
Diffstat (limited to 'ext/Unicode')
-rw-r--r-- ext/Unicode/Normalize/mkheader 126
1 files changed, 65 insertions, 61 deletions
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader
index 339f86643d..8dc47a38f8 100644
--- a/ext/Unicode/Normalize/mkheader
+++ b/ext/Unicode/Normalize/mkheader
@@ -2,7 +2,7 @@
#
# This script generates "unfcan.h", "unfcpt.h", "unfcmb.h",
# "unfcmp.h", and "unfexc.h"
-# from CombiningClass.pl, Decomposition.pl, CompExcl.txt
+# from CombiningClass.pl, Decomposition.pl, CompositionExclusions.txt
# in lib/unicore or unicode directory
# for Unicode::Normalize.xs. (cf. Makefile.PL)
#
@@ -24,13 +24,19 @@ our $Decomp = do "unicore/Decomposition.pl"
|| do "unicode/Decomposition.pl"
|| croak "$PACKAGE: Decomposition.pl not found";
-our %Combin; # $codepoint => $number : combination class
-our %Canon; # $codepoint => $hexstring : canonical decomp.
-our %Compat; # $codepoint => $hexstring : compat. decomp.
-our %Compos; # $1st,$2nd => $codepoint : composite
-our %Exclus; # $codepoint => 1 : composition exclusions
-our %Single; # $codepoint => 1 : singletons
-our %NonStD; # $codepoint => 1 : non-starter decompositions
+our %Combin; # $codepoint => $number : combination class
+our %Canon; # $codepoint => $hexstring : canonical decomp.
+our %Compat; # $codepoint => $hexstring : compat. decomp.
+our %Exclus; # $codepoint => 1 : composition exclusions
+our %Single; # $codepoint => 1 : singletons
+our %NonStD; # $codepoint => 1 : non-starter decompositions
+
+our %Comp1st; # $codepoint => $listname : may be composed with a next char.
+our %Comp2nd; # $codepoint => 1 : may be composed with a prev char.
+our %CompList; # $listname,$2nd => $codepoint : composite
+
+our $prefix = "UNF_";
+our $structname = "${prefix}complist";
{
my($f, $fh);
@@ -42,15 +48,22 @@ our %NonStD; # $codepoint => 1 : non-starter decompositions
last if open($fh, $f);
$f = undef;
}
- croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
- while (<$fh>) {
- next if /^#/ or /^$/;
- s/#.*//;
- $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
- }
+ croak "$PACKAGE: neither unicore/CompositionExclusions.txt "
+ . "nor unicode/CompExcl.txt is found in @INC" unless defined $f;
+
+ while (<$fh>) {
+ next if /^#/ or /^$/;
+ s/#.*//;
+ $Exclus{ hex($1) } = 1 if /([0-9A-Fa-f]+)/;
+ }
close $fh;
}
+##
+## converts string "hhhh hhhh hhhh" to a numeric list
+##
+sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
+
while ($Combin =~ /(.+)/g) {
my @tab = split /\t/, $1;
my $ini = hex $tab[0];
@@ -66,17 +79,24 @@ while ($Decomp =~ /(.+)/g) {
my $compat = $tab[2] =~ s/<[^>]+>//;
my $dec = [ _getHexArray($tab[2]) ]; # decomposition
my $ini = hex($tab[0]); # initial decomposable character
+
+ my $listname =
+ @$dec == 2 ? sprintf("${structname}_%06x", $dec->[0]) : 'USELESS';
+ # %04x is bad since it'd place _3046 after _1d157.
+
if ($tab[1] eq '') {
$Compat{ $ini } = $dec;
if (! $compat) {
- $Canon{ $ini } = $dec;
+ $Canon{ $ini } = $dec;
if (@$dec == 2) {
if ($Combin{ $dec->[0] }) {
$NonStD{ $ini } = 1;
} else {
- $Compos{ $dec->[0] }{ $dec->[1] } = $ini;
+ $CompList{ $listname }{ $dec->[1] } = $ini;
+ $Comp1st{ $dec->[0] } = $listname;
+ $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$ini};
}
} elsif (@$dec == 1) {
$Single{ $ini } = 1;
@@ -85,16 +105,19 @@ while ($Decomp =~ /(.+)/g) {
}
}
} else {
- foreach my $u ($ini .. hex($tab[1])){
+ foreach my $u ($ini .. hex($tab[1])) {
$Compat{ $u } = $dec;
+
if (! $compat) {
- $Canon{ $u } = $dec;
+ $Canon{ $u } = $dec;
if (@$dec == 2) {
if ($Combin{ $dec->[0] }) {
$NonStD{ $u } = 1;
} else {
- $Compos{ $dec->[0] }{ $dec->[1] } = $u;
+ $CompList{ $listname }{ $dec->[1] } = $u;
+ $Comp1st{ $dec->[0] } = $listname;
+ $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
}
} elsif (@$dec == 1) {
$Single{ $u } = 1;
@@ -106,14 +129,9 @@ while ($Decomp =~ /(.+)/g) {
}
}
-# exhaustive decomposition
-foreach my $key (keys %Canon) {
- $Canon{$key} = [ getCanonList($key) ];
-}
-
-# exhaustive decomposition
-foreach my $key (keys %Compat) {
- $Compat{$key} = [ getCompatList($key) ];
+# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
+foreach my $j (0x1161..0x1175, 0x11A8..0x11C2) {
+ $Comp2nd{$j} = 1;
}
sub getCanonList {
@@ -130,7 +148,15 @@ sub getCompatList {
# condition @src == @dec is not ok.
}
-sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
+# exhaustive decomposition
+foreach my $key (keys %Canon) {
+ $Canon{$key} = [ getCanonList($key) ];
+}
+
+# exhaustive decomposition
+foreach my $key (keys %Compat) {
+ $Compat{$key} = [ getCompatList($key) ];
+}
sub _U_stringify {
sprintf '"%s"', join '',
@@ -145,38 +171,6 @@ foreach my $hash (\%Canon, \%Compat) {
}
}
-my $prefix = "UNF_";
-my $structname = "${prefix}complist";
-
-our (%Comp1st, %Comp2nd, %CompList);
-
-foreach my $c1 (keys %Compos) {
- my $name = sprintf "${structname}_%06x", $c1;
- $Comp1st{$c1} = $name;
-
- foreach my $c2 (keys %{ $Compos{$c1} }) {
- my $composite = $Compos{$c1}{$c2};
- $Comp2nd{$c2} = 1 if ! $Exclus{$composite} && ! $Combin{$c1};
- $CompList{$name}{$c2} = $composite;
- }
-}
-
-# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
-foreach my $j (0x1161..0x1175, 0x11A8..0x11C2) {
- $Comp2nd{$j} = 1;
-}
-
-my $compinit =
- "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
-
-foreach my $i (sort keys %CompList) {
- $compinit .= "$structname $i [] = {\n";
- $compinit .= join ",\n",
- map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
- sort {$a <=> $b } keys %{ $CompList{$i} };
- $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
-}
-
####################################
my @boolfunc = (
@@ -238,9 +232,19 @@ foreach my $tbl (@boolfunc) {
close FH;
-
####################################
+my $compinit =
+ "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
+
+foreach my $i (sort keys %CompList) {
+ $compinit .= "$structname $i [] = {\n";
+ $compinit .= join ",\n",
+ map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
+ sort {$a <=> $b } keys %{ $CompList{$i} };
+ $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
+}
+
my @tripletable = (
{
file => "unfcmb",