diff options
Diffstat (limited to 'cpan/Unicode-Collate/mkheader')
-rw-r--r-- | cpan/Unicode-Collate/mkheader | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/cpan/Unicode-Collate/mkheader b/cpan/Unicode-Collate/mkheader new file mode 100644 index 0000000000..dde4ee110c --- /dev/null +++ b/cpan/Unicode-Collate/mkheader @@ -0,0 +1,196 @@ +#!perl +# +# This auxiliary script makes five header files +# used for building XSUB of Unicode::Collate. +# +# Usage: +# <do 'mkheader'> in perl, or <perl mkheader> in command line +# +# Input file: +# Collate/allkeys.txt +# +# Output file: +# ucatbl.h +# +use 5.006; +use strict; +use warnings; +use Carp; +use File::Spec; + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + die "Unicode::Collate cannot stringify a Unicode code point\n"; + } +} + +use constant TRUE => 1; +use constant FALSE => ""; +use constant VCE_TEMPLATE => 'Cn4'; + +sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } + +our $PACKAGE = 'Unicode::Collate, mkheader'; +our $prefix = "UCA_"; + +our %SimpleEntries; # $codepoint => $keys +our @Rest; + +{ + my($f, $fh); + foreach my $d ('.') { + $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); + last if open($fh, $f); + $f = undef; + } + croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; + + while (my $line = <$fh>) { + next if $line =~ /^\s*#/; + if ($line =~ /^\s*\@/) { + push @Rest, $line; + next; + } + + next if $line !~ /^\s*[0-9A-Fa-f]/; + + $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) + + # gets element + my($e, $k) = split /;/, $line; + + croak "Wrong Entry: <charList> must be separated by ';' ". + "from <collElement>" if ! $k; + + my @uv = _getHexArray($e); + next if !@uv; + + if (@uv != 1) { + push @Rest, $line; + next; + } + + my $is_L3_ignorable = TRUE; + + my @key; + foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed + my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. + my @wt = _getHexArray($arr); + push @key, pack(VCE_TEMPLATE, $var, @wt); + $is_L3_ignorable = FALSE + if $wt[0] || $wt[1] || $wt[2]; + # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable + # is completely ignorable. + # For expansion, an entry $is_L3_ignorable + # if and only if "all" CEs are [.0000.0000.0000]. + } + my $mapping = $is_L3_ignorable ? [] : \@key; + my $num = @$mapping; + my $str = chr($num).join('', @$mapping); + $SimpleEntries{$uv[0]} = stringify($str); + } +} + +sub stringify { + my $str = shift; + return sprintf '"%s"', join '', + map sprintf("\\x%02x", ord $_), split //, $str; + +} + +########## writing header files ########## + +my $init = ''; +{ + my $type = "char*"; + my $head = $prefix."rest"; + + $init .= "static $type $head [] = {\n"; + for my $line (@Rest) { + $line =~ s/\s*\z//; + next if $line eq ''; + $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; + $init .= "($type)".stringify($line).",\n"; + } + $init .= "NULL\n"; # sentinel + $init .= "};\n\n"; +} + +my @tripletable = ( + { + file => "ucatbl", + name => "simple", + type => "char*", + hash => \%SimpleEntries, + null => "NULL", + init => $init, + }, +); + +foreach my $tbl (@tripletable) { + my $file = "$tbl->{file}.h"; + my $head = "${prefix}$tbl->{name}"; + my $type = $tbl->{type}; + my $hash = $tbl->{hash}; + my $null = $tbl->{null}; + my $init = $tbl->{init}; + + open FH, ">$file" or croak "$PACKAGE: $file can't be made"; + binmode FH; select FH; + my %val; + + print FH << 'EOF'; +/* + * This file is auto-generated by mkheader. + * Any changes here will be lost! + */ +EOF + + print $init if defined $init; + + foreach my $uv (keys %$hash) { + croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) + unless $uv <= 0x10FFFF; + my @c = unpack 'CCCC', pack 'N', $uv; + $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; + } + + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + for (my $r = 0; $r < 256; $r++) { + next if ! $val{ $p }{ $r }; + printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r; + for (my $c = 0; $c < 256; $c++) { + print "\t", defined $val{$p}{$r}{$c} + ? "($type)".$val{$p}{$r}{$c} + : $null; + print ',' if $c != 255; + print "\n" if $c % 8 == 7; + } + print "};\n\n"; + } + } + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + printf "static $type* ${head}_%02x [256] = {\n", $p; + for (my $r = 0; $r < 256; $r++) { + print $val{ $p }{ $r } + ? sprintf("${head}_%02x_%02x", $p, $r) + : "NULL"; + print ',' if $r != 255; + print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; + } + print "};\n\n"; + } + print "static $type** $head [] = {\n"; + for (my $p = 0; $p <= 0x10; $p++) { + print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; + print ',' if $p != 0x10; + print "\n"; + } + print "};\n\n"; + close FH; +} + +1; +__END__ |