diff options
Diffstat (limited to 'trunk/tools/gen-script-table.pl')
-rwxr-xr-x | trunk/tools/gen-script-table.pl | 117 |
1 files changed, 117 insertions, 0 deletions
#!/usr/bin/perl -w
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
# Usage: gen-script-table.pl Scripts.txt > pango-script-table.h
#
# Output (on stdout) is a C header fragment containing:
#   * pango_script_easy_table[] - one entry per code point below
#     $easy_range, giving the script directly;
#   * pango_script_table[]      - { start, chars, script } records for
#     every range at or above $easy_range, with adjacent ranges of the
#     same script merged into one record.
#
######################################################################

use strict;

if (@ARGV != 1) {
    die "Usage: gen-script-table.pl Scripts.txt > pango-script-table.h\n";
}

# Low-precedence 'or' so that the die is attached to open() itself and
# not to the filename argument.
open my $in, '<', $ARGV[0] or die "Cannot open $ARGV[0]: $!\n";

my @ranges;              # list of [ first code point, last code point, SCRIPT ]
my $file = "unknown";    # data file name taken from the header comment, if any
my $easy_range;
my $i;
my $start;
my $end;
my $script;

while (my $line = <$in>) {
    # The first comment line of the data file names it, e.g.
    # "# Scripts-4.0.0.txt"; remember it for the generated header.
    if ($line =~ /^\#\s+(Scripts-.*\.txt)/) {
        $file = $1;
    }

    # Strip comments and skip lines that are empty afterwards.
    $line =~ s/#.*//;
    next if $line =~ /^\s*$/;

    # Either "XXXX ; Script" or "XXXX..YYYY ; Script".
    if ($line !~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) {
        die "Cannot parse line: '$line'\n";
    }
    my ($first, $last, $name) = ($1, $2, $3);

    # A single code point is stored as a range of length one.
    $last = $first unless defined $last;
    push @ranges, [ hex $first, hex $last, uc $name ];
}

close $in;

@ranges = sort { $a->[0] <=> $b->[0] } @ranges;
my $date = gmtime;

print <<"EOT";
/* pango-script-table.h: Generated by gen-script-table.pl
 *
 * Date: $date
 * Source: $file
 *
 * Do not edit.
 */

EOT

$easy_range = 0x2000;

print <<"EOT";
static const guchar pango_script_easy_table[$easy_range] = {
EOT

$i = 0;
$end = -1;

for (my $c = 0; $c < $easy_range; $c++) {

    if ($c % 3 == 0) {
        print "\n ";
    }

    # Advance to the next range once $c has moved past the current one.
    if ($c > $end) {
        if ($i <= $#ranges) {
            ($start, $end, $script) = @{ $ranges[$i] };
            $i++;
        } else {
            # No ranges left: the remainder of the easy table is unknown.
            # Parking $end just below $easy_range keeps the boundary
            # fix-up after the loop from firing.
            $start = $easy_range;
            $end = $easy_range - 1;
        }
    }

    if ($c < $start) {
        print " PANGO_SCRIPT_UNKNOWN,";
    } else {
        printf " PANGO_SCRIPT_%s,", $script;
    }
}

# The last range fetched reaches into (or lies wholly in) the second
# table, so it has to be emitted there as well.  Clip its start only
# when it actually straddles the boundary; a range that begins above
# $easy_range must keep its own start.
if ($end >= $easy_range) {
    $i--;
    if ($ranges[$i]->[0] < $easy_range) {
        $ranges[$i]->[0] = $easy_range;
    }
}

print <<"EOT";

};

static const struct {
    gunichar start;
    guint16 chars;
    guint16 script;
} pango_script_table[] = {
EOT

for (; $i <= $#ranges; $i++) {
    $start = $ranges[$i]->[0];
    $end = $ranges[$i]->[1];
    $script = $ranges[$i]->[2];

    # Fold directly adjacent ranges of the same script into one record.
    while ($i <= $#ranges - 1 &&
           $ranges[$i + 1]->[0] == $end + 1 &&
           $ranges[$i + 1]->[2] eq $script) {
        $i++;
        $end = $ranges[$i]->[1];
    }

    printf " { %#06x, %5d, PANGO_SCRIPT_%s },\n", $start, $end - $start + 1, $script;
}

print "};\n";