#!/usr/bin/perl -w
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
# Usage: gen-script-table.pl Scripts.txt > pango-script-table.h
#
# Reads the Scripts.txt file named on the command line, collects every
# "START[..END] ; SCRIPT" entry, merges adjacent entries that belong to the
# same script, and prints a C table of { start, chars, script } records on
# standard output.  Dies if the file cannot be opened or a line cannot be
# parsed.
#
######################################################################

use strict;

if (@ARGV != 1) {
    die "Usage: gen-script-table.pl Scripts.txt > pango-script-table.h\n";
}

# Three-argument open with a lexical handle.  The low-precedence "or" is
# required here: "||" would bind to the filename operand and the die would
# never run when the open fails.
open my $in, '<', $ARGV[0]
    or die "Cannot open $ARGV[0]: $!\n";

my @ranges;    # list of [ first code point, last code point, script name ]
my $file;      # "Scripts-*.txt" name taken from the header comment, if any

while (<$in>) {
    # Remember the versioned file name from the header comment before the
    # comment is stripped below.
    if (/^\#\s+(Scripts-.*\.txt)/) {
        $file = $1;
    }

    # Drop comments, then skip lines that are empty as a result.
    s/#.*//;
    next if /^\s*$/;

    # In list context the match returns its captures; the second one is
    # undef for single-code-point entries.
    my ($first, $last, $script) =
        /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z_]+)\s+$/
        or die "Cannot parse line: $_\n";

    # A single code point is stored as a range of length one.
    $last = $first unless defined $last;

    push @ranges, [ hex $first, hex $last, $script ];
}

close $in;

my $date   = gmtime;
my $source = defined $file ? $file : '(unknown)';

print <<"EOT";
/* pango-script-table.h: Generated by gen-script-table.pl
 *
 *  Date: $date
 *  Source: $source
 *
 * Do not edit.
 */

static const struct {
    gunichar    start;
    guint16     chars;
    guint16     script;      /* PangoScript */
} pango_script_table[] = {
EOT

# Order by starting code point so that adjacent ranges can be merged.
@ranges = sort { $a->[0] <=> $b->[0] } @ranges;

# The index is advanced inside the body while merging, hence the C-style loop.
for (my $i = 0; $i <= $#ranges; $i++) {
    my ($start, $end, $script) = @{ $ranges[$i] };

    # Absorb every following range that starts right after the current end
    # and carries the same script name.
    while ($i < $#ranges
           && $ranges[$i + 1]->[0] == $end + 1
           && $ranges[$i + 1]->[2] eq $script) {
        $i++;
        $end = $ranges[$i]->[1];
    }

    # NOTE(review): "chars" is declared guint16 in the emitted struct, so a
    # merged run longer than 65535 code points would not fit -- confirm that
    # the input never produces one.
    printf " { %#06x, %5d, PANGO_SCRIPT_%s },\n",
        $start, $end - $start + 1, $script;
}

print "};\n";