summaryrefslogtreecommitdiff
path: root/tools/gen-script-table.pl
blob: 6820eb5d76e8702042eb3ffa2d9ac9a374cc7295 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/perl -w 
#
# Script to convert http://www.unicode.org/Public/UNIDATA/Scripts.txt
# into a machine-readable table.
#
######################################################################

# Expect exactly one argument: the path to the Scripts.txt data file.
if (@ARGV != 1) {
    die "Usage: gen-script-table.pl Scripts.txt > pango-script-table.h\n";
}

# Three-argument open with the low-precedence 'or'.  With '||' the test binds
# to the filename operand, so a failed open would never reach die.
open(my $in, '<', $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n";

my @ranges;    # entries are [ first code point, last code point, SCRIPT ]
my $file;      # data file name found in the input's header comment, if any

while (<$in>) {
    # Strip the line ending so a parse error message quotes the line cleanly.
    chomp;

    # Remember the versioned file name from a "# Scripts-....txt" header line.
    if (/^\#\s+(Scripts-.*\.txt)/) {
        $file = $1;
    }

    # Drop comments, then skip lines that carry no data.
    s/#.*//;
    next if /^\s*$/;

    # Data lines look like "0041..005A ; Latin" or "00AA ; Latin".
    # A failed match returns an empty list, which makes the assignment false.
    my ($first, $last, $script) =
        /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/
        or die "Cannot parse line: '$_'\n";

    # A single code point is stored as a range of length one.
    $last = $first unless defined $last;

    push @ranges, [ hex $first, hex $last, uc $script ];
}

close $in;

# Timestamp (UTC) recorded in the generated header.
my $date = gmtime();

# $file is only set when the input carried a "# Scripts-....txt" header line;
# fall back to a placeholder rather than interpolating an undefined value.
my $source = defined $file ? $file : "unknown";

# Emit the banner comment and the opening of the C table definition.
print <<"EOT";
/* pango-script-table.h: Generated by gen-script-table.pl
 *
 *  Date: $date
 *  Source: $source
 *
 * Do not edit.   
 */
static const struct {
    gunichar    start;
    guint16     chars;
    guint16     script;		/* PangoScript */
} pango_script_table[] = { 
EOT

# Order the ranges by first code point so that neighbours in the list are
# neighbours in the code space.
@ranges = sort { $a->[0] <=> $b->[0] } @ranges;

# Walk the sorted list with a cursor.  Each pass takes one range, absorbs any
# directly following ranges that continue it with the same script, and writes
# the merged run as a single table row.
my $idx = 0;
while ($idx < @ranges) {
    my ($first, $last, $name) = @{ $ranges[$idx++] };

    while ($idx < @ranges) {
        my ($next_first, $next_last, $next_name) = @{ $ranges[$idx] };

        # Stop at the first range that leaves a gap or changes script.
        last if $next_first != $last + 1 or $next_name ne $name;

        $last = $next_last;
        $idx++;
    }

    # COMMON runs are not written to the table.
    next if $name eq "COMMON";

    printf " { %#06x, %5d, PANGO_SCRIPT_%s },\n", $first, $last - $first + 1, $name;
}

# Close the C array initializer.
print "};\n";