summaryrefslogtreecommitdiff
path: root/tools/compress-table.pl
blob: 5512965147ac43cbadda161fd98796a63b238cb6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/perl -w

# Turn a map file name into the identifier used for that encoding in the
# generated C tables: prefix "ENC_", upper-case the name, and replace every
# '-' with '_' (e.g. "iso-8859-1" becomes "ENC_ISO_8859_1").
#
# Takes the map name as its only argument and returns the identifier.
# Works on lexical copies only, so no package variable is touched.
sub convert {
    my ($name) = @_;
    (my $ident = "ENC_" . uc($name)) =~ tr/-/_/;
    return $ident;
}

# Index that the next previously unseen combination will receive.
# Numbering starts at 1; the script prints a literal 0 as the first entry
# of char_mask_map before any combination is added.
our $combo_index = 1;

# Maps a combination string (encoding identifiers joined with '|') to the
# index it was given.  Kept as a package variable because the top-level
# code that emits char_masks looks indices up in it directly.
our %combos;

# Register a combination of encodings.
#
# The first time a given combination string is seen it is assigned the next
# index and printed as one initializer line ("  <combo>,"); later calls with
# the same string do nothing.
sub add {
    my ($combo) = @_;
    return if exists $combos{$combo};

    $combos{$combo} = $combo_index++;
    # Plain print: the combination is data, not a printf format string.
    print "  $combo,\n";
    return;
}

# Number of cells already printed on the current output row (0..15).
my $col = 0;

# Print the table cells for codepoints $start through $u.
#
# Every codepoint in [$start, $u) gets a "  0," cell, then codepoint $u
# itself gets $index formatted as " %2d,".  A newline is printed after
# every 16th cell; the running position is kept in $col across calls.
sub output {
    my ($start, $u, $index) = @_;

    # Advance the column counter, wrapping the row after 16 cells.
    my $advance = sub {
        $col = ($col + 1) % 16;
        print "\n" unless $col;
    };

    for my $gap ($start .. $u - 1) {
        print "  0,";
        $advance->();
    }

    printf " %2d,", $index;
    $advance->();
}

#
# Read in the maps
#
# One [codepoint, encoding identifier] pair per mapping line read from the
# files in maps/.
my @codepoints = ();

opendir(my $maps_dir, "maps") or die "Cannot open maps/ subdirectory: $!\n";
# Sort the listing so the generated tables do not depend on the order in
# which the filesystem happens to return directory entries.
my @map_names = sort readdir($maps_dir);
closedir($maps_dir);

for my $map (@map_names) {
    next if $map =~ /^\./;                 # ".", ".." and hidden files
    next if $map =~ /~$/;                  # names ending in '~'
    next if $map =~ /^(?:CVS|README)$/;    # exactly "CVS" or "README"

    open(my $map_fh, '<', "maps/$map") or die "Cannot open map '$map': $!\n";

    my $encoding = convert($map);
    while (my $line = <$map_fh>) {
        # Strip a trailing comment and trailing whitespace (including the
        # newline); skip lines that are empty afterwards.
        (my $entry = $line) =~ s/\s*#.*//;
        $entry =~ s/\s*$//;
        next if $entry eq '';

        # Each remaining line must be two hex numbers; the second one is
        # the codepoint recorded for this encoding.
        if ($entry !~ /^\s*(0x[A-Fa-f0-9]+)\s+(0x[A-Fa-f0-9]+)$/) {
            die "Cannot parse line '$entry' in map '$map'\n";
        }
        push @codepoints, [hex($2), $encoding];
    }
    close($map_fh);
}

# 
# And sort them
#
# Order the pairs by codepoint so that all encodings listing the same
# codepoint end up next to each other.
@codepoints = sort { $a->[0] <=> $b->[0] } @codepoints;

print "const guint32 char_mask_map[] = {\n  0,\n";

# First pass: for each run of pairs sharing a codepoint, join the encoding
# identifiers with '|' and hand the resulting combination to add().
{
    my ($run_cp, $run_mask);

    for my $pair (@codepoints) {
        my ($codepoint, $enc) = @$pair;

        # Same codepoint as the current run: extend its combination.
        if (defined $run_cp && $run_cp eq $codepoint) {
            $run_mask .= "|" . $enc;
            next;
        }

        # A new codepoint begins: flush the finished run, if any.
        add($run_mask) if defined $run_cp;
        ($run_cp, $run_mask) = ($codepoint, $enc);
    }

    # Flush the last run (absent only when no pairs were read at all).
    add($run_mask) if defined $run_cp;
}

# Close the char_mask_map initializer and open the char_masks one.
print "};\n\nconst guchar char_masks[] = {\n";

# Second pass: walk the same runs of equal codepoints again.  For each run,
# output() prints a 0 cell for every codepoint skipped since the previous
# run, followed by the index that %combos holds for the run's combination.
{
    my ($run_cp, $run_mask);
    my $next_unfilled = 0;    # first codepoint that has no cell yet

    for my $pair (@codepoints) {
        my ($codepoint, $enc) = @$pair;

        # Same codepoint as the current run: extend its combination.
        if (defined $run_cp && $run_cp eq $codepoint) {
            $run_mask .= "|" . $enc;
            next;
        }

        # A new codepoint begins: emit the finished run, if any.
        if (defined $run_cp) {
            output($next_unfilled, $run_cp, $combos{$run_mask});
            $next_unfilled = $run_cp + 1;
        }
        ($run_cp, $run_mask) = ($codepoint, $enc);
    }

    # Emit the last run (absent only when no pairs were read at all).
    output($next_unfilled, $run_cp, $combos{$run_mask}) if defined $run_cp;
}

print "\n};\n";