diff options
author | Peng Wu <alexepico@gmail.com> | 2019-02-19 15:05:05 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2019-04-09 15:40:07 +0800 |
commit | 053c4878ef5572a5f88f0ce48b75a8ab86f035a1 (patch) | |
tree | 7ce1091d0493779df1344fc66d6aed53f8705f7a /tools | |
parent | 6b92a6dd844a877e69f24bcab3a6934fe7d6f393 (diff) | |
download | pango-053c4878ef5572a5f88f0ce48b75a8ab86f035a1.tar.gz |
import gen-break-table.py
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/gen-break-table.py | 156 |
1 files changed, 156 insertions, 0 deletions
#!/usr/bin/python
#
# tools/gen-break-table.py
#
# Reads Unicode property data files whose data lines look like
#
#     XXXX[..YYYY] ; Property_Name # trailing comment
#
# and prints, on stdout, a C header containing one
# "static inline gboolean _pango_is_<Property_Name> (gunichar wc)" predicate
# for each property selected in print_table().  Each predicate is emitted as
# a tree of nested range tests (a 4-way split at every level).

from __future__ import print_function, division, absolute_import
import sys
import os.path
from collections import OrderedDict


# Leading comment lines of every file passed to load_data(), in load order.
header = []
# Property name -> list of inclusive (start, end) code point tuples.
ranges = OrderedDict()


def load_data(filename):
    """Parse one property data file into the module-level tables.

    The leading run of '#' lines (plus the first line that ends that run)
    is appended to ``header``.  Every data line contributes an inclusive
    (start, end) tuple to ``ranges[property]``; a range that begins exactly
    one code point after the previously stored range of the same property
    is merged into it.

    Raises IOError/OSError if the file cannot be read and ValueError if a
    data line does not have the "range ; property" shape.
    """
    # Use a context manager so the handle is closed even if reading fails.
    with open(filename) as f:
        lines = f.readlines()

    # The line that terminates the comment block is recorded as well,
    # because it is appended before the check.
    for line in lines:
        header.append(line)
        if not line.startswith("#"):
            break

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        # Drop the trailing comment, then keep the first two ';' fields.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        bounds = [int(s, 16) for s in rang.split('..')]
        if len(bounds) > 1:
            start, end = bounds
        else:
            start = end = bounds[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Directly adjacent to the previous range: extend it.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))


def onecondition(start, end):
    """Return the C expression testing ``wc`` against [start, end].

    A single code point yields ``wc == 0xXXXX``; a proper range yields a
    parenthesised ``>= && <=`` pair.  An inverted range (start > end)
    yields the empty string.
    """
    if start == end:
        return 'wc == 0x' + format(start, '04X')
    if start < end:
        return ('(' + 'wc >= 0x' + format(start, '04X') + ' && ' +
                'wc <= 0x' + format(end, '04X') + ')')
    return ''


# print out the numbers in compact form
def print_if_branch(ranges):
    """Print one flat ``if (a || b || ...) return TRUE; return FALSE;``.

    ``ranges`` is a sequence of inclusive (start, end) tuples.
    """
    conditions = [onecondition(start, end) for start, end in ranges]
    # An empty list would otherwise produce the invalid C text "if ()".
    if conditions:
        statement = "||\n".join(conditions)
        print("if (%s)" % statement)
        print("\treturn TRUE;")
    print("return FALSE;")


def print_one_line(start, end):
    """Print the guarding ``if (...)`` line for a proper range only."""
    if start < end:
        outline = 'if (' + onecondition(start, end) + ')'
        print(outline)


def print_ranges(ranges):
    """Print the test for one group of ranges.

    Up to four ranges become a single ``if (... || ...) return TRUE;``.
    A larger group is wrapped in a guard spanning the group's overall
    bounds, followed by a braced block that is split further by
    print_balanced_search().
    """
    if not ranges:
        # Nothing to test; avoid emitting the invalid C text "if ()".
        return

    if 4 >= len(ranges):
        conditions = [onecondition(start, end) for start, end in ranges]
        statement = " ||\n".join(conditions)
        print('if (' + statement + ')')
        print('\treturn TRUE;')
        return

    start = ranges[0][0]
    end = ranges[-1][1]
    print_one_line(start, end)
    print('{')
    print_balanced_search(ranges)
    print('}')


# print if branch like 4-way balanced search
def print_balanced_search(ranges):
    """Print a complete, always-returning body for ``ranges``.

    The list is cut into four consecutive slices, each handed to
    print_ranges(), and the body is closed with ``return FALSE;``.
    ``ranges`` is expected to be sorted, since the guards printed by
    print_ranges() use the first start and last end of each slice.
    """
    length = len(ranges)
    if 4 >= length:
        print_ranges(ranges)
        # print_ranges() only recurses here for more than four ranges, so
        # this path is the outermost call: the generated function still
        # needs its fall-through result.
        print("return FALSE;")
        return

    # Integer arithmetic; same cut points as int(length / 4) and
    # int(length * 2 / 4) without going through a float.
    step = length // 4
    first = step
    second = length // 2
    third = second + step

    print_ranges(ranges[0:first])
    print_ranges(ranges[first:second])
    print_ranges(ranges[second:third])
    print_ranges(ranges[third:])

    print("return FALSE;")


def print_table():
    """Print the whole generated header for the selected properties."""
    # Only these properties get a predicate in the generated header.
    wanted = ['STerm',
              'Virama',
              'Vowel_Dependent']

    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following tables are generated by running:")
    print(" *")
    print(" *   ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent")
    print(" *")
    print(" * on files with these headers:")
    print(" *")
    for l in header:
        print(" * %s" % (l.strip()))
    print(" */")
    print()
    print("#ifndef PANGO_BREAK_TABLE_H")
    print("#define PANGO_BREAK_TABLE_H")
    print()
    print("#include <glib.h>")
    print()

    for typ, s in ranges.items():
        if typ not in wanted:
            continue
        print()
        print("static inline gboolean _pango_is_%s (gunichar wc)" % typ)
        print("{")
        # Sort so ranges gathered from different places in the data files
        # are in ascending order before the search tree is built.
        print_balanced_search(sorted(s))
        print("}")

    print()
    print("#endif /* PANGO_BREAK_TABLE_H */")
    print()
    print("/* == End of generated table == */")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("usage: ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent", file=sys.stderr)
        sys.exit(1)

    load_data(sys.argv[1])
    load_data(sys.argv[2])
    print_table()