From f288eaafcccddde2f91606ca8f0785d9fd1b691b Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 21 Feb 2020 15:24:25 +0800 Subject: Update gen-break-table.py for Unicode 13 (cherry-picked from commit 2ac57751) --- tools/gen-break-table.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tools/gen-break-table.py b/tools/gen-break-table.py index 2c8a7d70..5d346c8c 100755 --- a/tools/gen-break-table.py +++ b/tools/gen-break-table.py @@ -9,20 +9,21 @@ from collections import OrderedDict header = [] ranges = OrderedDict() -def load_data(filename): +def load_data(filename, prefix=""): global header, ranges f = open(filename) lines = f.readlines() for line in lines: - header.append(line) if not line.startswith("#"): break + header.append(line) for line in lines: line = line.strip() if not line or line[0] == '#': continue rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]] + typ = prefix + typ rang = [int(s, 16) for s in rang.split('..')] if len(rang) > 1: @@ -87,6 +88,7 @@ def print_ranges(ranges): def print_balanced_search(ranges): if 4 >= len(ranges): print_ranges(ranges) + print("return FALSE;") return length = len(ranges) @@ -116,7 +118,7 @@ def print_table(): print("/*") print(" * The following tables are generated by running:") print(" *") - print(" * ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent") + print(" * ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt EastAsianWidth.txt | indent") print(" *") print(" * on files with these headers:") print(" *") @@ -132,14 +134,22 @@ def print_table(): for typ,s in ranges.items(): if typ not in ['STerm', - 'Virama', - 'Vowel_Dependent']: continue + 'Virama', + 'Vowel_Dependent', + 'Consonant_Prefixed', + 'Consonant_Preceding_Repha']: continue print() print("static inline gboolean _pango_is_%s (gunichar wc)" % typ) print("{") - print_balanced_search(sorted(s)) + print_balanced_search(sorted(s)) print("}") + s = ranges["EastAsian_F"] + ranges["EastAsian_W"] + ranges["EastAsian_H"] + print("static inline gboolean _pango_is_EastAsianWide (gunichar wc)") + print("{") + print_balanced_search(sorted(s)) + print("}") + print() print("#endif /* PANGO_BREAK_TABLE_H */") print() @@ -147,10 +157,11 @@ def print_table(): if __name__ == "__main__": - if len (sys.argv) != 3: - print("usage: ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent", file=sys.stderr) + if len (sys.argv) != 4: + print("usage: ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt EastAsianWidth.txt | indent", file=sys.stderr) sys.exit (1) load_data(sys.argv[1]) load_data(sys.argv[2]) + load_data(sys.argv[3], "EastAsian_") print_table() -- cgit v1.2.1