summaryrefslogtreecommitdiff
path: root/scripts/update-simptrad-table.py
blob: ce31fb40af9ccd36bf2c974845fc1d5e69f5695c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/python3
from ZhConversion import *
from valid_hanzi import *


def convert(s, d, n):
    """Convert *s* with dictionary *d* using greedy longest-match lookup.

    At every position the longest window (at most *n* characters, and never
    past the end of *s*) is tried first, then progressively shorter ones.
    A multi-character window is accepted only when *d* maps it to a truthy
    value; a single character that is not in *d* is copied through as-is.

    Args:
        s: The string to convert.
        d: Mapping from source substrings to their replacements.
        n: Maximum window length (in characters) to look up in *d*.

    Returns:
        The concatenation of the replacements chosen for each window.
    """
    pieces = []
    total = len(s)
    pos = 0
    while pos < total:
        # Longest candidate first; shrink one character at a time.
        for width in range(min(n, total - pos), 0, -1):
            chunk = s[pos : pos + width]
            if width == 1:
                # Last resort: an unmapped single character maps to itself.
                replacement = d.get(chunk, chunk)
            else:
                replacement = d.get(chunk)
            if replacement:
                break
        pieces.append(replacement)
        # Skip past exactly the characters consumed by the chosen window.
        pos += width
    return "".join(pieces)


def filter_more(records, n):
    """Drop records that are already implied by the records of length <= n.

    The records whose key has at most *n* characters form a base dictionary.
    Every record whose value is exactly what ``convert`` produces from its
    key with that base dictionary is redundant and is removed; the base
    records themselves are always kept.

    Args:
        records: List of ``(key, value)`` string pairs.
        n: Maximum key length of the base records, also passed to
            ``convert`` as its maximum lookup window.

    Returns:
        A new list: the records that the base dictionary cannot reproduce,
        followed by the base records.
    """
    # The base set must be bounded by n. The previous bound of 0 never
    # matched a non-empty key, which left the base dictionary empty and
    # made n irrelevant to the filtering.
    base = [(k, v) for (k, v) in records if len(k) <= n]
    base_map = dict(base)
    # Keep only what greedy conversion with the base dictionary gets wrong.
    irreducible = [(k, v) for (k, v) in records if convert(k, base_map, n) != v]
    return irreducible + base


def filter_func(args):
    """Decide whether a ``(key, value)`` record is acceptable for the table.

    A record is rejected when the two strings differ in length, when the key
    is longer than 6 characters, or when either string contains a character
    that is not in ``valid_hanzi``.

    Args:
        args: A ``(key, value)`` pair of strings.

    Returns:
        True if the record passes every check, False otherwise.
    """
    key, value = args

    # Lengths must match and the key may be at most 6 characters long.
    same_length = len(key) == len(value)
    if not same_length or len(key) > 6:
        return False

    # Every character of the key, then of the value, must be a valid hanzi.
    for text in (key, value):
        for ch in text:
            if ch not in valid_hanzi:
                return False

    # Disabled per-character check of value against S_2_T:
    # for c1, c2 in zip(k, v):
    #     if c1 == c2:
    #         continue
    #     if c2 not in S_2_T.get(c1, []):
    #         return False
    return True


def get_records():
    """Build the sorted, de-duplicated conversion records from ``zh2Hant``.

    The entries of ``zh2Hant`` are first screened with ``filter_func``; the
    survivors are then passed through ``filter_more`` once for each
    n in ``1 .. maxlen - 2`` and finally sorted.

    Returns:
        A ``(maxlen, records)`` tuple, where ``maxlen`` is the longest key
        length among the screened entries (measured before ``filter_more``
        runs) and ``records`` is the sorted list of ``(key, value)`` pairs.
    """
    records = list(filter(filter_func, zh2Hant.items()))

    # Measured before pruning, so it reflects the screened input.
    maxlen = max(len(key) for key, _ in records)

    for n in range(1, maxlen - 1):
        records = filter_more(records, n)

    return maxlen, sorted(records)


def main():
    """Print the conversion table as C source on standard output.

    Emits a ``simp_to_trad`` array of string pairs, one row per record
    returned by ``get_records``, followed by a ``SIMP_TO_TRAD_MAX_LEN``
    macro holding the maximum key length.
    """
    # The array header is written before the records are computed.
    print("static const gchar *simp_to_trad[][2] = {")
    maxlen, records = get_records()
    for record in records:
        s, ts = record
        print(f'    {{ "{s}", "{ts}" }},')
    print("};")
    print(f"#define SIMP_TO_TRAD_MAX_LEN ({maxlen})")


# Generate the table only when run as a script, not when imported.
if __name__ == "__main__":
    main()