diff options
author | Tim-Philipp Müller <tim@centricular.com> | 2020-07-31 07:26:11 +0000 |
---|---|---|
committer | Akira TAGOH <akira@tagoh.org> | 2020-07-31 07:26:11 +0000 |
commit | 57a224f51d6c019e4ce5d75efb22f34a8330423e (patch) | |
tree | e3d7acfe511c07650db57c485c6dcf134e2c78a5 /fc-case | |
parent | 03aa12c75e117acb0d160212536f6f832e0dc8d9 (diff) | |
download | fontconfig-57a224f51d6c019e4ce5d75efb22f34a8330423e.tar.gz |
Add Meson build system
See https://mesonbuild.com
Diffstat (limited to 'fc-case')
-rwxr-xr-x | fc-case/fc-case.py | 240 | ||||
-rw-r--r-- | fc-case/meson.build | 4 |
2 files changed, 244 insertions, 0 deletions
#!/usr/bin/env python3
#
# fontconfig/fc-case/fc-case.py
#
# Copyright © 2004 Keith Packard
# Copyright © 2019 Tim-Philipp Müller
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of the author(s) not be used in
# advertising or publicity pertaining to distribution of the software without
# specific, written prior permission. The authors make no
# representations about the suitability of this software for any purpose. It
# is provided "as is" without express or implied warranty.
#
# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

"""Generate fontconfig's case-folding tables from Unicode's CaseFolding.txt.

The script reads CaseFolding.txt, packs the common (C) and full (F) mappings
into a list of folds, and prints them as C source wrapped in a template:
every template line before a line consisting of ``@@@`` is copied to the
output, the generated tables follow, and the remaining template lines are
copied after them.
"""

from enum import Enum
import argparse
import contextlib
import string
import sys


class CaseFoldClass(Enum):
    """Status letters used in the second field of CaseFolding.txt."""

    COMMON = 1
    FULL = 2
    SIMPLE = 3
    TURKIC = 4


class CaseFoldMethod(Enum):
    """How a fold entry maps upper-case code points to their folded form."""

    RANGE = 0
    EVEN_ODD = 1
    FULL = 2


caseFoldClassMap = {
    'C': CaseFoldClass.COMMON,
    'F': CaseFoldClass.FULL,
    'S': CaseFoldClass.SIMPLE,
    'T': CaseFoldClass.TURKIC,
}

# Fold entries produced by the most recent run of main().
folds = []

# Multi-byte UTF-8 forms: (exclusive upper bound of the code point range,
# shift that brings the lead-byte payload down, payload mask, lead-byte tag).
_UTF8_MULTIBYTE_FORMS = (
    (0x800, 6, 0x1F, 0xC0),
    (0x10000, 12, 0x0F, 0xE0),
    (0x200000, 18, 0x07, 0xF0),
    (0x4000000, 24, 0x03, 0xF8),
    (0x80000000, 30, 0x01, 0xFC),
)


def ucs4_to_utf8(ucs4):
    """Return the UTF-8 encoding of code point *ucs4* as a list of byte values.

    Values below 0x80 are returned as a single byte; larger values use the
    two- to six-byte forms.  Values of 0x80000000 and above cannot be encoded
    and yield an empty list.
    """
    if ucs4 < 0x80:
        return [ucs4]

    for limit, shift, mask, tag in _UTF8_MULTIBYTE_FORMS:
        if ucs4 < limit:
            utf8_rep = [((ucs4 >> shift) & mask) | tag]
            # Continuation bytes carry six payload bits each, high bits first.
            utf8_rep.extend(((ucs4 >> bits) & 0x3F) | 0x80
                            for bits in range(shift - 6, -1, -6))
            return utf8_rep

    return []


def utf8_size(ucs4):
    """Return how many bytes ucs4_to_utf8() produces for *ucs4*."""
    return len(ucs4_to_utf8(ucs4))


case_fold_method_name_map = {
    CaseFoldMethod.RANGE: 'FC_CASE_FOLD_RANGE,',
    CaseFoldMethod.EVEN_ODD: 'FC_CASE_FOLD_EVEN_ODD,',
    CaseFoldMethod.FULL: 'FC_CASE_FOLD_FULL,',
}


def _parse_case_folding_line(line, lineno):
    """Parse one CaseFolding.txt line.

    Returns ``(upper, cfclass, lower)`` where *upper* is the source code
    point, *cfclass* a CaseFoldClass and *lower* the list of result code
    points, or ``None`` when the line does not start with a hex digit
    (comments, blank lines).

    Raises ValueError, with the 1-based line number in the message, when a
    data line has fewer than three fields, an unknown status letter or a
    malformed code point.
    """
    if not line or line[0] not in string.hexdigits:
        return None

    # Drop the trailing "# name" comment, then split the ';'-separated
    # fields; surrounding whitespace is not significant.
    fields = [field.strip() for field in line.split('#', 1)[0].split(';')]
    if len(fields) < 3:
        raise ValueError('Not enough tokens in line {}'.format(lineno))

    try:
        cfclass = caseFoldClassMap[fields[1]]
    except KeyError:
        raise ValueError('Unknown case folding class {!r} in line {}'.format(
            fields[1], lineno)) from None

    try:
        upper = int(fields[0], 16)
        lower = [int(code, 16) for code in fields[2].split()]
    except ValueError as err:
        raise ValueError('Bad code point in line {}: {}'.format(
            lineno, err)) from err

    return upper, cfclass, lower


class _FoldTableBuilder:
    """Accumulates fold entries and the statistics printed with them.

    Each fold is a dict with the keys 'upper', 'method', 'count' and
    'offset'.  For RANGE and EVEN_ODD folds 'offset' is ``lower - upper``;
    for FULL folds it is the index into ``fold_chars`` where the UTF-8
    bytes of the replacement start and 'count' is the number of those bytes.
    """

    def __init__(self):
        self.folds = []
        self.fold_chars = []        # UTF-8 bytes of all FULL replacements
        self.max_fold_chars = 0     # longest FULL replacement, in bytes
        self.max_expand = 0         # largest growth in UTF-8 bytes
        self.min_fold_char = None   # first source code point seen
        self.max_fold_char = None   # last source code point seen
        self._last = None           # most recently created/extended fold

    def add(self, upper, cfclass, lower):
        """Record one parsed CaseFolding.txt entry."""
        # Compare against None: code point 0 is a valid first entry.
        if self.min_fold_char is None:
            self.min_fold_char = upper
        self.max_fold_char = upper

        # Simple (S) and Turkic (T) mappings only widen the min/max range.
        if cfclass not in (CaseFoldClass.COMMON, CaseFoldClass.FULL):
            return

        if len(lower) == 1:
            expand = self._add_single(upper, lower[0])
        else:
            expand = self._add_full(upper, lower)
        if expand > self.max_expand:
            self.max_expand = expand

    def _add_single(self, upper, lower):
        """Add a one-to-one mapping, extending the previous fold if possible.

        Returns the change in UTF-8 length caused by the mapping.
        """
        offset = lower - upper
        last = self._last

        if last is not None and last['method'] == CaseFoldMethod.RANGE:
            # Same distance to the folded form, directly after the range.
            extends = (offset == last['offset']
                       and upper == last['upper'] + last['count'])
        elif last is not None and last['method'] == CaseFoldMethod.EVEN_ODD:
            # Upper/lower pairs alternate, so the next upper is two further.
            extends = (offset == 1
                       and upper == last['upper'] + last['count'] + 1)
        else:
            extends = False

        if extends:
            # The dict is shared with self.folds, so this updates the table.
            last['count'] = upper - last['upper'] + 1
        else:
            if offset == 1:
                method = CaseFoldMethod.EVEN_ODD
            else:
                method = CaseFoldMethod.RANGE
            self._last = {
                'upper': upper,
                'offset': offset,
                'method': method,
                'count': 1,
            }
            self.folds.append(self._last)

        return utf8_size(lower) - utf8_size(upper)

    def _add_full(self, upper, lower):
        """Add a mapping whose result is a sequence of code points.

        Returns the change in UTF-8 length caused by the mapping.
        """
        offset = len(self.fold_chars)
        for code in lower:
            self.fold_chars.extend(ucs4_to_utf8(code))
        count = len(self.fold_chars) - offset

        self._last = {
            'upper': upper,
            'method': CaseFoldMethod.FULL,
            'offset': offset,
            'count': count,
        }
        self.folds.append(self._last)

        if count > self.max_fold_chars:
            self.max_fold_chars = count

        return count - utf8_size(upper)


def _wrap_offset(offset):
    """Wrap a fold offset by 65536 before it is printed.

    NOTE(review): the bounds -32367 / 32368 are kept exactly as found.  They
    look like transposed forms of the signed 16-bit limits -32768 / 32767;
    confirm before changing them, since that would alter the generated
    header.
    """
    if offset < -32367:
        offset += 65536
    if offset > 32368:
        offset -= 65536
    return offset


def _copy_template_head(tmpl_file, out):
    """Copy template lines to *out* up to (not including) the '@@@' marker."""
    # FIXME: this is a bit silly really, might just as well hardcode
    # the license header in the script and drop the template
    for line in tmpl_file:
        if line.strip() == '@@@':
            break
        print(line, end='', file=out)


def _write_tables(out, tables):
    """Print the #defines and both C arrays for *tables* to *out*."""
    # NOTE(review): the text this was recovered from had runs of whitespace
    # collapsed, so the spacing inside the fcCaseFold declaration and row
    # literals below may originally have been wider.  It is cosmetic only.
    fold_chars = tables.fold_chars

    print('#define FC_NUM_CASE_FOLD\t{}'.format(len(tables.folds)), file=out)
    print('#define FC_NUM_CASE_FOLD_CHARS\t{}'.format(len(fold_chars)),
          file=out)
    print('#define FC_MAX_CASE_FOLD_CHARS\t{}'.format(tables.max_fold_chars),
          file=out)
    print('#define FC_MAX_CASE_FOLD_EXPAND\t{}'.format(tables.max_expand),
          file=out)
    print('#define FC_MIN_FOLD_CHAR\t0x{:08x}'.format(tables.min_fold_char),
          file=out)
    print('#define FC_MAX_FOLD_CHAR\t0x{:08x}'.format(tables.max_fold_char),
          file=out)
    print('', file=out)

    # Dump out ranges
    print('static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {',
          file=out)
    for fold in tables.folds:
        print(' {} 0x{:08x}, {:22s} 0x{:04x}, {:6d} {},'.format(
            '{', fold['upper'], case_fold_method_name_map[fold['method']],
            fold['count'], _wrap_offset(fold['offset']), '}'), file=out)
    print('};\n', file=out)

    # Dump out "other" values, sixteen bytes per line
    print('static const FcChar8\tfcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {',
          file=out)
    last_index = len(fold_chars) - 1
    for n, byte in enumerate(fold_chars):
        if n == last_index:
            end = ''
        elif n % 16 == 15:
            end = ',\n'
        else:
            end = ','
        print('0x{:02x}'.format(byte), end=end, file=out)
    print('\n};', file=out)


def main(argv=None):
    """Run the generator and return the process exit status.

    *argv* is the argument list without the program name; ``None`` makes
    argparse read ``sys.argv``.  The template is read from standard input
    when ``--template`` is not given, and the result goes to standard output
    when ``--output`` is not given.

    Returns 0 on success and 1 when CaseFolding.txt is malformed or contains
    no data lines; in those cases the output file is not touched.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('case_folding_file')
    parser.add_argument('--template', dest='template_file', default=None)
    parser.add_argument('--output', dest='output_file', default=None)
    args = parser.parse_args(argv)

    # Read the standard Unicode CaseFolding.txt file
    tables = _FoldTableBuilder()
    try:
        with open(args.case_folding_file, 'r', encoding='utf-8') as casefile:
            for lineno, line in enumerate(casefile, start=1):
                entry = _parse_case_folding_line(line, lineno)
                if entry is not None:
                    tables.add(*entry)
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1

    if tables.min_fold_char is None:
        print('No case folding entries found in {}'.format(
            args.case_folding_file), file=sys.stderr)
        return 1

    # Keep the module-level list in step with what is written out.
    folds[:] = tables.folds

    with contextlib.ExitStack() as stack:
        # Open the template before the output so that a missing template
        # does not truncate an existing output file.
        if args.template_file:
            tmpl_file = stack.enter_context(
                open(args.template_file, 'r', encoding='utf-8'))
        else:
            tmpl_file = sys.stdin

        if args.output_file:
            out = stack.enter_context(
                open(args.output_file, 'w', encoding='utf-8'))
        else:
            out = sys.stdout

        # Scan the input until the marker is found
        _copy_template_head(tmpl_file, out)

        # Dump these tables
        _write_tables(out, tables)

        # And flush out the rest of the input file
        for line in tmpl_file:
            print(line, end='', file=out)

        out.flush()

    return 0


if __name__ == '__main__':
    sys.exit(main())

# ---------------------------------------------------------------------------
# Provenance: this script comes from a commit diff
# (diff --git a/fc-case/fc-case.py b/fc-case/fc-case.py, new file mode
# 100755, index 0000000..360bd32, @@ -0,0 +1,240 @@).  The same diff also
# added the build rule that invokes it
# (diff --git a/fc-case/meson.build b/fc-case/meson.build, new file mode
# 100644, index 0000000..a14b635, @@ -0,0 +1,4 @@):
#
#   fccase_h = custom_target('fccase.h',
#     output: 'fccase.h',
#     input: ['CaseFolding.txt', 'fccase.tmpl.h'],
#     command: [find_program('fc-case.py'), '@INPUT0@', '--template', '@INPUT1@', '--output', '@OUTPUT@'])
# ---------------------------------------------------------------------------