diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-01-26 19:02:15 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-01-26 19:02:15 +0000 |
commit | 67cb912a266a918ca79314995bc280dcbfbdddef (patch) | |
tree | a1fbfab1a375b7accb20b5d8b90e4da3ea33a724 /docutils/tools/dev/generate_punctuation_chars.py | |
parent | a4b569c7da7b9d41741f78c73c9e90b4c5d211cb (diff) | |
download | docutils-67cb912a266a918ca79314995bc280dcbfbdddef.tar.gz |
Modernise helper script generating `punctuation_chars.py`.
Update/clean code after dropping Python 2.7 support.
Based on patch-set by Adam Turner.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@8968 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/tools/dev/generate_punctuation_chars.py')
-rw-r--r-- | docutils/tools/dev/generate_punctuation_chars.py | 76 |
1 files changed, 37 insertions, 39 deletions
diff --git a/docutils/tools/dev/generate_punctuation_chars.py b/docutils/tools/dev/generate_punctuation_chars.py index 30b503cf2..352ee0c86 100644 --- a/docutils/tools/dev/generate_punctuation_chars.py +++ b/docutils/tools/dev/generate_punctuation_chars.py @@ -38,10 +38,8 @@ import unicodedata # Template for utils.punctuation_chars # ------------------------------------ -# -# Problem: ``ur`` prefix fails with Py 3.5 :: -module_template = u'''#!/usr/bin/env python3 +module_template = r'''#!/usr/bin/env python3 # :Id: $Id$ # :Copyright: © 2011, 2017 Günter Milde. # :License: Released under the terms of the `2-Clause BSD license`_, in short: @@ -90,21 +88,21 @@ import unicodedata %(delimiters)s if sys.maxunicode >= 0x10FFFF: # "wide" build %(delimiters_wide)s -closing_delimiters = u'\\\\\\\\.,;!?' +closing_delimiters = '\\\\.,;!?' # Matching open/close quotes # -------------------------- quote_pairs = {# open char: matching closing characters # usage example - u'\\xbb': u'\\xbb', # » » Swedish - u'\\u2018': u'\\u201a', # ‘ ‚ Albanian/Greek/Turkish - u'\\u2019': u'\\u2019', # ’ ’ Swedish - u'\\u201a': u'\\u2018\\u2019', # ‚ ‘ German ‚ ’ Polish - u'\\u201c': u'\\u201e', # “ „ Albanian/Greek/Turkish - u'\\u201e': u'\\u201c\\u201d', # „ “ German „ ” Polish - u'\\u201d': u'\\u201d', # ” ” Swedish - u'\\u203a': u'\\u203a', # › › Swedish + '\xbb': '\xbb', # » » Swedish + '\u2018': '\u201a', # ‘ ‚ Albanian/Greek/Turkish + '\u2019': '\u2019', # ’ ’ Swedish + '\u201a': '\u2018\u2019', # ‚ ‘ German ‚ ’ Polish + '\u201c': '\u201e', # “ „ Albanian/Greek/Turkish + '\u201e': '\u201c\u201d', # „ “ German „ ” Polish + '\u201d': '\u201d', # ” ” Swedish + '\u203a': '\u203a', # › › Swedish } """Additional open/close quote pairs.""" @@ -121,7 +119,7 @@ def match_chars(c1, c2): i = openers.index(c1) except ValueError: # c1 not in openers return False - return c2 == closers[i] or c2 in quote_pairs.get(c1, u'')\ + return c2 == closers[i] or c2 in quote_pairs.get(c1, '') ''' @@ -202,21 
+200,21 @@ def character_category_patterns(): # low quotation marks are also used as closers (e.g. in Greek) # move them to category Pi: - ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK - ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK - ucharlists['Pi'] += [u'‚', u'„'] + ucharlists['Ps'].remove('‚') # 201A SINGLE LOW-9 QUOTATION MARK + ucharlists['Ps'].remove('„') # 201E DOUBLE LOW-9 QUOTATION MARK + ucharlists['Pi'] += ['‚', '„'] - ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pf'] += [u'‛', u'‟'] + ucharlists['Pi'].remove('‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK + ucharlists['Pi'].remove('‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK + ucharlists['Pf'] += ['‛', '‟'] # 301F LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant: - ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d') + ucharlists['Ps'].insert(ucharlists['Pe'].index('\u301f'), '\u301d') - # print(u''.join(ucharlists['Ps']).encode('utf8') - # print(u''.join(ucharlists['Pe']).encode('utf8') - # print(u''.join(ucharlists['Pi']).encode('utf8') - # print(u''.join(ucharlists['Pf']).encode('utf8') + # print(''.join(ucharlists['Ps']).encode('utf8') + # print(''.join(ucharlists['Pe']).encode('utf8') + # print(''.join(ucharlists['Pi']).encode('utf8') + # print(''.join(ucharlists['Pf']).encode('utf8') # The Docutils character categories # --------------------------------- @@ -226,24 +224,24 @@ def character_category_patterns(): # recognition rules`_) # allowed before markup if there is a matching closer - openers = [u'"\'(<\\[{'] + openers = ['"\'(<\\[{'] for category in ('Ps', 'Pi', 'Pf'): openers.extend(ucharlists[category]) # allowed after markup if there is a matching opener - closers = [u'"\')>\\]}'] + closers = ['"\')>\\]}'] for category in ('Pe', 'Pf', 'Pi'): closers.extend(ucharlists[category]) # non-matching, 
allowed on both sides - delimiters = [u'\\-/:'] + delimiters = [r'\-/:'] for category in ('Pd', 'Po'): delimiters.extend(ucharlists[category]) # non-matching, after markup closing_delimiters = [r'\\.,;!?'] - return [u''.join(chars) for chars in (openers, closers, delimiters, + return [''.join(chars) for chars in (openers, closers, delimiters, closing_delimiters)] def separate_wide_chars(s): @@ -259,7 +257,7 @@ def mark_intervals(s): Sort string and replace 'cdef' by 'c-f' and similar. """ l =[] - s = sorted([ord(ch) for ch in s]) + s = sorted(ord(ch) for ch in s) for n in s: try: if l[-1][-1]+1 == n: @@ -273,16 +271,16 @@ def mark_intervals(s): for i in l: i = [chr(n) for n in i] if len(i) > 2: - i = i[0], u'-', i[-1] + i = i[0], '-', i[-1] l2.extend(i) return ''.join(l2) -def wrap_string(s, startstring= "(u'", +def wrap_string(s, startstring= "('", endstring = "')", wrap=67): """Line-wrap a unicode string literal definition.""" c = len(startstring) - contstring = "'\n" + ' ' * (len(startstring)-2) + "u'" + contstring = "'\n" + ' ' * (len(startstring)-2) + "'" l = [startstring] for ch in s.replace("'", r"\'"): c += 1 @@ -300,7 +298,7 @@ def print_differences(old, new, name): print('new %s:' % name) for c in new: if c not in old: - print(' %04x'%ord(c), unicodedata.name(c)) + print(' %04x'%ord(c), c, unicodedata.name(c)) print('removed %s:' % name) for c in old: if c not in new: @@ -381,17 +379,17 @@ if __name__ == '__main__': # Replacements:: substitutions = { - 'python_version': '.'.join(str(s) for s in sys.version_info[:3]), + 'python_version': sys.version.split()[0], 'unidata_version': unicodedata.unidata_version, 'openers': wrap_string(o.encode('unicode-escape').decode(), - startstring="openers = (u'"), + startstring="openers = ('"), 'closers': wrap_string(c.encode('unicode-escape').decode(), - startstring="closers = (u'"), + startstring="closers = ('"), 'delimiters': wrap_string(d.encode('unicode-escape').decode(), - startstring="delimiters = (u'"), + 
startstring="delimiters = ('"), 'delimiters_wide': wrap_string( d_wide.encode('unicode-escape').decode(), - startstring=" delimiters += (u'") + startstring=" delimiters += ('") } - print(module_template % substitutions) + print(module_template % substitutions, end='') |