summary | refs | log | tree | commit | diff
path: root/docutils/tools/dev/generate_punctuation_chars.py
diff options
context:
space:
mode:
author aa-turner <aa-turner@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-11-16 01:17:33 +0000
committer aa-turner <aa-turner@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-11-16 01:17:33 +0000
commit 24672bd4c6b547b3d800abee2399c665ceae285c (patch)
tree f90bd605c06aecc1ed0898335e40cce2444edf69 /docutils/tools/dev/generate_punctuation_chars.py
parent 114063f6e2fb71af37b96ee80933fefbb812b1cd (diff)
download docutils-24672bd4c6b547b3d800abee2399c665ceae285c.tar.gz
Update ``generate_punctuation_chars`` templates
This changes the templates to match changes made to ``punctuation_chars``, making updating the file easier. It also changes the string wrapping algorithm, leaving less space blank on each line.

git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9246 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/tools/dev/generate_punctuation_chars.py')
-rw-r--r-- docutils/tools/dev/generate_punctuation_chars.py 54
1 files changed, 28 insertions, 26 deletions
diff --git a/docutils/tools/dev/generate_punctuation_chars.py b/docutils/tools/dev/generate_punctuation_chars.py
index 976632521..8f0a1b08a 100644
--- a/docutils/tools/dev/generate_punctuation_chars.py
+++ b/docutils/tools/dev/generate_punctuation_chars.py
@@ -55,7 +55,6 @@ module_template = r'''#!/usr/bin/env python3
# ``docutils/tools/dev/generate_punctuation_chars.py``.
# ::
-import re
import sys
"""Docutils character category patterns.
@@ -77,37 +76,37 @@ import sys
The category of some characters changed with the development of the
Unicode standard. The current lists are generated with the help of the
- "unicodedata" module of Python %(python_version)s
- (based on Unicode version %(unidata_version)s).
+ "unicodedata" module of Python %(python_version)s (based on Unicode version %(unidata_version)s).
.. _inline markup recognition rules:
- https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
- #inline-markup-recognition-rules
+ https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
"""
%(openers)s
%(closers)s
%(delimiters)s
-if sys.maxunicode >= 0x10FFFF: # "wide" build
+if sys.maxunicode >= 0x10FFFF: # "wide" build
%(delimiters_wide)s
-closing_delimiters = '\\\\.,;!?'
+closing_delimiters = r'\\.,;!?'
# Matching open/close quotes
# --------------------------
-quote_pairs = {# open char: matching closing characters # usage example
- '\xbb': '\xbb', # » » Swedish
- '\u2018': '\u201a', # ‘ ‚ Albanian/Greek/Turkish
- '\u2019': '\u2019', # ’ ’ Swedish
- '\u201a': '\u2018\u2019', # ‚ ‘ German ‚ ’ Polish
- '\u201c': '\u201e', # “ „ Albanian/Greek/Turkish
- '\u201e': '\u201c\u201d', # „ “ German „ ” Polish
- '\u201d': '\u201d', # ” ” Swedish
- '\u203a': '\u203a', # › › Swedish
- }
+quote_pairs = {
+ # open char: matching closing characters # usage example
+ '\xbb': '\xbb', # » » Swedish
+ '\u2018': '\u201a', # ‘ ‚ Albanian/Greek/Turkish
+ '\u2019': '\u2019', # ’ ’ Swedish
+ '\u201a': '\u2018\u2019', # ‚ ‘ German ‚ ’ Polish
+ '\u201c': '\u201e', # “ „ Albanian/Greek/Turkish
+ '\u201e': '\u201c\u201d', # „ “ German „ ” Polish
+ '\u201d': '\u201d', # ” ” Swedish
+ '\u203a': '\u203a', # › › Swedish
+}
"""Additional open/close quote pairs."""
+
def match_chars(c1, c2):
"""Test whether `c1` and `c2` are a matching open/close character pair.
@@ -281,18 +280,20 @@ def mark_intervals(s):
return ''.join(lst2)
-def wrap_string(s, startstring="('", endstring="')", wrap=67):
+def wrap_string(s, startstring="(", endstring=")", wrap=71):
"""Line-wrap a unicode string literal definition."""
c = len(startstring)
- contstring = "'\n" + ' '*(len(startstring)-2) + "'"
- lst = [startstring]
+ left_indent = ' '*(c - len(startstring.lstrip(' ')))
+ line_start_string = f"\n {left_indent}'"
+ cont_string = f"'{line_start_string}"
+ lst = [startstring, line_start_string]
for ch in s.replace("'", r"\'"):
c += 1
if ch == '\\' and c > wrap:
c = len(startstring)
- ch = contstring + ch
+ lst.append(cont_string)
lst.append(ch)
- lst.append(endstring)
+ lst.append(f"'\n{left_indent}{endstring}")
return ''.join(lst)
@@ -322,6 +323,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-t', '--test', action="store_true",
help='test for changed character categories')
+ parser.add_argument('-o', '--out')
args = parser.parse_args()
# (Re)create character patterns
@@ -386,14 +388,14 @@ if __name__ == '__main__':
'python_version': sys.version.split()[0],
'unidata_version': unicodedata.unidata_version,
'openers': wrap_string(o.encode('unicode-escape').decode(),
- startstring="openers = ('"),
+ startstring="openers = ("),
'closers': wrap_string(c.encode('unicode-escape').decode(),
- startstring="closers = ('"),
+ startstring="closers = ("),
'delimiters': wrap_string(d.encode('unicode-escape').decode(),
- startstring="delimiters = ('"),
+ startstring="delimiters = ("),
'delimiters_wide': wrap_string(
d_wide.encode('unicode-escape').decode(),
- startstring=" delimiters += ('")
+ startstring=" delimiters += (")
}
print(module_template % substitutions, end='')