diff options
author | Matthäus G. Chajdas <dev@anteru.net> | 2022-12-04 19:53:23 +0100 |
---|---|---|
committer | Matthäus G. Chajdas <dev@anteru.net> | 2022-12-04 19:53:23 +0100 |
commit | be2196eba29e52ef9fe93b0ff60e8d4af6b2524d (patch) | |
tree | 9ed000f888c3d1febcfd2b49cae1fc63cfb19f6b /scripts | |
parent | f8a75279139ec4b62a220ba92690c066a2de9ee0 (diff) | |
download | pygments-git-be2196eba29e52ef9fe93b0ff60e8d4af6b2524d.tar.gz |
Improve utility scripts.
Scan snippet files in addition to token output streams.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/check_whitespace_token.py | 9 | ||||
-rw-r--r-- | scripts/utility.py | 47 |
2 files changed, 40 insertions, 16 deletions
```diff
diff --git a/scripts/check_whitespace_token.py b/scripts/check_whitespace_token.py
index 9fb56ab3..cc3d1aa6 100644
--- a/scripts/check_whitespace_token.py
+++ b/scripts/check_whitespace_token.py
@@ -19,7 +19,14 @@ def check_file(path):
     whitespace_re = re.compile('\s+')
 
     for value, token, linenumber in unpack_output_file(path):
-        if whitespace_re.fullmatch(value) and 'Whitespace' not in token:
+        if whitespace_re.fullmatch(value):
+            # We allow " " if it's inside a Literal.String for example
+            if 'Literal' in token:
+                continue
+
+            if 'Whitespace' in token:
+                continue
+
             print(f'{path}:{linenumber}')
             return False
 
diff --git a/scripts/utility.py b/scripts/utility.py
index d816e3fd..066775f1 100644
--- a/scripts/utility.py
+++ b/scripts/utility.py
@@ -7,41 +7,58 @@
 """
 
 import os
+import os.path
 
 
 def unpack_output_file(path):
     """
     Unpack an output file into objects contining the line number, the text,
-    and the token name.
+    and the token name. The output file can be either a ``.output`` file
+    containing a token stream, or a ``.txt`` with input and tokens.
     """
     from collections import namedtuple
     entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])
+
+    skip_until_tokens = path.endswith('.txt')
+
     for linenumber, line in enumerate(open(path).readlines()):
         line = line.strip()
-        if line:
-            # Line can start with ' or ", so let's check which one it is
-            # and find the matching one
-            quotation_start = 0
-            quotation_end = line.rfind(line[0])
-            text = line[quotation_start+1:quotation_end]
-            token = line.split()[-1]
-            text = text.replace('\\n', '\n')
-            text = text.replace('\\t', '\t')
-            yield entry(text, token, linenumber + 1)
+        if not line:
+            continue
+
+        if skip_until_tokens:
+            if line != '---tokens---':
+                continue
+            else:
+                skip_until_tokens = False
+
+        # Line can start with ' or ", so let's check which one it is
+        # and find the matching one
+        quotation_start = 0
+        quotation_end = line.rfind(line[0])
+        text = line[quotation_start+1:quotation_end]
+        token = line.split()[-1]
+        text = text.replace('\\n', '\n')
+        text = text.replace('\\t', '\t')
+        yield entry(text, token, linenumber + 1)
 
 
 def process_output_files(root_directory, callback):
     """
-    Process all output files in a directory using the provided callback.
-    The callback should return `True` in case of success, `False` otherwise.
+    Process all output (i.e. .output and .txt files for snippets) files
+    in a directory tree using the provided callback.
+    The callback should return ``True`` in case of success, ``False``
+    otherwise.
 
     The function returns the number of files for which the callback returned
-    `False`.
+    ``False``.
     """
     errors = 0
     for dir, _, files in os.walk(root_directory):
         for file in files:
-            if not file.endswith('.output'):
+            _, ext = os.path.splitext(file)
+
+            if ext not in {'.txt', '.output'}:
                 continue
 
             path = os.path.join(dir, file)
```