summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Matthäus G. Chajdas <dev@anteru.net> 2022-12-04 19:53:23 +0100
committer Matthäus G. Chajdas <dev@anteru.net> 2022-12-04 19:53:23 +0100
commit be2196eba29e52ef9fe93b0ff60e8d4af6b2524d (patch)
tree 9ed000f888c3d1febcfd2b49cae1fc63cfb19f6b /scripts
parent f8a75279139ec4b62a220ba92690c066a2de9ee0 (diff)
download pygments-git-be2196eba29e52ef9fe93b0ff60e8d4af6b2524d.tar.gz
Improve utility scripts.
Scan snippet files in addition to token output streams.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/check_whitespace_token.py 9
-rw-r--r-- scripts/utility.py 47
2 files changed, 40 insertions, 16 deletions
diff --git a/scripts/check_whitespace_token.py b/scripts/check_whitespace_token.py
index 9fb56ab3..cc3d1aa6 100644
--- a/scripts/check_whitespace_token.py
+++ b/scripts/check_whitespace_token.py
@@ -19,7 +19,14 @@ def check_file(path):
whitespace_re = re.compile('\s+')
for value, token, linenumber in unpack_output_file(path):
- if whitespace_re.fullmatch(value) and 'Whitespace' not in token:
+ if whitespace_re.fullmatch(value):
+ # We allow " " if it's inside a Literal.String for example
+ if 'Literal' in token:
+ continue
+
+ if 'Whitespace' in token:
+ continue
+
print(f'{path}:{linenumber}')
return False
diff --git a/scripts/utility.py b/scripts/utility.py
index d816e3fd..066775f1 100644
--- a/scripts/utility.py
+++ b/scripts/utility.py
@@ -7,41 +7,58 @@
"""
import os
+import os.path
def unpack_output_file(path):
"""
Unpack an output file into objects contining the line number, the text,
- and the token name.
+ and the token name. The output file can be either a ``.output`` file
+ containing a token stream, or a ``.txt`` with input and tokens.
"""
from collections import namedtuple
entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])
+
+ skip_until_tokens = path.endswith('.txt')
+
for linenumber, line in enumerate(open(path).readlines()):
line = line.strip()
- if line:
- # Line can start with ' or ", so let's check which one it is
- # and find the matching one
- quotation_start = 0
- quotation_end = line.rfind(line[0])
- text = line[quotation_start+1:quotation_end]
- token = line.split()[-1]
- text = text.replace('\\n', '\n')
- text = text.replace('\\t', '\t')
- yield entry(text, token, linenumber + 1)
+ if not line:
+ continue
+
+ if skip_until_tokens:
+ if line != '---tokens---':
+ continue
+ else:
+ skip_until_tokens = False
+
+ # Line can start with ' or ", so let's check which one it is
+ # and find the matching one
+ quotation_start = 0
+ quotation_end = line.rfind(line[0])
+ text = line[quotation_start+1:quotation_end]
+ token = line.split()[-1]
+ text = text.replace('\\n', '\n')
+ text = text.replace('\\t', '\t')
+ yield entry(text, token, linenumber + 1)
def process_output_files(root_directory, callback):
"""
- Process all output files in a directory using the provided callback.
- The callback should return `True` in case of success, `False` otherwise.
+ Process all output (i.e. .output and .txt files for snippets) files
+ in a directory tree using the provided callback.
+ The callback should return ``True`` in case of success, ``False``
+ otherwise.
The function returns the number of files for which the callback returned
- `False`.
+ ``False``.
"""
errors = 0
for dir, _, files in os.walk(root_directory):
for file in files:
- if not file.endswith('.output'):
+ _, ext = os.path.splitext(file)
+
+ if ext not in {'.txt', '.output'}:
continue
path = os.path.join(dir, file)