Improve utility scripts.

Scan snippet files in addition to token output streams.
author: Matthäus G. Chajdas <dev@anteru.net> 2022-12-04 19:53:23 +0100
committer: Matthäus G. Chajdas <dev@anteru.net> 2022-12-04 19:53:23 +0100
commit: be2196eba29e52ef9fe93b0ff60e8d4af6b2524d (patch)
tree: 9ed000f888c3d1febcfd2b49cae1fc63cfb19f6b /scripts
parent: f8a75279139ec4b62a220ba92690c066a2de9ee0 (diff)
download: pygments-git-be2196eba29e52ef9fe93b0ff60e8d4af6b2524d.tar.gz
2 files changed, 40 insertions, 16 deletions
diff --git a/scripts/check_whitespace_token.py b/scripts/check_whitespace_token.py
index 9fb56ab3..cc3d1aa6 100644
--- a/scripts/check_whitespace_token.py
+++ b/scripts/check_whitespace_token.py
@@ -19,7 +19,14 @@ def check_file(path):
     whitespace_re = re.compile('\s+')
 
     for value, token, linenumber in unpack_output_file(path):
-        if whitespace_re.fullmatch(value) and 'Whitespace' not in token:
+        if whitespace_re.fullmatch(value):
+            # We allow " " if it's inside a Literal.String for example
+            if 'Literal' in token:
+                continue
+
+            if 'Whitespace' in token:
+                continue
+
             print(f'{path}:{linenumber}')
             return False
 
diff --git a/scripts/utility.py b/scripts/utility.py
index d816e3fd..066775f1 100644
--- a/scripts/utility.py
+++ b/scripts/utility.py
@@ -7,41 +7,58 @@
 """
 
 import os
+import os.path
 
 
 def unpack_output_file(path):
     """
     Unpack an output file into objects contining the line number, the text,
-    and the token name.
+    and the token name. The output file can be either a ``.output`` file
+    containing a token stream, or a ``.txt`` with input and tokens.
     """
     from collections import namedtuple
     entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])
+
+    skip_until_tokens = path.endswith('.txt')
+
     for linenumber, line in enumerate(open(path).readlines()):
         line = line.strip()
-        if line:
-            # Line can start with ' or ", so let's check which one it is
-            # and find the matching one
-            quotation_start = 0
-            quotation_end = line.rfind(line[0])
-            text = line[quotation_start+1:quotation_end]
-            token = line.split()[-1]
-            text = text.replace('\\n', '\n')
-            text = text.replace('\\t', '\t')
-            yield entry(text, token, linenumber + 1)
+        if not line:
+            continue
+
+        if skip_until_tokens:
+            if line != '---tokens---':
+                continue
+            else:
+                skip_until_tokens = False
+
+        # Line can start with ' or ", so let's check which one it is
+        # and find the matching one
+        quotation_start = 0
+        quotation_end = line.rfind(line[0])
+        text = line[quotation_start+1:quotation_end]
+        token = line.split()[-1]
+        text = text.replace('\\n', '\n')
+        text = text.replace('\\t', '\t')
+        yield entry(text, token, linenumber + 1)
 
 
 def process_output_files(root_directory, callback):
     """
-    Process all output files in a directory using the provided callback.
-    The callback should return `True` in case of success, `False` otherwise.
+    Process all output (i.e. .output and .txt files for snippets) files
+    in a directory tree using the provided callback.
+    The callback should return ``True`` in case of success, ``False``
+    otherwise.
 
     The function returns the number of files for which the callback returned
-    `False`.
+    ``False``.
     """
     errors = 0
     for dir, _, files in os.walk(root_directory):
         for file in files:
-            if not file.endswith('.output'):
+            _, ext = os.path.splitext(file)
+
+            if ext not in {'.txt', '.output'}:
                 continue
 
             path = os.path.join(dir, file)
author	Matthäus G. Chajdas <dev@anteru.net>	2022-12-04 19:53:23 +0100
committer	Matthäus G. Chajdas <dev@anteru.net>	2022-12-04 19:53:23 +0100
commit	be2196eba29e52ef9fe93b0ff60e8d4af6b2524d (patch)
tree	9ed000f888c3d1febcfd2b49cae1fc63cfb19f6b /scripts
parent	f8a75279139ec4b62a220ba92690c066a2de9ee0 (diff)
download	pygments-git-be2196eba29e52ef9fe93b0ff60e8d4af6b2524d.tar.gz