summaryrefslogtreecommitdiff
path: root/scripts/check_whitespace_token.py
blob: ebe3dd3c73476d7851af10d536da8cdda0f99af6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
"""
    Checker for whitespace tokens
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Helper script to find whitespace which is not of token type `Whitespace`,
    and non-empty, non-whitespace text which is wrongly tagged `Whitespace`.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import argparse
import sys
import re

from utility import unpack_output_file, process_output_files


def check_file(path):
    """Check the tokens of one output file for whitespace mismatches.

    Walks every ``(value, token, line number)`` triple produced by
    ``unpack_output_file(path)`` and stops at the first problem:

    * a value made up solely of whitespace whose token type mentions none
      of ``Literal``, ``Comment`` or ``Whitespace``;
    * a non-empty value that is not pure whitespace but whose token type
      mentions ``Whitespace``.

    The location of the offending token is printed as ``path:line``.

    :param path: output file to inspect, passed on to ``unpack_output_file``.
    :return: ``False`` at the first offending token, ``True`` if none found.
    """
    is_pure_whitespace = re.compile(r'\s+').fullmatch

    # Token types in which a whitespace-only value is tolerated: literals
    # (e.g. " " inside a Literal.String), comments (which may be highlighted
    # similarly to literals) and, of course, Whitespace itself.
    tolerated = ('Literal', 'Comment', 'Whitespace')

    for text, token_type, lineno in unpack_output_file(path):
        if is_pure_whitespace(text):
            if any(name in token_type for name in tolerated):
                continue
            print(f'{path}:{lineno}')
            return False
        elif 'Whitespace' in token_type and text != '':
            # Not whitespace-only and not empty, yet tagged as Whitespace.
            print(f'{path}:{lineno} - Incorrectly marked as whitespace')
            return False

    return True


def main():
    """Command-line entry point.

    Parses the single positional ``TEST_ROOT`` argument and hands it,
    together with ``check_file``, to ``process_output_files``.

    :return: ``1`` if ``process_output_files`` reports a value greater than
             zero (presumably the number of failing files -- confirm against
             ``utility``), otherwise ``0``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('TEST_ROOT',
                     help='Root directory containing the tests')
    test_root = cli.parse_args().TEST_ROOT

    outcome = process_output_files(test_root, check_file)
    return 1 if outcome > 0 else 0


# Run the checker only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())