summaryrefslogtreecommitdiff
path: root/tests/test_perllexer.py
blob: 30f9eca8097115e6019e4eea42af9a7c7162dab6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# -*- coding: utf-8 -*-
"""
    Pygments regex lexer tests
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import time
import unittest

from pygments.token import Keyword, Name, String, Text
from pygments.lexers.perl import PerlLexer


class RunawayRegexTest(unittest.TestCase):
    """Regression tests for catastrophic backtracking in the Perl lexer.

    A previous version of the Perl lexer would spend a great deal of
    time backtracking when given particular strings.  These tests show that
    the runaway backtracking doesn't happen any more (at least for the given
    cases).
    """

    lexer = PerlLexer()

    ### Test helpers.

    def assert_single_token(self, s, token):
        """Show that a given string generates only one token.

        ``s`` is the input string; ``token`` is the expected token type.
        ``get_tokens_unprocessed`` yields ``(index, token, value)`` tuples.
        """
        tokens = list(self.lexer.get_tokens_unprocessed(s))
        self.assertEqual(len(tokens), 1, tokens)
        self.assertEqual(s, tokens[0][2])
        self.assertEqual(token, tokens[0][1])

    def assert_tokens(self, strings, expected_tokens):
        """Show that a given string generates the expected tokens.

        ``strings`` are concatenated to form the lexer input; each element
        must come back as the value of the corresponding entry in
        ``expected_tokens``.
        """
        tokens = list(self.lexer.get_tokens_unprocessed(''.join(strings)))
        self.assertEqual(len(tokens), len(expected_tokens), tokens)
        for index, s in enumerate(strings):
            self.assertEqual(s, tokens[index][2])
            self.assertEqual(expected_tokens[index], tokens[index][1])

    def assert_fast_tokenization(self, s):
        """Show that a given string is tokenized quickly.

        Returns the token list so callers could inspect it further.
        """
        start = time.time()
        tokens = list(self.lexer.get_tokens_unprocessed(s))
        end = time.time()
        # Isn't 10 seconds kind of a long time?  Yes, but we don't want false
        # positives when the tests are starved for CPU time.
        if end-start > 10:
            self.fail('tokenization took too long')
        return tokens

    ### Strings.

    def test_single_quote_strings(self):
        self.assert_single_token(r"'foo\tbar\\\'baz'", String)
        self.assert_fast_tokenization("'" + '\\'*999)

    def test_double_quote_strings(self):
        self.assert_single_token(r'"foo\tbar\\\"baz"', String)
        self.assert_fast_tokenization('"' + '\\'*999)

    def test_backtick_strings(self):
        self.assert_single_token(r'`foo\tbar\\\`baz`', String.Backtick)
        self.assert_fast_tokenization('`' + '\\'*999)

    ### Regex matches with various delimiters.

    def test_match(self):
        self.assert_single_token(r'/aa\tbb/', String.Regex)
        self.assert_fast_tokenization('/' + '\\'*999)

    def test_match_with_slash(self):
        self.assert_tokens(['m', '/\n\\t\\\\/'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m/xxx\n' + '\\'*999)

    def test_match_with_bang(self):
        self.assert_tokens(['m', r'!aa\t\!bb!'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m!' + '\\'*999)

    def test_match_with_brace(self):
        self.assert_tokens(['m', r'{aa\t\}bb}'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m{' + '\\'*999)

    def test_match_with_angle_brackets(self):
        self.assert_tokens(['m', r'<aa\t\>bb>'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m<' + '\\'*999)

    def test_match_with_parenthesis(self):
        self.assert_tokens(['m', r'(aa\t\)bb)'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m(' + '\\'*999)

    def test_match_with_at_sign(self):
        self.assert_tokens(['m', r'@aa\t\@bb@'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m@' + '\\'*999)

    def test_match_with_percent_sign(self):
        self.assert_tokens(['m', r'%aa\t\%bb%'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m%' + '\\'*999)

    def test_match_with_dollar_sign(self):
        self.assert_tokens(['m', r'$aa\t\$bb$'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m$' + '\\'*999)

    ### Regex substitutions with various delimiters.

    def test_substitution_with_slash(self):
        self.assert_single_token('s/aaa/bbb/g', String.Regex)
        self.assert_fast_tokenization('s/foo/' + '\\'*999)

    def test_substitution_with_at_sign(self):
        self.assert_single_token(r's@aaa@bbb@g', String.Regex)
        self.assert_fast_tokenization('s@foo@' + '\\'*999)

    def test_substitution_with_percent_sign(self):
        self.assert_single_token(r's%aaa%bbb%g', String.Regex)
        self.assert_fast_tokenization('s%foo%' + '\\'*999)

    def test_substitution_with_brace(self):
        self.assert_single_token(r's{aaa}', String.Regex)
        self.assert_fast_tokenization('s{' + '\\'*999)

    # NOTE: this method was previously defined twice, byte-for-byte
    # identical; the second definition silently shadowed the first.
    # The duplicate has been removed.
    def test_substitution_with_angle_bracket(self):
        self.assert_single_token(r's<aaa>', String.Regex)
        self.assert_fast_tokenization('s<' + '\\'*999)

    def test_substitution_with_square_bracket(self):
        self.assert_single_token(r's[aaa]', String.Regex)
        self.assert_fast_tokenization('s[' + '\\'*999)

    def test_substitution_with_parenthesis(self):
        self.assert_single_token(r's(aaa)', String.Regex)
        self.assert_fast_tokenization('s(' + '\\'*999)

    ### Namespaces/modules

    def test_package_statement(self):
        self.assert_tokens(['package', ' ', 'Foo'], [Keyword, Text, Name.Namespace])
        self.assert_tokens(['package', '  ', 'Foo::Bar'], [Keyword, Text, Name.Namespace])

    def test_use_statement(self):
        self.assert_tokens(['use', ' ', 'Foo'], [Keyword, Text, Name.Namespace])
        self.assert_tokens(['use', '  ', 'Foo::Bar'], [Keyword, Text, Name.Namespace])

    def test_no_statement(self):
        self.assert_tokens(['no', ' ', 'Foo'], [Keyword, Text, Name.Namespace])
        self.assert_tokens(['no', '  ', 'Foo::Bar'], [Keyword, Text, Name.Namespace])

    def test_require_statement(self):
        self.assert_tokens(['require', ' ', 'Foo'], [Keyword, Text, Name.Namespace])
        self.assert_tokens(['require', '  ', 'Foo::Bar'], [Keyword, Text, Name.Namespace])
        self.assert_tokens(['require', ' ', '"Foo/Bar.pm"'], [Keyword, Text, String])