diff options
author | Jean Abou-Samra <jean@abou-samra.fr> | 2023-04-17 18:41:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-17 18:41:55 +0200 |
commit | c97762448b1e4eac8d74b8d88415f23c32aa0cdd (patch) | |
tree | fea2ebba54f728956dc532ca2e508b86c488932f | |
parent | 50dd4d80e25c4c4afab503d41b471a536ed2af13 (diff) | |
download | pygments-git-c97762448b1e4eac8d74b8d88415f23c32aa0cdd.tar.gz |
Refactor PythonConsoleLexer as a DelegatingLexer (#2412)
This is simpler and more reliable than hand-coding the state machine.
Fixes #2411
-rw-r--r-- | pygments/lexers/python.py | 117 |
-rw-r--r-- | tests/examplefiles/pycon/pycon_ctrlc_traceback.output | 70 |
-rw-r--r-- | tests/examplefiles/pycon/pycon_test.pycon.output | 10 |
-rw-r--r-- | tests/snippets/pycon/broken_tb.txt | 3 |
-rw-r--r-- | tests/snippets/pycon/multiple_tb.txt | 4 |
-rw-r--r-- | tests/snippets/pycon/unterminated_tb.txt | 17 |
6 files changed, 125 insertions, 96 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index eaaf6476..6537d4d9 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -11,8 +11,8 @@ import re import keyword -from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - default, words, combined, do_insertions, this, line_re +from pygments.lexer import DelegatingLexer, Lexer, RegexLexer, include, \ + bygroups, using, default, words, combined, do_insertions, this, line_re from pygments.util import get_bool_opt, shebang_matches from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Other, Error, Whitespace @@ -635,8 +635,43 @@ class Python2Lexer(RegexLexer): def analyse_text(text): return shebang_matches(text, r'pythonw?2(\.\d)?') +class _PythonConsoleLexerBase(RegexLexer): + name = 'Python console session' + aliases = ['pycon'] + mimetypes = ['text/x-python-doctest'] -class PythonConsoleLexer(Lexer): + """Auxiliary lexer for `PythonConsoleLexer`. + + Code tokens are output as ``Token.Other.Code``, traceback tokens as + ``Token.Other.Traceback``. + """ + tokens = { + 'root': [ + (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'), + # This happens, e.g., when tracebacks are embedded in documentation; + # trailing whitespaces are often stripped in such contexts. + (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)), + (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'), + # SyntaxError starts with this + (r' File "[^"]+", line \d+', Other.Traceback, 'traceback'), + (r'.*\n', Generic.Output), + ], + 'continuations': [ + (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)), + # See above. + (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)), + default('#pop'), + ], + 'traceback': [ + # As soon as we see a traceback, consume everything until the next + # >>> prompt. 
+ (r'(?=>>>( |$))', Text, '#pop'), + (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)), + (r'.*\n', Other.Traceback), + ], + } + +class PythonConsoleLexer(DelegatingLexer): """ For Python console output or doctests, such as: @@ -659,70 +694,28 @@ class PythonConsoleLexer(Lexer): .. versionchanged:: 2.5 Now defaults to ``True``. """ + name = 'Python console session' aliases = ['pycon'] mimetypes = ['text/x-python-doctest'] def __init__(self, **options): - self.python3 = get_bool_opt(options, 'python3', True) - Lexer.__init__(self, **options) - - def get_tokens_unprocessed(self, text): - if self.python3: - pylexer = PythonLexer(**self.options) - tblexer = PythonTracebackLexer(**self.options) + python3 = get_bool_opt(options, 'python3', True) + if python3: + pylexer = PythonLexer + tblexer = PythonTracebackLexer else: - pylexer = Python2Lexer(**self.options) - tblexer = Python2TracebackLexer(**self.options) - - curcode = '' - insertions = [] - curtb = '' - tbindex = 0 - in_tb = False - for match in line_re.finditer(text): - line = match.group() - if line.startswith('>>> ') or line.startswith('... '): - in_tb = False - insertions.append((len(curcode), - [(0, Generic.Prompt, line[:4])])) - curcode += line[4:] - elif line.rstrip() == '...' 
and not in_tb: - # only a new >>> prompt can end an exception block - # otherwise an ellipsis in place of the traceback frames - # will be mishandled - insertions.append((len(curcode), - [(0, Generic.Prompt, '...')])) - curcode += line[3:] - else: - if curcode: - yield from do_insertions( - insertions, pylexer.get_tokens_unprocessed(curcode)) - curcode = '' - insertions = [] - if in_tb: - curtb += line - if not (line.startswith(' ') or line.strip() == '...'): - in_tb = False - for i, t, v in tblexer.get_tokens_unprocessed(curtb): - yield tbindex+i, t, v - curtb = '' - elif (line.startswith('Traceback (most recent call last):') or - re.match(' File "[^"]+", line \\d+\\n$', line)): - in_tb = True - curtb = line - tbindex = match.start() - elif line == 'KeyboardInterrupt\n': - yield match.start(), Name.Class, line - else: - yield match.start(), Generic.Output, line - if curcode: - yield from do_insertions(insertions, - pylexer.get_tokens_unprocessed(curcode)) - if curtb: - for i, t, v in tblexer.get_tokens_unprocessed(curtb): - yield tbindex+i, t, v - + pylexer = Python2Lexer + tblexer = Python2TracebackLexer + # We have two auxiliary lexers. Use DelegatingLexer twice with + # different tokens. TODO: DelegatingLexer should support this + # directly, by accepting a tuplet of auxiliary lexers and a tuple of + # distinguishing tokens. Then we wouldn't need this intermediary + # class. 
+ class _ReplaceInnerCode(DelegatingLexer): + def __init__(self, **options): + super().__init__(pylexer, _PythonConsoleLexerBase, Other.Code, **options) + super().__init__(tblexer, _ReplaceInnerCode, Other.Traceback, **options) class PythonTracebackLexer(RegexLexer): """ @@ -743,7 +736,7 @@ class PythonTracebackLexer(RegexLexer): tokens = { 'root': [ (r'\n', Whitespace), - (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), + (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), (r'^During handling of the above exception, another ' r'exception occurred:\n\n', Generic.Traceback), (r'^The above exception was the direct cause of the ' diff --git a/tests/examplefiles/pycon/pycon_ctrlc_traceback.output b/tests/examplefiles/pycon/pycon_ctrlc_traceback.output index 6bdb3e01..efe5d35e 100644 --- a/tests/examplefiles/pycon/pycon_ctrlc_traceback.output +++ b/tests/examplefiles/pycon/pycon_ctrlc_traceback.output @@ -98,7 +98,9 @@ 'Exception' Generic.Error '\n' Text.Whitespace -'>>>\n' Generic.Output +'' Text +'>>>' Generic.Prompt +'\n' Text.Whitespace '>>> ' Generic.Prompt 'while' Keyword @@ -115,14 +117,22 @@ '...' 
Generic.Prompt '\n' Text.Whitespace -'^CTraceback (most recent call last):\n' Generic.Output +'^CTraceback (most recent call last):\n' Generic.Traceback -' File "<stdin>", line 1, in <module>\n' Generic.Output +' File ' Text +'"<stdin>"' Name.Builtin +', line ' Text +'1' Literal.Number +', in ' Text +'<module>' Name +'\n' Text.Whitespace -'KeyboardInterrupt\n' Name.Class +'KeyboardInterrupt' Name.Class +'\n' Text.Whitespace -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'class' Keyword ' ' Text @@ -229,11 +239,9 @@ 'first' Name '\n' Text.Whitespace -'\n' Generic.Output - -'During handling of the above exception, another exception occurred:\n' Generic.Output +'\n' Text.Whitespace -'\n' Generic.Output +'During handling of the above exception, another exception occurred:\n\n' Generic.Traceback 'Traceback (most recent call last):\n' Generic.Traceback @@ -250,15 +258,15 @@ 'second' Name '\n' Text.Whitespace -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'x' Name ' ' Text '=' Operator '\n' Text.Whitespace -'' Generic.Traceback ' File ' Text '"<stdin>"' Name.Builtin ', line ' Text @@ -280,7 +288,9 @@ 'invalid syntax' Name '\n' Text.Whitespace -'>>>\n' Generic.Output +'' Text +'>>>' Generic.Prompt +'\n' Text.Whitespace '\n' Generic.Output @@ -360,12 +370,13 @@ 'multi' Name '\n' Text.Whitespace -' line\n' Generic.Output +' line\n' Other -'detail\n' Generic.Output +'detail\n' Other -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'raise' Keyword ' ' Text @@ -394,12 +405,13 @@ 'multi' Name '\n' Text.Whitespace -' line\n' Generic.Output +' line\n' Other -'detail\n' Generic.Output +'detail\n' Other -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'raise' Keyword ' ' Text @@ -429,12 +441,13 @@ 'multi' Name '\n' Text.Whitespace -' line\n' Generic.Output +' line\n' Other -'detail\n' Generic.Output +'detail\n' Other -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' 
Generic.Prompt 'raise' Keyword ' ' Text @@ -465,12 +478,13 @@ 'multi' Name '\n' Text.Whitespace -' line\n' Generic.Output +' line\n' Other -'detail\n' Generic.Output +'detail\n' Other -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'raise' Keyword ' ' Text @@ -497,12 +511,13 @@ 'multi' Name '\n' Text.Whitespace -' line\n' Generic.Output +' line\n' Other -'detail\n' Generic.Output +'detail\n' Other -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'raise' Keyword ' ' Text @@ -522,6 +537,7 @@ 'Exception' Generic.Error '\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'import' Keyword.Namespace ' ' Text diff --git a/tests/examplefiles/pycon/pycon_test.pycon.output b/tests/examplefiles/pycon/pycon_test.pycon.output index 30946876..66dcca03 100644 --- a/tests/examplefiles/pycon/pycon_test.pycon.output +++ b/tests/examplefiles/pycon/pycon_test.pycon.output @@ -2,7 +2,6 @@ ':' Punctuation '\n' Text.Whitespace -'' Generic.Traceback ' File ' Text '"<stdin>"' Name.Builtin ', line ' Text @@ -22,12 +21,14 @@ 'invalid syntax' Name '\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt '\n' Text.Whitespace -'KeyboardInterrupt\n' Name.Class +'KeyboardInterrupt\n' Generic.Output -'>>>\n' Generic.Output +'>>>' Generic.Prompt +'\n' Text.Whitespace '\n' Generic.Output @@ -46,8 +47,9 @@ 'ZeroDivisionError' Generic.Error '\n' Text.Whitespace -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt '1' Literal.Number.Integer '/' Operator diff --git a/tests/snippets/pycon/broken_tb.txt b/tests/snippets/pycon/broken_tb.txt index 6cecf761..f3f6f5e6 100644 --- a/tests/snippets/pycon/broken_tb.txt +++ b/tests/snippets/pycon/broken_tb.txt @@ -54,8 +54,9 @@ SyntaxError: EOL while scanning string literal 'EOL while scanning string literal' Name '\n' Text.Whitespace -'\n' Generic.Output +'\n' Text.Whitespace +'' Text '>>> ' Generic.Prompt 'exec' Name '(' Punctuation diff --git a/tests/snippets/pycon/multiple_tb.txt 
b/tests/snippets/pycon/multiple_tb.txt index c8b71fc0..9461be0d 100644 --- a/tests/snippets/pycon/multiple_tb.txt +++ b/tests/snippets/pycon/multiple_tb.txt @@ -103,6 +103,6 @@ AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_i "Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>" Name '\n' Text.Whitespace -"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output +"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other -"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output +"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other diff --git a/tests/snippets/pycon/unterminated_tb.txt b/tests/snippets/pycon/unterminated_tb.txt new file mode 100644 index 00000000..9173ec04 --- /dev/null +++ b/tests/snippets/pycon/unterminated_tb.txt @@ -0,0 +1,17 @@ +---input--- +>>> unterminated_traceback() +Traceback (most recent call last): +>>> + +---tokens--- +'>>> ' Generic.Prompt +'unterminated_traceback' Name +'(' Punctuation +')' Punctuation +'\n' Text.Whitespace + +'Traceback (most recent call last):\n' Generic.Traceback + +'' Text +'>>>' Generic.Prompt +'\n' Text.Whitespace |