summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean Abou-Samra <jean@abou-samra.fr> 2023-04-17 18:41:55 +0200
committer: GitHub <noreply@github.com> 2023-04-17 18:41:55 +0200
commit: c97762448b1e4eac8d74b8d88415f23c32aa0cdd (patch)
tree: fea2ebba54f728956dc532ca2e508b86c488932f
parent: 50dd4d80e25c4c4afab503d41b471a536ed2af13 (diff)
download: pygments-git-c97762448b1e4eac8d74b8d88415f23c32aa0cdd.tar.gz
Refactor PythonConsoleLexer as a DelegatingLexer (#2412)
This is simpler and more reliable than hand-coding the state machine. Fixes #2411.
-rw-r--r-- pygments/lexers/python.py 117
-rw-r--r-- tests/examplefiles/pycon/pycon_ctrlc_traceback.output 70
-rw-r--r-- tests/examplefiles/pycon/pycon_test.pycon.output 10
-rw-r--r-- tests/snippets/pycon/broken_tb.txt 3
-rw-r--r-- tests/snippets/pycon/multiple_tb.txt 4
-rw-r--r-- tests/snippets/pycon/unterminated_tb.txt 17
6 files changed, 125 insertions, 96 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index eaaf6476..6537d4d9 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -11,8 +11,8 @@
import re
import keyword
-from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
- default, words, combined, do_insertions, this, line_re
+from pygments.lexer import DelegatingLexer, Lexer, RegexLexer, include, \
+ bygroups, using, default, words, combined, do_insertions, this, line_re
from pygments.util import get_bool_opt, shebang_matches
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic, Other, Error, Whitespace
@@ -635,8 +635,43 @@ class Python2Lexer(RegexLexer):
def analyse_text(text):
return shebang_matches(text, r'pythonw?2(\.\d)?')
+class _PythonConsoleLexerBase(RegexLexer):
+ name = 'Python console session'
+ aliases = ['pycon']
+ mimetypes = ['text/x-python-doctest']
-class PythonConsoleLexer(Lexer):
+ """Auxiliary lexer for `PythonConsoleLexer`.
+
+ Code tokens are output as ``Token.Other.Code``, traceback tokens as
+ ``Token.Other.Traceback``.
+ """
+ tokens = {
+ 'root': [
+ (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
+ # This happens, e.g., when tracebacks are embedded in documentation;
+ # trailing whitespaces are often stripped in such contexts.
+ (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
+ (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
+ # SyntaxError starts with this
+ (r' File "[^"]+", line \d+', Other.Traceback, 'traceback'),
+ (r'.*\n', Generic.Output),
+ ],
+ 'continuations': [
+ (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
+ # See above.
+ (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
+ default('#pop'),
+ ],
+ 'traceback': [
+ # As soon as we see a traceback, consume everything until the next
+ # >>> prompt.
+ (r'(?=>>>( |$))', Text, '#pop'),
+ (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
+ (r'.*\n', Other.Traceback),
+ ],
+ }
+
+class PythonConsoleLexer(DelegatingLexer):
"""
For Python console output or doctests, such as:
@@ -659,70 +694,28 @@ class PythonConsoleLexer(Lexer):
.. versionchanged:: 2.5
Now defaults to ``True``.
"""
+
name = 'Python console session'
aliases = ['pycon']
mimetypes = ['text/x-python-doctest']
def __init__(self, **options):
- self.python3 = get_bool_opt(options, 'python3', True)
- Lexer.__init__(self, **options)
-
- def get_tokens_unprocessed(self, text):
- if self.python3:
- pylexer = PythonLexer(**self.options)
- tblexer = PythonTracebackLexer(**self.options)
+ python3 = get_bool_opt(options, 'python3', True)
+ if python3:
+ pylexer = PythonLexer
+ tblexer = PythonTracebackLexer
else:
- pylexer = Python2Lexer(**self.options)
- tblexer = Python2TracebackLexer(**self.options)
-
- curcode = ''
- insertions = []
- curtb = ''
- tbindex = 0
- in_tb = False
- for match in line_re.finditer(text):
- line = match.group()
- if line.startswith('>>> ') or line.startswith('... '):
- in_tb = False
- insertions.append((len(curcode),
- [(0, Generic.Prompt, line[:4])]))
- curcode += line[4:]
- elif line.rstrip() == '...' and not in_tb:
- # only a new >>> prompt can end an exception block
- # otherwise an ellipsis in place of the traceback frames
- # will be mishandled
- insertions.append((len(curcode),
- [(0, Generic.Prompt, '...')]))
- curcode += line[3:]
- else:
- if curcode:
- yield from do_insertions(
- insertions, pylexer.get_tokens_unprocessed(curcode))
- curcode = ''
- insertions = []
- if in_tb:
- curtb += line
- if not (line.startswith(' ') or line.strip() == '...'):
- in_tb = False
- for i, t, v in tblexer.get_tokens_unprocessed(curtb):
- yield tbindex+i, t, v
- curtb = ''
- elif (line.startswith('Traceback (most recent call last):') or
- re.match(' File "[^"]+", line \\d+\\n$', line)):
- in_tb = True
- curtb = line
- tbindex = match.start()
- elif line == 'KeyboardInterrupt\n':
- yield match.start(), Name.Class, line
- else:
- yield match.start(), Generic.Output, line
- if curcode:
- yield from do_insertions(insertions,
- pylexer.get_tokens_unprocessed(curcode))
- if curtb:
- for i, t, v in tblexer.get_tokens_unprocessed(curtb):
- yield tbindex+i, t, v
-
+ pylexer = Python2Lexer
+ tblexer = Python2TracebackLexer
+ # We have two auxiliary lexers. Use DelegatingLexer twice with
+ # different tokens. TODO: DelegatingLexer should support this
+ directly, by accepting a tuple of auxiliary lexers and a tuple of
+ # distinguishing tokens. Then we wouldn't need this intermediary
+ # class.
+ class _ReplaceInnerCode(DelegatingLexer):
+ def __init__(self, **options):
+ super().__init__(pylexer, _PythonConsoleLexerBase, Other.Code, **options)
+ super().__init__(tblexer, _ReplaceInnerCode, Other.Traceback, **options)
class PythonTracebackLexer(RegexLexer):
"""
@@ -743,7 +736,7 @@ class PythonTracebackLexer(RegexLexer):
tokens = {
'root': [
(r'\n', Whitespace),
- (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
+ (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
(r'^During handling of the above exception, another '
r'exception occurred:\n\n', Generic.Traceback),
(r'^The above exception was the direct cause of the '
diff --git a/tests/examplefiles/pycon/pycon_ctrlc_traceback.output b/tests/examplefiles/pycon/pycon_ctrlc_traceback.output
index 6bdb3e01..efe5d35e 100644
--- a/tests/examplefiles/pycon/pycon_ctrlc_traceback.output
+++ b/tests/examplefiles/pycon/pycon_ctrlc_traceback.output
@@ -98,7 +98,9 @@
'Exception' Generic.Error
'\n' Text.Whitespace
-'>>>\n' Generic.Output
+'' Text
+'>>>' Generic.Prompt
+'\n' Text.Whitespace
'>>> ' Generic.Prompt
'while' Keyword
@@ -115,14 +117,22 @@
'...' Generic.Prompt
'\n' Text.Whitespace
-'^CTraceback (most recent call last):\n' Generic.Output
+'^CTraceback (most recent call last):\n' Generic.Traceback
-' File "<stdin>", line 1, in <module>\n' Generic.Output
+' File ' Text
+'"<stdin>"' Name.Builtin
+', line ' Text
+'1' Literal.Number
+', in ' Text
+'<module>' Name
+'\n' Text.Whitespace
-'KeyboardInterrupt\n' Name.Class
+'KeyboardInterrupt' Name.Class
+'\n' Text.Whitespace
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'class' Keyword
' ' Text
@@ -229,11 +239,9 @@
'first' Name
'\n' Text.Whitespace
-'\n' Generic.Output
-
-'During handling of the above exception, another exception occurred:\n' Generic.Output
+'\n' Text.Whitespace
-'\n' Generic.Output
+'During handling of the above exception, another exception occurred:\n\n' Generic.Traceback
'Traceback (most recent call last):\n' Generic.Traceback
@@ -250,15 +258,15 @@
'second' Name
'\n' Text.Whitespace
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'x' Name
' ' Text
'=' Operator
'\n' Text.Whitespace
-'' Generic.Traceback
' File ' Text
'"<stdin>"' Name.Builtin
', line ' Text
@@ -280,7 +288,9 @@
'invalid syntax' Name
'\n' Text.Whitespace
-'>>>\n' Generic.Output
+'' Text
+'>>>' Generic.Prompt
+'\n' Text.Whitespace
'\n' Generic.Output
@@ -360,12 +370,13 @@
'multi' Name
'\n' Text.Whitespace
-' line\n' Generic.Output
+' line\n' Other
-'detail\n' Generic.Output
+'detail\n' Other
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'raise' Keyword
' ' Text
@@ -394,12 +405,13 @@
'multi' Name
'\n' Text.Whitespace
-' line\n' Generic.Output
+' line\n' Other
-'detail\n' Generic.Output
+'detail\n' Other
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'raise' Keyword
' ' Text
@@ -429,12 +441,13 @@
'multi' Name
'\n' Text.Whitespace
-' line\n' Generic.Output
+' line\n' Other
-'detail\n' Generic.Output
+'detail\n' Other
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'raise' Keyword
' ' Text
@@ -465,12 +478,13 @@
'multi' Name
'\n' Text.Whitespace
-' line\n' Generic.Output
+' line\n' Other
-'detail\n' Generic.Output
+'detail\n' Other
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'raise' Keyword
' ' Text
@@ -497,12 +511,13 @@
'multi' Name
'\n' Text.Whitespace
-' line\n' Generic.Output
+' line\n' Other
-'detail\n' Generic.Output
+'detail\n' Other
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'raise' Keyword
' ' Text
@@ -522,6 +537,7 @@
'Exception' Generic.Error
'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'import' Keyword.Namespace
' ' Text
diff --git a/tests/examplefiles/pycon/pycon_test.pycon.output b/tests/examplefiles/pycon/pycon_test.pycon.output
index 30946876..66dcca03 100644
--- a/tests/examplefiles/pycon/pycon_test.pycon.output
+++ b/tests/examplefiles/pycon/pycon_test.pycon.output
@@ -2,7 +2,6 @@
':' Punctuation
'\n' Text.Whitespace
-'' Generic.Traceback
' File ' Text
'"<stdin>"' Name.Builtin
', line ' Text
@@ -22,12 +21,14 @@
'invalid syntax' Name
'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'\n' Text.Whitespace
-'KeyboardInterrupt\n' Name.Class
+'KeyboardInterrupt\n' Generic.Output
-'>>>\n' Generic.Output
+'>>>' Generic.Prompt
+'\n' Text.Whitespace
'\n' Generic.Output
@@ -46,8 +47,9 @@
'ZeroDivisionError' Generic.Error
'\n' Text.Whitespace
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'1' Literal.Number.Integer
'/' Operator
diff --git a/tests/snippets/pycon/broken_tb.txt b/tests/snippets/pycon/broken_tb.txt
index 6cecf761..f3f6f5e6 100644
--- a/tests/snippets/pycon/broken_tb.txt
+++ b/tests/snippets/pycon/broken_tb.txt
@@ -54,8 +54,9 @@ SyntaxError: EOL while scanning string literal
'EOL while scanning string literal' Name
'\n' Text.Whitespace
-'\n' Generic.Output
+'\n' Text.Whitespace
+'' Text
'>>> ' Generic.Prompt
'exec' Name
'(' Punctuation
diff --git a/tests/snippets/pycon/multiple_tb.txt b/tests/snippets/pycon/multiple_tb.txt
index c8b71fc0..9461be0d 100644
--- a/tests/snippets/pycon/multiple_tb.txt
+++ b/tests/snippets/pycon/multiple_tb.txt
@@ -103,6 +103,6 @@ AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_i
"Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>" Name
'\n' Text.Whitespace
-"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output
+"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other
-"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Generic.Output
+"AttributeError: Can't get attribute 'f' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>\n" Other
diff --git a/tests/snippets/pycon/unterminated_tb.txt b/tests/snippets/pycon/unterminated_tb.txt
new file mode 100644
index 00000000..9173ec04
--- /dev/null
+++ b/tests/snippets/pycon/unterminated_tb.txt
@@ -0,0 +1,17 @@
+---input---
+>>> unterminated_traceback()
+Traceback (most recent call last):
+>>>
+
+---tokens---
+'>>> ' Generic.Prompt
+'unterminated_traceback' Name
+'(' Punctuation
+')' Punctuation
+'\n' Text.Whitespace
+
+'Traceback (most recent call last):\n' Generic.Traceback
+
+'' Text
+'>>>' Generic.Prompt
+'\n' Text.Whitespace