summary | refs | log | tree | commit | diff
path: root/pygments/lexers/python.py
diff options
context:
space:
mode:
author: Matthäus G. Chajdas <dev@anteru.net> 2022-12-11 12:52:23 +0100
committer: Matthäus G. Chajdas <dev@anteru.net> 2022-12-11 12:52:23 +0100
commit: 147b22face65617514ccfa8512c6b097b07cad4c (patch)
tree: 56a06e1627d9e159f84becb4664d0ec46788a8f5 /pygments/lexers/python.py
parent: 956518d6d6b62e755f8a3869c5cb143a243fdc4d (diff)
download: pygments-git-147b22face65617514ccfa8512c6b097b07cad4c.tar.gz
Improve whitespace handling in Python.
This triggers a new case in the HtmlFormatter, which emits an empty span at the end of the line for a new line, as those are removed by the split-by-parts code. This requires separate post-processing. It doesn't fix all whitespace issues with Python either, but we're down to 360 failing examples with that, from previously >400.
Diffstat (limited to 'pygments/lexers/python.py')
-rw-r--r-- pygments/lexers/python.py 40
1 files changed, 20 insertions, 20 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index 64f260d1..0a318a9e 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -100,11 +100,11 @@ class PythonLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
+ (r'\n', Whitespace),
(r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
- bygroups(Text, String.Affix, String.Doc)),
+ bygroups(Whitespace, String.Affix, String.Doc)),
(r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
- bygroups(Text, String.Affix, String.Doc)),
+ bygroups(Whitespace, String.Affix, String.Doc)),
(r'\A#!.+$', Comment.Hashbang),
(r'#.*$', Comment.Single),
(r'\\\n', Text),
@@ -192,13 +192,13 @@ class PythonLexer(RegexLexer):
(r'(=\s*)?' # debug (https://bugs.python.org/issue36817)
r'(\![sraf])?' # conversion
r':', String.Interpol, '#pop'),
- (r'\s+', Text), # allow new lines
+ (r'\s+', Whitespace), # allow new lines
include('expr'),
],
'expr-inside-fstring-inner': [
(r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
(r'[])}]', Punctuation, '#pop'),
- (r'\s+', Text), # allow new lines
+ (r'\s+', Whitespace), # allow new lines
include('expr'),
],
'expr-keywords': [
@@ -229,7 +229,7 @@ class PythonLexer(RegexLexer):
],
'soft-keywords-inner': [
# optional `_` keyword
- (r'(\s+)([^\n_]*)(_\b)', bygroups(Text, using(this), Keyword)),
+ (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
default('#pop')
],
'builtins': [
@@ -445,11 +445,11 @@ class Python2Lexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
+ (r'\n', Whitespace),
(r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
- bygroups(Text, String.Affix, String.Doc)),
+ bygroups(Whitespace, String.Affix, String.Doc)),
(r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
- bygroups(Text, String.Affix, String.Doc)),
+ bygroups(Whitespace, String.Affix, String.Doc)),
(r'[^\S\n]+', Text),
(r'\A#!.+$', Comment.Hashbang),
(r'#.*$', Comment.Single),
@@ -742,7 +742,7 @@ class PythonTracebackLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
+ (r'\n', Whitespace),
(r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
(r'^During handling of the above exception, another '
r'exception occurred:\n\n', Generic.Traceback),
@@ -808,17 +808,17 @@ class Python2TracebackLexer(RegexLexer):
],
'intb': [
(r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
- bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
+ bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
(r'^( File )("[^"]+")(, line )(\d+)(\n)',
- bygroups(Text, Name.Builtin, Text, Number, Text)),
+ bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
(r'^( )(.+)(\n)',
- bygroups(Text, using(Python2Lexer), Text), 'marker'),
+ bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
(r'^([ \t]*)(\.\.\.)(\n)',
- bygroups(Text, Comment, Text)), # for doctests...
+ bygroups(Text, Comment, Whitespace)), # for doctests...
(r'^([^:]+)(: )(.+)(\n)',
- bygroups(Generic.Error, Text, Name, Text), '#pop'),
+ bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
(r'^([a-zA-Z_]\w*)(:?\n)',
- bygroups(Generic.Error, Text), '#pop')
+ bygroups(Generic.Error, Whitespace), '#pop')
],
'marker': [
# For syntax errors.
@@ -843,13 +843,13 @@ class CythonLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
- (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
- (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+ (r'\n', Whitespace),
+ (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
+ (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
(r'[^\S\n]+', Text),
(r'#.*$', Comment),
(r'[]{}:(),;[]', Punctuation),
- (r'\\\n', Text),
+ (r'\\\n', Whitespace),
(r'\\', Text),
(r'(in|is|and|or|not)\b', Operator.Word),
(r'(<)([a-zA-Z0-9.?]+)(>)',