Improve whitespace handling in Python.

This triggers a new case in the HtmlFormatter, which emits an empty span at the end of the line for a new line, as those are removed by the split-by-parts code. This requires separate post-processing. It doesn't fix all whitespace issues with Python either, but with this change we're down to 360 failing examples, from more than 400 previously.
author: Matthäus G. Chajdas <dev@anteru.net> 2022-12-11 12:52:23 +0100
committer: Matthäus G. Chajdas <dev@anteru.net> 2022-12-11 12:52:23 +0100
commit: 147b22face65617514ccfa8512c6b097b07cad4c (patch)
tree: 56a06e1627d9e159f84becb4664d0ec46788a8f5 /pygments/lexers/python.py
parent: 956518d6d6b62e755f8a3869c5cb143a243fdc4d (diff)
download: pygments-git-147b22face65617514ccfa8512c6b097b07cad4c.tar.gz
1 files changed, 20 insertions, 20 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index 64f260d1..0a318a9e 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -100,11 +100,11 @@ class PythonLexer(RegexLexer):
 
     tokens = {
         'root': [
-            (r'\n', Text),
+            (r'\n', Whitespace),
             (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
-             bygroups(Text, String.Affix, String.Doc)),
+             bygroups(Whitespace, String.Affix, String.Doc)),
             (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
-             bygroups(Text, String.Affix, String.Doc)),
+             bygroups(Whitespace, String.Affix, String.Doc)),
             (r'\A#!.+$', Comment.Hashbang),
             (r'#.*$', Comment.Single),
             (r'\\\n', Text),
@@ -192,13 +192,13 @@ class PythonLexer(RegexLexer):
             (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
              r'(\![sraf])?'     # conversion
              r':', String.Interpol, '#pop'),
-            (r'\s+', Text),  # allow new lines
+            (r'\s+', Whitespace),  # allow new lines
             include('expr'),
         ],
         'expr-inside-fstring-inner': [
             (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
             (r'[])}]', Punctuation, '#pop'),
-            (r'\s+', Text),  # allow new lines
+            (r'\s+', Whitespace),  # allow new lines
             include('expr'),
         ],
         'expr-keywords': [
@@ -229,7 +229,7 @@ class PythonLexer(RegexLexer):
         ],
         'soft-keywords-inner': [
             # optional `_` keyword
-            (r'(\s+)([^\n_]*)(_\b)', bygroups(Text, using(this), Keyword)),
+            (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
             default('#pop')
         ],
         'builtins': [
@@ -445,11 +445,11 @@ class Python2Lexer(RegexLexer):
 
     tokens = {
         'root': [
-            (r'\n', Text),
+            (r'\n', Whitespace),
             (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
-             bygroups(Text, String.Affix, String.Doc)),
+             bygroups(Whitespace, String.Affix, String.Doc)),
             (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
-             bygroups(Text, String.Affix, String.Doc)),
+             bygroups(Whitespace, String.Affix, String.Doc)),
             (r'[^\S\n]+', Text),
             (r'\A#!.+$', Comment.Hashbang),
             (r'#.*$', Comment.Single),
@@ -742,7 +742,7 @@ class PythonTracebackLexer(RegexLexer):
 
     tokens = {
         'root': [
-            (r'\n', Text),
+            (r'\n', Whitespace),
             (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
             (r'^During handling of the above exception, another '
              r'exception occurred:\n\n', Generic.Traceback),
@@ -808,17 +808,17 @@ class Python2TracebackLexer(RegexLexer):
         ],
         'intb': [
             (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
-             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
+             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
             (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
-             bygroups(Text, Name.Builtin, Text, Number, Text)),
+             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
             (r'^(    )(.+)(\n)',
-             bygroups(Text, using(Python2Lexer), Text), 'marker'),
+             bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
             (r'^([ \t]*)(\.\.\.)(\n)',
-             bygroups(Text, Comment, Text)),  # for doctests...
+             bygroups(Text, Comment, Whitespace)),  # for doctests...
             (r'^([^:]+)(: )(.+)(\n)',
-             bygroups(Generic.Error, Text, Name, Text), '#pop'),
+             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
             (r'^([a-zA-Z_]\w*)(:?\n)',
-             bygroups(Generic.Error, Text), '#pop')
+             bygroups(Generic.Error, Whitespace), '#pop')
         ],
         'marker': [
             # For syntax errors.
@@ -843,13 +843,13 @@ class CythonLexer(RegexLexer):
 
     tokens = {
         'root': [
-            (r'\n', Text),
-            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
-            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+            (r'\n', Whitespace),
+            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
+            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
             (r'[^\S\n]+', Text),
             (r'#.*$', Comment),
             (r'[]{}:(),;[]', Punctuation),
-            (r'\\\n', Text),
+            (r'\\\n', Whitespace),
             (r'\\', Text),
             (r'(in|is|and|or|not)\b', Operator.Word),
             (r'(<)([a-zA-Z0-9.?]+)(>)',
author	Matthäus G. Chajdas <dev@anteru.net>	2022-12-11 12:52:23 +0100
committer	Matthäus G. Chajdas <dev@anteru.net>	2022-12-11 12:52:23 +0100
commit	147b22face65617514ccfa8512c6b097b07cad4c (patch)
tree	56a06e1627d9e159f84becb4664d0ec46788a8f5 /pygments/lexers/python.py
parent	956518d6d6b62e755f8a3869c5cb143a243fdc4d (diff)
download	pygments-git-147b22face65617514ccfa8512c6b097b07cad4c.tar.gz