Python f-strings: highlight expressions in curly braces

Fixes #1228
author: Tomer Vromen <tomer.vromen@intel.com> 2020-02-04 20:34:36 +0200
committer: Tomer Vromen <tomer.vromen@intel.com> 2020-02-04 20:34:36 +0200
commit: f7b14bcaaae689e243bb379713bbd28686ee7e19 (patch)
tree: f38534ed868035f2d889b28a2df8dc713a716d66 /pygments/lexers/python.py
parent: 55193857195b6a36c70708fadbe01f3ed61c0a88 (diff)
download: pygments-git-f7b14bcaaae689e243bb379713bbd28686ee7e19.tar.gz
1 files changed, 106 insertions, 15 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index 8e1debac..24067d59 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -85,6 +85,20 @@ class PythonLexer(RegexLexer):
             # newlines are an error (use "nl" state)
         ]
 
+    def fstring_rules(ttype):
+        return [
+            # Assume that a lone '}' is the closing brace after a format
+            # specifier. Sadly, this means that we won't detect syntax errors,
+            # but it's more important to parse correct syntax correctly than
+            # to highlight invalid syntax.
+            (r'\}', String.Interpol),
+            (r'\{', String.Interpol, 'expr-inside-fstring'),
+            # backslashes, quotes and braces must be parsed one at a time
+            (r'[^\\\'"{}\n]+', ttype),
+            (r'[\'"\\]', ttype),
+            # newlines are an error (use "nl" state)
+        ]
+
     tokens = {
         'root': [
             (r'\n', Text),
@@ -92,14 +106,10 @@ class PythonLexer(RegexLexer):
              bygroups(Text, String.Affix, String.Doc)),
             (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
              bygroups(Text, String.Affix, String.Doc)),
-            (r'[^\S\n]+', Text),
             (r'\A#!.+$', Comment.Hashbang),
             (r'#.*$', Comment.Single),
-            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
-            (r'[]{}:(),;[]', Punctuation),
             (r'\\\n', Text),
             (r'\\', Text),
-            (r'(in|is|and|or|not)\b', Operator.Word),
             include('keywords'),
             (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
             (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
@@ -107,30 +117,84 @@ class PythonLexer(RegexLexer):
              'fromimport'),
             (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
              'import'),
-            include('builtins'),
-            include('magicfuncs'),
-            include('magicvars'),
+            include('expr'),
+        ],
+        'expr': [
+            # raw f-strings
+            ('(?i)(rf|fr)(""")',
+             bygroups(String.Affix, String.Double), 'tdqf'),
+            ("(?i)(rf|fr)(''')",
+             bygroups(String.Affix, String.Single), 'tsqf'),
+            ('(?i)(rf|fr)(")',
+             bygroups(String.Affix, String.Double), 'dqf'),
+            ("(?i)(rf|fr)(')",
+             bygroups(String.Affix, String.Single), 'sqf'),
+            # non-raw f-strings
+            ('([fF])(""")', bygroups(String.Affix, String.Double),
+             combined('fstringescape', 'tdqf')),
+            ("([fF])(''')", bygroups(String.Affix, String.Single),
+             combined('fstringescape', 'tsqf')),
+            ('([fF])(")', bygroups(String.Affix, String.Double),
+             combined('fstringescape', 'dqf')),
+            ("([fF])(')", bygroups(String.Affix, String.Single),
+             combined('fstringescape', 'sqf')),
             # raw strings
-            ('(?i)(rb|br|fr|rf|r)(""")',
+            ('(?i)(rb|br|r)(""")',
              bygroups(String.Affix, String.Double), 'tdqs'),
-            ("(?i)(rb|br|fr|rf|r)(''')",
+            ("(?i)(rb|br|r)(''')",
              bygroups(String.Affix, String.Single), 'tsqs'),
-            ('(?i)(rb|br|fr|rf|r)(")',
+            ('(?i)(rb|br|r)(")',
              bygroups(String.Affix, String.Double), 'dqs'),
-            ("(?i)(rb|br|fr|rf|r)(')",
+            ("(?i)(rb|br|r)(')",
              bygroups(String.Affix, String.Single), 'sqs'),
             # non-raw strings
-            ('([uUbBfF]?)(""")', bygroups(String.Affix, String.Double),
+            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
              combined('stringescape', 'tdqs')),
-            ("([uUbBfF]?)(''')", bygroups(String.Affix, String.Single),
+            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
              combined('stringescape', 'tsqs')),
-            ('([uUbBfF]?)(")', bygroups(String.Affix, String.Double),
+            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
              combined('stringescape', 'dqs')),
-            ("([uUbBfF]?)(')", bygroups(String.Affix, String.Single),
+            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
              combined('stringescape', 'sqs')),
+            (r'[^\S\n]+', Text),
+            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
+            (r'[]{}:(),;[]', Punctuation),
+            (r'(in|is|and|or|not)\b', Operator.Word),
+            include('expr-keywords'),
+            include('builtins'),
+            include('magicfuncs'),
+            include('magicvars'),
             include('name'),
             include('numbers'),
         ],
+        'expr-inside-fstring': [
+            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
+            # without format specifier
+            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
+             r'(\![sraf])?'     # conversion
+             r'}', String.Interpol, '#pop'),
+            # with format specifier
+            # we'll catch the remaining '}' in the outer scope
+            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
+             r'(\![sraf])?'     # conversion
+             r':', String.Interpol, '#pop'),
+            (r'[^\S]+', Text),  # allow new lines
+            include('expr'),
+        ],
+        'expr-inside-fstring-inner': [
+            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
+            (r'[])}]', Punctuation, '#pop'),
+            (r'[^\S]+', Text),  # allow new lines
+            include('expr'),
+        ],
+        'expr-keywords': [
+            # Based on https://docs.python.org/3/reference/expressions.html
+            (words((
+                'async for', 'await', 'else', 'for', 'if', 'lambda',
+                'yield', 'yield from'), suffix=r'\b'),
+             Keyword),
+            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
+        ],
         'keywords': [
             (words((
                 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
@@ -252,12 +316,29 @@ class PythonLexer(RegexLexer):
             (uni_name, Name.Namespace),
             default('#pop'),
         ],
+        'fstringescape': [
+            ('{{', String.Escape),
+            ('}}', String.Escape),
+            include('stringescape'),
+        ],
         'stringescape': [
             (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
              r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
         ],
+        'fstrings-single': fstring_rules(String.Single),
+        'fstrings-double': fstring_rules(String.Double),
         'strings-single': innerstring_rules(String.Single),
         'strings-double': innerstring_rules(String.Double),
+        'dqf': [
+            (r'"', String.Double, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
+            include('fstrings-double')
+        ],
+        'sqf': [
+            (r"'", String.Single, '#pop'),
+            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
+            include('fstrings-single')
+        ],
         'dqs': [
             (r'"', String.Double, '#pop'),
             (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
@@ -268,6 +349,16 @@ class PythonLexer(RegexLexer):
             (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
             include('strings-single')
         ],
+        'tdqf': [
+            (r'"""', String.Double, '#pop'),
+            include('fstrings-double'),
+            (r'\n', String.Double)
+        ],
+        'tsqf': [
+            (r"'''", String.Single, '#pop'),
+            include('fstrings-single'),
+            (r'\n', String.Single)
+        ],
         'tdqs': [
             (r'"""', String.Double, '#pop'),
             include('strings-double'),
author	Tomer Vromen <tomer.vromen@intel.com>	2020-02-04 20:34:36 +0200
committer	Tomer Vromen <tomer.vromen@intel.com>	2020-02-04 20:34:36 +0200
commit	f7b14bcaaae689e243bb379713bbd28686ee7e19 (patch)
tree	f38534ed868035f2d889b28a2df8dc713a716d66 /pygments/lexers/python.py
parent	55193857195b6a36c70708fadbe01f3ed61c0a88 (diff)
download	pygments-git-f7b14bcaaae689e243bb379713bbd28686ee7e19.tar.gz