summaryrefslogtreecommitdiff
path: root/pygments/lexers/python.py
diff options
context:
space:
mode:
authoramitkummer <amit.kummer@gmail.com>2021-12-20 19:49:33 +0200
committeramitkummer <amit.kummer@gmail.com>2021-12-20 20:22:50 +0200
commit40de02ec6f1ee25efaeec80ad2964c84caa43b92 (patch)
treea3edf107449c6ce9a7431e74d42462d01f124c0b /pygments/lexers/python.py
parent8630e033313647d9579e314f3e8e2882f4558933 (diff)
downloadpygments-git-40de02ec6f1ee25efaeec80ad2964c84caa43b92.tar.gz
Python: lex soft keywords
Some notes: - This approach is not perfect, but it's rather simple and I can't think of another edge case beyond the one noted below. - I did not use the `words` function to create the regex matching the keywords list, because it returns a capturing group (`()`) and it needs to be non-capturing here (because of the `bygroups` usage). - I chose to go to the 'soft-keywords-inner' state after both `match` and `case`, even though it's unnecessary for `match` (the inner state catches the `_` wildcard keyword, which appears only after a `case`). This is mostly harmless and saves us from writing the 'soft-keywords' regex twice, once each for `match` and `case`, with the extra inner state just for `case`. The only piece of code this will lex incorrectly is `match _:` (`_` will be lexed as a keyword). I doubt, though, that pattern matching will be used like this.
Diffstat (limited to 'pygments/lexers/python.py')
-rw-r--r--pygments/lexers/python.py18
1 files changed, 17 insertions, 1 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index f2773e99..8d478c2a 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -9,9 +9,10 @@
"""
import re
+import keyword
from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
- default, words, combined, do_insertions
+ default, words, combined, do_insertions, this
from pygments.util import get_bool_opt, shebang_matches
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic, Other, Error
@@ -110,6 +111,7 @@ class PythonLexer(RegexLexer):
(r'\\\n', Text),
(r'\\', Text),
include('keywords'),
+ include('soft-keywords'),
(r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
(r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
(r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
@@ -207,6 +209,20 @@ class PythonLexer(RegexLexer):
Keyword),
(words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
],
+ 'soft-keywords': [
+ # `match`, `case` and `_` soft keywords
+ (r'(^[ \t]*)' # at beginning of line + possible indentation
+ r'(match|case)\b' # a possible keyword
+ r'(?![ \t]*(?:' # not followed by...
+ r'[:,;=^&|@~)\]}]|(?:' + # characters and keywords that mean this isn't
+ r'|'.join(keyword.kwlist) + r')\b))', # pattern matching
+ bygroups(Text, Keyword), 'soft-keywords-inner'),
+ ],
+ 'soft-keywords-inner': [
+ # optional `_` keyword
+ (r'(\s+)([^\n_]*)(_\b)', bygroups(Text, using(this), Keyword)),
+ default('#pop')
+ ],
'builtins': [
(words((
'__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray',