summary | refs | log | tree | commit | diff
path: root/pygments/lexers/python.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/python.py')
-rw-r--r-- pygments/lexers/python.py | 166
1 files changed, 126 insertions, 40 deletions
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index dee8e6c7..7601afa8 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -39,7 +39,7 @@ class PythonLexer(RegexLexer):
return [
# the old style '%s' % (...) string formatting
(r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
# backslashes, quotes and formatting signs must be parsed one at a time
(r'[^\\\'"%\n]+', ttype),
(r'[\'"\\]', ttype),
@@ -51,8 +51,10 @@ class PythonLexer(RegexLexer):
tokens = {
'root': [
(r'\n', Text),
- (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
- (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+ (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
+ bygroups(Text, String.Affix, String.Doc)),
+ (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
+ bygroups(Text, String.Affix, String.Doc)),
(r'[^\S\n]+', Text),
(r'\A#!.+$', Comment.Hashbang),
(r'#.*$', Comment.Single),
@@ -69,15 +71,25 @@ class PythonLexer(RegexLexer):
(r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
'import'),
include('builtins'),
+ include('magicfuncs'),
+ include('magicvars'),
include('backtick'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'),
- ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')),
- ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')),
- ('[uU]?"', String.Double, combined('stringescape', 'dqs')),
- ("[uU]?'", String.Single, combined('stringescape', 'sqs')),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
+ bygroups(String.Affix, String.Double), 'tdqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
+ bygroups(String.Affix, String.Single), 'tsqs'),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
+ bygroups(String.Affix, String.Double), 'dqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
+ bygroups(String.Affix, String.Single), 'sqs'),
+ ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
+ combined('stringescape', 'tdqs')),
+ ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
+ combined('stringescape', 'tsqs')),
+ ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
+ combined('stringescape', 'dqs')),
+ ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
+ combined('stringescape', 'sqs')),
include('name'),
include('numbers'),
],
@@ -123,6 +135,37 @@ class PythonLexer(RegexLexer):
'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
Name.Exception),
],
+ 'magicfuncs': [
+ (words((
+ '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
+ '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
+ '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
+ '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
+ '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
+ '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
+ '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
+ '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
+ '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
+ '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
+ '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
+ '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
+ '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
+ '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
+ '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
+ '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
+ '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
+ '__unicode__', '__xor__'), suffix=r'\b'),
+ Name.Function.Magic),
+ ],
+ 'magicvars': [
+ (words((
+ '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
+ '__dict__', '__doc__', '__file__', '__func__', '__globals__',
+ '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
+ '__slots__', '__weakref__'),
+ suffix=r'\b'),
+ Name.Variable.Magic),
+ ],
'numbers': [
(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
(r'\d+[eE][+-]?[0-9]+j?', Number.Float),
@@ -140,7 +183,9 @@ class PythonLexer(RegexLexer):
('[a-zA-Z_]\w*', Name),
],
'funcname': [
- ('[a-zA-Z_]\w*', Name.Function, '#pop')
+ include('magicfuncs'),
+ ('[a-zA-Z_]\w*', Name.Function, '#pop'),
+ default('#pop'),
],
'classname': [
('[a-zA-Z_]\w*', Name.Class, '#pop')
@@ -213,6 +258,26 @@ class Python3Lexer(RegexLexer):
uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)
+ def innerstring_rules(ttype):
+ return [
+ # the old style '%s' % (...) string formatting (still valid in Py3)
+ (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
+ # the new style '{}'.format(...) string formatting
+ (r'\{'
+ '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
+ '(\![sra])?' # conversion
+ '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
+ '\}', String.Interpol),
+
+ # backslashes, quotes and formatting signs must be parsed one at a time
+ (r'[^\\\'"%{\n]+', ttype),
+ (r'[\'"\\]', ttype),
+ # unhandled string formatting sign
+ (r'%|(\{{1,2})', ttype)
+ # newlines are an error (use "nl" state)
+ ]
+
tokens = PythonLexer.tokens.copy()
tokens['keywords'] = [
(words((
@@ -263,6 +328,38 @@ class Python3Lexer(RegexLexer):
prefix=r'(?<!\.)', suffix=r'\b'),
Name.Exception),
]
+ tokens['magicfuncs'] = [
+ (words((
+ '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__', '__and__',
+ '__anext__', '__await__', '__bool__', '__bytes__', '__call__',
+ '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
+ '__delitem__', '__dir__', '__divmod__', '__enter__', '__eq__', '__exit__',
+ '__float__', '__floordiv__', '__format__', '__ge__', '__get__',
+ '__getattr__', '__getattribute__', '__getitem__', '__gt__', '__hash__',
+ '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__',
+ '__imod__', '__import__', '__imul__', '__index__', '__init__',
+ '__instancecheck__', '__int__', '__invert__', '__ior__', '__ipow__',
+ '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__',
+ '__le__', '__len__', '__length_hint__', '__lshift__', '__lt__',
+ '__matmul__', '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
+ '__new__', '__next__', '__or__', '__pos__', '__pow__', '__prepare__',
+ '__radd__', '__rand__', '__rdivmod__', '__repr__', '__reversed__',
+ '__rfloordiv__', '__rlshift__', '__rmatmul__', '__rmod__', '__rmul__',
+ '__ror__', '__round__', '__rpow__', '__rrshift__', '__rshift__',
+ '__rsub__', '__rtruediv__', '__rxor__', '__set__', '__setattr__',
+ '__setitem__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
+ '__xor__'), suffix=r'\b'),
+ Name.Function.Magic),
+ ]
+ tokens['magicvars'] = [
+ (words((
+ '__annotations__', '__bases__', '__class__', '__closure__', '__code__',
+ '__defaults__', '__dict__', '__doc__', '__file__', '__func__',
+ '__globals__', '__kwdefaults__', '__module__', '__mro__', '__name__',
+ '__objclass__', '__qualname__', '__self__', '__slots__', '__weakref__'),
+ suffix=r'\b'),
+ Name.Variable.Magic),
+ ]
tokens['numbers'] = [
(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
(r'0[oO][0-7]+', Number.Oct),
@@ -295,23 +392,8 @@ class Python3Lexer(RegexLexer):
(uni_name, Name.Namespace),
default('#pop'),
]
- tokens['strings'] = [
- # the old style '%s' % (...) string formatting (still valid in Py3)
- (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
- # the new style '{}'.format(...) string formatting
- (r'\{'
- '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
- '(\![sra])?' # conversion
- '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?'
- '\}', String.Interpol),
- # backslashes, quotes and formatting signs must be parsed one at a time
- (r'[^\\\'"%\{\n]+', String),
- (r'[\'"\\]', String),
- # unhandled string formatting sign
- (r'%|(\{{1,2})', String)
- # newlines are an error (use "nl" state)
- ]
+ tokens['strings-single'] = innerstring_rules(String.Single)
+ tokens['strings-double'] = innerstring_rules(String.Double)
def analyse_text(text):
return shebang_matches(text, r'pythonw?3(\.\d)?')
@@ -515,6 +597,8 @@ class CythonLexer(RegexLexer):
include('keywords'),
(r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
(r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
+ # (should actually start a block with only cdefs)
+ (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
(r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
(r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
(r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
@@ -534,7 +618,7 @@ class CythonLexer(RegexLexer):
'keywords': [
(words((
'assert', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
- 'else', 'except', 'except?', 'exec', 'finally', 'for', 'gil',
+ 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
Keyword),
@@ -626,7 +710,7 @@ class CythonLexer(RegexLexer):
],
'strings': [
(r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
(r'[\'"\\]', String),
@@ -697,18 +781,20 @@ class DgLexer(RegexLexer):
(words((
'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
- 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super',
- 'tuple', 'tuple\'', 'type'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
+ 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
+ 'super', 'tuple', 'tuple\'', 'type'),
+ prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
Name.Builtin),
(words((
'__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
- 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', 'getattr',
- 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input',
- 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', 'locals',
- 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', 'print', 'repr',
- 'reversed', 'round', 'setattr', 'scanl1?', 'snd', 'sorted', 'sum', 'tail',
- 'take', 'takewhile', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
+ 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst',
+ 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
+ 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
+ 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
+ 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd',
+ 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
+ prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
Name.Builtin),
(r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
Name.Builtin.Pseudo),
@@ -734,7 +820,7 @@ class DgLexer(RegexLexer):
],
'string': [
(r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
(r'[\'"\\]', String),