diff options
-rw-r--r-- | pygments/formatters/html.py | 3 | ||||
-rw-r--r-- | pygments/formatters/latex.py | 4 | ||||
-rw-r--r-- | pygments/formatters/other.py | 4 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 235 | ||||
-rw-r--r-- | pygments/lexers/asm.py | 4 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 155 | ||||
-rw-r--r-- | pygments/lexers/dalvik.py | 8 | ||||
-rw-r--r-- | pygments/lexers/dotnet.py | 35 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 64 | ||||
-rw-r--r-- | pygments/lexers/graph.py | 2 | ||||
-rw-r--r-- | pygments/lexers/hdl.py | 34 | ||||
-rw-r--r-- | pygments/lexers/inferno.py | 4 | ||||
-rw-r--r-- | pygments/lexers/jvm.py | 134 | ||||
-rw-r--r-- | pygments/lexers/math.py | 24 | ||||
-rw-r--r-- | pygments/lexers/other.py | 148 | ||||
-rw-r--r-- | pygments/lexers/shell.py | 4 | ||||
-rw-r--r-- | pygments/lexers/sql.py | 8 | ||||
-rw-r--r-- | pygments/lexers/templates.py | 54 | ||||
-rw-r--r-- | pygments/lexers/text.py | 19 | ||||
-rw-r--r-- | pygments/lexers/web.py | 144 | ||||
-rw-r--r-- | tests/examplefiles/vimrc | 21 | ||||
-rw-r--r-- | tests/test_clexer.py | 20 |
22 files changed, 599 insertions, 529 deletions
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index 3bc60e8a..970e595b 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -523,7 +523,8 @@ class HtmlFormatter(Formatter): self.cssfile) except AttributeError: print('Note: Cannot determine output file name, ' \ - 'using current directory as base for the CSS file name', file=sys.stderr) + 'using current directory as base for the CSS file name', + file=sys.stderr) cssfilename = self.cssfile # write CSS file only if noclobber_cssfile isn't given as an option. try: diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py index 413cca63..352684b0 100644 --- a/pygments/formatters/latex.py +++ b/pygments/formatters/latex.py @@ -375,9 +375,9 @@ class LatexFormatter(Formatter): if len(sep1) > 0: b,sep2,text = text.partition(self.right) if len(sep2) > 0: - value = value + escape_tex(a, self.commandprefix) + b + value += escape_tex(a, self.commandprefix) + b else: - value = value + escape_tex(a + sep1 + b, self.commandprefix) + value += escape_tex(a + sep1 + b, self.commandprefix) else: value = value + escape_tex(a, self.commandprefix) else: diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py index b6e4bc58..c8269b19 100644 --- a/pygments/formatters/other.py +++ b/pygments/formatters/other.py @@ -118,11 +118,11 @@ class RawTokenFormatter(Formatter): TESTCASE_BEFORE = u'''\ def testNeedsName(self): fragment = %r - expected = [ + tokens = [ ''' TESTCASE_AFTER = u'''\ ] - self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) ''' diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index a3e60f59..a0cd63cf 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -119,20 +119,20 @@ class PythonLexer(RegexLexer): ('`.*?`', String.Backtick), ], 'name': [ - (r'@[a-zA-Z0-9_.]+', Name.Decorator), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'@[\w.]+', Name.Decorator), + ('[a-zA-Z_]\w*', Name), ], 'funcname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') + ('[a-zA-Z_]\w*', Name.Function, '#pop') ], 'classname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ (r'(?:[ \t]|\\\n)+', Text), (r'as\b', Keyword.Namespace), (r',', Operator), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_][\w.]*', Name.Namespace), (r'', Text, '#pop') # all else: go back ], 'fromimport': [ @@ -142,7 +142,7 @@ class PythonLexer(RegexLexer): # never be a module name (r'None\b', Name.Builtin.Pseudo, '#pop'), # sadly, in "raise x from y" y will be highlighted as namespace too - (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_.][\w.]*', Name.Namespace), # anything else here also means "raise x from y" and is therefore # not an error (r'', Text, '#pop'), @@ -152,7 +152,7 @@ class PythonLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ - (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time @@ -255,7 +255,7 @@ class Python3Lexer(RegexLexer): ] tokens['backtick'] = [] tokens['name'] = [ - (r'@[a-zA-Z0-9_]+', Name.Decorator), + (r'@\w+', Name.Decorator), (uni_name, Name), ] tokens['funcname'] = [ @@ -406,7 +406,7 @@ class PythonTracebackLexer(RegexLexer): bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', + (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } @@ -445,7 +445,7 @@ class Python3TracebackLexer(RegexLexer): bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', + (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } @@ -535,7 +535,7 @@ class RubyLexer(ExtendedRegexLexer): (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(:)(?!:)', + (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), @@ -621,8 +621,8 @@ class RubyLexer(ExtendedRegexLexer): r'rescue|raise|retry|return|super|then|undef|unless|until|when|' r'while|yield)\b', Keyword), # start of function, class and module names - (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*' - r'(?:::[a-zA-Z_][a-zA-Z0-9_]*)*)', + (r'(module)(\s+)([a-zA-Z_]\w*' + r'(?:::[a-zA-Z_]\w*)*)', bygroups(Keyword, Text, Name.Namespace)), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'), @@ -713,9 +713,9 @@ class RubyLexer(ExtendedRegexLexer): (r'([\d]+(?:_\d+)*)(\s*)([/?])?', bygroups(Number.Integer, Text, Operator)), # Names - (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class), - (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), - (r'\$[a-zA-Z0-9_]+', Name.Variable.Global), + (r'@@[a-zA-Z_]\w*', Name.Variable.Class), + (r'@[a-zA-Z_]\w*', Name.Variable.Instance), + (r'\$\w+', Name.Variable.Global), (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global), (r'\$-[0adFiIlpvw]', Name.Variable.Global), (r'::', Operator), @@ -725,7 +725,7 @@ class RubyLexer(ExtendedRegexLexer): r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)' r'(?!\w)', String.Char), - (r'[A-Z][a-zA-Z0-9_]+', Name.Constant), + (r'[A-Z]\w+', Name.Constant), # this is needed because ruby attributes can look # like keywords (class) or like this: ` ?!? (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])', @@ -739,7 +739,7 @@ class RubyLexer(ExtendedRegexLexer): ], 'funcname': [ (r'\(', Punctuation, 'defexpr'), - (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?' + (r'(?:([a-zA-Z_]\w*)(\.))?' r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), @@ -762,8 +762,8 @@ class RubyLexer(ExtendedRegexLexer): ], 'string-intp': [ (r'#{', String.Interpol, 'in-intp'), - (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol), - (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol) + (r'#@@?[a-zA-Z_]\w*', String.Interpol), + (r'#\$[a-zA-Z_]\w*', String.Interpol) ], 'string-intp-escaped': [ include('string-intp'), @@ -814,7 +814,7 @@ class RubyConsoleLexer(Lexer): aliases = ['rbcon', 'irb'] mimetypes = ['text/x-ruby-shellsession'] - _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] ' + _prompt_re = re.compile('irb\([a-zA-Z_]\w*\):\d{3}:\d+[>*"\'] ' '|>> |\?> ') def get_tokens_unprocessed(self, text): @@ -875,7 +875,7 @@ class PerlLexer(RegexLexer): (r'(case|continue|do|else|elsif|for|foreach|if|last|my|' r'next|our|redo|reset|then|unless|until|while|use|' r'print|new|BEGIN|CHECK|INIT|END|return)\b', Keyword), - (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)', + (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)', bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'), (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word), # common delimiters @@ -928,7 +928,7 @@ class PerlLexer(RegexLexer): r'utime|values|vec|wait|waitpid|wantarray|warn|write' r')\b', Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String), + (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String), (r'__END__', Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), @@ -966,14 +966,14 @@ class PerlLexer(RegexLexer): (r'\s+', Text), (r'\{', Punctuation, '#pop'), # hash syntax? (r'\)|,', Punctuation, '#pop'), # argument specifier - (r'[a-zA-Z0-9_]+::', Name.Namespace), - (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'), + (r'\w+::', Name.Namespace), + (r'[\w:]+', Name.Variable, '#pop'), ], 'name': [ - (r'[a-zA-Z0-9_]+::', Name.Namespace), - (r'[a-zA-Z0-9_:]+', Name, '#pop'), - (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'), - (r'(?=[^a-zA-Z0-9_])', Text, '#pop'), + (r'\w+::', Name.Namespace), + (r'[\w:]+', Name, '#pop'), + (r'[A-Z_]+(?=[^\w])', Name.Constant, '#pop'), + (r'(?=[^\w])', Text, '#pop'), ], 'modulename': [ (r'[a-zA-Z_]\w*', Name.Namespace, '#pop') @@ -1085,7 +1085,7 @@ class LuaLexer(RegexLexer): (r'(function)\b', Keyword, 'funcname'), - (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name), + (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name), ("'", String.Single, combined('stringescape', 'sqs')), ('"', String.Double, combined('stringescape', 'dqs')) @@ -1093,7 +1093,7 @@ class LuaLexer(RegexLexer): 'funcname': [ (r'\s+', Text), - ('(?:([A-Za-z_][A-Za-z0-9_]*)(\.))?([A-Za-z_][A-Za-z0-9_]*)', + ('(?:([A-Za-z_]\w*)(\.))?([A-Za-z_]\w*)', bygroups(Name.Class, Punctuation, Name.Function), '#pop'), # inline function ('\(', Punctuation, '#pop'), @@ -1176,20 +1176,20 @@ class MoonScriptLexer(LuaLexer): (r'[^\S\n]+', Text), (r'(?s)\[(=*)\[.*?\]\1\]', String), (r'(->|=>)', Name.Function), - (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), + (r':[a-zA-Z_]\w*', Name.Variable), (r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator), (r'[;,]', Punctuation), (r'[\[\]\{\}\(\)]', Keyword.Type), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable), + (r'[a-zA-Z_]\w*:', Name.Variable), (r"(class|extends|if|then|super|do|with|import|export|" r"while|elseif|return|for|in|from|when|using|else|" r"and|or|not|switch|break)\b", Keyword), (r'(true|false|nil)\b', Keyword.Constant), (r'(and|or|not)\b', Operator.Word), (r'(self)\b', Name.Builtin.Pseudo), - (r'@@?([a-zA-Z_][a-zA-Z0-9_]*)?', Name.Variable.Class), + (r'@@?([a-zA-Z_]\w*)?', Name.Variable.Class), (r'[A-Z]\w*', Name.Class), # proper name - (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name), + (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name), ("'", String.Single, combined('stringescape', 'sqs')), ('"', String.Double, combined('stringescape', 'dqs')) ], @@ -1320,7 +1320,7 @@ class IoLexer(RegexLexer): # names (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b', Name.Builtin), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), # numbers (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'\d+', Number.Integer) @@ -1857,14 +1857,14 @@ class FancyLexer(RegexLexer): r'FalseClass|Tuple|Symbol|Stack|Set|FancySpec|Method|Package|' r'Range)\b', Name.Builtin), # functions - (r'[a-zA-Z]([a-zA-Z0-9_]|[-+?!=*/^><%])*:', Name.Function), + (r'[a-zA-Z](\w|[-+?!=*/^><%])*:', Name.Function), # operators, must be below functions (r'[-+*/~,<>=&!?%^\[\]\.$]+', Operator), - ('[A-Z][a-zA-Z0-9_]*', Name.Constant), - ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), - ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class), + ('[A-Z]\w*', Name.Constant), + ('@[a-zA-Z_]\w*', Name.Variable.Instance), + ('@@[a-zA-Z_]\w*', Name.Variable.Class), ('@@?', Operator), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), # numbers - / checks are necessary to avoid mismarking regexes, # see comment in RubyLexer (r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?', @@ -1949,7 +1949,7 @@ class DgLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'string': [ - (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time @@ -1987,7 +1987,7 @@ class Perl6Lexer(ExtendedRegexLexer): mimetypes = ['text/x-perl6', 'application/x-perl6'] flags = re.MULTILINE | re.DOTALL | re.UNICODE - PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" # if you alter this, search for a copy made of it below + PERL6_IDENTIFIER_RANGE = "['\w:-]" PERL6_KEYWORDS = ( 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', @@ -2078,62 +2078,80 @@ class Perl6Lexer(ExtendedRegexLexer): # Perl 6 has a *lot* of possible bracketing characters # this list was lifted from STD.pm6 (https://github.com/perl6/std) PERL6_BRACKETS = { - u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d', - u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', - u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', - u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046', - u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', - u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', - u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267', - u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', - u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', - u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281', - u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', - u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', - u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3', - u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', - u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', - u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd', - u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', - u'\u22e6' : u'\u22e7', u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', - u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc', - u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', - u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', - u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773', - u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', - u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', - u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986', - u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', - u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', - u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0', - u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', - u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', - u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a', - u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', - u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', - u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2', - u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', - u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', - u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6', - u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', - u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', - u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d', - u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', - u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', - u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e', - u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', - u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', - u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a', - u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', - u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', + u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', + u'\u007b' : u'\u007d', u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', + u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', u'\u2018' : u'\u2019', + u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', + u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', + u'\u2045' : u'\u2046', u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', + u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', u'\u220a' : u'\u220d', + u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', + u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', + u'\u2266' : u'\u2267', u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', + u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', u'\u2272' : u'\u2273', + u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', + u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', + u'\u2280' : u'\u2281', u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', + u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', u'\u228a' : u'\u228b', + u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', + u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', + u'\u22a9' : u'\u2ae3', u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', + u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', u'\u22b6' : u'\u22b7', + u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', + u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', + u'\u22dc' : u'\u22dd', u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', + u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', u'\u22e6' : u'\u22e7', + u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', + u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', + u'\u22f4' : u'\u22fc', u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', + u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', u'\u2329' : u'\u232a', + u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', + u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', + u'\u2772' : u'\u2773', u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', + u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', u'\u27dd' : u'\u27de', + u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', + u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', + u'\u2985' : u'\u2986', u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', + u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', u'\u298f' : u'\u2990', + u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', + u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', + u'\u29cf' : u'\u29d0', u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', + u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', u'\u29f8' : u'\u29f9', + u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', + u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', + u'\u2a79' : u'\u2a7a', u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', + u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', u'\u2a8b' : u'\u2a8c', + u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', + u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', + u'\u2aa1' : u'\u2aa2', u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', + u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', u'\u2aaf' : u'\u2ab0', + u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', + u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', + u'\u2ac5' : u'\u2ac6', u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', + u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', u'\u2ad5' : u'\u2ad6', + u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', + u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', + u'\u2e0c' : u'\u2e0d', u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', + u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', u'\u300c' : u'\u300d', + u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', + u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', + u'\u301d' : u'\u301e', u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', + u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', u'\ufe39' : u'\ufe3a', + u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', + u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', + u'\ufe59' : u'\ufe5a', u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', + u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', u'\uff3b' : u'\uff3d', + u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', } def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''): if boundary_regex_fragment is None: - return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + suffix + r')\b' + return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + \ + suffix + r')\b' else: - return r'(?<!' + boundary_regex_fragment + ')' + prefix + '(' + \ - r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + '(?!' + boundary_regex_fragment + ')' + return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \ + r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + r'(?!' + \ + boundary_regex_fragment + r')' def brackets_callback(token_class): def callback(lexer, match, context): @@ -2223,10 +2241,10 @@ class Perl6Lexer(ExtendedRegexLexer): context.pos = match.end() context.stack.append('root') - # If you're modifying these rules, be careful if you need to process '{' or '}' characters. - # We have special logic for processing these characters (due to the fact that you can nest - # Perl 6 code in regex blocks), so if you need to process one of them, make sure you also - # process the corresponding one! + # If you're modifying these rules, be careful if you need to process '{' or '}' + # characters. We have special logic for processing these characters (due to the fact + # that you can nest Perl 6 code in regex blocks), so if you need to process one of + # them, make sure you also process the corresponding one! tokens = { 'common' : [ (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), @@ -2234,7 +2252,8 @@ class Perl6Lexer(ExtendedRegexLexer): (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), (r'^=.*?\n\s*?\n', Comment.Multiline), - (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), + (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', + bygroups(Keyword, Name), 'token-sym-brackets'), (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + ')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), # deal with a special case in the Perl 6 grammar (role q { ... }) (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), @@ -2242,24 +2261,28 @@ class Perl6Lexer(ExtendedRegexLexer): (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin), (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), # copied from PerlLexer - (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), + (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', + Name.Variable), (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'::\?\w+', Name.Variable.Global), - (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', + Name.Variable.Global), (r'\$(?:<.*?>)+', Name.Variable), (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)), # copied from PerlLexer (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), (r'0b[01]+(_[01]+)*', Number.Bin), - (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), + (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', + Number.Float), (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), (r'\d+(_\d+)*', Number.Integer), (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'm\w+(?=\()', Name), (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z_:\s])(?P=first_char)*)', brackets_callback(String.Regex)), - (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), + (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', + String.Regex), (r'<[^\s=].*?\S>', String), (_build_word_match(PERL6_OPERATORS), Operator), (r'[0-9a-zA-Z_]' + PERL6_IDENTIFIER_RANGE + '*', Name), @@ -2322,8 +2345,8 @@ class Perl6Lexer(ExtendedRegexLexer): rating = False # check for my/our/has declarations - # copied PERL6_IDENTIFIER_RANGE from above; not happy about that - if re.search("(?:my|our|has)\s+(?:['a-zA-Z0-9_:-]+\s+)?[$@%&(]", text): + if re.search("(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE + \ + "+\s+)?[$@%&(]", text): rating = 0.8 saw_perl_decl = True @@ -2511,7 +2534,7 @@ class ChaiscriptLexer(RegexLexer): (r'(true|false)\b', Keyword.Constant), (r'(eval|throw)\b', Name.Builtin), (r'`\S+`', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index fc361e2f..13649521 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -318,7 +318,7 @@ class NasmLexer(RegexLexer): filenames = ['*.asm', '*.ASM'] mimetypes = ['text/x-nasm'] - identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*' + identifier = r'[a-zA-Z$._?][\w$.?#@~]*' hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)' octn = r'[0-7]+q' binn = r'[01]+b' @@ -417,7 +417,7 @@ class Ca65Lexer(RegexLexer): r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs' r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor' r'|bit)\b', Keyword), - (r'\.[a-z0-9_]+', Keyword.Pseudo), + (r'\.\w+', Keyword.Pseudo), (r'[-+~*/^&|!<>=]', Operator), (r'"[^"\n]*.', String), (r"'[^'\n]*.", String.Char), diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 2e111deb..913de08b 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -87,23 +87,23 @@ class CFamilyLexer(RegexLexer): r'declspec|finally|int64|try|leave|wchar_t|w64|unaligned|' r'raise|noop|identifier|forceinline|assume)\b', Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), + ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions - (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*\([^;]*?\))' # signature + (r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')?({)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations - (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*\([^;]*?\))' # signature + (r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')?(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), @@ -223,7 +223,7 @@ class CppLexer(CFamilyLexer): (r'(__offload|__blockingoffload|__outer)\b', Keyword.Pseudo), ], 'classname': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), ], @@ -268,7 +268,7 @@ class PikeLexer(CppLexer): inherit, ], 'classname': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), ], @@ -289,9 +289,12 @@ class SwigLexer(CppLexer): tokens = { 'statements': [ - (r'(%[a-z_][a-z0-9_]*)', Name.Function), # SWIG directives - ('\$\**\&?[a-zA-Z0-9_]+', Name), # Special variables - (r'##*[a-zA-Z_][a-zA-Z0-9_]*', Comment.Preproc), # Stringification / additional preprocessor directives + # SWIG directives + (r'(%[a-z_][a-z0-9_]*)', Name.Function), + # Special variables + ('\$\**\&?\w+', Name), + # Stringification / additional preprocessor directives + (r'##*[a-zA-Z_]\w*', Comment.Preproc), inherit, ], } @@ -360,7 +363,7 @@ class ECLexer(CLexer): inherit, ], 'classname': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), ], @@ -1371,9 +1374,9 @@ def objective(baselexer): # Matches [ <ws>? identifier <ws> ( identifier <ws>? ] | identifier? : ) # (note the identifier is *optional* when there is a ':'!) - _oc_message = re.compile(r'\[\s*[a-zA-Z_][a-zA-Z0-9_]*\s+' - r'(?:[a-zA-Z_][a-zA-Z0-9_]*\s*\]|' - r'(?:[a-zA-Z_][a-zA-Z0-9_]*)?:)') + _oc_message = re.compile(r'\[\s*[a-zA-Z_]\w*\s+' + r'(?:[a-zA-Z_]\w*\s*\]|' + r'(?:[a-zA-Z_]\w*)?:)') class GeneratedObjectiveCVariant(baselexer): """ @@ -1394,11 +1397,11 @@ def objective(baselexer): (r'@\[', Literal, 'literal_array'), (r'@\{', Literal, 'literal_dictionary'), (r'(@selector|@private|@protected|@public|@encode|' - r'@synchronized|@try|@throw|@catch|@finally|@end|@property|' + r'@synchronized|@try|@throw|@catch|@finally|@end|@property|@synthesize|' r'__bridge|__bridge_transfer|__autoreleasing|__block|__weak|__strong|' r'weak|strong|copy|retain|assign|unsafe_unretained|atomic|nonatomic|' r'readonly|readwrite|setter|getter|typeof|in|out|inout|release|class|' - r'@synthesize|@dynamic|@optional|@required|@autoreleasepool)\b', Keyword), + r'@dynamic|@optional|@required|@autoreleasepool)\b', Keyword), (r'(id|instancetype|Class|IMP|SEL|BOOL|IBOutlet|IBAction|unichar)\b', Keyword.Type), (r'@(true|false|YES|NO)\n', Name.Builtin), @@ -1417,24 +1420,26 @@ def objective(baselexer): ], 'oc_classname' : [ # interface definition that inherits - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?(\s*)({)', - bygroups(Name.Class, Text, Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?', + ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)({)', + bygroups(Name.Class, Text, Name.Class, Text, Punctuation), + ('#pop', 'oc_ivars')), + ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\([a-zA-Z$_][a-zA-Z0-9$_]*\))(\s*)({)', - bygroups(Name.Class, Text, Name.Label, Text, Punctuation), ('#pop', 'oc_ivars')), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\([a-zA-Z$_][a-zA-Z0-9$_]*\))', + ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)({)', + bygroups(Name.Class, Text, Name.Label, Text, Punctuation), + ('#pop', 'oc_ivars')), + ('([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)({)', + ('([a-zA-Z$_][\w$]*)(\s*)({)', bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class, '#pop') + ('([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'oc_forward_classname' : [ - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*,\s*)', + ('([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text), 'oc_forward_classname'), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*;?)', + ('([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class, Text), '#pop') ], 'oc_ivars' : [ @@ -1448,7 +1453,7 @@ def objective(baselexer): # methods (r'^([-+])(\s*)' # method marker r'(\(.*?\))?(\s*)' # return type - r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)', # begin of method name + r'([a-zA-Z$_][\w$]*:?)', # begin of method name bygroups(Punctuation, Text, using(this), Text, Name.Function), 'method'), @@ -1460,9 +1465,9 @@ def objective(baselexer): # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), - (r'(\(.*?\))(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)', + (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)', bygroups(using(this), Text, Name.Variable)), - (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function), + (r'[a-zA-Z$_][\w$]*:', Name.Function), (';', Punctuation, '#pop'), ('{', Punctuation, 'function'), ('', Text, '#pop'), @@ -1743,20 +1748,20 @@ class PrologLexer(RegexLexer): (r'_', Keyword), # The don't-care variable (r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)), (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' - u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' + u'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' u'(\\s*)(:-|-->)', bygroups(Name.Function, Text, Operator)), # function defn (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' - u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' + u'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' u'(\\s*)(\\()', bygroups(Name.Function, Text, Punctuation)), (u'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' - u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*', + u'[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*', String.Atom), # atom, characters # This one includes ! (u'[#&*+\\-./:<=>?@\\\\^~\u00a1-\u00bf\u2010-\u303f]+', String.Atom), # atom, graphics - (r'[A-Z_][A-Za-z0-9_]*', Name.Variable), + (r'[A-Z_]\w*', Name.Variable), (u'\\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text), ], 'nested-comment': [ @@ -1862,38 +1867,38 @@ class CythonLexer(RegexLexer): ('`.*?`', String.Backtick), ], 'name': [ - (r'@[a-zA-Z0-9_]+', Name.Decorator), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'@\w+', Name.Decorator), + ('[a-zA-Z_]\w*', Name), ], 'funcname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') + ('[a-zA-Z_]\w*', Name.Function, '#pop') ], 'cdef': [ (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved), (r'(struct|enum|union|class)\b', Keyword), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(?=[(:#=]|$)', + (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)', bygroups(Name.Function, Text), '#pop'), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(,)', + (r'([a-zA-Z_]\w*)(\s*)(,)', bygroups(Name.Function, Text, Punctuation)), (r'from\b', Keyword, '#pop'), (r'as\b', Keyword), (r':', Punctuation, '#pop'), (r'(?=["\'])', Text, '#pop'), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Keyword.Type), + (r'[a-zA-Z_]\w*', Keyword.Type), (r'.', Text), ], 'classname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_][\w.]*', Name.Namespace), (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)), (r'', Text, '#pop') # all else: go back ], 'fromimport': [ (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'), - (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_.][\w.]*', Name.Namespace), # ``cdef foo from "header"``, or ``for foo from 0 < i < 10`` (r'', Text, '#pop'), ], @@ -1985,14 +1990,14 @@ class ValaLexer(RegexLexer): 'namespace'), (r'(class|errordomain|interface|struct)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), # void is an actual keyword, others are in glib-2.0.vapi (r'(void|bool|char|double|float|int|int8|int16|int32|int64|long|' r'short|size_t|ssize_t|string|time_t|uchar|uint|uint8|uint16|' r'uint32|uint64|ulong|unichar|ushort)\b', Keyword.Type), (r'(true|false|null)\b', Name.Builtin), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), @@ -2018,10 +2023,10 @@ class ValaLexer(RegexLexer): (r'.*?\n', Comment), ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'namespace': [ - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop') + (r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop') ], } @@ -2045,9 +2050,9 @@ class OocLexer(RegexLexer): r'while|do|switch|case|as|in|version|return|true|false|null)\b', Keyword), (r'include\b', Keyword, 'include'), - (r'(cover)([ \t]+)(from)([ \t]+)([a-zA-Z0-9_]+[*@]?)', + (r'(cover)([ \t]+)(from)([ \t]+)(\w+[*@]?)', bygroups(Keyword, Text, Keyword, Text, Name.Class)), - (r'(func)((?:[ \t]|\\\n)+)(~[a-z_][a-zA-Z0-9_]*)', + (r'(func)((?:[ \t]|\\\n)+)(~[a-z_]\w*)', bygroups(Keyword, Text, Name.Function)), (r'\bfunc\b', Keyword), # Note: %= and ^= not listed on http://ooc-lang.org/syntax @@ -2058,11 +2063,11 @@ class OocLexer(RegexLexer): (r'(\.)([ \t]*)([a-z]\w*)', bygroups(Operator, Text, Name.Function)), (r'[A-Z][A-Z0-9_]+', Name.Constant), - (r'[A-Z][a-zA-Z0-9_]*([@*]|\[[ \t]*\])?', Name.Class), + (r'[A-Z]\w*([@*]|\[[ \t]*\])?', Name.Class), - (r'([a-z][a-zA-Z0-9_]*(?:~[a-z][a-zA-Z0-9_]*)?)((?:[ \t]|\\\n)*)(?=\()', + (r'([a-z]\w*(?:~[a-z]\w*)?)((?:[ \t]|\\\n)*)(?=\()', bygroups(Name.Function, Text)), - (r'[a-z][a-zA-Z0-9_]*', Name.Variable), + (r'[a-z]\w*', Name.Variable), # : introduces types (r'[:(){}\[\];,]', Punctuation), @@ -2444,7 +2449,7 @@ class AdaLexer(RegexLexer): (r'task|protected', Keyword.Declaration), (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)), (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), - (r'(pragma)(\s+)([a-zA-Z0-9_]+)', bygroups(Keyword.Reserved, Text, + (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, Comment.Preproc)), (r'(true|false|null)\b', Keyword.Constant), (r'(Address|Byte|Boolean|Character|Controlled|Count|Cursor|' @@ -2487,7 +2492,7 @@ class AdaLexer(RegexLexer): (r'[0-9_]+', Number.Integer), ], 'attribute' : [ - (r"(')([a-zA-Z0-9_]+)", bygroups(Punctuation, Name.Attribute)), + (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)), ], 'subprogram' : [ (r'\(', Punctuation, ('#pop', 'formal_part')), @@ -2498,7 +2503,7 @@ class AdaLexer(RegexLexer): ], 'end' : [ ('(if|case|record|loop|select)', Keyword.Reserved), - ('"[^"]+"|[a-zA-Z0-9_.]+', Name.Function), + ('"[^"]+"|[\w.]+', Name.Function), ('\s+', Text), (';', Punctuation, '#pop'), ], @@ -2538,7 +2543,7 @@ class AdaLexer(RegexLexer): ('is', Keyword.Reserved, '#pop'), (';', Punctuation, '#pop'), ('\(', Punctuation, 'package_instantiation'), - ('([a-zA-Z0-9_.]+)', Name.Class), + ('([\w.]+)', Name.Class), include('root'), ], 'package_instantiation': [ @@ -2581,7 +2586,7 @@ class Modula2Lexer(RegexLexer): (r'\s+', Text), # whitespace ], 'identifiers': [ - (r'([a-zA-Z_\$][a-zA-Z0-9_\$]*)', Name), + (r'([a-zA-Z_\$][\w\$]*)', Name), ], 'numliterals': [ (r'[01]+B', Number.Binary), # binary number (ObjM2) @@ -3078,7 +3083,7 @@ class FantomLexer(RegexLexer): dict ( pod = r'[\"\w\.]+', eos = r'\n|;', - id = r'[a-zA-Z_][a-zA-Z0-9_]*', + id = r'[a-zA-Z_]\w*', # all chars which can be part of type definition. Starts with # either letter, or [ (maps), or | (funcs) type = r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]\|\->\?]*?', @@ -3363,7 +3368,7 @@ class RustLexer(RegexLexer): r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""", String.Char), # Lifetime - (r"""'[a-zA-Z_][a-zA-Z0-9_]*""", Name.Label), + (r"""'[a-zA-Z_]\w*""", Name.Label), # Binary Literal (r'0b[01_]+', Number, 'number_lit'), # Octal Literal @@ -3491,10 +3496,10 @@ class MonkeyLexer(RegexLexer): filenames = ['*.monkey'] mimetypes = ['text/x-monkey'] - name_variable = r'[a-z_][a-zA-Z0-9_]*' - name_function = r'[A-Z][a-zA-Z0-9_]*' + name_variable = r'[a-z_]\w*' + name_function = r'[A-Z]\w*' name_constant = r'[A-Z_][A-Z0-9_]*' - name_class = r'[A-Z][a-zA-Z0-9_]*' + name_class = r'[A-Z]\w*' name_module = r'[a-z0-9_]*' keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)' @@ -3836,12 +3841,12 @@ class LogosLexer(ObjectiveCppLexer): tokens = { 'statements': [ (r'(%orig|%log)\b', Keyword), - (r'(%c)\b(\()(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\))', + (r'(%c)\b(\()(\s*)([a-zA-Z$_][\w$]*)(\s*)(\))', bygroups(Keyword, Punctuation, Text, Name.Class, Text, Punctuation)), (r'(%init)\b(\()', bygroups(Keyword, Punctuation), 'logos_init_directive'), (r'(%init)(?=\s*;)', bygroups(Keyword)), - (r'(%hook|%group)(\s+)([a-zA-Z$_][a-zA-Z0-9$_]+)', + (r'(%hook|%group)(\s+)([a-zA-Z$_][\w$]+)', bygroups(Keyword, Text, Name.Class), '#pop'), (r'(%subclass)(\s+)', bygroups(Keyword, Text), ('#pop', 'logos_classname')), @@ -3850,20 +3855,20 @@ class LogosLexer(ObjectiveCppLexer): 'logos_init_directive' : [ ('\s+', Text), (',', Punctuation, ('logos_init_directive', '#pop')), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(=)(\s*)([^);]*)', + ('([a-zA-Z$_][\w$]*)(\s*)(=)(\s*)([^);]*)', bygroups(Name.Class, Text, Punctuation, Text, Text)), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class), + ('([a-zA-Z$_][\w$]*)', Name.Class), ('\)', Punctuation, '#pop'), ], 'logos_classname' : [ - ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?', + ('([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), - ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class, '#pop') + ('([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'root': [ (r'(%subclass)(\s+)', bygroups(Keyword, Text), 'logos_classname'), - (r'(%hook|%group)(\s+)([a-zA-Z$_][a-zA-Z0-9$_]+)', + (r'(%hook|%group)(\s+)([a-zA-Z$_][\w$]+)', bygroups(Keyword, Text, Name.Class)), (r'(%config)(\s*\(\s*)(\w+)(\s*=\s*)(.*?)(\s*\)\s*)', bygroups(Keyword, Text, Name.Variable, Text, String, Text)), @@ -3950,13 +3955,13 @@ class ChapelLexer(RegexLexer): (r'[:;,.?()\[\]{}]', Punctuation), # identifiers - (r'[a-zA-Z_][a-zA-Z0-9_$]*', Name.Other), + (r'[a-zA-Z_][\w$]*', Name.Other), ], 'classname': [ - (r'[a-zA-Z_][a-zA-Z0-9_$]*', Name.Class, '#pop'), + (r'[a-zA-Z_][\w$]*', Name.Class, '#pop'), ], 'procname': [ - (r'[a-zA-Z_][a-zA-Z0-9_$]*', Name.Function, '#pop'), + (r'[a-zA-Z_][\w$]*', Name.Function, '#pop'), ], } @@ -3992,7 +3997,7 @@ class EiffelLexer(RegexLexer): (r"'([^'%]|%'|%%)'", String.Char), (r"(//|\\\\|>=|<=|:=|/=|~|/~|[\\\?!#%&@|+/\-=\>\*$<|^\[\]])", Operator), (r"([{}():;,.])", Punctuation), - (r'([a-z][a-zA-Z0-9_]*)|([A-Z][A-Z0-9_]*[a-z][a-zA-Z0-9_]*)', Name), + (r'([a-z]\w*)|([A-Z][A-Z0-9_]*[a-z]\w*)', Name), (r'([A-Z][A-Z0-9_]*)', Name.Class), (r'\n+', Text), ], diff --git a/pygments/lexers/dalvik.py b/pygments/lexers/dalvik.py index 901b7c5a..695be6cf 100644 --- a/pygments/lexers/dalvik.py +++ b/pygments/lexers/dalvik.py @@ -73,22 +73,22 @@ class SmaliLexer(RegexLexer): (r'[0-9]+L?', Number.Integer), ], 'field': [ - (r'(\$?\b)([A-Za-z0-9_$]*)(:)', + (r'(\$?\b)([\w$]*)(:)', bygroups(Punctuation, Name.Variable, Punctuation)), ], 'method': [ (r'<(?:cl)?init>', Name.Function), # constructor - (r'(\$?\b)([A-Za-z0-9_$]*)(\()', + (r'(\$?\b)([\w$]*)(\()', bygroups(Punctuation, Name.Function, Punctuation)), ], 'label': [ - (r':[A-Za-z0-9_]+', Name.Label), + (r':\w+', Name.Label), ], 'class': [ # class names in the form Lcom/namespace/ClassName; # I only want to color the ClassName part, so the namespace part is # treated as 'Text' - (r'(L)((?:[A-Za-z0-9_$]+/)*)([A-Za-z0-9_$]+)(;)', + (r'(L)((?:[\w$]+/)*)([\w$]+)(;)', bygroups(Keyword.Type, Text, Name.Class, Text)), ], 'punctuation': [ diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index a281e6ab..bfb42f97 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -58,7 +58,7 @@ class CSharpLexer(RegexLexer): # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { - 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', + 'none': '@?[_a-zA-Z]\w*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), @@ -170,7 +170,7 @@ class NemerleLexer(RegexLexer): # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = dict( - none = '@?[_a-zA-Z][a-zA-Z0-9_]*', + none = '@?[_a-zA-Z]\w*', basic = ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), @@ -336,7 +336,7 @@ class BooLexer(RegexLexer): (r'"""(\\\\|\\"|.*?)"""', String.Double), (r'"(\\\\|\\"|[^"]*?)"', String.Double), (r"'(\\\\|\\'|[^']*?)'", String.Single), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'[0-9][0-9\.]*(ms?|d|h|s)', Number), (r'0\d+', Number.Oct), @@ -351,13 +351,13 @@ class BooLexer(RegexLexer): ('[*/]', Comment.Multiline) ], 'funcname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') + ('[a-zA-Z_]\w*', Name.Function, '#pop') ], 'classname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'namespace': [ - ('[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop') + ('[a-zA-Z_][\w.]*', Name.Namespace, '#pop') ] } @@ -425,7 +425,7 @@ class VbNetLexer(RegexLexer): r'<=|>=|<>|[-&*/\\^+=<>]', Operator), ('"', String, 'string'), - ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name), + ('[a-zA-Z_]\w*[%&@!#$]?', Name), ('#.*?#', Literal.Date), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer), @@ -460,7 +460,8 @@ class VbNetLexer(RegexLexer): } def analyse_text(text): - if re.search(r'^\s*#If', text, re.I) or re.search(r'^\s*(Module|Namespace)', re.I): + if (re.search(r'^[ \t]*#If', text, re.I) + or re.search(r'^[ \t]*(Module|Namespace)', re.I)): return 0.5 @@ -594,9 +595,9 @@ class FSharpLexer(RegexLexer): 'root': [ (r'\s+', Text), (r'\(\)|\[\]', Name.Builtin.Pseudo), - (r'\b(?<!\.)([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', + (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'), - (r'\b([A-Z][A-Za-z0-9_\']*)', Name), + (r'\b([A-Z][\w\']*)', Name), (r'///.*?\n', String.Doc), (r'//.*?\n', Comment.Single), (r'\(\*(?!\))', Comment, 'comment'), @@ -605,13 +606,13 @@ class FSharpLexer(RegexLexer): (r'"""', String, 'tqs'), (r'"', String, 'string'), - (r'\b(open|module)(\s+)([a-zA-Z0-9_.]+)', + (r'\b(open|module)(\s+)([\w.]+)', bygroups(Keyword, Text, Name.Namespace)), - (r'\b(let!?)(\s+)([a-zA-Z0-9_]+)', + (r'\b(let!?)(\s+)(\w+)', bygroups(Keyword, Text, Name.Variable)), - (r'\b(type)(\s+)([a-zA-Z0-9_]+)', + (r'\b(type)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class)), - (r'\b(member|override)(\s+)([a-zA-Z0-9_]+)(\.)([a-zA-Z0-9_]+)', + (r'\b(member|override)(\s+)(\w+)(\.)(\w+)', bygroups(Keyword, Text, Name, Punctuation, Name.Function)), (r'\b(%s)\b' % '|'.join(keywords), Keyword), (r'(%s)' % '|'.join(keyopts), Operator), @@ -640,9 +641,9 @@ class FSharpLexer(RegexLexer): 'dotted': [ (r'\s+', Text), (r'\.', Punctuation), - (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), - (r'[A-Z][A-Za-z0-9_\']*', Name, '#pop'), - (r'[a-z_][A-Za-z0-9_\']*', Name, '#pop'), + (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), + (r'[A-Z][\w\']*', Name, '#pop'), + (r'[a-z_][\w\']*', Name, '#pop'), # e.g. dictionary index access (r'', Text, '#pop'), ], diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index af9918a0..72fb37bc 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -732,7 +732,7 @@ class CommonLispLexer(RegexLexer): ### couple of useful regexes # characters that are not macro-characters and can be used to begin a symbol - nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]' + nonmacro = r'\\.|[\w!$%&*+-/<=>?@\[\]^{}~]' constituent = nonmacro + '|[#.:]' terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters @@ -956,26 +956,26 @@ class HaskellLexer(RegexLexer): (r'\)', Punctuation, '#pop'), (r'qualified\b', Keyword), # import X as Y - (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)', + (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)', bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'), # import X hiding (functions) - (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()', + (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()', bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'), # import X (functions) - (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', + (r'([A-Z][\w.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), # import X - (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'), + (r'[\w.]+', Name.Namespace, '#pop'), ], 'module': [ (r'\s+', Text), - (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', + (r'([A-Z][\w.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), - (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), + (r'[A-Z][\w.]*', Name.Namespace, '#pop'), ], 'funclist': [ (r'\s+', Text), - (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), + (r'[A-Z]\w*', Keyword.Type), (r'(_[\w\']+|[a-z][\w\']*)', Name.Function), (r'--(?![!#$%&*+./<=>?@\^|_~:\\]).*?$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), @@ -1061,7 +1061,7 @@ class IdrisLexer(RegexLexer): (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved), (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'), (r"('')?[A-Z][\w\']*", Keyword.Type), - (r'[a-z][A-Za-z0-9_\']*', Text), + (r'[a-z][\w\']*', Text), # Special Symbols (r'(<-|::|->|=>|=)', Operator.Word), # specials (r'([\(\)\{\}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials @@ -1078,13 +1078,13 @@ class IdrisLexer(RegexLexer): ], 'module': [ (r'\s+', Text), - (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', + (r'([A-Z][\w.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), - (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), + (r'[A-Z][\w.]*', Name.Namespace, '#pop'), ], 'funclist': [ (r'\s+', Text), - (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), + (r'[A-Z]\w*', Keyword.Type), (r'(_[\w\']+|[a-z][\w\']*)', Name.Function), (r'--.*$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), @@ -1184,7 +1184,7 @@ class AgdaLexer(RegexLexer): ], 'module': [ (r'{-', Comment.Multiline, 'comment'), - (r'[a-zA-Z][a-zA-Z0-9_.]*', Name, '#pop'), + (r'[a-zA-Z][\w.]*', Name, '#pop'), (r'[^a-zA-Z]*', Text) ], 'comment': HaskellLexer.tokens['comment'], @@ -1360,7 +1360,7 @@ class SMLLexer(RegexLexer): nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ] - alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*" + alphanumid_re = r"[a-zA-Z][\w']*" symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+" # A character constant is a sequence of the form #s, where s is a string @@ -1450,7 +1450,7 @@ class SMLLexer(RegexLexer): (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'), # Regular identifiers, long and otherwise - (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\'[\w\']*', Name.Decorator), (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"), (r'(%s)' % alphanumid_re, id_callback), (r'(%s)' % symbolicid_re, id_callback), @@ -1697,9 +1697,9 @@ class OcamlLexer(RegexLexer): 'root': [ (r'\s+', Text), (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), - (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', + (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'), - (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), + (r'\b([A-Z][\w\']*)', Name.Class), (r'\(\*(?![)])', Comment, 'comment'), (r'\b(%s)\b' % '|'.join(keywords), Keyword), (r'(%s)' % '|'.join(keyopts[::-1]), Operator), @@ -1739,9 +1739,9 @@ class OcamlLexer(RegexLexer): 'dotted': [ (r'\s+', Text), (r'\.', Punctuation), - (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), - (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'), - (r'[a-z_][A-Za-z0-9_\']*', Name, '#pop'), + (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), + (r'[A-Z][\w\']*', Name.Class, '#pop'), + (r'[a-z_][\w\']*', Name, '#pop'), ], } @@ -1801,9 +1801,9 @@ class ErlangLexer(RegexLexer): 'div', 'not', 'or', 'orelse', 'rem', 'xor' ] - atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')" + atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" - variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)' + variable_re = r'(?:[A-Z_]\w*)' escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' @@ -2309,9 +2309,9 @@ class CoqLexer(RegexLexer): (r'\b(%s)\b' % '|'.join(keywords4), Keyword), (r'\b(%s)\b' % '|'.join(keywords5), Keyword.Pseudo), (r'\b(%s)\b' % '|'.join(keywords6), Keyword.Reserved), - (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', + (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'), - (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), + (r'\b([A-Z][\w\']*)', Name.Class), (r'(%s)' % '|'.join(keyopts[::-1]), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), @@ -2348,8 +2348,8 @@ class CoqLexer(RegexLexer): 'dotted': [ (r'\s+', Text), (r'\.', Punctuation), - (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), - (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'), + (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), + (r'[A-Z][\w\']*', Name.Class, '#pop'), (r'[a-z][a-z0-9_\']*', Name, '#pop'), (r'', Text, '#pop') ], @@ -2437,7 +2437,7 @@ class NewLispLexer(RegexLexer): ] # valid names - valid_name = r'([a-zA-Z0-9!$%&*+.,/<=>?@^_~|-])+|(\[.*?\])+' + valid_name = r'([\w!$%&*+.,/<=>?@^~|-])+|(\[.*?\])+' tokens = { 'root': [ @@ -2552,15 +2552,15 @@ class NixLexer(RegexLexer): (r"''", String.Single, 'singlequote'), # paths - (r'[a-zA-Z0-9._+-]*(\/[a-zA-Z0-9._+-]+)+', Literal), - (r'\<[a-zA-Z0-9._+-]+(\/[a-zA-Z0-9._+-]+)*\>', Literal), + (r'[\w.+-]*(\/[\w.+-]+)+', Literal), + (r'\<[\w.+-]+(\/[\w.+-]+)*\>', Literal), # urls - (r'[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9%/?:@&=+$,\\_.!~*\'-]+', Literal), + (r'[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[\w%/?:@&=+$,\\.!~*\'-]+', Literal), # names of variables - (r'[a-zA-Z0-9-_]+\s*=', String.Symbol), - (r'[a-zA-Z_][a-zA-Z0-9_\'-]*', Text), + (r'[\w-]+\s*=', String.Symbol), + (r'[a-zA-Z_][\w\'-]*', Text), ], 'comment': [ diff --git a/pygments/lexers/graph.py b/pygments/lexers/graph.py index fccba5a4..6aa446c7 100644 --- a/pygments/lexers/graph.py +++ b/pygments/lexers/graph.py @@ -73,7 +73,7 @@ class CypherLexer(RegexLexer): (r'\s+', Whitespace), ], 'barewords': [ - (r'[a-z][a-zA-Z0-9_]*', Name), + (r'[a-z]\w*', Name), (r'\d+', Number), ], } diff --git a/pygments/lexers/hdl.py b/pygments/lexers/hdl.py index 1ebe4e5c..55eec252 100644 --- a/pygments/lexers/hdl.py +++ b/pygments/lexers/hdl.py @@ -54,7 +54,7 @@ class VerilogLexer(RegexLexer): (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.;\']', Punctuation), - (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), + (r'`[a-zA-Z_]\w*', Name.Constant), (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)), (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text), @@ -96,8 +96,8 @@ class VerilogLexer(RegexLexer): r'bit|logic|reg|' r'supply0|supply1|tri|triand|trior|tri0|tri1|trireg|uwire|wire|wand|wor' r'shortreal|real|realtime)\b', Keyword.Type), - ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*:(?!:)', Name.Label), + ('[a-zA-Z_]\w*', Name), ], 'string': [ (r'"', String, '#pop'), @@ -115,7 +115,7 @@ class VerilogLexer(RegexLexer): (r'\n', Comment.Preproc, '#pop'), ], 'import': [ - (r'[a-zA-Z0-9_:]+\*?', Name.Namespace, '#pop') + (r'[\w:]+\*?', Name.Namespace, '#pop') ] } @@ -169,7 +169,7 @@ class SystemVerilogLexer(RegexLexer): (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.;\']', Punctuation), - (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), + (r'`[a-zA-Z_]\w*', Name.Constant), (r'(accept_on|alias|always|always_comb|always_ff|always_latch|' r'and|assert|assign|assume|automatic|before|begin|bind|bins|' @@ -230,11 +230,11 @@ class SystemVerilogLexer(RegexLexer): r'bit|logic|reg|' r'supply0|supply1|tri|triand|trior|tri0|tri1|trireg|uwire|wire|wand|wor' r'shortreal|real|realtime)\b', Keyword.Type), - ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*:(?!:)', Name.Label), + ('[a-zA-Z_]\w*', Name), ], 'classname': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), ], 'string': [ (r'"', String, '#pop'), @@ -252,7 +252,7 @@ class SystemVerilogLexer(RegexLexer): (r'\n', Comment.Preproc, '#pop'), ], 'import': [ - (r'[a-zA-Z0-9_:]+\*?', Name.Namespace, '#pop') + (r'[\w:]+\*?', Name.Namespace, '#pop') ] } @@ -290,19 +290,19 @@ class VhdlLexer(RegexLexer): (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single), (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char), (r'[~!%^&*+=|?:<>/-]', Operator), - (r"'[a-zA-Z_][a-zA-Z0-9_]*", Name.Attribute), + (r"'[a-zA-Z_]\w*", Name.Attribute), (r'[()\[\],.;\']', Punctuation), (r'"[^\n\\]*"', String), - (r'(library)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(library)(\s+)([a-zA-Z_]\w*)', bygroups(Keyword, Text, Name.Namespace)), (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)), - (r'(use)(\s+)([a-zA-Z_][\.a-zA-Z0-9_]*)', + (r'(use)(\s+)([a-zA-Z_][\.\w]*)', bygroups(Keyword, Text, Name.Namespace)), - (r'(entity|component)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(entity|component)(\s+)([a-zA-Z_]\w*)', bygroups(Keyword, Text, Name.Class)), - (r'(architecture|configuration)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)' - r'(of)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)(\s+)(is)', + (r'(architecture|configuration)(\s+)([a-zA-Z_]\w*)(\s+)' + r'(of)(\s+)([a-zA-Z_]\w*)(\s+)(is)', bygroups(Keyword, Text, Name.Class, Text, Keyword, Text, Name.Class, Text, Keyword)), @@ -312,11 +312,11 @@ class VhdlLexer(RegexLexer): include('keywords'), include('numbers'), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), ], 'endblock': [ include('keywords'), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class), + (r'[a-zA-Z_]\w*', Name.Class), (r'(\s+)', Text), (r';', Punctuation, '#pop'), ], diff --git a/pygments/lexers/inferno.py b/pygments/lexers/inferno.py index 56f02b98..6aecafdb 100644 --- a/pygments/lexers/inferno.py +++ b/pygments/lexers/inferno.py @@ -35,7 +35,7 @@ class LimboLexer(RegexLexer): tokens = { 'whitespace': [ - (r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*:(\s*)\n)', + (r'^(\s*)([a-zA-Z_]\w*:(\s*)\n)', bygroups(Text, Name.Label)), (r'\n', Text), (r'\s+', Text), @@ -64,7 +64,7 @@ class LimboLexer(RegexLexer): (r'(byte|int|big|real|string|array|chan|list|adt' r'|fn|ref|of|module|self|type)\b', Keyword.Type), (r'(con|iota|nil)\b', Keyword.Constant), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], 'statement' : [ include('whitespace'), diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 06dcc81b..24ff79ca 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -41,7 +41,7 @@ class JavaLexer(RegexLexer): (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while)\b', Keyword), @@ -51,9 +51,9 @@ class JavaLexer(RegexLexer): (r'(boolean|byte|char|double|float|int|long|short|void)\b', Keyword.Type), # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]<>]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]<>]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null)\b', Keyword.Constant), @@ -61,9 +61,9 @@ class JavaLexer(RegexLexer): (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -71,10 +71,10 @@ class JavaLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -340,14 +340,14 @@ class GosuLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # modifiers etc. - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # modifiers etc. + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(in|as|typeof|statictypeof|typeis|typeas|if|else|foreach|for|' r'index|while|do|continue|break|return|try|catch|finally|this|' r'throw|new|switch|case|default|eval|super|outer|classpath|' @@ -360,16 +360,16 @@ class GosuLexer(RegexLexer): Keyword.Type), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null|NaN|Infinity)\b', Keyword.Constant), - (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_]\w*)', bygroups(Keyword.Declaration, Text, Name.Class)), - (r'(uses)(\s+)([a-zA-Z0-9_.]+\*?)', + (r'(uses)(\s+)([\w.]+\*?)', bygroups(Keyword.Namespace, Text, Name.Namespace)), (r'"', String, 'string'), - (r'(\??[\.#])([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\??[\.#])([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'(:)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(:)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_\$]\w*', Name), (r'and|or|not|[\\~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'[0-9]+', Number.Integer), @@ -438,14 +438,14 @@ class GroovyLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while|in|as)\b', Keyword), @@ -464,9 +464,9 @@ class GroovyLexer(RegexLexer): (r'\$/((?!/\$).)*/\$', String), (r'/(\\\\|\\"|[^/])*/', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -474,10 +474,10 @@ class GroovyLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -550,8 +550,8 @@ class IokeLexer(RegexLexer): (r'#r\[', String.Regex, 'squareRegexp'), #Symbols - (r':[a-zA-Z0-9_!:?]+', String.Symbol), - (r'[a-zA-Z0-9_!:?]+:(?![a-zA-Z0-9_!?])', String.Other), + (r':[\w!:?]+', String.Symbol), + (r'[\w!:?]+:(?![\w!?])', String.Other), (r':"(\\\\|\\"|[^"])*"', String.Symbol), #Documentation @@ -564,10 +564,10 @@ class IokeLexer(RegexLexer): (r'#\[', String, 'squareText'), #Mimic - (r'[a-zA-Z0-9_][a-zA-Z0-9!?_:]+(?=\s*=.*mimic\s)', Name.Entity), + (r'\w[a-zA-Z0-9!?_:]+(?=\s*=.*mimic\s)', Name.Entity), #Assignment - (r'[a-zA-Z_][a-zA-Z0-9_!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))', + (r'[a-zA-Z_][\w!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))', Name.Variable), # keywords @@ -658,17 +658,17 @@ class IokeLexer(RegexLexer): r'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|' r'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|' u'\\?|#|\u2260|\u2218|\u2208|\u2209)', Operator), - (r'(and|nand|or|xor|nor|return|import)(?![a-zA-Z0-9_!?])', + (r'(and|nand|or|xor|nor|return|import)(?![\w!?])', Operator), # Punctuation (r'(\`\`|\`|\'\'|\'|\.|\,|@@|@|\[|\]|\(|\)|{|})', Punctuation), #kinds - (r'[A-Z][a-zA-Z0-9_!:?]*', Name.Class), + (r'[A-Z][\w!:?]*', Name.Class), #default cellnames - (r'[a-z_][a-zA-Z0-9_!:?]*', Name) + (r'[a-z_][\w!:?]*', Name) ] } @@ -840,14 +840,14 @@ class TeaLangLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(and|break|else|foreach|if|in|not|or|reverse)\b', Keyword), (r'(as|call|define)\b', Keyword.Declaration), @@ -856,9 +856,9 @@ class TeaLangLexer(RegexLexer): (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), (r'\'(\\\\|\\\'|[^\'])*\'', String), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'(isa|[.]{3}|[.]{2}|[=#!<>+-/%&;,.\*\\\(\)\[\]\{\}])', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -866,10 +866,10 @@ class TeaLangLexer(RegexLexer): (r'\n', Text) ], 'template': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -894,9 +894,9 @@ class CeylonLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), @@ -919,10 +919,10 @@ class CeylonLexer(RegexLexer): (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char), (r'".*``.*``.*"', String.Interpol), - (r'(\.)([a-z_][a-zA-Z0-9_]*)', + (r'(\.)([a-z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'\d{1,3}(_\d{3})+\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float), (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', @@ -939,10 +939,10 @@ class CeylonLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[A-Za-z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[A-Za-z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-z][a-zA-Z0-9_.]*', + (r'[a-z][\w.]*', Name.Namespace, '#pop') ], 'comment': [ @@ -1034,14 +1034,14 @@ class XtendLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_$][a-zA-Z0-9_$]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_$][\w$]*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while|IF|' r'ELSE|ELSEIF|ENDIF|FOR|ENDFOR|SEPARATOR|BEFORE|AFTER)\b', @@ -1060,8 +1060,8 @@ class XtendLexer(RegexLexer): (u'(\u00BB)', String, 'template'), (r'"(\\\\|\\"|[^"])*"', String), (r"'(\\\\|\\'|[^'])*'", String), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -1069,10 +1069,10 @@ class XtendLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], 'template': [ (r"'''", String, '#pop'), @@ -1175,8 +1175,8 @@ class GoloLexer(RegexLexer): (r'(module|import)(\s+)', bygroups(Keyword.Namespace, Text), 'modname'), - (r'\b([a-zA-Z_][a-z$A-Z0-9._]*)(::)', bygroups(Name.Namespace, Punctuation)), - (r'\b([a-zA-Z_][a-z$A-Z0-9_]*(?:\.[a-zA-Z_][a-z$A-Z0-9_]*)+)\b', Name.Namespace), + (r'\b([a-zA-Z_][\w$.]*)(::)', bygroups(Name.Namespace, Punctuation)), + (r'\b([a-zA-Z_][\w$]*(?:\.[a-zA-Z_][\w$]*)+)\b', Name.Namespace), (r'(let|var)(\s+)', bygroups(Keyword.Declaration, Text), @@ -1201,7 +1201,7 @@ class GoloLexer(RegexLexer): bygroups(Name.Builtin, Punctuation)), (r'(print|println|readln|raise|fun' r'|asInterfaceInstance)\b', Name.Builtin), - (r'(`?[a-zA-Z_][a-z$A-Z0-9_]*)(\()', + (r'(`?[a-zA-Z_][\w$]*)(\()', bygroups(Name.Function, Punctuation)), (r'-?[\d_]*\.[\d_]*([eE][+-]?\d[\d_]*)?F?', Number.Float), @@ -1210,7 +1210,7 @@ class GoloLexer(RegexLexer): (r'-?\d[\d_]*L', Number.Integer.Long), (r'-?\d[\d_]*', Number.Integer), - ('`?[a-zA-Z_][a-z$A-Z0-9_]*', Name), + ('`?[a-zA-Z_][\w$]*', Name), (r'"""', String, combined('stringescape', 'triplestring')), (r'"', String, combined('stringescape', 'doublestring')), @@ -1220,16 +1220,16 @@ class GoloLexer(RegexLexer): ], 'funcname': [ - (r'`?[a-zA-Z_][a-z$A-Z0-9_]*', Name.Function, '#pop'), + (r'`?[a-zA-Z_][\w$]*', Name.Function, '#pop'), ], 'modname': [ - (r'[a-zA-Z_][a-z$A-Z0-9._]*\*?', Name.Namespace, '#pop') + (r'[a-zA-Z_][\w$.]*\*?', Name.Namespace, '#pop') ], 'structname': [ - (r'`?[a-zA-Z0-9_.]+\*?', Name.Class, '#pop') + (r'`?[\w.]+\*?', Name.Class, '#pop') ], 'varname': [ - (r'`?[a-zA-Z_][a-z$A-Z0-9_]*', Name.Variable, '#pop'), + (r'`?[a-zA-Z_][\w$]*', Name.Variable, '#pop'), ], 'string': [ (r'[^\\\'"\n]+', String), diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py index cfbfc432..cd98a755 100644 --- a/pygments/lexers/math.py +++ b/pygments/lexers/math.py @@ -97,8 +97,8 @@ class JuliaLexer(RegexLexer): (r'[E]?"', String, combined('stringescape', 'string')), # names - (r'@[a-zA-Z0-9_.]+', Name.Decorator), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'@[\w.]+', Name.Decorator), + (r'[a-zA-Z_]\w*', Name), # numbers (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float), @@ -116,13 +116,13 @@ class JuliaLexer(RegexLexer): ], 'funcname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop'), + ('[a-zA-Z_]\w*', Name.Function, '#pop'), ('\([^\s\w{]{1,2}\)', Operator, '#pop'), ('[^\s\w{]{1,2}', Operator, '#pop'), ], 'typename': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'stringescape': [ @@ -133,7 +133,7 @@ class JuliaLexer(RegexLexer): 'string': [ (r'"', String, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings - (r'\$(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?', + (r'\$(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?', String.Interpol), (r'[^\\"$]+', String), # quotes, dollar signs, and backslashes must be parsed one at a time @@ -357,7 +357,7 @@ class MatlabLexer(RegexLexer): (r'\d+', Number.Integer), (r'(?<![\w\)\].])\'', String, 'string'), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), (r'.', Text), ], 'string': [ @@ -816,7 +816,7 @@ class OctaveLexer(RegexLexer): (r'(?<=[\w\)\].])\'+', Operator), (r'(?<![\w\)\].])\'', String, 'string'), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), (r'.', Text), ], 'string': [ @@ -881,7 +881,7 @@ class ScilabLexer(RegexLexer): (r'\d+[eEf][+-]?[0-9]+', Number.Float), (r'\d+', Number.Integer), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), (r'.', Text), ], 'string': [ @@ -1461,7 +1461,7 @@ class BugsLexer(RegexLexer): % r'|'.join(_FUNCTIONS + _DISTRIBUTIONS), Name.Builtin), # Regular variable names - (r'[A-Za-z][A-Za-z0-9_.]*', Name), + (r'[A-Za-z][\w.]*', Name), # Number Literals (r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', Number), # Punctuation @@ -1521,7 +1521,7 @@ class JagsLexer(RegexLexer): ], 'names' : [ # Regular variable names - (r'[a-zA-Z][a-zA-Z0-9_.]*\b', Name), + (r'[a-zA-Z][\w.]*\b', Name), ], 'comments' : [ # do not use stateful comments @@ -1619,10 +1619,10 @@ class StanLexer(RegexLexer): + _stan_builtins.DISTRIBUTIONS), Name.Builtin), # Special names ending in __, like lp__ - (r'[A-Za-z][A-Za-z0-9_]*__\b', Name.Builtin.Pseudo), + (r'[A-Za-z]\w*__\b', Name.Builtin.Pseudo), (r'(%s)\b' % r'|'.join(_stan_builtins.RESERVED), Keyword.Reserved), # Regular variable names - (r'[A-Za-z][A-Za-z0-9_]*\b', Name), + (r'[A-Za-z]\w*\b', Name), # Real Literals (r'-?[0-9]+(\.[0-9]+)?[eE]-?[0-9]+', Number.Float), (r'-?[0-9]*\.[0-9]*', Number.Float), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 01c61513..cef71f95 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -95,7 +95,7 @@ class LSLLexer(RegexLexer): (lsl_invalid_unimplemented, Error), (lsl_reserved_godmode, Keyword.Reserved), (lsl_reserved_log, Keyword.Reserved), - (r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', Name.Variable), + (r'\b([a-zA-Z_]\w*)\b', Name.Variable), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d*', Number.Float), (r'(\d+\.\d*|\.\d+)', Number.Float), (r'0[xX][0-9a-fA-F]+', Number.Hex), @@ -164,7 +164,7 @@ class ECLLexer(RegexLexer): (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]+', Operator), (r'[{}()\[\],.;]', Punctuation), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), ], 'hash': [ (r'^#.*$', Comment.Preproc), @@ -514,7 +514,7 @@ class LogtalkLexer(RegexLexer): (r'0x[0-9a-fA-F]+', Number), (r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number), # Variables - (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable), + (r'([A-Z_]\w*)', Name.Variable), # Event handlers (r'(after|before)(?=[(])', Keyword), # Execution-context methods @@ -630,7 +630,7 @@ class LogtalkLexer(RegexLexer): # Ponctuation (r'[()\[\],.|]', Text), # Atoms - (r"[a-z][a-zA-Z0-9_]*", Text), + (r"[a-z]\w*", Text), (r"'", String, 'quoted_atom'), ], @@ -661,8 +661,8 @@ class LogtalkLexer(RegexLexer): (r'op(?=[(])', Keyword, 'root'), (r'(c(alls|oinductive)|reexport|use(s|_module))(?=[(])', Keyword, 'root'), - (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'), - (r'[a-z][a-zA-Z0-9_]*[.]', Text, 'root'), + (r'[a-z]\w*(?=[(])', Text, 'root'), + (r'[a-z]\w*[.]', Text, 'root'), ], 'entityrelations': [ @@ -675,9 +675,9 @@ class LogtalkLexer(RegexLexer): (r'0x[0-9a-fA-F]+', Number), (r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number), # Variables - (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable), + (r'([A-Z_]\w*)', Name.Variable), # Atoms - (r"[a-z][a-zA-Z0-9_]*", Text), + (r"[a-z]\w*", Text), (r"'", String, 'quoted_atom'), # Strings (r'"(\\\\|\\"|[^"])*"', String), @@ -747,11 +747,11 @@ class GnuplotLexer(RegexLexer): (_shortened_many('pwd$', 're$read', 'res$et', 'scr$eendump', 'she$ll', 'test$'), Keyword, 'noargs'), - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(=)', + ('([a-zA-Z_]\w*)(\s*)(=)', bygroups(Name.Variable, Text, Operator), 'genericargs'), - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*\(.*?\)\s*)(=)', + ('([a-zA-Z_]\w*)(\s*\(.*?\)\s*)(=)', bygroups(Name.Function, Text, Operator), 'genericargs'), - (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), # macros + (r'@[a-zA-Z_]\w*', Name.Constant), # macros (r';', Keyword), ], 'comment': [ @@ -797,10 +797,10 @@ class GnuplotLexer(RegexLexer): ('[,.~!%^&*+=|?:<>/-]', Operator), ('[{}()\[\]]', Punctuation), (r'(eq|ne)\b', Operator.Word), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\()', + (r'([a-zA-Z_]\w*)(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), - (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), # macros + (r'[a-zA-Z_]\w*', Name), + (r'@[a-zA-Z_]\w*', Name.Constant), # macros (r'\\\n', Text), ], 'optionarg': [ @@ -1803,15 +1803,15 @@ class NewspeakLexer(RegexLexer): 'root' : [ (r'\b(Newsqueak2)\b',Keyword.Declaration), (r"'[^']*'",String), - (r'\b(class)(\s+)([a-zA-Z0-9_]+)(\s*)', + (r'\b(class)(\s+)(\w+)(\s*)', bygroups(Keyword.Declaration,Text,Name.Class,Text)), (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b', Keyword), - (r'([a-zA-Z0-9_]+\:)(\s*)([a-zA-Z_]\w+)', + (r'(\w+\:)(\s*)([a-zA-Z_]\w+)', bygroups(Name.Function,Text,Name.Variable)), - (r'([a-zA-Z0-9_]+)(\s*)(=)', + (r'(\w+)(\s*)(=)', bygroups(Name.Attribute,Text,Operator)), - (r'<[a-zA-Z0-9_]+>', Comment.Special), + (r'<\w+>', Comment.Special), include('expressionstat'), include('whitespace') ], @@ -1890,7 +1890,9 @@ class GherkinLexer(RegexLexer): (r"[^\|]", Name.Variable), ], 'scenario_sections_on_stack': [ - (feature_element_keywords, bygroups(Name.Function, Keyword, Keyword, Name.Function), "feature_elements_on_stack"), + (feature_element_keywords, + bygroups(Name.Function, Keyword, Keyword, Name.Function), + "feature_elements_on_stack"), ], 'narrative': [ include('scenario_sections_on_stack'), @@ -2020,23 +2022,23 @@ class AsymptoteLexer(RegexLexer): r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|' r'picture|position|real|revolution|slice|splitface|ticksgridT|' r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type), - ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*:(?!:)', Name.Label), + ('[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions - (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|\*))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*\([^;]*?\))' # signature + (r'((?:[\w*\s])+?(?:\s|\*))' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')({)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations - (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|\*))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*\([^;]*?\))' # signature + (r'((?:[\w*\s])+?(?:\s|\*))' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), @@ -2189,7 +2191,7 @@ class AutohotkeyLexer(RegexLexer): (r'^;.*?$', Comment.Singleline), (r'[]{}(),;[]', Punctuation), (r'(in|is|and|or|not)\b', Operator.Word), - (r'\%[a-zA-Z_#@$][a-zA-Z0-9_#@$]*\%', Name.Variable), + (r'\%[a-zA-Z_#@$][\w#@$]*\%', Name.Variable), (r'!=|==|:=|\.=|<<|>>|[-~+/*%=<>&^|?:!.]', Operator), include('commands'), include('labels'), @@ -2197,7 +2199,7 @@ class AutohotkeyLexer(RegexLexer): include('builtInVariables'), (r'"', String, combined('stringescape', 'dqs')), include('numbers'), - (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name), + (r'[a-zA-Z_#@$][\w#@$]*', Name), (r'\\|\'', Text), (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape), include('garbage'), @@ -2489,18 +2491,18 @@ class ProtoBufLexer(RegexLexer): (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'[+-=]', Operator), - (r'([a-zA-Z_][a-zA-Z0-9_\.]*)([ \t]*)(=)', + (r'([a-zA-Z_][\w\.]*)([ \t]*)(=)', bygroups(Name.Attribute, Text, Operator)), - ('[a-zA-Z_][a-zA-Z0-9_\.]*', Name), + ('[a-zA-Z_][\w\.]*', Name), ], 'package': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Namespace, '#pop') + (r'[a-zA-Z_]\w*', Name.Namespace, '#pop') ], 'message': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'type': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name, '#pop') + (r'[a-zA-Z_]\w*', Name, '#pop') ], } @@ -2523,12 +2525,12 @@ class HybrisLexer(RegexLexer): 'root': [ # method names (r'^(\s*(?:function|method|operator\s+)+?)' - r'([a-zA-Z_][a-zA-Z0-9_]*)' + r'([a-zA-Z_]\w*)' r'(\s*)(\()', bygroups(Keyword, Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(break|case|catch|next|default|do|else|finally|for|foreach|of|' r'unless|if|new|return|switch|me|throw|try|while)\b', Keyword), (r'(extends|private|protected|public|static|throws|function|method|' @@ -2564,10 +2566,10 @@ class HybrisLexer(RegexLexer): r'Exception)\b', Keyword.Type), (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?\-@]+', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), @@ -2575,10 +2577,10 @@ class HybrisLexer(RegexLexer): (r'\n', Text), ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -2627,7 +2629,7 @@ class AwkLexer(RegexLexer): (r'(ARGC|ARGIND|ARGV|CONVFMT|ENVIRON|ERRNO|FIELDWIDTHS|FILENAME|FNR|FS|' r'IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|RSTART|RT|' r'SUBSEP)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -2844,7 +2846,7 @@ class UrbiscriptLexer(ExtendedRegexLexer): Operator.Word), (r'[{}\[\]()]+', Punctuation), (r'(?:;|\||,|&|\?|!)+', Punctuation), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'0x[0-9a-fA-F]+', Number.Hex), # Float, Integer, Angle and Duration (r'(?:[0-9]+(?:(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)?' @@ -3050,7 +3052,7 @@ class MscgenLexer(RegexLexer): aliases = ['mscgen', 'msc'] filenames = ['*.msc'] - _var = r'([a-zA-Z0-9_]+|"(?:\\"|[^"])*")' + _var = r'(\w+|"(?:\\"|[^"])*")' tokens = { 'root': [ @@ -3252,7 +3254,7 @@ class SourcePawnLexer(RegexLexer): r'public|return|sizeof|static|decl|struct|switch)\b', Keyword), (r'(bool|Float)\b', Keyword.Type), (r'(true|false)\b', Keyword.Constant), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], 'string': [ (r'"', String, '#pop'), @@ -3352,7 +3354,7 @@ class PuppetLexer(RegexLexer): ], 'names': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Attribute), + ('[a-zA-Z_]\w*', Name.Attribute), (r'(\$\S+)(\[)(\S+)(\])', bygroups(Name.Variable, Punctuation, String, Punctuation)), (r'\$\S+', Name.Variable), @@ -3589,9 +3591,9 @@ class RPMSpecLexer(RegexLexer): 'interpol': [ (r'%\{?__[a-z_]+\}?', Name.Function), (r'%\{?_([a-z_]+dir|[a-z_]+path|prefix)\}?', Keyword.Pseudo), - (r'%\{\?[A-Za-z0-9_]+\}', Name.Variable), + (r'%\{\?\w+\}', Name.Variable), (r'\$\{?RPM_[A-Z0-9_]+\}?', Name.Variable.Global), - (r'%\{[a-zA-Z][a-zA-Z0-9_]+\}', Keyword.Constant), + (r'%\{[a-zA-Z]\w+\}', Keyword.Constant), ] } @@ -3722,7 +3724,7 @@ class AutoItLexer(RegexLexer): (r'(#comments-start|#cs).*?(#comments-end|#ce)', Comment.Multiline), (r'[\[\]{}(),;]', Punctuation), (r'(and|or|not)\b', Operator.Word), - (r'[\$|@][a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), + (r'[\$|@][a-zA-Z_]\w*', Name.Variable), (r'!=|==|:=|\.=|<<|>>|[-~+/*%=<>&^|?:!.]', Operator), include('commands'), include('labels'), @@ -3730,7 +3732,7 @@ class AutoItLexer(RegexLexer): include('builtInMarcros'), (r'"', String, combined('stringescape', 'dqs')), include('numbers'), - (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name), + (r'[a-zA-Z_#@$][\w#@$]*', Name), (r'\\|\'', Text), (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape), (r'_\n', Text), # Line continuation @@ -3999,12 +4001,12 @@ class AmbientTalkLexer(RegexLexer): (r'"(\\\\|\\"|[^"])*"', String), (r'\|', Punctuation, 'arglist'), (r'<:|[\^\*!%&<>+=,./?-]|:=', Operator), - (r"`[a-zA-Z_][a-zA-Z0-9_]*", String.Symbol), - (r"[a-zA-Z_][a-zA-Z0-9_]*:", Name.Function), + (r"`[a-zA-Z_]\w*", String.Symbol), + (r"[a-zA-Z_]\w*:", Name.Function), (r"[\{\}()\[\];`]", Punctuation), (r'(self|super)\b', Name.Variable.Instance), - (r"[a-zA-Z_][a-zA-Z0-9_]*", Name.Variable), - (r"@[a-zA-Z_][a-zA-Z0-9_]*", Name.Class), + (r"[a-zA-Z_]\w*", Name.Variable), + (r"@[a-zA-Z_]\w*", Name.Class), (r"@\[", Name.Class, 'annotations'), include('numbers'), ], @@ -4013,9 +4015,9 @@ class AmbientTalkLexer(RegexLexer): (r'\d+', Number.Integer) ], 'namespace': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*\.', Name.Namespace), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Function , '#pop'), - (r'[a-zA-Z_][a-zA-Z0-9_]*(?!\.)', Name.Function , '#pop') + (r'[a-zA-Z_]\w*\.', Name.Namespace), + (r'[a-zA-Z_]\w*:', Name.Function , '#pop'), + (r'[a-zA-Z_]\w*(?!\.)', Name.Function , '#pop') ], 'annotations' : [ (r"(.*?)\]", Name.Class, '#pop') @@ -4023,7 +4025,7 @@ class AmbientTalkLexer(RegexLexer): 'arglist' : [ (r'\|', Punctuation, '#pop'), (r'\s*(,)\s*', Punctuation), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), + (r'[a-zA-Z_]\w*', Name.Variable), ], } @@ -4070,7 +4072,7 @@ class PawnLexer(RegexLexer): r'public|return|sizeof|tagof|state|goto)\b', Keyword), (r'(bool|Float)\b', Keyword.Type), (r'(true|false)\b', Keyword.Constant), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], 'string': [ (r'"', String, '#pop'), @@ -4229,7 +4231,7 @@ class PanLexer(RegexLexer): 'curly': [ (r'}', Keyword, '#pop'), (r':-', Keyword), - (r'[a-zA-Z0-9_]+', Name.Variable), + (r'\w+', Name.Variable), (r'[^}:"\'`$]+', Punctuation), (r':', Punctuation), include('root'), @@ -4273,22 +4275,27 @@ class RedLexer(RegexLexer): r'power|remainder|round|subtract|even\?|odd\?|and~|complement|or~|xor~|' r'append|at|back|change|clear|copy|find|head|head\?|index\?|insert|' r'length\?|next|pick|poke|remove|reverse|select|sort|skip|swap|tail|tail\?|' - r'take|trim|create|close|delete|modify|open|open\?|query|read|rename|update|write)$', word): + r'take|trim|create|close|delete|modify|open|open\?|query|read|rename|' + r'update|write)$', word): yield match.start(), Name.Function, word elif re.match( - r'(yes|on|no|off|true|false|tab|cr|lf|newline|escape|slash|sp|space|null|none|crlf|dot|null-byte)$', word): + r'(yes|on|no|off|true|false|tab|cr|lf|newline|escape|slash|sp|space|null|' + r'none|crlf|dot|null-byte)$', word): yield match.start(), Name.Builtin.Pseudo, word elif re.match( - r'(#system-global|#include|#enum|#define|#either|#if|#import|#export|#switch|#default|#get-definition)$', word): + r'(#system-global|#include|#enum|#define|#either|#if|#import|#export|' + r'#switch|#default|#get-definition)$', word): yield match.start(), Keyword.Namespace, word elif re.match( - r'(system|halt|quit|quit-return|do|load|q|recycle|call|run|ask|parse|raise-error|' - r'return|exit|break|alias|push|pop|probe|\?\?|spec-of|body-of|quote|forever)$', word): + r'(system|halt|quit|quit-return|do|load|q|recycle|call|run|ask|parse|' + r'raise-error|return|exit|break|alias|push|pop|probe|\?\?|spec-of|body-of|' + r'quote|forever)$', word): yield match.start(), Name.Exception, word elif re.match( - r'(action\?|block\?|char\?|datatype\?|file\?|function\?|get-path\?|zero\?|any-struct\?|' - r'get-word\?|integer\?|issue\?|lit-path\?|lit-word\?|logic\?|native\?|none\?|' - r'op\?|paren\?|path\?|refinement\?|set-path\?|set-word\?|string\?|unset\?|word\?|any-series\?)$', word): + r'(action\?|block\?|char\?|datatype\?|file\?|function\?|get-path\?|zero\?|' + r'get-word\?|integer\?|issue\?|lit-path\?|lit-word\?|logic\?|native\?|' + r'op\?|paren\?|path\?|refinement\?|set-path\?|set-word\?|string\?|unset\?|' + r'any-struct\?|none\?|word\?|any-series\?)$', word): yield match.start(), Keyword, word elif re.match(r'(JNICALL|stdcall|cdecl|infix)$', word): yield match.start(), Keyword.Namespace, word @@ -4322,7 +4329,8 @@ class RedLexer(RegexLexer): (r'#{[0-9a-fA-F\s]*}', Number.Hex), (r'2#{', Number.Hex, 'bin2'), (r'64#{[0-9a-zA-Z+/=\s]*}', Number.Hex), - (r'([0-9a-fA-F]+)(h)((\s)|(?=[\[\]{}""\(\)]))', bygroups(Number.Hex, Name.Variable, Whitespace)), + (r'([0-9a-fA-F]+)(h)((\s)|(?=[\[\]{}""\(\)]))', + bygroups(Number.Hex, Name.Variable, Whitespace)), (r'"', String, 'string'), (r'{', String, 'string2'), (r';#+.*\n', Comment.Special), @@ -4349,7 +4357,7 @@ class RedLexer(RegexLexer): (r'comment\s[^(\s{\"\[]+', Comment), (r'/[^(\^{^")\s/[\]]*', Name.Attribute), (r'([^(\^{^")\s/[\]]+)(?=[:({"\s/\[\]])', word_callback), - (r'<[a-zA-Z0-9:._-]*>', Name.Tag), + (r'<[\w:.-]*>', Name.Tag), (r'<[^(<>\s")]+', Name.Tag, 'tag'), (r'([^(\^{^")\s]+)', Text), ], diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index f809dae9..13201912 100644 --- a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -79,7 +79,7 @@ class BashLexer(RegexLexer): 'curly': [ (r'}', Keyword, '#pop'), (r':-', Keyword), - (r'[a-zA-Z0-9_]+', Name.Variable), + (r'\w+', Name.Variable), (r'[^}:"\'`$]+', Punctuation), (r':', Punctuation), include('root'), @@ -314,7 +314,7 @@ class TcshLexer(RegexLexer): 'curly': [ (r'}', Keyword, '#pop'), (r':-', Keyword), - (r'[a-zA-Z0-9_]+', Name.Variable), + (r'\w+', Name.Variable), (r'[^}:"\'`$]+', Punctuation), (r':', Punctuation), include('root'), diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 9a3dcb8d..53df0f75 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -150,7 +150,7 @@ class PostgresLexer(PostgresBase, RegexLexer): (r"(E|U&)?'(''|[^'])*'", String.Single), (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier (r'(?s)(\$[^\$]*\$)(.*?)(\1)', language_callback), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), # psql variable in SQL (r""":(['"]?)[a-z][a-z0-9_]*\b\1""", Name.Variable), @@ -435,7 +435,7 @@ class SqlLexer(RegexLexer): # TODO: Backslash escapes? (r"'(''|[^'])*'", String.Single), (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), (r'[;:()\[\],\.]', Punctuation) ], 'multiline-comments': [ @@ -506,9 +506,9 @@ class MySqlLexer(RegexLexer): # TODO: this list is not complete (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo), (r'(true|false|null)', Name.Constant), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\()', + (r'([a-zA-Z_]\w*)(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*', Name), (r'@[A-Za-z0-9]*[._]*[A-Za-z0-9]*', Name.Variable), (r'[;:()\[\],\.]', Punctuation) ], diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 9ed1c635..62d5da85 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -161,22 +161,22 @@ class SmartyLexer(RegexLexer): (r'(\{php\})(.*?)(\{/php\})', bygroups(Comment.Preproc, using(PhpLexer, startinline=True), Comment.Preproc)), - (r'(\{)(/?[a-zA-Z_][a-zA-Z0-9_]*)(\s*)', + (r'(\{)(/?[a-zA-Z_]\w*)(\s*)', bygroups(Comment.Preproc, Name.Function, Text), 'smarty'), (r'\{', Comment.Preproc, 'smarty') ], 'smarty': [ (r'\s+', Text), (r'\}', Comment.Preproc, '#pop'), - (r'#[a-zA-Z_][a-zA-Z0-9_]*#', Name.Variable), - (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z0-9_]+)*', Name.Variable), + (r'#[a-zA-Z_]\w*#', Name.Variable), + (r'\$[a-zA-Z_]\w*(\.\w+)*', Name.Variable), (r'[~!%^&*()+=|\[\]:;,.<>/?{}@-]', Operator), (r'(true|false|null)\b', Keyword.Constant), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Attribute) + (r'[a-zA-Z_]\w*', Name.Attribute) ] } @@ -207,7 +207,7 @@ class VelocityLexer(RegexLexer): flags = re.MULTILINE | re.DOTALL - identifier = r'[a-zA-Z_][a-zA-Z0-9_]*' + identifier = r'[a-zA-Z_]\w*' tokens = { 'root': [ @@ -267,8 +267,8 @@ class VelocityLexer(RegexLexer): rv += 0.15 if re.search(r'#\{?foreach\}?\(.+?\).*?#\{?end\}?', text): rv += 0.15 - if re.search(r'\$\{?[a-zA-Z_][a-zA-Z0-9_]*(\([^)]*\))?' - r'(\.[a-zA-Z0-9_]+(\([^)]*\))?)*\}?', text): + if re.search(r'\$\{?[a-zA-Z_]\w*(\([^)]*\))?' + r'(\.\w+(\([^)]*\))?)*\}?', text): rv += 0.01 return rv @@ -347,25 +347,25 @@ class DjangoLexer(RegexLexer): Text, Comment.Preproc, Text, Keyword, Text, Comment.Preproc)), # filter blocks - (r'(\{%)(-?\s*)(filter)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\{%)(-?\s*)(filter)(\s+)([a-zA-Z_]\w*)', bygroups(Comment.Preproc, Text, Keyword, Text, Name.Function), 'block'), - (r'(\{%)(-?\s*)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\{%)(-?\s*)([a-zA-Z_]\w*)', bygroups(Comment.Preproc, Text, Keyword), 'block'), (r'\{', Other) ], 'varnames': [ - (r'(\|)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\|)(\s*)([a-zA-Z_]\w*)', bygroups(Operator, Text, Name.Function)), - (r'(is)(\s+)(not)?(\s+)?([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(is)(\s+)(not)?(\s+)?([a-zA-Z_]\w*)', bygroups(Keyword, Text, Keyword, Text, Name.Function)), (r'(_|true|false|none|True|False|None)\b', Keyword.Pseudo), (r'(in|as|reversed|recursive|not|and|or|is|if|else|import|' r'with(?:(?:out)?\s*context)?|scoped|ignore\s+missing)\b', Keyword), (r'(loop|block|super|forloop)\b', Name.Builtin), - (r'[a-zA-Z][a-zA-Z0-9_-]*', Name.Variable), - (r'\.[a-zA-Z0-9_]+', Name.Variable), + (r'[a-zA-Z][\w-]*', Name.Variable), + (r'\.\w+', Name.Variable), (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), (r'([{}()\[\]+\-*/,:~]|[><=]=?)', Operator), @@ -745,7 +745,7 @@ class CheetahLexer(RegexLexer): (bygroups(Comment.Preproc, using(CheetahPythonLexer), Comment.Preproc))), # TODO support other Python syntax like $foo['bar'] - (r'(\$)([a-zA-Z_][a-zA-Z0-9_\.]*[a-zA-Z0-9_])', + (r'(\$)([a-zA-Z_][\w\.]*\w)', bygroups(Comment.Preproc, using(CheetahPythonLexer))), (r'(\$\{!?)(.*?)(\})(?s)', bygroups(Comment.Preproc, using(CheetahPythonLexer), @@ -843,7 +843,7 @@ class GenshiTextLexer(RegexLexer): 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), - (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', + (r'(?<!\$)(\$)([a-zA-Z_][\w\.]*)', Name.Variable), ] } @@ -871,7 +871,7 @@ class GenshiMarkupLexer(RegexLexer): ], 'pytag': [ (r'\s+', Text), - (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'pyattr'), + (r'[\w:-]+\s*=', Name.Attribute, 'pyattr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'pyattr': [ @@ -881,8 +881,8 @@ class GenshiMarkupLexer(RegexLexer): ], 'tag': [ (r'\s+', Text), - (r'py:[a-zA-Z0-9_-]+\s*=', Name.Attribute, 'pyattr'), - (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), + (r'py:[\w-]+\s*=', Name.Attribute, 'pyattr'), + (r'[\w:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ @@ -907,7 +907,7 @@ class GenshiMarkupLexer(RegexLexer): 'variable': [ (r'(?<!\$)(\$\{)(.+?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), - (r'(?<!\$)(\$)([a-zA-Z_][a-zA-Z0-9_\.]*)', + (r'(?<!\$)(\$)([a-zA-Z_][\w\.]*)', Name.Variable), ] } @@ -1498,13 +1498,15 @@ class ColdfusionLexer(RegexLexer): # strings, evidently. (r"'.*?'", String.Single), (r'\d+', Number), - (r'(if|else|len|var|case|default|break|switch|component|property|function|do|try|catch|in|continue|for|return|while)\b', Keyword), - (r'(required|any|array|binary|boolean|component|date|guid|numeric|query|string|struct|uuid|xml)\b', Keyword), + (r'(if|else|len|var|xml|default|break|switch|component|property|function|do|' + r'try|catch|in|continue|for|return|while|required|any|array|binary|boolean|' + r'component|date|guid|numeric|query|string|struct|uuid|case)\b', Keyword), (r'(true|false|null)\b', Keyword.Constant), - (r'(application|session|client|cookie|super|this|variables|arguments)\b', Name.Constant), - (r'([A-Za-z_$][A-Za-z0-9_.]*)(\s*)(\()', + (r'(application|session|client|cookie|super|this|variables|arguments)\b', + Name.Constant), + (r'([A-Za-z_$][\w.]*)(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), - (r'[A-Za-z_$][A-Za-z0-9_.]*', Name.Variable), + (r'[A-Za-z_$][\w.]*', Name.Variable), (r'[()\[\]{};:,.\\]', Punctuation), (r'\s+', Text), ], @@ -1809,8 +1811,8 @@ class HandlebarsLexer(RegexLexer): # borrowed from DjangoLexer (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), - (r'[a-zA-Z][a-zA-Z0-9_-]*', Name.Variable), - (r'\.[a-zA-Z0-9_]+', Name.Variable), + (r'[a-zA-Z][\w-]*', Name.Variable), + (r'\.\w+', Name.Variable), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), ] diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index fc1cc6a6..d855ec79 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -17,6 +17,7 @@ from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \ Generic, Operator, Number, Whitespace, Literal from pygments.util import get_bool_opt, ClassNotFound +from pygments.lexers.agile import PythonLexer from pygments.lexers.other import BashLexer __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer', @@ -234,11 +235,11 @@ class BaseMakefileLexer(RegexLexer): (r'\$[<@$+%?|*]', Keyword), (r'\s+', Text), (r'#.*?\n', Comment), - (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)', + (r'(export)(\s+)(?=[\w${}\t -]+\n)', bygroups(Keyword, Text), 'export'), (r'export\s+', Keyword), # assignment - (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', + (r'([\w${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), @@ -257,7 +258,7 @@ class BaseMakefileLexer(RegexLexer): (r'\)', Keyword, '#pop'), ], 'export': [ - (r'[a-zA-Z0-9_${}-]+', Name.Variable), + (r'[\w${}-]+', Name.Variable), (r'\n', Text, '#pop'), (r'\s+', Text), ], @@ -588,7 +589,7 @@ class ApacheConfLexer(RegexLexer): (r'(#.*?)$', Comment), (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)', bygroups(Name.Tag, Text, String, Name.Tag)), - (r'([a-zA-Z][a-zA-Z0-9_]*)(\s+)', + (r'([a-zA-Z]\w*)(\s+)', bygroups(Name.Builtin, Text), 'value'), (r'\.+', Text), ], @@ -597,7 +598,7 @@ class ApacheConfLexer(RegexLexer): (r'[^\S\n]+', Text), (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), (r'\d+', Number), - (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other), + (r'/([a-zA-Z0-9][\w./-]+)', String.Other), (r'(on|off|none|any|all|double|email|dns|min|minimal|' r'os|productonly|full|emerg|alert|crit|error|warn|' r'notice|info|debug|registry|script|inetd|standalone|' @@ -835,8 +836,16 @@ class VimLexer(RegexLexer): mimetypes = ['text/x-vim'] flags = re.MULTILINE + _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?' + tokens = { 'root': [ + (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)', + bygroups(using(this), Keyword, Text, Operator, Text, Text, + using(PythonLexer), Text)), + (r'^([ \t:]*)(' + _python + r')([ \t])(.*)', + bygroups(using(this), Keyword, Text, using(PythonLexer))), + (r'^\s*".*', Comment), (r'[ \t]+', Text), diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 5a2d6c92..ab8a6bcc 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -81,7 +81,7 @@ class JavascriptLexer(RegexLexer): r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -248,7 +248,7 @@ class ActionScriptLexer(RegexLexer): r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|' r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|' r'unescape)\b',Name.Function), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -271,7 +271,7 @@ class ActionScript3Lexer(RegexLexer): mimetypes = ['application/x-actionscript', 'text/x-actionscript', 'text/actionscript'] - identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*' + identifier = r'[$a-zA-Z_]\w*' typeidentifier = identifier + '(?:\.<\w+>)?' flags = re.DOTALL | re.MULTILINE @@ -359,11 +359,11 @@ class CssLexer(RegexLexer): (r'\s+', Text), (r'/\*(?:.|\n)*?\*/', Comment), (r'{', Punctuation, 'content'), - (r'\:[a-zA-Z0-9_-]+', Name.Decorator), - (r'\.[a-zA-Z0-9_-]+', Name.Class), - (r'\#[a-zA-Z0-9_-]+', Name.Function), - (r'@[a-zA-Z0-9_-]+', Keyword, 'atrule'), - (r'[a-zA-Z0-9_-]+', Name.Tag), + (r'\:[\w-]+', Name.Decorator), + (r'\.[\w-]+', Name.Class), + (r'\#[\w-]+', Name.Function), + (r'@[\w-]+', Keyword, 'atrule'), + (r'[\w-]+', Name.Tag), (r'[~\^\*!%&$\[\]\(\)<>\|+=@:;,./?-]', Operator), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single) @@ -475,7 +475,7 @@ class CssLexer(RegexLexer): (r'[\[\]();]+', Punctuation), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name) + (r'[a-zA-Z_]\w*', Name) ] } @@ -595,26 +595,26 @@ class ObjectiveJLexer(RegexLexer): r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin), - (r'([$a-zA-Z_][a-zA-Z0-9_]*)(' + _ws + r')(?=\()', + (r'([$a-zA-Z_]\w*)(' + _ws + r')(?=\()', bygroups(Name.Function, using(this))), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[$a-zA-Z_]\w*', Name), ], 'classname' : [ # interface definition that inherits - (r'([a-zA-Z_][a-zA-Z0-9_]*)(' + _ws + r':' + _ws + - r')([a-zA-Z_][a-zA-Z0-9_]*)?', + (r'([a-zA-Z_]\w*)(' + _ws + r':' + _ws + + r')([a-zA-Z_]\w*)?', bygroups(Name.Class, using(this), Name.Class), '#pop'), # interface definition for a category - (r'([a-zA-Z_][a-zA-Z0-9_]*)(' + _ws + r'\()([a-zA-Z_][a-zA-Z0-9_]*)(\))', + (r'([a-zA-Z_]\w*)(' + _ws + r'\()([a-zA-Z_]\w*)(\))', bygroups(Name.Class, using(this), Name.Label, Text), '#pop'), # simple interface / implementation - (r'([a-zA-Z_][a-zA-Z0-9_]*)', Name.Class, '#pop'), + (r'([a-zA-Z_]\w*)', Name.Class, '#pop'), ], 'forward_classname' : [ - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*,\s*)', + (r'([a-zA-Z_]\w*)(\s*,\s*)', bygroups(Name.Class, Text), '#push'), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*;?)', + (r'([a-zA-Z_]\w*)(\s*;?)', bygroups(Name.Class, Text), '#pop'), ], 'function_signature': [ @@ -622,26 +622,26 @@ class ObjectiveJLexer(RegexLexer): # start of a selector w/ parameters (r'(\(' + _ws + r')' # open paren - r'([a-zA-Z_][a-zA-Z0-9_]+)' # return type + r'([a-zA-Z_]\w+)' # return type r'(' + _ws + r'\)' + _ws + r')' # close paren - r'([$a-zA-Z_][a-zA-Z0-9_]+' + _ws + r':)', # function name + r'([$a-zA-Z_]\w+' + _ws + r':)', # function name bygroups(using(this), Keyword.Type, using(this), Name.Function), 'function_parameters'), # no-param function (r'(\(' + _ws + r')' # open paren - r'([a-zA-Z_][a-zA-Z0-9_]+)' # return type + r'([a-zA-Z_]\w+)' # return type r'(' + _ws + r'\)' + _ws + r')' # close paren - r'([$a-zA-Z_][a-zA-Z0-9_]+)', # function name + r'([$a-zA-Z_]\w+)', # function name bygroups(using(this), Keyword.Type, using(this), Name.Function), "#pop"), # no return type given, start of a selector w/ parameters - (r'([$a-zA-Z_][a-zA-Z0-9_]+' + _ws + r':)', # function name + (r'([$a-zA-Z_]\w+' + _ws + r':)', # function name bygroups (Name.Function), 'function_parameters'), # no return type given, no-param function - (r'([$a-zA-Z_][a-zA-Z0-9_]+)', # function name + (r'([$a-zA-Z_]\w+)', # function name bygroups(Name.Function), "#pop"), ('', Text, '#pop'), @@ -653,11 +653,11 @@ class ObjectiveJLexer(RegexLexer): (r'(\(' + _ws + ')' # open paren r'([^\)]+)' # type r'(' + _ws + r'\)' + _ws + r')' # close paren - r'([$a-zA-Z_][a-zA-Z0-9_]+)', # param name + r'([$a-zA-Z_]\w+)', # param name bygroups(using(this), Keyword.Type, using(this), Text)), # one piece of a selector name - (r'([$a-zA-Z_][a-zA-Z0-9_]+' + _ws + r':)', # function name + (r'([$a-zA-Z_]\w+' + _ws + r':)', # function name Name.Function), # smallest possible selector piece @@ -667,10 +667,10 @@ class ObjectiveJLexer(RegexLexer): (r'(,' + _ws + r'\.\.\.)', using(this)), # param name - (r'([$a-zA-Z_][a-zA-Z0-9_]+)', Text), + (r'([$a-zA-Z_]\w+)', Text), ], 'expression' : [ - (r'([$a-zA-Z_][a-zA-Z0-9_]*)(\()', bygroups(Name.Function, + (r'([$a-zA-Z_]\w*)(\()', bygroups(Name.Function, Punctuation)), (r'(\))', Punctuation, "#pop"), ], @@ -737,8 +737,8 @@ class HtmlLexer(RegexLexer): ], 'tag': [ (r'\s+', Text), - (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), - (r'[a-zA-Z0-9_:-]+', Name.Attribute), + (r'[\w:-]+\s*=', Name.Attribute, 'attr'), + (r'[\w:-]+', Name.Attribute), (r'/?\s*>', Name.Tag, '#pop'), ], 'script-content': [ @@ -801,7 +801,7 @@ class PhpLexer(RegexLexer): # Note that a backslash is included in the following two patterns # PHP uses a backslash as a namespace separator - _ident_char = r'[\\_a-zA-Z0-9]|[^\x00-\x7f]' + _ident_char = r'[\\\w]|[^\x00-\x7f]' _ident_begin = r'(?:[\\_a-zA-Z]|[^\x00-\x7f])' _ident_end = r'(?:' + _ident_char + ')*' _ident_inner = _ident_begin + _ident_end @@ -1118,8 +1118,8 @@ class MxmlLexer(RegexLexer): ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), - (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'), - (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag), + (r'<\s*[\w:.-]+', Name.Tag, 'tag'), + (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), ], 'comment': [ ('[^-]+', Comment), @@ -1128,7 +1128,7 @@ class MxmlLexer(RegexLexer): ], 'tag': [ (r'\s+', Text), - (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), + (r'[\w.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ @@ -1161,11 +1161,10 @@ class HaxeLexer(ExtendedRegexLexer): r'inline|using|null|true|false|abstract)\b') # idtype in lexer.mll - typeid = r'_*[A-Z][_a-zA-Z0-9]*' + typeid = r'_*[A-Z]\w*' # combined ident and dollar and idtype - ident = r'(?:_*[a-z][_a-zA-Z0-9]*|_+[0-9][_a-zA-Z0-9]*|' + typeid + \ - '|_+|\$[_a-zA-Z0-9]+)' + ident = r'(?:_*[a-z]\w*|_+[0-9]\w*|' + typeid + '|_+|\$\w+)' binop = (r'(?:%=|&=|\|=|\^=|\+=|\-=|\*=|/=|<<=|>\s*>\s*=|>\s*>\s*>\s*=|==|' r'!=|<=|>\s*=|&&|\|\||<<|>>>|>\s*>|\.\.\.|<|>|%|&|\||\^|\+|\*|' @@ -2066,8 +2065,8 @@ class HamlLexer(ExtendedRegexLexer): ], 'css': [ - (r'\.[a-z0-9_:-]+', Name.Class, 'tag'), - (r'\#[a-z0-9_:-]+', Name.Function, 'tag'), + (r'\.[\w:-]+', Name.Class, 'tag'), + (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ @@ -2080,7 +2079,7 @@ class HamlLexer(ExtendedRegexLexer): 'content': [ include('css'), - (r'%[a-z0-9_:-]+', Name.Tag, 'tag'), + (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), @@ -2116,16 +2115,16 @@ class HamlLexer(ExtendedRegexLexer): 'html-attributes': [ (r'\s+', Text), - (r'[a-z0-9_:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), - (r'[a-z0-9_:-]+', Name.Attribute), + (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), + (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), - (r'[a-z0-9_]+', Name.Variable, '#pop'), - (r'@[a-z0-9_]+', Name.Variable.Instance, '#pop'), - (r'\$[a-z0-9_]+', Name.Variable.Global, '#pop'), + (r'\w+', Name.Variable, '#pop'), + (r'@\w+', Name.Variable.Instance, '#pop'), + (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), ], @@ -2265,7 +2264,7 @@ common_sass_tokens = { (r'\:', Name.Decorator, 'pseudo-class'), (r'\.', Name.Class, 'class'), (r'\#', Name.Namespace, 'id'), - (r'[a-zA-Z0-9_-]+', Name.Tag), + (r'[\w-]+', Name.Tag), (r'#\{', String.Interpol, 'interpolation'), (r'&', Keyword), (r'[~\^\*!&\[\]\(\)<>\|+=@:;,./?-]', Operator), @@ -2501,12 +2500,12 @@ class CoffeeScriptLexer(RegexLexer): r'decodeURIComponent|encodeURI|encodeURIComponent|' r'eval|isFinite|isNaN|parseFloat|parseInt|document|window)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_\.:\$]*\s*[:=]\s', Name.Variable, + (r'[$a-zA-Z_][\w\.:\$]*\s*[:=]\s', Name.Variable, 'slashstartsregex'), - (r'@[$a-zA-Z_][a-zA-Z0-9_\.:\$]*\s*[:=]\s', Name.Variable.Instance, + (r'@[$a-zA-Z_][\w\.:\$]*\s*[:=]\s', Name.Variable.Instance, 'slashstartsregex'), (r'@', Name.Other, 'slashstartsregex'), - (r'@?[$a-zA-Z_][a-zA-Z0-9_\$]*', Name.Other, 'slashstartsregex'), + (r'@?[$a-zA-Z_][\w\$]*', Name.Other, 'slashstartsregex'), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -2574,13 +2573,13 @@ class KalLexer(RegexLexer): (r'#(?!##[^#]).*?\n', Comment.Single), ], 'functiondef': [ - (r'[$a-zA-Z_][a-zA-Z0-9_\$]*\s*', Name.Function, '#pop'), + (r'[$a-zA-Z_][\w\$]*\s*', Name.Function, '#pop'), include('commentsandwhitespace'), ], 'classdef': [ (r'\binherits\s+from\b', Keyword), - (r'[$a-zA-Z_][a-zA-Z0-9_\$]*\s*\n', Name.Class, '#pop'), - (r'[$a-zA-Z_][a-zA-Z0-9_\$]*\s*', Name.Class), + (r'[$a-zA-Z_][\w\$]*\s*\n', Name.Class, '#pop'), + (r'[$a-zA-Z_][\w\$]*\s*', Name.Class), include('commentsandwhitespace'), ], 'listcomprehension': [ @@ -2609,7 +2608,7 @@ class KalLexer(RegexLexer): (r'\b(function|method|task)\b', Keyword.Declaration, 'functiondef'), (r'\bclass\b', Keyword.Declaration, 'classdef'), (r'\b(safe\s+)?wait\s+for\b', Keyword, 'waitfor'), - (r'\b(me|this)(\.[$a-zA-Z_][a-zA-Z0-9_\.\$]*)?\b', Name.Variable.Instance), + (r'\b(me|this)(\.[$a-zA-Z_][\w\.\$]*)?\b', Name.Variable.Instance), (r'(?<![\.\$])(for(\s+(parallel|series))?|in|of|while|until|' r'break|return|continue|' r'when|if|unless|else|otherwise|except\s+when|' @@ -2625,7 +2624,7 @@ class KalLexer(RegexLexer): r'eval|isFinite|isNaN|parseFloat|parseInt|document|window|' r'print)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_\.\$]*\s*(:|[\+\-\*\/]?\=)?\b', Name.Variable), + (r'[$a-zA-Z_][\w\.\$]*\s*(:|[\+\-\*\/]?\=)?\b', Name.Variable), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -2728,12 +2727,12 @@ class LiveScriptLexer(RegexLexer): r'decodeURIComponent|encodeURI|encodeURIComponent|' r'eval|isFinite|isNaN|parseFloat|parseInt|document|window)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_\.\-:\$]*\s*[:=]\s', Name.Variable, + (r'[$a-zA-Z_][\w\.\-:\$]*\s*[:=]\s', Name.Variable, 'slashstartsregex'), - (r'@[$a-zA-Z_][a-zA-Z0-9_\.\-:\$]*\s*[:=]\s', Name.Variable.Instance, + (r'@[$a-zA-Z_][\w\.\-:\$]*\s*[:=]\s', Name.Variable.Instance, 'slashstartsregex'), (r'@', Name.Other, 'slashstartsregex'), - (r'@?[$a-zA-Z_][a-zA-Z0-9_\-]*', Name.Other, 'slashstartsregex'), + (r'@?[$a-zA-Z_][\w\-]*', Name.Other, 'slashstartsregex'), (r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?(?:[a-zA-Z_]+)?', Number.Float), (r'[0-9]+(~[0-9a-z]+)?(?:[a-zA-Z_]+)?', Number.Integer), ('"""', String, 'tdqs'), @@ -3729,8 +3728,8 @@ class DartLexer(RegexLexer): (r'\b(bool|double|Dynamic|int|num|Object|String|void)\b', Keyword.Type), (r'\b(false|null|true)\b', Keyword.Constant), (r'[~!%^&*+=|?:<>/-]|as\b', Operator), - (r'[a-zA-Z_$][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_$]\w*:', Name.Label), + (r'[a-zA-Z_$]\w*', Name), (r'[(){}\[\],.;]', Punctuation), (r'0[xX][0-9a-fA-F]+', Number.Hex), # DIGIT+ (‘.’ DIGIT*)? EXPONENT? @@ -3740,13 +3739,13 @@ class DartLexer(RegexLexer): # pseudo-keyword negate intentionally left out ], 'class': [ - (r'[a-zA-Z_$][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_$]\w*', Name.Class, '#pop') ], 'import_decl': [ include('string_literal'), (r'\s+', Text), (r'\b(as|show|hide)\b', Keyword), - (r'[a-zA-Z_$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_$]\w*', Name), (r'\,', Punctuation), (r'\;', Punctuation, '#pop') ], @@ -3765,7 +3764,7 @@ class DartLexer(RegexLexer): 'string_common': [ (r"\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|u\{[0-9A-Fa-f]*\}|[a-z\'\"$\\])", String.Escape), - (r'(\$)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(String.Interpol, Name)), + (r'(\$)([a-zA-Z_]\w*)', bygroups(String.Interpol, Name)), (r'(\$\{)(.*?)(\})', bygroups(String.Interpol, using(this), String.Interpol)) ], @@ -3848,7 +3847,7 @@ class TypeScriptLexer(RegexLexer): r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin), # Match stuff like: module name {...} - (r'\b(module)(\s*)(\s*[a-zA-Z0-9_?.$][\w?.$]*)(\s*)', + (r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)', bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex'), # Match variable type keywords (r'\b(string|bool|number)\b', Keyword.Type), @@ -3860,9 +3859,9 @@ class TypeScriptLexer(RegexLexer): # Match stuff like: function() {...} (r'([a-zA-Z_?.$][\w?.$]*)\(\) \{', Name.Other, 'slashstartsregex'), # Match stuff like: (function: return type) - (r'([a-zA-Z0-9_?.$][\w?.$]*)(\s*:\s*)([a-zA-Z0-9_?.$][\w?.$]*)', + (r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)', bygroups(Name.Other, Text, Keyword.Type)), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -4195,7 +4194,7 @@ class QmlLexer(RegexLexer): r'decodeURIComponent|encodeURI|encodeURIComponent|' r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|' r'window)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'[$a-zA-Z_]\w*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), @@ -4418,19 +4417,20 @@ class ZephirLexer(RegexLexer): r'(<<|>>>?|==?|!=?|->|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), - (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|require|inline|' - r'throw|try|catch|finally|new|delete|typeof|instanceof|void|namespace|use|extends|' - r'this|fetch|isset|unset|echo|fetch|likely|unlikely|empty)\b', Keyword, 'slashstartsregex'), + (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|' + r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|' + r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|' + r'empty)\b', Keyword, 'slashstartsregex'), (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), - (r'(abstract|boolean|bool|char|class|const|double|enum|export|' - r'extends|final|float|goto|implements|import|int|string|interface|long|ulong|char|uchar|native|unsigned|' - r'private|protected|public|short|static|self|throws|reverse|' + (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|' + r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|' + r'float|unsigned|private|protected|public|short|static|self|throws|reverse|' r'transient|volatile)\b', Keyword.Reserved), (r'(true|false|null|undefined)\b', Keyword.Constant), (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|' r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|' r'window)\b', Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_\\]*', Name.Other), + (r'[$a-zA-Z_][\w\\]*', Name.Other), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), diff --git a/tests/examplefiles/vimrc b/tests/examplefiles/vimrc new file mode 100644 index 00000000..d2f9cd1b --- /dev/null +++ b/tests/examplefiles/vimrc @@ -0,0 +1,21 @@ +" A comment + +:py print "py" +::pyt print 'pyt' + pyth print '''pyth''' + : pytho print "pytho" +python print """python""" + + : : python<<E OF +print """my script""" + +def MyFunc(str): + """ My Function """ + print str +E OF + +let py = 42 +echo py + +let foo = 42 +echo foo diff --git a/tests/test_clexer.py b/tests/test_clexer.py index 5d251d2e..188569dc 100644 --- a/tests/test_clexer.py +++ b/tests/test_clexer.py @@ -43,7 +43,7 @@ class CLexerTest(unittest.TestCase): } } ''' - expected = [ + tokens = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), @@ -85,7 +85,7 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Text, u''), ] - self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) + self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testSwitchSpaceBeforeColon(self): fragment = u'''\ @@ -99,7 +99,7 @@ class CLexerTest(unittest.TestCase): } } ''' - expected = [ + tokens = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), @@ -143,7 +143,7 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Text, u''), ] - self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) + self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabel(self): fragment = u'''\ @@ -153,7 +153,7 @@ class CLexerTest(unittest.TestCase): goto foo; } ''' - expected = [ + tokens = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), @@ -178,7 +178,7 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Text, u''), ] - self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) + self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabelSpaceBeforeColon(self): fragment = u'''\ @@ -188,7 +188,7 @@ class CLexerTest(unittest.TestCase): goto foo; } ''' - expected = [ + tokens = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), @@ -214,7 +214,7 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Text, u''), ] - self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) + self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabelFollowedByStatement(self): fragment = u'''\ @@ -224,7 +224,7 @@ class CLexerTest(unittest.TestCase): goto foo; } ''' - expected = [ + tokens = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), @@ -253,4 +253,4 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Text, u''), ] - self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) + self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) |