diff options
Diffstat (limited to 'pygments/lexers/agile.py')
-rw-r--r-- | pygments/lexers/agile.py | 317 |
1 files changed, 203 insertions, 114 deletions
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 0a780a3e..cd105126 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -23,7 +23,8 @@ __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer', 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', - 'FancyLexer', 'DgLexer', 'Perl6Lexer', 'HyLexer'] + 'FancyLexer', 'DgLexer', 'Perl6Lexer', 'HyLexer', + 'ChaiscriptLexer'] # b/w compatibility from pygments.lexers.functional import SchemeLexer @@ -118,20 +119,20 @@ class PythonLexer(RegexLexer): ('`.*?`', String.Backtick), ], 'name': [ - (r'@[a-zA-Z0-9_.]+', Name.Decorator), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'@[\w.]+', Name.Decorator), + ('[a-zA-Z_]\w*', Name), ], 'funcname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') + ('[a-zA-Z_]\w*', Name.Function, '#pop') ], 'classname': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ (r'(?:[ \t]|\\\n)+', Text), (r'as\b', Keyword.Namespace), (r',', Operator), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_][\w.]*', Name.Namespace), (r'', Text, '#pop') # all else: go back ], 'fromimport': [ @@ -141,7 +142,7 @@ class PythonLexer(RegexLexer): # never be a module name (r'None\b', Name.Builtin.Pseudo, '#pop'), # sadly, in "raise x from y" y will be highlighted as namespace too - (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace), + (r'[a-zA-Z_.][\w.]*', Name.Namespace), # anything else here also means "raise x from y" and is therefore # not an error (r'', Text, '#pop'), @@ -151,7 +152,7 @@ class PythonLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ - (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' 
'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time @@ -254,7 +255,7 @@ class Python3Lexer(RegexLexer): ] tokens['backtick'] = [] tokens['name'] = [ - (r'@[a-zA-Z0-9_]+', Name.Decorator), + (r'@\w+', Name.Decorator), (uni_name, Name), ] tokens['funcname'] = [ @@ -405,7 +406,7 @@ class PythonTracebackLexer(RegexLexer): bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', + (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } @@ -444,7 +445,7 @@ class Python3TracebackLexer(RegexLexer): bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', + (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } @@ -534,7 +535,7 @@ class RubyLexer(ExtendedRegexLexer): (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), - (r'([a-zA-Z_][a-zA-Z0-9_]*)(:)(?!:)', + (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), @@ -620,8 +621,8 @@ class RubyLexer(ExtendedRegexLexer): r'rescue|raise|retry|return|super|then|undef|unless|until|when|' r'while|yield)\b', Keyword), # start of function, class and module names - (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*' - r'(?:::[a-zA-Z_][a-zA-Z0-9_]*)*)', + (r'(module)(\s+)([a-zA-Z_]\w*' + r'(?:::[a-zA-Z_]\w*)*)', bygroups(Keyword, Text, Name.Namespace)), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'), @@ -712,9 +713,9 @@ class RubyLexer(ExtendedRegexLexer): (r'([\d]+(?:_\d+)*)(\s*)([/?])?', bygroups(Number.Integer, Text, Operator)), # Names - (r'@@[a-zA-Z_][a-zA-Z0-9_]*', 
Name.Variable.Class), - (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), - (r'\$[a-zA-Z0-9_]+', Name.Variable.Global), + (r'@@[a-zA-Z_]\w*', Name.Variable.Class), + (r'@[a-zA-Z_]\w*', Name.Variable.Instance), + (r'\$\w+', Name.Variable.Global), (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global), (r'\$-[0adFiIlpvw]', Name.Variable.Global), (r'::', Operator), @@ -724,7 +725,7 @@ class RubyLexer(ExtendedRegexLexer): r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)' r'(?!\w)', String.Char), - (r'[A-Z][a-zA-Z0-9_]+', Name.Constant), + (r'[A-Z]\w+', Name.Constant), # this is needed because ruby attributes can look # like keywords (class) or like this: ` ?!? (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])', @@ -738,7 +739,7 @@ class RubyLexer(ExtendedRegexLexer): ], 'funcname': [ (r'\(', Punctuation, 'defexpr'), - (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?' + (r'(?:([a-zA-Z_]\w*)(\.))?' r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), @@ -761,8 +762,8 @@ class RubyLexer(ExtendedRegexLexer): ], 'string-intp': [ (r'#{', String.Interpol, 'in-intp'), - (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol), - (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol) + (r'#@@?[a-zA-Z_]\w*', String.Interpol), + (r'#\$[a-zA-Z_]\w*', String.Interpol) ], 'string-intp-escaped': [ include('string-intp'), @@ -813,7 +814,7 @@ class RubyConsoleLexer(Lexer): aliases = ['rbcon', 'irb'] mimetypes = ['text/x-ruby-shellsession'] - _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] ' + _prompt_re = re.compile('irb\([a-zA-Z_]\w*\):\d{3}:\d+[>*"\'] ' '|>> |\?> ') def get_tokens_unprocessed(self, text): @@ -874,7 +875,7 @@ class PerlLexer(RegexLexer): (r'(case|continue|do|else|elsif|for|foreach|if|last|my|' r'next|our|redo|reset|then|unless|until|while|use|' r'print|new|BEGIN|CHECK|INIT|END|return)\b', Keyword), - (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)', + 
(r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)', bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'), (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word), # common delimiters @@ -927,7 +928,7 @@ class PerlLexer(RegexLexer): r'utime|values|vec|wait|waitpid|wantarray|warn|write' r')\b', Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String), + (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String), (r'__END__', Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), @@ -965,14 +966,14 @@ class PerlLexer(RegexLexer): (r'\s+', Text), (r'\{', Punctuation, '#pop'), # hash syntax? (r'\)|,', Punctuation, '#pop'), # argument specifier - (r'[a-zA-Z0-9_]+::', Name.Namespace), - (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'), + (r'\w+::', Name.Namespace), + (r'[\w:]+', Name.Variable, '#pop'), ], 'name': [ - (r'[a-zA-Z0-9_]+::', Name.Namespace), - (r'[a-zA-Z0-9_:]+', Name, '#pop'), - (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'), - (r'(?=[^a-zA-Z0-9_])', Text, '#pop'), + (r'\w+::', Name.Namespace), + (r'[\w:]+', Name, '#pop'), + (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'), + (r'(?=\W)', Text, '#pop'), ], 'modulename': [ (r'[a-zA-Z_]\w*', Name.Namespace, '#pop') @@ -1084,7 +1085,7 @@ class LuaLexer(RegexLexer): (r'(function)\b', Keyword, 'funcname'), - (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name), + (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name), ("'", String.Single, combined('stringescape', 'sqs')), ('"', String.Double, combined('stringescape', 'dqs')) @@ -1092,7 +1093,7 @@ class LuaLexer(RegexLexer): 'funcname': [ (r'\s+', Text), - ('(?:([A-Za-z_][A-Za-z0-9_]*)(\.))?([A-Za-z_][A-Za-z0-9_]*)', + ('(?:([A-Za-z_]\w*)(\.))?([A-Za-z_]\w*)', bygroups(Name.Class, Punctuation, Name.Function), '#pop'), # inline function ('\(', Punctuation, '#pop'), @@ -1175,20 +1176,20 @@ class 
MoonScriptLexer(LuaLexer): (r'[^\S\n]+', Text), (r'(?s)\[(=*)\[.*?\]\1\]', String), (r'(->|=>)', Name.Function), - (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), + (r':[a-zA-Z_]\w*', Name.Variable), (r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator), (r'[;,]', Punctuation), (r'[\[\]\{\}\(\)]', Keyword.Type), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable), + (r'[a-zA-Z_]\w*:', Name.Variable), (r"(class|extends|if|then|super|do|with|import|export|" r"while|elseif|return|for|in|from|when|using|else|" r"and|or|not|switch|break)\b", Keyword), (r'(true|false|nil)\b', Keyword.Constant), (r'(and|or|not)\b', Operator.Word), (r'(self)\b', Name.Builtin.Pseudo), - (r'@@?([a-zA-Z_][a-zA-Z0-9_]*)?', Name.Variable.Class), + (r'@@?([a-zA-Z_]\w*)?', Name.Variable.Class), (r'[A-Z]\w*', Name.Class), # proper name - (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name), + (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name), ("'", String.Single, combined('stringescape', 'sqs')), ('"', String.Double, combined('stringescape', 'dqs')) ], @@ -1319,7 +1320,7 @@ class IoLexer(RegexLexer): # names (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b', Name.Builtin), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), # numbers (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'\d+', Number.Integer) @@ -1720,7 +1721,7 @@ class FactorLexer(RegexLexer): (r'"""\s+(?:.|\n)*?\s+"""', String), (r'"(?:\\\\|\\"|[^"])*"', String), (r'\S+"\s+(?:\\\\|\\"|[^"])*"', String), - (r'CHAR:\s+(\\[\\abfnrstv]|[^\\]\S+)\s', String.Char), + (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char), # comments (r'!\s+.*$', Comment), @@ -1728,21 +1729,21 @@ class FactorLexer(RegexLexer): (r'/\*\s+(?:.|\n)*?\s\*/\s', Comment), # boolean constants - (r'(t|f)\s', Name.Constant), + (r'[tf]\s', Name.Constant), # symbols and literals (r'[\\$]\s+\S+', Name.Constant), (r'M\\\s+\S+\s+\S+', Name.Constant), # numbers - (r'[+-]?([\d,]*\d)?\.(\d([\d,]*\d)?)?([eE][+-]?\d+)?\s', Number), - 
(r'[+-]?\d([\d,]*\d)?([eE][+-]?\d+)?\s', Number), - (r'0x[a-fA-F\d]([a-fA-F\d,]*[a-fA-F\d])?(p\d([\d,]*\d)?)?\s', Number), - (r'NAN:\s+[a-fA-F\d]([a-fA-F\d,]*[a-fA-F\d])?(p\d([\d,]*\d)?)?\s', Number), + (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s', Number), + (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number), + (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), + (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), (r'0b[01]+\s', Number), (r'0o[0-7]+\s', Number), - (r'(\d([\d,]*\d)?)?\+\d([\d,]*\d)?/\d([\d,]*\d)?\s', Number), - (r'(\-\d([\d,]*\d)?)?\-\d([\d,]*\d)?/\d([\d,]*\d)?\s', Number), + (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), + (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), # keywords (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s', @@ -1856,14 +1857,14 @@ class FancyLexer(RegexLexer): r'FalseClass|Tuple|Symbol|Stack|Set|FancySpec|Method|Package|' r'Range)\b', Name.Builtin), # functions - (r'[a-zA-Z]([a-zA-Z0-9_]|[-+?!=*/^><%])*:', Name.Function), + (r'[a-zA-Z](\w|[-+?!=*/^><%])*:', Name.Function), # operators, must be below functions (r'[-+*/~,<>=&!?%^\[\]\.$]+', Operator), - ('[A-Z][a-zA-Z0-9_]*', Name.Constant), - ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), - ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class), + ('[A-Z]\w*', Name.Constant), + ('@[a-zA-Z_]\w*', Name.Variable.Instance), + ('@@[a-zA-Z_]\w*', Name.Variable.Class), ('@@?', Operator), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), # numbers - / checks are necessary to avoid mismarking regexes, # see comment in RubyLexer (r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?', @@ -1948,7 +1949,7 @@ class DgLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'string': [ - (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' 
'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time @@ -1986,7 +1987,7 @@ class Perl6Lexer(ExtendedRegexLexer): mimetypes = ['text/x-perl6', 'application/x-perl6'] flags = re.MULTILINE | re.DOTALL | re.UNICODE - PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" # if you alter this, search for a copy made of it below + PERL6_IDENTIFIER_RANGE = "['\w:-]" PERL6_KEYWORDS = ( 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', @@ -2077,62 +2078,80 @@ class Perl6Lexer(ExtendedRegexLexer): # Perl 6 has a *lot* of possible bracketing characters # this list was lifted from STD.pm6 (https://github.com/perl6/std) PERL6_BRACKETS = { - u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d', - u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', - u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', - u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046', - u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', - u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', - u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267', - u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', - u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', - u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281', - u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', - u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', - u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3', - u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', 
u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', - u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', - u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd', - u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', - u'\u22e6' : u'\u22e7', u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', - u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc', - u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', - u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', - u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773', - u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', - u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', - u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986', - u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', - u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', - u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0', - u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', - u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', - u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a', - u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', - u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', - u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2', - u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', 
u'\u2aac' : u'\u2aad', - u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', - u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6', - u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', - u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', - u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d', - u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', - u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', - u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e', - u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', - u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', - u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a', - u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', - u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', + u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', + u'\u007b' : u'\u007d', u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', + u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', u'\u2018' : u'\u2019', + u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', + u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', + u'\u2045' : u'\u2046', u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', + u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', u'\u220a' : u'\u220d', + u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', + u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', + u'\u2266' : u'\u2267', u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', + u'\u226e' : u'\u226f', u'\u2270' : 
u'\u2271', u'\u2272' : u'\u2273', + u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', + u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', + u'\u2280' : u'\u2281', u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', + u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', u'\u228a' : u'\u228b', + u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', + u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', + u'\u22a9' : u'\u2ae3', u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', + u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', u'\u22b6' : u'\u22b7', + u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', + u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', + u'\u22dc' : u'\u22dd', u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', + u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', u'\u22e6' : u'\u22e7', + u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', + u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', + u'\u22f4' : u'\u22fc', u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', + u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', u'\u2329' : u'\u232a', + u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', + u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', + u'\u2772' : u'\u2773', u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', + u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', u'\u27dd' : u'\u27de', + u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', + u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', + u'\u2985' : u'\u2986', u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', + u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', u'\u298f' : u'\u2990', + u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', + u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', + u'\u29cf' : u'\u29d0', u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', + u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', 
u'\u29f8' : u'\u29f9', + u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', + u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', + u'\u2a79' : u'\u2a7a', u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', + u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', u'\u2a8b' : u'\u2a8c', + u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', + u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', + u'\u2aa1' : u'\u2aa2', u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', + u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', u'\u2aaf' : u'\u2ab0', + u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', + u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', + u'\u2ac5' : u'\u2ac6', u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', + u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', u'\u2ad5' : u'\u2ad6', + u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', + u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', + u'\u2e0c' : u'\u2e0d', u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', + u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', u'\u300c' : u'\u300d', + u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', + u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', + u'\u301d' : u'\u301e', u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', + u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', u'\ufe39' : u'\ufe3a', + u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', + u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', + u'\ufe59' : u'\ufe5a', u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', + u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', u'\uff3b' : u'\uff3d', + u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', } def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''): if boundary_regex_fragment is None: - return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + 
suffix + r')\b' + return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + \ + suffix + r')\b' else: - return r'(?<!' + boundary_regex_fragment + ')' + prefix + '(' + \ - r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + '(?!' + boundary_regex_fragment + ')' + return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \ + r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + r'(?!' + \ + boundary_regex_fragment + r')' def brackets_callback(token_class): def callback(lexer, match, context): @@ -2222,10 +2241,10 @@ class Perl6Lexer(ExtendedRegexLexer): context.pos = match.end() context.stack.append('root') - # If you're modifying these rules, be careful if you need to process '{' or '}' characters. - # We have special logic for processing these characters (due to the fact that you can nest - # Perl 6 code in regex blocks), so if you need to process one of them, make sure you also - # process the corresponding one! + # If you're modifying these rules, be careful if you need to process '{' or '}' + # characters. We have special logic for processing these characters (due to the fact + # that you can nest Perl 6 code in regex blocks), so if you need to process one of + # them, make sure you also process the corresponding one! tokens = { 'common' : [ (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), @@ -2233,7 +2252,8 @@ class Perl6Lexer(ExtendedRegexLexer): (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), (r'^=.*?\n\s*?\n', Comment.Multiline), - (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), + (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', + bygroups(Keyword, Name), 'token-sym-brackets'), (r'(regex|token|rule)(?!' 
+ PERL6_IDENTIFIER_RANGE + ')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), # deal with a special case in the Perl 6 grammar (role q { ... }) (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), @@ -2241,24 +2261,28 @@ class Perl6Lexer(ExtendedRegexLexer): (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin), (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), # copied from PerlLexer - (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), + (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', + Name.Variable), (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), (r'::\?\w+', Name.Variable.Global), - (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', + Name.Variable.Global), (r'\$(?:<.*?>)+', Name.Variable), (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)), # copied from PerlLexer (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), (r'0b[01]+(_[01]+)*', Number.Bin), - (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), + (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', + Number.Float), (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), (r'\d+(_\d+)*', Number.Integer), (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'm\w+(?=\()', Name), (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z_:\s])(?P=first_char)*)', brackets_callback(String.Regex)), - (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), + (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', + 
String.Regex), (r'<[^\s=].*?\S>', String), (_build_word_match(PERL6_OPERATORS), Operator), (r'[0-9a-zA-Z_]' + PERL6_IDENTIFIER_RANGE + '*', Name), @@ -2321,8 +2345,8 @@ class Perl6Lexer(ExtendedRegexLexer): rating = False # check for my/our/has declarations - # copied PERL6_IDENTIFIER_RANGE from above; not happy about that - if re.search("(?:my|our|has)\s+(?:['a-zA-Z0-9_:-]+\s+)?[$@%&(]", text): + if re.search("(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE + \ + "+\s+)?[$@%&(]", text): rating = 0.8 saw_perl_decl = True @@ -2461,3 +2485,68 @@ class HyLexer(RegexLexer): def analyse_text(text): if '(import ' in text or '(defn ' in text: return 0.9 + + +class ChaiscriptLexer(RegexLexer): + """ + For `ChaiScript <http://chaiscript.com/>`_ source code. + + .. versionadded:: 2.0 + """ + + name = 'ChaiScript' + aliases = ['chai', 'chaiscript'] + filenames = ['*.chai'] + mimetypes = ['text/x-chaiscript', 'application/x-chaiscript'] + + flags = re.DOTALL + tokens = { + 'commentsandwhitespace': [ + (r'\s+', Text), + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline), + (r'^\#.*?\n', Comment.Single) + ], + 'slashstartsregex': [ + include('commentsandwhitespace'), + (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' + r'([gim]+\b|\B)', String.Regex, '#pop'), + (r'(?=/)', Text, ('#pop', 'badregex')), + (r'', Text, '#pop') + ], + 'badregex': [ + ('\n', Text, '#pop') + ], + 'root': [ + include('commentsandwhitespace'), + (r'\n', Text), + (r'[^\S\n]+', Text), + (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|\.\.' 
+ r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'), + (r'[{(\[;,]', Punctuation, 'slashstartsregex'), + (r'[})\].]', Punctuation), + (r'[=+\-*/]', Operator), + (r'(for|in|while|do|break|return|continue|if|else|' + r'throw|try|catch' + r')\b', Keyword, 'slashstartsregex'), + (r'(var)\b', Keyword.Declaration, 'slashstartsregex'), + (r'(attr|def|fun)\b', Keyword.Reserved), + (r'(true|false)\b', Keyword.Constant), + (r'(eval|throw)\b', Name.Builtin), + (r'`\S+`', Name.Builtin), + (r'[$a-zA-Z_]\w*', Name.Other), + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'"', String.Double, 'dqstring'), + (r"'(\\\\|\\'|[^'])*'", String.Single), + ], + 'dqstring': [ + (r'\${[^"}]+?}', String.Interpol), + (r'\$', String.Double), + (r'\\\\', String.Double), + (r'\\"', String.Double), + (r'[^\\\\\\"$]+', String.Double), + (r'"', String.Double, '#pop'), + ], + } |