summary refs log tree commit diff
path: root/pygments/lexers/agile.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/agile.py')
-rw-r--r-- pygments/lexers/agile.py 317
1 files changed, 203 insertions, 114 deletions
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 0a780a3e..cd105126 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -23,7 +23,8 @@ __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer',
'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer',
'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer',
- 'FancyLexer', 'DgLexer', 'Perl6Lexer', 'HyLexer']
+ 'FancyLexer', 'DgLexer', 'Perl6Lexer', 'HyLexer',
+ 'ChaiscriptLexer']
# b/w compatibility
from pygments.lexers.functional import SchemeLexer
@@ -118,20 +119,20 @@ class PythonLexer(RegexLexer):
('`.*?`', String.Backtick),
],
'name': [
- (r'@[a-zA-Z0-9_.]+', Name.Decorator),
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
+ (r'@[\w.]+', Name.Decorator),
+ ('[a-zA-Z_]\w*', Name),
],
'funcname': [
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
+ ('[a-zA-Z_]\w*', Name.Function, '#pop')
],
'classname': [
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
+ ('[a-zA-Z_]\w*', Name.Class, '#pop')
],
'import': [
(r'(?:[ \t]|\\\n)+', Text),
(r'as\b', Keyword.Namespace),
(r',', Operator),
- (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
+ (r'[a-zA-Z_][\w.]*', Name.Namespace),
(r'', Text, '#pop') # all else: go back
],
'fromimport': [
@@ -141,7 +142,7 @@ class PythonLexer(RegexLexer):
# never be a module name
(r'None\b', Name.Builtin.Pseudo, '#pop'),
# sadly, in "raise x from y" y will be highlighted as namespace too
- (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
+ (r'[a-zA-Z_.][\w.]*', Name.Namespace),
# anything else here also means "raise x from y" and is therefore
# not an error
(r'', Text, '#pop'),
@@ -151,7 +152,7 @@ class PythonLexer(RegexLexer):
r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
],
'strings': [
- (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+ (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
@@ -254,7 +255,7 @@ class Python3Lexer(RegexLexer):
]
tokens['backtick'] = []
tokens['name'] = [
- (r'@[a-zA-Z0-9_]+', Name.Decorator),
+ (r'@\w+', Name.Decorator),
(uni_name, Name),
]
tokens['funcname'] = [
@@ -405,7 +406,7 @@ class PythonTracebackLexer(RegexLexer):
bygroups(Text, Comment, Text)), # for doctests...
(r'^([^:]+)(: )(.+)(\n)',
bygroups(Generic.Error, Text, Name, Text), '#pop'),
- (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
+ (r'^([a-zA-Z_]\w*)(:?\n)',
bygroups(Generic.Error, Text), '#pop')
],
}
@@ -444,7 +445,7 @@ class Python3TracebackLexer(RegexLexer):
bygroups(Text, Comment, Text)), # for doctests...
(r'^([^:]+)(: )(.+)(\n)',
bygroups(Generic.Error, Text, Name, Text), '#pop'),
- (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
+ (r'^([a-zA-Z_]\w*)(:?\n)',
bygroups(Generic.Error, Text), '#pop')
],
}
@@ -534,7 +535,7 @@ class RubyLexer(ExtendedRegexLexer):
(r":'(\\\\|\\'|[^'])*'", String.Symbol),
(r"'(\\\\|\\'|[^'])*'", String.Single),
(r':"', String.Symbol, 'simple-sym'),
- (r'([a-zA-Z_][a-zA-Z0-9_]*)(:)(?!:)',
+ (r'([a-zA-Z_]\w*)(:)(?!:)',
bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9
(r'"', String.Double, 'simple-string'),
(r'(?<!\.)`', String.Backtick, 'simple-backtick'),
@@ -620,8 +621,8 @@ class RubyLexer(ExtendedRegexLexer):
r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
r'while|yield)\b', Keyword),
# start of function, class and module names
- (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*'
- r'(?:::[a-zA-Z_][a-zA-Z0-9_]*)*)',
+ (r'(module)(\s+)([a-zA-Z_]\w*'
+ r'(?:::[a-zA-Z_]\w*)*)',
bygroups(Keyword, Text, Name.Namespace)),
(r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
(r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
@@ -712,9 +713,9 @@ class RubyLexer(ExtendedRegexLexer):
(r'([\d]+(?:_\d+)*)(\s*)([/?])?',
bygroups(Number.Integer, Text, Operator)),
# Names
- (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
- (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
- (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
+ (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
+ (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
+ (r'\$\w+', Name.Variable.Global),
(r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
(r'\$-[0adFiIlpvw]', Name.Variable.Global),
(r'::', Operator),
@@ -724,7 +725,7 @@ class RubyLexer(ExtendedRegexLexer):
r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
r'(?!\w)',
String.Char),
- (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
+ (r'[A-Z]\w+', Name.Constant),
# this is needed because ruby attributes can look
# like keywords (class) or like this: ` ?!?
(r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
@@ -738,7 +739,7 @@ class RubyLexer(ExtendedRegexLexer):
],
'funcname': [
(r'\(', Punctuation, 'defexpr'),
- (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
+ (r'(?:([a-zA-Z_]\w*)(\.))?'
r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
bygroups(Name.Class, Operator, Name.Function), '#pop'),
@@ -761,8 +762,8 @@ class RubyLexer(ExtendedRegexLexer):
],
'string-intp': [
(r'#{', String.Interpol, 'in-intp'),
- (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
- (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
+ (r'#@@?[a-zA-Z_]\w*', String.Interpol),
+ (r'#\$[a-zA-Z_]\w*', String.Interpol)
],
'string-intp-escaped': [
include('string-intp'),
@@ -813,7 +814,7 @@ class RubyConsoleLexer(Lexer):
aliases = ['rbcon', 'irb']
mimetypes = ['text/x-ruby-shellsession']
- _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
+ _prompt_re = re.compile('irb\([a-zA-Z_]\w*\):\d{3}:\d+[>*"\'] '
'|>> |\?> ')
def get_tokens_unprocessed(self, text):
@@ -874,7 +875,7 @@ class PerlLexer(RegexLexer):
(r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
r'next|our|redo|reset|then|unless|until|while|use|'
r'print|new|BEGIN|CHECK|INIT|END|return)\b', Keyword),
- (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
+ (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
(r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
# common delimiters
@@ -927,7 +928,7 @@ class PerlLexer(RegexLexer):
r'utime|values|vec|wait|waitpid|wantarray|warn|write'
r')\b', Name.Builtin),
(r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
- (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
+ (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String),
(r'__END__', Comment.Preproc, 'end-part'),
(r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
(r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
@@ -965,14 +966,14 @@ class PerlLexer(RegexLexer):
(r'\s+', Text),
(r'\{', Punctuation, '#pop'), # hash syntax?
(r'\)|,', Punctuation, '#pop'), # argument specifier
- (r'[a-zA-Z0-9_]+::', Name.Namespace),
- (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
+ (r'\w+::', Name.Namespace),
+ (r'[\w:]+', Name.Variable, '#pop'),
],
'name': [
- (r'[a-zA-Z0-9_]+::', Name.Namespace),
- (r'[a-zA-Z0-9_:]+', Name, '#pop'),
- (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
- (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
+ (r'\w+::', Name.Namespace),
+ (r'[\w:]+', Name, '#pop'),
+ (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
+ (r'(?=\W)', Text, '#pop'),
],
'modulename': [
(r'[a-zA-Z_]\w*', Name.Namespace, '#pop')
@@ -1084,7 +1085,7 @@ class LuaLexer(RegexLexer):
(r'(function)\b', Keyword, 'funcname'),
- (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
+ (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name),
("'", String.Single, combined('stringescape', 'sqs')),
('"', String.Double, combined('stringescape', 'dqs'))
@@ -1092,7 +1093,7 @@ class LuaLexer(RegexLexer):
'funcname': [
(r'\s+', Text),
- ('(?:([A-Za-z_][A-Za-z0-9_]*)(\.))?([A-Za-z_][A-Za-z0-9_]*)',
+ ('(?:([A-Za-z_]\w*)(\.))?([A-Za-z_]\w*)',
bygroups(Name.Class, Punctuation, Name.Function), '#pop'),
# inline function
('\(', Punctuation, '#pop'),
@@ -1175,20 +1176,20 @@ class MoonScriptLexer(LuaLexer):
(r'[^\S\n]+', Text),
(r'(?s)\[(=*)\[.*?\]\1\]', String),
(r'(->|=>)', Name.Function),
- (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
+ (r':[a-zA-Z_]\w*', Name.Variable),
(r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator),
(r'[;,]', Punctuation),
(r'[\[\]\{\}\(\)]', Keyword.Type),
- (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable),
+ (r'[a-zA-Z_]\w*:', Name.Variable),
(r"(class|extends|if|then|super|do|with|import|export|"
r"while|elseif|return|for|in|from|when|using|else|"
r"and|or|not|switch|break)\b", Keyword),
(r'(true|false|nil)\b', Keyword.Constant),
(r'(and|or|not)\b', Operator.Word),
(r'(self)\b', Name.Builtin.Pseudo),
- (r'@@?([a-zA-Z_][a-zA-Z0-9_]*)?', Name.Variable.Class),
+ (r'@@?([a-zA-Z_]\w*)?', Name.Variable.Class),
(r'[A-Z]\w*', Name.Class), # proper name
- (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
+ (r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', Name),
("'", String.Single, combined('stringescape', 'sqs')),
('"', String.Double, combined('stringescape', 'dqs'))
],
@@ -1319,7 +1320,7 @@ class IoLexer(RegexLexer):
# names
(r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
Name.Builtin),
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
+ ('[a-zA-Z_]\w*', Name),
# numbers
(r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
(r'\d+', Number.Integer)
@@ -1720,7 +1721,7 @@ class FactorLexer(RegexLexer):
(r'"""\s+(?:.|\n)*?\s+"""', String),
(r'"(?:\\\\|\\"|[^"])*"', String),
(r'\S+"\s+(?:\\\\|\\"|[^"])*"', String),
- (r'CHAR:\s+(\\[\\abfnrstv]|[^\\]\S+)\s', String.Char),
+ (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char),
# comments
(r'!\s+.*$', Comment),
@@ -1728,21 +1729,21 @@ class FactorLexer(RegexLexer):
(r'/\*\s+(?:.|\n)*?\s\*/\s', Comment),
# boolean constants
- (r'(t|f)\s', Name.Constant),
+ (r'[tf]\s', Name.Constant),
# symbols and literals
(r'[\\$]\s+\S+', Name.Constant),
(r'M\\\s+\S+\s+\S+', Name.Constant),
# numbers
- (r'[+-]?([\d,]*\d)?\.(\d([\d,]*\d)?)?([eE][+-]?\d+)?\s', Number),
- (r'[+-]?\d([\d,]*\d)?([eE][+-]?\d+)?\s', Number),
- (r'0x[a-fA-F\d]([a-fA-F\d,]*[a-fA-F\d])?(p\d([\d,]*\d)?)?\s', Number),
- (r'NAN:\s+[a-fA-F\d]([a-fA-F\d,]*[a-fA-F\d])?(p\d([\d,]*\d)?)?\s', Number),
+ (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s', Number),
+ (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number),
+ (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number),
+ (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number),
(r'0b[01]+\s', Number),
(r'0o[0-7]+\s', Number),
- (r'(\d([\d,]*\d)?)?\+\d([\d,]*\d)?/\d([\d,]*\d)?\s', Number),
- (r'(\-\d([\d,]*\d)?)?\-\d([\d,]*\d)?/\d([\d,]*\d)?\s', Number),
+ (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number),
+ (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number),
# keywords
(r'(?:deprecated|final|foldable|flushable|inline|recursive)\s',
@@ -1856,14 +1857,14 @@ class FancyLexer(RegexLexer):
r'FalseClass|Tuple|Symbol|Stack|Set|FancySpec|Method|Package|'
r'Range)\b', Name.Builtin),
# functions
- (r'[a-zA-Z]([a-zA-Z0-9_]|[-+?!=*/^><%])*:', Name.Function),
+ (r'[a-zA-Z](\w|[-+?!=*/^><%])*:', Name.Function),
# operators, must be below functions
(r'[-+*/~,<>=&!?%^\[\]\.$]+', Operator),
- ('[A-Z][a-zA-Z0-9_]*', Name.Constant),
- ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
- ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
+ ('[A-Z]\w*', Name.Constant),
+ ('@[a-zA-Z_]\w*', Name.Variable.Instance),
+ ('@@[a-zA-Z_]\w*', Name.Variable.Class),
('@@?', Operator),
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
+ ('[a-zA-Z_]\w*', Name),
# numbers - / checks are necessary to avoid mismarking regexes,
# see comment in RubyLexer
(r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
@@ -1948,7 +1949,7 @@ class DgLexer(RegexLexer):
r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
],
'string': [
- (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+ (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
@@ -1986,7 +1987,7 @@ class Perl6Lexer(ExtendedRegexLexer):
mimetypes = ['text/x-perl6', 'application/x-perl6']
flags = re.MULTILINE | re.DOTALL | re.UNICODE
- PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" # if you alter this, search for a copy made of it below
+ PERL6_IDENTIFIER_RANGE = "['\w:-]"
PERL6_KEYWORDS = (
'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT',
@@ -2077,62 +2078,80 @@ class Perl6Lexer(ExtendedRegexLexer):
# Perl 6 has a *lot* of possible bracketing characters
# this list was lifted from STD.pm6 (https://github.com/perl6/std)
PERL6_BRACKETS = {
- u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d',
- u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c',
- u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d',
- u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046',
- u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c',
- u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd',
- u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267',
- u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271',
- u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279',
- u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281',
- u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289',
- u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8',
- u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3',
- u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5',
- u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1',
- u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd',
- u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5',
- u'\u22e6' : u'\u22e7', u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed',
- u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc',
- u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b',
- u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b',
- u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773',
- u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6',
- u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7',
- u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986',
- u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e',
- u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996',
- u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0',
- u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db',
- u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e',
- u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a',
- u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84',
- u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96',
- u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2',
- u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad',
- u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe',
- u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6',
- u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4',
- u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa',
- u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d',
- u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b',
- u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015',
- u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e',
- u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38',
- u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40',
- u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a',
- u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e',
- u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63',
+ u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d',
+ u'\u007b' : u'\u007d', u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b',
+ u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', u'\u2018' : u'\u2019',
+ u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d',
+ u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a',
+ u'\u2045' : u'\u2046', u'\u207d' : u'\u207e', u'\u208d' : u'\u208e',
+ u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', u'\u220a' : u'\u220d',
+ u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd',
+ u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265',
+ u'\u2266' : u'\u2267', u'\u2268' : u'\u2269', u'\u226a' : u'\u226b',
+ u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', u'\u2272' : u'\u2273',
+ u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279',
+ u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f',
+ u'\u2280' : u'\u2281', u'\u2282' : u'\u2283', u'\u2284' : u'\u2285',
+ u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', u'\u228a' : u'\u228b',
+ u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8',
+ u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4',
+ u'\u22a9' : u'\u2ae3', u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1',
+ u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', u'\u22b6' : u'\u22b7',
+ u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1',
+ u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db',
+ u'\u22dc' : u'\u22dd', u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1',
+ u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', u'\u22e6' : u'\u22e7',
+ u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed',
+ u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb',
+ u'\u22f4' : u'\u22fc', u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe',
+ u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', u'\u2329' : u'\u232a',
+ u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b',
+ u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771',
+ u'\u2772' : u'\u2773', u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4',
+ u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', u'\u27dd' : u'\u27de',
+ u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7',
+ u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984',
+ u'\u2985' : u'\u2986', u'\u2987' : u'\u2988', u'\u2989' : u'\u298a',
+ u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', u'\u298f' : u'\u2990',
+ u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996',
+ u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5',
+ u'\u29cf' : u'\u29d0', u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5',
+ u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', u'\u29f8' : u'\u29f9',
+ u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e',
+ u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65',
+ u'\u2a79' : u'\u2a7a', u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80',
+ u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', u'\u2a8b' : u'\u2a8c',
+ u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96',
+ u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c',
+ u'\u2aa1' : u'\u2aa2', u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9',
+ u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', u'\u2aaf' : u'\u2ab0',
+ u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe',
+ u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4',
+ u'\u2ac5' : u'\u2ac6', u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0',
+ u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', u'\u2ad5' : u'\u2ad6',
+ u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa',
+ u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a',
+ u'\u2e0c' : u'\u2e0d', u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21',
+ u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', u'\u300c' : u'\u300d',
+ u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015',
+ u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b',
+ u'\u301d' : u'\u301e', u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18',
+ u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', u'\ufe39' : u'\ufe3a',
+ u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40',
+ u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48',
+ u'\ufe59' : u'\ufe5a', u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e',
+ u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', u'\uff3b' : u'\uff3d',
+ u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63',
}
def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''):
if boundary_regex_fragment is None:
- return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + suffix + r')\b'
+ return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + \
+ suffix + r')\b'
else:
- return r'(?<!' + boundary_regex_fragment + ')' + prefix + '(' + \
- r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + '(?!' + boundary_regex_fragment + ')'
+ return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
+ r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + r'(?!' + \
+ boundary_regex_fragment + r')'
def brackets_callback(token_class):
def callback(lexer, match, context):
@@ -2222,10 +2241,10 @@ class Perl6Lexer(ExtendedRegexLexer):
context.pos = match.end()
context.stack.append('root')
- # If you're modifying these rules, be careful if you need to process '{' or '}' characters.
- # We have special logic for processing these characters (due to the fact that you can nest
- # Perl 6 code in regex blocks), so if you need to process one of them, make sure you also
- # process the corresponding one!
+ # If you're modifying these rules, be careful if you need to process '{' or '}'
+ # characters. We have special logic for processing these characters (due to the fact
+ # that you can nest Perl 6 code in regex blocks), so if you need to process one of
+ # them, make sure you also process the corresponding one!
tokens = {
'common' : [
(r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)),
@@ -2233,7 +2252,8 @@ class Perl6Lexer(ExtendedRegexLexer):
(r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
(r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
(r'^=.*?\n\s*?\n', Comment.Multiline),
- (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'),
+ (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
+ bygroups(Keyword, Name), 'token-sym-brackets'),
(r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + ')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'),
# deal with a special case in the Perl 6 grammar (role q { ... })
(r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
@@ -2241,24 +2261,28 @@ class Perl6Lexer(ExtendedRegexLexer):
(_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin),
(_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
# copied from PerlLexer
- (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable),
+ (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
+ Name.Variable),
(r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
(r'::\?\w+', Name.Variable.Global),
- (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
+ (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
+ Name.Variable.Global),
(r'\$(?:<.*?>)+', Name.Variable),
(r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)),
# copied from PerlLexer
(r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
(r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
(r'0b[01]+(_[01]+)*', Number.Bin),
- (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float),
+ (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
+ Number.Float),
(r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
(r'\d+(_\d+)*', Number.Integer),
(r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
(r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
(r'm\w+(?=\()', Name),
(r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z_:\s])(?P=first_char)*)', brackets_callback(String.Regex)),
- (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex),
+ (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
+ String.Regex),
(r'<[^\s=].*?\S>', String),
(_build_word_match(PERL6_OPERATORS), Operator),
(r'[0-9a-zA-Z_]' + PERL6_IDENTIFIER_RANGE + '*', Name),
@@ -2321,8 +2345,8 @@ class Perl6Lexer(ExtendedRegexLexer):
rating = False
# check for my/our/has declarations
- # copied PERL6_IDENTIFIER_RANGE from above; not happy about that
- if re.search("(?:my|our|has)\s+(?:['a-zA-Z0-9_:-]+\s+)?[$@%&(]", text):
+ if re.search("(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE + \
+ "+\s+)?[$@%&(]", text):
rating = 0.8
saw_perl_decl = True
@@ -2461,3 +2485,68 @@ class HyLexer(RegexLexer):
def analyse_text(text):
if '(import ' in text or '(defn ' in text:
return 0.9
+
+
+class ChaiscriptLexer(RegexLexer):
+    """
+    For `ChaiScript <http://chaiscript.com/>`_ source code.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'ChaiScript'
+    aliases = ['chai', 'chaiscript']
+    filenames = ['*.chai']
+    mimetypes = ['text/x-chaiscript', 'application/x-chaiscript']
+
+    flags = re.DOTALL  # '.' must span newlines for the /* ... */ rule
+    tokens = {
+        'commentsandwhitespace': [
+            (r'\s+', Text),
+            (r'//.*?\n', Comment.Single),
+            (r'/\*.*?\*/', Comment.Multiline),
+            (r'^\#.*?\n', Comment.Single)
+        ],
+        'slashstartsregex': [  # pushed where a following '/' may open a regex
+            include('commentsandwhitespace'),
+            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+             r'([gim]+\b|\B)', String.Regex, '#pop'),
+            (r'(?=/)', Text, ('#pop', 'badregex')),
+            (r'', Text, '#pop')  # nothing regex-like follows: leave the state
+        ],
+        'badregex': [
+            ('\n', Text, '#pop')
+        ],
+        'root': [
+            include('commentsandwhitespace'),
+            (r'\n', Text),
+            (r'[^\S\n]+', Text),
+            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|\.\.|'  # trailing '|': '..' is its own alternative
+             r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
+            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
+            (r'[})\].]', Punctuation),
+            (r'[=+\-*/]', Operator),
+            (r'(for|in|while|do|break|return|continue|if|else|'
+             r'throw|try|catch'
+             r')\b', Keyword, 'slashstartsregex'),
+            (r'(var)\b', Keyword.Declaration, 'slashstartsregex'),
+            (r'(attr|def|fun)\b', Keyword.Reserved),
+            (r'(true|false)\b', Keyword.Constant),
+            (r'(eval|throw)\b', Name.Builtin),  # 'throw' is already claimed by the keyword rule above
+            (r'`\S+`', Name.Builtin),
+            (r'[$a-zA-Z_]\w*', Name.Other),
+            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
+            (r'0x[0-9a-fA-F]+', Number.Hex),
+            (r'[0-9]+', Number.Integer),
+            (r'"', String.Double, 'dqstring'),
+            (r"'(\\\\|\\'|[^'])*'", String.Single),
+        ],
+        'dqstring': [
+            (r'\${[^"}]+?}', String.Interpol),  # ${...} interpolation inside a double-quoted string
+            (r'\$', String.Double),
+            (r'\\\\', String.Double),
+            (r'\\"', String.Double),
+            (r'[^\\\\\\"$]+', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+    }