diff options
author | Georg Brandl <georg@python.org> | 2014-09-20 00:06:48 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-09-20 00:06:48 +0200 |
commit | 3aa632c6c357746459a8847feb056cc3c86db93c (patch) | |
tree | 3b285fdac519a208586a72ad2fa8ce0fcf0993d8 /pygments/lexers/misc | |
parent | 827f32f543bd55ad65d01acca45658169a9b4a8a (diff) | |
parent | ed8910f067a347021854227f194f374c5b8cce2d (diff) | |
download | pygments-3aa632c6c357746459a8847feb056cc3c86db93c.tar.gz |
Merged in jaingaurav2/pygments-main-1011 (pull request #376)
Diffstat (limited to 'pygments/lexers/misc')
24 files changed, 5238 insertions, 0 deletions
class AmbientTalkLexer(RegexLexer):
    """
    Lexer for `AmbientTalk <https://code.google.com/p/ambienttalk>`_ source code.

    The lexer is a plain rule table: ``root`` handles ordinary code and
    pushes one of three helper states (``namespace``, ``arglist``,
    ``annotations``) that pop back once their construct is finished.

    .. versionadded:: 2.0
    """
    name = 'AmbientTalk'
    filenames = ['*.at']
    aliases = ['at', 'ambienttalk', 'ambienttalk/2']
    mimetypes = ['text/x-ambienttalk']

    # DOTALL lets '.' match newlines, which the '/\*.*?\*/' block-comment rule
    # below relies on to span several lines.
    flags = re.MULTILINE | re.DOTALL

    # Keyword-message selectors highlighted as builtins; every entry ends in
    # ':' and the tuple is handed to the words() helper imported from
    # pygments.lexer.
    builtin = words(('if:', 'then:', 'else:', 'when:', 'whenever:', 'discovered:',
                     'disconnected:', 'reconnected:', 'takenOffline:', 'becomes:',
                     'export:', 'as:', 'object:', 'actor:', 'mirror:', 'taggedAs:',
                     'mirroredBy:', 'is:'))
    tokens = {
        # Rules are tried top to bottom, so the order below is significant.
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(def|deftype|import|alias|exclude)\b', Keyword),
            # Must precede the generic 'name:' rule further down, otherwise
            # 'if:' and friends would be tagged Name.Function.
            (builtin, Name.Builtin),
            (r'(true|false|nil)\b', Keyword.Constant),
            # A namespace root ('~', 'lobby', 'jlobby' or '/') followed by a
            # dot switches to the 'namespace' state for the dotted path.
            (r'(~|lobby|jlobby|/)\.', Keyword.Constant, 'namespace'),
            (r'"(\\\\|\\"|[^"])*"', String),
            # '|' opens a parameter list that is lexed in 'arglist'.
            (r'\|', Punctuation, 'arglist'),
            (r'<:|[\^\*!%&<>+=,./?-]|:=', Operator),
            (r"`[a-zA-Z_]\w*", String.Symbol),
            (r"[a-zA-Z_]\w*:", Name.Function),
            (r"[\{\}()\[\];`]", Punctuation),
            # Listed before the generic identifier rule so self/super are not
            # reported as ordinary variables.
            (r'(self|super)\b', Name.Variable.Instance),
            (r"[a-zA-Z_]\w*", Name.Variable),
            (r"@[a-zA-Z_]\w*", Name.Class),
            (r"@\[", Name.Class, 'annotations'),
            include('numbers'),
        ],
        'numbers': [
            # Float first: the integer rule would otherwise consume the digits
            # in front of the decimal point.
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ],
        'namespace': [
            # Intermediate path segments (followed by '.') stay in this state;
            # the final segment pops back to the previous state.
            (r'[a-zA-Z_]\w*\.', Name.Namespace),
            (r'[a-zA-Z_]\w*:', Name.Function, '#pop'),
            (r'[a-zA-Z_]\w*(?!\.)', Name.Function, '#pop')
        ],
        'annotations': [
            # Everything up to and including the closing ']' is one token.
            (r"(.*?)\]", Name.Class, '#pop')
        ],
        'arglist': [
            # The closing '|' ends the parameter list.
            (r'\|', Punctuation, '#pop'),
            (r'\s*(,)\s*', Punctuation),
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
    }
class APLLexer(RegexLexer):
    """
    A simple APL lexer.

    Single-state lexer: every rule lives in ``root`` and no rule changes
    state.  Several APL concepts have no dedicated Pygments token type, so
    the table deliberately borrows unrelated types (see the inline notes).

    .. versionadded:: 2.0
    """
    name = 'APL'
    aliases = ['apl']
    filenames = ['*.apl']

    tokens = {
        # Rules are tried top to bottom, so the order below is significant.
        'root': [
            # Whitespace
            # ==========
            (r'\s+', Text),
            #
            # Comment
            # =======
            # '⍝' is traditional; '#' is supported by GNU APL and NGN (but not Dyalog)
            (u'[⍝#].*$', Comment.Single),
            #
            # Strings
            # =======
            # A doubled quote character inside a string is the escape for a
            # literal quote.
            (r'\'((\'\')|[^\'])*\'', String.Single),
            (r'"(("")|[^"])*"', String.Double),  # supported by NGN APL
            #
            # Punctuation
            # ===========
            # This token type is used for diamond and parenthesis
            # but not for bracket and ; (see below)
            (u'[⋄◇()]', Punctuation),
            #
            # Array indexing
            # ==============
            # Since this token type is very important in APL, it is not included in
            # the punctuation token type but rather in the following one
            # (String.Regex is borrowed here purely to get a distinct style.)
            (r'[\[\];]', String.Regex),
            #
            # Distinguished names
            # ===================
            # following IBM APL2 standard
            # Listed before the bare quad rule below so that '⎕' followed by a
            # name is one token.
            (u'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
            #
            # Labels
            # ======
            # following IBM APL2 standard
            # (rule currently disabled)
            # (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
            #
            # Variables
            # =========
            # following IBM APL2 standard
            (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
            #
            # Numbers
            # =======
            # First literal: optional high minus, then hex / decimal with
            # optional exponent / lone '¯' / '∞'.  Second literal: optional
            # 'J' part repeating the same shape.
            (u'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
             u'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
             Number),
            #
            # Operators
            # ==========
            (u'[\.\\\/⌿⍀¨⍣⍨⍠⍤∘]', Name.Attribute),  # closest token type
            (u'[+\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗]',
             Operator),
            #
            # Constant
            # ========
            (u'⍬', Name.Constant),
            #
            # Quad symbol
            # ===========
            (u'[⎕⍞]', Name.Variable.Global),
            #
            # Arrows left/right
            # =================
            (u'[←→]', Keyword.Declaration),
            #
            # D-Fn
            # ====
            (u'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
            (r'[{}]', Keyword.Type),
        ],
    }
\t]*)(:?%s|([+\-*/&|~]))|Or|And|Not|[=<>^]))' % + (bmax_vopwords), Operator), + (r'[(),.:\[\]]', Punctuation), + (r'(?:#[\w \t]*)', Name.Label), + (r'(?:\?[\w \t]*)', Comment.Preproc), + # Identifiers + (r'\b(New)\b([ \t]?)([(]?)(%s)' % (bmax_name), + bygroups(Keyword.Reserved, Text, Punctuation, Name.Class)), + (r'\b(Import|Framework|Module)([ \t]+)(%s\.%s)' % + (bmax_name, bmax_name), + bygroups(Keyword.Reserved, Text, Keyword.Namespace)), + (bmax_func, bygroups(Name.Function, Text, Keyword.Type, + Operator, Text, Punctuation, Text, + Keyword.Type, Name.Class, Text, + Keyword.Type, Text, Punctuation)), + (bmax_var, bygroups(Name.Variable, Text, Keyword.Type, Operator, + Text, Punctuation, Text, Keyword.Type, + Name.Class, Text, Keyword.Type)), + (r'\b(Type|Extends)([ \t]+)(%s)' % (bmax_name), + bygroups(Keyword.Reserved, Text, Name.Class)), + # Keywords + (r'\b(Ptr)\b', Keyword.Type), + (r'\b(Pi|True|False|Null|Self|Super)\b', Keyword.Constant), + (r'\b(Local|Global|Const|Field)\b', Keyword.Declaration), + (words(( + 'TNullMethodException', 'TNullFunctionException', + 'TNullObjectException', 'TArrayBoundsException', + 'TRuntimeException'), prefix=r'\b', suffix=r'\b'), Name.Exception), + (words(( + 'Strict', 'SuperStrict', 'Module', 'ModuleInfo', + 'End', 'Return', 'Continue', 'Exit', 'Public', 'Private', + 'Var', 'VarPtr', 'Chr', 'Len', 'Asc', 'SizeOf', 'Sgn', 'Abs', 'Min', 'Max', + 'New', 'Release', 'Delete', 'Incbin', 'IncbinPtr', 'IncbinLen', + 'Framework', 'Include', 'Import', 'Extern', 'EndExtern', + 'Function', 'EndFunction', 'Type', 'EndType', 'Extends', 'Method', 'EndMethod', + 'Abstract', 'Final', 'If', 'Then', 'Else', 'ElseIf', 'EndIf', + 'For', 'To', 'Next', 'Step', 'EachIn', 'While', 'Wend', 'EndWhile', + 'Repeat', 'Until', 'Forever', 'Select', 'Case', 'Default', 'EndSelect', + 'Try', 'Catch', 'EndTry', 'Throw', 'Assert', 'Goto', 'DefData', 'ReadData', + 'RestoreData'), prefix=r'\b', suffix=r'\b'), + Keyword.Reserved), + # Final resolve (for variable 
names and such) + (r'(%s)' % (bmax_name), Name.Variable), + ], + 'string': [ + (r'""', String.Double), + (r'"C?', String.Double, '#pop'), + (r'[^"]+', String.Double), + ], + } + + +class BlitzBasicLexer(RegexLexer): + """ + For `BlitzBasic <http://blitzbasic.com>`_ source code. + + .. versionadded:: 2.0 + """ + + name = 'BlitzBasic' + aliases = ['blitzbasic', 'b3d', 'bplus'] + filenames = ['*.bb', '*.decls'] + mimetypes = ['text/x-bb'] + + bb_sktypes = r'@{1,2}|[#$%]' + bb_name = r'[a-z]\w*' + bb_var = (r'(%s)(?:([ \t]*)(%s)|([ \t]*)([.])([ \t]*)(?:(%s)))?') % \ + (bb_name, bb_sktypes, bb_name) + + flags = re.MULTILINE | re.IGNORECASE + tokens = { + 'root': [ + # Text + (r'[ \t]+', Text), + # Comments + (r";.*?\n", Comment.Single), + # Data types + ('"', String.Double, 'string'), + # Numbers + (r'[0-9]+\.[0-9]*(?!\.)', Number.Float), + (r'\.[0-9]+(?!\.)', Number.Float), + (r'[0-9]+', Number.Integer), + (r'\$[0-9a-f]+', Number.Hex), + (r'\%[10]+', Number.Bin), + # Other + (words(('Shl', 'Shr', 'Sar', 'Mod', 'Or', 'And', 'Not', + 'Abs', 'Sgn', 'Handle', 'Int', 'Float', 'Str', + 'First', 'Last', 'Before', 'After'), + prefix=r'\b', suffix=r'\b'), + Operator), + (r'([+\-*/~=<>^])', Operator), + (r'[(),:\[\]\\]', Punctuation), + (r'\.([ \t]*)(%s)' % bb_name, Name.Label), + # Identifiers + (r'\b(New)\b([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Class)), + (r'\b(Gosub|Goto)\b([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Label)), + (r'\b(Object)\b([ \t]*)([.])([ \t]*)(%s)\b' % (bb_name), + bygroups(Operator, Text, Punctuation, Text, Name.Class)), + (r'\b%s\b([ \t]*)(\()' % bb_var, + bygroups(Name.Function, Text, Keyword.Type, Text, Punctuation, + Text, Name.Class, Text, Punctuation)), + (r'\b(Function)\b([ \t]+)%s' % bb_var, + bygroups(Keyword.Reserved, Text, Name.Function, Text, Keyword.Type, + Text, Punctuation, Text, Name.Class)), + (r'\b(Type)([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Class)), + # 
class MonkeyLexer(RegexLexer):
    """
    For
    `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_
    source code.

    ``root`` lexes ordinary code and pushes helper states for the constructs
    that need context: ``variables`` (declaration lists), ``classname``
    (type references, including generics and arrays), ``funcname`` (function
    and method headers), ``string`` and ``comment`` (``#rem`` blocks).

    .. versionadded:: 1.6
    """

    name = 'Monkey'
    aliases = ['monkey']
    filenames = ['*.monkey']
    mimetypes = ['text/x-monkey']

    # Identifier shapes; the first character decides how a name is
    # classified by the catch-all rules at the end of 'root'.
    name_variable = r'[a-z_]\w*'
    name_function = r'[A-Z]\w*'
    name_constant = r'[A-Z_][A-Z0-9_]*'
    name_class = r'[A-Z]\w*'
    name_module = r'[a-z0-9_]*'

    keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)'
    # ? == Bool // % == Int // # == Float // $ == String
    keyword_type_special = r'[?%#$]'

    flags = re.MULTILINE

    tokens = {
        # Rules are tried top to bottom, so the order below is significant.
        'root': [
            # Text
            (r'\s+', Text),
            # Comments
            (r"'.*", Comment),
            (r'(?i)^#rem\b', Comment.Multiline, 'comment'),
            # preprocessor directives
            (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b', Comment.Preproc),
            # preprocessor variable (any line starting with '#' that is not a directive)
            (r'^#', Comment.Preproc, 'variables'),
            # String
            ('"', String.Double, 'string'),
            # Numbers
            (r'[0-9]+\.[0-9]*(?!\.)', Number.Float),
            (r'\.[0-9]+(?!\.)', Number.Float),
            (r'[0-9]+', Number.Integer),
            # Hex digits are 0-9, a-f and A-F.  The class used to read
            # 'a-fA-Z', which accepted every uppercase letter and so
            # swallowed things like '$Hello' as a hex literal.
            (r'\$[0-9a-fA-F]+', Number.Hex),
            (r'\%[10]+', Number.Bin),
            # Native data types
            (r'\b%s\b' % keyword_type, Keyword.Type),
            # Exception handling
            (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved),
            (r'Throwable', Name.Exception),
            # Builtins
            (r'(?i)\b(?:Null|True|False)\b', Name.Builtin),
            (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo),
            (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant),
            # Keywords
            (r'(?i)^(Import)(\s+)(.*)(\n)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text)),
            (r'(?i)^Strict\b.*\n', Keyword.Reserved),
            (r'(?i)(Const|Local|Global|Field)(\s+)',
             bygroups(Keyword.Declaration, Text), 'variables'),
            (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)',
             bygroups(Keyword.Reserved, Text), 'classname'),
            (r'(?i)(Function|Method)(\s+)',
             bygroups(Keyword.Reserved, Text), 'funcname'),
            (r'(?i)(?:End|Return|Public|Private|Extern|Property|'
             r'Final|Abstract)\b', Keyword.Reserved),
            # Flow Control stuff
            # (the trailing whitespace is part of the keyword token)
            (r'(?i)(?:If|Then|Else|ElseIf|EndIf|'
             r'Select|Case|Default|'
             r'While|Wend|'
             r'Repeat|Until|Forever|'
             r'For|To|Until|Step|EachIn|Next|'
             r'Exit|Continue)\s+', Keyword.Reserved),
            # not used yet
            (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved),
            # Array
            (r'[\[\]]', Punctuation),
            # Other
            (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator),
            # Word operators must be whole words.  Without the boundaries the
            # case-insensitive match split identifiers such as 'Order' or
            # 'Notify' into an operator plus a leftover name.
            (r'(?i)\b(?:Not|Mod|Shl|Shr|And|Or)\b', Operator.Word),
            (r'[\(\){}!#,.:]', Punctuation),
            # catch the rest
            # (constant before function: both start with an uppercase letter,
            # and the constant pattern is the stricter of the two)
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_function, Name.Function),
            (r'%s\b' % name_variable, Name.Variable),
        ],
        'funcname': [
            (r'(?i)%s\b' % name_function, Name.Function),
            # ':' introduces the return type
            (r':', Punctuation, 'classname'),
            (r'\s+', Text),
            # parameter list is lexed like a declaration list
            (r'\(', Punctuation, 'variables'),
            (r'\)', Punctuation, '#pop')
        ],
        'classname': [
            (r'%s\.' % name_module, Name.Namespace),
            (r'%s\b' % keyword_type, Keyword.Type),
            (r'%s\b' % name_class, Name.Class),
            # array (of given size)
            (r'(\[)(\s*)(\d*)(\s*)(\])',
             bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)),
            # generics
            (r'\s+(?!<)', Text, '#pop'),
            (r'<', Punctuation, '#push'),
            (r'>', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            # anything else ends the type reference without consuming input
            default('#pop')
        ],
        'variables': [
            (r'%s\b' % name_constant, Name.Constant),
            (r'%s\b' % name_variable, Name.Variable),
            (r'%s' % keyword_type_special, Keyword.Type),
            (r'\s+', Text),
            (r':', Punctuation, 'classname'),
            # each further declarator gets its own stack entry
            (r',', Punctuation, '#push'),
            default('#pop')
        ],
        'string': [
            (r'[^"~]+', String.Double),
            # '~' is the escape character inside Monkey strings
            (r'~q|~n|~r|~t|~z|~~', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'comment': [
            # '#rem' blocks nest: every '#rem' pushes, every '#end' pops
            (r'(?i)^#rem.*?', Comment.Multiline, "#push"),
            (r'(?i)^#end.*?', Comment.Multiline, "#pop"),
            (r'\n', Comment.Multiline),
            (r'.+', Comment.Multiline),
        ],
    }
+ r'|list|clr|cmd|open|close|get#?', Keyword.Reserved), + (r'data|restore|dim|let|def|fn', Keyword.Declaration), + (r'tab|spc|sgn|int|abs|usr|fre|pos|sqr|rnd|log|exp|cos|sin|tan|atn' + r'|peek|len|val|asc|(str|chr|left|right|mid)\$', Name.Builtin), + (r'[-+*/^<>=]', Operator), + (r'not|and|or', Operator.Word), + (r'"[^"\n]*.', String), + (r'\d+|[-+]?\d*\.\d*(e[-+]?\d+)?', Number.Float), + (r'[\(\),:;]', Punctuation), + (r'\w+[$%]?', Name), + ] + } + + def analyse_text(self, text): + # if it starts with a line number, it shouldn't be a "modern" Basic + # like VB.net + if re.match(r'\d+', text): + return True + + +class QBasicLexer(RegexLexer): + """ + For + `QBasic <http://en.wikipedia.org/wiki/QBasic>`_ + source code. + """ + + name = 'QBasic' + aliases = ['qbasic', 'basic'] + filenames = ['*.BAS', '*.bas'] + mimetypes = ['text/basic'] + + declarations = ('DATA', 'LET') + + functions = ( + 'ABS', 'ASC', 'ATN', 'CDBL', 'CHR$', 'CINT', 'CLNG', + 'COMMAND$', 'COS', 'CSNG', 'CSRLIN', 'CVD', 'CVDMBF', 'CVI', + 'CVL', 'CVS', 'CVSMBF', 'DATE$', 'ENVIRON$', 'EOF', 'ERDEV', + 'ERDEV$', 'ERL', 'ERR', 'EXP', 'FILEATTR', 'FIX', 'FRE', + 'FREEFILE', 'HEX$', 'INKEY$', 'INP', 'INPUT$', 'INSTR', 'INT', + 'IOCTL$', 'LBOUND', 'LCASE$', 'LEFT$', 'LEN', 'LOC', 'LOF', + 'LOG', 'LPOS', 'LTRIM$', 'MID$', 'MKD$', 'MKDMBF$', 'MKI$', + 'MKL$', 'MKS$', 'MKSMBF$', 'OCT$', 'PEEK', 'PEN', 'PLAY', + 'PMAP', 'POINT', 'POS', 'RIGHT$', 'RND', 'RTRIM$', 'SADD', + 'SCREEN', 'SEEK', 'SETMEM', 'SGN', 'SIN', 'SPACE$', 'SPC', + 'SQR', 'STICK', 'STR$', 'STRIG', 'STRING$', 'TAB', 'TAN', + 'TIME$', 'TIMER', 'UBOUND', 'UCASE$', 'VAL', 'VARPTR', + 'VARPTR$', 'VARSEG' + ) + + metacommands = ('$DYNAMIC', '$INCLUDE', '$STATIC') + + operators = ('AND', 'EQV', 'IMP', 'NOT', 'OR', 'XOR') + + statements = ( + 'BEEP', 'BLOAD', 'BSAVE', 'CALL', 'CALL ABSOLUTE', + 'CALL INTERRUPT', 'CALLS', 'CHAIN', 'CHDIR', 'CIRCLE', 'CLEAR', + 'CLOSE', 'CLS', 'COLOR', 'COM', 'COMMON', 'CONST', 'DATA', + 'DATE$', 'DECLARE', 'DEF FN', 
'DEF SEG', 'DEFDBL', 'DEFINT', + 'DEFLNG', 'DEFSNG', 'DEFSTR', 'DEF', 'DIM', 'DO', 'LOOP', + 'DRAW', 'END', 'ENVIRON', 'ERASE', 'ERROR', 'EXIT', 'FIELD', + 'FILES', 'FOR', 'NEXT', 'FUNCTION', 'GET', 'GOSUB', 'GOTO', + 'IF', 'THEN', 'INPUT', 'INPUT #', 'IOCTL', 'KEY', 'KEY', + 'KILL', 'LET', 'LINE', 'LINE INPUT', 'LINE INPUT #', 'LOCATE', + 'LOCK', 'UNLOCK', 'LPRINT', 'LSET', 'MID$', 'MKDIR', 'NAME', + 'ON COM', 'ON ERROR', 'ON KEY', 'ON PEN', 'ON PLAY', + 'ON STRIG', 'ON TIMER', 'ON UEVENT', 'ON', 'OPEN', 'OPEN COM', + 'OPTION BASE', 'OUT', 'PAINT', 'PALETTE', 'PCOPY', 'PEN', + 'PLAY', 'POKE', 'PRESET', 'PRINT', 'PRINT #', 'PRINT USING', + 'PSET', 'PUT', 'PUT', 'RANDOMIZE', 'READ', 'REDIM', 'REM', + 'RESET', 'RESTORE', 'RESUME', 'RETURN', 'RMDIR', 'RSET', 'RUN', + 'SCREEN', 'SEEK', 'SELECT CASE', 'SHARED', 'SHELL', 'SLEEP', + 'SOUND', 'STATIC', 'STOP', 'STRIG', 'SUB', 'SWAP', 'SYSTEM', + 'TIME$', 'TIMER', 'TROFF', 'TRON', 'TYPE', 'UEVENT', 'UNLOCK', + 'VIEW', 'WAIT', 'WHILE', 'WEND', 'WIDTH', 'WINDOW', 'WRITE' + ) + + keywords = ( + 'ACCESS', 'ALIAS', 'ANY', 'APPEND', 'AS', 'BASE', 'BINARY', + 'BYVAL', 'CASE', 'CDECL', 'DOUBLE', 'ELSE', 'ELSEIF', 'ENDIF', + 'INTEGER', 'IS', 'LIST', 'LOCAL', 'LONG', 'LOOP', 'MOD', + 'NEXT', 'OFF', 'ON', 'OUTPUT', 'RANDOM', 'SIGNAL', 'SINGLE', + 'STEP', 'STRING', 'THEN', 'TO', 'UNTIL', 'USING', 'WEND' + ) + + tokens = { + 'root': [ + (r'\n+', Text), + (r'\s+', Text.Whitespace), + (r'^(\s*)(\d*)(\s*)(REM .*)$', + bygroups(Text.Whitespace, Name.Label, Text.Whitespace, + Comment.Single)), + (r'^(\s*)(\d+)(\s*)', + bygroups(Text.Whitespace, Name.Label, Text.Whitespace)), + (r'(?=[\s]*)(\w+)(?=[\s]*=)', Name.Variable.Global), + (r'(?=[^"]*)\'.*$', Comment.Single), + (r'"[^\n\"]*"', String.Double), + (r'(END)(\s+)(FUNCTION|IF|SELECT|SUB)', + bygroups(Keyword.Reserved, Text.Whitespace, Keyword.Reserved)), + (r'(DECLARE)(\s+)([A-Z]+)(\s+)(\S+)', + bygroups(Keyword.Declaration, Text.Whitespace, Name.Variable, + Text.Whitespace, Name)), + 
    def analyse_text(text):
        # Content-independent score: `text` is never inspected, every input
        # gets the same constant 0.2.
        # NOTE(review): presumably a deliberately low fixed confidence so that
        # more specific lexers can outrank this one when guessing -- confirm.
        return 0.2
class ChapelLexer(RegexLexer):
    """
    For `Chapel <http://chapel.cray.com/>`_ source.

    ``root`` handles ordinary code; ``procname`` and ``classname`` are
    one-shot states that tag the identifier following ``proc`` or a
    ``class``/``module``/``record``/``union`` keyword and then pop.

    .. versionadded:: 2.0
    """
    name = 'Chapel'
    filenames = ['*.chpl']
    aliases = ['chapel', 'chpl']
    # mimetypes = ['text/x-chapel']

    tokens = {
        # Rules are tried top to bottom, so the order below is significant.
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # backslash-newline (line continuation) is plain text
            (r'\\\n', Text),

            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),

            (r'(config|const|in|inout|out|param|ref|type|var)\b',
             Keyword.Declaration),
            (r'(false|nil|true)\b', Keyword.Constant),
            (r'(bool|complex|imag|int|opaque|range|real|string|uint)\b',
             Keyword.Type),
            # Only a trailing word boundary is requested from words().
            (words((
                'align', 'atomic', 'begin', 'break', 'by', 'cobegin', 'coforall',
                'continue', 'delete', 'dmapped', 'do', 'domain', 'else', 'enum',
                'export', 'extern', 'for', 'forall', 'if', 'index', 'inline',
                'iter', 'label', 'lambda', 'let', 'local', 'new', 'noinit', 'on',
                'otherwise', 'pragma', 'reduce', 'return', 'scan', 'select',
                'serial', 'single', 'sparse', 'subdomain', 'sync', 'then', 'use',
                'when', 'where', 'while', 'with', 'yield', 'zip'), suffix=r'\b'),
             Keyword),
            # The identifier after these keywords is tagged by a helper state.
            (r'(proc)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'procname'),
            (r'(class|module|record|union)(\s+)', bygroups(Keyword, Text),
             'classname'),

            # imaginary integers
            # (these 'i'-suffixed forms are tried before the real and integer
            # rules so the suffix is not left behind as an identifier)
            (r'\d+i', Number),
            (r'\d+\.\d*([Ee][-+]\d+)?i', Number),
            (r'\.\d+([Ee][-+]\d+)?i', Number),
            (r'\d+[Ee][-+]\d+i', Number),

            # reals cannot end with a period due to lexical ambiguity with
            # .. operator. See reference for rationale.
            (r'(\d*\.\d+)([eE][+-]?[0-9]+)?i?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+i?', Number.Float),

            # integer literals
            # -- binary
            (r'0[bB][0-1]+', Number.Bin),
            # -- hex
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            # -- octal
            (r'0[oO][0-7]+', Number.Oct),
            # -- decimal
            (r'[0-9]+', Number.Integer),

            # strings
            # NOTE(review): the opening and closing quote are not required to
            # be the same character, and either quote character ends the
            # string body -- confirm this is intended.
            (r'["\'](\\\\|\\"|[^"\'])*["\']', String),

            # tokens
            # NOTE(review): alternatives are tried left to right and '=' comes
            # first, so '==' is emitted as two '=' tokens; both are Operator,
            # so highlighting is unaffected.
            (r'(=|\+=|-=|\*=|/=|\*\*=|%=|&=|\|=|\^=|&&=|\|\|=|<<=|>>=|'
             r'<=>|<~>|\.\.|by|#|\.\.\.|'
             r'&&|\|\||!|&|\||\^|~|<<|>>|'
             r'==|!=|<=|>=|<|>|'
             r'[+\-*/%]|\*\*)', Operator),
            (r'[:;,.?()\[\]{}]', Punctuation),

            # identifiers
            (r'[a-zA-Z_][\w$]*', Name.Other),
        ],
        'classname': [
            (r'[a-zA-Z_][\w$]*', Name.Class, '#pop'),
        ],
        'procname': [
            (r'[a-zA-Z_][\w$]*', Name.Function, '#pop'),
        ],
    }
versionadded:: 0.7 + """ + + name = 'Dylan' + aliases = ['dylan'] + filenames = ['*.dylan', '*.dyl', '*.intr'] + mimetypes = ['text/x-dylan'] + + flags = re.IGNORECASE + + builtins = set(( + 'subclass', 'abstract', 'block', 'concrete', 'constant', 'class', + 'compiler-open', 'compiler-sideways', 'domain', 'dynamic', + 'each-subclass', 'exception', 'exclude', 'function', 'generic', + 'handler', 'inherited', 'inline', 'inline-only', 'instance', + 'interface', 'import', 'keyword', 'library', 'macro', 'method', + 'module', 'open', 'primary', 'required', 'sealed', 'sideways', + 'singleton', 'slot', 'thread', 'variable', 'virtual')) + + keywords = set(( + 'above', 'afterwards', 'begin', 'below', 'by', 'case', 'cleanup', + 'create', 'define', 'else', 'elseif', 'end', 'export', 'finally', + 'for', 'from', 'if', 'in', 'let', 'local', 'otherwise', 'rename', + 'select', 'signal', 'then', 'to', 'unless', 'until', 'use', 'when', + 'while')) + + operators = set(( + '~', '+', '-', '*', '|', '^', '=', '==', '~=', '~==', '<', '<=', + '>', '>=', '&', '|')) + + functions = set(( + 'abort', 'abs', 'add', 'add!', 'add-method', 'add-new', 'add-new!', + 'all-superclasses', 'always', 'any?', 'applicable-method?', 'apply', + 'aref', 'aref-setter', 'as', 'as-lowercase', 'as-lowercase!', + 'as-uppercase', 'as-uppercase!', 'ash', 'backward-iteration-protocol', + 'break', 'ceiling', 'ceiling/', 'cerror', 'check-type', 'choose', + 'choose-by', 'complement', 'compose', 'concatenate', 'concatenate-as', + 'condition-format-arguments', 'condition-format-string', 'conjoin', + 'copy-sequence', 'curry', 'default-handler', 'dimension', 'dimensions', + 'direct-subclasses', 'direct-superclasses', 'disjoin', 'do', + 'do-handlers', 'element', 'element-setter', 'empty?', 'error', 'even?', + 'every?', 'false-or', 'fill!', 'find-key', 'find-method', 'first', + 'first-setter', 'floor', 'floor/', 'forward-iteration-protocol', + 'function-arguments', 'function-return-values', + 'function-specializers', 'gcd', 
'generic-function-mandatory-keywords', + 'generic-function-methods', 'head', 'head-setter', 'identity', + 'initialize', 'instance?', 'integral?', 'intersection', + 'key-sequence', 'key-test', 'last', 'last-setter', 'lcm', 'limited', + 'list', 'logand', 'logbit?', 'logior', 'lognot', 'logxor', 'make', + 'map', 'map-as', 'map-into', 'max', 'member?', 'merge-hash-codes', + 'min', 'modulo', 'negative', 'negative?', 'next-method', + 'object-class', 'object-hash', 'odd?', 'one-of', 'pair', 'pop', + 'pop-last', 'positive?', 'push', 'push-last', 'range', 'rank', + 'rcurry', 'reduce', 'reduce1', 'remainder', 'remove', 'remove!', + 'remove-duplicates', 'remove-duplicates!', 'remove-key!', + 'remove-method', 'replace-elements!', 'replace-subsequence!', + 'restart-query', 'return-allowed?', 'return-description', + 'return-query', 'reverse', 'reverse!', 'round', 'round/', + 'row-major-index', 'second', 'second-setter', 'shallow-copy', + 'signal', 'singleton', 'size', 'size-setter', 'slot-initialized?', + 'sort', 'sort!', 'sorted-applicable-methods', 'subsequence-position', + 'subtype?', 'table-protocol', 'tail', 'tail-setter', 'third', + 'third-setter', 'truncate', 'truncate/', 'type-error-expected-type', + 'type-error-value', 'type-for-copy', 'type-union', 'union', 'values', + 'vector', 'zero?')) + + valid_name = '\\\\?[a-z0-9' + re.escape('!&*<>|^$%@_-+~?/=') + ']+' + + def get_tokens_unprocessed(self, text): + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): + if token is Name: + lowercase_value = value.lower() + if lowercase_value in self.builtins: + yield index, Name.Builtin, value + continue + if lowercase_value in self.keywords: + yield index, Keyword, value + continue + if lowercase_value in self.functions: + yield index, Name.Builtin, value + continue + if lowercase_value in self.operators: + yield index, Operator, value + continue + yield index, token, value + + tokens = { + 'root': [ + # Whitespace + (r'\s+', Text), + + # single line comment 
+ (r'//.*?\n', Comment.Single), + + # lid header + (r'([a-z0-9-]+)(:)([ \t]*)(.*(?:\n[ \t].+)*)', + bygroups(Name.Attribute, Operator, Text, String)), + + ('', Text, 'code') # no header match, switch to code + ], + 'code': [ + # Whitespace + (r'\s+', Text), + + # single line comment + (r'//.*?\n', Comment.Single), + + # multi-line comment + (r'/\*', Comment.Multiline, 'comment'), + + # strings and characters + (r'"', String, 'string'), + (r"'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\\'\n])'", String.Char), + + # binary integer + (r'#[bB][01]+', Number.Bin), + + # octal integer + (r'#[oO][0-7]+', Number.Oct), + + # floating point + (r'[-+]?(\d*\.\d+(e[-+]?\d+)?|\d+(\.\d*)?e[-+]?\d+)', Number.Float), + + # decimal integer + (r'[-+]?\d+', Number.Integer), + + # hex integer + (r'#[xX][0-9a-f]+', Number.Hex), + + # Macro parameters + (r'(\?' + valid_name + ')(:)' + r'(token|name|variable|expression|body|case-body|\*)', + bygroups(Name.Tag, Operator, Name.Builtin)), + (r'(\?)(:)(token|name|variable|expression|body|case-body|\*)', + bygroups(Name.Tag, Operator, Name.Builtin)), + (r'\?' + valid_name, Name.Tag), + + # Punctuation + (r'(=>|::|#\(|#\[|##|\?|\?\?|\?=|[(){}\[\],\.;])', Punctuation), + + # Most operators are picked up as names and then re-flagged. + # This one isn't valid in a name though, so we pick it up now. + (r':=', Operator), + + # Pick up #t / #f before we match other stuff with #. + (r'#[tf]', Literal), + + # #"foo" style keywords + (r'#"', String.Symbol, 'keyword'), + + # #rest, #key, #all-keys, etc. + (r'#[a-z0-9-]+', Keyword), + + # required-init-keyword: style keywords. + (valid_name + ':', Keyword), + + # class names + (r'<' + valid_name + '>', Name.Class), + + # define variable forms. + (r'\*' + valid_name + '\*', Name.Variable.Global), + + # define constant forms. + (r'\$' + valid_name, Name.Constant), + + # everything else. We re-flag some of these in the method above. 
class DylanLidLexer(RegexLexer):
    """
    For Dylan LID (Library Interchange Definition) files.

    A LID file is a flat list of ``Keyword: value`` headers; a value may
    continue on following lines as long as they start with a space or tab.

    .. versionadded:: 1.6
    """

    name = 'DylanLID'
    aliases = ['dylan-lid', 'lid']
    filenames = ['*.lid', '*.hdp']
    mimetypes = ['text/x-dylan-lid']

    # Header keywords are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            # runs of blanks and newlines
            (r'\s+', Text),

            # ``//`` comment up to and including the end of the line
            (r'//.*?\n', Comment.Single),

            # ``key: value`` header; the value swallows indented
            # continuation lines
            (r'(.*?)(:)([ \t]*)(.*(?:\n[ \t].+)*)',
             bygroups(Name.Attribute, Operator, Text, String)),
        ]
    }
class DylanConsoleLexer(Lexer):
    """
    For Dylan interactive console output like:

    .. sourcecode:: dylan-console

        ? let a = 1;
        => 1
        ? a
        => 1

    This is based on a copy of the RubyConsoleLexer.

    .. versionadded:: 1.6
    """
    name = 'Dylan session'
    aliases = ['dylan-console', 'dylan-repl']
    filenames = ['*.dylan-console']
    mimetypes = ['text/x-dylan-console']

    # One newline-terminated line at a time.
    _line_re = re.compile(r'.*?\n')
    # A line is console input when it starts with the ``?`` prompt or with a
    # blank.  Raw string: ``\?`` is not a valid escape in a plain literal.
    _prompt_re = re.compile(r'\?| ')

    def get_tokens_unprocessed(self, text):
        """
        Split a console transcript into prompt, code and output tokens.

        Consecutive input lines are collected (with their prompt stripped)
        and lexed together by :class:`DylanLexer`; the stripped prompts are
        re-inserted as ``Generic.Prompt`` tokens via ``do_insertions``.
        Every other line is yielded unchanged as ``Generic.Output``.

        :param text: the complete transcript.
        :return: generator of ``(index, tokentype, value)`` tuples.
        """
        dylexer = DylanLexer(**self.options)

        curcode = ''      # input collected since the last output line
        insertions = []   # prompts to splice back into the lexed input
        for match in self._line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                # An output line ends the current run of input: flush it.
                if curcode:
                    for item in do_insertions(
                            insertions,
                            dylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        # Flush input that was not followed by any output line.
        if curcode:
            for item in do_insertions(
                    insertions, dylexer.get_tokens_unprocessed(curcode)):
                yield item
class ECLLexer(RegexLexer):
    """
    Lexer for the declarative big-data `ECL
    <http://hpccsystems.com/community/docs/ecl-language-reference/html>`_
    language.

    .. versionadded:: 1.5
    """

    name = 'ECL'
    aliases = ['ecl']
    filenames = ['*.ecl']
    mimetypes = ['application/x-ecl']

    # ECL is case-insensitive; MULTILINE lets ``^#...$`` match per line.
    flags = re.IGNORECASE | re.MULTILINE

    # Actions and workflow services.
    _ACTION_WORDS = (
        'APPLY', 'ASSERT', 'BUILD', 'BUILDINDEX', 'EVALUATE', 'FAIL',
        'KEYDIFF', 'KEYPATCH', 'LOADXML', 'NOTHOR', 'NOTIFY', 'OUTPUT',
        'PARALLEL', 'SEQUENTIAL', 'SOAPCALL', 'CHECKPOINT', 'DEPRECATED',
        'FAILCODE', 'FAILMESSAGE', 'FAILURE', 'GLOBAL', 'INDEPENDENT',
        'ONWARNING', 'PERSIST', 'PRIORITY', 'RECOVERY', 'STORED', 'SUCCESS',
        'WAIT', 'WHEN',
    )

    # These are classed differently, check later
    _RESERVED_WORDS = (
        'ALL', 'AND', 'ANY', 'AS', 'ATMOST', 'BEFORE', 'BEGINC++', 'BEST',
        'BETWEEN', 'CASE', 'CONST', 'COUNTER', 'CSV', 'DESCEND', 'ENCRYPT',
        'ENDC++', 'ENDMACRO', 'EXCEPT', 'EXCLUSIVE', 'EXPIRE', 'EXPORT',
        'EXTEND', 'FALSE', 'FEW', 'FIRST', 'FLAT', 'FULL', 'FUNCTION',
        'GROUP', 'HEADER', 'HEADING', 'HOLE', 'IFBLOCK', 'IMPORT', 'IN',
        'JOINED', 'KEEP', 'KEYED', 'LAST', 'LEFT', 'LIMIT', 'LOAD', 'LOCAL',
        'LOCALE', 'LOOKUP', 'MACRO', 'MANY', 'MAXCOUNT', 'MAXLENGTH',
        'MIN SKEW', 'MODULE', 'INTERFACE', 'NAMED', 'NOCASE', 'NOROOT',
        'NOSCAN', 'NOSORT', 'NOT', 'OF', 'ONLY', 'OPT', 'OR', 'OUTER',
        'OVERWRITE', 'PACKED', 'PARTITION', 'PENALTY', 'PHYSICALLENGTH',
        'PIPE', 'QUOTE', 'RELATIONSHIP', 'REPEAT', 'RETURN', 'RIGHT', 'SCAN',
        'SELF', 'SEPARATOR', 'SERVICE', 'SHARED', 'SKEW', 'SKIP', 'SQL',
        'STORE', 'TERMINATOR', 'THOR', 'THRESHOLD', 'TOKEN', 'TRANSFORM',
        'TRIM', 'TRUE', 'TYPE', 'UNICODEORDER', 'UNSORTED', 'VALIDATE',
        'VIRTUAL', 'WHOLE', 'WILD', 'WITHIN', 'XML', 'XPATH',
        '__COMPRESSED__',
    )

    # Built-in functions.
    _FUNCTION_WORDS = (
        'ABS', 'ACOS', 'ALLNODES', 'ASCII', 'ASIN', 'ASSTRING', 'ATAN',
        'ATAN2', 'AVE', 'CASE', 'CHOOSE', 'CHOOSEN', 'CHOOSESETS',
        'CLUSTERSIZE', 'COMBINE', 'CORRELATION', 'COS', 'COSH', 'COUNT',
        'COVARIANCE', 'CRON', 'DATASET', 'DEDUP', 'DEFINE', 'DENORMALIZE',
        'DISTRIBUTE', 'DISTRIBUTED', 'DISTRIBUTION', 'EBCDIC', 'ENTH',
        'ERROR', 'EVALUATE', 'EVENT', 'EVENTEXTRA', 'EVENTNAME', 'EXISTS',
        'EXP', 'FAILCODE', 'FAILMESSAGE', 'FETCH', 'FROMUNICODE',
        'GETISVALID', 'GLOBAL', 'GRAPH', 'GROUP', 'HASH', 'HASH32', 'HASH64',
        'HASHCRC', 'HASHMD5', 'HAVING', 'IF', 'INDEX', 'INTFORMAT',
        'ISVALID', 'ITERATE', 'JOIN', 'KEYUNICODE', 'LENGTH', 'LIBRARY',
        'LIMIT', 'LN', 'LOCAL', 'LOG', 'LOOP', 'MAP', 'MATCHED',
        'MATCHLENGTH', 'MATCHPOSITION', 'MATCHTEXT', 'MATCHUNICODE', 'MAX',
        'MERGE', 'MERGEJOIN', 'MIN', 'NOLOCAL', 'NONEMPTY', 'NORMALIZE',
        'PARSE', 'PIPE', 'POWER', 'PRELOAD', 'PROCESS', 'PROJECT', 'PULL',
        'RANDOM', 'RANGE', 'RANK', 'RANKED', 'REALFORMAT', 'RECORDOF',
        'REGEXFIND', 'REGEXREPLACE', 'REGROUP', 'REJECTED', 'ROLLUP',
        'ROUND', 'ROUNDUP', 'ROW', 'ROWDIFF', 'SAMPLE', 'SET', 'SIN', 'SINH',
        'SIZEOF', 'SOAPCALL', 'SORT', 'SORTED', 'SQRT', 'STEPPED', 'STORED',
        'SUM', 'TABLE', 'TAN', 'TANH', 'THISNODE', 'TOPN', 'TOUNICODE',
        'TRANSFER', 'TRIM', 'TRUNCATE', 'TYPEOF', 'UNGROUP', 'UNICODEORDER',
        'VARIANCE', 'WHICH', 'WORKUNIT', 'XMLDECODE', 'XMLENCODE', 'XMLTEXT',
        'XMLUNICODE',
    )

    tokens = {
        'root': [
            include('whitespace'),
            include('statements'),
        ],
        'whitespace': [
            (r'\s+', Text),
            (r'\/\/.*', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            # Word-based rules come first so that they win over the
            # catch-all identifier rule at the end of this state.
            include('types'),
            include('keywords'),
            include('functions'),
            include('hash'),
            (r'"', String, 'string'),
            (r'\'', String, 'string'),
            (r'(\d+\.\d*|\.\d+|\d+)e[+-]?\d+[lu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-f]+[lu]*', Number.Hex),
            (r'0[0-7]+[lu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # a comment terminator with no matching opener
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]+', Operator),
            (r'[{}()\[\],.;]', Punctuation),
            (r'[a-z_]\w*', Name),
        ],
        'hash': [
            (r'^#.*$', Comment.Preproc),
        ],
        'types': [
            (r'(RECORD|END)\D', Keyword.Declaration),
            (r'((?:ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|'
             r'QSTRING|REAL|RECORD|RULE|SET OF|STRING|TOKEN|UDECIMAL|UNICODE|'
             r'UNSIGNED|VARSTRING|VARUNICODE)\d*)(\s+)',
             bygroups(Keyword.Type, Text)),
        ],
        'keywords': [
            (words(_ACTION_WORDS, suffix=r'\b'), Keyword.Reserved),
            (words(_RESERVED_WORDS, suffix=r'\b'), Keyword.Reserved),
        ],
        'functions': [
            (words(_FUNCTION_WORDS, suffix=r'\b'), Name.Function),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\'', String, '#pop'),
            (r'[^"\']+', String),
        ],
    }
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    .. versionadded:: 0.9
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
    mimetypes = ['text/x-erlang']

    keywords = (
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    )

    builtins = (  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    )

    operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
    word_operators = (
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    )

    # bare lowercase atom or a single-quoted atom
    atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_]\w*)'

    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'

    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

    # radix of a ``Base#Digits`` integer literal: 2..36
    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            # The suffix must be a raw string: a plain '\b' is the backspace
            # character, which made this rule unmatchable.
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            # NOTE(review): ``operators`` contains ``\?``, so the macro rule
            # further down looks shadowed by this rule -- confirm before
            # reordering.
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            # Floats have to be tried before plain integers, otherwise the
            # integer rule consumes the digits in front of the dot and the
            # float rule can never match.  The dot is escaped so that it only
            # matches a literal '.'.
            (r'[+-]?\d+\.\d+(?:[eE][+-]?\d+)?', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            # record name with an optional ``.field`` part; ``(?:`` is a
            # non-capturing group (the former ``(:?`` was a typo)
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            # io:format control sequences such as ~p, ~s, ~10.2f
            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
    }
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    .. versionadded:: 1.1
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    # ``N>`` prompt followed by whitespace or the end of the text
    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for an erl transcript.

        Lines that start with a prompt are gathered, prompt removed, and
        lexed as Erlang code; the prompts are spliced back in through
        ``do_insertions``.  Any other line is shell output, reported as a
        traceback when it begins with ``*``.
        """
        erlexer = ErlangLexer(**self.options)

        pending_code = ''
        prompt_insertions = []
        for line_match in line_re.finditer(text):
            line = line_match.group()
            prompt = self._prompt_re.match(line)

            if prompt is not None:
                cut = prompt.end()
                prompt_insertions.append(
                    (len(pending_code), [(0, Generic.Prompt, line[:cut])]))
                pending_code += line[cut:]
                continue

            # Not an input line: emit whatever code was collected so far.
            if pending_code:
                for item in do_insertions(
                        prompt_insertions,
                        erlexer.get_tokens_unprocessed(pending_code)):
                    yield item
                pending_code = ''
                prompt_insertions = []

            if line.startswith('*'):
                kind = Generic.Traceback
            else:
                kind = Generic.Output
            yield line_match.start(), kind, line

        # Code at the very end of the transcript, with no output after it.
        if pending_code:
            for item in do_insertions(
                    prompt_insertions,
                    erlexer.get_tokens_unprocessed(pending_code)):
                yield item
def gen_elixir_string_rules(name, symbol, token):
    """
    Build the lexer state for one kind of quoted Elixir string.

    :param name: suffix of the state name; the state is ``'string_' + name``.
    :param symbol: the quote character that terminates the string.
    :param token: token type given to the string contents and closing quote.
    :return: a dict with the single generated state.
    """
    rules = [
        # anything that is not '#', the quote or a backslash
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        (r'\\.', token),
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol')
    ]
    return {'string_' + name: rules}


def gen_elixir_sigstr_rules(term, token, interpol=True):
    """
    Build the rule list for the body of a sigil string.

    :param term: regex for the closing delimiter.
    :param token: token type for the sigil contents.
    :param interpol: when true, recognise escapes and ``#{...}``
        interpolation; otherwise only skip over backslash pairs.
    :return: list of lexer rules.
    """
    backslash_pair = (r'\\.', token)
    # closing delimiter plus optional trailing modifier letters
    closing = (r'%s[a-zA-Z]*' % (term,), token, '#pop')

    if not interpol:
        return [
            (r'[^%s\\]+' % (term,), token),
            backslash_pair,
            closing,
        ]

    return [
        (r'[^#%s\\]+' % (term,), token),
        include('escapes'),
        backslash_pair,
        closing,
        include('interpol')
    ]
class ElixirLexer(RegexLexer):
    """
    For the `Elixir language <http://elixir-lang.org>`_.

    .. versionadded:: 1.5
    """

    name = 'Elixir'
    aliases = ['elixir', 'ex', 'exs']
    filenames = ['*.ex', '*.exs']
    mimetypes = ['text/x-elixir']

    KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
    KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
    BUILTIN = (
        'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
        'quote', 'unquote', 'unquote_splicing', 'throw', 'super'
    )
    BUILTIN_DECLARATION = (
        'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
        'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback'
    )

    BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
    CONSTANT = ('nil', 'true', 'false')

    PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')

    OPERATORS3 = (
        '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
        '~>>', '<~>', '|~>', '<|>',
    )
    OPERATORS2 = (
        '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
        '->', '<-', '|', '.', '=', '~>', '<~',
    )
    OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')

    PUNCTUATION = (
        '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']'
    )

    def get_tokens_unprocessed(self, text):
        """
        Lex *text* and re-tag plain ``Name`` tokens that are reserved words.

        The regex rules emit every identifier as ``Name``; here each one is
        looked up in the word tuples of this class, first match wins, and is
        yielded with the matching token type.  All other tokens pass through
        untouched.
        """
        retag = (
            (self.KEYWORD, Keyword),
            (self.KEYWORD_OPERATOR, Operator.Word),
            (self.BUILTIN, Keyword),
            (self.BUILTIN_DECLARATION, Keyword.Declaration),
            (self.BUILTIN_NAMESPACE, Keyword.Namespace),
            (self.CONSTANT, Name.Constant),
            (self.PSEUDO_VAR, Name.Builtin.Pseudo),
        )
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                for vocabulary, replacement in retag:
                    if value in vocabulary:
                        token = replacement
                        break
            yield index, token, value

    def gen_elixir_sigil_rules():
        """Generate the ``sigils`` state and one state per sigil delimiter."""
        token = String.Other
        sigils = []
        states = {'sigils': sigils}

        # heredocs have slightly different rules
        for term, name in ((r'"""', 'triquot'), (r"'''", 'triapos')):
            heredoc_end = (r'^\s*' + term, String.Heredoc, '#pop')
            sigils.append(
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-intp')))
            sigils.append(
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-no-intp')))

            states[name + '-end'] = [(r'[a-zA-Z]*', token, '#pop')]
            states[name + '-intp'] = [
                heredoc_end,
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                heredoc_end,
                include('heredoc_no_interpol'),
            ]

        # all valid sigil terminators (excluding heredocs):
        # (opening delimiter, closing delimiter, state-name prefix)
        terminators = (
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'\<', r'\>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        )
        for lterm, rterm, name in terminators:
            # lowercase sigils interpolate, uppercase sigils do not
            sigils.append((r'~[a-z]' + lterm, token, name + '-intp'))
            sigils.append((r'~[A-Z]' + lterm, token, name + '-no-intp'))
            states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token)
            states[name + '-no-intp'] = gen_elixir_sigstr_rules(
                rterm, token, interpol=False)

        return states

    op3_re = "|".join(re.escape(s) for s in OPERATORS3)
    op2_re = "|".join(re.escape(s) for s in OPERATORS2)
    op1_re = "|".join(re.escape(s) for s in OPERATORS1)
    ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re)
    punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
    alnum = '[A-Za-z_0-9]'
    name_re = r'(?:\.\.\.|[a-z_]%s*[!\?]?)' % alnum
    modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum}
    complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re)
    special_atom_re = r'(?:\.\.\.|<<>>|%{}|%|{})'

    long_hex_char_re = r'(\\x{)([\da-fA-F]+)(})'
    hex_char_re = r'(\\x[\da-fA-F]{1,2})'
    escape_char_re = r'(\\[abdefnrstv])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single),

            # Various kinds of characters
            (r'(\?)' + long_hex_char_re,
             bygroups(String.Char,
                      String.Escape, Number.Hex, String.Escape)),
            (r'(\?)' + hex_char_re,
             bygroups(String.Char, String.Escape)),
            (r'(\?)' + escape_char_re,
             bygroups(String.Char, String.Escape)),
            (r'\?\\?.', String.Char),

            # '::' has to go before atoms
            (r':::', String.Symbol),
            (r'::', Operator),

            # atoms
            (r':' + special_atom_re, String.Symbol),
            (r':' + complex_name_re, String.Symbol),
            (r':"', String.Symbol, 'string_double_atom'),
            (r":'", String.Symbol, 'string_single_atom'),

            # [keywords: ...]
            (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re),
             bygroups(String.Symbol, Punctuation)),

            # @attributes
            (r'@' + name_re, Name.Attribute),

            # identifiers
            (name_re, Name),
            (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)),

            # operators and punctuation
            (op3_re, Operator),
            (op2_re, Operator),
            (punctuation_re, Punctuation),
            (r'&\d', Name.Entity),   # anon func arguments
            (op1_re, Operator),

            # numbers
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[\da-fA-F]+', Number.Hex),
            (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
            (r'\d(_?\d)*', Number.Integer),

            # strings and heredocs
            (r'"""\s*', String.Heredoc, 'heredoc_double'),
            (r"'''\s*$", String.Heredoc, 'heredoc_single'),
            (r'"', String.Double, 'string_double'),
            (r"'", String.Single, 'string_single'),

            include('sigils'),

            (r'%{', Punctuation, 'map_key'),
            (r'{', Punctuation, 'tuple'),
        ],
        'heredoc_double': [
            (r'^\s*"""', String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_single': [
            (r"^\s*'''", String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_interpol': [
            (r'[^#\\\n]+', String.Heredoc),
            include('escapes'),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
            include('interpol'),
        ],
        'heredoc_no_interpol': [
            (r'[^\\\n]+', String.Heredoc),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
        ],
        'escapes': [
            (long_hex_char_re,
             bygroups(String.Escape, Number.Hex, String.Escape)),
            (hex_char_re, String.Escape),
            (escape_char_re, String.Escape),
        ],
        'interpol': [
            (r'#{', String.Interpol, 'interpol_string'),
        ],
        'interpol_string': [
            (r'}', String.Interpol, "#pop"),
            include('root')
        ],
        'map_key': [
            include('root'),
            (r':', Punctuation, 'map_val'),
            (r'=>', Punctuation, 'map_val'),
            (r'}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=})', Punctuation, '#pop'),
        ],
        'tuple': [
            include('root'),
            (r'}', Punctuation, '#pop'),
        ],
    }
    # States produced by the generator helpers are merged in afterwards.
    tokens.update(gen_elixir_string_rules('double', '"', String.Double))
    tokens.update(gen_elixir_string_rules('single', "'", String.Single))
    tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
    tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
    tokens.update(gen_elixir_sigil_rules())
class ElixirConsoleLexer(Lexer):
    """
    For Elixir interactive console (iex) output like:

    .. sourcecode:: iex

        iex> [head | tail] = [1,2,3]
        [1,2,3]
        iex> head
        1
        iex> tail
        [2,3]
        iex> [head | tail]
        [1,2,3]
        iex> length [head | tail]
        3

    .. versionadded:: 1.5
    """

    name = 'Elixir iex session'
    aliases = ['iex']
    mimetypes = ['text/x-elixir-shellsession']

    # ``iex> ``, ``iex(N)> `` or the ``...> `` continuation prompt.
    # Raw string: ``\.``, ``\(`` and ``\d`` are not valid escapes in a plain
    # string literal.
    _prompt_re = re.compile(r'(iex|\.{3})(\(\d+\))?> ')

    def get_tokens_unprocessed(self, text):
        """
        Split an iex transcript into prompt, code, error and output tokens.

        Prompt lines are collected, prompt removed, and lexed together by
        :class:`ElixirLexer`.  A line starting with ``** `` opens an error
        report: the line itself becomes ``Generic.Error`` and so does every
        following non-prompt line until the next prompt.  Remaining lines
        are ``Generic.Output``.

        :param text: the complete transcript.
        :return: generator of ``(index, tokentype, value)`` tuples.
        """
        exlexer = ElixirLexer(**self.options)

        curcode = ''      # code collected since the last output line
        in_error = False  # inside an error report opened by a '** ' line
        insertions = []   # prompts/errors to splice into the lexed code
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                # The message is inserted as one error token; only its final
                # character (the line terminator) goes to the code lexer.
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    # Plain output line: flush the code collected so far.
                    if curcode:
                        for item in do_insertions(
                                insertions,
                                exlexer.get_tokens_unprocessed(curcode)):
                            yield item
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        # Flush code that was not followed by any output line.
        if curcode:
            for item in do_insertions(
                    insertions, exlexer.get_tokens_unprocessed(curcode)):
                yield item
~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for the Factor language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, bygroups, default, words +from pygments.token import Text, Comment, Keyword, Name, String, Number + +__all__ = ['FactorLexer'] + + +class FactorLexer(RegexLexer): + """ + Lexer for the `Factor <http://factorcode.org>`_ language. + + .. versionadded:: 1.4 + """ + name = 'Factor' + aliases = ['factor'] + filenames = ['*.factor'] + mimetypes = ['text/x-factor'] + + flags = re.MULTILINE | re.UNICODE + + builtin_kernel = words(( + '-rot', '2bi', '2bi@', '2bi*', '2curry', '2dip', '2drop', '2dup', '2keep', '2nip', + '2over', '2tri', '2tri@', '2tri*', '3bi', '3curry', '3dip', '3drop', '3dup', '3keep', + '3tri', '4dip', '4drop', '4dup', '4keep', '<wrapper>', '=', '>boolean', 'clone', + '?', '?execute', '?if', 'and', 'assert', 'assert=', 'assert?', 'bi', 'bi-curry', + 'bi-curry@', 'bi-curry*', 'bi@', 'bi*', 'boa', 'boolean', 'boolean?', 'both?', + 'build', 'call', 'callstack', 'callstack>array', 'callstack?', 'clear', '(clone)', + 'compose', 'compose?', 'curry', 'curry?', 'datastack', 'die', 'dip', 'do', 'drop', + 'dup', 'dupd', 'either?', 'eq?', 'equal?', 'execute', 'hashcode', 'hashcode*', + 'identity-hashcode', 'identity-tuple', 'identity-tuple?', 'if', 'if*', + 'keep', 'loop', 'most', 'new', 'nip', 'not', 'null', 'object', 'or', 'over', + 'pick', 'prepose', 'retainstack', 'rot', 'same?', 'swap', 'swapd', 'throw', + 'tri', 'tri-curry', 'tri-curry@', 'tri-curry*', 'tri@', 'tri*', 'tuple', + 'tuple?', 'unless', 'unless*', 'until', 'when', 'when*', 'while', 'with', + 'wrapper', 'wrapper?', 'xor'), suffix=r'\s') + + builtin_assocs = words(( + '2cache', '<enum>', '>alist', '?at', '?of', 'assoc', 'assoc-all?', + 'assoc-any?', 'assoc-clone-like', 'assoc-combine', 'assoc-diff', + 'assoc-diff!', 'assoc-differ', 'assoc-each', 'assoc-empty?', + 
'assoc-filter', 'assoc-filter!', 'assoc-filter-as', 'assoc-find', + 'assoc-hashcode', 'assoc-intersect', 'assoc-like', 'assoc-map', + 'assoc-map-as', 'assoc-partition', 'assoc-refine', 'assoc-size', + 'assoc-stack', 'assoc-subset?', 'assoc-union', 'assoc-union!', + 'assoc=', 'assoc>map', 'assoc?', 'at', 'at+', 'at*', 'cache', 'change-at', + 'clear-assoc', 'delete-at', 'delete-at*', 'enum', 'enum?', 'extract-keys', + 'inc-at', 'key?', 'keys', 'map>assoc', 'maybe-set-at', 'new-assoc', 'of', + 'push-at', 'rename-at', 'set-at', 'sift-keys', 'sift-values', 'substitute', + 'unzip', 'value-at', 'value-at*', 'value?', 'values', 'zip'), suffix=r'\s') + + builtin_combinators = words(( + '2cleave', '2cleave>quot', '3cleave', '3cleave>quot', '4cleave', + '4cleave>quot', 'alist>quot', 'call-effect', 'case', 'case-find', + 'case>quot', 'cleave', 'cleave>quot', 'cond', 'cond>quot', 'deep-spread>quot', + 'execute-effect', 'linear-case-quot', 'no-case', 'no-case?', 'no-cond', + 'no-cond?', 'recursive-hashcode', 'shallow-spread>quot', 'spread', + 'to-fixed-point', 'wrong-values', 'wrong-values?'), suffix=r'\s') + + builtin_math = words(( + '-', '/', '/f', '/i', '/mod', '2/', '2^', '<', '<=', '<fp-nan>', '>', + '>=', '>bignum', '>fixnum', '>float', '>integer', '(all-integers?)', + '(each-integer)', '(find-integer)', '*', '+', '?1+', + 'abs', 'align', 'all-integers?', 'bignum', 'bignum?', 'bit?', 'bitand', + 'bitnot', 'bitor', 'bits>double', 'bits>float', 'bitxor', 'complex', + 'complex?', 'denominator', 'double>bits', 'each-integer', 'even?', + 'find-integer', 'find-last-integer', 'fixnum', 'fixnum?', 'float', + 'float>bits', 'float?', 'fp-bitwise=', 'fp-infinity?', 'fp-nan-payload', + 'fp-nan?', 'fp-qnan?', 'fp-sign', 'fp-snan?', 'fp-special?', + 'if-zero', 'imaginary-part', 'integer', 'integer>fixnum', + 'integer>fixnum-strict', 'integer?', 'log2', 'log2-expects-positive', + 'log2-expects-positive?', 'mod', 'neg', 'neg?', 'next-float', + 'next-power-of-2', 'number', 'number=', 
'number?', 'numerator', 'odd?', + 'out-of-fixnum-range', 'out-of-fixnum-range?', 'power-of-2?', + 'prev-float', 'ratio', 'ratio?', 'rational', 'rational?', 'real', + 'real-part', 'real?', 'recip', 'rem', 'sgn', 'shift', 'sq', 'times', + 'u<', 'u<=', 'u>', 'u>=', 'unless-zero', 'unordered?', 'when-zero', + 'zero?'), suffix=r'\s') + + builtin_sequences = words(( + '1sequence', '2all?', '2each', '2map', '2map-as', '2map-reduce', '2reduce', + '2selector', '2sequence', '3append', '3append-as', '3each', '3map', '3map-as', + '3sequence', '4sequence', '<repetition>', '<reversed>', '<slice>', '?first', + '?last', '?nth', '?second', '?set-nth', 'accumulate', 'accumulate!', + 'accumulate-as', 'all?', 'any?', 'append', 'append!', 'append-as', + 'assert-sequence', 'assert-sequence=', 'assert-sequence?', + 'binary-reduce', 'bounds-check', 'bounds-check?', 'bounds-error', + 'bounds-error?', 'but-last', 'but-last-slice', 'cartesian-each', + 'cartesian-map', 'cartesian-product', 'change-nth', 'check-slice', + 'check-slice-error', 'clone-like', 'collapse-slice', 'collector', + 'collector-for', 'concat', 'concat-as', 'copy', 'count', 'cut', 'cut-slice', + 'cut*', 'delete-all', 'delete-slice', 'drop-prefix', 'each', 'each-from', + 'each-index', 'empty?', 'exchange', 'filter', 'filter!', 'filter-as', 'find', + 'find-from', 'find-index', 'find-index-from', 'find-last', 'find-last-from', + 'first', 'first2', 'first3', 'first4', 'flip', 'follow', 'fourth', 'glue', 'halves', + 'harvest', 'head', 'head-slice', 'head-slice*', 'head*', 'head?', + 'if-empty', 'immutable', 'immutable-sequence', 'immutable-sequence?', + 'immutable?', 'index', 'index-from', 'indices', 'infimum', 'infimum-by', + 'insert-nth', 'interleave', 'iota', 'iota-tuple', 'iota-tuple?', 'join', + 'join-as', 'last', 'last-index', 'last-index-from', 'length', 'lengthen', + 'like', 'longer', 'longer?', 'longest', 'map', 'map!', 'map-as', 'map-find', + 'map-find-last', 'map-index', 'map-integers', 'map-reduce', 'map-sum', + 
'max-length', 'member-eq?', 'member?', 'midpoint@', 'min-length', + 'mismatch', 'move', 'new-like', 'new-resizable', 'new-sequence', + 'non-negative-integer-expected', 'non-negative-integer-expected?', + 'nth', 'nths', 'pad-head', 'pad-tail', 'padding', 'partition', 'pop', 'pop*', + 'prefix', 'prepend', 'prepend-as', 'produce', 'produce-as', 'product', 'push', + 'push-all', 'push-either', 'push-if', 'reduce', 'reduce-index', 'remove', + 'remove!', 'remove-eq', 'remove-eq!', 'remove-nth', 'remove-nth!', 'repetition', + 'repetition?', 'replace-slice', 'replicate', 'replicate-as', 'rest', + 'rest-slice', 'reverse', 'reverse!', 'reversed', 'reversed?', 'second', + 'selector', 'selector-for', 'sequence', 'sequence-hashcode', 'sequence=', + 'sequence?', 'set-first', 'set-fourth', 'set-last', 'set-length', 'set-nth', + 'set-second', 'set-third', 'short', 'shorten', 'shorter', 'shorter?', + 'shortest', 'sift', 'slice', 'slice-error', 'slice-error?', 'slice?', + 'snip', 'snip-slice', 'start', 'start*', 'subseq', 'subseq?', 'suffix', + 'suffix!', 'sum', 'sum-lengths', 'supremum', 'supremum-by', 'surround', 'tail', + 'tail-slice', 'tail-slice*', 'tail*', 'tail?', 'third', 'trim', + 'trim-head', 'trim-head-slice', 'trim-slice', 'trim-tail', 'trim-tail-slice', + 'unclip', 'unclip-last', 'unclip-last-slice', 'unclip-slice', 'unless-empty', + 'virtual-exemplar', 'virtual-sequence', 'virtual-sequence?', 'virtual@', + 'when-empty'), suffix=r'\s') + + builtin_namespaces = words(( + '+@', 'change', 'change-global', 'counter', 'dec', 'get', 'get-global', + 'global', 'inc', 'init-namespaces', 'initialize', 'is-global', 'make-assoc', + 'namespace', 'namestack', 'off', 'on', 'set', 'set-global', 'set-namestack', + 'toggle', 'with-global', 'with-scope', 'with-variable', 'with-variables'), + suffix=r'\s') + + builtin_arrays = words(( + '1array', '2array', '3array', '4array', '<array>', '>array', 'array', + 'array?', 'pair', 'pair?', 'resize-array'), suffix=r'\s') + + builtin_io = words(( + 
'(each-stream-block-slice)', '(each-stream-block)', + '(stream-contents-by-block)', '(stream-contents-by-element)', + '(stream-contents-by-length-or-block)', + '(stream-contents-by-length)', '+byte+', '+character+', + 'bad-seek-type', 'bad-seek-type?', 'bl', 'contents', 'each-block', + 'each-block-size', 'each-block-slice', 'each-line', 'each-morsel', + 'each-stream-block', 'each-stream-block-slice', 'each-stream-line', + 'error-stream', 'flush', 'input-stream', 'input-stream?', + 'invalid-read-buffer', 'invalid-read-buffer?', 'lines', 'nl', + 'output-stream', 'output-stream?', 'print', 'read', 'read-into', + 'read-partial', 'read-partial-into', 'read-until', 'read1', 'readln', + 'seek-absolute', 'seek-absolute?', 'seek-end', 'seek-end?', + 'seek-input', 'seek-output', 'seek-relative', 'seek-relative?', + 'stream-bl', 'stream-contents', 'stream-contents*', 'stream-copy', + 'stream-copy*', 'stream-element-type', 'stream-flush', + 'stream-length', 'stream-lines', 'stream-nl', 'stream-print', + 'stream-read', 'stream-read-into', 'stream-read-partial', + 'stream-read-partial-into', 'stream-read-partial-unsafe', + 'stream-read-unsafe', 'stream-read-until', 'stream-read1', + 'stream-readln', 'stream-seek', 'stream-seekable?', 'stream-tell', + 'stream-write', 'stream-write1', 'tell-input', 'tell-output', + 'with-error-stream', 'with-error-stream*', 'with-error>output', + 'with-input-output+error-streams', + 'with-input-output+error-streams*', 'with-input-stream', + 'with-input-stream*', 'with-output-stream', 'with-output-stream*', + 'with-output>error', 'with-output+error-stream', + 'with-output+error-stream*', 'with-streams', 'with-streams*', + 'write', 'write1'), suffix=r'\s') + + builtin_strings = words(( + '1string', '<string>', '>string', 'resize-string', 'string', + 'string?'), suffix=r'\s') + + builtin_vectors = words(( + '1vector', '<vector>', '>vector', '?push', 'vector', 'vector?'), + suffix=r'\s') + + builtin_continuations = words(( + '<condition>', 
'<continuation>', '<restart>', 'attempt-all', + 'attempt-all-error', 'attempt-all-error?', 'callback-error-hook', + 'callcc0', 'callcc1', 'cleanup', 'compute-restarts', 'condition', + 'condition?', 'continuation', 'continuation?', 'continue', + 'continue-restart', 'continue-with', 'current-continuation', + 'error', 'error-continuation', 'error-in-thread', 'error-thread', + 'ifcc', 'ignore-errors', 'in-callback?', 'original-error', 'recover', + 'restart', 'restart?', 'restarts', 'rethrow', 'rethrow-restarts', + 'return', 'return-continuation', 'thread-error-hook', 'throw-continue', + 'throw-restarts', 'with-datastack', 'with-return'), suffix=r'\s') + + tokens = { + 'root': [ + # factor allows a file to start with a shebang + (r'#!.*$', Comment.Preproc), + default('base'), + ], + 'base': [ + (r'\s+', Text), + + # defining words + (r'((?:MACRO|MEMO|TYPED)?:[:]?)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function)), + (r'(M:[:]?)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Class, Text, Name.Function)), + (r'(C:)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function, Text, Name.Class)), + (r'(GENERIC:)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function)), + (r'(HOOK:|GENERIC#)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function, Text, Name.Function)), + (r'\(\s', Name.Function, 'stackeffect'), + (r';\s', Keyword), + + # imports and namespaces + (r'(USING:)(\s+)', + bygroups(Keyword.Namespace, Text), 'vocabs'), + (r'(USE:|UNUSE:|IN:|QUALIFIED:)(\s+)(\S+)', + bygroups(Keyword.Namespace, Text, Name.Namespace)), + (r'(QUALIFIED-WITH:)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword.Namespace, Text, Name.Namespace, Text, Name.Namespace)), + (r'(FROM:|EXCLUDE:)(\s+)(\S+)(\s+=>\s)', + bygroups(Keyword.Namespace, Text, Name.Namespace, Text), 'words'), + (r'(RENAME:)(\s+)(\S+)(\s+)(\S+)(\s+=>\s+)(\S+)', + bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Namespace, Text, Name.Function)), + (r'(ALIAS:|TYPEDEF:)(\s+)(\S+)(\s+)(\S+)', + 
bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Function)), + (r'(DEFER:|FORGET:|POSTPONE:)(\s+)(\S+)', + bygroups(Keyword.Namespace, Text, Name.Function)), + + # tuples and classes + (r'(TUPLE:|ERROR:)(\s+)(\S+)(\s+<\s+)(\S+)', + bygroups(Keyword, Text, Name.Class, Text, Name.Class), 'slots'), + (r'(TUPLE:|ERROR:|BUILTIN:)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Class), 'slots'), + (r'(MIXIN:|UNION:|INTERSECTION:)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Class)), + (r'(PREDICATE:)(\s+)(\S+)(\s+<\s+)(\S+)', + bygroups(Keyword, Text, Name.Class, Text, Name.Class)), + (r'(C:)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function, Text, Name.Class)), + (r'(INSTANCE:)(\s+)(\S+)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Class, Text, Name.Class)), + (r'(SLOT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)), + (r'(SINGLETON:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)), + (r'SINGLETONS:', Keyword, 'classes'), + + # other syntax + (r'(CONSTANT:|SYMBOL:|MAIN:|HELP:)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Function)), + (r'SYMBOLS:\s', Keyword, 'words'), + (r'SYNTAX:\s', Keyword), + (r'ALIEN:\s', Keyword), + (r'(STRUCT:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)), + (r'(FUNCTION:)(\s+\S+\s+)(\S+)(\s+\(\s+[^\)]+\)\s)', + bygroups(Keyword.Namespace, Text, Name.Function, Text)), + (r'(FUNCTION-ALIAS:)(\s+)(\S+)(\s+\S+\s+)(\S+)(\s+\(\s+[^\)]+\)\s)', + bygroups(Keyword.Namespace, Text, Name.Function, Text, Name.Function, Text)), + + # vocab.private + (r'(?:<PRIVATE|PRIVATE>)\s', Keyword.Namespace), + + # strings + (r'"""\s+(?:.|\n)*?\s+"""', String), + (r'"(?:\\\\|\\"|[^"])*"', String), + (r'\S+"\s+(?:\\\\|\\"|[^"])*"', String), + (r'CHAR:\s+(?:\\[\\abfnrstv]|[^\\]\S*)\s', String.Char), + + # comments + (r'!\s+.*$', Comment), + (r'#!\s+.*$', Comment), + (r'/\*\s+(?:.|\n)*?\s\*/\s', Comment), + + # boolean constants + (r'[tf]\s', Name.Constant), + + # symbols and literals + (r'[\\$]\s+\S+', Name.Constant), + (r'M\\\s+\S+\s+\S+', 
Name.Constant), + + # numbers + (r'[+-]?(?:[\d,]*\d)?\.(?:\d([\d,]*\d)?)?(?:[eE][+-]?\d+)?\s', Number), + (r'[+-]?\d(?:[\d,]*\d)?(?:[eE][+-]?\d+)?\s', Number), + (r'0x[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), + (r'NAN:\s+[a-fA-F\d](?:[a-fA-F\d,]*[a-fA-F\d])?(?:p\d([\d,]*\d)?)?\s', Number), + (r'0b[01]+\s', Number.Bin), + (r'0o[0-7]+\s', Number.Oct), + (r'(?:\d([\d,]*\d)?)?\+\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), + (r'(?:\-\d([\d,]*\d)?)?\-\d(?:[\d,]*\d)?/\d(?:[\d,]*\d)?\s', Number), + + # keywords + (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s', + Keyword), + + # builtins + (builtin_kernel, Name.Builtin), + (builtin_assocs, Name.Builtin), + (builtin_combinators, Name.Builtin), + (builtin_math, Name.Builtin), + (builtin_sequences, Name.Builtin), + (builtin_namespaces, Name.Builtin), + (builtin_arrays, Name.Builtin), + (builtin_io, Name.Builtin), + (builtin_strings, Name.Builtin), + (builtin_vectors, Name.Builtin), + (builtin_continuations, Name.Builtin), + + # everything else is text + (r'\S+', Text), + ], + 'stackeffect': [ + (r'\s+', Text), + (r'\(\s+', Name.Function, 'stackeffect'), + (r'\)\s', Name.Function, '#pop'), + (r'--\s', Name.Function), + (r'\S+', Name.Variable), + ], + 'slots': [ + (r'\s+', Text), + (r';\s', Keyword, '#pop'), + (r'({\s+)(\S+)(\s+[^}]+\s+}\s)', + bygroups(Text, Name.Variable, Text)), + (r'\S+', Name.Variable), + ], + 'vocabs': [ + (r'\s+', Text), + (r';\s', Keyword, '#pop'), + (r'\S+', Name.Namespace), + ], + 'classes': [ + (r'\s+', Text), + (r';\s', Keyword, '#pop'), + (r'\S+', Name.Class), + ], + 'words': [ + (r'\s+', Text), + (r';\s', Keyword, '#pop'), + (r'\S+', Name.Function), + ], + } diff --git a/pygments/lexers/misc/fantom.py b/pygments/lexers/misc/fantom.py new file mode 100644 index 00000000..d5a7550f --- /dev/null +++ b/pygments/lexers/misc/fantom.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.fantom + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for the 
Fantom language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from string import Template + +from pygments.lexer import RegexLexer, include, bygroups, using, \ + this, default, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Literal + +__all__ = ['FantomLexer'] + + +class FantomLexer(RegexLexer): + """ + For Fantom source code. + + .. versionadded:: 1.5 + """ + name = 'Fantom' + aliases = ['fan'] + filenames = ['*.fan'] + mimetypes = ['application/x-fantom'] + + # often used regexes + def s(str): + return Template(str).substitute( + dict( + pod=r'[\"\w\.]+', + eos=r'\n|;', + id=r'[a-zA-Z_]\w*', + # all chars which can be part of type definition. Starts with + # either letter, or [ (maps), or | (funcs) + type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]\|\->\?]*?', + ) + ) + + tokens = { + 'comments': [ + (r'(?s)/\*.*?\*/', Comment.Multiline), # Multiline + (r'//.*?\n', Comment.Single), # Single line + # TODO: highlight references in fandocs + (r'\*\*.*?\n', Comment.Special), # Fandoc + (r'#.*\n', Comment.Single) # Shell-style + ], + 'literals': [ + (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration + (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)', Number), # Duration with dot + (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float), # Float/Decimal + (r'\b-?0x[0-9a-fA-F_]+', Number.Hex), # Hex + (r'\b-?[\d_]+', Number.Integer), # Int + (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), # Char + (r'"', Punctuation, 'insideStr'), # Opening quote + (r'`', Punctuation, 'insideUri'), # Opening accent + (r'\b(true|false|null)\b', Keyword.Constant), # Bool & null + (r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)', # DSL + bygroups(Name.Namespace, Punctuation, Name.Class, + Punctuation, String, Punctuation)), + (r'(?:(\w+)(::))?(\w+)?(#)(\w+)?', # Type/slot literal + bygroups(Name.Namespace, Punctuation, Name.Class, + Punctuation, Name.Function)), + (r'\[,\]', Literal), 
# Empty list + (s(r'($type)(\[,\])'), # Typed empty list + bygroups(using(this, state='inType'), Literal)), + (r'\[:\]', Literal), # Empty Map + (s(r'($type)(\[:\])'), + bygroups(using(this, state='inType'), Literal)), + ], + 'insideStr': [ + (r'\\\\', String.Escape), # Escaped backslash + (r'\\"', String.Escape), # Escaped " + (r'\\`', String.Escape), # Escaped ` + (r'\$\w+', String.Interpol), # Subst var + (r'\${.*?}', String.Interpol), # Subst expr + (r'"', Punctuation, '#pop'), # Closing quot + (r'.', String) # String content + ], + 'insideUri': [ # TODO: remove copy/paste str/uri + (r'\\\\', String.Escape), # Escaped backslash + (r'\\"', String.Escape), # Escaped " + (r'\\`', String.Escape), # Escaped ` + (r'\$\w+', String.Interpol), # Subst var + (r'\${.*?}', String.Interpol), # Subst expr + (r'`', Punctuation, '#pop'), # Closing tick + (r'.', String.Backtick) # URI content + ], + 'protectionKeywords': [ + (r'\b(public|protected|private|internal)\b', Keyword), + ], + 'typeKeywords': [ + (r'\b(abstract|final|const|native|facet|enum)\b', Keyword), + ], + 'methodKeywords': [ + (r'\b(abstract|native|once|override|static|virtual|final)\b', + Keyword), + ], + 'fieldKeywords': [ + (r'\b(abstract|const|final|native|override|static|virtual|' + r'readonly)\b', Keyword) + ], + 'otherKeywords': [ + (words(( + 'try', 'catch', 'throw', 'finally', 'for', 'if', 'else', 'while', + 'as', 'is', 'isnot', 'switch', 'case', 'default', 'continue', + 'break', 'do', 'return', 'get', 'set'), prefix=r'\b', suffix=r'\b'), + Keyword), + (r'\b(it|this|super)\b', Name.Builtin.Pseudo), + ], + 'operators': [ + (r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator) + ], + 'inType': [ + (r'[\[\]\|\->:\?]', Punctuation), + (s(r'$id'), Name.Class), + default('#pop'), + + ], + 'root': [ + include('comments'), + include('protectionKeywords'), + include('typeKeywords'), + include('methodKeywords'), + include('fieldKeywords'), + include('literals'), + include('otherKeywords'), + 
include('operators'), + (r'using\b', Keyword.Namespace, 'using'), # Using stmt + (r'@\w+', Name.Decorator, 'facet'), # Symbol + (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class), + 'inheritance'), # Inheritance list + + # Type var := val + (s(r'($type)([ \t]+)($id)(\s*)(:=)'), + bygroups(using(this, state='inType'), Text, + Name.Variable, Text, Operator)), + + # var := val + (s(r'($id)(\s*)(:=)'), + bygroups(Name.Variable, Text, Operator)), + + # .someId( or ->someId( ### + (s(r'(\.|(?:\->))($id)(\s*)(\()'), + bygroups(Operator, Name.Function, Text, Punctuation), + 'insideParen'), + + # .someId or ->someId + (s(r'(\.|(?:\->))($id)'), + bygroups(Operator, Name.Function)), + + # new makeXXX ( + (r'(new)(\s+)(make\w*)(\s*)(\()', + bygroups(Keyword, Text, Name.Function, Text, Punctuation), + 'insideMethodDeclArgs'), + + # Type name ( + (s(r'($type)([ \t]+)' # Return type and whitespace + r'($id)(\s*)(\()'), # method name + open brace + bygroups(using(this, state='inType'), Text, + Name.Function, Text, Punctuation), + 'insideMethodDeclArgs'), + + # ArgType argName, + (s(r'($type)(\s+)($id)(\s*)(,)'), + bygroups(using(this, state='inType'), Text, Name.Variable, + Text, Punctuation)), + + # ArgType argName) + # Covered in 'insideParen' state + + # ArgType argName -> ArgType| + (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'), + bygroups(using(this, state='inType'), Text, Name.Variable, + Text, Punctuation, Text, using(this, state='inType'), + Punctuation)), + + # ArgType argName| + (s(r'($type)(\s+)($id)(\s*)(\|)'), + bygroups(using(this, state='inType'), Text, Name.Variable, + Text, Punctuation)), + + # Type var + (s(r'($type)([ \t]+)($id)'), + bygroups(using(this, state='inType'), Text, + Name.Variable)), + + (r'\(', Punctuation, 'insideParen'), + (r'\{', Punctuation, 'insideBrace'), + (r'.', Text) + ], + 'insideParen': [ + (r'\)', Punctuation, '#pop'), + include('root'), + ], + 'insideMethodDeclArgs': [ + (r'\)', Punctuation, '#pop'), + 
(s(r'($type)(\s+)($id)(\s*)(\))'), + bygroups(using(this, state='inType'), Text, Name.Variable, + Text, Punctuation), '#pop'), + include('root'), + ], + 'insideBrace': [ + (r'\}', Punctuation, '#pop'), + include('root'), + ], + 'inheritance': [ + (r'\s+', Text), # Whitespace + (r':|,', Punctuation), + (r'(?:(\w+)(::))?(\w+)', + bygroups(Name.Namespace, Punctuation, Name.Class)), + (r'{', Punctuation, '#pop') + ], + 'using': [ + (r'[ \t]+', Text), # consume whitespaces + (r'(\[)(\w+)(\])', + bygroups(Punctuation, Comment.Special, Punctuation)), # ffi + (r'(\")?([\w\.]+)(\")?', + bygroups(Punctuation, Name.Namespace, Punctuation)), # podname + (r'::', Punctuation, 'usingClass'), + default('#pop') + ], + 'usingClass': [ + (r'[ \t]+', Text), # consume whitespaces + (r'(as)(\s+)(\w+)', + bygroups(Keyword.Declaration, Text, Name.Class), '#pop:2'), + (r'[\w\$]+', Name.Class), + default('#pop:2') # jump out to root state + ], + 'facet': [ + (r'\s+', Text), + (r'{', Punctuation, 'facetFields'), + default('#pop') + ], + 'facetFields': [ + include('comments'), + include('literals'), + include('operators'), + (r'\s+', Text), + (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)), + (r'}', Punctuation, '#pop'), + (r'.', Text) + ], + } diff --git a/pygments/lexers/misc/felix.py b/pygments/lexers/misc/felix.py new file mode 100644 index 00000000..a33b2efe --- /dev/null +++ b/pygments/lexers/misc/felix.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.felix + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for the Felix language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, default, words, \ + combined +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation + +__all__ = ['FelixLexer'] + + +class FelixLexer(RegexLexer): + """ + For `Felix <http://www.felix-lang.org>`_ source code. 
+ + .. versionadded:: 1.2 + """ + + name = 'Felix' + aliases = ['felix', 'flx'] + filenames = ['*.flx', '*.flxh'] + mimetypes = ['text/x-felix'] + + preproc = ( + 'elif', 'else', 'endif', 'if', 'ifdef', 'ifndef', + ) + + keywords = ( + '_', '_deref', 'all', 'as', + 'assert', 'attempt', 'call', 'callback', 'case', 'caseno', 'cclass', + 'code', 'compound', 'ctypes', 'do', 'done', 'downto', 'elif', 'else', + 'endattempt', 'endcase', 'endif', 'endmatch', 'enum', 'except', + 'exceptions', 'expect', 'finally', 'for', 'forall', 'forget', 'fork', + 'functor', 'goto', 'ident', 'if', 'incomplete', 'inherit', 'instance', + 'interface', 'jump', 'lambda', 'loop', 'match', 'module', 'namespace', + 'new', 'noexpand', 'nonterm', 'obj', 'of', 'open', 'parse', 'raise', + 'regexp', 'reglex', 'regmatch', 'rename', 'return', 'the', 'then', + 'to', 'type', 'typecase', 'typedef', 'typematch', 'typeof', 'upto', + 'when', 'whilst', 'with', 'yield', + ) + + keyword_directives = ( + '_gc_pointer', '_gc_type', 'body', 'comment', 'const', 'export', + 'header', 'inline', 'lval', 'macro', 'noinline', 'noreturn', + 'package', 'private', 'pod', 'property', 'public', 'publish', + 'requires', 'todo', 'virtual', 'use', + ) + + keyword_declarations = ( + 'def', 'let', 'ref', 'val', 'var', + ) + + keyword_types = ( + 'unit', 'void', 'any', 'bool', + 'byte', 'offset', + 'address', 'caddress', 'cvaddress', 'vaddress', + 'tiny', 'short', 'int', 'long', 'vlong', + 'utiny', 'ushort', 'vshort', 'uint', 'ulong', 'uvlong', + 'int8', 'int16', 'int32', 'int64', + 'uint8', 'uint16', 'uint32', 'uint64', + 'float', 'double', 'ldouble', + 'complex', 'dcomplex', 'lcomplex', + 'imaginary', 'dimaginary', 'limaginary', + 'char', 'wchar', 'uchar', + 'charp', 'charcp', 'ucharp', 'ucharcp', + 'string', 'wstring', 'ustring', + 'cont', + 'array', 'varray', 'list', + 'lvalue', 'opt', 'slice', + ) + + keyword_constants = ( + 'false', 'true', + ) + + operator_words = ( + 'and', 'not', 'in', 'is', 'isin', 'or', 'xor', + ) + + 
name_builtins = ( + '_svc', 'while', + ) + + name_pseudo = ( + 'root', 'self', 'this', + ) + + decimal_suffixes = '([tTsSiIlLvV]|ll|LL|([iIuU])(8|16|32|64))?' + + tokens = { + 'root': [ + include('whitespace'), + + # Keywords + (words(('axiom', 'ctor', 'fun', 'gen', 'proc', 'reduce', + 'union'), suffix=r'\b'), + Keyword, 'funcname'), + (words(('class', 'cclass', 'cstruct', 'obj', 'struct'), suffix=r'\b'), + Keyword, 'classname'), + (r'(instance|module|typeclass)\b', Keyword, 'modulename'), + + (words(keywords, suffix=r'\b'), Keyword), + (words(keyword_directives, suffix=r'\b'), Name.Decorator), + (words(keyword_declarations, suffix=r'\b'), Keyword.Declaration), + (words(keyword_types, suffix=r'\b'), Keyword.Type), + (words(keyword_constants, suffix=r'\b'), Keyword.Constant), + + # Operators + include('operators'), + + # Float Literal + # -- Hex Float + (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)' + r'[pP][+\-]?[0-9_]+[lLfFdD]?', Number.Float), + # -- DecimalFloat + (r'[0-9_]+(\.[0-9_]+[eE][+\-]?[0-9_]+|' + r'\.[0-9_]*|[eE][+\-]?[0-9_]+)[lLfFdD]?', Number.Float), + (r'\.(0|[1-9][0-9_]*)([eE][+\-]?[0-9_]+)?[lLfFdD]?', + Number.Float), + + # IntegerLiteral + # -- Binary + (r'0[Bb][01_]+%s' % decimal_suffixes, Number.Bin), + # -- Octal + (r'0[0-7_]+%s' % decimal_suffixes, Number.Oct), + # -- Hexadecimal + (r'0[xX][0-9a-fA-F_]+%s' % decimal_suffixes, Number.Hex), + # -- Decimal + (r'(0|[1-9][0-9_]*)%s' % decimal_suffixes, Number.Integer), + + # Strings + ('([rR][cC]?|[cC][rR])"""', String, 'tdqs'), + ("([rR][cC]?|[cC][rR])'''", String, 'tsqs'), + ('([rR][cC]?|[cC][rR])"', String, 'dqs'), + ("([rR][cC]?|[cC][rR])'", String, 'sqs'), + ('[cCfFqQwWuU]?"""', String, combined('stringescape', 'tdqs')), + ("[cCfFqQwWuU]?'''", String, combined('stringescape', 'tsqs')), + ('[cCfFqQwWuU]?"', String, combined('stringescape', 'dqs')), + ("[cCfFqQwWuU]?'", String, combined('stringescape', 'sqs')), + + # Punctuation + (r'[\[\]{}:(),;?]', Punctuation), + + # Labels + 
(r'[a-zA-Z_]\w*:>', Name.Label), + + # Identifiers + (r'(%s)\b' % '|'.join(name_builtins), Name.Builtin), + (r'(%s)\b' % '|'.join(name_pseudo), Name.Builtin.Pseudo), + (r'[a-zA-Z_]\w*', Name), + ], + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + + include('comment'), + + # Preprocessor + (r'#\s*if\s+0', Comment.Preproc, 'if0'), + (r'#', Comment.Preproc, 'macro'), + ], + 'operators': [ + (r'(%s)\b' % '|'.join(operator_words), Operator.Word), + (r'!=|==|<<|>>|\|\||&&|[-~+/*%=<>&^|.$]', Operator), + ], + 'comment': [ + (r'//(.*?)\n', Comment.Single), + (r'/[*]', Comment.Multiline, 'comment2'), + ], + 'comment2': [ + (r'[^\/*]', Comment.Multiline), + (r'/[*]', Comment.Multiline, '#push'), + (r'[*]/', Comment.Multiline, '#pop'), + (r'[\/*]', Comment.Multiline), + ], + 'if0': [ + (r'^\s*#if.*?(?<!\\)\n', Comment, '#push'), + (r'^\s*#endif.*?(?<!\\)\n', Comment, '#pop'), + (r'.*?\n', Comment), + ], + 'macro': [ + include('comment'), + (r'(import|include)(\s+)(<[^>]*?>)', + bygroups(Comment.Preproc, Text, String), '#pop'), + (r'(import|include)(\s+)("[^"]*?")', + bygroups(Comment.Preproc, Text, String), '#pop'), + (r"(import|include)(\s+)('[^']*?')", + bygroups(Comment.Preproc, Text, String), '#pop'), + (r'[^/\n]+', Comment.Preproc), + # (r'/[*](.|\n)*?[*]/', Comment), + # (r'//.*?\n', Comment, '#pop'), + (r'/', Comment.Preproc), + (r'(?<=\\)\n', Comment.Preproc), + (r'\n', Comment.Preproc, '#pop'), + ], + 'funcname': [ + include('whitespace'), + (r'[a-zA-Z_]\w*', Name.Function, '#pop'), + # anonymous functions + (r'(?=\()', Text, '#pop'), + ], + 'classname': [ + include('whitespace'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), + # anonymous classes + (r'(?=\{)', Text, '#pop'), + ], + 'modulename': [ + include('whitespace'), + (r'\[', Punctuation, ('modulename2', 'tvarlist')), + default('modulename2'), + ], + 'modulename2': [ + include('whitespace'), + (r'([a-zA-Z_]\w*)', Name.Namespace, '#pop:2'), + ], + 'tvarlist': [ + include('whitespace'), + include('operators'), 
+ (r'\[', Punctuation, '#push'), + (r'\]', Punctuation, '#pop'), + (r',', Punctuation), + (r'(with|where)\b', Keyword), + (r'[a-zA-Z_]\w*', Name), + ], + 'stringescape': [ + (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + ], + 'strings': [ + (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + (r'[^\\\'"%\n]+', String), + # quotes, percents and backslashes must be parsed one at a time + (r'[\'"\\]', String), + # unhandled string formatting sign + (r'%', String) + # newlines are an error (use "nl" state) + ], + 'nl': [ + (r'\n', String) + ], + 'dqs': [ + (r'"', String, '#pop'), + # included here again for raw strings + (r'\\\\|\\"|\\\n', String.Escape), + include('strings') + ], + 'sqs': [ + (r"'", String, '#pop'), + # included here again for raw strings + (r"\\\\|\\'|\\\n", String.Escape), + include('strings') + ], + 'tdqs': [ + (r'"""', String, '#pop'), + include('strings'), + include('nl') + ], + 'tsqs': [ + (r"'''", String, '#pop'), + include('strings'), + include('nl') + ], + } diff --git a/pygments/lexers/misc/iolang.py b/pygments/lexers/misc/iolang.py new file mode 100644 index 00000000..f6d6bb47 --- /dev/null +++ b/pygments/lexers/misc/iolang.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.iolang + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for the Io language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number + +__all__ = ['IoLexer'] + + +class IoLexer(RegexLexer): + """ + For `Io <http://iolanguage.com/>`_ (a small, prototype-based + programming language) source. + + .. 
versionadded:: 0.10 + """ + name = 'Io' + filenames = ['*.io'] + aliases = ['io'] + mimetypes = ['text/x-iosrc'] + tokens = { + 'root': [ + (r'\n', Text), + (r'\s+', Text), + # Comments + (r'//(.*?)\n', Comment.Single), + (r'#(.*?)\n', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), + (r'/\+', Comment.Multiline, 'nestedcomment'), + # DoubleQuotedString + (r'"(\\\\|\\"|[^"])*"', String), + # Operators + (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}', + Operator), + # keywords + (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b', + Keyword), + # constants + (r'(nil|false|true)\b', Name.Constant), + # names + (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b', + Name.Builtin), + ('[a-zA-Z_]\w*', Name), + # numbers + (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), + (r'\d+', Number.Integer) + ], + 'nestedcomment': [ + (r'[^+/]+', Comment.Multiline), + (r'/\+', Comment.Multiline, '#push'), + (r'\+/', Comment.Multiline, '#pop'), + (r'[+/]', Comment.Multiline), + ] + } diff --git a/pygments/lexers/misc/make.py b/pygments/lexers/misc/make.py new file mode 100644 index 00000000..c585640f --- /dev/null +++ b/pygments/lexers/misc/make.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.make + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Makefiles and similar. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, include, bygroups, \ + do_insertions, using +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Punctuation +from pygments.lexers.shell import BashLexer + +__all__ = ['MakefileLexer', 'BaseMakefileLexer', 'CMakeLexer'] + + +class MakefileLexer(Lexer): + """ + Lexer for BSD and GNU make extensions (lenient enough to handle both in + the same file even). 
+ + *Rewritten in Pygments 0.10.* + """ + + name = 'Makefile' + aliases = ['make', 'makefile', 'mf', 'bsdmake'] + filenames = ['*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'] + mimetypes = ['text/x-makefile'] + + r_special = re.compile( + r'^(?:' + # BSD Make + r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|' + # GNU Make + r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)') + r_comment = re.compile(r'^\s*@?#') + + def get_tokens_unprocessed(self, text): + ins = [] + lines = text.splitlines(True) + done = '' + lex = BaseMakefileLexer(**self.options) + backslashflag = False + for line in lines: + if self.r_special.match(line) or backslashflag: + ins.append((len(done), [(0, Comment.Preproc, line)])) + backslashflag = line.strip().endswith('\\') + elif self.r_comment.match(line): + ins.append((len(done), [(0, Comment, line)])) + else: + done += line + for item in do_insertions(ins, lex.get_tokens_unprocessed(done)): + yield item + + def analyse_text(text): + # Many makefiles have $(BIG_CAPS) style variables + if re.search(r'\$\([A-Z_]+\)', text): + return 0.1 + + +class BaseMakefileLexer(RegexLexer): + """ + Lexer for simple Makefiles (no preprocessing). + + .. 
versionadded:: 0.10 + """ + + name = 'Base Makefile' + aliases = ['basemake'] + filenames = [] + mimetypes = [] + + tokens = { + 'root': [ + # recipes (need to allow spaces because of expandtabs) + (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), + # special variables + (r'\$[<@$+%?|*]', Keyword), + (r'\s+', Text), + (r'#.*?\n', Comment), + (r'(export)(\s+)(?=[\w${}\t -]+\n)', + bygroups(Keyword, Text), 'export'), + (r'export\s+', Keyword), + # assignment + (r'([\w${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', + bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), + # strings + (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), + (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), + # targets + (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), + 'block-header'), + # expansions + (r'\$\(', Keyword, 'expansion'), + ], + 'expansion': [ + (r'[^$a-zA-Z_)]+', Text), + (r'[a-zA-Z_]+', Name.Variable), + (r'\$', Keyword), + (r'\(', Keyword, '#push'), + (r'\)', Keyword, '#pop'), + ], + 'export': [ + (r'[\w${}-]+', Name.Variable), + (r'\n', Text, '#pop'), + (r'\s+', Text), + ], + 'block-header': [ + (r'[,|]', Punctuation), + (r'#.*?\n', Comment, '#pop'), + (r'\\\n', Text), # line continuation + (r'\$\(', Keyword, 'expansion'), + (r'[a-zA-Z_]+', Name), + (r'\n', Text, '#pop'), + (r'.', Text), + ], + } + + +class CMakeLexer(RegexLexer): + """ + Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files. + + .. 
versionadded:: 1.2 + """ + name = 'CMake' + aliases = ['cmake'] + filenames = ['*.cmake', 'CMakeLists.txt'] + mimetypes = ['text/x-cmake'] + + tokens = { + 'root': [ + # (r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|' + # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|' + # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|' + # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|' + # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|' + # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|' + # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|' + # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|' + # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|' + # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|' + # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|' + # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|' + # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|' + # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|' + # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|' + # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|' + # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|' + # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|' + # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|' + # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|' + # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|' + # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|' + # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|' + # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|' + # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|' + # r'COUNTARGS)\b', Name.Builtin, 'args'), + (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text, + Punctuation), 'args'), + include('keywords'), + include('ws') + ], + 'args': [ + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop'), + (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)), + (r'(\$<)(.+?)(>)', 
bygroups(Operator, Name.Variable, Operator)), + (r'(?s)".*?"', String.Double), + (r'\\\S+', String), + (r'[^\)$"# \t\n]+', String), + (r'\n', Text), # explicitly legal + include('keywords'), + include('ws') + ], + 'string': [ + + ], + 'keywords': [ + (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|' + r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword), + ], + 'ws': [ + (r'[ \t]+', Text), + (r'#.*\n', Comment), + ] + } + + def analyse_text(text): + exp = r'^ *CMAKE_MINIMUM_REQUIRED *\( *VERSION *\d(\.\d)* *( FATAL_ERROR)? *\) *$' + if re.search(exp, text, flags=re.MULTILINE | re.IGNORECASE): + return 0.8 + return 0.0 diff --git a/pygments/lexers/misc/ml.py b/pygments/lexers/misc/ml.py new file mode 100644 index 00000000..661b693a --- /dev/null +++ b/pygments/lexers/misc/ml.py @@ -0,0 +1,768 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.ml + ~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for ML family languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, bygroups, default, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Error + +__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer'] + + +class SMLLexer(RegexLexer): + """ + For the Standard ML language. + + .. 
versionadded:: 1.5 + """ + + name = 'Standard ML' + aliases = ['sml'] + filenames = ['*.sml', '*.sig', '*.fun'] + mimetypes = ['text/x-standardml', 'application/x-standardml'] + + alphanumid_reserved = set(( + # Core + 'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else', + 'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', + 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse', + 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while', + # Modules + 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', + 'struct', 'structure', 'where', + )) + + symbolicid_reserved = set(( + # Core + ':', '\|', '=', '=>', '->', '#', + # Modules + ':>', + )) + + nonid_reserved = set(('(', ')', '[', ']', '{', '}', ',', ';', '...', '_')) + + alphanumid_re = r"[a-zA-Z][\w']*" + symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+" + + # A character constant is a sequence of the form #s, where s is a string + # constant denoting a string of size one character. This setup just parses + # the entire string as either a String.Double or a String.Char (depending + # on the argument), even if the String.Char is an erroneous + # multiple-character string. 
+ def stringy(whatkind): + return [ + (r'[^"\\]', whatkind), + (r'\\[\\\"abtnvfr]', String.Escape), + # Control-character notation is used for codes < 32, + # where \^@ == \000 + (r'\\\^[\x40-\x5e]', String.Escape), + # Docs say 'decimal digits' + (r'\\[0-9]{3}', String.Escape), + (r'\\u[0-9a-fA-F]{4}', String.Escape), + (r'\\\s+\\', String.Interpol), + (r'"', whatkind, '#pop'), + ] + + # Callbacks for distinguishing tokens and reserved words + def long_id_callback(self, match): + if match.group(1) in self.alphanumid_reserved: + token = Error + else: + token = Name.Namespace + yield match.start(1), token, match.group(1) + yield match.start(2), Punctuation, match.group(2) + + def end_id_callback(self, match): + if match.group(1) in self.alphanumid_reserved: + token = Error + elif match.group(1) in self.symbolicid_reserved: + token = Error + else: + token = Name + yield match.start(1), token, match.group(1) + + def id_callback(self, match): + str = match.group(1) + if str in self.alphanumid_reserved: + token = Keyword.Reserved + elif str in self.symbolicid_reserved: + token = Punctuation + else: + token = Name + yield match.start(1), token, str + + tokens = { + # Whitespace and comments are (almost) everywhere + 'whitespace': [ + (r'\s+', Text), + (r'\(\*', Comment.Multiline, 'comment'), + ], + + 'delimiters': [ + # This lexer treats these delimiters specially: + # Delimiters define scopes, and the scope is how the meaning of + # the `|' is resolved - is it a case/handle expression, or function + # definition by cases? 
(This is not how the Definition works, but + # it's how MLton behaves, see http://mlton.org/SMLNJDeviations) + (r'\(|\[|{', Punctuation, 'main'), + (r'\)|\]|}', Punctuation, '#pop'), + (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')), + (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'), + (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'), + ], + + 'core': [ + # Punctuation that doesn't overlap symbolic identifiers + (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved), + Punctuation), + + # Special constants: strings, floats, numbers in decimal and hex + (r'#"', String.Char, 'char'), + (r'"', String.Double, 'string'), + (r'~?0x[0-9a-fA-F]+', Number.Hex), + (r'0wx[0-9a-fA-F]+', Number.Hex), + (r'0w\d+', Number.Integer), + (r'~?\d+\.\d+[eE]~?\d+', Number.Float), + (r'~?\d+\.\d+', Number.Float), + (r'~?\d+[eE]~?\d+', Number.Float), + (r'~?\d+', Number.Integer), + + # Labels + (r'#\s*[1-9][0-9]*', Name.Label), + (r'#\s*(%s)' % alphanumid_re, Name.Label), + (r'#\s+(%s)' % symbolicid_re, Name.Label), + # Some reserved words trigger a special, local lexer state change + (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'), + (r'(?=\b(exception)\b(?!\'))', Text, ('ename')), + (r'\b(functor|include|open|signature|structure)\b(?!\')', + Keyword.Reserved, 'sname'), + (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'), + + # Regular identifiers, long and otherwise + (r'\'[\w\']*', Name.Decorator), + (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"), + (r'(%s)' % alphanumid_re, id_callback), + (r'(%s)' % symbolicid_re, id_callback), + ], + 'dotted': [ + (r'(%s)(\.)' % alphanumid_re, long_id_callback), + (r'(%s)' % alphanumid_re, end_id_callback, "#pop"), + (r'(%s)' % symbolicid_re, end_id_callback, "#pop"), + (r'\s+', Error), + (r'\S+', Error), + ], + + + # Main parser (prevents errors in files that have scoping errors) + 'root': [ + default('main') + ], + + # In this scope, I expect '|' to not be followed by a 
function name, + # and I expect 'and' to be followed by a binding site + 'main': [ + include('whitespace'), + + # Special behavior of val/and/fun + (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'), + (r'\b(fun)\b(?!\')', Keyword.Reserved, + ('#pop', 'main-fun', 'fname')), + + include('delimiters'), + include('core'), + (r'\S+', Error), + ], + + # In this scope, I expect '|' and 'and' to be followed by a function + 'main-fun': [ + include('whitespace'), + + (r'\s', Text), + (r'\(\*', Comment.Multiline, 'comment'), + + # Special behavior of val/and/fun + (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'), + (r'\b(val)\b(?!\')', Keyword.Reserved, + ('#pop', 'main', 'vname')), + + # Special behavior of '|' and '|'-manipulating keywords + (r'\|', Punctuation, 'fname'), + (r'\b(case|handle)\b(?!\')', Keyword.Reserved, + ('#pop', 'main')), + + include('delimiters'), + include('core'), + (r'\S+', Error), + ], + + # Character and string parsers + 'char': stringy(String.Char), + 'string': stringy(String.Double), + + 'breakout': [ + (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'), + ], + + # Dealing with what comes after module system keywords + 'sname': [ + include('whitespace'), + include('breakout'), + + (r'(%s)' % alphanumid_re, Name.Namespace), + default('#pop'), + ], + + # Dealing with what comes after the 'fun' (or 'and' or '|') keyword + 'fname': [ + include('whitespace'), + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + + (r'(%s)' % alphanumid_re, Name.Function, '#pop'), + (r'(%s)' % symbolicid_re, Name.Function, '#pop'), + + # Ignore interesting function declarations like "fun (x + y) = ..." 
+ default('#pop'), + ], + + # Dealing with what comes after the 'val' (or 'and') keyword + 'vname': [ + include('whitespace'), + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + + (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re), + bygroups(Name.Variable, Text, Punctuation), '#pop'), + (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re), + bygroups(Name.Variable, Text, Punctuation), '#pop'), + (r'(%s)' % alphanumid_re, Name.Variable, '#pop'), + (r'(%s)' % symbolicid_re, Name.Variable, '#pop'), + + # Ignore interesting patterns like 'val (x, y)' + default('#pop'), + ], + + # Dealing with what comes after the 'type' (or 'and') keyword + 'tname': [ + include('whitespace'), + include('breakout'), + + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')), + + (r'(%s)' % alphanumid_re, Keyword.Type), + (r'(%s)' % symbolicid_re, Keyword.Type), + (r'\S+', Error, '#pop'), + ], + + # A type binding includes most identifiers + 'typbind': [ + include('whitespace'), + + (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')), + + include('breakout'), + include('core'), + (r'\S+', Error, '#pop'), + ], + + # Dealing with what comes after the 'datatype' (or 'and') keyword + 'dname': [ + include('whitespace'), + include('breakout'), + + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + (r'(=)(\s*)(datatype)', + bygroups(Punctuation, Text, Keyword.Reserved), '#pop'), + (r'=(?!%s)' % symbolicid_re, Punctuation, + ('#pop', 'datbind', 'datcon')), + + (r'(%s)' % alphanumid_re, Keyword.Type), + (r'(%s)' % symbolicid_re, Keyword.Type), + (r'\S+', Error, '#pop'), + ], + + # common case - A | B | C of int + 'datbind': [ + include('whitespace'), + + (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')), + (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')), + (r'\b(of)\b(?!\')', Keyword.Reserved), + + (r'(\|)(\s*)(%s)' % 
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    # These entries are regex fragments.  They are raw strings so that the
    # backslashes are not read as (invalid) string escapes; the resulting
    # values are unchanged.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list',
                  'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            # reversed so that longer operators are tried before their
            # one-character prefixes
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float needs a fractional part, an exponent, or both.  The dot
            # is escaped so it cannot match the 'x' of a hex literal such as
            # 0x1e5, and the exponent is optional once a dot is present so
            # that 1.5 is one token.
            (r'-?\d[\d_]*(?:\.[\d_]*(?:[eE][+\-]?\d[\d_]*)?'
             r'|[eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # comments nest, hence '#push' on a further opening delimiter
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # entered after a capitalised name that is followed by a dot
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
        ],
    }
+ (r'\s+', Text), + ], + + 'root': [ + include('comments-and-spaces'), + # keywords + (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword), + # directives + # we could parse the actual set of directives instead of anything + # starting with @, but this is troublesome + # because it needs to be adjusted all the time + # and assuming we parse only sources that compile, it is useless + (r'@' + ident_re + r'\b', Name.Builtin.Pseudo), + + # number literals + (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float), + (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float), + (r'-?\d+[eE][+\-]?\d+', Number.Float), + (r'0[xX][\da-fA-F]+', Number.Hex), + (r'0[oO][0-7]+', Number.Oct), + (r'0[bB][01]+', Number.Bin), + (r'\d+', Number.Integer), + # color literals + (r'#[\da-fA-F]{3,6}', Number.Integer), + + # string literals + (r'"', String.Double, 'string'), + # char literal, should be checked because this is the regexp from + # the caml lexer + (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'", + String.Char), + + # this is meant to deal with embedded exprs in strings + # every time we find a '}' we pop a state so that if we were + # inside a string, we are back in the string state + # as a consequence, we must also push a state every time we find a + # '{' or else we will have errors when parsing {} for instance + (r'{', Operator, '#push'), + (r'}', Operator, '#pop'), + + # html literals + # this is a much more strict that the actual parser, + # since a<b would not be parsed as html + # but then again, the parser is way too lax, and we can't hope + # to have something as tolerant + (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'), + + # db path + # matching the '[_]' in '/a[_]' because it is a part + # of the syntax of the db path definition + # unfortunately, i don't know how to match the ']' in + # /a[1], so this is somewhat inconsistent + (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable), + # putting the same color on <- as on db path, since + # it can be used only to mean Db.write + 
(r'<-(?!'+op_re+r')', Name.Variable), + + # 'modules' + # although modules are not distinguished by their names as in caml + # the standard library seems to follow the convention that modules + # only area capitalized + (r'\b([A-Z]\w*)(?=\.)', Name.Namespace), + + # operators + # = has a special role because this is the only + # way to syntactic distinguish binding constructions + # unfortunately, this colors the equal in {x=2} too + (r'=(?!'+op_re+r')', Keyword), + (r'(%s)+' % op_re, Operator), + (r'(%s)+' % punc_re, Operator), + + # coercions + (r':', Operator, 'type'), + # type variables + # we need this rule because we don't parse specially type + # definitions so in "type t('a) = ...", "'a" is parsed by 'root' + ("'"+ident_re, Keyword.Type), + + # id literal, #something, or #{expr} + (r'#'+ident_re, String.Single), + (r'#(?={)', String.Single), + + # identifiers + # this avoids to color '2' in 'a2' as an integer + (ident_re, Text), + + # default, not sure if that is needed or not + # (r'.', Text), + ], + + # it is quite painful to have to parse types to know where they end + # this is the general rule for a type + # a type is either: + # * -> ty + # * type-with-slash + # * type-with-slash -> ty + # * type-with-slash (, type-with-slash)+ -> ty + # + # the code is pretty funky in here, but this code would roughly + # translate in caml to: + # let rec type stream = + # match stream with + # | [< "->"; stream >] -> type stream + # | [< ""; stream >] -> + # type_with_slash stream + # type_lhs_1 stream; + # and type_1 stream = ... 
+ 'type': [ + include('comments-and-spaces'), + (r'->', Keyword.Type), + default(('#pop', 'type-lhs-1', 'type-with-slash')), + ], + + # parses all the atomic or closed constructions in the syntax of type + # expressions: record types, tuple types, type constructors, basic type + # and type variables + 'type-1': [ + include('comments-and-spaces'), + (r'\(', Keyword.Type, ('#pop', 'type-tuple')), + (r'~?{', Keyword.Type, ('#pop', 'type-record')), + (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')), + (ident_re, Keyword.Type, '#pop'), + ("'"+ident_re, Keyword.Type), + # this case is not in the syntax but sometimes + # we think we are parsing types when in fact we are parsing + # some css, so we just pop the states until we get back into + # the root state + default('#pop'), + ], + + # type-with-slash is either: + # * type-1 + # * type-1 (/ type-1)+ + 'type-with-slash': [ + include('comments-and-spaces'), + default(('#pop', 'slash-type-1', 'type-1')), + ], + 'slash-type-1': [ + include('comments-and-spaces'), + ('/', Keyword.Type, ('#pop', 'type-1')), + # same remark as above + default('#pop'), + ], + + # we go in this state after having parsed a type-with-slash + # while trying to parse a type + # and at this point we must determine if we are parsing an arrow + # type (in which case we must continue parsing) or not (in which + # case we stop) + 'type-lhs-1': [ + include('comments-and-spaces'), + (r'->', Keyword.Type, ('#pop', 'type')), + (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')), + default('#pop'), + ], + 'type-arrow': [ + include('comments-and-spaces'), + # the look ahead here allows to parse f(x : int, y : float -> truc) + # correctly + (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'), + (r'->', Keyword.Type, ('#pop', 'type')), + # same remark as above + default('#pop'), + ], + + # no need to do precise parsing for tuples and records + # because they are closed constructions, so we can simply + # find the closing delimiter + # note that this function 
would be not work if the source + # contained identifiers like `{)` (although it could be patched + # to support it) + 'type-tuple': [ + include('comments-and-spaces'), + (r'[^\(\)/*]+', Keyword.Type), + (r'[/*]', Keyword.Type), + (r'\(', Keyword.Type, '#push'), + (r'\)', Keyword.Type, '#pop'), + ], + 'type-record': [ + include('comments-and-spaces'), + (r'[^{}/*]+', Keyword.Type), + (r'[/*]', Keyword.Type), + (r'{', Keyword.Type, '#push'), + (r'}', Keyword.Type, '#pop'), + ], + + # 'type-tuple': [ + # include('comments-and-spaces'), + # (r'\)', Keyword.Type, '#pop'), + # default(('#pop', 'type-tuple-1', 'type-1')), + # ], + # 'type-tuple-1': [ + # include('comments-and-spaces'), + # (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,) + # (r',', Keyword.Type, 'type-1'), + # ], + # 'type-record':[ + # include('comments-and-spaces'), + # (r'}', Keyword.Type, '#pop'), + # (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'), + # ], + # 'type-record-field-expr': [ + # + # ], + + 'nested-comment': [ + (r'[^/*]+', Comment), + (r'/\*', Comment, '#push'), + (r'\*/', Comment, '#pop'), + (r'[/*]', Comment), + ], + + # the copy pasting between string and single-string + # is kinda sad. Is there a way to avoid that?? 
+ 'string': [ + (r'[^\\"{]+', String.Double), + (r'"', String.Double, '#pop'), + (r'{', Operator, 'root'), + include('escape-sequence'), + ], + 'single-string': [ + (r'[^\\\'{]+', String.Double), + (r'\'', String.Double, '#pop'), + (r'{', Operator, 'root'), + include('escape-sequence'), + ], + + # all the html stuff + # can't really reuse some existing html parser + # because we must be able to parse embedded expressions + + # we are in this state after someone parsed the '<' that + # started the html literal + 'html-open-tag': [ + (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')), + (r'>', String.Single, ('#pop', 'html-content')), + ], + + # we are in this state after someone parsed the '</' that + # started the end of the closing tag + 'html-end-tag': [ + # this is a star, because </> is allowed + (r'[\w\-:]*>', String.Single, '#pop'), + ], + + # we are in this state after having parsed '<ident(:ident)?' + # we thus parse a possibly empty list of attributes + 'html-attr': [ + (r'\s+', Text), + (r'[\w\-:]+=', String.Single, 'html-attr-value'), + (r'/>', String.Single, '#pop'), + (r'>', String.Single, ('#pop', 'html-content')), + ], + + 'html-attr-value': [ + (r"'", String.Single, ('#pop', 'single-string')), + (r'"', String.Single, ('#pop', 'string')), + (r'#'+ident_re, String.Single, '#pop'), + (r'#(?={)', String.Single, ('#pop', 'root')), + (r'[^"\'{`=<>]+', String.Single, '#pop'), + (r'{', Operator, ('#pop', 'root')), # this is a tail call! 
+ ], + + # we should probably deal with '\' escapes here + 'html-content': [ + (r'<!--', Comment, 'html-comment'), + (r'</', String.Single, ('#pop', 'html-end-tag')), + (r'<', String.Single, 'html-open-tag'), + (r'{', Operator, 'root'), + (r'[^<{]+', String.Single), + ], + + 'html-comment': [ + (r'-->', Comment, '#pop'), + (r'[^\-]+|-', Comment), + ], + } diff --git a/pygments/lexers/misc/nimrod.py b/pygments/lexers/misc/nimrod.py new file mode 100644 index 00000000..60977c8a --- /dev/null +++ b/pygments/lexers/misc/nimrod.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.nimrod + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for the Nimrod language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, default +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Error + +__all__ = ['NimrodLexer'] + + +class NimrodLexer(RegexLexer): + """ + For `Nimrod <http://nimrod-code.org/>`_ source code. + + .. 
def underscorize(words):
    """Return a regex alternation matching *words* with optional underscores.

    Every character of every word is followed by ``_?``, and the per-word
    patterns are joined with ``|``.  For example ``['proc']`` becomes
    ``'p_?r_?o_?c_?'``.

    The characters are inserted verbatim (no regex escaping), so *words* is
    expected to contain plain alphanumeric keywords.

    :param words: iterable of keyword strings.
    :returns: the combined pattern as a string; ``''`` for an empty iterable.
    """
    return "|".join(
        "".join(ch + "_?" for ch in word)
        for word in words
    )
Identifiers + (r'\b((?![_\d])\w)(((?!_)\w)|(_(?!_)\w))*', Name), + # Numbers + (r'[0-9][0-9_]*(?=([eE.]|\'[fF](32|64)))', + Number.Float, ('float-suffix', 'float-number')), + (r'0[xX][a-f0-9][a-f0-9_]*', Number.Hex, 'int-suffix'), + (r'0[bB][01][01_]*', Number.Bin, 'int-suffix'), + (r'0o[0-7][0-7_]*', Number.Oct, 'int-suffix'), + (r'[0-9][0-9_]*', Number.Integer, 'int-suffix'), + # Whitespace + (r'\s+', Text), + (r'.+$', Error), + ], + 'chars': [ + (r'\\([\\abcefnrtvl"\']|x[a-f0-9]{2}|[0-9]{1,3})', String.Escape), + (r"'", String.Char, '#pop'), + (r".", String.Char) + ], + 'strings': [ + (r'(?<!\$)\$(\d+|#|\w+)+', String.Interpol), + (r'[^\\\'"\$\n]+', String), + # quotes, dollars and backslashes must be parsed one at a time + (r'[\'"\\]', String), + # unhandled string formatting sign + (r'\$', String) + # newlines are an error (use "nl" state) + ], + 'dqs': [ + (r'\\([\\abcefnrtvl"\']|\n|x[a-f0-9]{2}|[0-9]{1,3})', + String.Escape), + (r'"', String, '#pop'), + include('strings') + ], + 'rdqs': [ + (r'"(?!")', String, '#pop'), + (r'""', String.Escape), + include('strings') + ], + 'tdqs': [ + (r'"""(?!")', String, '#pop'), + include('strings'), + include('nl') + ], + 'funcname': [ + (r'((?![\d_])\w)(((?!_)\w)|(_(?!_)\w))*', Name.Function, '#pop'), + (r'`.+`', Name.Function, '#pop') + ], + 'nl': [ + (r'\n', String) + ], + 'float-number': [ + (r'\.(?!\.)[0-9_]*', Number.Float), + (r'[eE][+-]?[0-9][0-9_]*', Number.Float), + default('#pop') + ], + 'float-suffix': [ + (r'\'[fF](32|64)', Number.Float), + default('#pop') + ], + 'int-suffix': [ + (r'\'[iI](32|64)', Number.Integer.Long), + (r'\'[iI](8|16)', Number.Integer), + default('#pop') + ], + } diff --git a/pygments/lexers/misc/nit.py b/pygments/lexers/misc/nit.py new file mode 100644 index 00000000..ef3271b1 --- /dev/null +++ b/pygments/lexers/misc/nit.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.nit + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for the Nit language. 
+ + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation + +__all__ = ['NitLexer'] + + +class NitLexer(RegexLexer): + """ + For `nit <http://nitlanguage.org>`_ source. + + .. versionadded:: 2.0 + """ + + name = 'Nit' + aliases = ['nit'] + filenames = ['*.nit'] + tokens = { + 'root': [ + (r'#.*?$', Comment.Single), + (words(( + 'package', 'module', 'import', 'class', 'abstract', 'interface', + 'universal', 'enum', 'end', 'fun', 'type', 'init', 'redef', + 'isa', 'do', 'readable', 'writable', 'var', 'intern', 'extern', + 'public', 'protected', 'private', 'intrude', 'if', 'then', + 'else', 'while', 'loop', 'for', 'in', 'and', 'or', 'not', + 'implies', 'return', 'continue', 'break', 'abort', 'assert', + 'new', 'is', 'once', 'super', 'self', 'true', 'false', 'nullable', + 'null', 'as', 'isset', 'label', '__debug__'), suffix='(?=( |\n|\t|\r|\())'), + Keyword), + (r'[A-Z][A-Za-z0-9_]*', Name.Class), + (r'"""(([^\'\\]|\\.)|\\r|\\n)*(({{?)?(""?{{?)*""""*)', String), # Simple long string + (r'\'\'\'(((\\.|[^\'\\])|\\r|\\n)|\'((\\.|[^\'\\])|\\r|\\n)|' + r'\'\'((\\.|[^\'\\])|\\r|\\n))*\'\'\'', String), # Simple long string alt + (r'"""(([^\'\\]|\\.)|\\r|\\n)*((""?)?({{?""?)*{{{{*)', String), # Start long string + (r'}}}(((\\.|[^\'\\])|\\r|\\n))*(""?)?({{?""?)*{{{{*', String), # Mid long string + (r'}}}(((\\.|[^\'\\])|\\r|\\n))*({{?)?(""?{{?)*""""*', String), # End long string + (r'"(\\.|([^"}{\\]))*"', String), # Simple String + (r'"(\\.|([^"}{\\]))*{', String), # Start string + (r'}(\\.|([^"}{\\]))*{', String), # Mid String + (r'}(\\.|([^"}{\\]))*"', String), # End String + (r'(\'[^\'\\]\')|(\'\\.\')', String.Char), + (r'[0-9]+', Number.Integer), + (r'[0-9]*.[0-9]+', Number.Float), + (r'0(x|X)[0-9A-Fa-f]+', Number.Hex), + (r'[a-z][A-Za-z0-9_]*', Name), + 
class NixLexer(RegexLexer):
    """
    For the `Nix language <http://nixos.org/nix/>`_.

    .. versionadded:: 2.0
    """

    name = 'Nix'
    aliases = ['nixos', 'nix']
    filenames = ['*.nix']
    mimetypes = ['text/x-nix']

    flags = re.MULTILINE | re.UNICODE

    keywords = ['rec', 'with', 'let', 'in', 'inherit', 'assert', 'if',
                'else', 'then', '...']
    builtins = ['import', 'abort', 'baseNameOf', 'dirOf', 'isNull', 'builtins',
                'map', 'removeAttrs', 'throw', 'toString', 'derivation']
    operators = ['++', '+', '?', '.', '!', '//', '==',
                 '!=', '&&', '||', '->', '=']

    punctuations = ["(", ")", "[", "]", ";", "{", "}", ":", ",", "@"]

    tokens = {
        'root': [
            # comments starting with #
            (r'#.*$', Comment.Single),

            # multiline comments
            (r'/\*', Comment.Multiline, 'comment'),

            # whitespace
            (r'\s+', Text),

            # keywords; the trailing word boundary is only added after
            # word-like entries, because '...' followed by a space or '}'
            # has no word boundary and would otherwise never match
            ('(%s)' % '|'.join(
                re.escape(entry) + ('\\b' if entry[-1].isalnum() else '')
                for entry in keywords), Keyword),

            # highlight the builtins
            ('(%s)' % '|'.join(re.escape(entry) + '\\b' for entry in builtins),
             Name.Builtin),

            (r'\b(true|false|null)\b', Name.Constant),

            # operators
            ('(%s)' % '|'.join(re.escape(entry) for entry in operators),
             Operator),

            # word operators
            (r'\b(or|and)\b', Operator.Word),

            # punctuations
            ('(%s)' % '|'.join(re.escape(entry) for entry in punctuations),
             Punctuation),

            # integers
            (r'[0-9]+', Number.Integer),

            # strings
            (r'"', String.Double, 'doublequote'),
            (r"''", String.Single, 'singlequote'),

            # paths
            (r'[\w.+-]*(\/[\w.+-]+)+', Literal),
            (r'\<[\w.+-]+(\/[\w.+-]+)*\>', Literal),

            # urls
            (r'[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[\w%/?:@&=+$,\\.!~*\'-]+', Literal),

            # names of variables
            (r'[\w-]+\s*=', String.Symbol),
            (r'[a-zA-Z_][\w\'-]*', Text),

        ],
        # comments nest, hence '#push' on a further '/*'
        'comment': [
            (r'[^/\*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[\*/]', Comment.Multiline),
        ],
        'singlequote': [
            (r"'''", String.Escape),
            (r"''\$\{", String.Escape),
            # two quotes, a literal backslash and one more character
            # (''\n, ''\r, ''\t, ...).  This must not match two quotes
            # followed by a real line break, which is a closing delimiter at
            # the end of a line.
            (r"''\\.", String.Escape),
            (r"''", String.Single, '#pop'),
            (r'\$\{', String.Interpol, 'antiquote'),
            (r"[^']", String.Single),
            # a lone apostrophe is ordinary string content
            (r"'", String.Single),
        ],
        'doublequote': [
            # An escaped backslash is consumed as a pair; otherwise its
            # second half could be paired with a following closing quote.
            (r'\\\\', String.Escape),
            # specific escapes come before the lone-backslash fallback so
            # that they are reachable
            (r'\\"', String.Escape),
            (r'\\\$\{', String.Escape),
            (r'\\', String.Escape),
            (r'"', String.Double, '#pop'),
            (r'\$\{', String.Interpol, 'antiquote'),
            (r'[^"]', String.Double),
        ],
        'antiquote': [
            (r"}", String.Interpol, '#pop'),
            # TODO: we should probably escape also here ''${ \${
            (r"\$\{", String.Interpol, '#push'),
            include('root'),
        ],
    }

    def analyse_text(text):
        """Score how much *text* looks like Nix source.

        Each heuristic that matches adds a fixed weight to the result, so the
        raw sum is 0.0 when nothing matches and can exceed 1.0 when several
        match.
        """
        rv = 0.0
        # TODO: let/in
        if re.search(r'import.+?<[^>]+>', text):
            rv += 0.4
        if re.search(r'mkDerivation\s+(\(|\{|rec)', text):
            rv += 0.4
        if re.search(r'with\s+[a-zA-Z\.]+;', text):
            rv += 0.2
        # '+' added: the inherited name is normally longer than one character
        if re.search(r'inherit\s+[a-zA-Z()\.]+;', text):
            rv += 0.2
        if re.search(r'=\s+mkIf\s+', text):
            rv += 0.4
        if re.search(r'\{[a-zA-Z,\s]+\}:', text):
            rv += 0.1
        return rv
class OocLexer(RegexLexer):
    """
    For `Ooc <http://ooc-lang.org/>`_ source code

    .. versionadded:: 1.2
    """
    name = 'Ooc'
    aliases = ['ooc']
    filenames = ['*.ooc']
    mimetypes = ['text/x-ooc']

    tokens = {
        'root': [
            (words((
                'class', 'interface', 'implement', 'abstract', 'extends', 'from',
                'this', 'super', 'new', 'const', 'final', 'static', 'import',
                'use', 'extern', 'inline', 'proto', 'break', 'continue',
                'fallthrough', 'operator', 'if', 'else', 'for', 'while', 'do',
                'switch', 'case', 'as', 'in', 'version', 'return', 'true',
                'false', 'null'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            # 'include' gets its own state for the list that follows it
            (r'include\b', Keyword, 'include'),
            (r'(cover)([ \t]+)(from)([ \t]+)(\w+[*@]?)',
             bygroups(Keyword, Text, Keyword, Text, Name.Class)),
            (r'(func)((?:[ \t]|\\\n)+)(~[a-z_]\w*)',
             bygroups(Keyword, Text, Name.Function)),
            (r'\bfunc\b', Keyword),
            # Note: %= and ^= not listed on http://ooc-lang.org/syntax
            (r'//.*', Comment),
            (r'(?s)/\*.*?\*/', Comment.Multiline),
            (r'(==?|\+=?|-[=>]?|\*=?|/=?|:=|!=?|%=?|\?|>{1,3}=?|<{1,3}=?|\.\.|'
             r'&&?|\|\|?|\^=?)', Operator),
            (r'(\.)([ \t]*)([a-z]\w*)', bygroups(Operator, Text,
                                                 Name.Function)),
            (r'[A-Z][A-Z0-9_]+', Name.Constant),
            (r'[A-Z]\w*([@*]|\[[ \t]*\])?', Name.Class),

            (r'([a-z]\w*(?:~[a-z]\w*)?)((?:[ \t]|\\\n)*)(?=\()',
             bygroups(Name.Function, Text)),
            (r'[a-z]\w*', Name.Variable),

            # : introduces types
            (r'[:(){}\[\];,]', Punctuation),

            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'0c[0-9]+', Number.Oct),
            (r'0b[01]+', Number.Bin),
            # One or more digits before the dot, so that 12.5 is one token.
            # The lookahead keeps a '..' range operator from being read as
            # part of a float.
            (r'[0-9_]+\.[0-9_]*(?!\.)', Number.Float),
            (r'[0-9_]+', Number.Decimal),

            (r'"(?:\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\"])*"',
             String.Double),
            (r"'(?:\\.|\\[0-9]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'@', Punctuation),  # pointer dereference
            (r'\.', Punctuation),  # imports or chain operator

            (r'\\[ \t\n]', Text),
            (r'[ \t]+', Text),
        ],
        # list following 'include'; ends at ';' or at the end of the line
        'include': [
            (r'[\w/]+', Name),
            (r',', Punctuation),
            (r'[ \t]', Text),
            (r'[;\n]', Text, '#pop'),
        ],
    }
versionadded:: 1.6 + """ + name = 'SourcePawn' + aliases = ['sp'] + filenames = ['*.sp'] + mimetypes = ['text/x-sourcepawn'] + + #: optional Comment or Whitespace + _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+' + + tokens = { + 'root': [ + # preprocessor directives: without whitespace + ('^#if\s+0', Comment.Preproc, 'if0'), + ('^#', Comment.Preproc, 'macro'), + # or with whitespace + ('^' + _ws + r'#if\s+0', Comment.Preproc, 'if0'), + ('^' + _ws + '#', Comment.Preproc, 'macro'), + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), # line continuation + (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), + (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline), + (r'[{}]', Punctuation), + (r'L?"', String, 'string'), + (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), + (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), + (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), + (r'0[0-7]+[LlUu]*', Number.Oct), + (r'\d+[LlUu]*', Number.Integer), + (r'\*/', Error), + (r'[~!%^&*+=|?:<>/-]', Operator), + (r'[()\[\],.;]', Punctuation), + (r'(case|const|continue|native|' + r'default|else|enum|for|if|new|operator|' + r'public|return|sizeof|static|decl|struct|switch)\b', Keyword), + (r'(bool|Float)\b', Keyword.Type), + (r'(true|false)\b', Keyword.Constant), + ('[a-zA-Z_]\w*', Name), + ], + 'string': [ + (r'"', String, '#pop'), + (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), + (r'[^\\"\n]+', String), # all other characters + (r'\\\n', String), # line continuation + (r'\\', String), # stray backslash + ], + 'macro': [ + (r'[^/\n]+', Comment.Preproc), + (r'/\*(.|\n)*?\*/', Comment.Multiline), + (r'//.*?\n', Comment.Single, '#pop'), + (r'/', Comment.Preproc), + (r'(?<=\\)\n', Comment.Preproc), + (r'\n', Comment.Preproc, '#pop'), + ], + 'if0': [ + (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), + (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), + (r'.*?\n', Comment), + ] + } + + SM_TYPES = 
set(('Action', 'bool', 'Float', 'Plugin', 'String', 'any', + 'AdminFlag', 'OverrideType', 'OverrideRule', 'ImmunityType', + 'GroupId', 'AdminId', 'AdmAccessMode', 'AdminCachePart', + 'CookieAccess', 'CookieMenu', 'CookieMenuAction', 'NetFlow', + 'ConVarBounds', 'QueryCookie', 'ReplySource', + 'ConVarQueryResult', 'ConVarQueryFinished', 'Function', + 'Action', 'Identity', 'PluginStatus', 'PluginInfo', 'DBResult', + 'DBBindType', 'DBPriority', 'PropType', 'PropFieldType', + 'MoveType', 'RenderMode', 'RenderFx', 'EventHookMode', + 'EventHook', 'FileType', 'FileTimeMode', 'PathType', + 'ParamType', 'ExecType', 'DialogType', 'Handle', 'KvDataTypes', + 'NominateResult', 'MapChange', 'MenuStyle', 'MenuAction', + 'MenuSource', 'RegexError', 'SDKCallType', 'SDKLibrary', + 'SDKFuncConfSource', 'SDKType', 'SDKPassMethod', 'RayType', + 'TraceEntityFilter', 'ListenOverride', 'SortOrder', 'SortType', + 'SortFunc2D', 'APLRes', 'FeatureType', 'FeatureStatus', + 'SMCResult', 'SMCError', 'TFClassType', 'TFTeam', 'TFCond', + 'TFResourceType', 'Timer', 'TopMenuAction', 'TopMenuObjectType', + 'TopMenuPosition', 'TopMenuObject', 'UserMsg')) + + def __init__(self, **options): + self.smhighlighting = get_bool_opt(options, + 'sourcemod', True) + + self._functions = set() + if self.smhighlighting: + from pygments.lexers._sourcemodbuiltins import FUNCTIONS + self._functions.update(FUNCTIONS) + RegexLexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + for index, token, value in \ + RegexLexer.get_tokens_unprocessed(self, text): + if token is Name: + if self.smhighlighting: + if value in self.SM_TYPES: + token = Keyword.Type + elif value in self._functions: + token = Name.Builtin + yield index, token, value + + +class PawnLexer(RegexLexer): + """ + For Pawn source code. + + .. 
versionadded:: 2.0 + """ + + name = 'Pawn' + aliases = ['pawn'] + filenames = ['*.p', '*.pwn', '*.inc'] + mimetypes = ['text/x-pawn'] + + #: optional Comment or Whitespace + _ws = r'(?:\s|//.*?\n|/[*][\w\W]*?[*]/)+' + + tokens = { + 'root': [ + # preprocessor directives: without whitespace + ('^#if\s+0', Comment.Preproc, 'if0'), + ('^#', Comment.Preproc, 'macro'), + # or with whitespace + ('^' + _ws + r'#if\s+0', Comment.Preproc, 'if0'), + ('^' + _ws + '#', Comment.Preproc, 'macro'), + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), # line continuation + (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), + (r'/(\\\n)?\*[\w\W]*?\*(\\\n)?/', Comment.Multiline), + (r'[{}]', Punctuation), + (r'L?"', String, 'string'), + (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), + (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), + (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), + (r'0[0-7]+[LlUu]*', Number.Oct), + (r'\d+[LlUu]*', Number.Integer), + (r'\*/', Error), + (r'[~!%^&*+=|?:<>/-]', Operator), + (r'[()\[\],.;]', Punctuation), + (r'(switch|case|default|const|new|static|char|continue|break|' + r'if|else|for|while|do|operator|enum|' + r'public|return|sizeof|tagof|state|goto)\b', Keyword), + (r'(bool|Float)\b', Keyword.Type), + (r'(true|false)\b', Keyword.Constant), + ('[a-zA-Z_]\w*', Name), + ], + 'string': [ + (r'"', String, '#pop'), + (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), + (r'[^\\"\n]+', String), # all other characters + (r'\\\n', String), # line continuation + (r'\\', String), # stray backslash + ], + 'macro': [ + (r'[^/\n]+', Comment.Preproc), + (r'/\*(.|\n)*?\*/', Comment.Multiline), + (r'//.*?\n', Comment.Single, '#pop'), + (r'/', Comment.Preproc), + (r'(?<=\\)\n', Comment.Preproc), + (r'\n', Comment.Preproc, '#pop'), + ], + 'if0': [ + (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), + (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), + (r'.*?\n', 
Comment), + ] + } diff --git a/pygments/lexers/misc/rebol.py b/pygments/lexers/misc/rebol.py new file mode 100644 index 00000000..cde5f4a4 --- /dev/null +++ b/pygments/lexers/misc/rebol.py @@ -0,0 +1,438 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.rebol + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for the REBOL and related languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, bygroups +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Generic, Whitespace + +__all__ = ['RebolLexer', 'RedLexer'] + + +class RebolLexer(RegexLexer): + """ + A `REBOL <http://www.rebol.com/>`_ lexer. + + .. versionadded:: 1.1 + """ + name = 'REBOL' + aliases = ['rebol'] + filenames = ['*.r', '*.r3', '*.reb'] + mimetypes = ['text/x-rebol'] + + flags = re.IGNORECASE | re.MULTILINE + + re.IGNORECASE + + escape_re = r'(?:\^\([0-9a-f]{1,4}\)*)' + + def word_callback(lexer, match): + word = match.group() + + if re.match(".*:$", word): + yield match.start(), Generic.Subheading, word + elif re.match( + r'(native|alias|all|any|as-string|as-binary|bind|bound\?|case|' + r'catch|checksum|comment|debase|dehex|exclude|difference|disarm|' + r'either|else|enbase|foreach|remove-each|form|free|get|get-env|if|' + r'in|intersect|loop|minimum-of|maximum-of|mold|new-line|' + r'new-line\?|not|now|prin|print|reduce|compose|construct|repeat|' + r'reverse|save|script\?|set|shift|switch|throw|to-hex|trace|try|' + r'type\?|union|unique|unless|unprotect|unset|until|use|value\?|' + r'while|compress|decompress|secure|open|close|read|read-io|' + r'write-io|write|update|query|wait|input\?|exp|log-10|log-2|' + r'log-e|square-root|cosine|sine|tangent|arccosine|arcsine|' + r'arctangent|protect|lowercase|uppercase|entab|detab|connected\?|' + r'browse|launch|stats|get-modes|set-modes|to-local-file|' + 
r'to-rebol-file|encloak|decloak|create-link|do-browser|bind\?|' + r'hide|draw|show|size-text|textinfo|offset-to-caret|' + r'caret-to-offset|local-request-file|rgb-to-hsv|hsv-to-rgb|' + r'crypt-strength\?|dh-make-key|dh-generate-key|dh-compute-key|' + r'dsa-make-key|dsa-generate-key|dsa-make-signature|' + r'dsa-verify-signature|rsa-make-key|rsa-generate-key|' + r'rsa-encrypt)$', word): + yield match.start(), Name.Builtin, word + elif re.match( + r'(add|subtract|multiply|divide|remainder|power|and~|or~|xor~|' + r'minimum|maximum|negate|complement|absolute|random|head|tail|' + r'next|back|skip|at|pick|first|second|third|fourth|fifth|sixth|' + r'seventh|eighth|ninth|tenth|last|path|find|select|make|to|copy\*|' + r'insert|remove|change|poke|clear|trim|sort|min|max|abs|cp|' + r'copy)$', word): + yield match.start(), Name.Function, word + elif re.match( + r'(error|source|input|license|help|install|echo|Usage|with|func|' + r'throw-on-error|function|does|has|context|probe|\?\?|as-pair|' + r'mod|modulo|round|repend|about|set-net|append|join|rejoin|reform|' + r'remold|charset|array|replace|move|extract|forskip|forall|alter|' + r'first+|also|take|for|forever|dispatch|attempt|what-dir|' + r'change-dir|clean-path|list-dir|dirize|rename|split-path|delete|' + r'make-dir|delete-dir|in-dir|confirm|dump-obj|upgrade|what|' + r'build-tag|process-source|build-markup|decode-cgi|read-cgi|' + r'write-user|save-user|set-user-name|protect-system|parse-xml|' + r'cvs-date|cvs-version|do-boot|get-net-info|desktop|layout|' + r'scroll-para|get-face|alert|set-face|uninstall|unfocus|' + r'request-dir|center-face|do-events|net-error|decode-url|' + r'parse-header|parse-header-date|parse-email-addrs|import-email|' + r'send|build-attach-body|resend|show-popup|hide-popup|open-events|' + r'find-key-face|do-face|viewtop|confine|find-window|' + r'insert-event-func|remove-event-func|inform|dump-pane|dump-face|' + r'flag-face|deflag-face|clear-fields|read-net|vbug|path-thru|' + 
r'read-thru|load-thru|do-thru|launch-thru|load-image|' + r'request-download|do-face-alt|set-font|set-para|get-style|' + r'set-style|make-face|stylize|choose|hilight-text|hilight-all|' + r'unlight-text|focus|scroll-drag|clear-face|reset-face|scroll-face|' + r'resize-face|load-stock|load-stock-block|notify|request|flash|' + r'request-color|request-pass|request-text|request-list|' + r'request-date|request-file|dbug|editor|link-relative-path|' + r'emailer|parse-error)$', word): + yield match.start(), Keyword.Namespace, word + elif re.match( + r'(halt|quit|do|load|q|recycle|call|run|ask|parse|view|unview|' + r'return|exit|break)$', word): + yield match.start(), Name.Exception, word + elif re.match('REBOL$', word): + yield match.start(), Generic.Heading, word + elif re.match("to-.*", word): + yield match.start(), Keyword, word + elif re.match('(\+|-|\*|/|//|\*\*|and|or|xor|=\?|=|==|<>|<|>|<=|>=)$', + word): + yield match.start(), Operator, word + elif re.match(".*\?$", word): + yield match.start(), Keyword, word + elif re.match(".*\!$", word): + yield match.start(), Keyword.Type, word + elif re.match("'.*", word): + yield match.start(), Name.Variable.Instance, word # lit-word + elif re.match("#.*", word): + yield match.start(), Name.Label, word # issue + elif re.match("%.*", word): + yield match.start(), Name.Decorator, word # file + else: + yield match.start(), Name.Variable, word + + tokens = { + 'root': [ + (r'[^R]+', Comment), + (r'REBOL\s+\[', Generic.Strong, 'script'), + (r'R', Comment) + ], + 'script': [ + (r'\s+', Text), + (r'#"', String.Char, 'char'), + (r'#{[0-9a-f]*}', Number.Hex), + (r'2#{', Number.Hex, 'bin2'), + (r'64#{[0-9a-z+/=\s]*}', Number.Hex), + (r'"', String, 'string'), + (r'{', String, 'string2'), + (r';#+.*\n', Comment.Special), + (r';\*+.*\n', Comment.Preproc), + (r';.*\n', Comment), + (r'%"', Name.Decorator, 'stringFile'), + (r'%[^(\^{^")\s\[\]]+', Name.Decorator), + (r'[+-]?([a-z]{1,3})?\$\d+(\.\d+)?', Number.Float), # money + 
(r'[+-]?\d+\:\d+(\:\d+)?(\.\d+)?', String.Other), # time + (r'\d+[\-\/][0-9a-z]+[\-\/]\d+(\/\d+\:\d+((\:\d+)?' + r'([\.\d+]?([+-]?\d+:\d+)?)?)?)?', String.Other), # date + (r'\d+(\.\d+)+\.\d+', Keyword.Constant), # tuple + (r'\d+[xX]\d+', Keyword.Constant), # pair + (r'[+-]?\d+(\'\d+)?([\.,]\d*)?[eE][+-]?\d+', Number.Float), + (r'[+-]?\d+(\'\d+)?[\.,]\d*', Number.Float), + (r'[+-]?\d+(\'\d+)?', Number), + (r'[\[\]\(\)]', Generic.Strong), + (r'[a-z]+[^(\^{"\s:)]*://[^(\^{"\s)]*', Name.Decorator), # url + (r'mailto:[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # url + (r'[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # email + (r'comment\s"', Comment, 'commentString1'), + (r'comment\s{', Comment, 'commentString2'), + (r'comment\s\[', Comment, 'commentBlock'), + (r'comment\s[^(\s{\"\[]+', Comment), + (r'/[^(\^{^")\s/[\]]*', Name.Attribute), + (r'([^(\^{^")\s/[\]]+)(?=[:({"\s/\[\]])', word_callback), + (r'<[\w:.-]*>', Name.Tag), + (r'<[^(<>\s")]+', Name.Tag, 'tag'), + (r'([^(\^{^")\s]+)', Text), + ], + 'string': [ + (r'[^(\^")]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'"', String, '#pop'), + ], + 'string2': [ + (r'[^(\^{^})]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'{', String, '#push'), + (r'}', String, '#pop'), + ], + 'stringFile': [ + (r'[^(\^")]+', Name.Decorator), + (escape_re, Name.Decorator), + (r'\^.', Name.Decorator), + (r'"', Name.Decorator, '#pop'), + ], + 'char': [ + (escape_re + '"', String.Char, '#pop'), + (r'\^."', String.Char, '#pop'), + (r'."', String.Char, '#pop'), + ], + 'tag': [ + (escape_re, Name.Tag), + (r'"', Name.Tag, 'tagString'), + (r'[^(<>\r\n")]+', Name.Tag), + (r'>', Name.Tag, '#pop'), + ], + 'tagString': [ + (r'[^(\^")]+', Name.Tag), + (escape_re, Name.Tag), + (r'[\(|\)]+', Name.Tag), + (r'\^.', Name.Tag), + (r'"', Name.Tag, '#pop'), + ], + 'tuple': [ + (r'(\d+\.)+', Keyword.Constant), + (r'\d+', Keyword.Constant, '#pop'), + 
], + 'bin2': [ + (r'\s+', Number.Hex), + (r'([0-1]\s*){8}', Number.Hex), + (r'}', Number.Hex, '#pop'), + ], + 'commentString1': [ + (r'[^(\^")]+', Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'"', Comment, '#pop'), + ], + 'commentString2': [ + (r'[^(\^{^})]+', Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'{', Comment, '#push'), + (r'}', Comment, '#pop'), + ], + 'commentBlock': [ + (r'\[', Comment, '#push'), + (r'\]', Comment, '#pop'), + (r'"', Comment, "commentString1"), + (r'{', Comment, "commentString2"), + (r'[^(\[\]\"{)]+', Comment), + ], + } + + def analyse_text(text): + """ + Check if code contains REBOL header and so it probably not R code + """ + if re.match(r'^\s*REBOL\s*\[', text, re.IGNORECASE): + # The code starts with REBOL header + return 1.0 + elif re.search(r'\s*REBOL\s*[', text, re.IGNORECASE): + # The code contains REBOL header but also some text before it + return 0.5 + + +class RedLexer(RegexLexer): + """ + A `Red-language <http://www.red-lang.org/>`_ lexer. + + .. 
versionadded:: 2.0 + """ + name = 'Red' + aliases = ['red', 'red/system'] + filenames = ['*.red', '*.reds'] + mimetypes = ['text/x-red', 'text/x-red-system'] + + flags = re.IGNORECASE | re.MULTILINE + + escape_re = r'(?:\^\([0-9a-f]{1,4}\)*)' + + def word_callback(lexer, match): + word = match.group() + + if re.match(".*:$", word): + yield match.start(), Generic.Subheading, word + elif re.match( + r'(if|unless|either|any|all|while|until|loop|repeat|' + r'foreach|forall|func|function|does|has|switch|' + r'case|reduce|compose|get|set|print|prin|equal\?|' + r'not-equal\?|strict-equal\?|lesser\?|greater\?|lesser-or-equal\?|' + r'greater-or-equal\?|same\?|not|type\?|stats|' + r'bind|union|replace|charset|routine)$', word): + yield match.start(), Name.Builtin, word + elif re.match( + r'(make|random|reflect|to|form|mold|absolute|add|divide|multiply|negate|' + r'power|remainder|round|subtract|even\?|odd\?|and~|complement|or~|xor~|' + r'append|at|back|change|clear|copy|find|head|head\?|index\?|insert|' + r'length\?|next|pick|poke|remove|reverse|select|sort|skip|swap|tail|tail\?|' + r'take|trim|create|close|delete|modify|open|open\?|query|read|rename|' + r'update|write)$', word): + yield match.start(), Name.Function, word + elif re.match( + r'(yes|on|no|off|true|false|tab|cr|lf|newline|escape|slash|sp|space|null|' + r'none|crlf|dot|null-byte)$', word): + yield match.start(), Name.Builtin.Pseudo, word + elif re.match( + r'(#system-global|#include|#enum|#define|#either|#if|#import|#export|' + r'#switch|#default|#get-definition)$', word): + yield match.start(), Keyword.Namespace, word + elif re.match( + r'(system|halt|quit|quit-return|do|load|q|recycle|call|run|ask|parse|' + r'raise-error|return|exit|break|alias|push|pop|probe|\?\?|spec-of|body-of|' + r'quote|forever)$', word): + yield match.start(), Name.Exception, word + elif re.match( + r'(action\?|block\?|char\?|datatype\?|file\?|function\?|get-path\?|zero\?|' + 
r'get-word\?|integer\?|issue\?|lit-path\?|lit-word\?|logic\?|native\?|' + r'op\?|paren\?|path\?|refinement\?|set-path\?|set-word\?|string\?|unset\?|' + r'any-struct\?|none\?|word\?|any-series\?)$', word): + yield match.start(), Keyword, word + elif re.match(r'(JNICALL|stdcall|cdecl|infix)$', word): + yield match.start(), Keyword.Namespace, word + elif re.match("to-.*", word): + yield match.start(), Keyword, word + elif re.match('(\+|-|\*|/|//|\*\*|and|or|xor|=\?|=|==|===|<>|<|>|<=|>=|<<|>>|<<<|>>>|%|-\*\*)$', word): + yield match.start(), Operator, word + elif re.match(".*\!$", word): + yield match.start(), Keyword.Type, word + elif re.match("'.*", word): + yield match.start(), Name.Variable.Instance, word # lit-word + elif re.match("#.*", word): + yield match.start(), Name.Label, word # issue + elif re.match("%.*", word): + yield match.start(), Name.Decorator, word # file + elif re.match(":.*", word): + yield match.start(), Generic.Subheading, word # get-word + else: + yield match.start(), Name.Variable, word + + tokens = { + 'root': [ + (r'[^R]+', Comment), + (r'Red/System\s+\[', Generic.Strong, 'script'), + (r'Red\s+\[', Generic.Strong, 'script'), + (r'R', Comment) + ], + 'script': [ + (r'\s+', Text), + (r'#"', String.Char, 'char'), + (r'#{[0-9a-f\s]*}', Number.Hex), + (r'2#{', Number.Hex, 'bin2'), + (r'64#{[0-9a-z+/=\s]*}', Number.Hex), + (r'([0-9a-f]+)(h)((\s)|(?=[\[\]{}""\(\)]))', + bygroups(Number.Hex, Name.Variable, Whitespace)), + (r'"', String, 'string'), + (r'{', String, 'string2'), + (r';#+.*\n', Comment.Special), + (r';\*+.*\n', Comment.Preproc), + (r';.*\n', Comment), + (r'%"', Name.Decorator, 'stringFile'), + (r'%[^(\^{^")\s\[\]]+', Name.Decorator), + (r'[+-]?([a-z]{1,3})?\$\d+(\.\d+)?', Number.Float), # money + (r'[+-]?\d+\:\d+(\:\d+)?(\.\d+)?', String.Other), # time + (r'\d+[\-\/][0-9a-z]+[\-\/]\d+(\/\d+\:\d+((\:\d+)?' 
+ r'([\.\d+]?([+-]?\d+:\d+)?)?)?)?', String.Other), # date + (r'\d+(\.\d+)+\.\d+', Keyword.Constant), # tuple + (r'\d+[xX]\d+', Keyword.Constant), # pair + (r'[+-]?\d+(\'\d+)?([\.,]\d*)?[eE][+-]?\d+', Number.Float), + (r'[+-]?\d+(\'\d+)?[\.,]\d*', Number.Float), + (r'[+-]?\d+(\'\d+)?', Number), + (r'[\[\]\(\)]', Generic.Strong), + (r'[a-z]+[^(\^{"\s:)]*://[^(\^{"\s)]*', Name.Decorator), # url + (r'mailto:[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # url + (r'[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # email + (r'comment\s"', Comment, 'commentString1'), + (r'comment\s{', Comment, 'commentString2'), + (r'comment\s\[', Comment, 'commentBlock'), + (r'comment\s[^(\s{\"\[]+', Comment), + (r'/[^(\^{^")\s/[\]]*', Name.Attribute), + (r'([^(\^{^")\s/[\]]+)(?=[:({"\s/\[\]])', word_callback), + (r'<[\w:.-]*>', Name.Tag), + (r'<[^(<>\s")]+', Name.Tag, 'tag'), + (r'([^(\^{^")\s]+)', Text), + ], + 'string': [ + (r'[^(\^")]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'"', String, '#pop'), + ], + 'string2': [ + (r'[^(\^{^})]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'{', String, '#push'), + (r'}', String, '#pop'), + ], + 'stringFile': [ + (r'[^(\^")]+', Name.Decorator), + (escape_re, Name.Decorator), + (r'\^.', Name.Decorator), + (r'"', Name.Decorator, '#pop'), + ], + 'char': [ + (escape_re + '"', String.Char, '#pop'), + (r'\^."', String.Char, '#pop'), + (r'."', String.Char, '#pop'), + ], + 'tag': [ + (escape_re, Name.Tag), + (r'"', Name.Tag, 'tagString'), + (r'[^(<>\r\n")]+', Name.Tag), + (r'>', Name.Tag, '#pop'), + ], + 'tagString': [ + (r'[^(\^")]+', Name.Tag), + (escape_re, Name.Tag), + (r'[\(|\)]+', Name.Tag), + (r'\^.', Name.Tag), + (r'"', Name.Tag, '#pop'), + ], + 'tuple': [ + (r'(\d+\.)+', Keyword.Constant), + (r'\d+', Keyword.Constant, '#pop'), + ], + 'bin2': [ + (r'\s+', Number.Hex), + (r'([0-1]\s*){8}', Number.Hex), + (r'}', Number.Hex, '#pop'), + ], + 
'commentString1': [ + (r'[^(\^")]+', Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'"', Comment, '#pop'), + ], + 'commentString2': [ + (r'[^(\^{^})]+', Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'{', Comment, '#push'), + (r'}', Comment, '#pop'), + ], + 'commentBlock': [ + (r'\[', Comment, '#push'), + (r'\]', Comment, '#pop'), + (r'"', Comment, "commentString1"), + (r'{', Comment, "commentString2"), + (r'[^(\[\]\"{)]+', Comment), + ], + } diff --git a/pygments/lexers/misc/smalltalk.py b/pygments/lexers/misc/smalltalk.py new file mode 100644 index 00000000..6467a16d --- /dev/null +++ b/pygments/lexers/misc/smalltalk.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.smalltalk + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Smalltalk and related languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, default +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation + +__all__ = ['SmalltalkLexer', 'NewspeakLexer'] + + +class SmalltalkLexer(RegexLexer): + """ + For `Smalltalk <http://www.smalltalk.org/>`_ syntax. + Contributed by Stefan Matthias Aust. + Rewritten by Nils Winter. + + .. 
versionadded:: 0.10 + """ + name = 'Smalltalk' + filenames = ['*.st'] + aliases = ['smalltalk', 'squeak', 'st'] + mimetypes = ['text/x-smalltalk'] + + tokens = { + 'root': [ + (r'(<)(\w+:)(.*?)(>)', bygroups(Text, Keyword, Text, Text)), + include('squeak fileout'), + include('whitespaces'), + include('method definition'), + (r'(\|)([\w\s]*)(\|)', bygroups(Operator, Name.Variable, Operator)), + include('objects'), + (r'\^|\:=|\_', Operator), + # temporaries + (r'[\]({}.;!]', Text), + ], + 'method definition': [ + # Not perfect can't allow whitespaces at the beginning and the + # without breaking everything + (r'([a-zA-Z]+\w*:)(\s*)(\w+)', + bygroups(Name.Function, Text, Name.Variable)), + (r'^(\b[a-zA-Z]+\w*\b)(\s*)$', bygroups(Name.Function, Text)), + (r'^([-+*/\\~<>=|&!?,@%]+)(\s*)(\w+)(\s*)$', + bygroups(Name.Function, Text, Name.Variable, Text)), + ], + 'blockvariables': [ + include('whitespaces'), + (r'(:)(\s*)(\w+)', + bygroups(Operator, Text, Name.Variable)), + (r'\|', Operator, '#pop'), + default('#pop'), # else pop + ], + 'literals': [ + (r"'(''|[^'])*'", String, 'afterobject'), + (r'\$.', String.Char, 'afterobject'), + (r'#\(', String.Symbol, 'parenth'), + (r'\)', Text, 'afterobject'), + (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number, 'afterobject'), + ], + '_parenth_helper': [ + include('whitespaces'), + (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number), + (r'[-+*/\\~<>=|&#!?,@%\w:]+', String.Symbol), + # literals + (r"'(''|[^'])*'", String), + (r'\$.', String.Char), + (r'#*\(', String.Symbol, 'inner_parenth'), + ], + 'parenth': [ + # This state is a bit tricky since + # we can't just pop this state + (r'\)', String.Symbol, ('root', 'afterobject')), + include('_parenth_helper'), + ], + 'inner_parenth': [ + (r'\)', String.Symbol, '#pop'), + include('_parenth_helper'), + ], + 'whitespaces': [ + # skip whitespace and comments + (r'\s+', Text), + (r'"(""|[^"])*"', Comment), + ], + 'objects': [ + (r'\[', Text, 'blockvariables'), + (r'\]', Text, 'afterobject'), + 
(r'\b(self|super|true|false|nil|thisContext)\b', + Name.Builtin.Pseudo, 'afterobject'), + (r'\b[A-Z]\w*(?!:)\b', Name.Class, 'afterobject'), + (r'\b[a-z]\w*(?!:)\b', Name.Variable, 'afterobject'), + (r'#("(""|[^"])*"|[-+*/\\~<>=|&!?,@%]+|[\w:]+)', + String.Symbol, 'afterobject'), + include('literals'), + ], + 'afterobject': [ + (r'! !$', Keyword, '#pop'), # squeak chunk delimiter + include('whitespaces'), + (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)', + Name.Builtin, '#pop'), + (r'\b(new\b(?!:))', Name.Builtin), + (r'\:=|\_', Operator, '#pop'), + (r'\b[a-zA-Z]+\w*:', Name.Function, '#pop'), + (r'\b[a-zA-Z]+\w*', Name.Function), + (r'\w+:?|[-+*/\\~<>=|&!?,@%]+', Name.Function, '#pop'), + (r'\.', Punctuation, '#pop'), + (r';', Punctuation), + (r'[\])}]', Text), + (r'[\[({]', Text, '#pop'), + ], + 'squeak fileout': [ + # Squeak fileout format (optional) + (r'^"(""|[^"])*"!', Keyword), + (r"^'(''|[^'])*'!", Keyword), + (r'^(!)(\w+)( commentStamp: )(.*?)( prior: .*?!\n)(.*?)(!)', + bygroups(Keyword, Name.Class, Keyword, String, Keyword, Text, Keyword)), + (r"^(!)(\w+(?: class)?)( methodsFor: )('(?:''|[^'])*')(.*?!)", + bygroups(Keyword, Name.Class, Keyword, String, Keyword)), + (r'^(\w+)( subclass: )(#\w+)' + r'(\s+instanceVariableNames: )(.*?)' + r'(\s+classVariableNames: )(.*?)' + r'(\s+poolDictionaries: )(.*?)' + r'(\s+category: )(.*?)(!)', + bygroups(Name.Class, Keyword, String.Symbol, Keyword, String, Keyword, + String, Keyword, String, Keyword, String, Keyword)), + (r'^(\w+(?: class)?)(\s+instanceVariableNames: )(.*?)(!)', + bygroups(Name.Class, Keyword, String, Keyword)), + (r'(!\n)(\].*)(! !)$', bygroups(Keyword, Text, Keyword)), + (r'! !$', Keyword), + ], + } + + +class NewspeakLexer(RegexLexer): + """ + For `Newspeak <http://newspeaklanguage.org/>` syntax. + + .. 
versionadded:: 1.1 + """ + name = 'Newspeak' + filenames = ['*.ns2'] + aliases = ['newspeak', ] + mimetypes = ['text/x-newspeak'] + + tokens = { + 'root': [ + (r'\b(Newsqueak2)\b', Keyword.Declaration), + (r"'[^']*'", String), + (r'\b(class)(\s+)(\w+)(\s*)', + bygroups(Keyword.Declaration, Text, Name.Class, Text)), + (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b', + Keyword), + (r'(\w+\:)(\s*)([a-zA-Z_]\w+)', + bygroups(Name.Function, Text, Name.Variable)), + (r'(\w+)(\s*)(=)', + bygroups(Name.Attribute, Text, Operator)), + (r'<\w+>', Comment.Special), + include('expressionstat'), + include('whitespace') + ], + + 'expressionstat': [ + (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), + (r'\d+', Number.Integer), + (r':\w+', Name.Variable), + (r'(\w+)(::)', bygroups(Name.Variable, Operator)), + (r'\w+:', Name.Function), + (r'\w+', Name.Variable), + (r'\(|\)', Punctuation), + (r'\[|\]', Punctuation), + (r'\{|\}', Punctuation), + + (r'(\^|\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-|:)', Operator), + (r'\.|;', Punctuation), + include('whitespace'), + include('literals'), + ], + 'literals': [ + (r'\$.', String), + (r"'[^']*'", String), + (r"#'[^']*'", String.Symbol), + (r"#\w+:?", String.Symbol), + (r"#(\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-)+", String.Symbol) + ], + 'whitespace': [ + (r'\s+', Text), + (r'"[^"]*"', Comment) + ], + } diff --git a/pygments/lexers/misc/snobol.py b/pygments/lexers/misc/snobol.py new file mode 100644 index 00000000..97f614bd --- /dev/null +++ b/pygments/lexers/misc/snobol.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.snobol + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for the SNOBOL language. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +from pygments.lexer import RegexLexer, bygroups +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation + +__all__ = ['SnobolLexer'] + + +class SnobolLexer(RegexLexer): + """ + Lexer for the SNOBOL4 programming language. + + Recognizes the common ASCII equivalents of the original SNOBOL4 operators. + Does not require spaces around binary operators. + + .. versionadded:: 1.5 + """ + + name = "Snobol" + aliases = ["snobol"] + filenames = ['*.snobol'] + mimetypes = ['text/x-snobol'] + + tokens = { + # root state, start of line + # comments, continuation lines, and directives start in column 1 + # as do labels + 'root': [ + (r'\*.*\n', Comment), + (r'[\+\.] ', Punctuation, 'statement'), + (r'-.*\n', Comment), + (r'END\s*\n', Name.Label, 'heredoc'), + (r'[A-Za-z\$][\w$]*', Name.Label, 'statement'), + (r'\s+', Text, 'statement'), + ], + # statement state, line after continuation or label + 'statement': [ + (r'\s*\n', Text, '#pop'), + (r'\s+', Text), + (r'(?<=[^\w.])(LT|LE|EQ|NE|GE|GT|INTEGER|IDENT|DIFFER|LGT|SIZE|' + r'REPLACE|TRIM|DUPL|REMDR|DATE|TIME|EVAL|APPLY|OPSYN|LOAD|UNLOAD|' + r'LEN|SPAN|BREAK|ANY|NOTANY|TAB|RTAB|REM|POS|RPOS|FAIL|FENCE|' + r'ABORT|ARB|ARBNO|BAL|SUCCEED|INPUT|OUTPUT|TERMINAL)(?=[^\w.])', + Name.Builtin), + (r'[A-Za-z][\w\.]*', Name), + # ASCII equivalents of original operators + # | for the EBCDIC equivalent, ! 
likewise + # \ for EBCDIC negation + (r'\*\*|[\?\$\.!%\*/#+\-@\|&\\=]', Operator), + (r'"[^"]*"', String), + (r"'[^']*'", String), + # Accept SPITBOL syntax for real numbers + # as well as Macro SNOBOL4 + (r'[0-9]+(?=[^\.EeDd])', Number.Integer), + (r'[0-9]+(\.[0-9]*)?([EDed][-+]?[0-9]+)?', Number.Float), + # Goto + (r':', Punctuation, 'goto'), + (r'[\(\)<>,;]', Punctuation), + ], + # Goto block + 'goto': [ + (r'\s*\n', Text, "#pop:2"), + (r'\s+', Text), + (r'F|S', Keyword), + (r'(\()([A-Za-z][\w.]*)(\))', + bygroups(Punctuation, Name.Label, Punctuation)) + ], + # everything after the END statement is basically one + # big heredoc. + 'heredoc': [ + (r'.*\n', String.Heredoc) + ] + } diff --git a/pygments/lexers/misc/tcl.py b/pygments/lexers/misc/tcl.py new file mode 100644 index 00000000..d276aa94 --- /dev/null +++ b/pygments/lexers/misc/tcl.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.tcl + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Tcl and related languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number +from pygments.util import shebang_matches + +__all__ = ['TclLexer'] + + +class TclLexer(RegexLexer): + """ + For Tcl source code. + + .. 
versionadded:: 0.10 + """ + + keyword_cmds_re = words(( + 'after', 'apply', 'array', 'break', 'catch', 'continue', 'elseif', 'else', 'error', + 'eval', 'expr', 'for', 'foreach', 'global', 'if', 'namespace', 'proc', 'rename', 'return', + 'set', 'switch', 'then', 'trace', 'unset', 'update', 'uplevel', 'upvar', 'variable', + 'vwait', 'while'), prefix=r'\b', suffix=r'\b') + + builtin_cmds_re = words(( + 'append', 'bgerror', 'binary', 'cd', 'chan', 'clock', 'close', 'concat', 'dde', 'dict', + 'encoding', 'eof', 'exec', 'exit', 'fblocked', 'fconfigure', 'fcopy', 'file', + 'fileevent', 'flush', 'format', 'gets', 'glob', 'history', 'http', 'incr', 'info', 'interp', + 'join', 'lappend', 'lassign', 'lindex', 'linsert', 'list', 'llength', 'load', 'loadTk', + 'lrange', 'lrepeat', 'lreplace', 'lreverse', 'lsearch', 'lset', 'lsort', 'mathfunc', + 'mathop', 'memory', 'msgcat', 'open', 'package', 'pid', 'pkg::create', 'pkg_mkIndex', + 'platform', 'platform::shell', 'puts', 'pwd', 're_syntax', 'read', 'refchan', + 'regexp', 'registry', 'regsub', 'scan', 'seek', 'socket', 'source', 'split', 'string', + 'subst', 'tell', 'time', 'tm', 'unknown', 'unload'), prefix=r'\b', suffix=r'\b') + + name = 'Tcl' + aliases = ['tcl'] + filenames = ['*.tcl'] + mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl'] + + def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""): + return [ + (keyword_cmds_re, Keyword, 'params' + context), + (builtin_cmds_re, Name.Builtin, 'params' + context), + (r'([\w\.\-]+)', Name.Variable, 'params' + context), + (r'#', Comment, 'comment'), + ] + + tokens = { + 'root': [ + include('command'), + include('basic'), + include('data'), + (r'}', Keyword), # HACK: somehow we miscounted our braces + ], + 'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re), + 'command-in-brace': _gen_command_rules(keyword_cmds_re, + builtin_cmds_re, + "-in-brace"), + 'command-in-bracket': _gen_command_rules(keyword_cmds_re, + builtin_cmds_re, + 
"-in-bracket"), + 'command-in-paren': _gen_command_rules(keyword_cmds_re, + builtin_cmds_re, + "-in-paren"), + 'basic': [ + (r'\(', Keyword, 'paren'), + (r'\[', Keyword, 'bracket'), + (r'\{', Keyword, 'brace'), + (r'"', String.Double, 'string'), + (r'(eq|ne|in|ni)\b', Operator.Word), + (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator), + ], + 'data': [ + (r'\s+', Text), + (r'0x[a-fA-F0-9]+', Number.Hex), + (r'0[0-7]+', Number.Oct), + (r'\d+\.\d+', Number.Float), + (r'\d+', Number.Integer), + (r'\$([\w\.\-\:]+)', Name.Variable), + (r'([\w\.\-\:]+)', Text), + ], + 'params': [ + (r';', Keyword, '#pop'), + (r'\n', Text, '#pop'), + (r'(else|elseif|then)\b', Keyword), + include('basic'), + include('data'), + ], + 'params-in-brace': [ + (r'}', Keyword, ('#pop', '#pop')), + include('params') + ], + 'params-in-paren': [ + (r'\)', Keyword, ('#pop', '#pop')), + include('params') + ], + 'params-in-bracket': [ + (r'\]', Keyword, ('#pop', '#pop')), + include('params') + ], + 'string': [ + (r'\[', String.Double, 'string-square'), + (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\])', String.Double), + (r'"', String.Double, '#pop') + ], + 'string-square': [ + (r'\[', String.Double, 'string-square'), + (r'(?s)(\\\\|\\[0-7]+|\\.|\\\n|[^\]\\])', String.Double), + (r'\]', String.Double, '#pop') + ], + 'brace': [ + (r'}', Keyword, '#pop'), + include('command-in-brace'), + include('basic'), + include('data'), + ], + 'paren': [ + (r'\)', Keyword, '#pop'), + include('command-in-paren'), + include('basic'), + include('data'), + ], + 'bracket': [ + (r'\]', Keyword, '#pop'), + include('command-in-bracket'), + include('basic'), + include('data'), + ], + 'comment': [ + (r'.*[^\\]\n', Comment, '#pop'), + (r'.*\\\n', Comment), + ], + } + + def analyse_text(text): + return shebang_matches(text, r'(tcl)') diff --git a/pygments/lexers/misc/urbi.py b/pygments/lexers/misc/urbi.py new file mode 100644 index 00000000..ad84d1db --- /dev/null +++ b/pygments/lexers/misc/urbi.py @@ -0,0 +1,133 @@ +# -*- 
# -*- coding: utf-8 -*-
"""
    pygments.lexers.misc.urbi
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for UrbiScript language.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import ExtendedRegexLexer, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation

__all__ = ['UrbiscriptLexer']


class UrbiscriptLexer(ExtendedRegexLexer):
    """
    For UrbiScript source code.

    .. versionadded:: 1.5
    """

    name = 'UrbiScript'
    aliases = ['urbiscript']
    filenames = ['*.u']
    mimetypes = ['application/x-urbiscript']

    flags = re.DOTALL

    # TODO
    # - handle Experimental and deprecated tags with specific tokens
    # - handle Angles and Durations with specific tokens

    def blob_callback(lexer, match, ctx):
        """
        Lex a binary blob header (``\\B(<size>)(``) found inside a string.

        Used as the action of the blob rules in the ``string.double`` and
        ``string.single`` states.  The rule's regex captures:

        * group 1 -- the string text preceding the blob,
        * group 2 -- the blob header ``\\B(<size>)(``,
        * group 3 -- the decimal ``<size>`` alone.

        The text before the blob is always emitted as `String`.  If the
        character found ``<size>`` characters after the header is ``)``,
        header, payload and closing paren are emitted as one
        `String.Escape` token and the position is moved past the blob.
        Otherwise (wrong size, or the input ends before the announced
        payload does) only the header is emitted as `String` and lexing
        resumes right after it.

        The callback advances ``ctx.pos`` itself and never touches the
        state stack.
        """
        text_before_blob = match.group(1)
        blob_start = match.group(2)
        blob_size = int(match.group(3))

        yield match.start(), String, text_before_blob
        ctx.pos += len(text_before_blob)

        payload_start = match.end()
        payload_end = payload_start + blob_size

        # if blob size doesn't match blob format (example : "\B(2)(aaa)")
        # yield blob as a string.  The bounds check comes first so that a
        # size pointing past the end of the input is treated as a malformed
        # blob instead of raising IndexError.
        if payload_end >= ctx.end or ctx.text[payload_end] != ")":
            # the header starts where group 2 starts, not at match.start()
            yield match.start(2), String, blob_start
            ctx.pos += len(blob_start)
            return

        # if blob is well formatted, yield as Escape
        blob_text = blob_start + ctx.text[payload_start:payload_end] + ")"
        yield match.start(2), String.Escape, blob_text
        ctx.pos = payload_end + 1  # +1 is the ending ")"

    tokens = {
        'root': [
            (r'\s+', Text),
            # comments
            (r'//.*?\n', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'(every|for|loop|while)(?:;|&|\||,)', Keyword),
            (words((
                'assert', 'at', 'break', 'case', 'catch', 'closure', 'compl',
                'continue', 'default', 'else', 'enum', 'every', 'external',
                'finally', 'for', 'freezeif', 'if', 'new', 'onleave', 'return',
                'stopif', 'switch', 'this', 'throw', 'timeout', 'try',
                'waituntil', 'whenever', 'while'), suffix=r'\b'),
             Keyword),
            (words((
                'asm', 'auto', 'bool', 'char', 'const_cast', 'delete', 'double',
                'dynamic_cast', 'explicit', 'export', 'extern', 'float', 'friend',
                'goto', 'inline', 'int', 'long', 'mutable', 'namespace', 'register',
                'reinterpret_cast', 'short', 'signed', 'sizeof', 'static_cast',
                'struct', 'template', 'typedef', 'typeid', 'typename', 'union',
                'unsigned', 'using', 'virtual', 'volatile', 'wchar_t'), suffix=r'\b'),
             Keyword.Reserved),
            # deprecated keywords, use a meaningful token when available
            (r'(emit|foreach|internal|loopn|static)\b', Keyword),
            # ignored keywords, use a meaningful token when available
            (r'(private|protected|public)\b', Keyword),
            (r'(var|do|const|function|class)\b', Keyword.Declaration),
            (r'(true|false|nil|void)\b', Keyword.Constant),
            # NOTE: 'nil' is also listed below, but the Keyword.Constant rule
            # above is tried first, so it wins for that word.
            (words((
                'Barrier', 'Binary', 'Boolean', 'CallMessage', 'Channel', 'Code',
                'Comparable', 'Container', 'Control', 'Date', 'Dictionary', 'Directory',
                'Duration', 'Enumeration', 'Event', 'Exception', 'Executable', 'File',
                'Finalizable', 'Float', 'FormatInfo', 'Formatter', 'Global', 'Group',
                'Hash', 'InputStream', 'IoService', 'Job', 'Kernel', 'Lazy', 'List',
                'Loadable', 'Lobby', 'Location', 'Logger', 'Math', 'Mutex', 'nil',
                'Object', 'Orderable', 'OutputStream', 'Pair', 'Path', 'Pattern',
                'Position', 'Primitive', 'Process', 'Profile', 'PseudoLazy', 'PubSub',
                'RangeIterable', 'Regexp', 'Semaphore', 'Server', 'Singleton', 'Socket',
                'StackFrame', 'Stream', 'String', 'System', 'Tag', 'Timeout',
                'Traceable', 'TrajectoryGenerator', 'Triplet', 'Tuple', 'UObject',
                'UValue', 'UVar'), suffix=r'\b'),
             Name.Builtin),
            # NOTE: 'this' is already consumed by the Keyword word list above,
            # so this rule is kept only for reference.
            (r'(?:this)\b', Name.Builtin.Pseudo),
            # don't match single | and &
            (r'(?:[-=+*%/<>~^:]+|\.&?|\|\||&&)', Operator),
            (r'(?:and_eq|and|bitand|bitor|in|not|not_eq|or_eq|or|xor_eq|xor)\b',
             Operator.Word),
            (r'[{}\[\]()]+', Punctuation),
            (r'(?:;|\||,|&|\?|!)+', Punctuation),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            # Float, Integer, Angle and Duration
            (r'(?:[0-9]+(?:(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)?'
             r'((?:rad|deg|grad)|(?:ms|s|min|h|d))?)\b', Number.Float),
            # handle binary blob in strings
            (r'"', String.Double, "string.double"),
            (r"'", String.Single, "string.single"),
        ],
        'string.double': [
            # blob rule first: it stays in the string state after the blob
            (r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback),
            (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'),
        ],
        'string.single': [
            (r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback),
            (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'),
        ],
        # from http://pygments.org/docs/lexerdevelopment/#changing-states
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ]
    }