diff options
Diffstat (limited to 'pygments/lexers/other.py')
-rw-r--r-- | pygments/lexers/other.py | 153 |
1 files changed, 132 insertions, 21 deletions
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index c8557922..10598fb4 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -14,7 +14,8 @@ import re from pygments.lexer import RegexLexer, include, bygroups, using, \ this, combined, ExtendedRegexLexer from pygments.token import Error, Punctuation, Literal, Token, \ - Text, Comment, Operator, Keyword, Name, String, Number, Generic + Text, Comment, Operator, Keyword, Name, String, Number, Generic, \ + Whitespace from pygments.util import get_bool_opt from pygments.lexers.web import HtmlLexer @@ -35,7 +36,7 @@ __all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'MOOCodeLexer', 'ECLLexer', 'UrbiscriptLexer', 'OpenEdgeLexer', 'BroLexer', 'MscgenLexer', 'KconfigLexer', 'VGLLexer', 'SourcePawnLexer', 'RobotFrameworkLexer', 'PuppetLexer', 'NSISLexer', 'RPMSpecLexer', - 'CbmBasicV2Lexer', 'AutoItLexer'] + 'CbmBasicV2Lexer', 'AutoItLexer', 'RexxLexer'] class ECLLexer(RegexLexer): @@ -245,7 +246,7 @@ class MOOCodeLexer(RegexLexer): """ name = 'MOOCode' filenames = ['*.moo'] - aliases = ['moocode'] + aliases = ['moocode', 'moo'] mimetypes = ['text/x-moocode'] tokens = { @@ -289,7 +290,7 @@ class SmalltalkLexer(RegexLexer): """ name = 'Smalltalk' filenames = ['*.st'] - aliases = ['smalltalk', 'squeak'] + aliases = ['smalltalk', 'squeak', 'st'] mimetypes = ['text/x-smalltalk'] tokens = { @@ -363,7 +364,7 @@ class SmalltalkLexer(RegexLexer): include('literals'), ], 'afterobject' : [ - (r'! !$', Keyword , '#pop'), # squeak chunk delimeter + (r'! 
!$', Keyword , '#pop'), # squeak chunk delimiter include('whitespaces'), (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)', Name.Builtin, '#pop'), @@ -1397,8 +1398,6 @@ class RebolLexer(RegexLexer): (r';.*\n', Comment), (r'%"', Name.Decorator, 'stringFile'), (r'%[^(\^{^")\s\[\]]+', Name.Decorator), - (r'<[a-zA-Z0-9:._-]*>', Name.Tag), - (r'<[^(<>\s")]+', Name.Tag, 'tag'), (r'[+-]?([a-zA-Z]{1,3})?\$\d+(\.\d+)?', Number.Float), # money (r'[+-]?\d+\:\d+(\:\d+)?(\.\d+)?', String.Other), # time (r'\d+\-[0-9a-zA-Z]+\-\d+(\/\d+\:\d+(\:\d+)?' @@ -1415,6 +1414,8 @@ class RebolLexer(RegexLexer): (r'comment\s', Comment, 'comment'), (r'/[^(\^{^")\s/[\]]*', Name.Attribute), (r'([^(\^{^")\s/[\]]+)(?=[:({"\s/\[\]])', word_callback), + (r'<[a-zA-Z0-9:._-]*>', Name.Tag), + (r'<[^(<>\s")]+', Name.Tag, 'tag'), (r'([^(\^{^")\s]+)', Text), ], 'string': [ @@ -1961,11 +1962,11 @@ class AsymptoteLexer(RegexLexer): from pygments.lexers._asybuiltins import ASYFUNCNAME, ASYVARNAME for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): - if token is Name and value in ASYFUNCNAME: - token = Name.Function - elif token is Name and value in ASYVARNAME: - token = Name.Variable - yield index, token, value + if token is Name and value in ASYFUNCNAME: + token = Name.Function + elif token is Name and value in ASYVARNAME: + token = Name.Variable + yield index, token, value class PostScriptLexer(RegexLexer): @@ -1979,7 +1980,7 @@ class PostScriptLexer(RegexLexer): *New in Pygments 1.4.* """ name = 'PostScript' - aliases = ['postscript'] + aliases = ['postscript', 'postscr'] filenames = ['*.ps', '*.eps'] mimetypes = ['application/postscript'] @@ -2067,7 +2068,7 @@ class AutohotkeyLexer(RegexLexer): *New in Pygments 1.4.* """ name = 'autohotkey' - aliases = ['ahk'] + aliases = ['ahk', 'autohotkey'] filenames = ['*.ahk', '*.ahkl'] mimetypes = ['text/x-autohotkey'] @@ -2352,7 +2353,7 @@ class ProtoBufLexer(RegexLexer): """ name = 'Protocol Buffer' - aliases = ['protobuf'] + 
aliases = ['protobuf', 'proto'] filenames = ['*.proto'] tokens = { @@ -2839,8 +2840,8 @@ class BroLexer(RegexLexer): (r'\\\n', Text), # Keywords (r'(add|alarm|break|case|const|continue|delete|do|else|enum|event' - r'|export|for|function|if|global|local|module|next' - r'|of|print|redef|return|schedule|type|when|while)\b', Keyword), + r'|export|for|function|if|global|hook|local|module|next' + r'|of|print|redef|return|schedule|switch|type|when|while)\b', Keyword), (r'(addr|any|bool|count|counter|double|file|int|interval|net' r'|pattern|port|record|set|string|subnet|table|time|timer' r'|vector)\b', Keyword.Type), @@ -3306,7 +3307,7 @@ class NSISLexer(RegexLexer): tokens = { 'root': [ (r'[;\#].*\n', Comment), - (r"'.*'", String.Single), + (r"'.*?'", String.Single), (r'"', String.Double, 'str_double'), (r'`', String.Backtick, 'str_backtick'), include('macro'), @@ -3457,7 +3458,7 @@ class RPMSpecLexer(RegexLexer): include('macro'), (r'(?i)^(Name|Version|Release|Epoch|Summary|Group|License|Packager|' r'Vendor|Icon|URL|Distribution|Prefix|Patch[0-9]*|Source[0-9]*|' - r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Provides|Conflicts|' + r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Suggests|Provides|Conflicts|' r'Build[a-z]+|[a-z]+Arch|Auto[a-z]+)(:)(.*)$', bygroups(Generic.Heading, Punctuation, using(this))), (r'^%description', Name.Decorator, 'description'), @@ -3467,7 +3468,7 @@ class RPMSpecLexer(RegexLexer): r'make(?:install)|ghost|patch[0-9]+|find_lang|exclude|verify)', Keyword), include('interpol'), - (r"'.*'", String.Single), + (r"'.*?'", String.Single), (r'"', String.Double, 'string'), (r'.', Text), ], @@ -3624,7 +3625,7 @@ class AutoItLexer(RegexLexer): (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name), (r'\\|\'', Text), (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape), - (r'_\n', Text), # Line continuation + (r'_\n', Text), # Line continuation include('garbage'), ], 'commands': [ @@ -3665,3 +3666,113 @@ class AutoItLexer(RegexLexer): (r'[^\S\n]', Text), ], } + + +class 
RexxLexer(RegexLexer): + """ + `Rexx <http://www.rexxinfo.org/>`_ is a scripting language available for + a wide range of different platforms with its roots found on mainframe + systems. It is popular for I/O- and data-based tasks and can act as a glue + language to bind different applications together. + + *New in Pygments 1.7.* + """ + name = 'Rexx' + aliases = ['rexx', 'ARexx', 'arexx'] + filenames = ['*.rexx', '*.rex', '*.rx', '*.arexx'] + mimetypes = ['text/x-rexx'] + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'\s', Whitespace), + (r'/\*', Comment.Multiline, 'comment'), + (r'"', String, 'string_double'), + (r"'", String, 'string_single'), + (r'[0-9]+(\.[0-9]+)?(e[+-]?[0-9])?', Number), + (r'([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b', + bygroups(Name.Function, Whitespace, Operator, Whitespace, + Keyword.Declaration)), + (r'([a-z_][a-z0-9_]*)(\s*)(:)', + bygroups(Name.Label, Whitespace, Operator)), + include('function'), + include('keyword'), + include('operator'), + (r'[a-z_][a-z0-9_]*', Text), + ], + 'function': [ + (r'(abbrev|abs|address|arg|b2x|bitand|bitor|bitxor|c2d|c2x|' + r'center|charin|charout|chars|compare|condition|copies|d2c|' + r'd2x|datatype|date|delstr|delword|digits|errortext|form|' + r'format|fuzz|insert|lastpos|left|length|linein|lineout|lines|' + r'max|min|overlay|pos|queued|random|reverse|right|sign|' + r'sourceline|space|stream|strip|substr|subword|symbol|time|' + r'trace|translate|trunc|value|verify|word|wordindex|' + r'wordlength|wordpos|words|x2b|x2c|x2d|xrange)(\s*)(\()', + bygroups(Name.Builtin, Whitespace, Operator)), + ], + 'keyword': [ + (r'(address|arg|by|call|do|drop|else|end|exit|for|forever|if|' + r'interpret|iterate|leave|nop|numeric|off|on|options|parse|' + r'pull|push|queue|return|say|select|signal|to|then|trace|until|' + r'while)\b', Keyword.Reserved), + ], + 'operator': [ + (ur'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||' + ur'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|' + 
ur'¬>>|¬>|¬|\.|,)', Operator), + ], + 'string_double': [ + (r'[^"\n]+', String), + (r'""', String), + (r'"', String, '#pop'), + (r'\n', Text, '#pop'), # Stray linefeed also terminates strings. + ], + 'string_single': [ + (r'[^\'\n]', String), + (r'\'\'', String), + (r'\'', String, '#pop'), + (r'\n', Text, '#pop'), # Stray linefeed also terminates strings. + ], + 'comment': [ + (r'[^*]+', Comment.Multiline), + (r'\*/', Comment.Multiline, '#pop'), + (r'\*', Comment.Multiline), + ] + } + + _c = lambda s: re.compile(s, re.MULTILINE) + _ADDRESS_COMMAND_PATTERN = _c(r'^\s*address\s+command\b') + _ADDRESS_PATTERN = _c(r'^\s*address\s+') + _DO_WHILE_PATTERN = _c(r'^\s*do\s+while\b') + _IF_THEN_DO_PATTERN = _c(r'^\s*if\b.+\bthen\s+do\s*$') + _PROCEDURE_PATTERN = _c(r'^\s*([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b') + _ELSE_DO_PATTERN = _c(r'\belse\s+do\s*$') + _PARSE_ARG_PATTERN = _c(r'^\s*parse\s+(upper\s+)?(arg|value)\b') + PATTERNS_AND_WEIGHTS = ( + (_ADDRESS_COMMAND_PATTERN, 0.2), + (_ADDRESS_PATTERN, 0.05), + (_DO_WHILE_PATTERN, 0.1), + (_ELSE_DO_PATTERN, 0.1), + (_IF_THEN_DO_PATTERN, 0.1), + (_PROCEDURE_PATTERN, 0.5), + (_PARSE_ARG_PATTERN, 0.2), + ) + + def analyse_text(text): + """ + Check for initial comment and patterns that distinguish Rexx from other + C-like languages. + """ + if re.search(r'/\*\**\s*rexx', text, re.IGNORECASE): + # Header matches MVS Rexx requirements, this is certainly a Rexx + # script. + return 1.0 + elif text.startswith('/*'): + # Header matches general Rexx requirements; the source code might + # still be any language using C comments such as C++, C# or Java. + lowerText = text.lower() + result = sum(weight + for (pattern, weight) in RexxLexer.PATTERNS_AND_WEIGHTS + if pattern.search(lowerText)) + 0.01 + return min(result, 1.0) |