diff options
author | Thomas Aglassinger <roskakori@users.sourceforge.net> | 2013-05-10 01:02:34 +0200 |
---|---|---|
committer | Thomas Aglassinger <roskakori@users.sourceforge.net> | 2013-05-10 01:02:34 +0200 |
commit | c8c931a3908eff4467eca2bcc46e8bd828e94bd5 (patch) | |
tree | 8e61233af8a57adfac6e52c3a6b415c94f184f0c | |
parent | 3ea722b4b51d7136a489dc58c2f9add443b7c697 (diff) | |
download | pygments-c8c931a3908eff4467eca2bcc46e8bd828e94bd5.tar.gz |
Added lexer for Rexx.
-rw-r--r-- | AUTHORS | 3 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
-rw-r--r-- | pygments/lexers/other.py | 158 | ||||
-rw-r--r-- | tests/examplefiles/example.rexx | 39 | ||||
-rw-r--r-- | tests/test_lexers_other.py | 68 |
5 files changed, 259 insertions, 10 deletions
@@ -6,8 +6,9 @@ Major developers are Tim Hatch <tim@timhatch.com> and Armin Ronacher Other contributors, listed alphabetically, are: * Sam Aaron -- Ioke lexer -* Kumar Appaiah -- Debian control lexer * Ali Afshar -- image formatter +* Thomas Aglassinger -- Rexx lexer +* Kumar Appaiah -- Debian control lexer * Andreas Amann -- AppleScript lexer * Timothy Armstrong -- Dart lexer fixes * Jeffrey Arnold -- R/S, Rd, BUGS, Jags, and Stan lexers diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index c881209f..20830941 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -247,6 +247,7 @@ LEXERS = { 'RebolLexer': ('pygments.lexers.other', 'REBOL', ('rebol',), ('*.r', '*.r3'), ('text/x-rebol',)), 'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()), 'RegeditLexer': ('pygments.lexers.text', 'reg', ('registry',), ('*.reg',), ('text/x-windows-registry',)), + 'RexxLexer': ('pygments.lexers.other', 'REXX', ('rexx', 'ARexx', 'arexx'), ('*.rexx', '*.rex', '*.rx', '*.arexx'), ('text/x-rexx',)), 'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)), 'RobotFrameworkLexer': ('pygments.lexers.other', 'RobotFramework', ('RobotFramework', 'robotframework'), ('*.txt', '*.robot'), ('text/x-robotframework',)), 'RstLexer': ('pygments.lexers.text', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 803212b0..c13840af 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -14,7 +14,8 @@ import re from pygments.lexer import RegexLexer, include, bygroups, using, \ this, combined, ExtendedRegexLexer from pygments.token import Error, Punctuation, Literal, Token, \ - Text, Comment, Operator, Keyword, Name, String, Number, Generic + Text, Comment, Operator, Keyword, Name, String, Number, Generic, 
Other, \ + Whitespace from pygments.util import get_bool_opt from pygments.lexers.web import HtmlLexer @@ -35,7 +36,7 @@ __all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'MOOCodeLexer', 'ECLLexer', 'UrbiscriptLexer', 'OpenEdgeLexer', 'BroLexer', 'MscgenLexer', 'KconfigLexer', 'VGLLexer', 'SourcePawnLexer', 'RobotFrameworkLexer', 'PuppetLexer', 'NSISLexer', 'RPMSpecLexer', - 'CbmBasicV2Lexer', 'AutoItLexer'] + 'CbmBasicV2Lexer', 'AutoItLexer', 'RexxLexer'] class ECLLexer(RegexLexer): @@ -363,7 +364,7 @@ class SmalltalkLexer(RegexLexer): include('literals'), ], 'afterobject' : [ - (r'! !$', Keyword , '#pop'), # squeak chunk delimeter + (r'! !$', Keyword , '#pop'), # squeak chunk delimiter include('whitespaces'), (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)', Name.Builtin, '#pop'), @@ -1961,11 +1962,11 @@ class AsymptoteLexer(RegexLexer): from pygments.lexers._asybuiltins import ASYFUNCNAME, ASYVARNAME for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): - if token is Name and value in ASYFUNCNAME: - token = Name.Function - elif token is Name and value in ASYVARNAME: - token = Name.Variable - yield index, token, value + if token is Name and value in ASYFUNCNAME: + token = Name.Function + elif token is Name and value in ASYVARNAME: + token = Name.Variable + yield index, token, value class PostScriptLexer(RegexLexer): @@ -3624,7 +3625,7 @@ class AutoItLexer(RegexLexer): (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name), (r'\\|\'', Text), (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape), - (r'_\n', Text), # Line continuation + (r'_\n', Text), # Line continuation include('garbage'), ], 'commands': [ @@ -3665,3 +3666,142 @@ class AutoItLexer(RegexLexer): (r'[^\S\n]', Text), ], } + + +class RexxLexer(RegexLexer): + """ + `Rexx <http://www.rexxinfo.org/>`_ is a scripting language available for + a wide range of different platforms with its roots found on mainframe + systems. 
It is popular for I/O- and data based tasks and can act as glue + language to bind different applications together. + + *New in Pygments 1.7.* + """ + name = 'REXX' + aliases = ['rexx', 'ARexx', 'arexx'] + filenames = ['*.rexx', '*.rex', '*.rx', '*.arexx'] + mimetypes = ['text/x-rexx'] + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'\s', Whitespace), + (r'/\*', Comment.Multiline, 'comment'), + (r'"', String, 'string_double'), + (r"'", String, 'string_single'), + (r'[0-9]+(\.[0-9]+)?(e[+-]?[0-9])?', Number), + (r'([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b', + bygroups(Name.Function, Whitespace, Operator, Whitespace, Keyword.Declaration)), + (r'([a-z_][a-z0-9_]*)(\s*)(:)', + bygroups(Name.Label, Whitespace, Operator)), + include('function'), + include('keyword'), + include('operator'), + (r'[a-z_][a-z0-9_]*', Text), + ], + 'function': [ + (r'(abbrev|abs|address|arg|b2x|bitand|bitor|bitxor|c2d|c2x|' + r'center|charin|charout|chars|compare|condition|copies|d2c|' + r'd2x|datatype|date|delstr|delword|digits|errortext|form|' + r'format|fuzz|insert|lastpos|left|length|linein|lineout|lines|' + r'max|min|overlay|pos|queued|random|reverse|right|sign|' + r'sourceline|space|stream|strip|substr|subword|symbol|time|' + r'trace|translate|trunc|value|verify|word|wordindex|' + r'wordlength|wordpos|words|x2b|x2c|x2d|xrange)(\s*)([(])', + bygroups(Name.Builtin, Whitespace, Operator)), + ], + 'keyword': [ + (r'(address|arg|by|call|do|drop|else|end|exit|for|forever|if|' + r'interpret|iterate|leave|nop|numeric|off|on|options|parse|' + r'pull|push|queue|return|say|select|signal|to|then|trace|until|' + r'while)\b', Keyword.Reserved), + ], + 'operator': [ + (ur'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||' + ur'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|' + ur'¬>>|¬>|¬|\.|,)', Operator), + ], + 'string_double': [ + (r'[^"\n]', String), + (r'""', String), + (r'"', String, '#pop'), + (r'', Text, '#pop'), # Linefeed also terminates strings. 
+ ], + 'string_single': [ + (r'[^\'\n]', String), + (r'\'\'', String), + (r'\'', String, '#pop'), + (r'', Text, '#pop'), # Linefeed also terminates strings. + ], + 'comment': [ + (r'\*/', Comment.Multiline, '#pop'), + (r'(.|\n)', Comment.Multiline), + ] + } + + _ADDRESS_COMMAND_REGEX = re.compile(r'\s*address\s+command\b', re.IGNORECASE) + _ADDRESS_REGEX = re.compile(r'\s*address\s+', re.IGNORECASE) + _DO_WHILE_REGEX = re.compile(r'\s*do\s+while\b', re.IGNORECASE) + _IF_THEN_DO_REGEX = re.compile(r'\s*if\b.+\bthen\s+do\s*$', re.IGNORECASE) + _PROCEDURE_REGEX = re.compile(r'([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b', re.IGNORECASE) + _ELSE_DO_REGEX = re.compile(r'\s*else\s+do\s*$', re.IGNORECASE) + _PARSE_ARG_REGEX = re.compile(r'\s*parse\s+(upper\s+)?(arg|value)\b', re.IGNORECASE) + _REGEXS = [ + _ADDRESS_COMMAND_REGEX, + _ADDRESS_REGEX, + _DO_WHILE_REGEX, + _ELSE_DO_REGEX, + _IF_THEN_DO_REGEX, + _PROCEDURE_REGEX, + _PARSE_ARG_REGEX, + ] + + def analyse_text(text): + """ + Check for initial comment. + """ + result = 0.0 + if re.search(r'/\*\**\s*rexx', text, re.IGNORECASE): + # Header matches MVS Rexx requirements, this is certainly a Rexx + # script. + result = 1.0 + elif text.startswith('/*'): + # Header matches general Rexx requirements; the source code might + # still be any language using C comments such as C++, C# or Java. + result = 0.01 + + # Check if lines match certain regular expressions and + # collect the respective counts in a dictionary. + regexCount = len(RexxLexer._REGEXS) + regexToCountMap = {} + for regex in RexxLexer._REGEXS: + regexToCountMap[regex] = 0 + for line in (text.split('\n'))[1:]: + regexIndex = 0 + lineHasAnyRegex = False + while not lineHasAnyRegex and (regexIndex < regexCount): + regexToCheck = RexxLexer._REGEXS[regexIndex] + if regexToCheck.match(line) is not None: + regexToCountMap[regexToCheck] = \ + regexToCountMap[regexToCheck] + 1 + lineHasAnyRegex = True + else: + regexIndex += 1 + # Evaluate the findings. 
+ if regexToCountMap[RexxLexer._PROCEDURE_REGEX] > 0: + result += 0.5 + elif regexToCountMap[RexxLexer._ADDRESS_COMMAND_REGEX] > 0: + result += 0.2 + elif regexToCountMap[RexxLexer._ADDRESS_REGEX] > 0: + result += 0.05 + if regexToCountMap[RexxLexer._DO_WHILE_REGEX] > 0: + result += 0.1 + if regexToCountMap[RexxLexer._ELSE_DO_REGEX] > 0: + result += 0.1 + if regexToCountMap[RexxLexer._PARSE_ARG_REGEX] > 0: + result += 0.2 + if regexToCountMap[RexxLexer._IF_THEN_DO_REGEX] > 0: + result += 0.1 + result = min(result, 1.0) + assert 0.0 <= result <= 1.0 + return result diff --git a/tests/examplefiles/example.rexx b/tests/examplefiles/example.rexx new file mode 100644 index 00000000..07b7bacf --- /dev/null +++ b/tests/examplefiles/example.rexx @@ -0,0 +1,39 @@ +/* REXX example. */ + +/* Some basic constructs. */ +almost_pi = 0.1415 + 3 +if almost_pi < 3 then + say 'huh?' +else do + say 'almost_pi=' almost_pi || " - ok" +end +x = '"' || "'" || '''' || """" /* quotes */ + +/* A comment + * spawning multiple + lines. /* / */ + +/* Built-in functions. */ +line = 'line containing some short text' +say WordPos(line, 'some') +say Word(line, 4) + +/* Labels and procedures. */ +some_label : + +divide: procedure + parse arg some other + return some / other + +call divide(5, 2) + +/* Print a text file on MVS. */ +ADDRESS TSO +"ALLOC F(TEXTFILE) DSN('some.text.dsn') SHR REU" +"EXECIO * DISKR TEXTFILE ( FINIS STEM LINES." +"FREE F(TEXTFILE)" +I = 1 +DO WHILE I <= LINES.0 + SAY ' LINE ' I ' : ' LINES.I + I = I + 1 +END diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py new file mode 100644 index 00000000..987887bc --- /dev/null +++ b/tests/test_lexers_other.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +""" + Tests for other lexers + ~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" +import glob +import os +import unittest + +from pygments.lexers import guess_lexer +from pygments.lexers.other import RexxLexer + + +def _exampleFilePath(filename): + return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) + + +class _AnalyseTextTest(unittest.TestCase): + def setUp(self): + raise NotImplementedError('self.lexer must be set') + + def testCanRecognizeAndGuessExampleFiles(self): + for pattern in self.lexer.filenames: + exampleFilesPattern = _exampleFilePath(pattern) + for exampleFilePath in glob.glob(exampleFilesPattern): + exampleFile = open(exampleFilePath, 'rb') + try: + text = exampleFile.read() + probability = self.lexer.analyse_text(text) + self.assertTrue(probability > 0, + '%s must recognize %r' % ( + self.lexer.name, exampleFilePath)) + guessedLexer = guess_lexer(text) + self.assertEqual(guessedLexer.name, self.lexer.name) + finally: + exampleFile.close() + + +class RexxLexerTest(_AnalyseTextTest): + def setUp(self): + self.lexer = RexxLexer() + + def testCanGuessFromText(self): + self.assertAlmostEqual(0.01, + self.lexer.analyse_text('/* */')) + self.assertAlmostEqual(1.0, + self.lexer.analyse_text('''/* Rexx */ + say "hello world"''')) + self.assertLess(0.5, + self.lexer.analyse_text('/* */\n' \ + + 'hello:pRoceduRe\n' \ + + ' say "hello world"')) + self.assertLess(0.2, + self.lexer.analyse_text('''/* */ + if 1 > 0 then do + say "ok" + end + else do + say "huh?" + end''')) + self.assertLess(0.2, + self.lexer.analyse_text('''/* */ + greeting = "hello world!" + parse value greeting "hello" name "!" + say name''')) |