Improved lexers for Easytrieve and JCL

* Added highlighting for the names of Easytrieve file, macro, procedure and report declarations. * Added missing Easytrieve keyword "MACRO". * Cleaned up JCL lexer stack by changing a few named target states to '#pop'. There are still several named states left though.
author: Thomas Aglassinger <roskakori@users.sourceforge.net> 2013-06-02 14:37:59 +0200
committer: Thomas Aglassinger <roskakori@users.sourceforge.net> 2013-06-02 14:37:59 +0200
commit: b3cbb1e20b74269deeb146513b6de353e5583a67 (patch)
tree: ab2797f515131fb5bbee9aeb2fa036e7ac404ead
parent: 26e8af5761aab8daeb85f287d7abf2fa42cb43e1 (diff)
download: pygments-b3cbb1e20b74269deeb146513b6de353e5583a67.tar.gz
3 files changed, 87 insertions, 45 deletions
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 72c6bc43..6c094c7e 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -3720,21 +3720,21 @@ class EasytrieveLexer(RegexLexer):
         'JUSTIFY', 'KANJI-DATE', 'KANJI-DATE-LONG', 'KANJI-TIME', 'KEY',
         'KEY-PRESSED', 'KOKUGO', 'KUN', 'LAST-DUP', 'LE', 'LEVEL', 'LIKE',
         'LINE', 'LINE-COUNT', 'LINE-NUMBER', 'LINK', 'LIST', 'LOW-VALUES',
-        'LQ', 'LS', 'LT', 'MASK', 'MATCHED', 'MEND', 'MESSAGE', 'MOVE',
-        'MSTART', 'NE', 'NEWPAGE', 'NOMASK', 'NOPRINT', 'NOT', 'NOTE',
-        'NOVERIFY', 'NQ', 'NULL', 'OF', 'OR', 'OTHERWISE', 'PA1', 'PA2',
-        'PA3', 'PAGE-COUNT', 'PAGE-NUMBER', 'PARM-REGISTER', 'PATH-ID',
-        'PATTERN', 'PERFORM', 'POINT', 'POS', 'PRIMARY', 'PRINT', 'PROCEDURE',
-        'PROGRAM', 'PUT', 'READ', 'RECORD', 'RECORD-COUNT', 'RECORD-LENGTH',
-        'REFRESH', 'RELEASE', 'RENUM', 'REPEAT', 'REPORT', 'REPORT-INPUT',
-        'RESHOW', 'RESTART', 'RETRIEVE', 'RETURN-CODE', 'ROLLBACK', 'ROW',
-        'S', 'SCREEN', 'SEARCH', 'SECONDARY', 'SELECT', 'SEQUENCE', 'SIZE',
-        'SKIP', 'SOKAKU', 'SORT', 'SQL', 'STOP', 'SUM', 'SYSDATE',
-        'SYSDATE-LONG', 'SYSIN', 'SYSIPT', 'SYSLST', 'SYSPRINT', 'SYSSNAP',
-        'SYSTIME', 'TALLY', 'TERM-COLUMNS', 'TERM-NAME', 'TERM-ROWS',
-        'TERMINATION', 'TITLE', 'TO', 'TRANSFER', 'TRC', 'UNIQUE', 'UNTIL',
-        'UPDATE', 'UPPERCASE', 'USER', 'USERID', 'VALUE', 'VERIFY', 'W',
-        'WHEN', 'WHILE', 'WORK', 'WRITE', 'X', 'XDM', 'XRST'
+        'LQ', 'LS', 'LT', 'MACRO', 'MASK', 'MATCHED', 'MEND', 'MESSAGE',
+        'MOVE', 'MSTART', 'NE', 'NEWPAGE', 'NOMASK', 'NOPRINT', 'NOT',
+        'NOTE', 'NOVERIFY', 'NQ', 'NULL', 'OF', 'OR', 'OTHERWISE', 'PA1',
+        'PA2', 'PA3', 'PAGE-COUNT', 'PAGE-NUMBER', 'PARM-REGISTER',
+        'PATH-ID', 'PATTERN', 'PERFORM', 'POINT', 'POS', 'PRIMARY', 'PRINT',
+        'PROCEDURE', 'PROGRAM', 'PUT', 'READ', 'RECORD', 'RECORD-COUNT',
+        'RECORD-LENGTH', 'REFRESH', 'RELEASE', 'RENUM', 'REPEAT', 'REPORT',
+        'REPORT-INPUT', 'RESHOW', 'RESTART', 'RETRIEVE', 'RETURN-CODE',
+        'ROLLBACK', 'ROW', 'S', 'SCREEN', 'SEARCH', 'SECONDARY', 'SELECT',
+        'SEQUENCE', 'SIZE', 'SKIP', 'SOKAKU', 'SORT', 'SQL', 'STOP', 'SUM',
+        'SYSDATE', 'SYSDATE-LONG', 'SYSIN', 'SYSIPT', 'SYSLST', 'SYSPRINT',
+        'SYSSNAP', 'SYSTIME', 'TALLY', 'TERM-COLUMNS', 'TERM-NAME',
+        'TERM-ROWS', 'TERMINATION', 'TITLE', 'TO', 'TRANSFER', 'TRC',
+        'UNIQUE', 'UNTIL', 'UPDATE', 'UPPERCASE', 'USER', 'USERID', 'VALUE',
+        'VERIFY', 'W', 'WHEN', 'WHILE', 'WORK', 'WRITE', 'X', 'XDM', 'XRST'
     ])
     tokens = {
         'root': [
@@ -3744,17 +3744,26 @@ class EasytrieveLexer(RegexLexer):
             (r'&' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+\.', Name.Variable, 'after_macro_argument'),
             # Macro call
             (r'%' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Variable),
-            (r'(FILE|JOB|PARM|PROC|REPORT)(' + _DELIMITER_PATTERN + r')',
+            (r'(FILE|MACRO|REPORT)(\s+)',
+             bygroups(Keyword.Declaration, Whitespace), 'after_declaration'),
+            (r'(JOB|PARM)' + r'(' + _DELIMITER_PATTERN + r')',
              bygroups(Keyword.Declaration, Operator)),
             (_KEYWORDS_PATTERN + r'(' + _DELIMITER_PATTERN + r')',
              bygroups(Keyword.Reserved, Operator)),
             (_OPERATORS_PATTERN, Operator),
+            # Procedure declaration
+            (r'(' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+)(\s*)(\.?)(\s*)(PROC)(\s*\n)',
+             bygroups(Name.Function, Whitespace, Operator, Whitespace, Keyword.Declaration, Whitespace)),
             (r'[0-9]+\.[0-9]*', Number.Float),
             (r'[0-9]+', Number.Integer),
             (r"'(''|[^'])*'", String),
             (r'\s+', Whitespace),
             (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)  # Everything else just belongs to a name
          ],
+        'after_declaration': [
+            (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function),
+            ('', Whitespace, '#pop')
+        ],
         'after_macro_argument': [
             (r'\*.*\n', Comment.Single, '#pop'),
             (r'\s+', Whitespace, '#pop'),
@@ -3763,6 +3772,8 @@ class EasytrieveLexer(RegexLexer):
             (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)  # Everything else just belongs to a name
         ],
     }
+    _COMMENT_LINE_REGEX = re.compile(r'^\s*\*')
+    _MACRO_HEADER_REGEX = re.compile(r'^\s*MACRO')
 
     def analyse_text(text):
         """
@@ -3777,15 +3788,20 @@ class EasytrieveLexer(RegexLexer):
         hasProc = False
         hasParm = False
         hasReport = False
-        isBroken = False
 
-        # Skip possible header comments.
-        while len(lines) and lines[0].startswith('*'):
-            hasHeaderComment = True
+        def isCommentLine(line):
+            return EasytrieveLexer._COMMENT_LINE_REGEX.match(lines[0]) is not None
+
+        def isEmptyLine(line):
+            return not bool(line.strip())
+
+        # Remove possible empty lines and header comments.
+        while lines and (isEmptyLine(lines[0]) or isCommentLine(lines[0])):
+            if not isEmptyLine(lines[0]):
+                hasHeaderComment = True
             del lines[0]
 
-        firstLine = lines[0]
-        if firstLine[:6] in ('MACRO', 'MACRO '):
+        if EasytrieveLexer._MACRO_HEADER_REGEX.match(lines[0]):
             # Looks like an Easytrieve macro.
             result = 0.4
             if hasHeaderComment:
@@ -3795,37 +3811,39 @@ class EasytrieveLexer(RegexLexer):
             for line in lines:
                 words = line.split()
                 if (len(words) >= 2):
-                    first_word = words[0]
+                    firstWord = words[0]
                     if not hasReport:
                         if not hasJob:
                             if not hasFile:
                                 if not hasParm:
-                                    if first_word == 'PARM':
+                                    if firstWord == 'PARM':
                                         hasParm = True
-                                if first_word == 'FILE':
+                                if firstWord == 'FILE':
                                     hasFile = True
-                            if first_word == 'JOB':
+                            if firstWord == 'JOB':
                                 hasJob = True
-                        elif first_word == 'PROC':
+                        elif firstWord == 'PROC':
                             hasProc = True
-                        elif first_word == 'END-PROC':
+                        elif firstWord == 'END-PROC':
                             hasEndProc = True
-                        elif first_word == 'REPORT':
+                        elif firstWord == 'REPORT':
                             hasReport = True
 
             # Weight the findings.
-            if not isBroken and hasJob and (hasProc == hasEndProc):
+            if hasJob and (hasProc == hasEndProc):
+                if hasHeaderComment:
+                    result += 0.1
                 if hasParm:
                     if hasProc:
                         # Found PARM, JOB and PROC/END-PROC:
                         # pretty sure this is Easytrieve.
-                        result = 0.8
+                        result += 0.8
                     else:
                         # Found PARAM and  JOB: probably this is Easytrieve
-                        result = 0.5
+                        result += 0.5
                 else:
                     # Found JOB and possibly other keywords: might be Easytrieve
-                    result = 0.11
+                    result += 0.11
                     if hasParm:
                         # Note: PARAM is not a proper English word, so this is
                         # regarded a much better indicator for Easytrieve than
@@ -3843,7 +3861,7 @@ class JclLexer(RegexLexer):
     """
     `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_
     is a scripting language used on mainframe platforms to instruct the system
-    on how to run a batch job or start a  subsystem. It is somewhat
+    on how to run a batch job or start a subsystem. It is somewhat
     comparable to MS DOS batch and Unix shell scripts.
 
     *New in Pygments 1.7.*
@@ -3863,7 +3881,7 @@ class JclLexer(RegexLexer):
             (r'.*\n', Other)  # Input text or inline code in any language.
         ],
         'statement': [
-            (r'\s*\n', Whitespace, 'root'),
+            (r'\s*\n', Whitespace, '#pop'),
             (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)',
              bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace),
              'option'),
@@ -3872,11 +3890,11 @@ class JclLexer(RegexLexer):
         ],
         'statement_command': [
             (r'\s+(command|cntl|dd|endctl|endif|else|include|jcllib|'
-             r'output|pend|proc|set|then|xmit)\s*', Keyword.Reserved, 'option'),
+             r'output|pend|proc|set|then|xmit)\s+', Keyword.Reserved, 'option'),
             include('option')
         ],
         'jes2_statement': [
-            (r'\s*\n', Whitespace, 'root'),
+            (r'\s*\n', Whitespace, '#pop'),
             (r'\$', Keyword, 'option'),
             (r'\b(jobparam|message|netacct|notify|output|priority|route|'
              r'setup|signoff|xeq|xmit)\b', Keyword, 'option'),
@@ -3898,7 +3916,7 @@ class JclLexer(RegexLexer):
             (r"(\n)(//)", bygroups(Text, Keyword.Pseudo)),
             (r"''", String),
             (r"[^']", String),
-            (r"'", String, 'option'),
+            (r"'", String, '#pop'),
         ],
         'option_comment': [
             (r'\n', Text, 'root'),
diff --git a/tests/examplefiles/example.ezt b/tests/examplefiles/example.ezt
index b068fca3..fec2aa4c 100644
--- a/tests/examplefiles/example.ezt
+++ b/tests/examplefiles/example.ezt
@@ -1,4 +1,4 @@
-* Easytrieve Plus Test Programm.
+* Easytrieve Plus example programm.
 
 * Environtment section.
 PARM DEBUG(FLOW FLDCHK)
@@ -10,13 +10,23 @@ FILE PERSNL FB(150 1800)
   DEPT  98 3 N. GROSS 94 4 P 2
   *           ^ 2 field definitions in 1 line.
 
+* Call macro in example.mac.
 FILE EXAMPLE FB(80 200)
 %EXAMPLE SOMEFILE SOME
 
-* Macro declaration (to be valid, this would
 * Activity Section.
-JOB INPUT PERSNL NAME FIRST-PROGRAM
+JOB INPUT PERSNL NAME FIRST-PROGRAM START AT-START FINISH AT_FINISH
   PRINT PAY-RPT
 REPORT PAY-RPT LINESIZE 80
   TITLE 01 'PERSONNEL REPORT EXAMPLE-1'
   LINE 01 DEPT NAME EMP# GROSS
+
+* Procedure declarations.
+AT-START. PROC
+  DISPLAY 'PROCESSING...'
+END-PROC
+
+AT-FINISH
+PROC
+  DISPLAY 'DONE.'
+END-PROC
diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py
index 7936fe38..83330c90 100644
--- a/tests/test_lexers_other.py
+++ b/tests/test_lexers_other.py
@@ -39,10 +39,24 @@ class AnalyseTextTest(unittest.TestCase):
                     exampleFile.close()
 
     def testCanRecognizeAndGuessExampleFiles(self):
-        self._testCanRecognizeAndGuessExampleFiles(EasytrieveLexer)
-        self._testCanRecognizeAndGuessExampleFiles(JclLexer)
-        self._testCanRecognizeAndGuessExampleFiles(RexxLexer)
-        self._testCanRecognizeAndGuessExampleFiles(WebFocusLexer)
+        LEXERS_TO_TEST = [
+            EasytrieveLexer,
+            JclLexer,
+            RexxLexer,
+        ]
+        for lexerToTest in LEXERS_TO_TEST:
+            self._testCanRecognizeAndGuessExampleFiles(lexerToTest)
+
+
+class EasyTrieveLexerTest(unittest.TestCase):
+    def testCanGuessFromText(self):
+        self.assertLess(0, EasytrieveLexer.analyse_text('MACRO'))
+        self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO'))
+        self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO'))
+        self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO'))
+        self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO'))
+        self.assertLess(0, EasytrieveLexer.analyse_text(
+            '*\n *\n\n \n*\n MACRO'))
 
 
 class RexxLexerTest(unittest.TestCase):
author	Thomas Aglassinger <roskakori@users.sourceforge.net>	2013-06-02 14:37:59 +0200
committer	Thomas Aglassinger <roskakori@users.sourceforge.net>	2013-06-02 14:37:59 +0200
commit	b3cbb1e20b74269deeb146513b6de353e5583a67 (patch)
tree	ab2797f515131fb5bbee9aeb2fa036e7ac404ead
parent	26e8af5761aab8daeb85f287d7abf2fa42cb43e1 (diff)
download	pygments-b3cbb1e20b74269deeb146513b6de353e5583a67.tar.gz