Change result value returned by runTests to be a list of (test,result) tuples

Add pyparsing_common helpers convertToDate and convertToDatetime. Add named regex fields to iso8601_date and iso8601_datetime. Fix pyparsing_common method stripHTMLTags. Corresponding cleanups in unittests. git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@364 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-12 22:00:04 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-12 22:00:04 +0000
commit: 5d9802adead9380b29824492f140c17054376081 (patch)
tree: 7bd22545c6eac17ff7c7d52c516e13a41ff23f22
parent: 954d356444a76281e4f5f18b70b041a32d475cc6 (diff)
download: pyparsing-5d9802adead9380b29824492f140c17054376081.tar.gz
3 files changed, 176 insertions, 85 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 33f1266..10c5593 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -33,12 +33,14 @@ Verison 2.1.5 - June, 2016
   . IPv4 and IPv6 addresses (including long, short, and mixed forms
     of IPv6)
   . MAC address
-  . ISO8601 date and date time strings
+  . ISO8601 date and date time strings (with named fields for year, month, etc.)
   . UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
   . hex integer (returned as int)
   . fraction (integer '/' integer, returned as float)
   . mixed integer (integer '-' fraction, or just fraction, returned as float)
   . stripHTMLTags (parse action to remove tags from HTML source)
+  . parse action helpers convertToDate and convertToDatetime to do custom parse
+    time conversions of parsed ISO8601 strings
 
 - runTests now returns a two-tuple: success if all tests succeed,
   and an output list of each test and its output lines.
diff --git a/src/pyparsing.py b/src/pyparsing.py
index a72a2e1..2cbd023 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
 """
 
 __version__ = "2.1.5"
-__versionTime__ = "08 Jun 2016 21:53 UTC"
+__versionTime__ = "12 Jun 2016 21:53 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -70,9 +70,8 @@ import re
 import sre_constants
 import collections
 import pprint
-import functools
-import itertools
 import traceback
+from datetime import datetime
 
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 
@@ -1605,9 +1604,8 @@ class ParserElement(object):
         try:
             file_contents = file_or_filename.read()
         except AttributeError:
-            f = open(file_or_filename, "r")
-            file_contents = f.read()
-            f.close()
+            with open(file_or_filename, "r") as f:
+                file_contents = f.read()
         try:
             return self.parseString(file_contents, parseAll)
         except ParseBaseException as exc:
@@ -1666,7 +1664,7 @@ class ParserElement(object):
               string; pass None to disable comment filtering
             - printResults - (default=True) prints test output to stdout
             - failureTests - (default=False) indicates if these tests are expected to fail parsing
-            
+
             Returns: a (success, results) tuple, where success indicates that all tests succeeded
             (or failed if C{failureTest} is True), and the results contain a list of lines of each 
             test's output
@@ -1687,7 +1685,8 @@ class ParserElement(object):
             out = ['\n'.join(comments), t]
             comments = []
             try:
-                out.append(self.parseString(t, parseAll=parseAll).dump())
+                result = self.parseString(t, parseAll=parseAll)
+                out.append(result.dump())
                 success = success and not failureTests
             except ParseBaseException as pe:
                 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
@@ -1698,12 +1697,13 @@ class ParserElement(object):
                     out.append(' '*pe.loc + '^' + fatal)
                 out.append("FAIL: " + str(pe))
                 success = success and failureTests
+                result = pe
 
             if printResults:
                 out.append('')
                 print('\n'.join(out))
-            else:
-                allResults.append(out)
+
+            allResults.append((t, result))
         
         return success, allResults
 
@@ -3948,6 +3948,8 @@ class pyparsing_common:
     Parse actions:
      - C{L{convertToInteger}}
      - C{L{convertToFloat}}
+     - C{L{convertToDate}}
+     - C{L{convertToDatetime}}
      - C{L{stripHTMLTags}}
     """
 
@@ -4008,19 +4010,40 @@ class pyparsing_common:
     mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
     "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
 
-    iso8601_date = Regex(r'\d{4}(?:-\d\d(?:-\d\d)?)?').setName("ISO8601 date")
+    @staticmethod
+    def convertToDate(fmt="%Y-%m-%d"):
+        """
+        Helper to create a parse action for converting parsed date string to Python datetime.date
+
+        Params -
+        - fmt - format to be passed to datetime.strptime (default="%Y-%m-%d")
+        """
+        return lambda s,l,t: datetime.strptime(t[0], fmt).date()
+
+    @staticmethod
+    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
+        """
+        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
+
+        Params -
+        - fmt - format to be passed to datetime.strptime (default="%Y-%m-%dT%H:%M:%S.%f")
+        """
+        return lambda s,l,t: datetime.strptime(t[0], fmt)
+
+    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
     "ISO8601 date (C{yyyy-mm-dd})"
 
-    iso8601_datetime = Regex(r'\d{4}-\d\d-\d\dT\d\d:\d\d(:\d\d(\.\d*)?)?(Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
-    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)})"
+    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
+    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds and timezone optional"
 
     uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
     "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
-    
+
     _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
-    def stripHTMLTags(s,l,tokens):
+    @staticmethod
+    def stripHTMLTags(s, l, tokens):
         """Parse action to remove HTML tags from web page HTML source"""
-        return _html_stripper.transformString(tokens[0])
+        return pyparsing_common._html_stripper.transformString(tokens[0])
 
 if __name__ == "__main__":
 
diff --git a/src/unitTests.py b/src/unitTests.py
index d9eece9..d615b3b 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -1,6 +1,8 @@
 # -*- coding: UTF-8 -*-
 from unittest import TestCase, TestSuite, TextTestRunner
 import unittest
+import datetime
+
 from pyparsing import ParseException
 #~ import HTMLTestRunner
 
@@ -12,6 +14,8 @@ PY_3 = sys.version.startswith('3')
 if PY_3:
     import builtins
     print_ = getattr(builtins, "print")
+
+    from io import StringIO
 else:
     def _print(*args, **kwargs):
         if 'end' in kwargs:
@@ -19,6 +23,8 @@ else:
         else:
             sys.stdout.write(' '.join(map(str,args)) + '\n')
     print_ = _print
+    from cStringIO import StringIO
+
 
 # see which Python implementation we are running
 CPYTHON_ENV = (sys.platform == "win32")
@@ -71,42 +77,43 @@ class PyparsingTestInit(ParseTestCase):
     def tearDown(self):
         pass
 
-class ParseASMLTest(ParseTestCase):
-    def runTest(self):
-        import parseASML
-        files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235),
-                  ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183),
-                  ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186),
-                  ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ]
-        for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files:
-            print_("Parsing",testFile,"...", end=' ')
-            #~ text = "\n".join( [ line for line in file(testFile) ] )
-            #~ results = parseASML.BNF().parseString( text )
-            results = parseASML.BNF().parseFile( testFile )
-            #~ pprint.pprint( results.asList() )
-            #~ pprint.pprint( results.batchData.asList() )
-            #~ print results.batchData.keys()
-
-            allToks = flatten( results.asList() )
-            assert len(allToks) == numToks, \
-                "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks)
-            assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed"
-            assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed"
-            assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta"
-            assert len(results.waferData) == numWafers, "did not read correct number of wafers"
-            assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin"
-            assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd"
-            assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV"
-            assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV"
-            print_("OK")
-            print_(testFile,len(allToks))
-            #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed
-            #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed
-            #~ print "results.batchData.maxDelta =",results.batchData.maxDelta
-            #~ print len(results.waferData)," wafers"
-            #~ print min([wd.procBegin for wd in results.waferData])
-            #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))])
-            #~ print sum(results.levelStatsIV['MAX.'])
+if 0:
+    class ParseASMLTest(ParseTestCase):
+        def runTest(self):
+            import parseASML
+            files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235),
+                      ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183),
+                      ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186),
+                      ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ]
+            for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files:
+                print_("Parsing",testFile,"...", end=' ')
+                #~ text = "\n".join( [ line for line in file(testFile) ] )
+                #~ results = parseASML.BNF().parseString( text )
+                results = parseASML.BNF().parseFile( testFile )
+                #~ pprint.pprint( results.asList() )
+                #~ pprint.pprint( results.batchData.asList() )
+                #~ print results.batchData.keys()
+
+                allToks = flatten( results.asList() )
+                assert len(allToks) == numToks, \
+                    "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks)
+                assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed"
+                assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed"
+                assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta"
+                assert len(results.waferData) == numWafers, "did not read correct number of wafers"
+                assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin"
+                assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd"
+                assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV"
+                assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV"
+                print_("OK")
+                print_(testFile,len(allToks))
+                #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed
+                #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed
+                #~ print "results.batchData.maxDelta =",results.batchData.maxDelta
+                #~ print len(results.waferData)," wafers"
+                #~ print min([wd.procBegin for wd in results.waferData])
+                #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))])
+                #~ print sum(results.levelStatsIV['MAX.'])
         
 
 class ParseFourFnTest(ParseTestCase):
@@ -977,7 +984,11 @@ class CustomQuotesTest(ParseTestCase):
 
 class RepeaterTest(ParseTestCase):
     def runTest(self):
-        from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums
+        from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums, ParserElement
+
+        if ParserElement._packratEnabled:
+            print_("skipping this test, not compatible with packratting")
+            return
 
         first = Word("abcdef").setName("word1")
         bridge = Word(nums).setName("number")
@@ -2569,7 +2580,7 @@ class NestedAsDictTest(ParseTestCase):
 
         rsp = 'username=goat; errors={username=[already taken, too short]}; empty_field='
         result_dict = response.parseString(rsp).asDict()
-        print(result_dict)
+        print_(result_dict)
         assert result_dict['username'] == 'goat', "failed to process string in ParseResults correctly"
         assert result_dict['errors']['username'] == ['already taken', 'too short'], "failed to process nested ParseResults correctly"
 
@@ -2606,25 +2617,26 @@ class RunTestsTest(ParseTestCase):
             # normal data
             1-3,2-4,6,8-10,16
             
-            # invalid range
-            1-2, 3-1, 4-6, 7, 12
-            
             # lone integer
             11"""
         results = indices.runTests(tests, printResults=False)[1]
-        #~ import pprint
-        #~ pprint.pprint(results)
-        
-        expectedResults = [
-            ['# normal data', '1-3,2-4,6,8-10,16', '[1, 2, 3, 4, 6, 8, 9, 10, 16]'],
-            ['# invalid range',
-            '1-2, 3-1, 4-6, 7, 12',
-            '     ^(FATAL)',
-            'FAIL: invalid range, start must be <= end (at char 5), (line:1, col:6)'],
-            ['# lone integer', '11', '[11]']]
 
+        expectedResults = [
+            [1, 2, 3, 4, 6, 8, 9, 10, 16],
+            [11],
+            ]
         for res,expected in zip(results, expectedResults):
-            assert res == expected, "failed test: " + expected[0][2:]
+            print_(res[1].asList())
+            print_(expected)
+            assert res[1].asList() == expected, "failed test: " + expected[0][2:]
+
+
+        tests = """\
+            # invalid range
+            1-2, 3-1, 4-6, 7, 12
+            """
+        success = indices.runTests(tests, printResults=False, failureTests=True)[0]
+        assert success, "failed to raise exception on improper range test"
 
 class CommonExpressionsTest(ParseTestCase):
     def runTest(self):
@@ -2706,20 +2718,40 @@ class CommonExpressionsTest(ParseTestCase):
             """)[0]
         assert success, "error in parsing valid numerics"
 
-        success = pyparsing_common.iso8601_date.runTests("""
+        success, results = pyparsing_common.iso8601_date.runTests("""
             1997
             1997-07
             1997-07-16
-            """)[0]
+            """)
         assert success, "error in parsing valid iso8601_date"
- 
-        success = pyparsing_common.iso8601_datetime.runTests("""
+        expected = [
+            ('1997', None, None),
+            ('1997', '07', None),
+            ('1997', '07', '16'),
+        ]
+        for r,exp in zip(results, expected):
+            assert (r[1].year,r[1].month,r[1].day,) == exp, "failed to parse date into fields"
+
+        success, results = pyparsing_common.iso8601_date().addParseAction(pyparsing_common.convertToDate()).runTests("""
+            1997-07-16
+            """)
+        assert success, "error in parsing valid iso8601_date with parse action"
+        assert results[0][1][0] == datetime.date(1997, 7, 16)
+
+        success, results = pyparsing_common.iso8601_datetime.runTests("""
             1997-07-16T19:20+01:00
             1997-07-16T19:20:30+01:00
-            1997-07-16T19:20:30.45+01:00
-            """)[0]
+            1997-07-16T19:20:30.45Z
+            1997-07-16 19:20:30.45
+            """)
         assert success, "error in parsing valid iso8601_datetime"
-        
+
+        success, results = pyparsing_common.iso8601_datetime().addParseAction(pyparsing_common.convertToDatetime()).runTests("""
+            1997-07-16T19:20:30.45
+            """)
+        assert success, "error in parsing valid iso8601_datetime"
+        assert results[0][1][0] == datetime.datetime(1997, 7, 16, 19, 20, 30, 450000)
+
         success = pyparsing_common.uuid.runTests("""
             123e4567-e89b-12d3-a456-426655440000
             """)[0]
@@ -2755,8 +2787,42 @@ class TokenMapTest(ParseTestCase):
             00 11 22 aa FF 0a 0d 1a
             """, printResults=False)
         assert success, "failed to parse hex integers"
-        assert results[0][-1] == '[0, 17, 34, 170, 255, 10, 13, 26]', "tokenMap parse action failed"
-        
+        print_(results)
+        assert results[0][-1].asList() == [0, 17, 34, 170, 255, 10, 13, 26], "tokenMap parse action failed"
+
+
+class ParseFileTest(ParseTestCase):
+    def runTest(self):
+        from pyparsing import pyparsing_common, OneOrMore
+        s = """
+        123 456 789
+        """
+        input_file = StringIO(s)
+        integer = pyparsing_common.integer
+
+        results = OneOrMore(integer).parseFile(input_file)
+        print_(results)
+
+        results = OneOrMore(integer).parseFile('test\parsefiletest_input_file.txt')
+        print_(results)
+
+
+class HTMLStripperTest(ParseTestCase):
+    def runTest(self):
+        from pyparsing import pyparsing_common, originalTextFor, OneOrMore, Word, printables
+
+        sample = """
+        <html>
+        Here is some sample <i>HTML</i> text.
+        </html>
+        """
+        read_everything = originalTextFor(OneOrMore(Word(printables)))
+        read_everything.addParseAction(pyparsing_common.stripHTMLTags)
+
+        result = read_everything.parseString(sample)
+        assert result[0].strip() == 'Here is some sample HTML text.'
+
+
 class MiscellaneousParserTests(ParseTestCase):
     def runTest(self):
         import pyparsing
@@ -2917,27 +2983,27 @@ def makeTestSuite():
 
     test_case_classes = ParseTestCase.__subclasses__()
     test_case_classes.remove(PyparsingTestInit)
-    test_case_classes.remove(ParseASMLTest)
+    # test_case_classes.remove(ParseASMLTest)
     test_case_classes.remove(EnablePackratParsing)
     if IRON_PYTHON_ENV:
         test_case_classes.remove(OriginalTextForTest)
 
     suite.addTests(T() for T in test_case_classes)
-    
+
     if TEST_USING_PACKRAT:
         # retest using packrat parsing (disable those tests that aren't compatible)
         suite.addTest( EnablePackratParsing() )
-        
+
         unpackrattables = [ EnablePackratParsing, RepeaterTest, ]
-        
+
         # add tests to test suite a second time, to run with packrat parsing
         # (leaving out those that we know wont work with packrat)
         packratTests = [t.__class__() for t in suite._tests
                             if t.__class__ not in unpackrattables]
         suite.addTests( packratTests )
-        
+
     return suite
-    
+
 def makeTestSuiteTemp(classes):
     suite = TestSuite()
     suite.addTest( PyparsingTestInit() )
@@ -2954,7 +3020,7 @@ console = True
 lp = None
 
 #~ if __name__ == '__main__':
-    #~ unittest.main() 
+    #~ unittest.main()
 if console:
     #~ # console mode
     testRunner = TextTestRunner()
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-06-12 22:00:04 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-06-12 22:00:04 +0000
commit	5d9802adead9380b29824492f140c17054376081 (patch)
tree	7bd22545c6eac17ff7c7d52c516e13a41ff23f22
parent	954d356444a76281e4f5f18b70b041a32d475cc6 (diff)
download	pyparsing-5d9802adead9380b29824492f140c17054376081.tar.gz