From 5d9802adead9380b29824492f140c17054376081 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 12 Jun 2016 22:00:04 +0000 Subject: Change result value returned by runTests to be a list of (test,result) tuples Add pyparsing_common helpers convertToDate and convertToDatetime Add named regex fields to iso8601_date and iso8601_datetime Fix pyparsing_common method stripHTMLTags Corresponding cleanups in unittests git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@364 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b --- src/CHANGES | 4 +- src/pyparsing.py | 55 ++++++++++----- src/unitTests.py | 202 ++++++++++++++++++++++++++++++++++++------------------- 3 files changed, 176 insertions(+), 85 deletions(-) diff --git a/src/CHANGES b/src/CHANGES index 33f1266..10c5593 100644 --- a/src/CHANGES +++ b/src/CHANGES @@ -33,12 +33,14 @@ Verison 2.1.5 - June, 2016 . IPv4 and IPv6 addresses (including long, short, and mixed forms of IPv6) . MAC address - . ISO8601 date and date time strings + . ISO8601 date and date time strings (with named fields for year, month, etc.) . UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) . hex integer (returned as int) . fraction (integer '/' integer, returned as float) . mixed integer (integer '-' fraction, or just fraction, returned as float) . stripHTMLTags (parse action to remove tags from HTML source) + . parse action helpers convertToDate and convertToDatetime to do custom parse + time conversions of parsed ISO8601 strings - runTests now returns a two-tuple: success if all tests succeed, and an output list of each test and its output lines. diff --git a/src/pyparsing.py b/src/pyparsing.py index a72a2e1..2cbd023 100644 --- a/src/pyparsing.py +++ b/src/pyparsing.py @@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when """ __version__ = "2.1.5" -__versionTime__ = "08 Jun 2016 21:53 UTC" +__versionTime__ = "12 Jun 2016 21:53 UTC" __author__ = "Paul McGuire " import string @@ -70,9 +70,8 @@ import re import sre_constants import collections import pprint -import functools -import itertools import traceback +from datetime import datetime #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) @@ -1605,9 +1604,8 @@ class ParserElement(object): try: file_contents = file_or_filename.read() except AttributeError: - f = open(file_or_filename, "r") - file_contents = f.read() - f.close() + with open(file_or_filename, "r") as f: + file_contents = f.read() try: return self.parseString(file_contents, parseAll) except ParseBaseException as exc: @@ -1666,7 +1664,7 @@ class ParserElement(object): string; pass None to disable comment filtering - printResults - (default=True) prints test output to stdout - failureTests - (default=False) indicates if these tests are expected to fail parsing - + Returns: a (success, results) tuple, where success indicates that all tests succeeded (or failed if C{failureTest} is True), and the results contain a list of lines of each test's output @@ -1687,7 +1685,8 @@ class ParserElement(object): out = ['\n'.join(comments), t] comments = [] try: - out.append(self.parseString(t, parseAll=parseAll).dump()) + result = self.parseString(t, parseAll=parseAll) + out.append(result.dump()) success = success and not failureTests except ParseBaseException as pe: fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" @@ -1698,12 +1697,13 @@ class ParserElement(object): out.append(' '*pe.loc + '^' + fatal) out.append("FAIL: " + str(pe)) success = success and failureTests + result = pe if printResults: out.append('') print('\n'.join(out)) - else: - allResults.append(out) + + allResults.append((t, result)) return success, allResults @@ -3948,6 +3948,8 @@ class pyparsing_common: Parse actions: - C{L{convertToInteger}} - C{L{convertToFloat}} + - C{L{convertToDate}} + - C{L{convertToDatetime}} - C{L{stripHTMLTags}} """ @@ -4008,19 +4010,40 @@ class pyparsing_common: mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - iso8601_date = Regex(r'\d{4}(?:-\d\d(?:-\d\d)?)?').setName("ISO8601 date") + @staticmethod + def convertToDate(fmt="%Y-%m-%d"): + """ + Helper to create a parse action for converting parsed date string to Python datetime.date + + Params - + - fmt - format to be passed to datetime.strptime (default="%Y-%m-%d") + """ + return lambda s,l,t: datetime.strptime(t[0], fmt).date() + + @staticmethod + def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): + """ + Helper to create a parse action for converting parsed datetime string to Python datetime.datetime + + Params - + - fmt - format to be passed to datetime.strptime (default="%Y-%m-%dT%H:%M:%S.%f") + """ + return lambda s,l,t: datetime.strptime(t[0], fmt) + + iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") "ISO8601 date (C{yyyy-mm-dd})" - iso8601_datetime = Regex(r'\d{4}-\d\d-\d\dT\d\d:\d\d(:\d\d(\.\d*)?)?(Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") - "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)})" + iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") + "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds and timezone optional" uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" - + _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() - def stripHTMLTags(s,l,tokens): + @staticmethod + def stripHTMLTags(s, l, tokens): """Parse action to remove HTML tags from web page HTML source""" - return _html_stripper.transformString(tokens[0]) + return pyparsing_common._html_stripper.transformString(tokens[0]) if __name__ == "__main__": diff --git a/src/unitTests.py b/src/unitTests.py index d9eece9..d615b3b 100644 --- a/src/unitTests.py +++ b/src/unitTests.py @@ -1,6 +1,8 @@ # -*- coding: UTF-8 -*- from unittest import TestCase, TestSuite, TextTestRunner import unittest +import datetime + from pyparsing import ParseException #~ import HTMLTestRunner @@ -12,6 +14,8 @@ PY_3 = sys.version.startswith('3') if PY_3: import builtins print_ = getattr(builtins, "print") + + from io import StringIO else: def _print(*args, **kwargs): if 'end' in kwargs: @@ -19,6 +23,8 @@ else: else: sys.stdout.write(' '.join(map(str,args)) + '\n') print_ = _print + from cStringIO import StringIO + # see which Python implementation we are running CPYTHON_ENV = (sys.platform == "win32") @@ -71,42 +77,43 @@ class PyparsingTestInit(ParseTestCase): def tearDown(self): pass -class ParseASMLTest(ParseTestCase): - def runTest(self): - import parseASML - files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235), - ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183), - ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186), - ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ] - for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files: - print_("Parsing",testFile,"...", end=' ') - #~ text = "\n".join( [ line for line in file(testFile) ] ) - #~ results = parseASML.BNF().parseString( text ) - results = parseASML.BNF().parseFile( testFile ) - #~ pprint.pprint( results.asList() ) - #~ pprint.pprint( results.batchData.asList() ) - #~ print results.batchData.keys() - - allToks = flatten( results.asList() ) - assert len(allToks) == numToks, \ - "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks) - assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed" - assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed" - assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta" - assert len(results.waferData) == numWafers, "did not read correct number of wafers" - assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin" - assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd" - assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV" - assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV" - print_("OK") - print_(testFile,len(allToks)) - #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed - #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed - #~ print "results.batchData.maxDelta =",results.batchData.maxDelta - #~ print len(results.waferData)," wafers" - #~ print min([wd.procBegin for wd in results.waferData]) - #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))]) - #~ print sum(results.levelStatsIV['MAX.']) +if 0: + class ParseASMLTest(ParseTestCase): + def runTest(self): + import parseASML + files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235), + ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183), + ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186), + ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ] + for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files: + print_("Parsing",testFile,"...", end=' ') + #~ text = "\n".join( [ line for line in file(testFile) ] ) + #~ results = parseASML.BNF().parseString( text ) + results = parseASML.BNF().parseFile( testFile ) + #~ pprint.pprint( results.asList() ) + #~ pprint.pprint( results.batchData.asList() ) + #~ print results.batchData.keys() + + allToks = flatten( results.asList() ) + assert len(allToks) == numToks, \ + "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks) + assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed" + assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed" + assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta" + assert len(results.waferData) == numWafers, "did not read correct number of wafers" + assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin" + assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd" + assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV" + assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV" + print_("OK") + print_(testFile,len(allToks)) + #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed + #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed + #~ print "results.batchData.maxDelta =",results.batchData.maxDelta + #~ print len(results.waferData)," wafers" + #~ print min([wd.procBegin for wd in results.waferData]) + #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))]) + #~ print sum(results.levelStatsIV['MAX.']) class ParseFourFnTest(ParseTestCase): @@ -977,7 +984,11 @@ class CustomQuotesTest(ParseTestCase): class RepeaterTest(ParseTestCase): def runTest(self): - from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums + from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums, ParserElement + + if ParserElement._packratEnabled: + print_("skipping this test, not compatible with packratting") + return first = Word("abcdef").setName("word1") bridge = Word(nums).setName("number") @@ -2569,7 +2580,7 @@ class NestedAsDictTest(ParseTestCase): rsp = 'username=goat; errors={username=[already taken, too short]}; empty_field=' result_dict = response.parseString(rsp).asDict() - print(result_dict) + print_(result_dict) assert result_dict['username'] == 'goat', "failed to process string in ParseResults correctly" assert result_dict['errors']['username'] == ['already taken', 'too short'], "failed to process nested ParseResults correctly" @@ -2606,25 +2617,26 @@ class RunTestsTest(ParseTestCase): # normal data 1-3,2-4,6,8-10,16 - # invalid range - 1-2, 3-1, 4-6, 7, 12 - # lone integer 11""" results = indices.runTests(tests, printResults=False)[1] - #~ import pprint - #~ pprint.pprint(results) - - expectedResults = [ - ['# normal data', '1-3,2-4,6,8-10,16', '[1, 2, 3, 4, 6, 8, 9, 10, 16]'], - ['# invalid range', - '1-2, 3-1, 4-6, 7, 12', - ' ^(FATAL)', - 'FAIL: invalid range, start must be <= end (at char 5), (line:1, col:6)'], - ['# lone integer', '11', '[11]']] + expectedResults = [ + [1, 2, 3, 4, 6, 8, 9, 10, 16], + [11], + ] for res,expected in zip(results, expectedResults): - assert res == expected, "failed test: " + expected[0][2:] + print_(res[1].asList()) + print_(expected) + assert res[1].asList() == expected, "failed test: " + expected[0][2:] + + + tests = """\ + # invalid range + 1-2, 3-1, 4-6, 7, 12 + """ + success = indices.runTests(tests, printResults=False, failureTests=True)[0] + assert success, "failed to raise exception on improper range test" class CommonExpressionsTest(ParseTestCase): def runTest(self): @@ -2706,20 +2718,40 @@ class CommonExpressionsTest(ParseTestCase): """)[0] assert success, "error in parsing valid numerics" - success = pyparsing_common.iso8601_date.runTests(""" + success, results = pyparsing_common.iso8601_date.runTests(""" 1997 1997-07 1997-07-16 - """)[0] + """) assert success, "error in parsing valid iso8601_date" - - success = pyparsing_common.iso8601_datetime.runTests(""" + expected = [ + ('1997', None, None), + ('1997', '07', None), + ('1997', '07', '16'), + ] + for r,exp in zip(results, expected): + assert (r[1].year,r[1].month,r[1].day,) == exp, "failed to parse date into fields" + + success, results = pyparsing_common.iso8601_date().addParseAction(pyparsing_common.convertToDate()).runTests(""" + 1997-07-16 + """) + assert success, "error in parsing valid iso8601_date with parse action" + assert results[0][1][0] == datetime.date(1997, 7, 16) + + success, results = pyparsing_common.iso8601_datetime.runTests(""" 1997-07-16T19:20+01:00 1997-07-16T19:20:30+01:00 - 1997-07-16T19:20:30.45+01:00 - """)[0] + 1997-07-16T19:20:30.45Z + 1997-07-16 19:20:30.45 + """) assert success, "error in parsing valid iso8601_datetime" - + + success, results = pyparsing_common.iso8601_datetime().addParseAction(pyparsing_common.convertToDatetime()).runTests(""" + 1997-07-16T19:20:30.45 + """) + assert success, "error in parsing valid iso8601_datetime" + assert results[0][1][0] == datetime.datetime(1997, 7, 16, 19, 20, 30, 450000) + success = pyparsing_common.uuid.runTests(""" 123e4567-e89b-12d3-a456-426655440000 """)[0] @@ -2755,8 +2787,42 @@ class TokenMapTest(ParseTestCase): 00 11 22 aa FF 0a 0d 1a """, printResults=False) assert success, "failed to parse hex integers" - assert results[0][-1] == '[0, 17, 34, 170, 255, 10, 13, 26]', "tokenMap parse action failed" - + print_(results) + assert results[0][-1].asList() == [0, 17, 34, 170, 255, 10, 13, 26], "tokenMap parse action failed" + + +class ParseFileTest(ParseTestCase): + def runTest(self): + from pyparsing import pyparsing_common, OneOrMore + s = """ + 123 456 789 + """ + input_file = StringIO(s) + integer = pyparsing_common.integer + + results = OneOrMore(integer).parseFile(input_file) + print_(results) + + results = OneOrMore(integer).parseFile('test\parsefiletest_input_file.txt') + print_(results) + + +class HTMLStripperTest(ParseTestCase): + def runTest(self): + from pyparsing import pyparsing_common, originalTextFor, OneOrMore, Word, printables + + sample = """ + + Here is some sample HTML text. + + """ + read_everything = originalTextFor(OneOrMore(Word(printables))) + read_everything.addParseAction(pyparsing_common.stripHTMLTags) + + result = read_everything.parseString(sample) + assert result[0].strip() == 'Here is some sample HTML text.' + + class MiscellaneousParserTests(ParseTestCase): def runTest(self): import pyparsing @@ -2917,27 +2983,27 @@ def makeTestSuite(): test_case_classes = ParseTestCase.__subclasses__() test_case_classes.remove(PyparsingTestInit) - test_case_classes.remove(ParseASMLTest) + # test_case_classes.remove(ParseASMLTest) test_case_classes.remove(EnablePackratParsing) if IRON_PYTHON_ENV: test_case_classes.remove(OriginalTextForTest) suite.addTests(T() for T in test_case_classes) - + if TEST_USING_PACKRAT: # retest using packrat parsing (disable those tests that aren't compatible) suite.addTest( EnablePackratParsing() ) - + unpackrattables = [ EnablePackratParsing, RepeaterTest, ] - + # add tests to test suite a second time, to run with packrat parsing # (leaving out those that we know wont work with packrat) packratTests = [t.__class__() for t in suite._tests if t.__class__ not in unpackrattables] suite.addTests( packratTests ) - + return suite - + def makeTestSuiteTemp(classes): suite = TestSuite() suite.addTest( PyparsingTestInit() ) @@ -2954,7 +3020,7 @@ console = True lp = None #~ if __name__ == '__main__': - #~ unittest.main() + #~ unittest.main() if console: #~ # console mode testRunner = TextTestRunner() -- cgit v1.2.1