diff options
3 files changed, 176 insertions, 85 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 33f1266..10c5593 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -33,12 +33,14 @@ Version 2.1.5 - June, 2016
. IPv4 and IPv6 addresses (including long, short, and mixed forms
of IPv6)
. MAC address
- . ISO8601 date and date time strings
+ . ISO8601 date and date time strings (with named fields for year, month, etc.)
. UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
. hex integer (returned as int)
. fraction (integer '/' integer, returned as float)
. mixed integer (integer '-' fraction, or just fraction, returned as float)
. stripHTMLTags (parse action to remove tags from HTML source)
+ . parse action helpers convertToDate and convertToDatetime to do custom parse
+ time conversions of parsed ISO8601 strings
- runTests now returns a two-tuple: success if all tests succeed,
and an output list of (test string, result) pairs, one per test.
diff --git a/src/ b/src/
index a72a2e1..2cbd023 100644
--- a/src/
+++ b/src/
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
__version__ = "2.1.5"
-__versionTime__ = "08 Jun 2016 21:53 UTC"
+__versionTime__ = "12 Jun 2016 21:53 UTC"
__author__ = "Paul McGuire <>"
import string
@@ -70,9 +70,8 @@ import re
import sre_constants
import collections
import pprint
-import functools
-import itertools
import traceback
+from datetime import datetime
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
@@ -1605,9 +1604,8 @@ class ParserElement(object):
file_contents = file_or_filename.read()
except AttributeError:
- f = open(file_or_filename, "r")
- file_contents = f.read()
- f.close()
+ with open(file_or_filename, "r") as f:
+ file_contents = f.read()
return self.parseString(file_contents, parseAll)
except ParseBaseException as exc:
@@ -1666,7 +1664,7 @@ class ParserElement(object):
string; pass None to disable comment filtering
- printResults - (default=True) prints test output to stdout
- failureTests - (default=False) indicates if these tests are expected to fail parsing
Returns: a (success, results) tuple, where success indicates that all tests succeeded
(or failed if C{failureTests} is True), and the results contain a list of (test string,
result) tuples, where result is the parsed results or the exception raised by that test
@@ -1687,7 +1685,8 @@ class ParserElement(object):
out = ['\n'.join(comments), t]
comments = []
- out.append(self.parseString(t, parseAll=parseAll).dump())
+ result = self.parseString(t, parseAll=parseAll)
+ out.append(result.dump())
success = success and not failureTests
except ParseBaseException as pe:
fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
@@ -1698,12 +1697,13 @@ class ParserElement(object):
out.append(' '*pe.loc + '^' + fatal)
out.append("FAIL: " + str(pe))
success = success and failureTests
+ result = pe
if printResults:
- else:
- allResults.append(out)
+ allResults.append((t, result))
return success, allResults
@@ -3948,6 +3948,8 @@ class pyparsing_common:
Parse actions:
- C{L{convertToInteger}}
- C{L{convertToFloat}}
+ - C{L{convertToDate}}
+ - C{L{convertToDatetime}}
- C{L{stripHTMLTags}}
@@ -4008,19 +4010,40 @@ class pyparsing_common:
mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
"MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
- iso8601_date = Regex(r'\d{4}(?:-\d\d(?:-\d\d)?)?').setName("ISO8601 date")
+ @staticmethod
+ def convertToDate(fmt="%Y-%m-%d"):
+     """
+     Helper to create a parse action for converting parsed date string to Python datetime.date
+     Params -
+     - fmt - format to be passed to datetime.strptime (default="%Y-%m-%d")
+     The returned parse action raises ParseException if the matched text does not
+     fit fmt (for example a year-only ISO8601 date with the default format).
+     """
+     def cvt_fn(s, l, t):
+         try:
+             return datetime.strptime(t[0], fmt).date()
+         except ValueError as ve:
+             # report a format mismatch as a parse failure rather than leaking ValueError
+             raise ParseException(s, l, str(ve))
+     return cvt_fn
+ @staticmethod
+ def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
+     """
+     Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
+     Params -
+     - fmt - format to be passed to datetime.strptime (default="%Y-%m-%dT%H:%M:%S.%f")
+     The returned parse action raises ParseException if the matched text does not
+     fit fmt (for example a datetime without fractional seconds with the default format).
+     """
+     def cvt_fn(s, l, t):
+         try:
+             return datetime.strptime(t[0], fmt)
+         except ValueError as ve:
+             # report a format mismatch as a parse failure rather than leaking ValueError
+             raise ParseException(s, l, str(ve))
+     return cvt_fn
+ iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"
- iso8601_datetime = Regex(r'\d{4}-\d\d-\d\dT\d\d:\d\d(:\d\d(\.\d*)?)?(Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
- "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)})"
+ iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?))?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
+ "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds and timezone optional; a space may be used in place of C{T}"
uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
- def stripHTMLTags(s,l,tokens):
+ @staticmethod
+ def stripHTMLTags(s, l, tokens):
"""Parse action to remove HTML tags from web page HTML source"""
- return _html_stripper.transformString(tokens[0])
+ return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":
diff --git a/src/ b/src/
index d9eece9..d615b3b 100644
--- a/src/
+++ b/src/
@@ -1,6 +1,8 @@
# -*- coding: UTF-8 -*-
from unittest import TestCase, TestSuite, TextTestRunner
import unittest
+import datetime
from pyparsing import ParseException
#~ import HTMLTestRunner
@@ -12,6 +14,8 @@ PY_3 = sys.version.startswith('3')
if PY_3:
import builtins
print_ = getattr(builtins, "print")
+ from io import StringIO
def _print(*args, **kwargs):
if 'end' in kwargs:
@@ -19,6 +23,8 @@ else:
sys.stdout.write(' '.join(map(str,args)) + '\n')
print_ = _print
+ from cStringIO import StringIO
# see which Python implementation we are running
CPYTHON_ENV = (sys.platform == "win32")
@@ -71,42 +77,43 @@ class PyparsingTestInit(ParseTestCase):
def tearDown(self):
-class ParseASMLTest(ParseTestCase):
- def runTest(self):
- import parseASML
- files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235),
- ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183),
- ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186),
- ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ]
- for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files:
- print_("Parsing",testFile,"...", end=' ')
- #~ text = "\n".join( [ line for line in file(testFile) ] )
- #~ results = parseASML.BNF().parseString( text )
- results = parseASML.BNF().parseFile( testFile )
- #~ pprint.pprint( results.asList() )
- #~ pprint.pprint( results.batchData.asList() )
- #~ print results.batchData.keys()
- allToks = flatten( results.asList() )
- assert len(allToks) == numToks, \
- "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks)
- assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed"
- assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed"
- assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta"
- assert len(results.waferData) == numWafers, "did not read correct number of wafers"
- assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin"
- assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd"
- assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV"
- assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV"
- print_("OK")
- print_(testFile,len(allToks))
- #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed
- #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed
- #~ print "results.batchData.maxDelta =",results.batchData.maxDelta
- #~ print len(results.waferData)," wafers"
- #~ print min([wd.procBegin for wd in results.waferData])
- #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))])
- #~ print sum(results.levelStatsIV['MAX.'])
+if 0:
+ class ParseASMLTest(ParseTestCase):
+ def runTest(self):
+ import parseASML
+ files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235),
+ ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183),
+ ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186),
+ ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ]
+ for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files:
+ print_("Parsing",testFile,"...", end=' ')
+ #~ text = "\n".join( [ line for line in file(testFile) ] )
+ #~ results = parseASML.BNF().parseString( text )
+ results = parseASML.BNF().parseFile( testFile )
+ #~ pprint.pprint( results.asList() )
+ #~ pprint.pprint( results.batchData.asList() )
+ #~ print results.batchData.keys()
+ allToks = flatten( results.asList() )
+ assert len(allToks) == numToks, \
+ "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks)
+ assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed"
+ assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed"
+ assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta"
+ assert len(results.waferData) == numWafers, "did not read correct number of wafers"
+ assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin"
+ assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd"
+ assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV"
+ assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV"
+ print_("OK")
+ print_(testFile,len(allToks))
+ #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed
+ #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed
+ #~ print "results.batchData.maxDelta =",results.batchData.maxDelta
+ #~ print len(results.waferData)," wafers"
+ #~ print min([wd.procBegin for wd in results.waferData])
+ #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))])
+ #~ print sum(results.levelStatsIV['MAX.'])
class ParseFourFnTest(ParseTestCase):
@@ -977,7 +984,11 @@ class CustomQuotesTest(ParseTestCase):
class RepeaterTest(ParseTestCase):
def runTest(self):
- from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums
+ from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums, ParserElement
+ if ParserElement._packratEnabled:
+ print_("skipping this test, not compatible with packratting")
+ return
first = Word("abcdef").setName("word1")
bridge = Word(nums).setName("number")
@@ -2569,7 +2580,7 @@ class NestedAsDictTest(ParseTestCase):
rsp = 'username=goat; errors={username=[already taken, too short]}; empty_field='
result_dict = response.parseString(rsp).asDict()
- print(result_dict)
+ print_(result_dict)
assert result_dict['username'] == 'goat', "failed to process string in ParseResults correctly"
assert result_dict['errors']['username'] == ['already taken', 'too short'], "failed to process nested ParseResults correctly"
@@ -2606,25 +2617,26 @@ class RunTestsTest(ParseTestCase):
# normal data
- # invalid range
- 1-2, 3-1, 4-6, 7, 12
# lone integer
results = indices.runTests(tests, printResults=False)[1]
- #~ import pprint
- #~ pprint.pprint(results)
- expectedResults = [
- ['# normal data', '1-3,2-4,6,8-10,16', '[1, 2, 3, 4, 6, 8, 9, 10, 16]'],
- ['# invalid range',
- '1-2, 3-1, 4-6, 7, 12',
- ' ^(FATAL)',
- 'FAIL: invalid range, start must be <= end (at char 5), (line:1, col:6)'],
- ['# lone integer', '11', '[11]']]
+ expectedResults = [
+ [1, 2, 3, 4, 6, 8, 9, 10, 16],
+ [11],
+ ]
for res,expected in zip(results, expectedResults):
-     assert res == expected, "failed test: " + expected[0][2:]
+     print_(res[1].asList())
+     print_(expected)
+     # expected is a plain list of ints here, so build the message with str();
+     # slicing expected[0] would raise TypeError instead of reporting the failure
+     assert res[1].asList() == expected, "failed test: " + str(expected)
+ tests = """\
+ # invalid range
+ 1-2, 3-1, 4-6, 7, 12
+ """
+ success = indices.runTests(tests, printResults=False, failureTests=True)[0]
+ assert success, "failed to raise exception on improper range test"
class CommonExpressionsTest(ParseTestCase):
def runTest(self):
@@ -2706,20 +2718,40 @@ class CommonExpressionsTest(ParseTestCase):
assert success, "error in parsing valid numerics"
- success = pyparsing_common.iso8601_date.runTests("""
+ success, results = pyparsing_common.iso8601_date.runTests("""
- """)[0]
+ """)
assert success, "error in parsing valid iso8601_date"
- success = pyparsing_common.iso8601_datetime.runTests("""
+ expected = [
+ ('1997', None, None),
+ ('1997', '07', None),
+ ('1997', '07', '16'),
+ ]
+ for r,exp in zip(results, expected):
+ assert (r[1].year,r[1].month,r[1].day,) == exp, "failed to parse date into fields"
+ success, results = pyparsing_common.iso8601_date().addParseAction(pyparsing_common.convertToDate()).runTests("""
+ 1997-07-16
+ """)
+ assert success, "error in parsing valid iso8601_date with parse action"
+ assert results[0][1][0] == datetime.date(1997, 7, 16)
+ success, results = pyparsing_common.iso8601_datetime.runTests("""
- 1997-07-16T19:20:30.45+01:00
- """)[0]
+ 1997-07-16T19:20:30.45Z
+ 1997-07-16 19:20:30.45
+ """)
assert success, "error in parsing valid iso8601_datetime"
+ success, results = pyparsing_common.iso8601_datetime().addParseAction(pyparsing_common.convertToDatetime()).runTests("""
+ 1997-07-16T19:20:30.45
+ """)
+ assert success, "error in parsing valid iso8601_datetime"
+ assert results[0][1][0] == datetime.datetime(1997, 7, 16, 19, 20, 30, 450000)
success = pyparsing_common.uuid.runTests("""
@@ -2755,8 +2787,42 @@ class TokenMapTest(ParseTestCase):
00 11 22 aa FF 0a 0d 1a
""", printResults=False)
assert success, "failed to parse hex integers"
- assert results[0][-1] == '[0, 17, 34, 170, 255, 10, 13, 26]', "tokenMap parse action failed"
+ print_(results)
+ assert results[0][-1].asList() == [0, 17, 34, 170, 255, 10, 13, 26], "tokenMap parse action failed"
+ class ParseFileTest(ParseTestCase):
+     """Exercise parseFile with an open file-like object and with a filename."""
+     def runTest(self):
+         from pyparsing import pyparsing_common, OneOrMore
+         s = """
+ 123 456 789
+ """
+         input_file = StringIO(s)
+         integer = pyparsing_common.integer
+         # first pass: an object that already has a read() method
+         results = OneOrMore(integer).parseFile(input_file)
+         print_(results)
+         # second pass: a filename; a forward slash avoids the unrecognised "\p"
+         # escape and is accepted as a separator on both Windows and POSIX
+         results = OneOrMore(integer).parseFile('test/parsefiletest_input_file.txt')
+         print_(results)
+class HTMLStripperTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import pyparsing_common, originalTextFor, OneOrMore, Word, printables
+ sample = """
+ <html>
+ Here is some sample <i>HTML</i> text.
+ </html>
+ """
+ read_everything = originalTextFor(OneOrMore(Word(printables)))
+ read_everything.addParseAction(pyparsing_common.stripHTMLTags)
+ result = read_everything.parseString(sample)
+ assert result[0].strip() == 'Here is some sample HTML text.'
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
import pyparsing
@@ -2917,27 +2983,27 @@ def makeTestSuite():
test_case_classes = ParseTestCase.__subclasses__()
- test_case_classes.remove(ParseASMLTest)
+ # test_case_classes.remove(ParseASMLTest)
suite.addTests(T() for T in test_case_classes)
# retest using packrat parsing (disable those tests that aren't compatible)
suite.addTest( EnablePackratParsing() )
unpackrattables = [ EnablePackratParsing, RepeaterTest, ]
# add tests to test suite a second time, to run with packrat parsing
# (leaving out those that we know won't work with packrat)
packratTests = [t.__class__() for t in suite._tests
if t.__class__ not in unpackrattables]
suite.addTests( packratTests )
return suite
def makeTestSuiteTemp(classes):
suite = TestSuite()
suite.addTest( PyparsingTestInit() )
@@ -2954,7 +3020,7 @@ console = True
lp = None
#~ if __name__ == '__main__':
- #~ unittest.main()
+ #~ unittest.main()
if console:
#~ # console mode
testRunner = TextTestRunner()