summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-07-03 23:49:50 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-07-03 23:49:50 -0500
commit4c59256fd3af3206241419b7e8d51abaf9bc8498 (patch)
tree9fb4315649044353353644cdcc25bd90424fc57b
parentf20f8c038bebb81e7184ac87a6f13d5d81d3b495 (diff)
downloadpyparsing-git-4c59256fd3af3206241419b7e8d51abaf9bc8498.tar.gz
Add support for ... as short cut for SkipTo in And, and for repetition as OneOrMore and ZeroOrMore; fix PY2 test bug in unitTests.py
-rw-r--r--CHANGES29
-rw-r--r--pyparsing.py107
-rw-r--r--simple_unit_tests.py61
-rw-r--r--unitTests.py46
4 files changed, 207 insertions, 36 deletions
diff --git a/CHANGES b/CHANGES
index a96d042..eb5ed44 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,35 @@ Change Log
Version 2.4.1 -
----------------------
+- A new shorthand notation has been added for repetition
+ expressions: expr[min, max], with '...' valid as a min
+ or max value:
+ - expr[...] is equivalent to OneOrMore(expr)
+ - expr[0, ...] is equivalent to ZeroOrMore(expr)
+ - expr[1, ...] is equivalent to OneOrMore(expr)
+ - expr[n, ...] or expr[n,] is equivalent
+ to expr*n + ZeroOrMore(expr)
+ (read as "n or more instances of expr")
+ - expr[..., n] is equivalent to expr*(0, n)
+ - expr[m, n] is equivalent to expr*(m, n)
+ Note that expr[..., n] and expr[m, n] do not raise an exception
+ if more than n exprs exist in the input stream. If this
+ behavior is desired, then write expr[..., n] + ~expr.
+
+- '...' can also be used as shorthand for SkipTo when used
+ in adding parse expressions to compose an And expression.
+
+ Literal('start') + ... + Literal('end')
+ And(['start', ..., 'end'])
+
+ are both equivalent to:
+
+ Literal('start') + SkipTo('end')("_skipped") + Literal('end')
+
+ The '...' form has the added benefit of not requiring repeating
+ the skip target expression. Note that the skipped text is
+ returned with '_skipped' as a results name.
+
- While investigating issue #93, I found that Or and
addCondition could interact to select an alternative that
is not the longest match. This is because Or first checks
diff --git a/pyparsing.py b/pyparsing.py
index 15b7c48..98f8708 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
"""
__version__ = "2.4.1"
-__versionTime__ = "02 Jul 2019 21:24 UTC"
+__versionTime__ = "04 Jul 2019 04:40 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -2036,7 +2036,36 @@ class ParserElement(object):
prints::
Hello, World! -> ['Hello', ',', 'World', '!']
+
+ ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
+
+ Literal('start') + ... + Literal('end')
+
+ is equivalent to:
+
+ Literal('start') + SkipTo('end')("_skipped") + Literal('end')
+
+ Note that the skipped text is returned with '_skipped' as a results name.
+
"""
+
+ class _PendingSkip(ParserElement):
+ # internal placeholder class to hold a place where '...' is added to a parser element,
+ # once another ParserElement is added, this placeholder will be replaced with a
+ # SkipTo
+ def __init__(self, expr):
+ super(_PendingSkip, self).__init__()
+ self.name = str(expr + '').replace('""', '...')
+ self.expr = expr
+
+ def __add__(self, other):
+ return self.expr + SkipTo(other)("_skipped") + other
+
+ def parseImpl(self, *args):
+ raise Exception("use of `...` expression without following SkipTo target expression")
+
+ if other is Ellipsis:
+ return _PendingSkip(self)
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -2049,9 +2078,12 @@ class ParserElement(object):
"""
Implementation of + operator when left operand is not a :class:`ParserElement`
"""
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
+ if other is Ellipsis:
+ return SkipTo(self)("_skipped") + self
+
+ if isinstance(other, basestring):
+ other = ParserElement._literalStringClass(other)
+ if not isinstance(other, ParserElement):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
return None
@@ -2081,6 +2113,43 @@ class ParserElement(object):
return None
return other - self
+ def __getitem__(self, key):
+ """
+ use ``[]`` indexing notation as a short form for expression repetition:
+ - ``expr[n]`` is equivalent to ``expr*n``
+ - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
+ - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
+ to ``expr*n + ZeroOrMore(expr)``
+ (read as "at least n instances of ``expr``")
+ - ``expr[..., n]`` is equivalent to ``expr*(0,n)``
+ (read as "0 to n instances of ``expr``")
+ - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
+ - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
+ - ``expr[...]`` is equivalent to ``OneOrMore(expr)``
+ ``None`` may be used in place of ``...``.
+
+ Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
+ if more than ``n`` ``expr``s exist in the input stream. If this behavior is
+ desired, then write ``expr[..., n] + ~expr``.
+ """
+
+ # convert single arg keys to tuples
+ try:
+ if isinstance(key, str):
+ key = (key,)
+ iter(key)
+ except TypeError:
+ key = (key,)
+
+ if len(key) > 2:
+ warnings.warn("only 1 or 2 index arguments supported ({}{})".format(key[:5],
+ '... [{}]'.format(len(key))
+ if len(key) > 5 else ''))
+
+ # clip to 2 elements
+ ret = self * tuple(key[:2])
+ return ret
+
def __mul__(self,other):
"""
Implementation of * operator, allows use of ``expr * 3`` in place of
@@ -2101,9 +2170,12 @@ class ParserElement(object):
occurrences. If this behavior is desired, then write
``expr*(None,n) + ~expr``
"""
+ if other is Ellipsis or other == (Ellipsis, ):
+ other = (1, None)
if isinstance(other,int):
minElements, optElements = other,0
- elif isinstance(other,tuple):
+ elif isinstance(other, tuple):
+ other = tuple(o if o is not Ellipsis else None for o in other)
other = (other + (None, None))[:2]
if other[0] is None:
other = (0, other[1])
@@ -3626,8 +3698,8 @@ class ParseExpression(ParserElement):
elif isinstance( exprs, Iterable ):
exprs = list(exprs)
# if sequence of strings provided, wrap with Literal
- if all(isinstance(expr, basestring) for expr in exprs):
- exprs = map(ParserElement._literalStringClass, exprs)
+ if any(isinstance(expr, basestring) for expr in exprs):
+ exprs = (ParserElement._literalStringClass(e) if isinstance(e, basestring) else e for e in exprs)
self.exprs = list(exprs)
else:
try:
@@ -3636,9 +3708,6 @@ class ParseExpression(ParserElement):
self.exprs = [ exprs ]
self.callPreparse = False
- def __getitem__( self, i ):
- return self.exprs[i]
-
def append( self, other ):
self.exprs.append( other )
self.strRepr = None
@@ -3745,6 +3814,18 @@ class And(ParseExpression):
self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
+ if exprs and Ellipsis in exprs:
+ tmp = []
+ for i, expr in enumerate(exprs):
+ if expr is Ellipsis:
+ if i < len(exprs)-1:
+ skipto_arg = (Empty() + exprs[i+1]).exprs[-1]
+ tmp.append(SkipTo(skipto_arg)("_skipped"))
+ else:
+ raise Exception("cannot construct And with sequence ending in ...")
+ else:
+ tmp.append(expr)
+ exprs[:] = tmp
super(And,self).__init__(exprs, savelist)
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
self.setWhitespaceChars( self.exprs[0].whiteChars )
@@ -4350,7 +4431,13 @@ class _MultipleMatch(ParseElementEnhance):
ender = stopOn
if isinstance(ender, basestring):
ender = ParserElement._literalStringClass(ender)
+ self.stopOn(ender)
+
+ def stopOn(self, ender):
+ if isinstance(ender, basestring):
+ ender = ParserElement._literalStringClass(ender)
self.not_ender = ~ender if ender is not None else None
+ return self
def parseImpl( self, instring, loc, doActions=True ):
self_expr_parse = self.expr._parse
diff --git a/simple_unit_tests.py b/simple_unit_tests.py
index 7e42003..1af7474 100644
--- a/simple_unit_tests.py
+++ b/simple_unit_tests.py
@@ -140,7 +140,9 @@ class TestCaselessLiteral(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc = "Match colors, converting to consistent case",
- expr = pp.OneOrMore(pp.CaselessLiteral("RED") | pp.CaselessLiteral("GREEN") | pp.CaselessLiteral("BLUE")),
+ expr = (pp.CaselessLiteral("RED")
+ | pp.CaselessLiteral("GREEN")
+ | pp.CaselessLiteral("BLUE"))[...],
text = "red Green BluE blue GREEN green rEd",
expected_list = ['RED', 'GREEN', 'BLUE', 'BLUE', 'GREEN', 'GREEN', 'RED'],
),
@@ -172,13 +174,13 @@ class TestCombine(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc="Parsing real numbers - fail, parsed numbers are in pieces",
- expr=pp.OneOrMore(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)),
+ expr=(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...],
text="1.2 2.3 3.1416 98.6",
expected_list=['1', '.', '2', '2', '.', '3', '3', '.', '1416', '98', '.', '6'],
),
PpTestSpec(
desc="Parsing real numbers - better, use Combine to combine multiple tokens into one",
- expr=pp.OneOrMore(pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))),
+ expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...],
text="1.2 2.3 3.1416 98.6",
expected_list=['1.2', '2.3', '3.1416', '98.6'],
),
@@ -188,19 +190,26 @@ class TestRepetition(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc = "Match several words",
- expr = pp.OneOrMore(pp.Word("x") | pp.Word("y")),
+ expr = (pp.Word("x") | pp.Word("y"))[...],
text = "xxyxxyyxxyxyxxxy",
expected_list = ['xx', 'y', 'xx', 'yy', 'xx', 'y', 'x', 'y', 'xxx', 'y'],
),
PpTestSpec(
desc = "Match several words, skipping whitespace",
+ expr = (pp.Word("x") | pp.Word("y"))[...],
+ text = "x x y xxy yxx y xyx xxy",
+ expected_list = ['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
+ ),
+ PpTestSpec(
+ desc = "Match several words, skipping whitespace (old style)",
expr = pp.OneOrMore(pp.Word("x") | pp.Word("y")),
text = "x x y xxy yxx y xyx xxy",
expected_list = ['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
),
PpTestSpec(
desc = "Match words and numbers - show use of results names to collect types of tokens",
- expr = pp.OneOrMore(pp.Word(pp.alphas)("alpha*") | pp.pyparsing_common.integer("int*")),
+ expr = (pp.Word(pp.alphas)("alpha*")
+ | pp.pyparsing_common.integer("int*"))[...],
text = "sdlfj23084ksdfs08234kjsdlfkjd0934",
expected_list = ['sdlfj', 23084, 'ksdfs', 8234, 'kjsdlfkjd', 934],
expected_dict = { 'alpha': ['sdlfj', 'ksdfs', 'kjsdlfkjd'], 'int': [23084, 8234, 934] }
@@ -249,27 +258,28 @@ class TestGroups(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc = "Define multiple results names in groups",
- expr = pp.OneOrMore(pp.Group(pp.Word(pp.alphas)("key")
- + EQ
- + pp.pyparsing_common.number("value"))),
+ expr = pp.Group(pp.Word(pp.alphas)("key")
+ + EQ
+ + pp.pyparsing_common.number("value"))[...],
text = "range=5280 long=-138.52 lat=46.91",
expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
),
PpTestSpec(
desc = "Define multiple results names in groups - use Dict to define results names using parsed keys",
- expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
- + EQ
- + pp.pyparsing_common.number))),
+ expr = pp.Dict(pp.Group(pp.Word(pp.alphas)
+ + EQ
+ + pp.pyparsing_common.number)[...]),
text = "range=5280 long=-138.52 lat=46.91",
expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
expected_dict = {'lat': 46.91, 'long': -138.52, 'range': 5280}
),
PpTestSpec(
desc = "Define multiple value types",
- expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
+ expr = pp.Dict(pp.Group(pp.Word(pp.alphas)
+ EQ
+ (pp.pyparsing_common.number | pp.oneOf("True False") | pp.QuotedString("'"))
- ))),
+ )[...]
+ ),
text = "long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
expected_list = [['long', -122.47], ['lat', 37.82], ['public', 'True'], ['name', 'Golden Gate Bridge']],
expected_dict = {'long': -122.47, 'lat': 37.82, 'public': 'True', 'name': 'Golden Gate Bridge'}
@@ -280,7 +290,7 @@ class TestParseAction(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc="Parsing real numbers - use parse action to convert to float at parse time",
- expr=pp.OneOrMore(pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)).addParseAction(lambda t: float(t[0]))),
+ expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)).addParseAction(lambda t: float(t[0]))[...],
text="1.2 2.3 3.1416 98.6",
expected_list= [1.2, 2.3, 3.1416, 98.6], # note, these are now floats, not strs
),
@@ -306,13 +316,13 @@ class TestParseAction(PyparsingExpressionTestCase):
),
PpTestSpec(
desc = "Using a built-in function that takes a sequence of strs as a parse action",
- expr = pp.OneOrMore(pp.Word(pp.hexnums, exact=2)).addParseAction(':'.join),
+ expr = pp.Word(pp.hexnums, exact=2)[...].addParseAction(':'.join),
text = "0A4B7321FE76",
expected_list = ['0A:4B:73:21:FE:76'],
),
PpTestSpec(
desc = "Using a built-in function that takes a sequence of strs as a parse action",
- expr = pp.OneOrMore(pp.Word(pp.hexnums, exact=2)).addParseAction(sorted),
+ expr = pp.Word(pp.hexnums, exact=2)[...].addParseAction(sorted),
text = "0A4B7321FE76",
expected_list = ['0A', '21', '4B', '73', '76', 'FE'],
),
@@ -331,7 +341,7 @@ class TestResultsModifyingParseAction(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc = "A parse action that adds new key-values",
- expr = pp.OneOrMore(pp.pyparsing_common.integer).addParseAction(compute_stats_parse_action),
+ expr = pp.pyparsing_common.integer[...].addParseAction(compute_stats_parse_action),
text = "27 1 14 22 89",
expected_list = [27, 1, 14, 22, 89],
expected_dict = {'ave': 30.6, 'max': 89, 'min': 1, 'sum': 153}
@@ -342,7 +352,7 @@ class TestRegex(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc="Parsing real numbers - using Regex instead of Combine",
- expr=pp.OneOrMore(pp.Regex(r'\d+\.\d+').addParseAction(lambda t: float(t[0]))),
+ expr=pp.Regex(r'\d+\.\d+').addParseAction(lambda t: float(t[0]))[...],
text="1.2 2.3 3.1416 98.6",
expected_list=[1.2, 2.3, 3.1416, 98.6], # note, these are now floats, not strs
),
@@ -352,14 +362,14 @@ class TestParseCondition(PyparsingExpressionTestCase):
tests = [
PpTestSpec(
desc = "Define a condition to only match numeric values that are multiples of 7",
- expr = pp.OneOrMore(pp.Word(pp.nums).addCondition(lambda t: int(t[0]) % 7 == 0)),
+ expr = pp.Word(pp.nums).addCondition(lambda t: int(t[0]) % 7 == 0)[...],
text = "14 35 77 12 28",
expected_list = ['14', '35', '77'],
),
PpTestSpec(
desc = "Separate conversion to int and condition into separate parse action/conditions",
- expr = pp.OneOrMore(pp.Word(pp.nums).addParseAction(lambda t: int(t[0]))
- .addCondition(lambda t: t[0] % 7 == 0)),
+ expr = pp.Word(pp.nums).addParseAction(lambda t: int(t[0]))
+ .addCondition(lambda t: t[0] % 7 == 0)[...],
text = "14 35 77 12 28",
expected_list = [14, 35, 77],
),
@@ -396,7 +406,7 @@ class TestCommonHelperExpressions(PyparsingExpressionTestCase):
),
PpTestSpec(
desc = "A counted array of words",
- expr = pp.OneOrMore(pp.countedArray(pp.Word('ab'))),
+ expr = pp.countedArray(pp.Word('ab'))[...],
text = "2 aaa bbb 0 3 abab bbaa abbab",
expected_list = [['aaa', 'bbb'], [], ['abab', 'bbaa', 'abbab']],
),
@@ -421,7 +431,7 @@ class TestCommonHelperExpressions(PyparsingExpressionTestCase):
),
PpTestSpec(
desc = "using oneOf (shortcut for Literal('a') | Literal('b') | Literal('c'))",
- expr = pp.OneOrMore(pp.oneOf("a b c")),
+ expr = pp.oneOf("a b c")[...],
text = "a b a b b a c c a b b",
expected_list = ['a', 'b', 'a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'b'],
),
@@ -464,6 +474,11 @@ suite = unittest.TestSuite(cls() for cls in test_case_classes)
# ============ MAIN ================
if __name__ == '__main__':
+ import sys
+ if sys.version_info[0] < 3:
+ print("simple_unit_tests.py requires Python 3.x - exiting...")
+ exit(0)
+
result = unittest.TextTestRunner().run(suite)
exit(0 if result.wasSuccessful() else 1)
diff --git a/unitTests.py b/unitTests.py
index 455c54b..25415ac 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -993,7 +993,7 @@ class ReStringRangeTest(ParseTestCase):
class SkipToParserTests(ParseTestCase):
def runTest(self):
- from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException
+ from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And
thingToFind = Literal('working')
testExpr = SkipTo(Literal(';'), include=True, ignore=cStyleComment) + thingToFind
@@ -1025,6 +1025,38 @@ class SkipToParserTests(ParseTestCase):
result = expr.parseString(text)
self.assertTrue(isinstance(result.prefix, str), "SkipTo created with wrong saveAsList attribute")
+ if PY_3:
+ def test(expr, test_string, expected_list, expected_dict):
+
+ try:
+ result = expr.parseString("start 123 end")
+ except Exception as pe:
+ if expected_list is not None:
+ self.assertTrue(False, "{} failed to parse {!r}".format(expr, test_string))
+ else:
+ self.assertEqual(result.asList(), expected_list)
+ self.assertEqual(result.asDict(), expected_dict)
+
+ # ellipses for SkipTo
+ a = ... + Literal("end")
+ a.streamline()
+ print_(a)
+ test(a, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+ b = Literal("start") + ... + Literal("end")
+ b.streamline()
+ print_(b)
+ test(b, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '})
+ c = Literal("start") + ...
+ print_(c)
+ test(c, "start 123 end", None, None)
+ d = And(["start", ..., "end"])
+ print_(d)
+ test(d, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '})
+ e = And([..., "end"])
+ print_(e)
+ test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+
+
class CustomQuotesTest(ParseTestCase):
def runTest(self):
from pyparsing import QuotedString
@@ -3051,6 +3083,10 @@ class OneOrMoreStopTest(ParseTestCase):
expr = BEGIN + OneOrMore(body_word, stopOn=ender) + END
self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender)
+ if PY_3:
+ expr = BEGIN + body_word[...].stopOn(ender) + END
+ self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender)
+
number = Word(nums+',.()').setName("number with optional commas")
parser= (OneOrMore(Word(alphanums+'-/.'), stopOn=number)('id').setParseAction(' '.join)
+ number('data'))
@@ -3069,6 +3105,10 @@ class ZeroOrMoreStopTest(ParseTestCase):
expr = BEGIN + ZeroOrMore(body_word, stopOn=ender) + END
self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender)
+ if PY_3:
+ expr = BEGIN + body_word[0, ...].stopOn(ender) + END
+ self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender)
+
class NestedAsDictTest(ParseTestCase):
def runTest(self):
from pyparsing import Literal, Forward, alphanums, Group, delimitedList, Dict, Word, Optional
@@ -4017,7 +4057,7 @@ class IndentedBlockTest(ParseTestCase):
value <<= pp.pyparsing_common.integer | pp.QuotedString("'") | compound_value
parser = pp.Dict(pp.OneOrMore(pp.Group(key_value)))
- text = """\
+ text = """
a = 100
b = 101
c =
@@ -4069,7 +4109,7 @@ class IndentedBlockTest2(ParseTestCase):
parser = OneOrMore(contents)
- sample = dedent("""\
+ sample = dedent("""
extra:
[test]
one0: