diff options
-rw-r--r-- | CHANGES | 29 | ||||
-rw-r--r-- | pyparsing.py | 107 | ||||
-rw-r--r-- | simple_unit_tests.py | 61 | ||||
-rw-r--r-- | unitTests.py | 46 |
4 files changed, 207 insertions, 36 deletions
@@ -4,6 +4,35 @@ Change Log Version 2.4.1 - ---------------------- +- A new shorthand notation has been added for repetition + expressions: expr[min, max], with '...' valid as a min + or max value: + - expr[...] is equivalent to OneOrMore(expr) + - expr[0, ...] is equivalent to ZeroOrMore(expr) + - expr[1, ...] is equivalent to OneOrMore(expr) + - expr[n, ...] or expr[n,] is equivalent + to expr*n + ZeroOrMore(expr) + (read as "n or more instances of expr") + - expr[..., n] is equivalent to expr*(0, n) + - expr[m, n] is equivalent to expr*(m, n) + Note that expr[..., n] and expr[m, n]do not raise an exception + if more than n exprs exist in the input stream. If this + behavior is desired, then write expr[..., n] + ~expr. + +- '...' can also be used as short hand for SkipTo when used + in adding parse expressions to compose an And expression. + + Literal('start') + ... + Literal('end') + And(['start', ..., 'end']) + + are both equivalent to: + + Literal('start') + SkipTo('end')("_skipped") + Literal('end') + + The '...' form has the added benefit of not requiring repeating + the skip target expression. Note that the skipped text is + returned with '_skipped' as a results name. + - While investigating issue #93, I found that Or and addCondition could interact to select an alternative that is not the longest match. This is because Or first checks diff --git a/pyparsing.py b/pyparsing.py index 15b7c48..98f8708 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to: """ __version__ = "2.4.1" -__versionTime__ = "02 Jul 2019 21:24 UTC" +__versionTime__ = "04 Jul 2019 04:40 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" import string @@ -2036,7 +2036,36 @@ class ParserElement(object): prints:: Hello, World! -> ['Hello', ',', 'World', '!'] + + ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. + + Literal('start') + ... + Literal('end') + + is equivalent to: + + Literal('start') + SkipTo('end')("_skipped") + Literal('end') + + Note that the skipped text is returned with '_skipped' as a results name. + """ + + class _PendingSkip(ParserElement): + # internal placeholder class to hold a place were '...' is added to a parser element, + # once another ParserElement is added, this placeholder will be replaced with a + # SkipTo + def __init__(self, expr): + super(_PendingSkip, self).__init__() + self.name = str(expr + '').replace('""', '...') + self.expr = expr + + def __add__(self, other): + return self.expr + SkipTo(other)("_skipped") + other + + def parseImpl(self, *args): + raise Exception("use of `...` expression without following SkipTo target expression") + + if other is Ellipsis: + return _PendingSkip(self) if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -2049,9 +2078,12 @@ class ParserElement(object): """ Implementation of + operator when left operand is not a :class:`ParserElement` """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): + if other is Ellipsis: + return SkipTo(self)("_skipped") + self + + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) return None @@ -2081,6 +2113,43 @@ class ParserElement(object): return None return other - self + def __getitem__(self, key): + """ + use ``[]`` indexing notation as a short form for expression repetition: + - ``expr[n]`` is equivalent to ``expr*n`` + - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` + - ``expr[n, ...]`` or ``expr[n,]`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr[..., n]`` is equivalent to ``expr*(0,n)`` + (read as "0 to n instances of ``expr``") + - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` + - ``expr[...]`` is equivalent to ``OneOrMore(expr)`` + ``None`` may be used in place of ``...``. + + Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception + if more than ``n`` ``expr``s exist in the input stream. If this behavior is + desired, then write ``expr[..., n] + ~expr``. + """ + + # convert single arg keys to tuples + try: + if isinstance(key, str): + key = (key,) + iter(key) + except TypeError: + key = (key,) + + if len(key) > 2: + warnings.warn("only 1 or 2 index arguments supported ({}{})".format(key[:5], + '... [{}]'.format(len(key)) + if len(key) > 5 else '')) + + # clip to 2 elements + ret = self * tuple(key[:2]) + return ret + def __mul__(self,other): """ Implementation of * operator, allows use of ``expr * 3`` in place of @@ -2101,9 +2170,12 @@ class ParserElement(object): occurrences. If this behavior is desired, then write ``expr*(None,n) + ~expr`` """ + if other is Ellipsis or other == (Ellipsis, ): + other = (1, None) if isinstance(other,int): minElements, optElements = other,0 - elif isinstance(other,tuple): + elif isinstance(other, tuple): + other = tuple(o if o is not Ellipsis else None for o in other) other = (other + (None, None))[:2] if other[0] is None: other = (0, other[1]) @@ -3626,8 +3698,8 @@ class ParseExpression(ParserElement): elif isinstance( exprs, Iterable ): exprs = list(exprs) # if sequence of strings provided, wrap with Literal - if all(isinstance(expr, basestring) for expr in exprs): - exprs = map(ParserElement._literalStringClass, exprs) + if any(isinstance(expr, basestring) for expr in exprs): + exprs = (ParserElement._literalStringClass(e) if isinstance(e, basestring) else e for e in exprs) self.exprs = list(exprs) else: try: @@ -3636,9 +3708,6 @@ class ParseExpression(ParserElement): self.exprs = [ exprs ] self.callPreparse = False - def __getitem__( self, i ): - return self.exprs[i] - def append( self, other ): self.exprs.append( other ) self.strRepr = None @@ -3745,6 +3814,18 @@ class And(ParseExpression): self.leaveWhitespace() def __init__( self, exprs, savelist = True ): + if exprs and Ellipsis in exprs: + tmp = [] + for i, expr in enumerate(exprs): + if expr is Ellipsis: + if i < len(exprs)-1: + skipto_arg = (Empty() + exprs[i+1]).exprs[-1] + tmp.append(SkipTo(skipto_arg)("_skipped")) + else: + raise Exception("cannot construct And with sequence ending in ...") + else: + tmp.append(expr) + exprs[:] = tmp super(And,self).__init__(exprs, savelist) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) self.setWhitespaceChars( self.exprs[0].whiteChars ) @@ -4350,7 +4431,13 @@ class _MultipleMatch(ParseElementEnhance): ender = stopOn if isinstance(ender, basestring): ender = ParserElement._literalStringClass(ender) + self.stopOn(ender) + + def stopOn(self, ender): + if isinstance(ender, basestring): + ender = ParserElement._literalStringClass(ender) self.not_ender = ~ender if ender is not None else None + return self def parseImpl( self, instring, loc, doActions=True ): self_expr_parse = self.expr._parse diff --git a/simple_unit_tests.py b/simple_unit_tests.py index 7e42003..1af7474 100644 --- a/simple_unit_tests.py +++ b/simple_unit_tests.py @@ -140,7 +140,9 @@ class TestCaselessLiteral(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc = "Match colors, converting to consistent case", - expr = pp.OneOrMore(pp.CaselessLiteral("RED") | pp.CaselessLiteral("GREEN") | pp.CaselessLiteral("BLUE")), + expr = (pp.CaselessLiteral("RED") + | pp.CaselessLiteral("GREEN") + | pp.CaselessLiteral("BLUE"))[...], text = "red Green BluE blue GREEN green rEd", expected_list = ['RED', 'GREEN', 'BLUE', 'BLUE', 'GREEN', 'GREEN', 'RED'], ), @@ -172,13 +174,13 @@ class TestCombine(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc="Parsing real numbers - fail, parsed numbers are in pieces", - expr=pp.OneOrMore(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)), + expr=(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...], text="1.2 2.3 3.1416 98.6", expected_list=['1', '.', '2', '2', '.', '3', '3', '.', '1416', '98', '.', '6'], ), PpTestSpec( desc="Parsing real numbers - better, use Combine to combine multiple tokens into one", - expr=pp.OneOrMore(pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))), + expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...], text="1.2 2.3 3.1416 98.6", expected_list=['1.2', '2.3', '3.1416', '98.6'], ), @@ -188,19 +190,26 @@ class TestRepetition(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc = "Match several words", - expr = pp.OneOrMore(pp.Word("x") | pp.Word("y")), + expr = (pp.Word("x") | pp.Word("y"))[...], text = "xxyxxyyxxyxyxxxy", expected_list = ['xx', 'y', 'xx', 'yy', 'xx', 'y', 'x', 'y', 'xxx', 'y'], ), PpTestSpec( desc = "Match several words, skipping whitespace", + expr = (pp.Word("x") | pp.Word("y"))[...], + text = "x x y xxy yxx y xyx xxy", + expected_list = ['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'], + ), + PpTestSpec( + desc = "Match several words, skipping whitespace (old style)", expr = pp.OneOrMore(pp.Word("x") | pp.Word("y")), text = "x x y xxy yxx y xyx xxy", expected_list = ['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'], ), PpTestSpec( desc = "Match words and numbers - show use of results names to collect types of tokens", - expr = pp.OneOrMore(pp.Word(pp.alphas)("alpha*") | pp.pyparsing_common.integer("int*")), + expr = (pp.Word(pp.alphas)("alpha*") + | pp.pyparsing_common.integer("int*"))[...], text = "sdlfj23084ksdfs08234kjsdlfkjd0934", expected_list = ['sdlfj', 23084, 'ksdfs', 8234, 'kjsdlfkjd', 934], expected_dict = { 'alpha': ['sdlfj', 'ksdfs', 'kjsdlfkjd'], 'int': [23084, 8234, 934] } @@ -249,27 +258,28 @@ class TestGroups(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc = "Define multiple results names in groups", - expr = pp.OneOrMore(pp.Group(pp.Word(pp.alphas)("key") - + EQ - + pp.pyparsing_common.number("value"))), + expr = pp.Group(pp.Word(pp.alphas)("key") + + EQ + + pp.pyparsing_common.number("value"))[...], text = "range=5280 long=-138.52 lat=46.91", expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]], ), PpTestSpec( desc = "Define multiple results names in groups - use Dict to define results names using parsed keys", - expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas) - + EQ - + pp.pyparsing_common.number))), + expr = pp.Dict(pp.Group(pp.Word(pp.alphas) + + EQ + + pp.pyparsing_common.number)[...]), text = "range=5280 long=-138.52 lat=46.91", expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]], expected_dict = {'lat': 46.91, 'long': -138.52, 'range': 5280} ), PpTestSpec( desc = "Define multiple value types", - expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas) + expr = pp.Dict(pp.Group(pp.Word(pp.alphas) + EQ + (pp.pyparsing_common.number | pp.oneOf("True False") | pp.QuotedString("'")) - ))), + )[...] + ), text = "long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'", expected_list = [['long', -122.47], ['lat', 37.82], ['public', 'True'], ['name', 'Golden Gate Bridge']], expected_dict = {'long': -122.47, 'lat': 37.82, 'public': 'True', 'name': 'Golden Gate Bridge'} @@ -280,7 +290,7 @@ class TestParseAction(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc="Parsing real numbers - use parse action to convert to float at parse time", - expr=pp.OneOrMore(pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)).addParseAction(lambda t: float(t[0]))), + expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)).addParseAction(lambda t: float(t[0]))[...], text="1.2 2.3 3.1416 98.6", expected_list= [1.2, 2.3, 3.1416, 98.6], # note, these are now floats, not strs ), @@ -306,13 +316,13 @@ class TestParseAction(PyparsingExpressionTestCase): ), PpTestSpec( desc = "Using a built-in function that takes a sequence of strs as a parse action", - expr = pp.OneOrMore(pp.Word(pp.hexnums, exact=2)).addParseAction(':'.join), + expr = pp.Word(pp.hexnums, exact=2)[...].addParseAction(':'.join), text = "0A4B7321FE76", expected_list = ['0A:4B:73:21:FE:76'], ), PpTestSpec( desc = "Using a built-in function that takes a sequence of strs as a parse action", - expr = pp.OneOrMore(pp.Word(pp.hexnums, exact=2)).addParseAction(sorted), + expr = pp.Word(pp.hexnums, exact=2)[...].addParseAction(sorted), text = "0A4B7321FE76", expected_list = ['0A', '21', '4B', '73', '76', 'FE'], ), @@ -331,7 +341,7 @@ class TestResultsModifyingParseAction(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc = "A parse action that adds new key-values", - expr = pp.OneOrMore(pp.pyparsing_common.integer).addParseAction(compute_stats_parse_action), + expr = pp.pyparsing_common.integer[...].addParseAction(compute_stats_parse_action), text = "27 1 14 22 89", expected_list = [27, 1, 14, 22, 89], expected_dict = {'ave': 30.6, 'max': 89, 'min': 1, 'sum': 153} @@ -342,7 +352,7 @@ class TestRegex(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc="Parsing real numbers - using Regex instead of Combine", - expr=pp.OneOrMore(pp.Regex(r'\d+\.\d+').addParseAction(lambda t: float(t[0]))), + expr=pp.Regex(r'\d+\.\d+').addParseAction(lambda t: float(t[0]))[...], text="1.2 2.3 3.1416 98.6", expected_list=[1.2, 2.3, 3.1416, 98.6], # note, these are now floats, not strs ), @@ -352,14 +362,14 @@ class TestParseCondition(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc = "Define a condition to only match numeric values that are multiples of 7", - expr = pp.OneOrMore(pp.Word(pp.nums).addCondition(lambda t: int(t[0]) % 7 == 0)), + expr = pp.Word(pp.nums).addCondition(lambda t: int(t[0]) % 7 == 0)[...], text = "14 35 77 12 28", expected_list = ['14', '35', '77'], ), PpTestSpec( desc = "Separate conversion to int and condition into separate parse action/conditions", - expr = pp.OneOrMore(pp.Word(pp.nums).addParseAction(lambda t: int(t[0])) - .addCondition(lambda t: t[0] % 7 == 0)), + expr = pp.Word(pp.nums).addParseAction(lambda t: int(t[0])) + .addCondition(lambda t: t[0] % 7 == 0)[...], text = "14 35 77 12 28", expected_list = [14, 35, 77], ), @@ -396,7 +406,7 @@ class TestCommonHelperExpressions(PyparsingExpressionTestCase): ), PpTestSpec( desc = "A counted array of words", - expr = pp.OneOrMore(pp.countedArray(pp.Word('ab'))), + expr = pp.countedArray(pp.Word('ab'))[...], text = "2 aaa bbb 0 3 abab bbaa abbab", expected_list = [['aaa', 'bbb'], [], ['abab', 'bbaa', 'abbab']], ), @@ -421,7 +431,7 @@ class TestCommonHelperExpressions(PyparsingExpressionTestCase): ), PpTestSpec( desc = "using oneOf (shortcut for Literal('a') | Literal('b') | Literal('c'))", - expr = pp.OneOrMore(pp.oneOf("a b c")), + expr = pp.oneOf("a b c")[...], text = "a b a b b a c c a b b", expected_list = ['a', 'b', 'a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'b'], ), @@ -464,6 +474,11 @@ suite = unittest.TestSuite(cls() for cls in test_case_classes) # ============ MAIN ================ if __name__ == '__main__': + import sys + if sys.version_info[0] < 3: + print("simple_unit_tests.py requires Python 3.x - exiting...") + exit(0) + result = unittest.TextTestRunner().run(suite) exit(0 if result.wasSuccessful() else 1) diff --git a/unitTests.py b/unitTests.py index 455c54b..25415ac 100644 --- a/unitTests.py +++ b/unitTests.py @@ -993,7 +993,7 @@ class ReStringRangeTest(ParseTestCase): class SkipToParserTests(ParseTestCase): def runTest(self): - from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException + from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And thingToFind = Literal('working') testExpr = SkipTo(Literal(';'), include=True, ignore=cStyleComment) + thingToFind @@ -1025,6 +1025,38 @@ class SkipToParserTests(ParseTestCase): result = expr.parseString(text) self.assertTrue(isinstance(result.prefix, str), "SkipTo created with wrong saveAsList attribute") + if PY_3: + def test(expr, test_string, expected_list, expected_dict): + + try: + result = expr.parseString("start 123 end") + except Exception as pe: + if expected_list is not None: + self.assertTrue(False, "{} failed to parse {!r}".format(expr, test_string)) + else: + self.assertEqual(result.asList(), expected_list) + self.assertEqual(result.asDict(), expected_dict) + + # ellipses for SkipTo + a = ... + Literal("end") + a.streamline() + print_(a) + test(a, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '}) + b = Literal("start") + ... + Literal("end") + b.streamline() + print_(b) + test(b, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '}) + c = Literal("start") + ... + print_(c) + test(c, "start 123 end", None, None) + d = And(["start", ..., "end"]) + print_(d) + test(d, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '}) + e = And([..., "end"]) + print_(e) + test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '}) + + class CustomQuotesTest(ParseTestCase): def runTest(self): from pyparsing import QuotedString @@ -3051,6 +3083,10 @@ class OneOrMoreStopTest(ParseTestCase): expr = BEGIN + OneOrMore(body_word, stopOn=ender) + END self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender) + if PY_3: + expr = BEGIN + body_word[...].stopOn(ender) + END + self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender) + number = Word(nums+',.()').setName("number with optional commas") parser= (OneOrMore(Word(alphanums+'-/.'), stopOn=number)('id').setParseAction(' '.join) + number('data')) @@ -3069,6 +3105,10 @@ class ZeroOrMoreStopTest(ParseTestCase): expr = BEGIN + ZeroOrMore(body_word, stopOn=ender) + END self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender) + if PY_3: + expr = BEGIN + body_word[0, ...].stopOn(ender) + END + self.assertEqual(test, expr, "Did not successfully stop on ending expression %r" % ender) + class NestedAsDictTest(ParseTestCase): def runTest(self): from pyparsing import Literal, Forward, alphanums, Group, delimitedList, Dict, Word, Optional @@ -4017,7 +4057,7 @@ class IndentedBlockTest(ParseTestCase): value <<= pp.pyparsing_common.integer | pp.QuotedString("'") | compound_value parser = pp.Dict(pp.OneOrMore(pp.Group(key_value))) - text = """\ + text = """ a = 100 b = 101 c = @@ -4069,7 +4109,7 @@ class IndentedBlockTest2(ParseTestCase): parser = OneOrMore(contents) - sample = dedent("""\ + sample = dedent(""" extra: [test] one0: |