diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-06 00:34:19 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-06 00:34:19 -0500 |
commit | ffee388a149836b1f8d128bc953c1b0363cee82b (patch) | |
tree | 8811ad21cdb6b09c0aecc42328ebd2b6f45341f0 | |
parent | 8b73519b483f155cfee69e36357833310ffe9dd6 (diff) | |
download | pyparsing-git-ffee388a149836b1f8d128bc953c1b0363cee82b.tar.gz |
Add support for multiple '...' skips in a single expression; `_skipped` results name will always return a list of skipped items
-rw-r--r-- | CHANGES | 15 | ||||
-rw-r--r-- | examples/nested_markup.py | 9 | ||||
-rw-r--r-- | pyparsing.py | 18 | ||||
-rw-r--r-- | unitTests.py | 38 |
4 files changed, 49 insertions, 31 deletions
@@ -27,11 +27,12 @@ Version 2.4.1 - are both equivalent to: - Literal('start') + SkipTo('end')("_skipped") + Literal('end') + Literal('start') + SkipTo('end')("_skipped*") + Literal('end') The '...' form has the added benefit of not requiring repeating the skip target expression. Note that the skipped text is - returned with '_skipped' as a results name. + returned with '_skipped' as a results name, and that the contents of + `_skipped` will contain a list of text from all `...`s in the expression. '...' can also be used as a "skip forward in case of error" expression: @@ -42,18 +43,20 @@ Version 2.4.1 - expr.parseString("start 456 foo 789 end") ['start', '456', 'foo 789 ', 'end'] - - _skipped: 'foo 789 ' + - _skipped: ['foo 789 '] expr.parseString("start foo end") ['start', 'foo ', 'end'] - - _skipped: 'foo ' + - _skipped: ['foo '] expr.parseString("start end") ['start', '', 'end'] - - _skipped: 'missing <int>' + - _skipped: ['missing <int>'] Note that in all the error cases, the '_skipped' results name is - present, show the extra or missing items. + present, showing a list of the extra or missing items. + + This form is only valid when used with the '|' operator. 
- While investigating issue #93, I found that Or and addCondition could interact to select an alternative that diff --git a/examples/nested_markup.py b/examples/nested_markup.py index 40267e6..6d83636 100644 --- a/examples/nested_markup.py +++ b/examples/nested_markup.py @@ -27,11 +27,12 @@ bolded = ('bold' + markup_body).setParseAction(convert_markup_to_html("<B>", "</ # another markup and parse action to parse links - again using transform string # to recursively parse any markup in the link text def convert_link_to_html(s, l, t): - t['link_text'] = wiki_markup.transformString(t['link_text']) + link_text, url = t._skipped + t['link_text'] = wiki_markup.transformString(link_text) + t['url'] = url return '<A href="{url}">{link_text}</A>'.format_map(t) -urlRef = ('link' - + '{' + pp.SkipTo('->')('link_text') + '->' + pp.SkipTo('}')('url') + '}' - ).setParseAction(convert_link_to_html) + +urlRef = (pp.Keyword('link') + '{' + ... + '->' + ... + '}').setParseAction(convert_link_to_html) # now inject all the markup bits as possible markup expressions wiki_markup <<= urlRef | italicized | bolded diff --git a/pyparsing.py b/pyparsing.py index 0f6d499..3d42289 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -2044,10 +2044,11 @@ class ParserElement(object): is equivalent to: - Literal('start') + SkipTo('end')("_skipped") + Literal('end') - - Note that the skipped text is returned with '_skipped' as a results name. + Literal('start') + SkipTo('end')("_skipped*") + Literal('end') + Note that the skipped text is returned with '_skipped' as a results name, + and to support having multiple skips in the same parser, the value returned is + a list of all skipped text. 
""" if other is Ellipsis: return _PendingSkip(self) @@ -2065,7 +2066,7 @@ class ParserElement(object): Implementation of + operator when left operand is not a :class:`ParserElement` """ if other is Ellipsis: - return SkipTo(self)("_skipped") + self + return SkipTo(self)("_skipped*") + self if isinstance(other, basestring): other = ParserElement._literalStringClass(other) @@ -2669,14 +2670,15 @@ class _PendingSkip(ParserElement): self.must_skip = must_skip def __add__(self, other): - skipper = SkipTo(other).setName("...")("_skipped") + skipper = SkipTo(other).setName("...")("_skipped*") if self.must_skip: def must_skip(t): - if not t._skipped: + if not t._skipped or t._skipped.asList() == ['']: del t[0] t.pop("_skipped", None) def show_skip(t): - if not t._skipped: + if t._skipped.asList()[-1:] == ['']: + skipped = t.pop('_skipped') t['_skipped'] = 'missing <' + repr(self.anchor) + '>' return (self.anchor + skipper().addParseAction(must_skip) | skipper().addParseAction(show_skip)) + other @@ -3842,7 +3844,7 @@ class And(ParseExpression): if expr is Ellipsis: if i < len(exprs)-1: skipto_arg = (Empty() + exprs[i+1]).exprs[-1] - tmp.append(SkipTo(skipto_arg)("_skipped")) + tmp.append(SkipTo(skipto_arg)("_skipped*")) else: raise Exception("cannot construct And with sequence ending in ...") else: diff --git a/unitTests.py b/unitTests.py index 2bbb45a..b024b14 100644 --- a/unitTests.py +++ b/unitTests.py @@ -1049,42 +1049,53 @@ class SkipToParserTests(ParseTestCase): # ellipses for SkipTo # (use eval() to avoid syntax problems when running in Py2) e = define_expr('... + Literal("end")') - test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '}) + test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': ['start 123 ']}) e = define_expr('Literal("start") + ... 
+ Literal("end")') - test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '}) + test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': ['123 ']}) e = define_expr('Literal("start") + ...') test(e, "start 123 end", None, None) e = define_expr('And(["start", ..., "end"])') - test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '}) + test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': ['123 ']}) e = define_expr('And([..., "end"])') - test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '}) + test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': ['start 123 ']}) e = define_expr('"start" + (num_word | ...) + "end"') test(e, "start 456 end", ['start', '456', 'end'], {}) - test(e, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '}) - test(e, "start end", ['start', '', 'end'], {'_skipped': 'missing <int>'}) + test(e, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': ['456 ']}) + test(e, "start end", ['start', '', 'end'], {'_skipped': ['missing <int>']}) + + # e = define_expr('"start" + (num_word | ...)("inner") + "end"') + # test(e, "start 456 end", ['start', '456', 'end'], {'inner': '456'}) e = define_expr('"start" + (alpha_word[0, ...] & num_word[0, ...] | ...) + "end"') test(e, "start 456 red end", ['start', '456', 'red', 'end'], {}) test(e, "start red 456 end", ['start', 'red', '456', 'end'], {}) - test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': '+ '}) + test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': ['+ ']}) test(e, "start red end", ['start', 'red', 'end'], {}) test(e, "start 456 end", ['start', '456', 'end'], {}) test(e, "start end", ['start', 'end'], {}) - test(e, "start 456 + end", ['start', '456', '+ ', 'end'], {'_skipped': '+ '}) + test(e, "start 456 + end", ['start', '456', '+ ', 'end'], {'_skipped': ['+ ']}) e = define_expr('"start" + (alpha_word[...] & num_word[...] | ...) 
+ "end"') test(e, "start 456 red end", ['start', '456', 'red', 'end'], {}) test(e, "start red 456 end", ['start', 'red', '456', 'end'], {}) - test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': '+ '}) - test(e, "start red end", ['start', 'red ', 'end'], {'_skipped': 'red '}) - test(e, "start 456 end", ['start', '456 ', 'end'], {'_skipped': '456 '}) - test(e, "start end", ['start', '', 'end'], {'_skipped': 'missing <{{alpha}... & {int}...}>'}) - test(e, "start 456 + end", ['start', '456 + ', 'end'], {'_skipped': '456 + '}) + test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': ['+ ']}) + test(e, "start red end", ['start', 'red ', 'end'], {'_skipped': ['red ']}) + test(e, "start 456 end", ['start', '456 ', 'end'], {'_skipped': ['456 ']}) + test(e, "start end", ['start', '', 'end'], {'_skipped': ['missing <{{alpha}... & {int}...}>']}) + test(e, "start 456 + end", ['start', '456 + ', 'end'], {'_skipped': ['456 + ']}) + + e = define_expr('"start" + (alpha_word | ...) + (num_word | ...) + "end"') + test(e, "start red 456 end", ['start', 'red', '456', 'end'], {}) + test(e, "start red end", ['start', 'red', '', 'end'], {'_skipped': ['missing <int>']}) + test(e, "start end", ['start', '', '', 'end'], {'_skipped': ['missing <alpha>', 'missing <int>']}) + + e = define_expr('Literal("start") + ... + "+" + ... + "end"') + test(e, "start red + 456 end", ['start', 'red ', '+', '456 ', 'end'], {'_skipped': ['red ', '456 ']}) class CustomQuotesTest(ParseTestCase): @@ -4661,6 +4672,7 @@ if __name__ == '__main__': # run specific tests by including them in this list, otherwise # all tests will be run testclasses = [ + SkipToParserTests ] if not testclasses: |