summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul McGuire <ptmcg@austin.rr.com> 2019-07-06 00:34:19 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2019-07-06 00:34:19 -0500
commit: ffee388a149836b1f8d128bc953c1b0363cee82b (patch)
tree: 8811ad21cdb6b09c0aecc42328ebd2b6f45341f0
parent: 8b73519b483f155cfee69e36357833310ffe9dd6 (diff)
download: pyparsing-git-ffee388a149836b1f8d128bc953c1b0363cee82b.tar.gz
Add support for multiple '...' skips in a single expression; the `_skipped` results name will always return a list of skipped items
-rw-r--r-- CHANGES 15
-rw-r--r-- examples/nested_markup.py 9
-rw-r--r-- pyparsing.py 18
-rw-r--r-- unitTests.py 38
4 files changed, 49 insertions, 31 deletions
diff --git a/CHANGES b/CHANGES
index 1b7f33b..cee1b99 100644
--- a/CHANGES
+++ b/CHANGES
@@ -27,11 +27,12 @@ Version 2.4.1 -
are both equivalent to:
- Literal('start') + SkipTo('end')("_skipped") + Literal('end')
+ Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
The '...' form has the added benefit of not requiring repeating
the skip target expression. Note that the skipped text is
- returned with '_skipped' as a results name.
+ returned with '_skipped' as a results name, and that the contents of
+ `_skipped` will contain a list of text from all `...`s in the expression.
'...' can also be used as a "skip forward in case of error" expression:
@@ -42,18 +43,20 @@ Version 2.4.1 -
expr.parseString("start 456 foo 789 end")
['start', '456', 'foo 789 ', 'end']
- - _skipped: 'foo 789 '
+ - _skipped: ['foo 789 ']
expr.parseString("start foo end")
['start', 'foo ', 'end']
- - _skipped: 'foo '
+ - _skipped: ['foo ']
expr.parseString("start end")
['start', '', 'end']
- - _skipped: 'missing <int>'
+ - _skipped: ['missing <int>']
Note that in all the error cases, the '_skipped' results name is
- present, show the extra or missing items.
+ present, showing a list of the extra or missing items.
+
+ This form is only valid when used with the '|' operator.
- While investigating issue #93, I found that Or and
addCondition could interact to select an alternative that
diff --git a/examples/nested_markup.py b/examples/nested_markup.py
index 40267e6..6d83636 100644
--- a/examples/nested_markup.py
+++ b/examples/nested_markup.py
@@ -27,11 +27,12 @@ bolded = ('bold' + markup_body).setParseAction(convert_markup_to_html("<B>", "</
# another markup and parse action to parse links - again using transform string
# to recursively parse any markup in the link text
def convert_link_to_html(s, l, t):
- t['link_text'] = wiki_markup.transformString(t['link_text'])
+ link_text, url = t._skipped
+ t['link_text'] = wiki_markup.transformString(link_text)
+ t['url'] = url
return '<A href="{url}">{link_text}</A>'.format_map(t)
-urlRef = ('link'
- + '{' + pp.SkipTo('->')('link_text') + '->' + pp.SkipTo('}')('url') + '}'
- ).setParseAction(convert_link_to_html)
+
+urlRef = (pp.Keyword('link') + '{' + ... + '->' + ... + '}').setParseAction(convert_link_to_html)
# now inject all the markup bits as possible markup expressions
wiki_markup <<= urlRef | italicized | bolded
diff --git a/pyparsing.py b/pyparsing.py
index 0f6d499..3d42289 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -2044,10 +2044,11 @@ class ParserElement(object):
is equivalent to:
- Literal('start') + SkipTo('end')("_skipped") + Literal('end')
-
- Note that the skipped text is returned with '_skipped' as a results name.
+ Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
+ Note that the skipped text is returned with '_skipped' as a results name,
+ and to support having multiple skips in the same parser, the value returned is
+ a list of all skipped text.
"""
if other is Ellipsis:
return _PendingSkip(self)
@@ -2065,7 +2066,7 @@ class ParserElement(object):
Implementation of + operator when left operand is not a :class:`ParserElement`
"""
if other is Ellipsis:
- return SkipTo(self)("_skipped") + self
+ return SkipTo(self)("_skipped*") + self
if isinstance(other, basestring):
other = ParserElement._literalStringClass(other)
@@ -2669,14 +2670,15 @@ class _PendingSkip(ParserElement):
self.must_skip = must_skip
def __add__(self, other):
- skipper = SkipTo(other).setName("...")("_skipped")
+ skipper = SkipTo(other).setName("...")("_skipped*")
if self.must_skip:
def must_skip(t):
- if not t._skipped:
+ if not t._skipped or t._skipped.asList() == ['']:
del t[0]
t.pop("_skipped", None)
def show_skip(t):
- if not t._skipped:
+ if t._skipped.asList()[-1:] == ['']:
+ skipped = t.pop('_skipped')
t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
return (self.anchor + skipper().addParseAction(must_skip)
| skipper().addParseAction(show_skip)) + other
@@ -3842,7 +3844,7 @@ class And(ParseExpression):
if expr is Ellipsis:
if i < len(exprs)-1:
skipto_arg = (Empty() + exprs[i+1]).exprs[-1]
- tmp.append(SkipTo(skipto_arg)("_skipped"))
+ tmp.append(SkipTo(skipto_arg)("_skipped*"))
else:
raise Exception("cannot construct And with sequence ending in ...")
else:
diff --git a/unitTests.py b/unitTests.py
index 2bbb45a..b024b14 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -1049,42 +1049,53 @@ class SkipToParserTests(ParseTestCase):
# ellipses for SkipTo
# (use eval() to avoid syntax problems when running in Py2)
e = define_expr('... + Literal("end")')
- test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+ test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': ['start 123 ']})
e = define_expr('Literal("start") + ... + Literal("end")')
- test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '})
+ test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': ['123 ']})
e = define_expr('Literal("start") + ...')
test(e, "start 123 end", None, None)
e = define_expr('And(["start", ..., "end"])')
- test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': '123 '})
+ test(e, "start 123 end", ['start', '123 ', 'end'], {'_skipped': ['123 ']})
e = define_expr('And([..., "end"])')
- test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+ test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': ['start 123 ']})
e = define_expr('"start" + (num_word | ...) + "end"')
test(e, "start 456 end", ['start', '456', 'end'], {})
- test(e, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '})
- test(e, "start end", ['start', '', 'end'], {'_skipped': 'missing <int>'})
+ test(e, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': ['456 ']})
+ test(e, "start end", ['start', '', 'end'], {'_skipped': ['missing <int>']})
+
+ # e = define_expr('"start" + (num_word | ...)("inner") + "end"')
+ # test(e, "start 456 end", ['start', '456', 'end'], {'inner': '456'})
e = define_expr('"start" + (alpha_word[0, ...] & num_word[0, ...] | ...) + "end"')
test(e, "start 456 red end", ['start', '456', 'red', 'end'], {})
test(e, "start red 456 end", ['start', 'red', '456', 'end'], {})
- test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': '+ '})
+ test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': ['+ ']})
test(e, "start red end", ['start', 'red', 'end'], {})
test(e, "start 456 end", ['start', '456', 'end'], {})
test(e, "start end", ['start', 'end'], {})
- test(e, "start 456 + end", ['start', '456', '+ ', 'end'], {'_skipped': '+ '})
+ test(e, "start 456 + end", ['start', '456', '+ ', 'end'], {'_skipped': ['+ ']})
e = define_expr('"start" + (alpha_word[...] & num_word[...] | ...) + "end"')
test(e, "start 456 red end", ['start', '456', 'red', 'end'], {})
test(e, "start red 456 end", ['start', 'red', '456', 'end'], {})
- test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': '+ '})
- test(e, "start red end", ['start', 'red ', 'end'], {'_skipped': 'red '})
- test(e, "start 456 end", ['start', '456 ', 'end'], {'_skipped': '456 '})
- test(e, "start end", ['start', '', 'end'], {'_skipped': 'missing <{{alpha}... & {int}...}>'})
- test(e, "start 456 + end", ['start', '456 + ', 'end'], {'_skipped': '456 + '})
+ test(e, "start 456 red + end", ['start', '456', 'red', '+ ', 'end'], {'_skipped': ['+ ']})
+ test(e, "start red end", ['start', 'red ', 'end'], {'_skipped': ['red ']})
+ test(e, "start 456 end", ['start', '456 ', 'end'], {'_skipped': ['456 ']})
+ test(e, "start end", ['start', '', 'end'], {'_skipped': ['missing <{{alpha}... & {int}...}>']})
+ test(e, "start 456 + end", ['start', '456 + ', 'end'], {'_skipped': ['456 + ']})
+
+ e = define_expr('"start" + (alpha_word | ...) + (num_word | ...) + "end"')
+ test(e, "start red 456 end", ['start', 'red', '456', 'end'], {})
+ test(e, "start red end", ['start', 'red', '', 'end'], {'_skipped': ['missing <int>']})
+ test(e, "start end", ['start', '', '', 'end'], {'_skipped': ['missing <alpha>', 'missing <int>']})
+
+ e = define_expr('Literal("start") + ... + "+" + ... + "end"')
+ test(e, "start red + 456 end", ['start', 'red ', '+', '456 ', 'end'], {'_skipped': ['red ', '456 ']})
class CustomQuotesTest(ParseTestCase):
@@ -4661,6 +4672,7 @@ if __name__ == '__main__':
# run specific tests by including them in this list, otherwise
# all tests will be run
testclasses = [
+ SkipToParserTests
]
if not testclasses: