summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2019-07-05 18:27:55 -0500
committer ptmcg <ptmcg@austin.rr.com> 2019-07-05 18:27:55 -0500
commit bafd437305c82f064940314574ef8071dc641429 (patch)
tree 3e13d4b23dd6aa50c751d78ff9332057381662aa
parent c5c4ca6438ec0114c187beb6b8a2298b41eb9fe2 (diff)
download pyparsing-git-bafd437305c82f064940314574ef8071dc641429.tar.gz
Add support for "(expr | ...)" as a form of "on-error, skip" notation
-rw-r--r-- CHANGES 25
-rw-r--r-- pyparsing.py 139
-rw-r--r-- unitTests.py 19
3 files changed, 126 insertions, 57 deletions
diff --git a/CHANGES b/CHANGES
index eb5ed44..1b7f33b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -33,6 +33,28 @@ Version 2.4.1 -
the skip target expression. Note that the skipped text is
returned with '_skipped' as a results name.
+ '...' can also be used as a "skip forward in case of error" expression:
+
+ expr = "start" + (Word(nums).setName("int") | ...) + "end"
+
+ expr.parseString("start 456 end")
+ ['start', '456', 'end']
+
+ expr.parseString("start 456 foo 789 end")
+ ['start', '456', 'foo 789 ', 'end']
+ - _skipped: 'foo 789 '
+
+ expr.parseString("start foo end")
+ ['start', 'foo ', 'end']
+ - _skipped: 'foo '
+
+ expr.parseString("start end")
+ ['start', '', 'end']
+ - _skipped: 'missing <int>'
+
+ Note that in all the error cases, the '_skipped' results name is
+ present, showing the extra or missing items.
+
- While investigating issue #93, I found that Or and
addCondition could interact to select an alternative that
is not the longest match. This is because Or first checks
@@ -51,6 +73,9 @@ Version 2.4.1 -
example, which makes the bug-fixing process a lot easier,
thanks!
+- Fixed MemoryError issues #85 and #91 with str generation for
+ Forwards. Thanks decalage2 and Harmon758 for your patience.
+
- Modified setParseAction to accept None as an argument,
indicating that all previously-defined parse actions for the
expression should be cleared.
diff --git a/pyparsing.py b/pyparsing.py
index febefc2..0f6d499 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
"""
__version__ = "2.4.1"
-__versionTime__ = "05 Jul 2019 15:20 UTC"
+__versionTime__ = "05 Jul 2019 23:23 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -1248,6 +1248,7 @@ def _trim_arity(func, maxargs=2):
return wrapper
+
class ParserElement(object):
"""Abstract base level parser element class."""
DEFAULT_WHITE_CHARS = " \n\t\r"
@@ -2048,24 +2049,9 @@ class ParserElement(object):
Note that the skipped text is returned with '_skipped' as a results name.
"""
-
- class _PendingSkip(ParserElement):
- # internal placeholder class to hold a place were '...' is added to a parser element,
- # once another ParserElement is added, this placeholder will be replaced with a
- # SkipTo
- def __init__(self, expr):
- super(_PendingSkip, self).__init__()
- self.name = str(expr + '').replace('""', '...')
- self.expr = expr
-
- def __add__(self, other):
- return self.expr + SkipTo(other)("_skipped") + other
-
- def parseImpl(self, *args):
- raise Exception("use of `...` expression without following SkipTo target expression")
-
if other is Ellipsis:
return _PendingSkip(self)
+
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -2113,43 +2099,6 @@ class ParserElement(object):
return None
return other - self
- def __getitem__(self, key):
- """
- use ``[]`` indexing notation as a short form for expression repetition:
- - ``expr[n]`` is equivalent to ``expr*n``
- - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
- - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
- to ``expr*n + ZeroOrMore(expr)``
- (read as "at least n instances of ``expr``")
- - ``expr[..., n]`` is equivalent to ``expr*(0,n)``
- (read as "0 to n instances of ``expr``")
- - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
- - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
- - ``expr[...]`` is equivalent to ``OneOrMore(expr)``
- ``None`` may be used in place of ``...``.
-
- Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
- if more than ``n`` ``expr``s exist in the input stream. If this behavior is
- desired, then write ``expr[..., n] + ~expr``.
- """
-
- # convert single arg keys to tuples
- try:
- if isinstance(key, str):
- key = (key,)
- iter(key)
- except TypeError:
- key = (key,)
-
- if len(key) > 2:
- warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
- '... [{0}]'.format(len(key))
- if len(key) > 5 else ''))
-
- # clip to 2 elements
- ret = self * tuple(key[:2])
- return ret
-
def __mul__(self,other):
"""
Implementation of * operator, allows use of ``expr * 3`` in place of
@@ -2228,6 +2177,9 @@ class ParserElement(object):
"""
Implementation of | operator - returns :class:`MatchFirst`
"""
+ if other is Ellipsis:
+ return _PendingSkip(self, must_skip=True)
+
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -2302,6 +2254,43 @@ class ParserElement(object):
"""
return NotAny( self )
+ def __getitem__(self, key):
+ """
+ use ``[]`` indexing notation as a short form for expression repetition:
+ - ``expr[n]`` is equivalent to ``expr*n``
+ - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
+ - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
+ to ``expr*n + ZeroOrMore(expr)``
+ (read as "at least n instances of ``expr``")
+ - ``expr[..., n]`` is equivalent to ``expr*(0,n)``
+ (read as "0 to n instances of ``expr``")
+ - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
+ - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
+ - ``expr[...]`` is equivalent to ``OneOrMore(expr)``
+ ``None`` may be used in place of ``...``.
+
+ Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
+ if more than ``n`` ``expr``s exist in the input stream. If this behavior is
+ desired, then write ``expr[..., n] + ~expr``.
+ """
+
+ # convert single arg keys to tuples
+ try:
+ if isinstance(key, str):
+ key = (key,)
+ iter(key)
+ except TypeError:
+ key = (key,)
+
+ if len(key) > 2:
+ warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
+ '... [{0}]'.format(len(key))
+ if len(key) > 5 else ''))
+
+ # clip to 2 elements
+ ret = self * tuple(key[:2])
+ return ret
+
def __call__(self, name=None):
"""
Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
@@ -2669,6 +2658,38 @@ class ParserElement(object):
return success, allResults
+class _PendingSkip(ParserElement):
+ # internal placeholder class to hold a place where '...' is added to a parser element,
+ # once another ParserElement is added, this placeholder will be replaced with a SkipTo
+ def __init__(self, expr, must_skip=False):
+ super(_PendingSkip, self).__init__()
+ self.strRepr = str(expr + Empty()).replace('Empty', '...')
+ self.name = self.strRepr
+ self.anchor = expr
+ self.must_skip = must_skip
+
+ def __add__(self, other):
+ skipper = SkipTo(other).setName("...")("_skipped")
+ if self.must_skip:
+ def must_skip(t):
+ if not t._skipped:
+ del t[0]
+ t.pop("_skipped", None)
+ def show_skip(t):
+ if not t._skipped:
+ t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
+ return (self.anchor + skipper().addParseAction(must_skip)
+ | skipper().addParseAction(show_skip)) + other
+
+ return self.anchor + skipper + other
+
+ def __repr__(self):
+ return self.strRepr
+
+ def parseImpl(self, *args):
+ raise Exception("use of `...` expression without following SkipTo target expression")
+
+
class Token(ParserElement):
"""Abstract :class:`ParserElement` subclass, for defining atomic
matching patterns.
@@ -3834,6 +3855,16 @@ class And(ParseExpression):
self.callPreparse = True
def streamline(self):
+ # collapse any _PendingSkip's
+ if any(isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1]):
+ for i, e in enumerate(self.exprs[:-1]):
+ if e is None:
+ continue
+ if (isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip)):
+ e.exprs[-1] = e.exprs[-1] + self.exprs[i+1]
+ self.exprs[i+1] = None
+ self.exprs = [e for e in self.exprs if e is not None]
+
super(And, self).streamline()
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
return self
diff --git a/unitTests.py b/unitTests.py
index 6411350..71b9e8b 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -993,7 +993,7 @@ class ReStringRangeTest(ParseTestCase):
class SkipToParserTests(ParseTestCase):
def runTest(self):
- from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And
+ from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And, Word, nums, Optional
thingToFind = Literal('working')
testExpr = SkipTo(Literal(';'), include=True, ignore=cStyleComment) + thingToFind
@@ -1029,9 +1029,9 @@ class SkipToParserTests(ParseTestCase):
def test(expr, test_string, expected_list, expected_dict):
try:
- result = expr.parseString("start 123 end")
+ result = expr.parseString(test_string)
except Exception as pe:
- if expected_list is not None:
+ if any(expected is not None for expected in (expected_list, expected_dict)):
self.assertTrue(False, "{} failed to parse {!r}".format(expr, test_string))
else:
self.assertEqual(result.asList(), expected_list)
@@ -1056,6 +1056,19 @@ class SkipToParserTests(ParseTestCase):
e = eval('And([..., "end"])') #, globals(), locals())
print_(e)
test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+ f = eval('"start" + (Word(nums).setName("int") | ...) + "end"')
+ f.streamline()
+ print_(f)
+ test(f, "start 456 end", ['start', '456', 'end'], {})
+ test(f, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '})
+ test(f, "start end", ['start', '', 'end'], {'_skipped': 'missing <int>'})
+ g = eval('"start" + (Optional(Word(nums).setName("int")) | ...) + "end"')
+ g.streamline()
+ print_(g)
+ test(g, "start 456 end", ['start', '456', 'end'], {})
+ test(g, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '})
+ test(g, "start foo end", ['start', 'foo ', 'end'], {'_skipped': 'foo '})
+ test(g, "start end", ['start', 'end'], {})
class CustomQuotesTest(ParseTestCase):