Add support for "(expr | ...)" as a form of "on-error, skip" notation

author: ptmcg <ptmcg@austin.rr.com> 2019-07-05 18:27:55 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2019-07-05 18:27:55 -0500
commit: bafd437305c82f064940314574ef8071dc641429 (patch)
tree: 3e13d4b23dd6aa50c751d78ff9332057381662aa
parent: c5c4ca6438ec0114c187beb6b8a2298b41eb9fe2 (diff)
download: pyparsing-git-bafd437305c82f064940314574ef8071dc641429.tar.gz
3 files changed, 126 insertions, 57 deletions
diff --git a/CHANGES b/CHANGES
index eb5ed44..1b7f33b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -33,6 +33,28 @@ Version 2.4.1 -
   the skip target expression. Note that the skipped text is
   returned with '_skipped' as a results name.
 
+  '...' can also be used as a "skip forward in case of error" expression:
+
+        expr = "start" + (Word(nums).setName("int") | ...) + "end"
+
+        expr.parseString("start 456 end")
+        ['start', '456', 'end']
+
+        expr.parseString("start 456 foo 789 end")
+        ['start', '456', 'foo 789 ', 'end']
+        - _skipped: 'foo 789 '
+
+        expr.parseString("start foo end")
+        ['start', 'foo ', 'end']
+        - _skipped: 'foo '
+
+        expr.parseString("start end")
+        ['start', '', 'end']
+        - _skipped: 'missing <int>'
+
+  Note that in all the error cases, the '_skipped' results name is
+  present, showing the extra or missing items.
+
 - While investigating issue #93, I found that Or and
   addCondition could interact to select an alternative that
   is not the longest match. This is because Or first checks
@@ -51,6 +73,9 @@ Version 2.4.1 -
   example, which makes the bug-fixing process a lot easier,
   thanks!
 
+- Fixed MemoryError issues #85 and #91 with str generation for
+  Forwards. Thanks decalage2 and Harmon758 for your patience.
+
 - Modified setParseAction to accept None as an argument,
   indicating that all previously-defined parse actions for the
   expression should be cleared.
diff --git a/pyparsing.py b/pyparsing.py
index febefc2..0f6d499 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
 """
 
 __version__ = "2.4.1"
-__versionTime__ = "05 Jul 2019 15:20 UTC"
+__versionTime__ = "05 Jul 2019 23:23 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -1248,6 +1248,7 @@ def _trim_arity(func, maxargs=2):
 
     return wrapper
 
+
 class ParserElement(object):
     """Abstract base level parser element class."""
     DEFAULT_WHITE_CHARS = " \n\t\r"
@@ -2048,24 +2049,9 @@ class ParserElement(object):
         Note that the skipped text is returned with '_skipped' as a results name.
 
         """
-
-        class _PendingSkip(ParserElement):
-            # internal placeholder class to hold a place were '...' is added to a parser element,
-            # once another ParserElement is added, this placeholder will be replaced with a
-            # SkipTo
-            def __init__(self, expr):
-                super(_PendingSkip, self).__init__()
-                self.name = str(expr + '').replace('""', '...')
-                self.expr = expr
-
-            def __add__(self, other):
-                return self.expr + SkipTo(other)("_skipped") + other
-
-            def parseImpl(self, *args):
-                raise Exception("use of `...` expression without following SkipTo target expression")
-
         if other is Ellipsis:
             return _PendingSkip(self)
+
         if isinstance( other, basestring ):
             other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
@@ -2113,43 +2099,6 @@ class ParserElement(object):
             return None
         return other - self
 
-    def __getitem__(self, key):
-        """
-        use ``[]`` indexing notation as a short form for expression repetition:
-         - ``expr[n]`` is equivalent to ``expr*n``
-         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
-         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
-              to ``expr*n + ZeroOrMore(expr)``
-              (read as "at least n instances of ``expr``")
-         - ``expr[..., n]`` is equivalent to ``expr*(0,n)``
-              (read as "0 to n instances of ``expr``")
-         - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
-         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
-         - ``expr[...]`` is equivalent to ``OneOrMore(expr)``
-         ``None`` may be used in place of ``...``.
-
-        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
-        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
-        desired, then write ``expr[..., n] + ~expr``.
-       """
-
-        # convert single arg keys to tuples
-        try:
-            if isinstance(key, str):
-                key = (key,)
-            iter(key)
-        except TypeError:
-            key = (key,)
-
-        if len(key) > 2:
-            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
-                                                                                '... [{0}]'.format(len(key))
-                                                                                if len(key) > 5 else ''))
-
-        # clip to 2 elements
-        ret = self * tuple(key[:2])
-        return ret
-
     def __mul__(self,other):
         """
         Implementation of * operator, allows use of ``expr * 3`` in place of
@@ -2228,6 +2177,9 @@ class ParserElement(object):
         """
         Implementation of | operator - returns :class:`MatchFirst`
         """
+        if other is Ellipsis:
+            return _PendingSkip(self, must_skip=True)
+
         if isinstance( other, basestring ):
             other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
@@ -2302,6 +2254,43 @@ class ParserElement(object):
         """
         return NotAny( self )
 
+    def __getitem__(self, key):
+        """
+        use ``[]`` indexing notation as a short form for expression repetition:
+         - ``expr[n]`` is equivalent to ``expr*n``
+         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
+         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
+              to ``expr*n + ZeroOrMore(expr)``
+              (read as "at least n instances of ``expr``")
+         - ``expr[..., n]`` is equivalent to ``expr*(0,n)``
+              (read as "0 to n instances of ``expr``")
+         - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
+         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
+         - ``expr[...]`` is equivalent to ``OneOrMore(expr)``
+         ``None`` may be used in place of ``...``.
+
+        Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
+        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
+        desired, then write ``expr[..., n] + ~expr``.
+       """
+
+        # convert single arg keys to tuples
+        try:
+            if isinstance(key, str):
+                key = (key,)
+            iter(key)
+        except TypeError:
+            key = (key,)
+
+        if len(key) > 2:
+            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
+                                                                                '... [{0}]'.format(len(key))
+                                                                                if len(key) > 5 else ''))
+
+        # clip to 2 elements
+        ret = self * tuple(key[:2])
+        return ret
+
     def __call__(self, name=None):
         """
         Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
@@ -2669,6 +2658,38 @@ class ParserElement(object):
         return success, allResults
 
 
+class _PendingSkip(ParserElement):
+    # internal placeholder class to hold a place where '...' is added to a parser element,
+    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
+    def __init__(self, expr, must_skip=False):
+        super(_PendingSkip, self).__init__()
+        self.strRepr = str(expr + Empty()).replace('Empty', '...')
+        self.name = self.strRepr
+        self.anchor = expr
+        self.must_skip = must_skip
+
+    def __add__(self, other):
+        skipper = SkipTo(other).setName("...")("_skipped")
+        if self.must_skip:
+            def must_skip(t):
+                if not t._skipped:
+                    del t[0]
+                    t.pop("_skipped", None)
+            def show_skip(t):
+                if not t._skipped:
+                    t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
+            return (self.anchor + skipper().addParseAction(must_skip)
+                    | skipper().addParseAction(show_skip)) + other
+
+        return self.anchor + skipper + other
+
+    def __repr__(self):
+        return self.strRepr
+
+    def parseImpl(self, *args):
+        raise Exception("use of `...` expression without following SkipTo target expression")
+
+
 class Token(ParserElement):
     """Abstract :class:`ParserElement` subclass, for defining atomic
     matching patterns.
@@ -3834,6 +3855,16 @@ class And(ParseExpression):
         self.callPreparse = True
 
     def streamline(self):
+        # collapse any _PendingSkip's
+        if any(isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1]):
+            for i, e in enumerate(self.exprs[:-1]):
+                if e is None:
+                    continue
+                if (isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip)):
+                    e.exprs[-1] = e.exprs[-1] + self.exprs[i+1]
+                    self.exprs[i+1] = None
+            self.exprs = [e for e in self.exprs if e is not None]
+
         super(And, self).streamline()
         self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
         return self
diff --git a/unitTests.py b/unitTests.py
index 6411350..71b9e8b 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -993,7 +993,7 @@ class ReStringRangeTest(ParseTestCase):
 class SkipToParserTests(ParseTestCase):
     def runTest(self):
 
-        from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And
+        from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And, Word, nums, Optional
 
         thingToFind = Literal('working')
         testExpr = SkipTo(Literal(';'), include=True, ignore=cStyleComment) + thingToFind
@@ -1029,9 +1029,9 @@ class SkipToParserTests(ParseTestCase):
             def test(expr, test_string, expected_list, expected_dict):
 
                 try:
-                    result = expr.parseString("start 123 end")
+                    result = expr.parseString(test_string)
                 except Exception as pe:
-                    if expected_list is not None:
+                    if any(expected is not None for expected in (expected_list, expected_dict)):
                         self.assertTrue(False, "{} failed to parse {!r}".format(expr, test_string))
                 else:
                     self.assertEqual(result.asList(), expected_list)
@@ -1056,6 +1056,19 @@ class SkipToParserTests(ParseTestCase):
             e = eval('And([..., "end"])') #, globals(), locals())
             print_(e)
             test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '})
+            f = eval('"start" + (Word(nums).setName("int") | ...) + "end"')
+            f.streamline()
+            print_(f)
+            test(f, "start 456 end", ['start', '456', 'end'], {})
+            test(f, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '})
+            test(f, "start end", ['start', '', 'end'], {'_skipped': 'missing <int>'})
+            g = eval('"start" + (Optional(Word(nums).setName("int")) | ...) + "end"')
+            g.streamline()
+            print_(g)
+            test(g, "start 456 end", ['start', '456', 'end'], {})
+            test(g, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '})
+            test(g, "start foo end", ['start', 'foo ', 'end'], {'_skipped': 'foo '})
+            test(g, "start end", ['start', 'end'], {})
 
 
 class CustomQuotesTest(ParseTestCase):
author	ptmcg <ptmcg@austin.rr.com>	2019-07-05 18:27:55 -0500
committer	ptmcg <ptmcg@austin.rr.com>	2019-07-05 18:27:55 -0500
commit	bafd437305c82f064940314574ef8071dc641429 (patch)
tree	3e13d4b23dd6aa50c751d78ff9332057381662aa
parent	c5c4ca6438ec0114c187beb6b8a2298b41eb9fe2 (diff)
download	pyparsing-git-bafd437305c82f064940314574ef8071dc641429.tar.gz