diff options
author | ptmcg <ptmcg@austin.rr.com> | 2019-07-05 18:27:55 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2019-07-05 18:27:55 -0500 |
commit | bafd437305c82f064940314574ef8071dc641429 (patch) | |
tree | 3e13d4b23dd6aa50c751d78ff9332057381662aa | |
parent | c5c4ca6438ec0114c187beb6b8a2298b41eb9fe2 (diff) | |
download | pyparsing-git-bafd437305c82f064940314574ef8071dc641429.tar.gz |
Add support for "(expr | ...)" as a form of "on-error, skip" notation
-rw-r--r-- | CHANGES | 25 | ||||
-rw-r--r-- | pyparsing.py | 139 | ||||
-rw-r--r-- | unitTests.py | 19 |
3 files changed, 126 insertions, 57 deletions
@@ -33,6 +33,28 @@ Version 2.4.1 - the skip target expression. Note that the skipped text is returned with '_skipped' as a results name. + '...' can also be used as a "skip forward in case of error" expression: + + expr = "start" + (Word(nums).setName("int") | ...) + "end" + + expr.parseString("start 456 end") + ['start', '456', 'end'] + + expr.parseString("start 456 foo 789 end") + ['start', '456', 'foo 789 ', 'end'] + - _skipped: 'foo 789 ' + + expr.parseString("start foo end") + ['start', 'foo ', 'end'] + - _skipped: 'foo ' + + expr.parseString("start end") + ['start', '', 'end'] + - _skipped: 'missing <int>' + + Note that in all the error cases, the '_skipped' results name is + present, showing the extra or missing items. + - While investigating issue #93, I found that Or and addCondition could interact to select an alternative that is not the longest match. This is because Or first checks @@ -51,6 +73,9 @@ Version 2.4.1 - example, which makes the bug-fixing process a lot easier, thanks! +- Fixed MemoryError issues #85 and #91 with str generation for + Forwards. Thanks decalage2 and Harmon758 for your patience. + - Modified setParseAction to accept None as an argument, indicating that all previously-defined parse actions for the expression should be cleared. diff --git a/pyparsing.py b/pyparsing.py index febefc2..0f6d499 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to: """ __version__ = "2.4.1" -__versionTime__ = "05 Jul 2019 15:20 UTC" +__versionTime__ = "05 Jul 2019 23:23 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" import string @@ -1248,6 +1248,7 @@ def _trim_arity(func, maxargs=2): return wrapper + class ParserElement(object): """Abstract base level parser element class.""" DEFAULT_WHITE_CHARS = " \n\t\r" @@ -2048,24 +2049,9 @@ class ParserElement(object): Note that the skipped text is returned with '_skipped' as a results name. 
""" - - class _PendingSkip(ParserElement): - # internal placeholder class to hold a place were '...' is added to a parser element, - # once another ParserElement is added, this placeholder will be replaced with a - # SkipTo - def __init__(self, expr): - super(_PendingSkip, self).__init__() - self.name = str(expr + '').replace('""', '...') - self.expr = expr - - def __add__(self, other): - return self.expr + SkipTo(other)("_skipped") + other - - def parseImpl(self, *args): - raise Exception("use of `...` expression without following SkipTo target expression") - if other is Ellipsis: return _PendingSkip(self) + if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -2113,43 +2099,6 @@ class ParserElement(object): return None return other - self - def __getitem__(self, key): - """ - use ``[]`` indexing notation as a short form for expression repetition: - - ``expr[n]`` is equivalent to ``expr*n`` - - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` - - ``expr[n, ...]`` or ``expr[n,]`` is equivalent - to ``expr*n + ZeroOrMore(expr)`` - (read as "at least n instances of ``expr``") - - ``expr[..., n]`` is equivalent to ``expr*(0,n)`` - (read as "0 to n instances of ``expr``") - - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)`` - - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` - - ``expr[...]`` is equivalent to ``OneOrMore(expr)`` - ``None`` may be used in place of ``...``. - - Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception - if more than ``n`` ``expr``s exist in the input stream. If this behavior is - desired, then write ``expr[..., n] + ~expr``. - """ - - # convert single arg keys to tuples - try: - if isinstance(key, str): - key = (key,) - iter(key) - except TypeError: - key = (key,) - - if len(key) > 2: - warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], - '... 
[{0}]'.format(len(key)) - if len(key) > 5 else '')) - - # clip to 2 elements - ret = self * tuple(key[:2]) - return ret - def __mul__(self,other): """ Implementation of * operator, allows use of ``expr * 3`` in place of @@ -2228,6 +2177,9 @@ class ParserElement(object): """ Implementation of | operator - returns :class:`MatchFirst` """ + if other is Ellipsis: + return _PendingSkip(self, must_skip=True) + if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -2302,6 +2254,43 @@ class ParserElement(object): """ return NotAny( self ) + def __getitem__(self, key): + """ + use ``[]`` indexing notation as a short form for expression repetition: + - ``expr[n]`` is equivalent to ``expr*n`` + - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` + - ``expr[n, ...]`` or ``expr[n,]`` is equivalent + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") + - ``expr[..., n]`` is equivalent to ``expr*(0,n)`` + (read as "0 to n instances of ``expr``") + - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` + - ``expr[...]`` is equivalent to ``OneOrMore(expr)`` + ``None`` may be used in place of ``...``. + + Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception + if more than ``n`` ``expr``s exist in the input stream. If this behavior is + desired, then write ``expr[..., n] + ~expr``. + """ + + # convert single arg keys to tuples + try: + if isinstance(key, str): + key = (key,) + iter(key) + except TypeError: + key = (key,) + + if len(key) > 2: + warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], + '... [{0}]'.format(len(key)) + if len(key) > 5 else '')) + + # clip to 2 elements + ret = self * tuple(key[:2]) + return ret + def __call__(self, name=None): """ Shortcut for :class:`setResultsName`, with ``listAllMatches=False``. 
@@ -2669,6 +2658,38 @@ class ParserElement(object): return success, allResults +class _PendingSkip(ParserElement): + # internal placeholder class to hold a place where '...' is added to a parser element, + # once another ParserElement is added, this placeholder will be replaced with a SkipTo + def __init__(self, expr, must_skip=False): + super(_PendingSkip, self).__init__() + self.strRepr = str(expr + Empty()).replace('Empty', '...') + self.name = self.strRepr + self.anchor = expr + self.must_skip = must_skip + + def __add__(self, other): + skipper = SkipTo(other).setName("...")("_skipped") + if self.must_skip: + def must_skip(t): + if not t._skipped: + del t[0] + t.pop("_skipped", None) + def show_skip(t): + if not t._skipped: + t['_skipped'] = 'missing <' + repr(self.anchor) + '>' + return (self.anchor + skipper().addParseAction(must_skip) + | skipper().addParseAction(show_skip)) + other + + return self.anchor + skipper + other + + def __repr__(self): + return self.strRepr + + def parseImpl(self, *args): + raise Exception("use of `...` expression without following SkipTo target expression") + + class Token(ParserElement): """Abstract :class:`ParserElement` subclass, for defining atomic matching patterns. 
@@ -3834,6 +3855,16 @@ class And(ParseExpression): self.callPreparse = True def streamline(self): + # collapse any _PendingSkip's + if any(isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1]): + for i, e in enumerate(self.exprs[:-1]): + if e is None: + continue + if (isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip)): + e.exprs[-1] = e.exprs[-1] + self.exprs[i+1] + self.exprs[i+1] = None + self.exprs = [e for e in self.exprs if e is not None] + super(And, self).streamline() self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) return self diff --git a/unitTests.py b/unitTests.py index 6411350..71b9e8b 100644 --- a/unitTests.py +++ b/unitTests.py @@ -993,7 +993,7 @@ class ReStringRangeTest(ParseTestCase): class SkipToParserTests(ParseTestCase): def runTest(self): - from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And + from pyparsing import Literal, SkipTo, cStyleComment, ParseBaseException, And, Word, nums, Optional thingToFind = Literal('working') testExpr = SkipTo(Literal(';'), include=True, ignore=cStyleComment) + thingToFind @@ -1029,9 +1029,9 @@ class SkipToParserTests(ParseTestCase): def test(expr, test_string, expected_list, expected_dict): try: - result = expr.parseString("start 123 end") + result = expr.parseString(test_string) except Exception as pe: - if expected_list is not None: + if any(expected is not None for expected in (expected_list, expected_dict)): self.assertTrue(False, "{} failed to parse {!r}".format(expr, test_string)) else: self.assertEqual(result.asList(), expected_list) @@ -1056,6 +1056,19 @@ class SkipToParserTests(ParseTestCase): e = eval('And([..., "end"])') #, globals(), locals()) print_(e) test(e, "start 123 end", ['start 123 ', 'end'], {'_skipped': 'start 123 '}) + f = eval('"start" + (Word(nums).setName("int") | ...) 
+ "end"') + f.streamline() + print_(f) + test(f, "start 456 end", ['start', '456', 'end'], {}) + test(f, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '}) + test(f, "start end", ['start', '', 'end'], {'_skipped': 'missing <int>'}) + g = eval('"start" + (Optional(Word(nums).setName("int")) | ...) + "end"') + g.streamline() + print_(g) + test(g, "start 456 end", ['start', '456', 'end'], {}) + test(g, "start 123 456 end", ['start', '123', '456 ', 'end'], {'_skipped': '456 '}) + test(g, "start foo end", ['start', 'foo ', 'end'], {'_skipped': 'foo '}) + test(g, "start end", ['start', 'end'], {}) class CustomQuotesTest(ParseTestCase): |