diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-02 16:32:22 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-02 16:32:22 -0500 |
commit | f20f8c038bebb81e7184ac87a6f13d5d81d3b495 (patch) | |
tree | 8bb5dcb34b8b87d15df62ee7e85adc331878290c | |
parent | f5de46966a55b8c651f7ff92440665af02567df4 (diff) | |
download | pyparsing-git-f20f8c038bebb81e7184ac87a6f13d5d81d3b495.tar.gz |
Issue #93 - interaction of Or and addCondition sometimes selects an alternative that is not the longest match
-rw-r--r-- | CHANGES | 13 | ||||
-rw-r--r-- | pyparsing.py | 69 | ||||
-rw-r--r-- | unitTests.py | 18 |
3 files changed, 76 insertions, 24 deletions
@@ -4,6 +4,19 @@ Change Log Version 2.4.1 - ---------------------- +- While investigating issue #93, I found that Or and + addCondition could interact to select an alternative that + is not the longest match. This is because Or first checks + all alternatives for matches without running attached + parse actions or conditions, orders by longest match, and + then rechecks for matches with conditions and parse actions. + Some expressions, when checking with conditions, may end + up matching on a shorter token list than originally matched, + but would be selected because of its original priority. + This matching code has been expanded to do more extensive + searching for matches when a second-pass check matches a + smaller list than in the first pass. + - Fixed issue #87, a regression in indented block. Reported by Renz Bagaporo, who submitted a very nice repro example, which makes the bug-fixing process a lot easier, diff --git a/pyparsing.py b/pyparsing.py index fe9b8cb..15b7c48 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -96,7 +96,7 @@ classes inherit from. 
Use the docstrings for examples of how to: """ __version__ = "2.4.1" -__versionTime__ = "29 Jun 2019 06:56 UTC" +__versionTime__ = "02 Jul 2019 21:24 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" import string @@ -111,6 +111,7 @@ import pprint import traceback import types from datetime import datetime +from operator import itemgetter try: # Python 3 @@ -1538,26 +1539,30 @@ class ParserElement(object): #~ @profile def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): + TRY, MATCH, FAIL = 0, 1, 2 debugging = ( self.debug ) #and doActions ) if debugging or self.failAction: #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc + if self.debugActions[TRY]: + self.debugActions[TRY]( instring, loc, self ) try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException as err: + if callPreParse and self.callPreparse: + preloc = self.preParse(instring, loc) + else: + preloc = loc + tokensStart = preloc + if self.mayIndexError or preloc >= len(instring): + try: + loc, tokens = self.parseImpl(instring, preloc, doActions) + except IndexError: + raise ParseException(instring, len(instring), self.errmsg, self) + else: + loc, tokens = self.parseImpl(instring, preloc, doActions) + except Exception as err: #~ print ("Exception raised:", err) - if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) + if self.debugActions[FAIL]: + self.debugActions[FAIL]( instring, tokensStart, self, err ) if self.failAction: self.failAction( instring, tokensStart, self, err ) raise @@ -1594,10 +1599,10 @@ class ParserElement(object): self.resultsName, 
asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), modal=self.modalResults ) - except ParseBaseException as err: + except Exception as err: #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) + if self.debugActions[FAIL]: + self.debugActions[FAIL]( instring, tokensStart, self, err ) raise else: for fn in self.parseAction: @@ -1615,8 +1620,9 @@ class ParserElement(object): modal=self.modalResults ) if debugging: #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) + if self.debugActions[MATCH]: + self.debugActions[MATCH]( instring, tokensStart, loc, self, retTokens ) + print("do_actions =", doActions) return loc, retTokens @@ -3848,15 +3854,32 @@ class Or(ParseExpression): matches.append((loc2, e)) if matches: - matches.sort(key=lambda x: -x[0]) - for _,e in matches: + # re-evaluate all matches in descending order of length of match, in case attached actions + # might change whether or how much they match of the input. 
+ matches.sort(key=itemgetter(0), reverse=True) + + longest = -1, None + for loc1, expr1 in matches: + if loc1 <= longest[0]: + # already have a longer match than this one will deliver, we are done + return longest + try: - return e._parse( instring, loc, doActions ) + loc2, toks = expr1._parse(instring, loc, doActions) except ParseException as err: err.__traceback__ = None if err.loc > maxExcLoc: maxException = err maxExcLoc = err.loc + else: + if loc2 >= loc1: + return loc2, toks + # didn't match as much as before + elif loc2 > longest[0]: + longest = loc2, toks + + if longest != (-1, None): + return longest if maxException is not None: maxException.msg = self.errmsg diff --git a/unitTests.py b/unitTests.py index f3be763..455c54b 100644 --- a/unitTests.py +++ b/unitTests.py @@ -2860,7 +2860,23 @@ class PatientOrTest(ParseTestCase): failed = True else: failed = False - self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action") + self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action") + + # from issue #93 + word = pp.Word(pp.alphas).setName('word') + word_1 = pp.Word(pp.alphas).setName('word_1').addCondition(lambda t: len(t[0]) == 1) + + a = word + (word_1 + word ^ word) + b = word * 3 + c = a ^ b + c.streamline() + print_(c) + test_string = 'foo bar temp' + result = c.parseString(test_string) + print_(test_string, '->', result.asList()) + + self.assertEqual(result.asList(), test_string.split(), "failed to match longest choice") + class EachWithOptionalWithResultsNameTest(ParseTestCase): def runTest(self): |