summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul McGuire <ptmcg@austin.rr.com> 2019-07-02 16:32:22 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2019-07-02 16:32:22 -0500
commit: f20f8c038bebb81e7184ac87a6f13d5d81d3b495 (patch)
tree: 8bb5dcb34b8b87d15df62ee7e85adc331878290c
parent: f5de46966a55b8c651f7ff92440665af02567df4 (diff)
download: pyparsing-git-f20f8c038bebb81e7184ac87a6f13d5d81d3b495.tar.gz
Issue #93 - interaction of Or and addCondition sometimes selects alternative that is not the longest
-rw-r--r-- CHANGES 13
-rw-r--r-- pyparsing.py 69
-rw-r--r-- unitTests.py 18
3 files changed, 76 insertions, 24 deletions
diff --git a/CHANGES b/CHANGES
index 037411a..a96d042 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,19 @@ Change Log
Version 2.4.1 -
----------------------
+- While investigating issue #93, I found that Or and
+ addCondition could interact to select an alternative that
+ is not the longest match. This is because Or first checks
+ all alternatives for matches without running attached
+ parse actions or conditions, orders by longest match, and
+ then rechecks for matches with conditions and parse actions.
+ Some expressions, when rechecked with conditions, may end
+ up matching a shorter token list than originally matched,
+ but would still be selected because of their original priority.
+ This matching code has been expanded to do more extensive
+ searching for matches when a second-pass check matches a
+ smaller list than in the first pass.
+
- Fixed issue #87, a regression in indented block.
Reported by Renz Bagaporo, who submitted a very nice repro
example, which makes the bug-fixing process a lot easier,
diff --git a/pyparsing.py b/pyparsing.py
index fe9b8cb..15b7c48 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
"""
__version__ = "2.4.1"
-__versionTime__ = "29 Jun 2019 06:56 UTC"
+__versionTime__ = "02 Jul 2019 21:24 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -111,6 +111,7 @@ import pprint
import traceback
import types
from datetime import datetime
+from operator import itemgetter
try:
# Python 3
@@ -1538,26 +1539,30 @@ class ParserElement(object):
#~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+ TRY, MATCH, FAIL = 0, 1, 2
debugging = ( self.debug ) #and doActions )
if debugging or self.failAction:
#~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
- if (self.debugActions[0] ):
- self.debugActions[0]( instring, loc, self )
- if callPreParse and self.callPreparse:
- preloc = self.preParse( instring, loc )
- else:
- preloc = loc
- tokensStart = preloc
+ if self.debugActions[TRY]:
+ self.debugActions[TRY]( instring, loc, self )
try:
- try:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- except IndexError:
- raise ParseException( instring, len(instring), self.errmsg, self )
- except ParseBaseException as err:
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse(instring, loc)
+ else:
+ preloc = loc
+ tokensStart = preloc
+ if self.mayIndexError or preloc >= len(instring):
+ try:
+ loc, tokens = self.parseImpl(instring, preloc, doActions)
+ except IndexError:
+ raise ParseException(instring, len(instring), self.errmsg, self)
+ else:
+ loc, tokens = self.parseImpl(instring, preloc, doActions)
+ except Exception as err:
#~ print ("Exception raised:", err)
- if self.debugActions[2]:
- self.debugActions[2]( instring, tokensStart, self, err )
+ if self.debugActions[FAIL]:
+ self.debugActions[FAIL]( instring, tokensStart, self, err )
if self.failAction:
self.failAction( instring, tokensStart, self, err )
raise
@@ -1594,10 +1599,10 @@ class ParserElement(object):
self.resultsName,
asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults )
- except ParseBaseException as err:
+ except Exception as err:
#~ print "Exception raised in user parse action:", err
- if (self.debugActions[2] ):
- self.debugActions[2]( instring, tokensStart, self, err )
+ if self.debugActions[FAIL]:
+ self.debugActions[FAIL]( instring, tokensStart, self, err )
raise
else:
for fn in self.parseAction:
@@ -1615,8 +1620,9 @@ class ParserElement(object):
modal=self.modalResults )
if debugging:
#~ print ("Matched",self,"->",retTokens.asList())
- if (self.debugActions[1] ):
- self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+ if self.debugActions[MATCH]:
+ self.debugActions[MATCH]( instring, tokensStart, loc, self, retTokens )
+ print("do_actions =", doActions)
return loc, retTokens
@@ -3848,15 +3854,32 @@ class Or(ParseExpression):
matches.append((loc2, e))
if matches:
- matches.sort(key=lambda x: -x[0])
- for _,e in matches:
+ # re-evaluate all matches in descending order of length of match, in case attached actions
+ # might change whether or how much they match of the input.
+ matches.sort(key=itemgetter(0), reverse=True)
+
+ longest = -1, None
+ for loc1, expr1 in matches:
+ if loc1 <= longest[0]:
+ # already have a longer match than this one will deliver, we are done
+ return longest
+
try:
- return e._parse( instring, loc, doActions )
+ loc2, toks = expr1._parse(instring, loc, doActions)
except ParseException as err:
err.__traceback__ = None
if err.loc > maxExcLoc:
maxException = err
maxExcLoc = err.loc
+ else:
+ if loc2 >= loc1:
+ return loc2, toks
+ # didn't match as much as before
+ elif loc2 > longest[0]:
+ longest = loc2, toks
+
+ if longest != (-1, None):
+ return longest
if maxException is not None:
maxException.msg = self.errmsg
diff --git a/unitTests.py b/unitTests.py
index f3be763..455c54b 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -2860,7 +2860,23 @@ class PatientOrTest(ParseTestCase):
failed = True
else:
failed = False
- self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action")
+ self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action")
+
+ # from issue #93
+ word = pp.Word(pp.alphas).setName('word')
+ word_1 = pp.Word(pp.alphas).setName('word_1').addCondition(lambda t: len(t[0]) == 1)
+
+ a = word + (word_1 + word ^ word)
+ b = word * 3
+ c = a ^ b
+ c.streamline()
+ print_(c)
+ test_string = 'foo bar temp'
+ result = c.parseString(test_string)
+ print_(test_string, '->', result.asList())
+
+ self.assertEqual(result.asList(), test_string.split(), "failed to match longest choice")
+
class EachWithOptionalWithResultsNameTest(ParseTestCase):
def runTest(self):