Issue #93 - interaction of Or and addCondition sometimes selects alternative that is not the longest

author: Paul McGuire <ptmcg@austin.rr.com> 2019-07-02 16:32:22 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2019-07-02 16:32:22 -0500
commit: f20f8c038bebb81e7184ac87a6f13d5d81d3b495 (patch)
tree: 8bb5dcb34b8b87d15df62ee7e85adc331878290c
parent: f5de46966a55b8c651f7ff92440665af02567df4 (diff)
download: pyparsing-git-f20f8c038bebb81e7184ac87a6f13d5d81d3b495.tar.gz
3 files changed, 76 insertions, 24 deletions
diff --git a/CHANGES b/CHANGES
index 037411a..a96d042 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,19 @@ Change Log
 
 Version 2.4.1 -
 ----------------------
+- While investigating issue #93, I found that Or and
+  addCondition could interact to select an alternative that
+  is not the longest match. This is because Or first checks
+  all alternatives for matches without running attached
+  parse actions or conditions, orders by longest match, and
+  then rechecks for matches with conditions and parse actions.
+  Some expressions, when rechecked with conditions, may end
+  up matching a shorter token list than originally matched,
+  but would still be selected because of their original priority.
+  This matching code has been expanded to do more extensive
+  searching for matches when a second-pass check matches a
+  smaller list than in the first pass.
+
 - Fixed issue #87, a regression in indented block.
   Reported by Renz Bagaporo, who submitted a very nice repro
   example, which makes the bug-fixing process a lot easier,
diff --git a/pyparsing.py b/pyparsing.py
index fe9b8cb..15b7c48 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
 """
 
 __version__ = "2.4.1"
-__versionTime__ = "29 Jun 2019 06:56 UTC"
+__versionTime__ = "02 Jul 2019 21:24 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -111,6 +111,7 @@ import pprint
 import traceback
 import types
 from datetime import datetime
+from operator import itemgetter
 
 try:
     # Python 3
@@ -1538,26 +1539,30 @@ class ParserElement(object):
 
     #~ @profile
     def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+        TRY, MATCH, FAIL = 0, 1, 2
         debugging = ( self.debug ) #and doActions )
 
         if debugging or self.failAction:
             #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
-            if (self.debugActions[0] ):
-                self.debugActions[0]( instring, loc, self )
-            if callPreParse and self.callPreparse:
-                preloc = self.preParse( instring, loc )
-            else:
-                preloc = loc
-            tokensStart = preloc
+            if self.debugActions[TRY]:
+                self.debugActions[TRY]( instring, loc, self )
             try:
-                try:
-                    loc,tokens = self.parseImpl( instring, preloc, doActions )
-                except IndexError:
-                    raise ParseException( instring, len(instring), self.errmsg, self )
-            except ParseBaseException as err:
+                if callPreParse and self.callPreparse:
+                    preloc = self.preParse(instring, loc)
+                else:
+                    preloc = loc
+                tokensStart = preloc
+                if self.mayIndexError or preloc >= len(instring):
+                    try:
+                        loc, tokens = self.parseImpl(instring, preloc, doActions)
+                    except IndexError:
+                        raise ParseException(instring, len(instring), self.errmsg, self)
+                else:
+                    loc, tokens = self.parseImpl(instring, preloc, doActions)
+            except Exception as err:
                 #~ print ("Exception raised:", err)
-                if self.debugActions[2]:
-                    self.debugActions[2]( instring, tokensStart, self, err )
+                if self.debugActions[FAIL]:
+                    self.debugActions[FAIL]( instring, tokensStart, self, err )
                 if self.failAction:
                     self.failAction( instring, tokensStart, self, err )
                 raise
@@ -1594,10 +1599,10 @@ class ParserElement(object):
                                                       self.resultsName,
                                                       asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                       modal=self.modalResults )
-                except ParseBaseException as err:
+                except Exception as err:
                     #~ print "Exception raised in user parse action:", err
-                    if (self.debugActions[2] ):
-                        self.debugActions[2]( instring, tokensStart, self, err )
+                    if self.debugActions[FAIL]:
+                        self.debugActions[FAIL]( instring, tokensStart, self, err )
                     raise
             else:
                 for fn in self.parseAction:
@@ -1615,8 +1620,9 @@ class ParserElement(object):
                                                   modal=self.modalResults )
         if debugging:
             #~ print ("Matched",self,"->",retTokens.asList())
-            if (self.debugActions[1] ):
-                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+            if self.debugActions[MATCH]:
+                self.debugActions[MATCH]( instring, tokensStart, loc, self, retTokens )
+                print("do_actions =", doActions)
 
         return loc, retTokens
 
@@ -3848,15 +3854,32 @@ class Or(ParseExpression):
                 matches.append((loc2, e))
 
         if matches:
-            matches.sort(key=lambda x: -x[0])
-            for _,e in matches:
+            # re-evaluate all matches in descending order of length of match, in case attached actions
+            # might change whether or how much they match of the input.
+            matches.sort(key=itemgetter(0), reverse=True)
+
+            longest = -1, None
+            for loc1, expr1 in matches:
+                if loc1 <= longest[0]:
+                    # already have a longer match than this one will deliver, we are done
+                    return longest
+
                 try:
-                    return e._parse( instring, loc, doActions )
+                    loc2, toks = expr1._parse(instring, loc, doActions)
                 except ParseException as err:
                     err.__traceback__ = None
                     if err.loc > maxExcLoc:
                         maxException = err
                         maxExcLoc = err.loc
+                else:
+                    if loc2 >= loc1:
+                        return loc2, toks
+                    # didn't match as much as before
+                    elif loc2 > longest[0]:
+                        longest = loc2, toks
+
+            if longest != (-1, None):
+                return longest
 
         if maxException is not None:
             maxException.msg = self.errmsg
diff --git a/unitTests.py b/unitTests.py
index f3be763..455c54b 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -2860,7 +2860,23 @@ class PatientOrTest(ParseTestCase):
             failed = True
         else:
             failed = False
-            self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action")
+        self.assertFalse(failed, "invalid logic in Or, fails on longest match with exception in parse action")
+
+        # from issue #93
+        word = pp.Word(pp.alphas).setName('word')
+        word_1 = pp.Word(pp.alphas).setName('word_1').addCondition(lambda t: len(t[0]) == 1)
+
+        a = word + (word_1 + word ^ word)
+        b = word * 3
+        c = a ^ b
+        c.streamline()
+        print_(c)
+        test_string = 'foo bar temp'
+        result = c.parseString(test_string)
+        print_(test_string, '->', result.asList())
+
+        self.assertEqual(result.asList(), test_string.split(), "failed to match longest choice")
+
 
 class EachWithOptionalWithResultsNameTest(ParseTestCase):
     def runTest(self):
author	Paul McGuire <ptmcg@austin.rr.com>	2019-07-02 16:32:22 -0500
committer	Paul McGuire <ptmcg@austin.rr.com>	2019-07-02 16:32:22 -0500
commit	f20f8c038bebb81e7184ac87a6f13d5d81d3b495 (patch)
tree	8bb5dcb34b8b87d15df62ee7e85adc331878290c
parent	f5de46966a55b8c651f7ff92440665af02567df4 (diff)
download	pyparsing-git-f20f8c038bebb81e7184ac87a6f13d5d81d3b495.tar.gz