Add '*' markers to debug output to indicate cached parse expression try/pass/fail events (which were previously omitted from debugging output)

author: ptmcg <ptmcg@austin.rr.com> 2020-08-18 00:29:06 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2020-08-18 00:29:06 -0500
commit: 4e258967a47e2740199eb7a43d18e8ea1af68247 (patch)
tree: fa48b36a44eaba76875eab8c9612e63cb130efdb
parent: c1c9c8dcf5bee8bdf885767751eebfff2ed49f7c (diff)
download: pyparsing-git-4e258967a47e2740199eb7a43d18e8ea1af68247.tar.gz
3 files changed, 133 insertions, 16 deletions
diff --git a/CHANGES b/CHANGES
index 616b040..6b835ee 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,24 @@ Change Log
 
 Version 3.0.0b1
 ---------------
+- When using setDebug with packrat parsing enabled, packrat cache hits will
+  now be included in the output, shown with a leading '*'. (Previously, cache
+  hits and their cached results were not included in debug output.) For those
+  using custom debug actions, see the following bullet regarding an optional
+  API change for those methods.
+
+- API CHANGE
+  Added `cache_hit` keyword argument to debug actions. Previously, if packrat
+  parsing was enabled, the debug methods were not called in the event of cache
+  hits. Now these methods will be called, with an added argument
+  `cache_hit=True`.
+
+  If you are using packrat parsing and enable debug on expressions using a
+  custom debug method, you can add the `cache_hit=False` keyword argument to
+  its signature, and your method will then be called on packrat cache hits.
+  If you choose not to add this keyword argument, cache-hit calls to your
+  debug method will fail silently, behaving as they did previously.
+
 - Fixed traceback trimming, and added ParserElement.verbose_traceback
   save/restore to reset_pyparsing_context().
 
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 3bcd6cb..d8d3b9b 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -223,22 +223,25 @@ def conditionAsParseAction(fn, message=None, fatal=False):
     return pa
 
 
-def _defaultStartDebugAction(instring, loc, expr):
+def _defaultStartDebugAction(instring, loc, expr, cache_hit=False):
+    cache_hit_str = "*" if cache_hit else ""
     print(
         (
-            "Match {} at loc {}({},{})".format(
-                expr, loc, lineno(loc, instring), col(loc, instring)
+            "{}Match {} at loc {}({},{})".format(
+                cache_hit_str, expr, loc, lineno(loc, instring), col(loc, instring)
             )
         )
     )
 
 
-def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
-    print("Matched " + str(expr) + " -> " + str(toks.asList()))
+def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks, cache_hit=False):
+    cache_hit_str = "*" if cache_hit else ""
+    print("{}Matched {} -> {}".format(cache_hit_str, expr, toks.asList()))
 
 
-def _defaultExceptionDebugAction(instring, loc, expr, exc):
-    print("Exception raised:" + str(exc))
+def _defaultExceptionDebugAction(instring, loc, expr, exc, cache_hit=False):
+    cache_hit_str = "*" if cache_hit else ""
+    print("{}{} raised: {}".format(cache_hit_str, type(exc).__name__, exc))
 
 
 def nullDebugAction(*args):
@@ -667,6 +670,7 @@ class ParserElement(ABC):
     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
     def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
         HIT, MISS = 0, 1
+        TRY, MATCH, FAIL = 0, 1, 2
         lookup = (self, instring, loc, callPreParse, doActions)
         with ParserElement.packrat_cache_lock:
             cache = ParserElement.packrat_cache
@@ -680,13 +684,35 @@ class ParserElement(ABC):
                     cache.set(lookup, pe.__class__(*pe.args))
                     raise
                 else:
-                    cache.set(lookup, (value[0], value[1].copy()))
+                    cache.set(lookup, (value[0], value[1].copy(), loc))
                     return value
             else:
                 ParserElement.packrat_cache_stats[HIT] += 1
+                if self.debug and self.debugActions[TRY]:
+                    try:
+                        self.debugActions[TRY](instring, loc, self, cache_hit=True)
+                    except TypeError:
+                        pass
                 if isinstance(value, Exception):
+                    if self.debug and self.debugActions[FAIL]:
+                        try:
+                            self.debugActions[FAIL](
+                                instring, loc, self, value, cache_hit=True
+                            )
+                        except TypeError:
+                            pass
                     raise value
-                return value[0], value[1].copy()
+
+                loc_, result, endloc = value[0], value[1].copy(), value[2]
+                if self.debug and self.debugActions[MATCH]:
+                    try:
+                        self.debugActions[MATCH](
+                            instring, loc_, endloc, self, result, cache_hit=True
+                        )
+                    except TypeError:
+                        pass
+
+                return loc_, result
 
     _parse = _parseNoCache
 
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 32677f6..d5076af 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -7023,7 +7023,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
                 Match integer at loc 3(1,4)
                 Matched integer -> ['3']
                 Match integer at loc 5(1,6)
-                Exception raised:Expected integer, found end of text  (at char 5), (line:1, col:6)
+                ParseException raised: Expected integer, found end of text  (at char 5), (line:1, col:6)
                 """
             )
             output = test_stdout.getvalue()
@@ -7058,25 +7058,25 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             Match integer at loc 0(1,1)
             Matched integer -> [123]
             Match integer at loc 3(1,4)
-            Exception raised:Expected integer, found 'A'  (at char 4), (line:1, col:5)
+            ParseException raised: Expected integer, found 'A'  (at char 4), (line:1, col:5)
             Match W:(0-9A-Za-z) at loc 3(1,4)
             Matched W:(0-9A-Za-z) -> ['A100']
             Match integer at loc 8(1,9)
-            Exception raised:Expected integer, found end of text  (at char 8), (line:1, col:9)
+            ParseException raised: Expected integer, found end of text  (at char 8), (line:1, col:9)
             Match W:(0-9A-Za-z) at loc 8(1,9)
-            Exception raised:Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
+            ParseException raised: Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
             Matched [{integer | W:(0-9A-Za-z)}]... -> [123, 'A100']
             
             Match integer at loc 0(1,1)
             Matched integer -> [123]
             Match integer at loc 3(1,4)
-            Exception raised:Expected integer, found 'A'  (at char 4), (line:1, col:5)
+            ParseException raised: Expected integer, found 'A'  (at char 4), (line:1, col:5)
             Match W:(0-9A-Za-z) at loc 3(1,4)
             Matched W:(0-9A-Za-z) -> ['A100']
             Match integer at loc 8(1,9)
-            Exception raised:Expected integer, found end of text  (at char 8), (line:1, col:9)
+            ParseException raised: Expected integer, found end of text  (at char 8), (line:1, col:9)
             Match W:(0-9A-Za-z) at loc 8(1,9)
-            Exception raised:Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
+            ParseException raised: Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
             """
         )
         output = test_stdout.getvalue()
@@ -7087,6 +7087,79 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             "invalid debug output when using parse action",
         )
 
+    def testEnableDebugWithCachedExpressionsMarkedWithAsterisk(self):
+        import textwrap
+
+        test_stdout = StringIO()
+        with resetting(sys, "stdout", "stderr"):
+            sys.stdout = test_stdout
+            sys.stderr = test_stdout
+
+            a = pp.Literal("a").setName("A").setDebug()
+            b = pp.Literal("b").setName("B").setDebug()
+            z = pp.Literal("z").setName("Z").setDebug()
+            leading_a = a + pp.FollowedBy(z | a | b)
+            leading_a.setName("leading_a").setDebug()
+
+            grammar = (z | leading_a | b)[...] + "a"
+            grammar.parseString("aba")
+
+        expected_debug_output = textwrap.dedent(
+            """\
+            Match Z at loc 0(1,1)
+            ParseException raised: Expected Z, found 'a'  (at char 0), (line:1, col:1)
+            Match leading_a at loc 0(1,1)
+            Match A at loc 0(1,1)
+            Matched A -> ['a']
+            Match Z at loc 1(1,2)
+            ParseException raised: Expected Z, found 'b'  (at char 1), (line:1, col:2)
+            Match A at loc 1(1,2)
+            ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
+            Match B at loc 1(1,2)
+            Matched B -> ['b']
+            Matched leading_a -> ['a']
+            *Match Z at loc 1(1,2)
+            *ParseException raised: Expected Z, found 'b'  (at char 1), (line:1, col:2)
+            Match leading_a at loc 1(1,2)
+            Match A at loc 1(1,2)
+            ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
+            ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
+            *Match B at loc 1(1,2)
+            *Matched B -> ['b']
+            Match Z at loc 2(1,3)
+            ParseException raised: Expected Z, found 'a'  (at char 2), (line:1, col:3)
+            Match leading_a at loc 2(1,3)
+            Match A at loc 2(1,3)
+            Matched A -> ['a']
+            Match Z at loc 3(1,4)
+            ParseException raised: Expected Z, found end of text  (at char 3), (line:1, col:4)
+            Match A at loc 3(1,4)
+            ParseException raised: Expected A, found end of text  (at char 3), (line:1, col:4)
+            Match B at loc 3(1,4)
+            ParseException raised: Expected B, found end of text  (at char 3), (line:1, col:4)
+            ParseException raised: Expected {Z | A | B}, found end of text  (at char 3), (line:1, col:4)
+            Match B at loc 2(1,3)
+            ParseException raised: Expected B, found 'a'  (at char 2), (line:1, col:3)
+            """
+        )
+        if pp.ParserElement._packratEnabled:
+            packrat_status = "enabled"
+        else:
+            # remove '*' cache markers from expected output
+            expected_debug_output = expected_debug_output.replace("*", "")
+            packrat_status = "disabled"
+
+        output = test_stdout.getvalue()
+        print(output)
+        self.assertEqual(
+            expected_debug_output,
+            output,
+            (
+                "invalid debug output showing cached results marked with '*',"
+                " and packrat parsing {}".format(packrat_status)
+            ),
+        )
+
     def testUndesirableButCommonPractices(self):
 
         # While these are valid constructs, and they are not encouraged
author	ptmcg <ptmcg@austin.rr.com>	2020-08-18 00:29:06 -0500
committer	ptmcg <ptmcg@austin.rr.com>	2020-08-18 00:29:06 -0500
commit	4e258967a47e2740199eb7a43d18e8ea1af68247 (patch)
tree	fa48b36a44eaba76875eab8c9612e63cb130efdb
parent	c1c9c8dcf5bee8bdf885767751eebfff2ed49f7c (diff)
download	pyparsing-git-4e258967a47e2740199eb7a43d18e8ea1af68247.tar.gz