Added new warning 'warn_on_match_first_with_lshift_operator' to warn when doing `fwd << a | b`; fixed potential FutureWarning when including unescaped '[' in a regex range definition.

author: ptmcg <ptmcg@austin.rr.com> 2020-01-26 19:05:23 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2020-01-26 19:05:23 -0600
commit: 40cbbf34a62d023fc52a5e2571b01e726856eac2 (patch)
tree: 46f60d94d47a051362afe7211cf122900bc3f90e
parent: 1c57a6d4bd8351ed047691226286cd86c4d999a2 (diff)
download: pyparsing-git-40cbbf34a62d023fc52a5e2571b01e726856eac2.tar.gz
4 files changed, 68 insertions, 28 deletions
diff --git a/CHANGES b/CHANGES
index 0bf4db2..a4fcf50 100644
--- a/CHANGES
+++ b/CHANGES
@@ -78,6 +78,26 @@ Version 3.0.0a1
 
         pp.__diag__.enable_all_warnings()
 
+  - added new warning, "warn_on_match_first_with_lshift_operator", to
+    warn when using '<<' with a '|' MatchFirst operator, which will
+    create an unintended expression due to operator precedence.
+
+    Example: This statement will erroneously define the `fwd` expression
+    as just `expr_a`, even though `expr_a | expr_b` was intended,
+    since the '<<' operator has higher precedence than '|':
+
+        fwd << expr_a | expr_b
+
+    To correct this, use the '<<=' operator (preferred) or parentheses
+    to override operator precedence:
+
+        fwd <<= expr_a | expr_b
+                 or
+        fwd << (expr_a | expr_b)
+
+- Fixed FutureWarnings that are sometimes raised when '[' is passed as a
+  character to Word.
+
 - New namespace, assert methods and classes added to support writing
   unit tests.
   - assertParseResultsEquals
@@ -98,20 +118,6 @@ Version 3.0.0a1
     # would use regex for this expression
     integer_parser = pp.Regex(regex.compile(r'\d+'))
 
-  You can also replace the use of the re module as it is used internally
-  by pyparsing in a number of classes by overwriting pyparsing's imported
-  re symbol:
-
-    import pyparsing as pp
-    import regex
-    pp.re = regex  # redirects all internal re usage in pyparsing to regex
-
-    # would now use regex instead of re to compile this string
-    integer_parser = pp.Regex(r'\d+')
-
-    # would also now use regex internally instead of re
-    integer_parser = pp.Word(pp.nums)
-
   Inspired by PR submitted by bjrnfrdnnd on GitHub, very nice!
 
 - Fixed handling of ParseSyntaxExceptions raised as part of Each
diff --git a/pyparsing/core.py b/pyparsing/core.py
index e309068..bdbd2d5 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -57,7 +57,7 @@ str_type = (str, bytes)
 #
 
 __version__ = "3.0.0a1"
-__versionTime__ = "13 Oct 2019 05:49 UTC"
+__versionTime__ = "27 Jan 2020 00:56 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 
@@ -106,6 +106,7 @@ class __diag__(__config_flags):
     warn_ungrouped_named_tokens_in_collection = False
     warn_name_set_on_empty_Forward = False
     warn_on_multiple_string_args_to_oneof = False
+    warn_on_match_first_with_lshift_operator = False
     enable_debug_on_named_expressions = False
 
     _all_names = [__ for __ in locals() if not __.startswith("_")]
@@ -142,13 +143,6 @@ def _trim_arity(func, maxargs=2):
     limit = 0
     found_arity = False
 
-    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
-    def extract_stack(limit=0):
-        # special handling for Python 3.5.0 - extra deep call stack by 1
-        offset = -3 if system_version == (3, 5, 0) else -2
-        frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
-        return [frame_summary[:2]]
-
     def extract_tb(tb, limit=0):
         frames = traceback.extract_tb(tb, limit=limit)
         frame_summary = frames[-1]
@@ -160,7 +154,7 @@ def _trim_arity(func, maxargs=2):
     LINE_DIFF = 7
     # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
     # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
-    this_line = extract_stack(limit=2)[-1]
+    this_line = traceback.extract_stack(limit=2)[-1]
     pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)
 
     def wrapper(*args):
@@ -4226,6 +4220,7 @@ class Forward(ParseElementEnhance):
 
     def __init__(self, other=None):
         super().__init__(other, savelist=False)
+        self.lshift_line = None
 
     def __lshift__(self, other):
         if isinstance(other, str_type):
@@ -4240,11 +4235,20 @@ class Forward(ParseElementEnhance):
         self.skipWhitespace = self.expr.skipWhitespace
         self.saveAsList = self.expr.saveAsList
         self.ignoreExprs.extend(self.expr.ignoreExprs)
+        self.lshift_line = traceback.extract_stack(limit=2)[-2]
         return self
 
     def __ilshift__(self, other):
         return self << other
 
+    def __or__(self, other):
+        caller_line = traceback.extract_stack(limit=2)[-2]
+        if (__diag__.warn_on_match_first_with_lshift_operator
+                and caller_line == self.lshift_line):
+            warnings.warn("using '<<' operator with '|' is probably error, use '<<='", SyntaxWarning, stacklevel=3)
+        ret = super().__or__(other)
+        return ret
+
     def leaveWhitespace(self):
         self.skipWhitespace = False
         return self
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 376b9ae..468fefc 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -145,7 +145,7 @@ def _collapseAndEscapeRegexRangeChars(s):
     is_consecutive.value = -1
 
     def escape_re_range_char(c):
-        return "\\" + c if c in r"\^-]" else c
+        return "\\" + c if c in r"\^-][" else c
 
     ret = []
     for _, chars in itertools.groupby(sorted(s), key=is_consecutive):
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 65e0ddb..cdc1f59 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -2490,7 +2490,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         )
 
         try:
-            # ~ print "lets try an invalid RE"
+            print("lets try an invalid RE")
             invRe = pp.Regex("(\"[^\"]*\")|('[^']*'")
         except Exception as e:
             print("successfully rejected an invalid RE:", end=" ")
@@ -2498,7 +2498,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         else:
             self.assertTrue(False, "failed to reject invalid RE")
 
-        invRe = pp.Regex("")
+        with self.assertWarns(SyntaxWarning, msg="failed to warn empty string passed to Regex"):
+            invRe = pp.Regex("")
 
     def testRegexAsType(self):
         import pyparsing as pp
@@ -6208,8 +6209,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             "failed to generate correct internal re",
         )
 
-        esc_chars = r"\^-]"
-        esc_chars2 = r"*+.?["
+        esc_chars = r"\^-]["
+        esc_chars2 = r"*+.?"
         for esc_char in esc_chars + esc_chars2:
             # test escape char as first character in range
             next_char = chr(ord(esc_char) + 1)
@@ -6648,6 +6649,35 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             "multipled(3) failure with setResultsName",
         )
 
+    def testWarnUsingLshiftForward(self):
+        import warnings
+        print("verify that using '<<' operator with a Forward raises a warning if there is a dangling '|' operator")
+
+        fwd = pp.Forward()
+        print('unsafe << and |, but diag not enabled, should not warn')
+        fwd << pp.Word('a') | pp.Word('b')
+
+        pp.__diag__.enable('warn_on_match_first_with_lshift_operator')
+        with self.assertWarns(SyntaxWarning, msg="failed to warn of using << and | operators"):
+            fwd = pp.Forward()
+            print('unsafe << and |, should warn')
+            fwd << pp.Word('a') | pp.Word('b')
+
+        fwd = pp.Forward()
+        print('safe <<= and |, should not warn')
+        fwd <<= pp.Word('a') | pp.Word('b')
+        c = fwd | pp.Word('c')
+
+        print('safe << and (|), should not warn')
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("error")
+
+            fwd = pp.Forward()
+            fwd << (pp.Word('a') | pp.Word('b'))
+            try:
+                c = fwd | pp.Word('c')
+            except Exception as e:
+                self.fail("raised warning when it should not have")
 
 class PickleTest_Greeting:
     def __init__(self, toks):
author	ptmcg <ptmcg@austin.rr.com>	2020-01-26 19:05:23 -0600
committer	ptmcg <ptmcg@austin.rr.com>	2020-01-26 19:05:23 -0600
commit	40cbbf34a62d023fc52a5e2571b01e726856eac2 (patch)
tree	46f60d94d47a051362afe7211cf122900bc3f90e
parent	1c57a6d4bd8351ed047691226286cd86c4d999a2 (diff)
download	pyparsing-git-40cbbf34a62d023fc52a5e2571b01e726856eac2.tar.gz