summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@austin.rr.com>2020-01-26 19:05:23 -0600
committerptmcg <ptmcg@austin.rr.com>2020-01-26 19:05:23 -0600
commit40cbbf34a62d023fc52a5e2571b01e726856eac2 (patch)
tree46f60d94d47a051362afe7211cf122900bc3f90e
parent1c57a6d4bd8351ed047691226286cd86c4d999a2 (diff)
downloadpyparsing-git-40cbbf34a62d023fc52a5e2571b01e726856eac2.tar.gz
Added new warning 'warn_on_match_first_with_lshift_operator' to warn when doing `fwd << a | b`; fixed potential FutureWarning when including unescaped '[' in a regex range definition.
-rw-r--r--CHANGES34
-rw-r--r--pyparsing/core.py22
-rw-r--r--pyparsing/util.py2
-rw-r--r--tests/test_unit.py38
4 files changed, 68 insertions, 28 deletions
diff --git a/CHANGES b/CHANGES
index 0bf4db2..a4fcf50 100644
--- a/CHANGES
+++ b/CHANGES
@@ -78,6 +78,26 @@ Version 3.0.0a1
pp.__diag__.enable_all_warnings()
+ - added new warning, "warn_on_match_first_with_lshift_operator" to
+ warn when using '<<' with a '|' MatchFirst operator, which will
+ create an unintended expression due to precedence of operations.
+
+ Example: This statement will erroneously define the `fwd` expression
+ as just `expr_a`, even though `expr_a | expr_b` was intended,
+ since the '<<' operator has higher precedence than '|':
+
+ fwd << expr_a | expr_b
+
+ To correct this, use the '<<=' operator (preferred) or parentheses
+ to override operator precedence:
+
+ fwd <<= expr_a | expr_b
+ or
+ fwd << (expr_a | expr_b)
+
+- Fixed FutureWarnings that are sometimes raised when '[' is passed as a
+ character to Word.
+
- New namespace, assert methods and classes added to support writing
unit tests.
- assertParseResultsEquals
@@ -98,20 +118,6 @@ Version 3.0.0a1
# would use regex for this expression
integer_parser = pp.Regex(regex.compile(r'\d+'))
- You can also replace the use of the re module as it is used internally
- by pyparsing in a number of classes by overwriting pyparsing's imported
- re symbol:
-
- import pyparsing as pp
- import regex
- pp.re = regex # redirects all internal re usage in pyparsing to regex
-
- # would now use regex instead of re to compile this string
- integer_parser = pp.Regex(r'\d+')
-
- # would also now use regex internally instead of re
- integer_parser = pp.Word(pp.nums)
-
Inspired by PR submitted by bjrnfrdnnd on GitHub, very nice!
- Fixed handling of ParseSyntaxExceptions raised as part of Each
diff --git a/pyparsing/core.py b/pyparsing/core.py
index e309068..bdbd2d5 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -57,7 +57,7 @@ str_type = (str, bytes)
#
__version__ = "3.0.0a1"
-__versionTime__ = "13 Oct 2019 05:49 UTC"
+__versionTime__ = "27 Jan 2020 00:56 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
@@ -106,6 +106,7 @@ class __diag__(__config_flags):
warn_ungrouped_named_tokens_in_collection = False
warn_name_set_on_empty_Forward = False
warn_on_multiple_string_args_to_oneof = False
+ warn_on_match_first_with_lshift_operator = False
enable_debug_on_named_expressions = False
_all_names = [__ for __ in locals() if not __.startswith("_")]
@@ -142,13 +143,6 @@ def _trim_arity(func, maxargs=2):
limit = 0
found_arity = False
- # traceback return data structure changed in Py3.5 - normalize back to plain tuples
- def extract_stack(limit=0):
- # special handling for Python 3.5.0 - extra deep call stack by 1
- offset = -3 if system_version == (3, 5, 0) else -2
- frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
- return [frame_summary[:2]]
-
def extract_tb(tb, limit=0):
frames = traceback.extract_tb(tb, limit=limit)
frame_summary = frames[-1]
@@ -160,7 +154,7 @@ def _trim_arity(func, maxargs=2):
LINE_DIFF = 7
# IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
# THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
- this_line = extract_stack(limit=2)[-1]
+ this_line = traceback.extract_stack(limit=2)[-1]
pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)
def wrapper(*args):
@@ -4226,6 +4220,7 @@ class Forward(ParseElementEnhance):
def __init__(self, other=None):
super().__init__(other, savelist=False)
+ self.lshift_line = None
def __lshift__(self, other):
if isinstance(other, str_type):
@@ -4240,11 +4235,20 @@ class Forward(ParseElementEnhance):
self.skipWhitespace = self.expr.skipWhitespace
self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs)
+ self.lshift_line = traceback.extract_stack(limit=2)[-2]
return self
def __ilshift__(self, other):
return self << other
+ def __or__(self, other):
+ caller_line = traceback.extract_stack(limit=2)[-2]
+ if (__diag__.warn_on_match_first_with_lshift_operator
+ and caller_line == self.lshift_line):
+ warnings.warn("using '<<' operator with '|' is probably error, use '<<='", SyntaxWarning, stacklevel=3)
+ ret = super().__or__(other)
+ return ret
+
def leaveWhitespace(self):
self.skipWhitespace = False
return self
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 376b9ae..468fefc 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -145,7 +145,7 @@ def _collapseAndEscapeRegexRangeChars(s):
is_consecutive.value = -1
def escape_re_range_char(c):
- return "\\" + c if c in r"\^-]" else c
+ return "\\" + c if c in r"\^-][" else c
ret = []
for _, chars in itertools.groupby(sorted(s), key=is_consecutive):
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 65e0ddb..cdc1f59 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -2490,7 +2490,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
)
try:
- # ~ print "lets try an invalid RE"
+ print("lets try an invalid RE")
invRe = pp.Regex("(\"[^\"]*\")|('[^']*'")
except Exception as e:
print("successfully rejected an invalid RE:", end=" ")
@@ -2498,7 +2498,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
else:
self.assertTrue(False, "failed to reject invalid RE")
- invRe = pp.Regex("")
+ with self.assertWarns(SyntaxWarning, msg="failed to warn empty string passed to Regex"):
+ invRe = pp.Regex("")
def testRegexAsType(self):
import pyparsing as pp
@@ -6208,8 +6209,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"failed to generate correct internal re",
)
- esc_chars = r"\^-]"
- esc_chars2 = r"*+.?["
+ esc_chars = r"\^-]["
+ esc_chars2 = r"*+.?"
for esc_char in esc_chars + esc_chars2:
# test escape char as first character in range
next_char = chr(ord(esc_char) + 1)
@@ -6648,6 +6649,35 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"multipled(3) failure with setResultsName",
)
+ def testWarnUsingLshiftForward(self):
+ import warnings
+ print("verify that using '<<' operator with a Forward raises a warning if there is a dangling '|' operator")
+
+ fwd = pp.Forward()
+ print('unsafe << and |, but diag not enabled, should not warn')
+ fwd << pp.Word('a') | pp.Word('b')
+
+ pp.__diag__.enable('warn_on_match_first_with_lshift_operator')
+ with self.assertWarns(SyntaxWarning, msg="failed to warn of using << and | operators"):
+ fwd = pp.Forward()
+ print('unsafe << and |, should warn')
+ fwd << pp.Word('a') | pp.Word('b')
+
+ fwd = pp.Forward()
+ print('safe <<= and |, should not warn')
+ fwd <<= pp.Word('a') | pp.Word('b')
+ c = fwd | pp.Word('c')
+
+ print('safe << and (|), should not warn')
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("error")
+
+ fwd = pp.Forward()
+ fwd << (pp.Word('a') | pp.Word('b'))
+ try:
+ c = fwd | pp.Word('c')
+ except Exception as e:
+ self.fail("raised warning when it should not have")
class PickleTest_Greeting:
def __init__(self, toks):