diff options
author | ptmcg <ptmcg@austin.rr.com> | 2020-01-26 19:05:23 -0600 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2020-01-26 19:05:23 -0600 |
commit | 40cbbf34a62d023fc52a5e2571b01e726856eac2 (patch) | |
tree | 46f60d94d47a051362afe7211cf122900bc3f90e | |
parent | 1c57a6d4bd8351ed047691226286cd86c4d999a2 (diff) | |
download | pyparsing-git-40cbbf34a62d023fc52a5e2571b01e726856eac2.tar.gz |
Added new warning 'warn_on_match_first_with_lshift_operator' to warn when doing `fwd << a | b`; fixed potential FutureWarning when including unescaped '[' in a regex range definition.
-rw-r--r-- | CHANGES | 34 | ||||
-rw-r--r-- | pyparsing/core.py | 22 | ||||
-rw-r--r-- | pyparsing/util.py | 2 | ||||
-rw-r--r-- | tests/test_unit.py | 38 |
4 files changed, 68 insertions, 28 deletions
@@ -78,6 +78,26 @@ Version 3.0.0a1 pp.__diag__.enable_all_warnings() + - added new warning, "warn_on_match_first_with_lshift_operator" to + warn when using '<<' with a '|' MatchFirst operator, which will + create an unintended expression due to precedence of operations. + + Example: This statement will erroneously define the `fwd` expression + as just `expr_a`, even though `expr_a | expr_b` was intended, + since '<<' operator has precedence over '|': + + fwd << expr_a | expr_b + + To correct this, use the '<<=' operator (preferred) or parentheses + to override operator precedence: + + fwd <<= expr_a | expr_b + or + fwd << (expr_a | expr_b) + +- Fixed FutureWarnings that sometimes are raised when '[' passed as a + character to Word. + - New namespace, assert methods and classes added to support writing unit tests. - assertParseResultsEquals @@ -98,20 +118,6 @@ Version 3.0.0a1 # would use regex for this expression integer_parser = pp.Regex(regex.compile(r'\d+')) - You can also replace the use of the re module as it is used internally - by pyparsing in a number of classes by overwriting pyparsing's imported - re symbol: - - import pyparsing as pp - import regex - pp.re = regex # redirects all internal re usage in pyparsing to regex - - # would now use regex instead of re to compile this string - integer_parser = pp.Regex(r'\d+') - - # would also now use regex internally instead of re - integer_parser = pp.Word(pp.nums) - Inspired by PR submitted by bjrnfrdnnd on GitHub, very nice! - Fixed handling of ParseSyntaxExceptions raised as part of Each diff --git a/pyparsing/core.py b/pyparsing/core.py index e309068..bdbd2d5 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -57,7 +57,7 @@ str_type = (str, bytes) # __version__ = "3.0.0a1" -__versionTime__ = "13 Oct 2019 05:49 UTC" +__versionTime__ = "27 Jan 2020 00:56 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" @@ -106,6 +106,7 @@ class __diag__(__config_flags): warn_ungrouped_named_tokens_in_collection = False warn_name_set_on_empty_Forward = False warn_on_multiple_string_args_to_oneof = False + warn_on_match_first_with_lshift_operator = False enable_debug_on_named_expressions = False _all_names = [__ for __ in locals() if not __.startswith("_")] @@ -142,13 +143,6 @@ def _trim_arity(func, maxargs=2): limit = 0 found_arity = False - # traceback return data structure changed in Py3.5 - normalize back to plain tuples - def extract_stack(limit=0): - # special handling for Python 3.5.0 - extra deep call stack by 1 - offset = -3 if system_version == (3, 5, 0) else -2 - frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset] - return [frame_summary[:2]] - def extract_tb(tb, limit=0): frames = traceback.extract_tb(tb, limit=limit) frame_summary = frames[-1] @@ -160,7 +154,7 @@ def _trim_arity(func, maxargs=2): LINE_DIFF = 7 # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! - this_line = extract_stack(limit=2)[-1] + this_line = traceback.extract_stack(limit=2)[-1] pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF) def wrapper(*args): @@ -4226,6 +4220,7 @@ class Forward(ParseElementEnhance): def __init__(self, other=None): super().__init__(other, savelist=False) + self.lshift_line = None def __lshift__(self, other): if isinstance(other, str_type): @@ -4240,11 +4235,20 @@ class Forward(ParseElementEnhance): self.skipWhitespace = self.expr.skipWhitespace self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) + self.lshift_line = traceback.extract_stack(limit=2)[-2] return self def __ilshift__(self, other): return self << other + def __or__(self, other): + caller_line = traceback.extract_stack(limit=2)[-2] + if (__diag__.warn_on_match_first_with_lshift_operator + and caller_line == self.lshift_line): + warnings.warn("using '<<' operator with '|' is probably error, use '<<='", SyntaxWarning, stacklevel=3) + ret = super().__or__(other) + return ret + def leaveWhitespace(self): self.skipWhitespace = False return self diff --git a/pyparsing/util.py b/pyparsing/util.py index 376b9ae..468fefc 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -145,7 +145,7 @@ def _collapseAndEscapeRegexRangeChars(s): is_consecutive.value = -1 def escape_re_range_char(c): - return "\\" + c if c in r"\^-]" else c + return "\\" + c if c in r"\^-][" else c ret = [] for _, chars in itertools.groupby(sorted(s), key=is_consecutive): diff --git a/tests/test_unit.py b/tests/test_unit.py index 65e0ddb..cdc1f59 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -2490,7 +2490,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ) try: - # ~ print "lets try an invalid RE" + print("lets try an invalid RE") invRe = pp.Regex("(\"[^\"]*\")|('[^']*'") except Exception as e: print("successfully rejected an invalid RE:", end=" ") @@ -2498,7 +2498,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): else: self.assertTrue(False, "failed to reject invalid RE") - invRe = pp.Regex("") + with self.assertWarns(SyntaxWarning, msg="failed to warn empty string passed to Regex"): + invRe = pp.Regex("") def testRegexAsType(self): import pyparsing as pp @@ -6208,8 +6209,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): "failed to generate correct internal re", ) - esc_chars = r"\^-]" - esc_chars2 = r"*+.?[" + esc_chars = r"\^-][" + esc_chars2 = r"*+.?" for esc_char in esc_chars + esc_chars2: # test escape char as first character in range next_char = chr(ord(esc_char) + 1) @@ -6648,6 +6649,35 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): "multipled(3) failure with setResultsName", ) + def testWarnUsingLshiftForward(self): + import warnings + print("verify that using '<<' operator with a Forward raises a warning if there is a dangling '|' operator") + + fwd = pp.Forward() + print('unsafe << and |, but diag not enabled, should not warn') + fwd << pp.Word('a') | pp.Word('b') + + pp.__diag__.enable('warn_on_match_first_with_lshift_operator') + with self.assertWarns(SyntaxWarning, msg="failed to warn of using << and | operators"): + fwd = pp.Forward() + print('unsafe << and |, should warn') + fwd << pp.Word('a') | pp.Word('b') + + fwd = pp.Forward() + print('safe <<= and |, should not warn') + fwd <<= pp.Word('a') | pp.Word('b') + c = fwd | pp.Word('c') + + print('safe << and (|), should not warn') + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("error") + + fwd = pp.Forward() + fwd << (pp.Word('a') | pp.Word('b')) + try: + c = fwd | pp.Word('c') + except Exception as e: + self.fail("raised warning when it should not have") class PickleTest_Greeting: def __init__(self, toks): |