diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-08 20:54:30 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-08 20:54:30 -0500 |
commit | d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5 (patch) | |
tree | 61ef123efeb7f2d6ef6c4466f6e73f4f0eab5ec1 | |
parent | e30333f5eb4262069a8863e8fa276ecbcd364660 (diff) | |
download | pyparsing-git-d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5.tar.gz |
Add __diag__ namespace to enable diagnostic switches; add asKeyword optional arg for oneOf to emit Keyword expressions instead of Literals
-rw-r--r-- | CHANGES | 26 | ||||
-rw-r--r-- | pyparsing.py | 150 | ||||
-rw-r--r-- | unitTests.py | 47 |
3 files changed, 189 insertions, 34 deletions
@@ -99,12 +99,38 @@ Version 2.4.1 - July, 2019 pyparsing.ParseException: Expected end of text, found '1' (at char 8), (line:1, col:9) +- Added diagnostic switches to help detect and warn about common + parser construction mistakes, or enable additional parse + debugging. Switches are attached to the pyparsing.__diag__ + namespace object: + - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results + name is defined on a MatchFirst or Or expression with one or more And subexpressions + (default=True) + - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results + name is defined on a containing expression with ungrouped subexpressions that also + have results names (default=True) + - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined + with a results name, but has no contents defined (default=False) + - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is + incorrectly called with multiple str arguments (default=True) + - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent + calls to ParserElement.setName() (default=False) + + warn_multiple_tokens_in_named_alternation is intended to help + those who currently have set __compat__.collect_all_And_tokens to + False as a workaround for using the pre-2.3.0 code with ungrouped + results names nested in an outer expression also having a results + name. + - Added ParseResults.from_dict classmethod, to simplify creation of a ParseResults with results names. May be called with a dict argument, or with a series of named arguments (or both). This makes it easy to add a sub-level of named items to the parsed tokens in a parse action. +- Added asKeyword argument (default=False) to oneOf, to force + keyword-style matching on the generated expressions. + - While investigating issue #93, I found that Or and addCondition could interact to select an alternative that is not the longest match. 
This is because Or first checks diff --git a/pyparsing.py b/pyparsing.py index d41ceba..77617d2 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -157,11 +157,34 @@ __compat__.__doc__ = """ - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping of results names when an And expression is nested within an Or or MatchFirst; set to - True to enable bugfix to be released in pyparsing 2.4 + True to enable bugfix released in pyparsing 2.3.0, or False to preserve + pre-2.3.0 handling of named results """ __compat__.collect_all_And_tokens = True +__diag__ = SimpleNamespace() +__diag__.__doc__ = """ +Diagnostic configuration + - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results + name is defined on a MatchFirst or Or expression with one or more And subexpressions + (default=True) (only warns if __compat__.collect_all_And_tokens is False) + - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results + name is defined on a containing expression with ungrouped subexpressions that also + have results names (default=True) + - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined + with a results name, but has no contents defined (default=False) + - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is + incorrectly called with multiple str arguments (default=True) + - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent + calls to ParserElement.setName() (default=False) +""" +__diag__.warn_multiple_tokens_in_named_alternation = True +__diag__.warn_ungrouped_named_tokens_in_collection = True +__diag__.warn_name_set_on_empty_Forward = False +__diag__.warn_on_multiple_string_args_to_oneof = True +__diag__.enable_debug_on_named_expressions = False + #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) __all__ = [ '__version__', '__versionTime__', '__author__', 
'__compat__', @@ -1385,8 +1408,8 @@ class ParserElement(object): """ self.name = name self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg + if __diag__.enable_debug_on_named_expressions: + self.setDebug() return self def setResultsName( self, name, listAllMatches=False ): @@ -1410,6 +1433,9 @@ class ParserElement(object): # equivalent form: date_str = integer("year") + '/' + integer("month") + '/' + integer("day") """ + return self._setResultsName(name, listAllMatches) + + def _setResultsName(self, name, listAllMatches=False): newself = self.copy() if name.endswith("*"): name = name[:-1] @@ -2338,7 +2364,7 @@ class ParserElement(object): userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") """ if name is not None: - return self.setResultsName(name) + return self._setResultsName(name) else: return self.copy() @@ -3843,6 +3869,20 @@ class ParseExpression(ParserElement): ret.exprs = [e.copy() for e in self.exprs] return ret + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_ungrouped_named_tokens_in_collection: + for e in self.exprs: + if isinstance(e, ParserElement) and e.resultsName: + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName), + stacklevel=3) + + return super(ParseExpression, self)._setResultsName(name, listAllMatches) + + class And(ParseExpression): """ Requires all given :class:`ParseExpression` s to be found in the given order. 
@@ -4052,6 +4092,17 @@ class Or(ParseExpression): for e in self.exprs: e.checkRecursion( subRecCheckList ) + def _setResultsName(self, name, listAllMatches=False): + if (not __compat__.collect_all_And_tokens + and __diag__.warn_multiple_tokens_in_named_alternation): + if any(isinstance(e, And) for e in self.exprs): + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "may only return a single token for an And alternative".format( + "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), + stacklevel=3) + + return super(Or, self)._setResultsName(name, listAllMatches) + class MatchFirst(ParseExpression): """Requires that at least one :class:`ParseExpression` is found. If @@ -4126,6 +4177,17 @@ class MatchFirst(ParseExpression): for e in self.exprs: e.checkRecursion( subRecCheckList ) + def _setResultsName(self, name, listAllMatches=False): + if (not __compat__.collect_all_And_tokens + and __diag__.warn_multiple_tokens_in_named_alternation): + if any(isinstance(e, And) for e in self.exprs): + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "may only return a single token for an And alternative".format( + "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), + stacklevel=3) + + return super(MatchFirst, self)._setResultsName(name, listAllMatches) + class Each(ParseExpression): """Requires all given :class:`ParseExpression` s to be found, but in @@ -4532,6 +4594,20 @@ class _MultipleMatch(ParseElementEnhance): return loc, tokens + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_ungrouped_named_tokens_in_collection: + for e in [self.expr] + getattr(self.expr, 'exprs', []): + if isinstance(e, ParserElement) and e.resultsName: + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName), + stacklevel=3) + + return 
super(_MultipleMatch, self)._setResultsName(name, listAllMatches) + + class OneOrMore(_MultipleMatch): """Repetition of one or more of the given expression. @@ -4881,6 +4957,17 @@ class Forward(ParseElementEnhance): ret <<= self return ret + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_name_set_on_empty_Forward: + if self.expr is None: + warnings.warn("{0}: setting results name {0!r} on {1} expression " + "that has no contained expression".format("warn_name_set_on_empty_Forward", + name, + type(self).__name__), + stacklevel=3) + + return super(Forward, self)._setResultsName(name, listAllMatches) + class TokenConverter(ParseElementEnhance): """ Abstract subclass of :class:`ParseExpression`, for converting parsed results. @@ -5249,7 +5336,7 @@ def _escapeRegexRangeChars(s): s = s.replace("\t",r"\t") return _ustr(s) -def oneOf( strs, caseless=False, useRegex=True ): +def oneOf(strs, caseless=False, useRegex=True, asKeyword=False): """Helper to quickly define a set of alternative Literals, and makes sure to do longest-first testing when there is a conflict, regardless of the input order, but returns @@ -5263,8 +5350,10 @@ def oneOf( strs, caseless=False, useRegex=True ): caseless - useRegex - (default= ``True``) - as an optimization, will generate a Regex object; otherwise, will generate - a :class:`MatchFirst` object (if ``caseless=True``, or if + a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if creating a :class:`Regex` raises an exception) + - asKeyword - (default=``False``) - enforce Keyword-style matching on the + generated expressions Example:: @@ -5279,14 +5368,18 @@ def oneOf( strs, caseless=False, useRegex=True ): [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] """ + if isinstance(caseless, basestring): + warnings.warn("More than one string argument passed to oneOf, pass " + "choices as a list or space-delimited string", stacklevel=2) + if caseless: - isequal = ( lambda 
a,b: a.upper() == b.upper() ) - masks = ( lambda a,b: b.upper().startswith(a.upper()) ) - parseElementClass = CaselessLiteral + isequal = (lambda a, b: a.upper() == b.upper()) + masks = (lambda a, b: b.upper().startswith(a.upper())) + parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral else: - isequal = ( lambda a,b: a == b ) - masks = ( lambda a,b: b.startswith(a) ) - parseElementClass = Literal + isequal = (lambda a, b: a == b) + masks = (lambda a, b: b.startswith(a)) + parseElementClass = Keyword if asKeyword else Literal symbols = [] if isinstance(strs,basestring): @@ -5299,22 +5392,24 @@ def oneOf( strs, caseless=False, useRegex=True ): if not symbols: return NoMatch() - i = 0 - while i < len(symbols)-1: - cur = symbols[i] - for j,other in enumerate(symbols[i+1:]): - if ( isequal(other, cur) ): - del symbols[i+j+1] - break - elif ( masks(cur, other) ): - del symbols[i+j+1] - symbols.insert(i,other) - cur = other - break - else: - i += 1 + if not asKeyword: + # if not producing keywords, need to reorder to take care to avoid masking + # longer choices with shorter ones + i = 0 + while i < len(symbols)-1: + cur = symbols[i] + for j, other in enumerate(symbols[i+1:]): + if (isequal(other, cur)): + del symbols[i+j+1] + break + elif (masks(cur, other)): + del symbols[i+j+1] + symbols.insert(i,other) + break + else: + i += 1 - if not caseless and useRegex: + if not (caseless or asKeyword) and useRegex: #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) try: if len(symbols)==len("".join(symbols)): @@ -5325,7 +5420,6 @@ def oneOf( strs, caseless=False, useRegex=True ): warnings.warn("Exception creating Regex for oneOf, building MatchFirst", SyntaxWarning, stacklevel=2) - # last resort, just use MatchFirst return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) diff --git a/unitTests.py b/unitTests.py index 586c8f0..53fc554 100644 --- a/unitTests.py +++ b/unitTests.py @@ -30,9 +30,9 @@ if 
PY_3: else: def _print(*args, **kwargs): if 'end' in kwargs: - sys.stdout.write(' '.join(map(str,args)) + kwargs['end']) + sys.stdout.write(' '.join(map(str,args)) + kwargs['end'], flush=True) else: - sys.stdout.write(' '.join(map(str,args)) + '\n') + sys.stdout.write(' '.join(map(str,args)) + '\n', flush=True) print_ = _print from cStringIO import StringIO @@ -4347,10 +4347,15 @@ class ParseResultsWithNameMatchFirst(ParseTestCase): # test compatibility mode, restoring pre-2.3.1 behavior with AutoReset(pp.__compat__, "collect_all_And_tokens"): pp.__compat__.collect_all_And_tokens = False + pp.__diag__.warn_multiple_tokens_in_named_alternation = True expr_a = pp.Literal('not') + pp.Literal('the') + pp.Literal('bird') expr_b = pp.Literal('the') + pp.Literal('bird') - expr = (expr_a | expr_b)('rexp') - expr.runTests("""\ + if PY_3: + with self.assertWarns(UserWarning, msg="failed to warn of And within alternation"): + expr = (expr_a | expr_b)('rexp') + else: + expr = (expr_a | expr_b)('rexp') + expr.runTests(""" not the bird the bird """) @@ -4489,6 +4494,35 @@ class CaselessKeywordVsKeywordCaselessTest(ParseTestCase): self.assertEqual(flist, clist, "CaselessKeyword not working the same as Keyword(caseless=True)") +class OneOfKeywordsTest(ParseTestCase): + def runTest(self): + import pyparsing as pp + + literal_expr = pp.oneOf("a b c") + success, _ = literal_expr[...].runTests(""" + # literal oneOf tests + a b c + a a a + abc + """) + self.assertTrue(success, "failed literal oneOf matching") + + keyword_expr = pp.oneOf("a b c", asKeyword=True) + success, _ = keyword_expr[...].runTests(""" + # keyword oneOf tests + a b c + a a a + """) + self.assertTrue(success, "failed keyword oneOf matching") + + success, _ = keyword_expr[...].runTests(""" + # keyword oneOf failure tests + abc + """, failureTests=True) + self.assertTrue(success, "failed keyword oneOf failure tests") + + + class MiscellaneousParserTests(ParseTestCase): def runTest(self): @@ -4735,16 +4769,17 @@ suite 
= makeTestSuite() if __name__ == '__main__': - testRunner = TextTestRunner() - # run specific tests by including them in this list, otherwise # all tests will be run testclasses = [ ] if not testclasses: + testRunner = TextTestRunner() result = testRunner.run(suite) else: + # disable chaser '.' display + testRunner = TextTestRunner(verbosity=0) BUFFER_OUTPUT = False result = testRunner.run(makeTestSuiteTemp(testclasses)) |