summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-07-08 20:54:30 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-07-08 20:54:30 -0500
commitd5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5 (patch)
tree61ef123efeb7f2d6ef6c4466f6e73f4f0eab5ec1
parente30333f5eb4262069a8863e8fa276ecbcd364660 (diff)
downloadpyparsing-git-d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5.tar.gz
Add __diag__ namespace to enable diagnostic switches; add asKeyword optional arg for oneOf to emit Keyword expressions instead of Literals
-rw-r--r--CHANGES26
-rw-r--r--pyparsing.py150
-rw-r--r--unitTests.py47
3 files changed, 189 insertions, 34 deletions
diff --git a/CHANGES b/CHANGES
index 1d54567..e1e98e3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -99,12 +99,38 @@ Version 2.4.1 - July, 2019
pyparsing.ParseException: Expected end of text, found '1' (at char 8), (line:1, col:9)
+- Added diagnostic switches to help detect and warn about common
+ parser construction mistakes, or enable additional parse
+ debugging. Switches are attached to the pyparsing.__diag__
+ namespace object:
+ - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+ name is defined on a MatchFirst or Or expression with one or more And subexpressions
+ (default=True)
+ - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+ name is defined on a containing expression with ungrouped subexpressions that also
+ have results names (default=True)
+ - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
+ with a results name, but has no contents defined (default=False)
+ - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
+ incorrectly called with multiple str arguments (default=True)
+ - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
+ calls to ParserElement.setName() (default=False)
+
+ warn_multiple_tokens_in_named_alternation is intended to help
+ those who currently have set __compat__.collect_all_And_tokens to
+ False as a workaround for using the pre-2.3.0 code with ungrouped
+ results names nested in an outer expression also having a results
+ name.
+
- Added ParseResults.from_dict classmethod, to simplify creation
of a ParseResults with results names. May be called with a dict
argument, or with a series of named arguments (or both). This
makes it easy to add a sub-level of named items to the parsed
tokens in a parse action.
+- Added asKeyword argument (default=False) to oneOf, to force
+ keyword-style matching on the generated expressions.
+
- While investigating issue #93, I found that Or and
addCondition could interact to select an alternative that
is not the longest match. This is because Or first checks
diff --git a/pyparsing.py b/pyparsing.py
index d41ceba..77617d2 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -157,11 +157,34 @@ __compat__.__doc__ = """
- collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
of results names when an And expression is nested within an Or or MatchFirst; set to
- True to enable bugfix to be released in pyparsing 2.4
+ True to enable bugfix released in pyparsing 2.3.0, or False to preserve
+ pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True
+__diag__ = SimpleNamespace()
+__diag__.__doc__ = """
+Diagnostic configuration
+ - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+ name is defined on a MatchFirst or Or expression with one or more And subexpressions
+ (default=True) (only warns if __compat__.collect_all_And_tokens is False)
+ - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+ name is defined on a containing expression with ungrouped subexpressions that also
+ have results names (default=True)
+ - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
+ with a results name, but has no contents defined (default=False)
+ - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
+ incorrectly called with multiple str arguments (default=True)
+ - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
+ calls to ParserElement.setName() (default=False)
+"""
+__diag__.warn_multiple_tokens_in_named_alternation = True
+__diag__.warn_ungrouped_named_tokens_in_collection = True
+__diag__.warn_name_set_on_empty_Forward = False
+__diag__.warn_on_multiple_string_args_to_oneof = True
+__diag__.enable_debug_on_named_expressions = False
+
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [ '__version__', '__versionTime__', '__author__', '__compat__',
@@ -1385,8 +1408,8 @@ class ParserElement(object):
"""
self.name = name
self.errmsg = "Expected " + self.name
- if hasattr(self,"exception"):
- self.exception.msg = self.errmsg
+ if __diag__.enable_debug_on_named_expressions:
+ self.setDebug()
return self
def setResultsName( self, name, listAllMatches=False ):
@@ -1410,6 +1433,9 @@ class ParserElement(object):
# equivalent form:
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
"""
+ return self._setResultsName(name, listAllMatches)
+
+ def _setResultsName(self, name, listAllMatches=False):
newself = self.copy()
if name.endswith("*"):
name = name[:-1]
@@ -2338,7 +2364,7 @@ class ParserElement(object):
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
"""
if name is not None:
- return self.setResultsName(name)
+ return self._setResultsName(name)
else:
return self.copy()
@@ -3843,6 +3869,20 @@ class ParseExpression(ParserElement):
ret.exprs = [e.copy() for e in self.exprs]
return ret
+    def _setResultsName(self, name, listAllMatches=False):
+        """Assign a results name, warning about contained named expressions.
+
+        When ``__diag__.warn_ungrouped_named_tokens_in_collection`` is set, one
+        warning is issued for every expression in ``self.exprs`` that is a
+        ParserElement and already has a results name of its own. The naming
+        itself is delegated to the parent class implementation.
+        """
+        if __diag__.warn_ungrouped_named_tokens_in_collection:
+            template = ("{0}: setting results name {1!r} on {2} expression "
+                        "collides with {3!r} on contained expression")
+            for sub_expr in self.exprs:
+                if not (isinstance(sub_expr, ParserElement) and sub_expr.resultsName):
+                    continue
+                # warn() is called directly here so that stacklevel=3 keeps
+                # pointing at the same caller frame
+                warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
+                                              name,
+                                              type(self).__name__,
+                                              sub_expr.resultsName),
+                              stacklevel=3)
+
+        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
+
+
class And(ParseExpression):
"""
Requires all given :class:`ParseExpression` s to be found in the given order.
@@ -4052,6 +4092,17 @@ class Or(ParseExpression):
for e in self.exprs:
e.checkRecursion( subRecCheckList )
+    def _setResultsName(self, name, listAllMatches=False):
+        """Assign a results name, warning if an alternative is an And.
+
+        The warning is only considered when
+        ``__compat__.collect_all_And_tokens`` is False and
+        ``__diag__.warn_multiple_tokens_in_named_alternation`` is set; it is
+        issued at most once per call. Naming is delegated to the parent class.
+        """
+        warn_enabled = (not __compat__.collect_all_And_tokens
+                        and __diag__.warn_multiple_tokens_in_named_alternation)
+        if warn_enabled:
+            for alternative in self.exprs:
+                if isinstance(alternative, And):
+                    warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                                  "may only return a single token for an And alternative".format(
+                                      "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
+                                  stacklevel=3)
+                    # a single warning is enough, however many And alternatives exist
+                    break
+
+        return super(Or, self)._setResultsName(name, listAllMatches)
+
class MatchFirst(ParseExpression):
"""Requires that at least one :class:`ParseExpression` is found. If
@@ -4126,6 +4177,17 @@ class MatchFirst(ParseExpression):
for e in self.exprs:
e.checkRecursion( subRecCheckList )
+    def _setResultsName(self, name, listAllMatches=False):
+        """Assign a results name, warning if an alternative is an And.
+
+        The warning is only considered when
+        ``__compat__.collect_all_And_tokens`` is False and
+        ``__diag__.warn_multiple_tokens_in_named_alternation`` is set; it is
+        issued at most once per call. Naming is delegated to the parent class.
+        """
+        warn_enabled = (not __compat__.collect_all_And_tokens
+                        and __diag__.warn_multiple_tokens_in_named_alternation)
+        if warn_enabled:
+            for alternative in self.exprs:
+                if isinstance(alternative, And):
+                    warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                                  "may only return a single token for an And alternative".format(
+                                      "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
+                                  stacklevel=3)
+                    # a single warning is enough, however many And alternatives exist
+                    break
+
+        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
+
class Each(ParseExpression):
"""Requires all given :class:`ParseExpression` s to be found, but in
@@ -4532,6 +4594,20 @@ class _MultipleMatch(ParseElementEnhance):
return loc, tokens
+    def _setResultsName(self, name, listAllMatches=False):
+        """Assign a results name, warning about named expressions being repeated.
+
+        When ``__diag__.warn_ungrouped_named_tokens_in_collection`` is set, the
+        repeated expression itself and, if it has an ``exprs`` attribute, each
+        entry of that list are checked; every ParserElement among them that
+        already has a results name triggers one warning. Naming is delegated
+        to the parent class.
+        """
+        if __diag__.warn_ungrouped_named_tokens_in_collection:
+            template = ("{0}: setting results name {1!r} on {2} expression "
+                        "collides with {3!r} on contained expression")
+            candidates = [self.expr] + getattr(self.expr, 'exprs', [])
+            for inner in candidates:
+                if isinstance(inner, ParserElement) and inner.resultsName:
+                    warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
+                                                  name,
+                                                  type(self).__name__,
+                                                  inner.resultsName),
+                                  stacklevel=3)
+
+        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
+
+
class OneOrMore(_MultipleMatch):
"""Repetition of one or more of the given expression.
@@ -4881,6 +4957,17 @@ class Forward(ParseElementEnhance):
ret <<= self
return ret
+    def _setResultsName(self, name, listAllMatches=False):
+        """Assign a results name, optionally warning if this Forward is still empty.
+
+        If ``__diag__.warn_name_set_on_empty_Forward`` is enabled and no
+        expression has been assigned yet (``self.expr is None``), a warning is
+        issued that names the diagnostic switch, the requested results name and
+        the class of this expression. Naming is delegated to the parent class.
+        """
+        if __diag__.warn_name_set_on_empty_Forward:
+            if self.expr is None:
+                # format fields: {0} = diagnostic switch, {1} = results name,
+                # {2} = expression class name
+                warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
+                                                                        name,
+                                                                        type(self).__name__),
+                              stacklevel=3)
+
+        return super(Forward, self)._setResultsName(name, listAllMatches)
+
class TokenConverter(ParseElementEnhance):
"""
Abstract subclass of :class:`ParseExpression`, for converting parsed results.
@@ -5249,7 +5336,7 @@ def _escapeRegexRangeChars(s):
s = s.replace("\t",r"\t")
return _ustr(s)
-def oneOf( strs, caseless=False, useRegex=True ):
+def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
"""Helper to quickly define a set of alternative Literals, and makes
sure to do longest-first testing when there is a conflict,
regardless of the input order, but returns
@@ -5263,8 +5350,10 @@ def oneOf( strs, caseless=False, useRegex=True ):
caseless
- useRegex - (default= ``True``) - as an optimization, will
generate a Regex object; otherwise, will generate
- a :class:`MatchFirst` object (if ``caseless=True``, or if
+ a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
creating a :class:`Regex` raises an exception)
+ - asKeyword - (default=``False``) - enforce Keyword-style matching on the
+ generated expressions
Example::
@@ -5279,14 +5368,18 @@ def oneOf( strs, caseless=False, useRegex=True ):
[['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
"""
+ if isinstance(caseless, basestring):
+ warnings.warn("More than one string argument passed to oneOf, pass "
+ "choices as a list or space-delimited string", stacklevel=2)
+
if caseless:
- isequal = ( lambda a,b: a.upper() == b.upper() )
- masks = ( lambda a,b: b.upper().startswith(a.upper()) )
- parseElementClass = CaselessLiteral
+ isequal = (lambda a, b: a.upper() == b.upper())
+ masks = (lambda a, b: b.upper().startswith(a.upper()))
+ parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
else:
- isequal = ( lambda a,b: a == b )
- masks = ( lambda a,b: b.startswith(a) )
- parseElementClass = Literal
+ isequal = (lambda a, b: a == b)
+ masks = (lambda a, b: b.startswith(a))
+ parseElementClass = Keyword if asKeyword else Literal
symbols = []
if isinstance(strs,basestring):
@@ -5299,22 +5392,24 @@ def oneOf( strs, caseless=False, useRegex=True ):
if not symbols:
return NoMatch()
- i = 0
- while i < len(symbols)-1:
- cur = symbols[i]
- for j,other in enumerate(symbols[i+1:]):
- if ( isequal(other, cur) ):
- del symbols[i+j+1]
- break
- elif ( masks(cur, other) ):
- del symbols[i+j+1]
- symbols.insert(i,other)
- cur = other
- break
- else:
- i += 1
+ if not asKeyword:
+ # if not producing keywords, need to reorder to take care to avoid masking
+ # longer choices with shorter ones
+ i = 0
+ while i < len(symbols)-1:
+ cur = symbols[i]
+ for j, other in enumerate(symbols[i+1:]):
+ if (isequal(other, cur)):
+ del symbols[i+j+1]
+ break
+ elif (masks(cur, other)):
+ del symbols[i+j+1]
+ symbols.insert(i,other)
+ break
+ else:
+ i += 1
- if not caseless and useRegex:
+ if not (caseless or asKeyword) and useRegex:
#~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
try:
if len(symbols)==len("".join(symbols)):
@@ -5325,7 +5420,6 @@ def oneOf( strs, caseless=False, useRegex=True ):
warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
SyntaxWarning, stacklevel=2)
-
# last resort, just use MatchFirst
return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
diff --git a/unitTests.py b/unitTests.py
index 586c8f0..53fc554 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -30,9 +30,9 @@ if PY_3:
else:
def _print(*args, **kwargs):
+ # Writes the space-joined str() of args to stdout, followed by kwargs['end']
+ # when given (otherwise a newline), then flushes stdout.
if 'end' in kwargs:
sys.stdout.write(' '.join(map(str,args)) + kwargs['end'])
+ # write() accepts no keyword arguments, so flush with an explicit call
+ sys.stdout.flush()
else:
sys.stdout.write(' '.join(map(str,args)) + '\n')
+ sys.stdout.flush()
print_ = _print
from cStringIO import StringIO
@@ -4347,10 +4347,15 @@ class ParseResultsWithNameMatchFirst(ParseTestCase):
# test compatibility mode, restoring pre-2.3.1 behavior
with AutoReset(pp.__compat__, "collect_all_And_tokens"):
pp.__compat__.collect_all_And_tokens = False
+ pp.__diag__.warn_multiple_tokens_in_named_alternation = True
expr_a = pp.Literal('not') + pp.Literal('the') + pp.Literal('bird')
expr_b = pp.Literal('the') + pp.Literal('bird')
- expr = (expr_a | expr_b)('rexp')
- expr.runTests("""\
+ if PY_3:
+ with self.assertWarns(UserWarning, msg="failed to warn of And within alternation"):
+ expr = (expr_a | expr_b)('rexp')
+ else:
+ expr = (expr_a | expr_b)('rexp')
+ expr.runTests("""
not the bird
the bird
""")
@@ -4489,6 +4494,35 @@ class CaselessKeywordVsKeywordCaselessTest(ParseTestCase):
self.assertEqual(flist, clist, "CaselessKeyword not working the same as Keyword(caseless=True)")
+class OneOfKeywordsTest(ParseTestCase):
+ """Exercise oneOf both with its default arguments and with asKeyword=True."""
+ def runTest(self):
+ import pyparsing as pp
+
+ # default oneOf: every input, including the run-together 'abc', must parse
+ literal_expr = pp.oneOf("a b c")
+ success, _ = literal_expr[...].runTests("""
+ # literal oneOf tests
+ a b c
+ a a a
+ abc
+ """)
+ self.assertTrue(success, "failed literal oneOf matching")
+
+ # asKeyword=True: the space-separated inputs must still parse
+ keyword_expr = pp.oneOf("a b c", asKeyword=True)
+ success, _ = keyword_expr[...].runTests("""
+ # keyword oneOf tests
+ a b c
+ a a a
+ """)
+ self.assertTrue(success, "failed keyword oneOf matching")
+
+ # asKeyword=True: 'abc' is run as a failure test, i.e. it is expected not to parse
+ success, _ = keyword_expr[...].runTests("""
+ # keyword oneOf failure tests
+ abc
+ """, failureTests=True)
+ self.assertTrue(success, "failed keyword oneOf failure tests")
+
+
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
@@ -4735,16 +4769,17 @@ suite = makeTestSuite()
if __name__ == '__main__':
- testRunner = TextTestRunner()
-
# run specific tests by including them in this list, otherwise
# all tests will be run
testclasses = [
]
if not testclasses:
+ testRunner = TextTestRunner()
result = testRunner.run(suite)
else:
+ # disable chaser '.' display
+ testRunner = TextTestRunner(verbosity=0)
BUFFER_OUTPUT = False
result = testRunner.run(makeTestSuiteTemp(testclasses))