Add __diag__ namespace to enable diagnostic switches; add asKeyword optional arg for oneOf to emit Keyword expressions instead of Literals

author: Paul McGuire <ptmcg@austin.rr.com> 2019-07-08 20:54:30 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2019-07-08 20:54:30 -0500
commit: d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5 (patch)
tree: 61ef123efeb7f2d6ef6c4466f6e73f4f0eab5ec1
parent: e30333f5eb4262069a8863e8fa276ecbcd364660 (diff)
download: pyparsing-git-d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5.tar.gz
3 files changed, 189 insertions, 34 deletions
diff --git a/CHANGES b/CHANGES
index 1d54567..e1e98e3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -99,12 +99,38 @@ Version 2.4.1 - July, 2019
 
     pyparsing.ParseException: Expected end of text, found '1' (at char 8), (line:1, col:9)
 
+- Added diagnostic switches to help detect and warn about common
+  parser construction mistakes, or enable additional parse
+  debugging. Switches are attached to the pyparsing.__diag__
+  namespace object:
+     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+       name is defined on a MatchFirst or Or expression with one or more And subexpressions
+       (default=True)
+     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+       name is defined on a containing expression with ungrouped subexpressions that also
+       have results names (default=True)
+     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
+       with a results name, but has no contents defined (default=False)
+     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
+       incorrectly called with multiple str arguments (default=True)
+     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
+       calls to ParserElement.setName() (default=False)
+
+  warn_multiple_tokens_in_named_alternation is intended to help
+  those who currently have set __compat__.collect_all_And_tokens to
+  False as a workaround for using the pre-2.3.0 code with ungrouped
+  results names nested in an outer expression also having a results
+  name.
+
 - Added ParseResults.from_dict classmethod, to simplify creation
   of a ParseResults with results names. May be called with a dict
   argument, or with a series of named arguments (or both). This
   makes it easy to add a sub-level of named items to the parsed
   tokens in a parse action.
 
+- Added asKeyword argument (default=False) to oneOf, to force
+  keyword-style matching on the generated expressions.
+
 - While investigating issue #93, I found that Or and
   addCondition could interact to select an alternative that
   is not the longest match. This is because Or first checks
diff --git a/pyparsing.py b/pyparsing.py
index d41ceba..77617d2 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -157,11 +157,34 @@ __compat__.__doc__ = """
     
      - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
        of results names when an And expression is nested within an Or or MatchFirst; set to 
-       True to enable bugfix to be released in pyparsing 2.4
+       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
+       pre-2.3.0 handling of named results
 """
 __compat__.collect_all_And_tokens = True
 
 
+__diag__ = SimpleNamespace()
+__diag__.__doc__ = """
+Diagnostic configuration
+     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+       name is defined on a MatchFirst or Or expression with one or more And subexpressions
+       (default=True) (only warns if __compat__.collect_all_And_tokens is False)
+     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+       name is defined on a containing expression with ungrouped subexpressions that also 
+       have results names (default=True)
+     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
+       with a results name, but has no contents defined (default=False)
+     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
+       incorrectly called with multiple str arguments (default=True)
+     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent 
+       calls to ParserElement.setName() (default=False)
+"""
+__diag__.warn_multiple_tokens_in_named_alternation = True
+__diag__.warn_ungrouped_named_tokens_in_collection = True
+__diag__.warn_name_set_on_empty_Forward = False
+__diag__.warn_on_multiple_string_args_to_oneof = True
+__diag__.enable_debug_on_named_expressions = False
+
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 
 __all__ = [ '__version__', '__versionTime__', '__author__', '__compat__',
@@ -1385,8 +1408,8 @@ class ParserElement(object):
         """
         self.name = name
         self.errmsg = "Expected " + self.name
-        if hasattr(self,"exception"):
-            self.exception.msg = self.errmsg
+        if __diag__.enable_debug_on_named_expressions:
+            self.setDebug()
         return self
 
     def setResultsName( self, name, listAllMatches=False ):
@@ -1410,6 +1433,9 @@ class ParserElement(object):
             # equivalent form:
             date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
         """
+        return self._setResultsName(name, listAllMatches)
+
+    def _setResultsName(self, name, listAllMatches=False):
         newself = self.copy()
         if name.endswith("*"):
             name = name[:-1]
@@ -2338,7 +2364,7 @@ class ParserElement(object):
             userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
         """
         if name is not None:
-            return self.setResultsName(name)
+            return self._setResultsName(name)
         else:
             return self.copy()
 
@@ -3843,6 +3869,20 @@ class ParseExpression(ParserElement):
         ret.exprs = [e.copy() for e in self.exprs]
         return ret
 
+    def _setResultsName(self, name, listAllMatches=False):
+        # diagnostic: flag every directly contained expression that already carries a results name
+        if __diag__.warn_ungrouped_named_tokens_in_collection:
+            template = ("{0}: setting results name {1!r} on {2} expression "
+                        "collides with {3!r} on contained expression")
+            for sub in self.exprs:
+                if isinstance(sub, ParserElement) and sub.resultsName:
+                    warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
+                                                  name, type(self).__name__, sub.resultsName),
+                                  stacklevel=3)
+
+        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
+
+
 class And(ParseExpression):
     """
     Requires all given :class:`ParseExpression` s to be found in the given order.
@@ -4052,6 +4092,17 @@ class Or(ParseExpression):
         for e in self.exprs:
             e.checkRecursion( subRecCheckList )
 
+    def _setResultsName(self, name, listAllMatches=False):
+        # warn only in compatibility mode, and only when some alternative is an And
+        warn_wanted = (not __compat__.collect_all_And_tokens
+                       and __diag__.warn_multiple_tokens_in_named_alternation)
+        if warn_wanted and any(isinstance(alt, And) for alt in self.exprs):
+            warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                          "may only return a single token for an And alternative".format(
+                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), stacklevel=3)
+
+        return super(Or, self)._setResultsName(name, listAllMatches)
+
 
 class MatchFirst(ParseExpression):
     """Requires that at least one :class:`ParseExpression` is found. If
@@ -4126,6 +4177,17 @@ class MatchFirst(ParseExpression):
         for e in self.exprs:
             e.checkRecursion( subRecCheckList )
 
+    def _setResultsName(self, name, listAllMatches=False):
+        # warn only in compatibility mode, and only when some alternative is an And
+        warn_wanted = (not __compat__.collect_all_And_tokens
+                       and __diag__.warn_multiple_tokens_in_named_alternation)
+        if warn_wanted and any(isinstance(alt, And) for alt in self.exprs):
+            warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                          "may only return a single token for an And alternative".format(
+                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), stacklevel=3)
+
+        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
+
 
 class Each(ParseExpression):
     """Requires all given :class:`ParseExpression` s to be found, but in
@@ -4532,6 +4594,20 @@ class _MultipleMatch(ParseElementEnhance):
 
         return loc, tokens
 
+    def _setResultsName(self, name, listAllMatches=False):
+        # diagnostic: check the repeated expression itself plus any expressions it directly contains
+        if __diag__.warn_ungrouped_named_tokens_in_collection:
+            template = ("{0}: setting results name {1!r} on {2} expression "
+                        "collides with {3!r} on contained expression")
+            for sub in [self.expr] + getattr(self.expr, 'exprs', []):
+                if isinstance(sub, ParserElement) and sub.resultsName:
+                    warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
+                                                  name, type(self).__name__, sub.resultsName),
+                                  stacklevel=3)
+
+        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
+
+
 class OneOrMore(_MultipleMatch):
     """Repetition of one or more of the given expression.
 
@@ -4881,6 +4957,17 @@ class Forward(ParseElementEnhance):
             ret <<= self
             return ret
 
+    def _setResultsName(self, name, listAllMatches=False):
+        # Optionally warn (switch: __diag__.warn_name_set_on_empty_Forward) when a results
+        # name is attached while this Forward still has no contained expression.
+        if __diag__.warn_name_set_on_empty_Forward and self.expr is None:
+            warnings.warn("{0}: setting results name {1!r} on {2} expression "
+                          "that has no contained expression".format(
+                              "warn_name_set_on_empty_Forward", name, type(self).__name__),
+                          stacklevel=3)
+
+        return super(Forward, self)._setResultsName(name, listAllMatches)
+
 class TokenConverter(ParseElementEnhance):
     """
     Abstract subclass of :class:`ParseExpression`, for converting parsed results.
@@ -5249,7 +5336,7 @@ def _escapeRegexRangeChars(s):
     s = s.replace("\t",r"\t")
     return _ustr(s)
 
-def oneOf( strs, caseless=False, useRegex=True ):
+def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
     """Helper to quickly define a set of alternative Literals, and makes
     sure to do longest-first testing when there is a conflict,
     regardless of the input order, but returns
@@ -5263,8 +5350,10 @@ def oneOf( strs, caseless=False, useRegex=True ):
        caseless
      - useRegex - (default= ``True``) - as an optimization, will
        generate a Regex object; otherwise, will generate
-       a :class:`MatchFirst` object (if ``caseless=True``, or if
+       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
        creating a :class:`Regex` raises an exception)
+     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
+       generated expressions
 
     Example::
 
@@ -5279,14 +5368,18 @@ def oneOf( strs, caseless=False, useRegex=True ):
 
         [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
     """
+    if isinstance(caseless, basestring) and __diag__.warn_on_multiple_string_args_to_oneof:
+        warnings.warn("More than one string argument passed to oneOf, pass "
+                      "choices as a list or space-delimited string", stacklevel=2)
+
     if caseless:
-        isequal = ( lambda a,b: a.upper() == b.upper() )
-        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
-        parseElementClass = CaselessLiteral
+        isequal = (lambda a, b: a.upper() == b.upper())
+        masks = (lambda a, b: b.upper().startswith(a.upper()))
+        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
     else:
-        isequal = ( lambda a,b: a == b )
-        masks = ( lambda a,b: b.startswith(a) )
-        parseElementClass = Literal
+        isequal = (lambda a, b: a == b)
+        masks = (lambda a, b: b.startswith(a))
+        parseElementClass = Keyword if asKeyword else Literal
 
     symbols = []
     if isinstance(strs,basestring):
@@ -5299,22 +5392,24 @@ def oneOf( strs, caseless=False, useRegex=True ):
     if not symbols:
         return NoMatch()
 
-    i = 0
-    while i < len(symbols)-1:
-        cur = symbols[i]
-        for j,other in enumerate(symbols[i+1:]):
-            if ( isequal(other, cur) ):
-                del symbols[i+j+1]
-                break
-            elif ( masks(cur, other) ):
-                del symbols[i+j+1]
-                symbols.insert(i,other)
-                cur = other
-                break
-        else:
-            i += 1
+    if not asKeyword:
+        # if not producing keywords, need to reorder to take care to avoid masking
+        # longer choices with shorter ones
+        i = 0
+        while i < len(symbols)-1:
+            cur = symbols[i]
+            for j, other in enumerate(symbols[i+1:]):
+                if (isequal(other, cur)):
+                    del symbols[i+j+1]
+                    break
+                elif (masks(cur, other)):
+                    del symbols[i+j+1]
+                    symbols.insert(i,other)
+                    break
+            else:
+                i += 1
 
-    if not caseless and useRegex:
+    if not (caseless or asKeyword) and useRegex:
         #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
         try:
             if len(symbols)==len("".join(symbols)):
@@ -5325,7 +5420,6 @@ def oneOf( strs, caseless=False, useRegex=True ):
             warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                     SyntaxWarning, stacklevel=2)
 
-
     # last resort, just use MatchFirst
     return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
 
diff --git a/unitTests.py b/unitTests.py
index 586c8f0..53fc554 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -30,9 +30,11 @@ if PY_3:
 else:
     def _print(*args, **kwargs):
         if 'end' in kwargs:
             sys.stdout.write(' '.join(map(str,args)) + kwargs['end'])
         else:
             sys.stdout.write(' '.join(map(str,args)) + '\n')
+        # file.write() accepts no keyword arguments, so flush with a separate call
+        sys.stdout.flush()
     print_ = _print
     from cStringIO import StringIO
 
@@ -4347,10 +4347,15 @@ class ParseResultsWithNameMatchFirst(ParseTestCase):
         # test compatibility mode, restoring pre-2.3.1 behavior
         with AutoReset(pp.__compat__, "collect_all_And_tokens"):
             pp.__compat__.collect_all_And_tokens = False
+            pp.__diag__.warn_multiple_tokens_in_named_alternation = True
             expr_a = pp.Literal('not') + pp.Literal('the') + pp.Literal('bird')
             expr_b = pp.Literal('the') + pp.Literal('bird')
-            expr = (expr_a | expr_b)('rexp')
-            expr.runTests("""\
+            if PY_3:
+                with self.assertWarns(UserWarning, msg="failed to warn of And within alternation"):
+                    expr = (expr_a | expr_b)('rexp')
+            else:
+                expr = (expr_a | expr_b)('rexp')
+            expr.runTests("""
                 not the bird
                 the bird
             """)
@@ -4489,6 +4494,35 @@ class CaselessKeywordVsKeywordCaselessTest(ParseTestCase):
         self.assertEqual(flist, clist, "CaselessKeyword not working the same as Keyword(caseless=True)")
 
 
+class OneOfKeywordsTest(ParseTestCase):
+    def runTest(self):
+        import pyparsing as pp
+        # default (Literal) alternatives: all three samples are expected to parse
+        literals = pp.oneOf("a b c")
+        passed, _ = literals[...].runTests("""
+            # literal oneOf tests
+            a b c
+            a a a
+            abc
+        """)
+        self.assertTrue(passed, "failed literal oneOf matching")
+        # asKeyword=True alternatives: the space-separated samples are expected to parse ...
+        keywords = pp.oneOf("a b c", asKeyword=True)
+        passed, _ = keywords[...].runTests("""
+            # keyword oneOf tests
+            a b c
+            a a a
+        """)
+        self.assertTrue(passed, "failed keyword oneOf matching")
+        # ... while the run-together sample is expected to be rejected
+        passed, _ = keywords[...].runTests("""
+            # keyword oneOf failure tests
+            abc
+        """, failureTests=True)
+        self.assertTrue(passed, "failed keyword oneOf failure tests")
+
+
+
 class MiscellaneousParserTests(ParseTestCase):
     def runTest(self):
 
@@ -4735,16 +4769,17 @@ suite = makeTestSuite()
 
 if __name__ == '__main__':
 
-    testRunner = TextTestRunner()
-
     # run specific tests by including them in this list, otherwise
     # all tests will be run
     testclasses = [
         ]
 
     if not testclasses:
+        testRunner = TextTestRunner()
         result = testRunner.run(suite)
     else:
+        # disable chaser '.' display
+        testRunner = TextTestRunner(verbosity=0)
         BUFFER_OUTPUT = False
         result = testRunner.run(makeTestSuiteTemp(testclasses))
author	Paul McGuire <ptmcg@austin.rr.com>	2019-07-08 20:54:30 -0500
committer	Paul McGuire <ptmcg@austin.rr.com>	2019-07-08 20:54:30 -0500
commit	d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5 (patch)
tree	61ef123efeb7f2d6ef6c4466f6e73f4f0eab5ec1
parent	e30333f5eb4262069a8863e8fa276ecbcd364660 (diff)
download	pyparsing-git-d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5.tar.gz