Added tokenMap parse action helper; general code cleanup; renamed literalStringClass to _literalStringClass throughout

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@358 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-05-24 05:25:54 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-05-24 05:25:54 +0000
commit: e4d24d41d23ffbd97845d38f18d2c807e7142981 (patch)
tree: 5c863b9ccb9842b453cb50838067cd5fd63d1d53
parent: ce69f78f782ef6e6b6212308666389f43145f2dc (diff)
download: pyparsing-e4d24d41d23ffbd97845d38f18d2c807e7142981.tar.gz
3 files changed, 153 insertions, 66 deletions
diff --git a/src/CHANGES b/src/CHANGES
index aa0dcee..446e3b4 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -4,6 +4,31 @@ Change Log
 
 Verison 2.1.5 - 
 ------------------------------
+- Added a new parse action construction helper tokenMap, which will
+  apply a function and optional arguments to each element in a 
+  ParseResults. So this parse action:
+  
+      def lowercase_all(tokens):
+          return [str(t).lower() for t in tokens]
+      OneOrMore(Word(alphas)).setParseAction(lowercase_all)
+
+  can now be written:
+  
+      OneOrMore(Word(alphas)).setParseAction(tokenMap(str.lower))
+
+  Also simplifies writing conversion parse actions like:
+  
+      integer = Word(nums).setParseAction(lambda t: int(t[0]))
+
+  to just:
+  
+      integer = Word(nums).setParseAction(tokenMap(int))
+
+  If additional arguments are necessary, they can be included in the
+  call to tokenMap, as in:
+  
+      hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))
+
 - Added more expressions to pyparsing_common:
   . IPv4 and IPv6 addresses (including long, short, and mixed forms
     of IPv6)
@@ -16,9 +41,9 @@ Verison 2.1.5 -
   and an output list of each test and its output lines.
 
 - Added failureTests argument (default=False) to runTests, so that
-  tests can be run that are expected failures, and if all tests fail
-  as expected, the runTests' success value will return True. Also,
-  parseAll now default to True.
+  tests can be run that are expected failures, and runTests' success 
+  value will return True only if all tests *fail* as expected. Also,
+  parseAll now defaults to True.
 
 
 Version 2.1.4 - May, 2016
diff --git a/src/pyparsing.py b/src/pyparsing.py
index c6a0af9..a9edfcb 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
 """
 
 __version__ = "2.1.5"
-__versionTime__ = "18 May 2016 22:03 UTC"
+__versionTime__ = "24 May 2016 04:18 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -94,7 +94,7 @@ __all__ = [
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
-'pyparsing_common',
+'tokenMap', 'pyparsing_common',
 ]
 
 system_version = tuple(sys.version_info)[:3]
@@ -373,35 +373,48 @@ class ParseResults(object):
     __nonzero__ = __bool__
     def __iter__( self ): return iter( self.__toklist )
     def __reversed__( self ): return iter( self.__toklist[::-1] )
-    def iterkeys( self ):
-        """Returns all named result keys."""
+    def _iterkeys( self ):
         if hasattr(self.__tokdict, "iterkeys"):
             return self.__tokdict.iterkeys()
         else:
             return iter(self.__tokdict)
 
-    def itervalues( self ):
-        """Returns all named result values."""
-        return (self[k] for k in self.iterkeys())
+    def _itervalues( self ):
+        return (self[k] for k in self._iterkeys())
             
-    def iteritems( self ):
-        return ((k, self[k]) for k in self.iterkeys())
+    def _iteritems( self ):
+        return ((k, self[k]) for k in self._iterkeys())
 
     if PY_3:
-        keys = iterkeys
-        values = itervalues
-        items = iteritems
+        keys = _iterkeys       
+        """Returns an iterator of all named result keys (Python 3.x only)."""
+
+        values = _itervalues
+        """Returns an iterator of all named result values (Python 3.x only)."""
+
+        items = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
+
     else:
+        iterkeys = _iterkeys
+        """Returns an iterator of all named result keys (Python 2.x only)."""
+
+        itervalues = _itervalues
+        """Returns an iterator of all named result values (Python 2.x only)."""
+
+        iteritems = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
         def keys( self ):
-            """Returns all named result keys."""
+            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
             return list(self.iterkeys())
 
         def values( self ):
-            """Returns all named result values."""
+            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
             return list(self.itervalues())
                 
         def items( self ):
-            """Returns all named result keys and values as a list of tuples."""
+            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
             return list(self.iteritems())
 
     def haskeys( self ):
@@ -863,7 +876,7 @@ class ParserElement(object):
         """
         Set class to be used for inclusion of string literals into a parser.
         """
-        ParserElement.literalStringClass = cls
+        ParserElement._literalStringClass = cls
 
     def __init__( self, savelist=False ):
         self.parseAction = list()
@@ -1317,7 +1330,7 @@ class ParserElement(object):
     def __add__(self, other ):
         """Implementation of + operator - returns C{L{And}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1327,7 +1340,7 @@ class ParserElement(object):
     def __radd__(self, other ):
         """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1337,7 +1350,7 @@ class ParserElement(object):
     def __sub__(self, other):
         """Implementation of - operator, returns C{L{And}} with error stop"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1347,7 +1360,7 @@ class ParserElement(object):
     def __rsub__(self, other ):
         """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1428,7 +1441,7 @@ class ParserElement(object):
     def __or__(self, other ):
         """Implementation of | operator - returns C{L{MatchFirst}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1438,7 +1451,7 @@ class ParserElement(object):
     def __ror__(self, other ):
         """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1448,7 +1461,7 @@ class ParserElement(object):
     def __xor__(self, other ):
         """Implementation of ^ operator - returns C{L{Or}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1458,7 +1471,7 @@ class ParserElement(object):
     def __rxor__(self, other ):
         """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1468,7 +1481,7 @@ class ParserElement(object):
     def __and__(self, other ):
         """Implementation of & operator - returns C{L{Each}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1478,7 +1491,7 @@ class ParserElement(object):
     def __rand__(self, other ):
         """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         if not isinstance( other, ParserElement ):
             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                     SyntaxWarning, stacklevel=2)
@@ -1655,11 +1668,11 @@ class ParserElement(object):
             - failureTests - (default=False) indicates if these tests are expected to fail parsing
             
             Returns: a (success, results) tuple, where success indicates that all tests succeeded
-              (or failed if C{failureTest} is True), 
-              and the results contain a list of lines of each test's output
+            (or failed if C{failureTests} is True), and the results contain a list of lines of each 
+            test's output
         """
         if isinstance(tests, basestring):
-            tests = list(map(str.strip, tests.splitlines()))
+            tests = list(map(str.strip, tests.rstrip().splitlines()))
         if isinstance(comment, basestring):
             comment = Literal(comment)
         allResults = []
@@ -1750,17 +1763,17 @@ class Literal(Token):
             return loc+self.matchLen, self.match
         raise ParseException(instring, loc, self.errmsg, self)
 _L = Literal
-ParserElement.literalStringClass = Literal
+ParserElement._literalStringClass = Literal
 
 class Keyword(Token):
     """Token to exactly match a specified string as a keyword, that is, it must be
-       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
-         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
-         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+       immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
+        - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+        - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
        Accepts two optional constructor arguments in addition to the keyword string:
-       C{identChars} is a string of characters that would be valid identifier characters,
-       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
-       matching, default is C{False}.
+        - C{identChars} is a string of characters that would be valid identifier characters,
+          defaulting to all alphanumerics + "_" and "$"
+        - C{caseless} allows case-insensitive matching, default is C{False}.
     """
     DEFAULT_KEYWORD_CHARS = alphanums+"_$"
 
@@ -2404,11 +2417,11 @@ class ParseExpression(ParserElement):
             exprs = list(exprs)
 
         if isinstance( exprs, basestring ):
-            self.exprs = [ Literal( exprs ) ]
+            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
         elif isinstance( exprs, collections.Sequence ):
             # if sequence of strings provided, wrap with Literal
             if all(isinstance(expr, basestring) for expr in exprs):
-                exprs = map(Literal, exprs)
+                exprs = map(ParserElement._literalStringClass, exprs)
             self.exprs = list(exprs)
         else:
             try:
@@ -2509,6 +2522,7 @@ class And(ParseExpression):
     """Requires all given C{ParseExpression}s to be found in the given order.
        Expressions may be separated by whitespace.
        May be constructed using the C{'+'} operator.
+       May also be constructed using the C{'-'} operator, which will suppress backtracking.
     """
 
     class _ErrorStop(Empty):
@@ -2551,7 +2565,7 @@ class And(ParseExpression):
 
     def __iadd__(self, other ):
         if isinstance( other, basestring ):
-            other = Literal( other )
+            other = ParserElement._literalStringClass( other )
         return self.append( other ) #And( [ self, other ] )
 
     def checkRecursion( self, parseElementList ):
@@ -2623,7 +2637,7 @@ class Or(ParseExpression):
 
     def __ixor__(self, other ):
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         return self.append( other ) #Or( [ self, other ] )
 
     def __str__( self ):
@@ -2679,7 +2693,7 @@ class MatchFirst(ParseExpression):
 
     def __ior__(self, other ):
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass( other )
+            other = ParserElement._literalStringClass( other )
         return self.append( other ) #MatchFirst( [ self, other ] )
 
     def __str__( self ):
@@ -2787,7 +2801,7 @@ class ParseElementEnhance(ParserElement):
     def __init__( self, expr, savelist=False ):
         super(ParseElementEnhance,self).__init__(savelist)
         if isinstance( expr, basestring ):
-            expr = Literal(expr)
+            expr = ParserElement._literalStringClass(expr)
         self.expr = expr
         self.strRepr = None
         if expr is not None:
@@ -2909,7 +2923,7 @@ class OneOrMore(ParseElementEnhance):
         super(OneOrMore, self).__init__(expr)
         ender = stopOn
         if isinstance(ender, basestring):
-            ender = Literal(ender)
+            ender = ParserElement._literalStringClass(ender)
         self.not_ender = ~ender if ender is not None else None
 
     def parseImpl( self, instring, loc, doActions=True ):
@@ -3048,7 +3062,7 @@ class SkipTo(ParseElementEnhance):
         self.includeMatch = include
         self.asList = False
         if isinstance(failOn, basestring):
-            self.failOn = Literal(failOn)
+            self.failOn = ParserElement._literalStringClass(failOn)
         else:
             self.failOn = failOn
         self.errmsg = "No match found for "+_ustr(self.expr)
@@ -3120,7 +3134,7 @@ class Forward(ParseElementEnhance):
 
     def __lshift__( self, other ):
         if isinstance( other, basestring ):
-            other = ParserElement.literalStringClass(other)
+            other = ParserElement._literalStringClass(other)
         self.expr = other
         self.strRepr = None
         self.mayIndexError = self.expr.mayIndexError
@@ -3591,14 +3605,30 @@ def removeQuotes(s,l,t):
     """
     return t[0][1:-1]
 
-def upcaseTokens(s,l,t):
-    """Helper parse action to convert tokens to upper case."""
-    return [ tt.upper() for tt in map(_ustr,t) ]
+def tokenMap(func, *args):
+    """Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional 
+       args are passed, they are forwarded to the given function as additional arguments after
+       the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+       parsed data to an integer using base 16.
+    """
+    def pa(s,l,t):
+        t[:] = [func(tokn, *args) for tokn in t]
 
-def downcaseTokens(s,l,t):
-    """Helper parse action to convert tokens to lower case."""
-    return [ tt.lower() for tt in map(_ustr,t) ]
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    pa.__name__ = func_name
+
+    return pa
+
+upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
+"""Helper parse action to convert tokens to upper case."""
 
+downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
+"""Helper parse action to convert tokens to lower case."""
+    
 def _makeTags(tagStr, xml):
     """Internal helper to construct opening and closing tag expressions, given a tag name"""
     if isinstance(tagStr,basestring):
@@ -3755,7 +3785,9 @@ def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
         lastExpr = thisExpr
     ret <<= lastExpr
     return ret
+
 operatorPrecedence = infixNotation
+"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
 
 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
@@ -3880,18 +3912,29 @@ def replaceHTMLEntity(t):
 
 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
 cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
+"Comment of the form C{/* ... */}"
 
 htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
+"Comment of the form C{<!-- ... -->}"
+
 restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
 dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
+"Comment of the form C{// ... (to end of line)}"
+
 cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
+"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
 
 javaStyleComment = cppStyleComment
+"Same as C{L{cppStyleComment}}"
+
 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+"Comment of the form C{# ... (to end of line)}"
+
 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                   Optional( Word(" \t") +
                                             ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
 
 # some other useful expressions - using lower-case class name since we are really using this as a namespace
 class pyparsing_common:
@@ -3905,22 +3948,20 @@ class pyparsing_common:
      - UUID
     """
 
-    def convertToInteger(t):
-        """
-        Parse action for converting parsed integers to Python int
-        """
-        return int(t[0])
+    convertToInteger = tokenMap(int)
+    """
+    Parse action for converting parsed integers to Python int
+    """
 
-    def convertToFloat(t):
-        """
-        Parse action for converting parsed numbers to Python float
-        """
-        return float(t[0])
+    convertToFloat = tokenMap(float)
+    """
+    Parse action for converting parsed numbers to Python float
+    """
 
     integer = Word(nums).setName("integer").setParseAction(convertToInteger)
     """expression that parses an unsigned integer and returns an int"""
 
-    hex_integer = Word(hexnums).setName("hex integer").setParseAction(lambda t: int(t[0], 16))
+    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
     """expression that parses a hexadecimal integer and returns an int"""
 
     signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
@@ -4025,3 +4066,13 @@ if __name__ == "__main__":
         1e-12
         """)
 
+    pyparsing_common.hex_integer.runTests("""
+        100
+        FF
+        """)
+
+    import uuid
+    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+    pyparsing_common.uuid.runTests("""
+        12345678-1234-5678-1234-567812345678
+        """)
diff --git a/src/unitTests.py b/src/unitTests.py
index e2706be..d43912c 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -2724,7 +2724,18 @@ class CommonExpressionsTest(ParseTestCase):
             123e4567-e89b-12d3-a456-426655440000
             """)[0]
         assert success, "failed to parse valid uuid"
-            
+
+class TokenMapTest(ParseTestCase):
+    def runTest(self):
+        from pyparsing import tokenMap, Word, hexnums, OneOrMore
+        
+        parser = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
+        success, results = parser.runTests("""
+            00 11 22 aa FF 0a 0d 1a
+            """, printResults=False)
+        assert success, "failed to parse hex integers"
+        assert results[0][-1] == '[0, 17, 34, 170, 255, 10, 13, 26]', "tokenMap parse action failed"
+        
 class MiscellaneousParserTests(ParseTestCase):
     def runTest(self):
         import pyparsing
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-05-24 05:25:54 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-05-24 05:25:54 +0000
commit	e4d24d41d23ffbd97845d38f18d2c807e7142981 (patch)
tree	5c863b9ccb9842b453cb50838067cd5fd63d1d53
parent	ce69f78f782ef6e6b6212308666389f43145f2dc (diff)
download	pyparsing-e4d24d41d23ffbd97845d38f18d2c807e7142981.tar.gz