From bfbff79863151d6444818f74562cce20df6ca4bb Mon Sep 17 00:00:00 2001 From: ptmcg Date: Fri, 5 Aug 2016 21:38:41 +0000 Subject: Add inline examples to docstrings, for real git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@389 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b --- src/pyparsing.py | 1575 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 1094 insertions(+), 481 deletions(-) diff --git a/src/pyparsing.py b/src/pyparsing.py index 088cf26..a9ff9c8 100644 --- a/src/pyparsing.py +++ b/src/pyparsing.py @@ -48,7 +48,7 @@ The program outputs the following:: The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operators. -The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an +The parsed results returned from L{I{ParserElement.parseString}} can be accessed as a nested list, a dictionary, or an object with named attributes. The pyparsing module handles some of the problems that are typically vexing when writing text parsers: @@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when """ __version__ = "2.1.6" -__versionTime__ = "05 Aug 2016 19:32 UTC" +__versionTime__ = "27 Jul 2016 19:13 UTC" __author__ = "Paul McGuire " import string @@ -225,11 +225,23 @@ class ParseBaseException(Exception): return "lineno col line".split() + dir(type(self)) class ParseException(ParseBaseException): - """exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text + """ + Exception thrown when parse expressions don't match class; + supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + + Example:: + try: + Word(nums).setName("integer").parseString("ABC") + except ParseException as pe: + print(pe) + print("column: {}".format(pe.col)) + + prints:: + Expected integer (at char 0), (line:1, col:1) + column: 1 """ pass @@ -278,11 +290,40 @@ class _ParseResultsWithOffset(object): self.tup = (self.tup[0],i) class ParseResults(object): - """Structured parse results, to provide multiple means of access to the parsed data: + """ + Structured parse results, to provide multiple means of access to the parsed data: - as a list (C{len(results)}) - by list index (C{results[0], results[1]}, etc.) - - by attribute (C{results.}) - """ + - by attribute (C{results.} - see L{ParserElement.setResultsName}) + + Example:: + integer = Word(nums) + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + + integer.setResultsName("day")) + # equivalent form: + # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parseString("1999/12/31") + print(list(result)) + print(result[0]) + print(result['month']) + print(result.day) + print('month' in result) + print('minutes' in result) + print(result.dump()) + prints:: + ['1999', '/', '12', '/', '31'] + 1999 + 12 + 31 + True + False + ['1999', '/', '12', '/', '31'] + - day: 31 + - month: 12 + - year: 1999 + """ def __new__(cls, toklist=None, name=None, asList=True, modal=True ): if isinstance(toklist, cls): return toklist @@ -367,11 +408,6 @@ class ParseResults(object): removed = list(range(*i.indices(mylen))) removed.reverse() # fixup indices in token dictionary - #~ for name in self.__tokdict: - #~ occurrences = self.__tokdict[name] - #~ for j in removed: - #~ for k, (value, position) in enumerate(occurrences): - #~ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) for name,occurrences in self.__tokdict.items(): for j in removed: for k, (value, position) in enumerate(occurrences): @@ -437,14 +473,39 @@ class ParseResults(object): return bool(self.__tokdict) def pop( self, *args, **kwargs): - """Removes and returns item at specified index (default=last). - Supports both list and dict semantics for pop(). If passed no - argument or an integer argument, it will use list semantics - and pop tokens from the list of parsed tokens. If passed a - non-integer argument (most likely a string), it will use dict - semantics and pop the corresponding value from any defined - results names. A second default return value argument is - supported, just as in dict.pop().""" + """ + Removes and returns item at specified index (C{default=last}). + Supports both C{list} and C{dict} semantics for C{pop()}. If passed no + argument or an integer argument, it will use C{list} semantics + and pop tokens from the list of parsed tokens. If passed a + non-integer argument (most likely a string), it will use C{dict} + semantics and pop the corresponding value from any defined + results names. A second default return value argument is + supported, just as in C{dict.pop()}. + + Example:: + def remove_first(tokens): + tokens.pop(0) + print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] + print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] + + label = Word(alphas) + patt = label("LABEL") + OneOrMore(Word(nums)) + print(patt.parseString("AAB 123 321").dump()) + + # Use pop() in a parse action to remove named result (note that corresponding value is not + # removed from list form of results) + def remove_LABEL(tokens): + tokens.pop("LABEL") + return tokens + patt.addParseAction(remove_LABEL) + print(patt.parseString("AAB 123 321").dump()) + prints:: + ['AAB', '123', '321'] + - LABEL: AAB + + ['AAB', '123', '321'] + """ if not args: args = [-1] for k,v in kwargs.items(): @@ -464,39 +525,83 @@ class ParseResults(object): return defaultvalue def get(self, key, defaultValue=None): - """Returns named result matching the given key, or if there is no - such name, then returns the given C{defaultValue} or C{None} if no - C{defaultValue} is specified.""" + """ + Returns named result matching the given key, or if there is no + such name, then returns the given C{defaultValue} or C{None} if no + C{defaultValue} is specified. + + Similar to C{dict.get()}. + + Example:: + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parseString("1999/12/31") + print(result.get("year")) # -> '1999' + print(result.get("hour", "not specified")) # -> 'not specified' + print(result.get("hour")) # -> None + """ if key in self: return self[key] else: return defaultValue def insert( self, index, insStr ): - """Inserts new element at location index in the list of parsed tokens.""" + """ + Inserts new element at location index in the list of parsed tokens. + + Similar to C{list.insert()}. + + Example:: + print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to insert the parse location in the front of the parsed results + def insert_locn(locn, tokens): + tokens.insert(0, locn) + print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] + """ self.__toklist.insert(index, insStr) # fixup indices in token dictionary - #~ for name in self.__tokdict: - #~ occurrences = self.__tokdict[name] - #~ for k, (value, position) in enumerate(occurrences): - #~ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) for name,occurrences in self.__tokdict.items(): for k, (value, position) in enumerate(occurrences): occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) def append( self, item ): - """Add single element to end of ParseResults list of elements.""" + """ + Add single element to end of ParseResults list of elements. + + Example:: + print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] + + # use a parse action to compute the sum of the parsed integers, and add it to the end + def append_sum(tokens): + tokens.append(sum(map(int, tokens))) + print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] + """ self.__toklist.append(item) def extend( self, itemseq ): - """Add sequence of elements to end of ParseResults list of elements.""" + """ + Add sequence of elements to end of ParseResults list of elements. + + Example:: + patt = OneOrMore(Word(alphas)) + + # use a parse action to append the reverse of the matched strings, to make a palindrome + def make_palindrome(tokens): + tokens.extend(reversed([t[::-1] for t in tokens])) + return ''.join(tokens) + print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' + """ if isinstance(itemseq, ParseResults): self += itemseq else: self.__toklist.extend(itemseq) def clear( self ): - """Clear all elements and results names.""" + """ + Clear all elements and results names. + """ del self.__toklist[:] self.__tokdict.clear() @@ -561,11 +666,40 @@ class ParseResults(object): return out def asList( self ): - """Returns the parse results as a nested list of matching tokens, all converted to strings.""" + """ + Returns the parse results as a nested list of matching tokens, all converted to strings. + + Example:: + patt = OneOrMore(Word(alphas)) + result = patt.parseString("sldkj lsdkj sldkj") + # even though the result prints in string-like form, it is actually a pyparsing ParseResults + print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] + + # Use asList() to create an actual list + result_list = result.asList() + print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] + """ return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] def asDict( self ): - """Returns the named parse results as a nested dictionary.""" + """ + Returns the named parse results as a nested dictionary. + + Example:: + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parseString('12/31/1999') + print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) + + result_dict = result.asDict() + print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} + + # even though a ParseResults supports dict-like access, sometime you just need to have a dict + import json + print(json.dumps(result_dict)) # -> {"month": "31", "day": "1999", "year": "12"} + print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable + """ if PY_3: item_fn = self.items else: @@ -583,7 +717,9 @@ class ParseResults(object): return dict((k,toItem(v)) for k,v in item_fn()) def copy( self ): - """Returns a new copy of a C{ParseResults} object.""" + """ + Returns a new copy of a C{ParseResults} object. + """ ret = ParseResults( self.__toklist ) ret.__tokdict = self.__tokdict.copy() ret.__parent = self.__parent @@ -592,7 +728,9 @@ class ParseResults(object): return ret def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """(Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" + """ + (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. + """ nl = "\n" out = [] namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() @@ -658,7 +796,12 @@ class ParseResults(object): return None def getName(self): - """Returns the results name for this token expression.""" + """ + Returns the results name for this token expression. + + Example:: + TODO + """ if self.__name: return self.__name elif self.__parent: @@ -675,9 +818,23 @@ class ParseResults(object): return None def dump(self,indent='',depth=0): - """Diagnostic method for listing out the contents of a C{ParseResults}. - Accepts an optional C{indent} argument so that this string can be embedded - in a nested display of other data.""" + """ + Diagnostic method for listing out the contents of a C{ParseResults}. + Accepts an optional C{indent} argument so that this string can be embedded + in a nested display of other data. + + Example:: + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + result = date_str.parseString('12/31/1999') + print(result.dump()) + prints:: + ['12', '/', '31', '/', '1999'] + - day: 1999 + - month: 31 + - year: 12 + """ out = [] NL = '\n' out.append( indent+_ustr(self.asList()) ) @@ -705,9 +862,26 @@ class ParseResults(object): return "".join(out) def pprint(self, *args, **kwargs): - """Pretty-printer for parsed results as a list, using the C{pprint} module. - Accepts additional positional or keyword args as defined for the - C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})""" + """ + Pretty-printer for parsed results as a list, using the C{pprint} module. + Accepts additional positional or keyword args as defined for the + C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) + + Example:: + ident = Word(alphas, alphanums) + num = Word(nums) + func = Forward() + term = ident | num | Group('(' + func + ')') + func <<= ident + Group(Optional(delimitedList(term))) + result = func.parseString("fna a,b,(fnb c,d,200),100") + result.pprint(width=40) + prints:: + ['fna', + ['a', + 'b', + ['(', 'fnb', ['c', 'd', '200'], ')'], + '100']] + """ pprint.pprint(self.asList(), *args, **kwargs) # add support for pickle protocol @@ -881,7 +1055,16 @@ class ParserElement(object): @staticmethod def setDefaultWhitespaceChars( chars ): - """Overrides the default whitespace chars + r""" + Overrides the default whitespace chars + + Example:: + # default whitespace chars are space, and newline + OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] + + # change to just treat newline as significant + ParserElement.setDefaultWhitespaceChars(" \t") + OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] """ ParserElement.DEFAULT_WHITE_CHARS = chars @@ -889,6 +1072,20 @@ class ParserElement(object): def inlineLiteralsUsing(cls): """ Set class to be used for inclusion of string literals into a parser. + + Example:: + # default literal class used is Literal + integer = Word(nums) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] + + + # change to Suppress + ParserElement.inlineLiteralsUsing(Suppress) + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + + date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] """ ParserElement._literalStringClass = cls @@ -916,8 +1113,19 @@ class ParserElement(object): self.callDuringTry = False def copy( self ): - """Make a copy of this C{ParserElement}. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element.""" + """ + Make a copy of this C{ParserElement}. Useful for defining different parse actions + for the same parsing pattern, using copies of the original parse element. + + Example:: + integer = Word(nums).setParseAction(lambda toks: int(toks[0])) + integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") + integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") + + print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) + prints:: + [5120, 100, 655360, 268435456] + """ cpy = copy.copy( self ) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] @@ -926,7 +1134,13 @@ class ParserElement(object): return cpy def setName( self, name ): - """Define name for this expression, for use in debugging.""" + """ + Define name for this expression, makes exception messages clearer. + + Example:: + Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) + Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) + """ self.name = name self.errmsg = "Expected " + self.name if hasattr(self,"exception"): @@ -934,15 +1148,24 @@ class ParserElement(object): return self def setResultsName( self, name, listAllMatches=False ): - """Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original C{ParserElement} object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - - see L{I{__call__}<__call__>}. + """ + Define name for referencing matching tokens as a nested attribute + of the returned parse results. + NOTE: this returns a *copy* of the original C{ParserElement} object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + + You can also set results names using the abbreviated syntax, + C{expr("name")} in place of C{expr.setResultsName("name")} - + see L{I{__call__}<__call__>}. + + Example:: + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + + integer.setResultsName("day")) + + # equivalent form: + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") """ newself = self.copy() if name.endswith("*"): @@ -971,31 +1194,49 @@ class ParserElement(object): return self def setParseAction( self, *fns, **kwargs ): - """Define action to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, - C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Optional keyword arguments: - - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ + """ + Define action to perform when successfully matching parse element definition. + Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, + C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Optional keyword arguments: + - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing C{}s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + + Example:: + integer = Word(nums) + date_str = integer + '/' + integer + '/' + integer + + date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] + + # use parse action to convert to ints at parse time + integer = Word(nums).setParseAction(lambda toks: int(toks[0])) + date_str = integer + '/' + integer + '/' + integer + + # note that integer fields are now ints, not strings + date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31] + """ self.parseAction = list(map(_trim_arity, list(fns))) self.callDuringTry = kwargs.get("callDuringTry", False) return self def addParseAction( self, *fns, **kwargs ): - """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" + """ + Add parse action to expression's list of parse actions. See L{I{setParseAction}}. + + See examples in L{I{copy}}. + """ self.parseAction += list(map(_trim_arity, list(fns))) self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) return self @@ -1008,6 +1249,14 @@ class ParserElement(object): Optional keyword arguments: - message = define a custom message to be used in the raised exception - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException + + Example:: + integer = Word(nums).setParseAction(lambda toks: int(toks[0])) + year_int = integer.copy() + year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") + date_str = year_int + '/' + integer + '/' + integer + + result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) """ msg = kwargs.get("message", "failed user-defined condition") exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException @@ -1184,7 +1433,7 @@ class ParserElement(object): cache.popitem(False) def clear(self): - cache.clear() + cache.clear() self.get = types.MethodType(get, self) self.set = types.MethodType(set, self) @@ -1275,6 +1524,10 @@ class ParserElement(object): C{enablePackrat} before calling C{psyco.full()}. If you do not do this, Python will crash. For best results, call C{enablePackrat()} immediately after importing pyparsing. + + Example:: + import pyparsing + pyparsing.ParserElement.enablePackrat() """ if not ParserElement._packratEnabled: ParserElement._packratEnabled = True @@ -1284,28 +1537,32 @@ class ParserElement(object): ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) ParserElement._parse = ParserElement._parseCache - def parseString( self, instring, parseAll=False ): - """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{L{StringEnd()}}). - - Note: C{parseString} implicitly calls C{expandtabs()} on the input string, - in order to report proper column numbers in parse actions. - If the input string contains tabs and - the grammar uses parse actions that use the C{loc} argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling C{parseWithTabs} on your grammar before calling C{parseString} - (see L{I{parseWithTabs}}) - - define your parse action using the full C{(s,loc,toks)} signature, and - reference the input string using the parse action's C{s} argument - - explictly expand the tabs in your input string before calling - C{parseString} + """ + Execute the parse expression with the given string. + This is the main interface to the client code, once the complete + expression has been built. + + If you want the grammar to require that the entire input string be + successfully parsed, then set C{parseAll} to True (equivalent to ending + the grammar with C{L{StringEnd()}}). + + Note: C{parseString} implicitly calls C{expandtabs()} on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the C{loc} argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling C{parseWithTabs} on your grammar before calling C{parseString} + (see L{I{parseWithTabs}}) + - define your parse action using the full C{(s,loc,toks)} signature, and + reference the input string using the parse action's C{s} argument + - explictly expand the tabs in your input string before calling + C{parseString} + + Example:: + Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] + Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text """ ParserElement.resetCache() if not self.streamlined: @@ -1331,14 +1588,35 @@ class ParserElement(object): return tokens def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): - """Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. If - C{overlap} is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs.""" + """ + Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + C{maxMatches} argument, to clip scanning after 'n' matches are found. If + C{overlap} is specified, then overlapping matches will be reported. + + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}} for more information on parsing + strings with embedded tabs. + + Example:: + source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" + print(source) + for tokens,start,end in Word(alphas).scanString(source): + print(' '*start + '^'*(end-start)) + print(' '*start + tokens[0]) + + prints:: + + sldjf123lsdjjkf345sldkjf879lkjsfd987 + ^^^^^ + sldjf + ^^^^^^^ + lsdjjkf + ^^^^^^ + sldkjf + ^^^^^^ + lkjsfd + """ if not self.streamlined: self.streamline() for e in self.ignoreExprs: @@ -1381,12 +1659,17 @@ class ParserElement(object): raise exc def transformString( self, instring ): - """Extension to C{L{scanString}}, to modify matching text with modified tokens that may - be returned from a parse action. To use C{transformString}, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking C{transformString()} on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. C{transformString()} returns the resulting transformed string.""" + """ + Extension to C{L{scanString}}, to modify matching text with modified tokens that may + be returned from a parse action. To use C{transformString}, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking C{transformString()} on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. C{transformString()} returns the resulting transformed string. + + Example:: + TODO + """ out = [] lastE = 0 # force preservation of s, to minimize unwanted transformation of string, and to @@ -1414,9 +1697,13 @@ class ParserElement(object): raise exc def searchString( self, instring, maxMatches=_MAX_INT ): - """Another extension to C{L{scanString}}, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - C{maxMatches} argument, to clip searching after 'n' matches are found. + """ + Another extension to C{L{scanString}}, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + C{maxMatches} argument, to clip searching after 'n' matches are found. + + Example:: + TODO """ try: return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) @@ -1428,10 +1715,14 @@ class ParserElement(object): raise exc def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): - """Generator method to split a string using the given expression as a separator. - May be called with optional C{maxsplit} argument, to limit the number of splits; - and the optional C{includeSeparators} argument (default=C{False}), if the separating - matching text should be included in the split results. + """ + Generator method to split a string using the given expression as a separator. + May be called with optional C{maxsplit} argument, to limit the number of splits; + and the optional C{includeSeparators} argument (default=C{False}), if the separating + matching text should be included in the split results. + + Example:: + TODO """ splits = 0 last = 0 @@ -1443,7 +1734,9 @@ class ParserElement(object): yield instring[last:] def __add__(self, other ): - """Implementation of + operator - returns C{L{And}}""" + """ + Implementation of + operator - returns C{L{And}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1453,7 +1746,9 @@ class ParserElement(object): return And( [ self, other ] ) def __radd__(self, other ): - """Implementation of + operator when left operand is not a C{L{ParserElement}}""" + """ + Implementation of + operator when left operand is not a C{L{ParserElement}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1463,7 +1758,9 @@ class ParserElement(object): return other + self def __sub__(self, other): - """Implementation of - operator, returns C{L{And}} with error stop""" + """ + Implementation of - operator, returns C{L{And}} with error stop + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1473,7 +1770,9 @@ class ParserElement(object): return And( [ self, And._ErrorStop(), other ] ) def __rsub__(self, other ): - """Implementation of - operator when left operand is not a C{L{ParserElement}}""" + """ + Implementation of - operator when left operand is not a C{L{ParserElement}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1483,24 +1782,24 @@ class ParserElement(object): return other - self def __mul__(self,other): - """Implementation of * operator, allows use of C{expr * 3} in place of - C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer - tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples - may also include C{None} as in: - - C{expr*(n,None)} or C{expr*(n,)} is equivalent + """ + Implementation of * operator, allows use of C{expr * 3} in place of + C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer + tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples + may also include C{None} as in: + - C{expr*(n,None)} or C{expr*(n,)} is equivalent to C{expr*n + L{ZeroOrMore}(expr)} (read as "at least n instances of C{expr}") - - C{expr*(None,n)} is equivalent to C{expr*(0,n)} + - C{expr*(None,n)} is equivalent to C{expr*(0,n)} (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} - - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} - - Note that C{expr*(None,n)} does not raise an exception if - more than n exprs exist in the input stream; that is, - C{expr*(None,n)} does not enforce a maximum number of expr - occurrences. If this behavior is desired, then write - C{expr*(None,n) + ~expr} - + - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} + - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} + + Note that C{expr*(None,n)} does not raise an exception if + more than n exprs exist in the input stream; that is, + C{expr*(None,n)} does not enforce a maximum number of expr + occurrences. If this behavior is desired, then write + C{expr*(None,n) + ~expr} """ if isinstance(other,int): minElements, optElements = other,0 @@ -1554,7 +1853,9 @@ class ParserElement(object): return self.__mul__(other) def __or__(self, other ): - """Implementation of | operator - returns C{L{MatchFirst}}""" + """ + Implementation of | operator - returns C{L{MatchFirst}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1564,7 +1865,9 @@ class ParserElement(object): return MatchFirst( [ self, other ] ) def __ror__(self, other ): - """Implementation of | operator when left operand is not a C{L{ParserElement}}""" + """ + Implementation of | operator when left operand is not a C{L{ParserElement}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1574,7 +1877,9 @@ class ParserElement(object): return other | self def __xor__(self, other ): - """Implementation of ^ operator - returns C{L{Or}}""" + """ + Implementation of ^ operator - returns C{L{Or}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1584,7 +1889,9 @@ class ParserElement(object): return Or( [ self, other ] ) def __rxor__(self, other ): - """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" + """ + Implementation of ^ operator when left operand is not a C{L{ParserElement}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1594,7 +1901,9 @@ class ParserElement(object): return other ^ self def __and__(self, other ): - """Implementation of & operator - returns C{L{Each}}""" + """ + Implementation of & operator - returns C{L{Each}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1604,7 +1913,9 @@ class ParserElement(object): return Each( [ self, other ] ) def __rand__(self, other ): - """Implementation of & operator when left operand is not a C{L{ParserElement}}""" + """ + Implementation of & operator when left operand is not a C{L{ParserElement}} + """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): @@ -1614,41 +1925,49 @@ class ParserElement(object): return other & self def __invert__( self ): - """Implementation of ~ operator - returns C{L{NotAny}}""" + """ + Implementation of ~ operator - returns C{L{NotAny}} + """ return NotAny( self ) def __call__(self, name=None): - """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: - userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - could be written as:: - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - - If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be - passed as C{True}. + """ + Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}. + + If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be + passed as C{True}. - If C{name} is omitted, same as calling C{L{copy}}. - """ + If C{name} is omitted, same as calling C{L{copy}}. + + Example:: + # these are equivalent + userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + """ if name is not None: return self.setResultsName(name) else: return self.copy() def suppress( self ): - """Suppresses the output of this C{ParserElement}; useful to keep punctuation from - cluttering up returned output. + """ + Suppresses the output of this C{ParserElement}; useful to keep punctuation from + cluttering up returned output. """ return Suppress( self ) def leaveWhitespace( self ): - """Disables the skipping of whitespace before matching the characters in the - C{ParserElement}'s defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. + """ + Disables the skipping of whitespace before matching the characters in the + C{ParserElement}'s defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. """ self.skipWhitespace = False return self def setWhitespaceChars( self, chars ): - """Overrides the default whitespace chars + """ + Overrides the default whitespace chars """ self.skipWhitespace = True self.whiteChars = chars @@ -1656,16 +1975,26 @@ class ParserElement(object): return self def parseWithTabs( self ): - """Overrides default behavior to expand C{}s to spaces before parsing the input string. - Must be called before C{parseString} when the input grammar contains elements that - match C{} characters.""" + """ + Overrides default behavior to expand C{}s to spaces before parsing the input string. + Must be called before C{parseString} when the input grammar contains elements that + match C{} characters. + """ self.keepTabs = True return self def ignore( self, other ): - """Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. + """ + Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. + + Example:: + patt = OneOrMore(Word(alphas)) + patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] + + patt.ignore(cStyleComment) + patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] """ if isinstance(other, basestring): other = Suppress(other) @@ -1678,7 +2007,9 @@ class ParserElement(object): return self def setDebugActions( self, startAction, successAction, exceptionAction ): - """Enable display of debugging messages while doing pattern matching.""" + """ + Enable display of debugging messages while doing pattern matching. + """ self.debugActions = (startAction or _defaultStartDebugAction, successAction or _defaultSuccessDebugAction, exceptionAction or _defaultExceptionDebugAction) @@ -1686,8 +2017,10 @@ class ParserElement(object): return self def setDebug( self, flag=True ): - """Enable display of debugging messages while doing pattern matching. - Set C{flag} to True to enable, False to disable.""" + """ + Enable display of debugging messages while doing pattern matching. + Set C{flag} to True to enable, False to disable. + """ if flag: self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) else: @@ -1709,13 +2042,16 @@ class ParserElement(object): pass def validate( self, validateTrace=[] ): - """Check defined expressions for valid structure, check for infinite recursive definitions.""" + """ + Check defined expressions for valid structure, check for infinite recursive definitions. + """ self.checkRecursion( [] ) def parseFile( self, file_or_filename, parseAll=False ): - """Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. + """ + Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. """ try: file_contents = file_or_filename.read() @@ -1752,15 +2088,17 @@ class ParserElement(object): return not (self == other) def matches(self, testString, parseAll=True): - """Method for quick testing of a parser against a test string. Good for simple - inline microtests of sub expressions while building up larger parser, as in:: - - expr = Word(nums) - assert expr.matches("100") + """ + Method for quick testing of a parser against a test string. Good for simple + inline microtests of sub expressions while building up larger parser.0 - Parameters: - - testString - to test against this expression for a match - - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests + Parameters: + - testString - to test against this expression for a match + - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests + + Example:: + expr = Word(nums) + assert expr.matches("100") """ try: self.parseString(_ustr(testString), parseAll=parseAll) @@ -1769,21 +2107,25 @@ class ParserElement(object): return False def runTests(self, tests, parseAll=True, comment='#', printResults=True, failureTests=False): - """Execute the parse expression on a series of test strings, showing each - test, the parsed results or where the parse failed. Quick and easy way to - run a parse expression against a list of sample strings. + """ + Execute the parse expression on a series of test strings, showing each + test, the parsed results or where the parse failed. Quick and easy way to + run a parse expression against a list of sample strings. - Parameters: - - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests - - comment - (default='#') - expression for indicating embedded comments in the test + Parameters: + - tests - a list of separate test strings, or a multiline string of test strings + - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests + - comment - (default='#') - expression for indicating embedded comments in the test string; pass None to disable comment filtering - - printResults - (default=True) prints test output to stdout - - failureTests - (default=False) indicates if these tests are expected to fail parsing + - printResults - (default=True) prints test output to stdout + - failureTests - (default=False) indicates if these tests are expected to fail parsing - Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTest} is True), and the results contain a list of lines of each - test's output + Returns: a (success, results) tuple, where success indicates that all tests succeeded + (or failed if C{failureTest} is True), and the results contain a list of lines of each + test's output + + Example:: + TODO """ if isinstance(tests, basestring): tests = list(map(str.strip, tests.rstrip().splitlines())) @@ -1825,13 +2167,17 @@ class ParserElement(object): class Token(ParserElement): - """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" + """ + Abstract C{ParserElement} subclass, for defining atomic matching patterns. + """ def __init__( self ): super(Token,self).__init__( savelist=False ) class Empty(Token): - """An empty token, will always match.""" + """ + An empty token, will always match. + """ def __init__( self ): super(Empty,self).__init__() self.name = "Empty" @@ -1840,7 +2186,9 @@ class Empty(Token): class NoMatch(Token): - """A token that will never match.""" + """ + A token that will never match. + """ def __init__( self ): super(NoMatch,self).__init__() self.name = "NoMatch" @@ -1853,7 +2201,14 @@ class NoMatch(Token): class Literal(Token): - """Token to exactly match a specified string.""" + """ + Token to exactly match a specified string. + + Example:: + Literal('blah').parseString('blah') # -> ['blah'] + Literal('blah').parseString('blahfooblah') # -> ['blah'] + Literal('blah').parseString('bla') # -> Exception: Expected "blah" + """ def __init__( self, matchString ): super(Literal,self).__init__() self.match = matchString @@ -1882,14 +2237,19 @@ _L = Literal ParserElement._literalStringClass = Literal class Keyword(Token): - """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{L{Literal}}: - - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. - - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} - Accepts two optional constructor arguments in addition to the keyword string: - - C{identChars} is a string of characters that would be valid identifier characters, + """ + Token to exactly match a specified string as a keyword, that is, it must be + immediately followed by a non-keyword character. Compare with C{L{Literal}}: + - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. + - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} + Accepts two optional constructor arguments in addition to the keyword string: + - C{identChars} is a string of characters that would be valid identifier characters, defaulting to all alphanumerics + "_" and "$" - - C{caseless} allows case-insensitive matching, default is C{False}. + - C{caseless} allows case-insensitive matching, default is C{False}. + + Example:: + Keyword("start").parseString("start") # -> ['start'] + Keyword("start").parseStrign("starting") # -> Exception """ DEFAULT_KEYWORD_CHARS = alphanums+"_$" @@ -1938,9 +2298,15 @@ class Keyword(Token): Keyword.DEFAULT_KEYWORD_CHARS = chars class CaselessLiteral(Literal): - """Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. + """ + Token to match a specified string, ignoring case of letters. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + + Example:: + OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] + + (Contrast with example for L{CaselessKeyword}.) """ def __init__( self, matchString ): super(CaselessLiteral,self).__init__( matchString.upper() ) @@ -1955,6 +2321,14 @@ class CaselessLiteral(Literal): raise ParseException(instring, loc, self.errmsg, self) class CaselessKeyword(Keyword): + """ + Caseless version of L{Keyword}. + + Example:: + OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] + + (Contrast with example for L{CaselessLiteral}.) + """ def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) @@ -1965,16 +2339,20 @@ class CaselessKeyword(Keyword): raise ParseException(instring, loc, self.errmsg, self) class Word(Token): - """Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in - the input C{bodyChars} string; useful to define a word of all printables - except for one or two characters, for instance. + """ + Token for matching words composed of allowed character sets. + Defined with string containing all allowed initial characters, + an optional string containing allowed body characters (if omitted, + defaults to the initial character set), and an optional minimum, + maximum, and/or exact length. The default value for C{min} is 1 (a + minimum value < 1 is not valid); the default values for C{max} and C{exact} + are 0, meaning no maximum or exact length restriction. An optional + C{excludeChars} parameter can list characters that might be found in + the input C{bodyChars} string; useful to define a word of all printables + except for one or two characters, for instance. + + Example:: + TODO """ def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): super(Word,self).__init__() @@ -2089,8 +2467,12 @@ class Word(Token): class Regex(Token): - """Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + """ + Token for matching strings that match a given regular expression. + Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + + Example:: + TODO """ compiledREtype = type(re.compile("[A-Z]")) def __init__( self, pattern, flags=0): @@ -2153,18 +2535,22 @@ class Regex(Token): class QuotedString(Token): - """Token for matching strings that are delimited by quoting characters. + r""" + Token for matching strings that are delimited by quoting characters. + + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) + - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) + + Example:: + TODO """ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - r"""Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=None) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) - - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) - """ super(QuotedString,self).__init__() # remove white space from quote chars - wont work anyway @@ -2277,11 +2663,15 @@ class QuotedString(Token): class CharsNotIn(Token): - """Token for matching words composed of characters *not* in a given set. - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. + """ + Token for matching words composed of characters *not* in a given set. + Defined with string containing all disallowed characters, and an optional + minimum, maximum, and/or exact length. The default value for C{min} is 1 (a + minimum value < 1 is not valid); the default values for C{max} and C{exact} + are 0, meaning no maximum or exact length restriction. + + Example:: + TODO """ def __init__( self, notChars, min=1, max=0, exact=0 ): super(CharsNotIn,self).__init__() @@ -2339,11 +2729,13 @@ class CharsNotIn(Token): return self.strRepr class White(Token): - """Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{L{Word}} class.""" + """ + Special matching class for matching whitespace. Normally, whitespace is ignored + by pyparsing grammars. This class is included when some whitespace structures + are significant. Define with a string containing the whitespace characters to be + matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, + as defined for the C{L{Word}} class. + """ whiteStrs = { " " : "", "\t": "", @@ -2395,7 +2787,9 @@ class _PositionToken(Token): self.mayIndexError = False class GoToColumn(_PositionToken): - """Token to advance to a specific column of input text; useful for tabular report scraping.""" + """ + Token to advance to a specific column of input text; useful for tabular report scraping. + """ def __init__( self, colno ): super(GoToColumn,self).__init__() self.col = colno @@ -2418,7 +2812,9 @@ class GoToColumn(_PositionToken): return newloc, ret class LineStart(_PositionToken): - """Matches if current position is at the beginning of a line within the parse string""" + """ + Matches if current position is at the beginning of a line within the parse string + """ def __init__( self ): super(LineStart,self).__init__() self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) @@ -2438,7 +2834,12 @@ class LineStart(_PositionToken): return loc, [] class LineEnd(_PositionToken): - """Matches if current position is at the end of a line within the parse string""" + """ + Matches if current position is at the end of a line within the parse string + + Example:: + TODO + """ def __init__( self ): super(LineEnd,self).__init__() self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) @@ -2456,7 +2857,9 @@ class LineEnd(_PositionToken): raise ParseException(instring, loc, self.errmsg, self) class StringStart(_PositionToken): - """Matches if current position is at the beginning of the parse string""" + """ + Matches if current position is at the beginning of the parse string + """ def __init__( self ): super(StringStart,self).__init__() self.errmsg = "Expected start of text" @@ -2469,7 +2872,9 @@ class StringStart(_PositionToken): return loc, [] class StringEnd(_PositionToken): - """Matches if current position is at the end of the parse string""" + """ + Matches if current position is at the end of the parse string + """ def __init__( self ): super(StringEnd,self).__init__() self.errmsg = "Expected end of text" @@ -2485,11 +2890,12 @@ class StringEnd(_PositionToken): raise ParseException(instring, loc, self.errmsg, self) class WordStart(_PositionToken): - """Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of - the string being parsed, or at the beginning of a line. + """ + Matches if the current position is at the beginning of a Word, and + is not preceded by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of + the string being parsed, or at the beginning of a line. """ def __init__(self, wordChars = printables): super(WordStart,self).__init__() @@ -2504,11 +2910,12 @@ class WordStart(_PositionToken): return loc, [] class WordEnd(_PositionToken): - """Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of - the string being parsed, or at the end of a line. + """ + Matches if the current position is at the end of a Word, and + is not followed by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of + the string being parsed, or at the end of a line. """ def __init__(self, wordChars = printables): super(WordEnd,self).__init__() @@ -2526,7 +2933,9 @@ class WordEnd(_PositionToken): class ParseExpression(ParserElement): - """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" + """ + Abstract subclass of ParserElement, for combining and post-processing parsed tokens. + """ def __init__( self, exprs, savelist = False ): super(ParseExpression,self).__init__(savelist) if isinstance( exprs, _generatorType ): @@ -2635,10 +3044,14 @@ class ParseExpression(ParserElement): return ret class And(ParseExpression): - """Requires all given C{ParseExpression}s to be found in the given order. - Expressions may be separated by whitespace. - May be constructed using the C{'+'} operator. - May also be constructed using the C{'-'} operator, which will suppress backtracking. + """ + Requires all given C{ParseExpression}s to be found in the given order. + Expressions may be separated by whitespace. + May be constructed using the C{'+'} operator. + May also be constructed using the C{'-'} operator, which will suppress backtracking. + + Example:: + TODO """ class _ErrorStop(Empty): @@ -2702,9 +3115,13 @@ class And(ParseExpression): class Or(ParseExpression): - """Requires that at least one C{ParseExpression} is found. - If two expressions match, the expression that matches the longest string will be used. - May be constructed using the C{'^'} operator. + """ + Requires that at least one C{ParseExpression} is found. + If two expressions match, the expression that matches the longest string will be used. + May be constructed using the C{'^'} operator. + + Example:: + TODO """ def __init__( self, exprs, savelist = False ): super(Or,self).__init__(exprs, savelist) @@ -2772,9 +3189,13 @@ class Or(ParseExpression): class MatchFirst(ParseExpression): - """Requires that at least one C{ParseExpression} is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the C{'|'} operator. + """ + Requires that at least one C{ParseExpression} is found. + If two expressions match, the first one listed is the one that will match. + May be constructed using the C{'|'} operator. + + Example:: + TODO """ def __init__( self, exprs, savelist = False ): super(MatchFirst,self).__init__(exprs, savelist) @@ -2828,9 +3249,13 @@ class MatchFirst(ParseExpression): class Each(ParseExpression): - """Requires all given C{ParseExpression}s to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the C{'&'} operator. + """ + Requires all given C{ParseExpression}s to be found, but in any order. + Expressions may be separated by whitespace. + May be constructed using the C{'&'} operator. + + Example:: + TODO """ def __init__( self, exprs, savelist = True ): super(Each,self).__init__(exprs, savelist) @@ -2913,7 +3338,9 @@ class Each(ParseExpression): class ParseElementEnhance(ParserElement): - """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" + """ + Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. + """ def __init__( self, expr, savelist=False ): super(ParseElementEnhance,self).__init__(savelist) if isinstance( expr, basestring ): @@ -2985,10 +3412,15 @@ class ParseElementEnhance(ParserElement): class FollowedBy(ParseElementEnhance): - """Lookahead matching of the given parse expression. C{FollowedBy} + """ + Lookahead matching of the given parse expression. C{FollowedBy} does *not* advance the parsing position within the input string, it only verifies that the specified parse expression matches at the current - position. C{FollowedBy} always returns a null token list.""" + position. C{FollowedBy} always returns a null token list. + + Example:: + TODO + """ def __init__( self, expr ): super(FollowedBy,self).__init__(expr) self.mayReturnEmpty = True @@ -2999,11 +3431,16 @@ class FollowedBy(ParseElementEnhance): class NotAny(ParseElementEnhance): - """Lookahead to disallow matching with the given parse expression. C{NotAny} + """ + Lookahead to disallow matching with the given parse expression. C{NotAny} does *not* advance the parsing position within the input string, it only verifies that the specified parse expression does *not* match at the current position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} - always returns a null token list. May be constructed using the '~' operator.""" + always returns a null token list. May be constructed using the '~' operator. + + Example:: + TODO + """ def __init__( self, expr ): super(NotAny,self).__init__(expr) #~ self.leaveWhitespace() @@ -3027,13 +3464,17 @@ class NotAny(ParseElementEnhance): class OneOrMore(ParseElementEnhance): - """Repetition of one or more of the given expression. + """ + Repetition of one or more of the given expression. - Parameters: - - expr - expression that must match one or more times - - stopOn - (default=None) - expression for a terminating sentinel + Parameters: + - expr - expression that must match one or more times + - stopOn - (default=None) - expression for a terminating sentinel (only required if the sentinel would ordinarily match the repetition expression) + + Example:: + TODO """ def __init__( self, expr, stopOn=None): super(OneOrMore, self).__init__(expr) @@ -3086,13 +3527,17 @@ class OneOrMore(ParseElementEnhance): return ret class ZeroOrMore(OneOrMore): - """Optional repetition of zero or more of the given expression. + """ + Optional repetition of zero or more of the given expression. - Parameters: - - expr - expression that must match zero or more times - - stopOn - (default=None) - expression for a terminating sentinel + Parameters: + - expr - expression that must match zero or more times + - stopOn - (default=None) - expression for a terminating sentinel (only required if the sentinel would ordinarily match the repetition expression) + + Example:: + TODO """ def __init__( self, expr, stopOn=None): super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) @@ -3122,12 +3567,16 @@ class _NullToken(object): _optionalNotMatched = _NullToken() class Optional(ParseElementEnhance): - """Optional matching of the given expression. + """ + Optional matching of the given expression. - Parameters: - - expr - expression that must match zero or more times - - default (optional) - value to be returned if the optional expression + Parameters: + - expr - expression that must match zero or more times + - default (optional) - value to be returned if the optional expression is not found. + + Example:: + TODO """ def __init__( self, expr, default=_optionalNotMatched ): super(Optional,self).__init__( expr, savelist=False ) @@ -3158,17 +3607,21 @@ class Optional(ParseElementEnhance): return self.strRepr class SkipTo(ParseElementEnhance): - """Token for skipping over all undefined text until the matched expression is found. + """ + Token for skipping over all undefined text until the matched expression is found. - Parameters: - - expr - target expression marking the end of the data to be skipped - - include - (default=False) if True, the target expression is also parsed + Parameters: + - expr - target expression marking the end of the data to be skipped + - include - (default=False) if True, the target expression is also parsed (the skipped text and target expression are returned as a 2-element list). - - ignore - (default=None) used to define grammars (typically quoted strings and + - ignore - (default=None) used to define grammars (typically quoted strings and comments) that might contain false matches to the target expression - - failOn - (default=None) define expressions that are not allowed to be + - failOn - (default=None) define expressions that are not allowed to be included in the skipped test; if found before the target expression is found, the SkipTo is not a match + + Example:: + TODO """ def __init__( self, other, include=False, ignore=None, failOn=None ): super( SkipTo, self ).__init__( other ) @@ -3231,19 +3684,23 @@ class SkipTo(ParseElementEnhance): return loc, skipresult class Forward(ParseElementEnhance): - """Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. - - Note: take care when assigning to C{Forward} not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the C{Forward}:: - fwdExpr << (a | b | c) - Converting to use the '<<=' operator instead will avoid this problem. + """ + Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. + + Note: take care when assigning to C{Forward} not to overlook precedence of operators. + Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the C{Forward}:: + fwdExpr << (a | b | c) + Converting to use the '<<=' operator instead will avoid this problem. + + Example:: + TODO """ def __init__( self, other=None ): super(Forward,self).__init__( other, savelist=False ) @@ -3312,15 +3769,29 @@ class _ForwardNoRecurse(Forward): return "..." class TokenConverter(ParseElementEnhance): - """Abstract subclass of C{ParseExpression}, for converting parsed results.""" + """ + Abstract subclass of C{ParseExpression}, for converting parsed results. + """ def __init__( self, expr, savelist=False ): super(TokenConverter,self).__init__( expr )#, savelist ) self.saveAsList = False class Combine(TokenConverter): - """Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying C{'adjacent=False'} in the constructor. + """ + Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying C{'adjacent=False'} in the constructor. + + Example:: + real = Word(nums) + '.' + Word(nums) + print(real.parseString('3.1416')) # -> ['3', '.', '1416'] + # will also erroneously match the following + print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] + + real = Combine(Word(nums) + '.' + Word(nums)) + print(real.parseString('3.1416')) # -> ['3.1416'] + # no match when there are internal spaces + print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) """ def __init__( self, expr, joinString="", adjacent=True ): super(Combine,self).__init__( expr ) @@ -3350,7 +3821,19 @@ class Combine(TokenConverter): return retToks class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" + """ + Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. + + Example:: + ident = Word(alphas) + num = Word(nums) + term = ident | num + func = ident + Optional(delimitedList(term)) + print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100'] + + func = ident + Group(Optional(delimitedList(term))) + print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] + """ def __init__( self, expr ): super(Group,self).__init__( expr ) self.saveAsList = True @@ -3359,9 +3842,13 @@ class Group(TokenConverter): return [ tokenlist ] class Dict(TokenConverter): - """Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. + """ + Converter to return a repetitive expression as a list, but also as a dictionary. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. + + Example:: + TODO """ def __init__( self, expr ): super(Dict,self).__init__( expr ) @@ -3393,7 +3880,12 @@ class Dict(TokenConverter): class Suppress(TokenConverter): - """Converter for ignoring the results of a parsed expression.""" + """ + Converter for ignoring the results of a parsed expression. + + Example:: + TODO + """ def postParse( self, instring, loc, tokenlist ): return [] @@ -3402,7 +3894,9 @@ class Suppress(TokenConverter): class OnlyOnce(object): - """Wrapper for parse actions, to ensure they are only called once.""" + """ + Wrapper for parse actions, to ensure they are only called once. + """ def __init__(self, methodCall): self.callable = _trim_arity(methodCall) self.called = False @@ -3416,7 +3910,12 @@ class OnlyOnce(object): self.called = False def traceParseAction(f): - """Decorator for debugging parse actions.""" + """ + Decorator for debugging parse actions. + + Example:: + TODO + """ f = _trim_arity(f) def z(*paArgs): thisFunc = f.__name__ @@ -3441,12 +3940,17 @@ def traceParseAction(f): # global helpers # def delimitedList( expr, delim=",", combine=False ): - """Helper to define a delimited list of expressions - the delimiter defaults to ','. - By default, the list elements and delimiters can have intervening whitespace, and - comments, but this can be overridden by passing C{combine=True} in the constructor. - If C{combine} is set to C{True}, the matching tokens are returned as a single token - string, with the delimiters included; otherwise, the matching tokens are returned - as a list of tokens, with the delimiters suppressed. + """ + Helper to define a delimited list of expressions - the delimiter defaults to ','. + By default, the list elements and delimiters can have intervening whitespace, and + comments, but this can be overridden by passing C{combine=True} in the constructor. + If C{combine} is set to C{True}, the matching tokens are returned as a single token + string, with the delimiters included; otherwise, the matching tokens are returned + as a list of tokens, with the delimiters suppressed. + + Example:: + delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc'] + delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] """ dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." if combine: @@ -3455,11 +3959,15 @@ def delimitedList( expr, delim=",", combine=False ): return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) def countedArray( expr, intExpr=None ): - """Helper to define a counted list of expressions. - This helper defines a pattern of the form:: - integer expr expr expr... - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. + """ + Helper to define a counted list of expressions. + This helper defines a pattern of the form:: + integer expr expr expr... + where the leading integer tells how many expr expressions follow. + The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. + + Example:: + countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] """ arrayExpr = Forward() def countFieldParseAction(s,l,t): @@ -3484,16 +3992,17 @@ def _flatten(L): return ret def matchPreviousLiteral(expr): - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousLiteral(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches a - previous literal, will also match the leading C{"1:1"} in C{"1:10"}. - If this is not desired, use C{matchPreviousExpr}. - Do *not* use with packrat parsing enabled. + """ + Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks + for a 'repeat' of a previous expression. For example:: + first = Word(nums) + second = matchPreviousLiteral(first) + matchExpr = first + ":" + second + will match C{"1:1"}, but not C{"1:2"}. Because this matches a + previous literal, will also match the leading C{"1:1"} in C{"1:10"}. + If this is not desired, use C{matchPreviousExpr}. + Do *not* use with packrat parsing enabled. """ rep = Forward() def copyTokenToRepeater(s,l,t): @@ -3511,17 +4020,18 @@ def matchPreviousLiteral(expr): return rep def matchPreviousExpr(expr): - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousExpr(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches by - expressions, will *not* match the leading C{"1:1"} in C{"1:10"}; - the expressions are evaluated first, and then compared, so - C{"1"} is compared with C{"10"}. - Do *not* use with packrat parsing enabled. + """ + Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks + for a 'repeat' of a previous expression. For example:: + first = Word(nums) + second = matchPreviousExpr(first) + matchExpr = first + ":" + second + will match C{"1:1"}, but not C{"1:2"}. Because this matches by + expressions, will *not* match the leading C{"1:1"} in C{"1:10"}; + the expressions are evaluated first, and then compared, so + C{"1"} is compared with C{"10"}. + Do *not* use with packrat parsing enabled. """ rep = Forward() e2 = expr.copy() @@ -3546,16 +4056,27 @@ def _escapeRegexRangeChars(s): return _ustr(s) def oneOf( strs, caseless=False, useRegex=True ): - """Helper to quickly define a set of alternative Literals, and makes sure to do - longest-first testing when there is a conflict, regardless of the input order, - but returns a C{L{MatchFirst}} for best performance. - - Parameters: - - strs - a string of space-delimited literals, or a list of string literals - - caseless - (default=False) - treat all literals as caseless - - useRegex - (default=True) - as an optimization, will generate a Regex + """ + Helper to quickly define a set of alternative Literals, and makes sure to do + longest-first testing when there is a conflict, regardless of the input order, + but returns a C{L{MatchFirst}} for best performance. + + Parameters: + - strs - a string of space-delimited literals, or a list of string literals + - caseless - (default=False) - treat all literals as caseless + - useRegex - (default=True) - as an optimization, will generate a Regex object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or if creating a C{Regex} raises an exception) + + Example:: + comp_oper = oneOf("< = > <= >= !=") + var = Word(alphas) + number = Word(nums) + term = var | number + comparison_expr = term + comp_oper + term + print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) + prints:: + [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] """ if caseless: isequal = ( lambda a,b: a.upper() == b.upper() ) @@ -3610,27 +4131,36 @@ def oneOf( strs, caseless=False, useRegex=True ): return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) def dictOf( key, value ): - """Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the C{Dict} results can include named token - fields. + """ + Helper to easily and clearly define a dictionary by specifying the respective patterns + for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens + in the proper order. The key pattern can include delimiting markers or punctuation, + as long as they are suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the C{Dict} results can include named token + fields. + + Example:: + TODO """ return Dict( ZeroOrMore( Group ( key + value ) ) ) def originalTextFor(expr, asString=True): - """Helper to return the original, untokenized text for a given expression. Useful to - restore the parsed fields of an HTML start tag into the raw tag text itself, or to - revert separate tokens with intervening whitespace back to the original matching - input text. By default, returns astring containing the original parsed text. + """ + Helper to return the original, untokenized text for a given expression. Useful to + restore the parsed fields of an HTML start tag into the raw tag text itself, or to + revert separate tokens with intervening whitespace back to the original matching + input text. By default, returns astring containing the original parsed text. - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{L{ParseResults}} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if - the expression passed to C{L{originalTextFor}} contains expressions with defined - results names, you must set C{asString} to C{False} if you want to preserve those - results name values.""" + If the optional C{asString} argument is passed as C{False}, then the return value is a + C{L{ParseResults}} containing any results names that were originally matched, and a + single token containing the original matched text from the input string. So if + the expression passed to C{L{originalTextFor}} contains expressions with defined + results names, you must set C{asString} to C{False} if you want to preserve those + results name values. + + Example:: + TODO + """ locMarker = Empty().setParseAction(lambda s,loc,t: loc) endlocMarker = locMarker.copy() endlocMarker.callPreparse = False @@ -3645,19 +4175,31 @@ def originalTextFor(expr, asString=True): return matchExpr def ungroup(expr): - """Helper to undo pyparsing's default grouping of And expressions, even - if all but one are non-empty.""" + """ + Helper to undo pyparsing's default grouping of And expressions, even + if all but one are non-empty. + """ return TokenConverter(expr).setParseAction(lambda t:t[0]) def locatedExpr(expr): - """Helper to decorate a returned token with its starting and ending locations in the input string. - This helper adds the following results names: - - locn_start = location where matched expression begins - - locn_end = location where matched expression ends - - value = the actual parsed results - - Be careful if the input text contains C{} characters, you may want to call - C{L{ParserElement.parseWithTabs}} + """ + Helper to decorate a returned token with its starting and ending locations in the input string. + This helper adds the following results names: + - locn_start = location where matched expression begins + - locn_end = location where matched expression ends + - value = the actual parsed results + + Be careful if the input text contains C{} characters, you may want to call + C{L{ParserElement.parseWithTabs}} + + Example:: + wd = Word(alphas) + for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): + print(match) + prints:: + [[0, 'ljsdf', 5]] + [[8, 'lksdjjf', 15]] + [[18, 'lkkjj', 23]] """ locator = Empty().setParseAction(lambda s,l,t: l) return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) @@ -3678,21 +4220,25 @@ _charRange = Group(_singleChar + Suppress("-") + _singleChar) _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" def srange(s): - r"""Helper to easily define string ranges for use in Word construction. Borrows - syntax from regexp '[]' string range definitions:: - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be: - - a single character - - an escaped character with a leading backslash (such as C{\-} or C{\]}) - - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) - (C{\0x##} is also supported for backwards compatibility) - - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) + r""" + Helper to easily define string ranges for use in Word construction. Borrows + syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be: + - a single character + - an escaped character with a leading backslash (such as C{\-} or C{\]}) + - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) + (C{\0x##} is also supported for backwards compatibility) + - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) + - a range of any of the above, separated by a dash (C{'a-z'}, etc.) + - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) + + Example:: + TODO """ _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) try: @@ -3701,8 +4247,12 @@ def srange(s): return "" def matchOnlyAtCol(n): - """Helper method for defining parse actions that require matching at a specific - column in the input text. + """ + Helper method for defining parse actions that require matching at a specific + column in the input text. + + Example:: + TODO """ def verifyCol(strg,locn,toks): if col(locn,strg) != n: @@ -3710,23 +4260,42 @@ def matchOnlyAtCol(n): return verifyCol def replaceWith(replStr): - """Helper method for common parse actions that simply return a literal value. Especially - useful when used with C{L{transformString}()}. + """ + Helper method for common parse actions that simply return a literal value. Especially + useful when used with C{L{transformString}()}. + + Example:: + num = Word(nums).setParseAction(lambda toks: int(toks[0])) + na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) + term = na | num + + OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] """ return lambda s,l,t: [replStr] def removeQuotes(s,l,t): - """Helper parse action for removing quotation marks from parsed quoted strings. - To use, add this parse action to quoted string using:: - quotedString.setParseAction( removeQuotes ) + """ + Helper parse action for removing quotation marks from parsed quoted strings. + + Example:: + # by default, quotation marks are included in parsed results + quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] + + # use removeQuotes to strip quotation marks from parsed results + quotedString.setParseAction(removeQuotes) + quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] """ return t[0][1:-1] def tokenMap(func, *args): - """Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional - args are passed, they are forwarded to the given function as additional arguments after - the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the - parsed data to an integer using base 16. + """ + Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional + args are passed, they are forwarded to the given function as additional arguments after + the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the + parsed data to an integer using base 16. + + Example:: + TODO """ def pa(s,l,t): return [func(tokn, *args) for tokn in t] @@ -3776,33 +4345,47 @@ def _makeTags(tagStr, xml): return openTag, closeTag def makeHTMLTags(tagStr): - """Helper to construct opening and closing tag expressions for HTML, given a tag name""" + """ + Helper to construct opening and closing tag expressions for HTML, given a tag name + + Example:: + TODO + """ return _makeTags( tagStr, False ) def makeXMLTags(tagStr): - """Helper to construct opening and closing tag expressions for XML, given a tag name""" + """ + Helper to construct opening and closing tag expressions for XML, given a tag name + + Example:: + TODO + """ return _makeTags( tagStr, True ) def withAttribute(*args,**attrDict): - """Helper to create a validating parse action to be used with start tags created - with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - C{} or C{
}. - - Call C{withAttribute} with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in C{(align="right")}, or - - as an explicit dict with C{**} operator, when an attribute name is also a Python + """ + Helper to create a validating parse action to be used with start tags created + with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + C{} or C{
}. + + Call C{withAttribute} with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in C{(align="right")}, or + - as an explicit dict with C{**} operator, when an attribute name is also a Python reserved word, as in C{**{"class":"Customer", "align":"right"}} - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. - If just testing for C{class} (with or without a namespace), use C{L{withClass}}. + If just testing for C{class} (with or without a namespace), use C{L{withClass}}. - To verify that the attribute exists, but without specifying a value, pass - C{withAttribute.ANY_VALUE} as the value. - """ + To verify that the attribute exists, but without specifying a value, pass + C{withAttribute.ANY_VALUE} as the value. + + Example:: + TODO + """ if args: attrs = args[:] else: @@ -3819,9 +4402,13 @@ def withAttribute(*args,**attrDict): withAttribute.ANY_VALUE = object() def withClass(classname, namespace=''): - """Simplified version of C{L{withAttribute}} when matching on a div class - made - difficult because C{class} is a reserved word in Python. - """ + """ + Simplified version of C{L{withAttribute}} when matching on a div class - made + difficult because C{class} is a reserved word in Python. + + Example:: + TODO + """ classattr = "%s:class" % namespace if namespace else "class" return withAttribute(**{classattr : classname}) @@ -3830,30 +4417,34 @@ opAssoc.LEFT = object() opAssoc.RIGHT = object() def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted) - - lpar - expression for matching left-parentheses (default=Suppress('(')) - - rpar - expression for matching right-parentheses (default=Suppress(')')) + """ + Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. + + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) + - lpar - expression for matching left-parentheses (default=Suppress('(')) + - rpar - expression for matching right-parentheses (default=Suppress(')')) + + Example:: + TODO """ ret = Forward() lastExpr = baseExpr | ( lpar + ret + rpar ) @@ -3913,25 +4504,29 @@ quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+) unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default="("); can also be a pyparsing expression - - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - - content - expression for items within the nested lists (default=None) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the C{ignoreExpr} argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. - The default is L{quotedString}, but if no expressions are to be ignored, - then pass C{None} for this argument. + """ + Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the C{ignoreExpr} argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. + The default is L{quotedString}, but if no expressions are to be ignored, + then pass C{None} for this argument. + + Example:: + TODO """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") @@ -3966,20 +4561,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop return ret def indentedBlock(blockStatementExpr, indentStack, indent=True): - """Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. + """ + Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. - Parameters: - - blockStatementExpr - expression defining syntax of statement that + Parameters: + - blockStatementExpr - expression defining syntax of statement that is repeated within the indented block - - indentStack - list created by caller to manage indentation stack + - indentStack - list created by caller to manage indentation stack (multiple statementWithIndentedBlock expressions within a single grammar should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the + - indent - boolean indicating whether block must be indented beyond the the current level; set to False for block of left-most statements (default=True) - A valid block must contain at least one C{blockStatement}. + A valid block must contain at least one C{blockStatement}. + + Example:: + TODO """ def checkPeerIndent(s,l,t): if l >= len(s): return @@ -4068,6 +4667,9 @@ class pyparsing_common: - C{L{convertToDate}} - C{L{convertToDatetime}} - C{L{stripHTMLTags}} + + Example:: + TODO """ convertToInteger = tokenMap(int) @@ -4134,6 +4736,9 @@ class pyparsing_common: Params - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) + + Example:: + TODO """ return lambda s,l,t: datetime.strptime(t[0], fmt).date() @@ -4144,6 +4749,9 @@ class pyparsing_common: Params - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) + + Example:: + TODO """ return lambda s,l,t: datetime.strptime(t[0], fmt) @@ -4159,7 +4767,12 @@ class pyparsing_common: _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() @staticmethod def stripHTMLTags(s, l, tokens): - """Parse action to remove HTML tags from web page HTML source""" + """ + Parse action to remove HTML tags from web page HTML source + + Example:: + TODO + """ return pyparsing_common._html_stripper.transformString(tokens[0]) if __name__ == "__main__": -- cgit v1.2.1