summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-08-05 21:38:41 +0000
committerptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-08-05 21:38:41 +0000
commitbfbff79863151d6444818f74562cce20df6ca4bb (patch)
treeb5d60f08e4b3f9ba06c24322de09930a77c14950
parent71211524f1e8e2b33015ae6d00541f2dec18acf6 (diff)
downloadpyparsing-bfbff79863151d6444818f74562cce20df6ca4bb.tar.gz
Add inline examples to docstrings, for real
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@389 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--src/pyparsing.py1575
1 files changed, 1094 insertions, 481 deletions
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 088cf26..a9ff9c8 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -48,7 +48,7 @@ The program outputs the following::
The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.
-The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
+The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
object with named attributes.
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.6"
-__versionTime__ = "05 Aug 2016 19:32 UTC"
+__versionTime__ = "27 Jul 2016 19:13 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -225,11 +225,23 @@ class ParseBaseException(Exception):
return "lineno col line".split() + dir(type(self))
class ParseException(ParseBaseException):
- """exception thrown when parse expressions don't match class;
- supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
+ """
+ Exception thrown when parse expressions don't match class;
+ supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col - returns the column number of the exception text
+ - line - returns the line containing the exception text
+
+ Example::
+ try:
+ Word(nums).setName("integer").parseString("ABC")
+ except ParseException as pe:
+ print(pe)
+ print("column: {}".format(pe.col))
+
+ prints::
+ Expected integer (at char 0), (line:1, col:1)
+ column: 1
"""
pass
@@ -278,11 +290,40 @@ class _ParseResultsWithOffset(object):
self.tup = (self.tup[0],i)
class ParseResults(object):
- """Structured parse results, to provide multiple means of access to the parsed data:
+ """
+ Structured parse results, to provide multiple means of access to the parsed data:
- as a list (C{len(results)})
- by list index (C{results[0], results[1]}, etc.)
- - by attribute (C{results.<resultsName>})
- """
+ - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
+
+ Example::
+ integer = Word(nums)
+ date_str = (integer.setResultsName("year") + '/'
+ + integer.setResultsName("month") + '/'
+ + integer.setResultsName("day"))
+ # equivalent form:
+ # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString("1999/12/31")
+ print(list(result))
+ print(result[0])
+ print(result['month'])
+ print(result.day)
+ print('month' in result)
+ print('minutes' in result)
+ print(result.dump())
+ prints::
+ ['1999', '/', '12', '/', '31']
+ 1999
+ 12
+ 31
+ True
+ False
+ ['1999', '/', '12', '/', '31']
+ - day: 31
+ - month: 12
+ - year: 1999
+ """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
if isinstance(toklist, cls):
return toklist
@@ -367,11 +408,6 @@ class ParseResults(object):
removed = list(range(*i.indices(mylen)))
removed.reverse()
# fixup indices in token dictionary
- #~ for name in self.__tokdict:
- #~ occurrences = self.__tokdict[name]
- #~ for j in removed:
- #~ for k, (value, position) in enumerate(occurrences):
- #~ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
for name,occurrences in self.__tokdict.items():
for j in removed:
for k, (value, position) in enumerate(occurrences):
@@ -437,14 +473,39 @@ class ParseResults(object):
return bool(self.__tokdict)
def pop( self, *args, **kwargs):
- """Removes and returns item at specified index (default=last).
- Supports both list and dict semantics for pop(). If passed no
- argument or an integer argument, it will use list semantics
- and pop tokens from the list of parsed tokens. If passed a
- non-integer argument (most likely a string), it will use dict
- semantics and pop the corresponding value from any defined
- results names. A second default return value argument is
- supported, just as in dict.pop()."""
+ """
+ Removes and returns item at specified index (C{default=last}).
+ Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
+ argument or an integer argument, it will use C{list} semantics
+ and pop tokens from the list of parsed tokens. If passed a
+ non-integer argument (most likely a string), it will use C{dict}
+ semantics and pop the corresponding value from any defined
+ results names. A second default return value argument is
+ supported, just as in C{dict.pop()}.
+
+ Example::
+ def remove_first(tokens):
+ tokens.pop(0)
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+ print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+ label = Word(alphas)
+ patt = label("LABEL") + OneOrMore(Word(nums))
+ print(patt.parseString("AAB 123 321").dump())
+
+ # Use pop() in a parse action to remove named result (note that corresponding value is not
+ # removed from list form of results)
+ def remove_LABEL(tokens):
+ tokens.pop("LABEL")
+ return tokens
+ patt.addParseAction(remove_LABEL)
+ print(patt.parseString("AAB 123 321").dump())
+ prints::
+ ['AAB', '123', '321']
+ - LABEL: AAB
+
+ ['AAB', '123', '321']
+ """
if not args:
args = [-1]
for k,v in kwargs.items():
@@ -464,39 +525,83 @@ class ParseResults(object):
return defaultvalue
def get(self, key, defaultValue=None):
- """Returns named result matching the given key, or if there is no
- such name, then returns the given C{defaultValue} or C{None} if no
- C{defaultValue} is specified."""
+ """
+ Returns named result matching the given key, or if there is no
+ such name, then returns the given C{defaultValue} or C{None} if no
+ C{defaultValue} is specified.
+
+ Similar to C{dict.get()}.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString("1999/12/31")
+ print(result.get("year")) # -> '1999'
+ print(result.get("hour", "not specified")) # -> 'not specified'
+ print(result.get("hour")) # -> None
+ """
if key in self:
return self[key]
else:
return defaultValue
def insert( self, index, insStr ):
- """Inserts new element at location index in the list of parsed tokens."""
+ """
+ Inserts new element at location index in the list of parsed tokens.
+
+ Similar to C{list.insert()}.
+
+ Example::
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+ # use a parse action to insert the parse location in the front of the parsed results
+ def insert_locn(locn, tokens):
+ tokens.insert(0, locn)
+ print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+ """
self.__toklist.insert(index, insStr)
# fixup indices in token dictionary
- #~ for name in self.__tokdict:
- #~ occurrences = self.__tokdict[name]
- #~ for k, (value, position) in enumerate(occurrences):
- #~ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
for name,occurrences in self.__tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def append( self, item ):
- """Add single element to end of ParseResults list of elements."""
+ """
+ Add single element to end of ParseResults list of elements.
+
+ Example::
+ print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+ # use a parse action to compute the sum of the parsed integers, and add it to the end
+ def append_sum(tokens):
+ tokens.append(sum(map(int, tokens)))
+ print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+ """
self.__toklist.append(item)
def extend( self, itemseq ):
- """Add sequence of elements to end of ParseResults list of elements."""
+ """
+ Add sequence of elements to end of ParseResults list of elements.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+
+ # use a parse action to append the reverse of the matched strings, to make a palindrome
+ def make_palindrome(tokens):
+ tokens.extend(reversed([t[::-1] for t in tokens]))
+ return ''.join(tokens)
+ print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+ """
if isinstance(itemseq, ParseResults):
self += itemseq
else:
self.__toklist.extend(itemseq)
def clear( self ):
- """Clear all elements and results names."""
+ """
+ Clear all elements and results names.
+ """
del self.__toklist[:]
self.__tokdict.clear()
@@ -561,11 +666,40 @@ class ParseResults(object):
return out
def asList( self ):
- """Returns the parse results as a nested list of matching tokens, all converted to strings."""
+ """
+ Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+ result = patt.parseString("sldkj lsdkj sldkj")
+ # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+ print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
+
+ # Use asList() to create an actual list
+ result_list = result.asList()
+ print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
+ """
return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
def asDict( self ):
- """Returns the named parse results as a nested dictionary."""
+ """
+ Returns the named parse results as a nested dictionary.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString('12/31/1999')
+ print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+
+ result_dict = result.asDict()
+ print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
+
+ # even though a ParseResults supports dict-like access, sometime you just need to have a dict
+ import json
+ print(json.dumps(result_dict)) # -> {"month": "31", "day": "1999", "year": "12"}
+ print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+ """
if PY_3:
item_fn = self.items
else:
@@ -583,7 +717,9 @@ class ParseResults(object):
return dict((k,toItem(v)) for k,v in item_fn())
def copy( self ):
- """Returns a new copy of a C{ParseResults} object."""
+ """
+ Returns a new copy of a C{ParseResults} object.
+ """
ret = ParseResults( self.__toklist )
ret.__tokdict = self.__tokdict.copy()
ret.__parent = self.__parent
@@ -592,7 +728,9 @@ class ParseResults(object):
return ret
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
- """(Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
+ """
+ (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+ """
nl = "\n"
out = []
namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
@@ -658,7 +796,12 @@ class ParseResults(object):
return None
def getName(self):
- """Returns the results name for this token expression."""
+ """
+ Returns the results name for this token expression.
+
+ Example::
+ TODO
+ """
if self.__name:
return self.__name
elif self.__parent:
@@ -675,9 +818,23 @@ class ParseResults(object):
return None
def dump(self,indent='',depth=0):
- """Diagnostic method for listing out the contents of a C{ParseResults}.
- Accepts an optional C{indent} argument so that this string can be embedded
- in a nested display of other data."""
+ """
+ Diagnostic method for listing out the contents of a C{ParseResults}.
+ Accepts an optional C{indent} argument so that this string can be embedded
+ in a nested display of other data.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ result = date_str.parseString('12/31/1999')
+ print(result.dump())
+ prints::
+ ['12', '/', '31', '/', '1999']
+ - day: 1999
+ - month: 31
+ - year: 12
+ """
out = []
NL = '\n'
out.append( indent+_ustr(self.asList()) )
@@ -705,9 +862,26 @@ class ParseResults(object):
return "".join(out)
def pprint(self, *args, **kwargs):
- """Pretty-printer for parsed results as a list, using the C{pprint} module.
- Accepts additional positional or keyword args as defined for the
- C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
+ """
+ Pretty-printer for parsed results as a list, using the C{pprint} module.
+ Accepts additional positional or keyword args as defined for the
+ C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
+
+ Example::
+ ident = Word(alphas, alphanums)
+ num = Word(nums)
+ func = Forward()
+ term = ident | num | Group('(' + func + ')')
+ func <<= ident + Group(Optional(delimitedList(term)))
+ result = func.parseString("fna a,b,(fnb c,d,200),100")
+ result.pprint(width=40)
+ prints::
+ ['fna',
+ ['a',
+ 'b',
+ ['(', 'fnb', ['c', 'd', '200'], ')'],
+ '100']]
+ """
pprint.pprint(self.asList(), *args, **kwargs)
# add support for pickle protocol
@@ -881,7 +1055,16 @@ class ParserElement(object):
@staticmethod
def setDefaultWhitespaceChars( chars ):
- """Overrides the default whitespace chars
+ r"""
+ Overrides the default whitespace chars
+
+ Example::
+ # default whitespace chars are space, <TAB> and newline
+ OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
+
+ # change to just treat newline as significant
+ ParserElement.setDefaultWhitespaceChars(" \t")
+ OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
"""
ParserElement.DEFAULT_WHITE_CHARS = chars
@@ -889,6 +1072,20 @@ class ParserElement(object):
def inlineLiteralsUsing(cls):
"""
Set class to be used for inclusion of string literals into a parser.
+
+ Example::
+ # default literal class used is Literal
+ integer = Word(nums)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
+
+
+ # change to Suppress
+ ParserElement.inlineLiteralsUsing(Suppress)
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+ date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
"""
ParserElement._literalStringClass = cls
@@ -916,8 +1113,19 @@ class ParserElement(object):
self.callDuringTry = False
def copy( self ):
- """Make a copy of this C{ParserElement}. Useful for defining different parse actions
- for the same parsing pattern, using copies of the original parse element."""
+ """
+ Make a copy of this C{ParserElement}. Useful for defining different parse actions
+ for the same parsing pattern, using copies of the original parse element.
+
+ Example::
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
+ integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+
+ print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+ prints::
+ [5120, 100, 655360, 268435456]
+ """
cpy = copy.copy( self )
cpy.parseAction = self.parseAction[:]
cpy.ignoreExprs = self.ignoreExprs[:]
@@ -926,7 +1134,13 @@ class ParserElement(object):
return cpy
def setName( self, name ):
- """Define name for this expression, for use in debugging."""
+ """
+ Define name for this expression, makes exception messages clearer.
+
+ Example::
+ Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+ Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
+ """
self.name = name
self.errmsg = "Expected " + self.name
if hasattr(self,"exception"):
@@ -934,15 +1148,24 @@ class ParserElement(object):
return self
def setResultsName( self, name, listAllMatches=False ):
- """Define name for referencing matching tokens as a nested attribute
- of the returned parse results.
- NOTE: this returns a *copy* of the original C{ParserElement} object;
- this is so that the client can define a basic element, such as an
- integer, and reference it in multiple places with different names.
-
- You can also set results names using the abbreviated syntax,
- C{expr("name")} in place of C{expr.setResultsName("name")} -
- see L{I{__call__}<__call__>}.
+ """
+ Define name for referencing matching tokens as a nested attribute
+ of the returned parse results.
+ NOTE: this returns a *copy* of the original C{ParserElement} object;
+ this is so that the client can define a basic element, such as an
+ integer, and reference it in multiple places with different names.
+
+ You can also set results names using the abbreviated syntax,
+ C{expr("name")} in place of C{expr.setResultsName("name")} -
+ see L{I{__call__}<__call__>}.
+
+ Example::
+ date_str = (integer.setResultsName("year") + '/'
+ + integer.setResultsName("month") + '/'
+ + integer.setResultsName("day"))
+
+ # equivalent form:
+ date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
"""
newself = self.copy()
if name.endswith("*"):
@@ -971,31 +1194,49 @@ class ParserElement(object):
return self
def setParseAction( self, *fns, **kwargs ):
- """Define action to perform when successfully matching parse element definition.
- Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
- C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- - s = the original string being parsed (see note below)
- - loc = the location of the matching substring
- - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
- If the functions in fns modify the tokens, they can return them as the return
- value from fn, and the modified list of tokens will replace the original.
- Otherwise, fn does not need to return any value.
-
- Optional keyword arguments:
- - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing
-
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{parseString}<parseString>} for more information
- on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
+ """
+ Define action to perform when successfully matching parse element definition.
+ Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+ C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+ - s = the original string being parsed (see note below)
+ - loc = the location of the matching substring
+ - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+ If the functions in fns modify the tokens, they can return them as the return
+ value from fn, and the modified list of tokens will replace the original.
+ Otherwise, fn does not need to return any value.
+
+ Optional keyword arguments:
+ - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{parseString}<parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+
+ Example::
+ integer = Word(nums)
+ date_str = integer + '/' + integer + '/' + integer
+
+ date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
+
+ # use parse action to convert to ints at parse time
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ date_str = integer + '/' + integer + '/' + integer
+
+ # note that integer fields are now ints, not strings
+ date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
+ """
self.parseAction = list(map(_trim_arity, list(fns)))
self.callDuringTry = kwargs.get("callDuringTry", False)
return self
def addParseAction( self, *fns, **kwargs ):
- """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
+ """
+ Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
+
+ See examples in L{I{copy}<copy>}.
+ """
self.parseAction += list(map(_trim_arity, list(fns)))
self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
return self
@@ -1008,6 +1249,14 @@ class ParserElement(object):
Optional keyword arguments:
- message = define a custom message to be used in the raised exception
- fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+
+ Example::
+ integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ year_int = integer.copy()
+ year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+ date_str = year_int + '/' + integer + '/' + integer
+
+ result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
"""
msg = kwargs.get("message", "failed user-defined condition")
exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
@@ -1184,7 +1433,7 @@ class ParserElement(object):
cache.popitem(False)
def clear(self):
- cache.clear()
+ cache.clear()
self.get = types.MethodType(get, self)
self.set = types.MethodType(set, self)
@@ -1275,6 +1524,10 @@ class ParserElement(object):
C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
Python will crash. For best results, call C{enablePackrat()} immediately
after importing pyparsing.
+
+ Example::
+ import pyparsing
+ pyparsing.ParserElement.enablePackrat()
"""
if not ParserElement._packratEnabled:
ParserElement._packratEnabled = True
@@ -1284,28 +1537,32 @@ class ParserElement(object):
ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
ParserElement._parse = ParserElement._parseCache
-
def parseString( self, instring, parseAll=False ):
- """Execute the parse expression with the given string.
- This is the main interface to the client code, once the complete
- expression has been built.
-
- If you want the grammar to require that the entire input string be
- successfully parsed, then set C{parseAll} to True (equivalent to ending
- the grammar with C{L{StringEnd()}}).
-
- Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
- in order to report proper column numbers in parse actions.
- If the input string contains tabs and
- the grammar uses parse actions that use the C{loc} argument to index into the
- string being parsed, you can ensure you have a consistent view of the input
- string by:
- - calling C{parseWithTabs} on your grammar before calling C{parseString}
- (see L{I{parseWithTabs}<parseWithTabs>})
- - define your parse action using the full C{(s,loc,toks)} signature, and
- reference the input string using the parse action's C{s} argument
- - explictly expand the tabs in your input string before calling
- C{parseString}
+ """
+ Execute the parse expression with the given string.
+ This is the main interface to the client code, once the complete
+ expression has been built.
+
+ If you want the grammar to require that the entire input string be
+ successfully parsed, then set C{parseAll} to True (equivalent to ending
+ the grammar with C{L{StringEnd()}}).
+
+ Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+ in order to report proper column numbers in parse actions.
+ If the input string contains tabs and
+ the grammar uses parse actions that use the C{loc} argument to index into the
+ string being parsed, you can ensure you have a consistent view of the input
+ string by:
+ - calling C{parseWithTabs} on your grammar before calling C{parseString}
+ (see L{I{parseWithTabs}<parseWithTabs>})
+ - define your parse action using the full C{(s,loc,toks)} signature, and
+ reference the input string using the parse action's C{s} argument
+ - explictly expand the tabs in your input string before calling
+ C{parseString}
+
+ Example::
+ Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
+ Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
"""
ParserElement.resetCache()
if not self.streamlined:
@@ -1331,14 +1588,35 @@ class ParserElement(object):
return tokens
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
- """Scan the input string for expression matches. Each match will return the
- matching tokens, start location, and end location. May be called with optional
- C{maxMatches} argument, to clip scanning after 'n' matches are found. If
- C{overlap} is specified, then overlapping matches will be reported.
-
- Note that the start and end locations are reported relative to the string
- being parsed. See L{I{parseString}<parseString>} for more information on parsing
- strings with embedded tabs."""
+ """
+ Scan the input string for expression matches. Each match will return the
+ matching tokens, start location, and end location. May be called with optional
+ C{maxMatches} argument, to clip scanning after 'n' matches are found. If
+ C{overlap} is specified, then overlapping matches will be reported.
+
+ Note that the start and end locations are reported relative to the string
+ being parsed. See L{I{parseString}<parseString>} for more information on parsing
+ strings with embedded tabs.
+
+ Example::
+ source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+ print(source)
+ for tokens,start,end in Word(alphas).scanString(source):
+ print(' '*start + '^'*(end-start))
+ print(' '*start + tokens[0])
+
+ prints::
+
+ sldjf123lsdjjkf345sldkjf879lkjsfd987
+ ^^^^^
+ sldjf
+ ^^^^^^^
+ lsdjjkf
+ ^^^^^^
+ sldkjf
+ ^^^^^^
+ lkjsfd
+ """
if not self.streamlined:
self.streamline()
for e in self.ignoreExprs:
@@ -1381,12 +1659,17 @@ class ParserElement(object):
raise exc
def transformString( self, instring ):
- """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
- be returned from a parse action. To use C{transformString}, define a grammar and
- attach a parse action to it that modifies the returned token list.
- Invoking C{transformString()} on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
- action. C{transformString()} returns the resulting transformed string."""
+ """
+ Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+ be returned from a parse action. To use C{transformString}, define a grammar and
+ attach a parse action to it that modifies the returned token list.
+ Invoking C{transformString()} on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
+ action. C{transformString()} returns the resulting transformed string.
+
+ Example::
+ TODO
+ """
out = []
lastE = 0
# force preservation of <TAB>s, to minimize unwanted transformation of string, and to
@@ -1414,9 +1697,13 @@ class ParserElement(object):
raise exc
def searchString( self, instring, maxMatches=_MAX_INT ):
- """Another extension to C{L{scanString}}, simplifying the access to the tokens found
- to match the given parse expression. May be called with optional
- C{maxMatches} argument, to clip searching after 'n' matches are found.
+ """
+ Another extension to C{L{scanString}}, simplifying the access to the tokens found
+ to match the given parse expression. May be called with optional
+ C{maxMatches} argument, to clip searching after 'n' matches are found.
+
+ Example::
+ TODO
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
@@ -1428,10 +1715,14 @@ class ParserElement(object):
raise exc
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
- """Generator method to split a string using the given expression as a separator.
- May be called with optional C{maxsplit} argument, to limit the number of splits;
- and the optional C{includeSeparators} argument (default=C{False}), if the separating
- matching text should be included in the split results.
+ """
+ Generator method to split a string using the given expression as a separator.
+ May be called with optional C{maxsplit} argument, to limit the number of splits;
+ and the optional C{includeSeparators} argument (default=C{False}), if the separating
+ matching text should be included in the split results.
+
+ Example::
+ TODO
"""
splits = 0
last = 0
@@ -1443,7 +1734,9 @@ class ParserElement(object):
yield instring[last:]
def __add__(self, other ):
- """Implementation of + operator - returns C{L{And}}"""
+ """
+ Implementation of + operator - returns C{L{And}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1453,7 +1746,9 @@ class ParserElement(object):
return And( [ self, other ] )
def __radd__(self, other ):
- """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of + operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1463,7 +1758,9 @@ class ParserElement(object):
return other + self
def __sub__(self, other):
- """Implementation of - operator, returns C{L{And}} with error stop"""
+ """
+ Implementation of - operator, returns C{L{And}} with error stop
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1473,7 +1770,9 @@ class ParserElement(object):
return And( [ self, And._ErrorStop(), other ] )
def __rsub__(self, other ):
- """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of - operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1483,24 +1782,24 @@ class ParserElement(object):
return other - self
def __mul__(self,other):
- """Implementation of * operator, allows use of C{expr * 3} in place of
- C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
- tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
- may also include C{None} as in:
- - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+ """
+ Implementation of * operator, allows use of C{expr * 3} in place of
+ C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
+ tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
+ may also include C{None} as in:
+ - C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}")
- - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+ - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}")
- - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
-
- Note that C{expr*(None,n)} does not raise an exception if
- more than n exprs exist in the input stream; that is,
- C{expr*(None,n)} does not enforce a maximum number of expr
- occurrences. If this behavior is desired, then write
- C{expr*(None,n) + ~expr}
-
+ - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+ - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+ Note that C{expr*(None,n)} does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ C{expr*(None,n)} does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ C{expr*(None,n) + ~expr}
"""
if isinstance(other,int):
minElements, optElements = other,0
@@ -1554,7 +1853,9 @@ class ParserElement(object):
return self.__mul__(other)
def __or__(self, other ):
- """Implementation of | operator - returns C{L{MatchFirst}}"""
+ """
+ Implementation of | operator - returns C{L{MatchFirst}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1564,7 +1865,9 @@ class ParserElement(object):
return MatchFirst( [ self, other ] )
def __ror__(self, other ):
- """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of | operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1574,7 +1877,9 @@ class ParserElement(object):
return other | self
def __xor__(self, other ):
- """Implementation of ^ operator - returns C{L{Or}}"""
+ """
+ Implementation of ^ operator - returns C{L{Or}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1584,7 +1889,9 @@ class ParserElement(object):
return Or( [ self, other ] )
def __rxor__(self, other ):
- """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of ^ operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1594,7 +1901,9 @@ class ParserElement(object):
return other ^ self
def __and__(self, other ):
- """Implementation of & operator - returns C{L{Each}}"""
+ """
+ Implementation of & operator - returns C{L{Each}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1604,7 +1913,9 @@ class ParserElement(object):
return Each( [ self, other ] )
def __rand__(self, other ):
- """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
+ """
+ Implementation of & operator when left operand is not a C{L{ParserElement}}
+ """
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
@@ -1614,41 +1925,49 @@ class ParserElement(object):
return other & self
def __invert__( self ):
- """Implementation of ~ operator - returns C{L{NotAny}}"""
+ """
+ Implementation of ~ operator - returns C{L{NotAny}}
+ """
return NotAny( self )
def __call__(self, name=None):
- """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
- userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
- could be written as::
- userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
-
- If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
- passed as C{True}.
+ """
+ Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.
+
+ If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+ passed as C{True}.
- If C{name} is omitted, same as calling C{L{copy}}.
- """
+ If C{name} is omitted, same as calling C{L{copy}}.
+
+ Example::
+ # these are equivalent
+ userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+ userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+ """
if name is not None:
return self.setResultsName(name)
else:
return self.copy()
def suppress( self ):
- """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
- cluttering up returned output.
+ """
+ Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+ cluttering up returned output.
"""
return Suppress( self )
def leaveWhitespace( self ):
- """Disables the skipping of whitespace before matching the characters in the
- C{ParserElement}'s defined pattern. This is normally only used internally by
- the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+ """
+ Disables the skipping of whitespace before matching the characters in the
+ C{ParserElement}'s defined pattern. This is normally only used internally by
+ the pyparsing module, but may be needed in some whitespace-sensitive grammars.
"""
self.skipWhitespace = False
return self
def setWhitespaceChars( self, chars ):
- """Overrides the default whitespace chars
+ """
+ Overrides the default whitespace chars
"""
self.skipWhitespace = True
self.whiteChars = chars
@@ -1656,16 +1975,26 @@ class ParserElement(object):
return self
def parseWithTabs( self ):
- """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
- Must be called before C{parseString} when the input grammar contains elements that
- match C{<TAB>} characters."""
+ """
+ Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
+ Must be called before C{parseString} when the input grammar contains elements that
+ match C{<TAB>} characters.
+ """
self.keepTabs = True
return self
def ignore( self, other ):
- """Define expression to be ignored (e.g., comments) while doing pattern
- matching; may be called repeatedly, to define multiple comment or other
- ignorable patterns.
+ """
+ Define expression to be ignored (e.g., comments) while doing pattern
+ matching; may be called repeatedly, to define multiple comment or other
+ ignorable patterns.
+
+ Example::
+ patt = OneOrMore(Word(alphas))
+ patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+
+ patt.ignore(cStyleComment)
+ patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
"""
if isinstance(other, basestring):
other = Suppress(other)
@@ -1678,7 +2007,9 @@ class ParserElement(object):
return self
def setDebugActions( self, startAction, successAction, exceptionAction ):
- """Enable display of debugging messages while doing pattern matching."""
+ """
+ Enable display of debugging messages while doing pattern matching.
+ """
self.debugActions = (startAction or _defaultStartDebugAction,
successAction or _defaultSuccessDebugAction,
exceptionAction or _defaultExceptionDebugAction)
@@ -1686,8 +2017,10 @@ class ParserElement(object):
return self
def setDebug( self, flag=True ):
- """Enable display of debugging messages while doing pattern matching.
- Set C{flag} to True to enable, False to disable."""
+ """
+ Enable display of debugging messages while doing pattern matching.
+ Set C{flag} to True to enable, False to disable.
+ """
if flag:
self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
else:
@@ -1709,13 +2042,16 @@ class ParserElement(object):
pass
def validate( self, validateTrace=[] ):
- """Check defined expressions for valid structure, check for infinite recursive definitions."""
+ """
+ Check defined expressions for valid structure, check for infinite recursive definitions.
+ """
self.checkRecursion( [] )
def parseFile( self, file_or_filename, parseAll=False ):
- """Execute the parse expression on the given file or filename.
- If a filename is specified (instead of a file object),
- the entire file is opened, read, and closed before parsing.
+ """
+ Execute the parse expression on the given file or filename.
+ If a filename is specified (instead of a file object),
+ the entire file is opened, read, and closed before parsing.
"""
try:
file_contents = file_or_filename.read()
@@ -1752,15 +2088,17 @@ class ParserElement(object):
return not (self == other)
def matches(self, testString, parseAll=True):
- """Method for quick testing of a parser against a test string. Good for simple
- inline microtests of sub expressions while building up larger parser, as in::
-
- expr = Word(nums)
- assert expr.matches("100")
+ """
+ Method for quick testing of a parser against a test string. Good for simple
+ inline microtests of sub expressions while building up larger parser.0
- Parameters:
- - testString - to test against this expression for a match
- - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
+ Parameters:
+ - testString - to test against this expression for a match
+ - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
+
+ Example::
+ expr = Word(nums)
+ assert expr.matches("100")
"""
try:
self.parseString(_ustr(testString), parseAll=parseAll)
@@ -1769,21 +2107,25 @@ class ParserElement(object):
return False
def runTests(self, tests, parseAll=True, comment='#', printResults=True, failureTests=False):
- """Execute the parse expression on a series of test strings, showing each
- test, the parsed results or where the parse failed. Quick and easy way to
- run a parse expression against a list of sample strings.
+ """
+ Execute the parse expression on a series of test strings, showing each
+ test, the parsed results or where the parse failed. Quick and easy way to
+ run a parse expression against a list of sample strings.
- Parameters:
- - tests - a list of separate test strings, or a multiline string of test strings
- - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
- - comment - (default='#') - expression for indicating embedded comments in the test
+ Parameters:
+ - tests - a list of separate test strings, or a multiline string of test strings
+ - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
+ - comment - (default='#') - expression for indicating embedded comments in the test
string; pass None to disable comment filtering
- - printResults - (default=True) prints test output to stdout
- - failureTests - (default=False) indicates if these tests are expected to fail parsing
+ - printResults - (default=True) prints test output to stdout
+ - failureTests - (default=False) indicates if these tests are expected to fail parsing
- Returns: a (success, results) tuple, where success indicates that all tests succeeded
- (or failed if C{failureTest} is True), and the results contain a list of lines of each
- test's output
+ Returns: a (success, results) tuple, where success indicates that all tests succeeded
+ (or failed if C{failureTest} is True), and the results contain a list of lines of each
+ test's output
+
+ Example::
+ TODO
"""
if isinstance(tests, basestring):
tests = list(map(str.strip, tests.rstrip().splitlines()))
@@ -1825,13 +2167,17 @@ class ParserElement(object):
class Token(ParserElement):
- """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
+ """
+ Abstract C{ParserElement} subclass, for defining atomic matching patterns.
+ """
def __init__( self ):
super(Token,self).__init__( savelist=False )
class Empty(Token):
- """An empty token, will always match."""
+ """
+ An empty token, will always match.
+ """
def __init__( self ):
super(Empty,self).__init__()
self.name = "Empty"
@@ -1840,7 +2186,9 @@ class Empty(Token):
class NoMatch(Token):
- """A token that will never match."""
+ """
+ A token that will never match.
+ """
def __init__( self ):
super(NoMatch,self).__init__()
self.name = "NoMatch"
@@ -1853,7 +2201,14 @@ class NoMatch(Token):
class Literal(Token):
- """Token to exactly match a specified string."""
+ """
+ Token to exactly match a specified string.
+
+ Example::
+ Literal('blah').parseString('blah') # -> ['blah']
+ Literal('blah').parseString('blahfooblah') # -> ['blah']
+ Literal('blah').parseString('bla') # -> Exception: Expected "blah"
+ """
def __init__( self, matchString ):
super(Literal,self).__init__()
self.match = matchString
@@ -1882,14 +2237,19 @@ _L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
- """Token to exactly match a specified string as a keyword, that is, it must be
- immediately followed by a non-keyword character. Compare with C{L{Literal}}:
- - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
- - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
- Accepts two optional constructor arguments in addition to the keyword string:
- - C{identChars} is a string of characters that would be valid identifier characters,
+ """
+ Token to exactly match a specified string as a keyword, that is, it must be
+ immediately followed by a non-keyword character. Compare with C{L{Literal}}:
+ - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+ - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+ Accepts two optional constructor arguments in addition to the keyword string:
+ - C{identChars} is a string of characters that would be valid identifier characters,
defaulting to all alphanumerics + "_" and "$"
- - C{caseless} allows case-insensitive matching, default is C{False}.
+ - C{caseless} allows case-insensitive matching, default is C{False}.
+
+ Example::
+ Keyword("start").parseString("start") # -> ['start']
+ Keyword("start").parseStrign("starting") # -> Exception
"""
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
@@ -1938,9 +2298,15 @@ class Keyword(Token):
Keyword.DEFAULT_KEYWORD_CHARS = chars
class CaselessLiteral(Literal):
- """Token to match a specified string, ignoring case of letters.
- Note: the matched results will always be in the case of the given
- match string, NOT the case of the input text.
+ """
+ Token to match a specified string, ignoring case of letters.
+ Note: the matched results will always be in the case of the given
+ match string, NOT the case of the input text.
+
+ Example::
+ OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+
+ (Contrast with example for L{CaselessKeyword}.)
"""
def __init__( self, matchString ):
super(CaselessLiteral,self).__init__( matchString.upper() )
@@ -1955,6 +2321,14 @@ class CaselessLiteral(Literal):
raise ParseException(instring, loc, self.errmsg, self)
class CaselessKeyword(Keyword):
+ """
+ Caseless version of L{Keyword}.
+
+ Example::
+ OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+
+ (Contrast with example for L{CaselessLiteral}.)
+ """
def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
@@ -1965,16 +2339,20 @@ class CaselessKeyword(Keyword):
raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
- """Token for matching words composed of allowed character sets.
- Defined with string containing all allowed initial characters,
- an optional string containing allowed body characters (if omitted,
- defaults to the initial character set), and an optional minimum,
- maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction. An optional
- C{excludeChars} parameter can list characters that might be found in
- the input C{bodyChars} string; useful to define a word of all printables
- except for one or two characters, for instance.
+ """
+ Token for matching words composed of allowed character sets.
+ Defined with string containing all allowed initial characters,
+ an optional string containing allowed body characters (if omitted,
+ defaults to the initial character set), and an optional minimum,
+ maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction. An optional
+ C{excludeChars} parameter can list characters that might be found in
+ the input C{bodyChars} string; useful to define a word of all printables
+ except for one or two characters, for instance.
+
+ Example::
+ TODO
"""
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
super(Word,self).__init__()
@@ -2089,8 +2467,12 @@ class Word(Token):
class Regex(Token):
- """Token for matching strings that match a given regular expression.
- Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+ """
+ Token for matching strings that match a given regular expression.
+ Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+
+ Example::
+ TODO
"""
compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
@@ -2153,18 +2535,22 @@ class Regex(Token):
class QuotedString(Token):
- """Token for matching strings that are delimited by quoting characters.
+ r"""
+ Token for matching strings that are delimited by quoting characters.
+
+ Defined with the following parameters:
+ - quoteChar - string of one or more characters defining the quote delimiting string
+ - escChar - character to escape quotes, typically backslash (default=None)
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
+ - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
+
+ Example::
+ TODO
"""
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
- r"""Defined with the following parameters:
- - quoteChar - string of one or more characters defining the quote delimiting string
- - escChar - character to escape quotes, typically backslash (default=None)
- - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
- - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
- - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
- """
super(QuotedString,self).__init__()
# remove white space from quote chars - wont work anyway
@@ -2277,11 +2663,15 @@ class QuotedString(Token):
class CharsNotIn(Token):
- """Token for matching words composed of characters *not* in a given set.
- Defined with string containing all disallowed characters, and an optional
- minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction.
+ """
+ Token for matching words composed of characters *not* in a given set.
+ Defined with string containing all disallowed characters, and an optional
+ minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction.
+
+ Example::
+ TODO
"""
def __init__( self, notChars, min=1, max=0, exact=0 ):
super(CharsNotIn,self).__init__()
@@ -2339,11 +2729,13 @@ class CharsNotIn(Token):
return self.strRepr
class White(Token):
- """Special matching class for matching whitespace. Normally, whitespace is ignored
- by pyparsing grammars. This class is included when some whitespace structures
- are significant. Define with a string containing the whitespace characters to be
- matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
- as defined for the C{L{Word}} class."""
+ """
+ Special matching class for matching whitespace. Normally, whitespace is ignored
+ by pyparsing grammars. This class is included when some whitespace structures
+ are significant. Define with a string containing the whitespace characters to be
+ matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
+ as defined for the C{L{Word}} class.
+ """
whiteStrs = {
" " : "<SPC>",
"\t": "<TAB>",
@@ -2395,7 +2787,9 @@ class _PositionToken(Token):
self.mayIndexError = False
class GoToColumn(_PositionToken):
- """Token to advance to a specific column of input text; useful for tabular report scraping."""
+ """
+ Token to advance to a specific column of input text; useful for tabular report scraping.
+ """
def __init__( self, colno ):
super(GoToColumn,self).__init__()
self.col = colno
@@ -2418,7 +2812,9 @@ class GoToColumn(_PositionToken):
return newloc, ret
class LineStart(_PositionToken):
- """Matches if current position is at the beginning of a line within the parse string"""
+ """
+ Matches if current position is at the beginning of a line within the parse string
+ """
def __init__( self ):
super(LineStart,self).__init__()
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
@@ -2438,7 +2834,12 @@ class LineStart(_PositionToken):
return loc, []
class LineEnd(_PositionToken):
- """Matches if current position is at the end of a line within the parse string"""
+ """
+ Matches if current position is at the end of a line within the parse string
+
+ Example::
+ TODO
+ """
def __init__( self ):
super(LineEnd,self).__init__()
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
@@ -2456,7 +2857,9 @@ class LineEnd(_PositionToken):
raise ParseException(instring, loc, self.errmsg, self)
class StringStart(_PositionToken):
- """Matches if current position is at the beginning of the parse string"""
+ """
+ Matches if current position is at the beginning of the parse string
+ """
def __init__( self ):
super(StringStart,self).__init__()
self.errmsg = "Expected start of text"
@@ -2469,7 +2872,9 @@ class StringStart(_PositionToken):
return loc, []
class StringEnd(_PositionToken):
- """Matches if current position is at the end of the parse string"""
+ """
+ Matches if current position is at the end of the parse string
+ """
def __init__( self ):
super(StringEnd,self).__init__()
self.errmsg = "Expected end of text"
@@ -2485,11 +2890,12 @@ class StringEnd(_PositionToken):
raise ParseException(instring, loc, self.errmsg, self)
class WordStart(_PositionToken):
- """Matches if the current position is at the beginning of a Word, and
- is not preceded by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
- the string being parsed, or at the beginning of a line.
+ """
+ Matches if the current position is at the beginning of a Word, and
+ is not preceded by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+ the string being parsed, or at the beginning of a line.
"""
def __init__(self, wordChars = printables):
super(WordStart,self).__init__()
@@ -2504,11 +2910,12 @@ class WordStart(_PositionToken):
return loc, []
class WordEnd(_PositionToken):
- """Matches if the current position is at the end of a Word, and
- is not followed by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
- the string being parsed, or at the end of a line.
+ """
+ Matches if the current position is at the end of a Word, and
+ is not followed by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+ the string being parsed, or at the end of a line.
"""
def __init__(self, wordChars = printables):
super(WordEnd,self).__init__()
@@ -2526,7 +2933,9 @@ class WordEnd(_PositionToken):
class ParseExpression(ParserElement):
- """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
+ """
+ Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
+ """
def __init__( self, exprs, savelist = False ):
super(ParseExpression,self).__init__(savelist)
if isinstance( exprs, _generatorType ):
@@ -2635,10 +3044,14 @@ class ParseExpression(ParserElement):
return ret
class And(ParseExpression):
- """Requires all given C{ParseExpression}s to be found in the given order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'+'} operator.
- May also be constructed using the C{'-'} operator, which will suppress backtracking.
+ """
+ Requires all given C{ParseExpression}s to be found in the given order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'+'} operator.
+ May also be constructed using the C{'-'} operator, which will suppress backtracking.
+
+ Example::
+ TODO
"""
class _ErrorStop(Empty):
@@ -2702,9 +3115,13 @@ class And(ParseExpression):
class Or(ParseExpression):
- """Requires that at least one C{ParseExpression} is found.
- If two expressions match, the expression that matches the longest string will be used.
- May be constructed using the C{'^'} operator.
+ """
+ Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the expression that matches the longest string will be used.
+ May be constructed using the C{'^'} operator.
+
+ Example::
+ TODO
"""
def __init__( self, exprs, savelist = False ):
super(Or,self).__init__(exprs, savelist)
@@ -2772,9 +3189,13 @@ class Or(ParseExpression):
class MatchFirst(ParseExpression):
- """Requires that at least one C{ParseExpression} is found.
- If two expressions match, the first one listed is the one that will match.
- May be constructed using the C{'|'} operator.
+ """
+ Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the first one listed is the one that will match.
+ May be constructed using the C{'|'} operator.
+
+ Example::
+ TODO
"""
def __init__( self, exprs, savelist = False ):
super(MatchFirst,self).__init__(exprs, savelist)
@@ -2828,9 +3249,13 @@ class MatchFirst(ParseExpression):
class Each(ParseExpression):
- """Requires all given C{ParseExpression}s to be found, but in any order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'&'} operator.
+ """
+ Requires all given C{ParseExpression}s to be found, but in any order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'&'} operator.
+
+ Example::
+ TODO
"""
def __init__( self, exprs, savelist = True ):
super(Each,self).__init__(exprs, savelist)
@@ -2913,7 +3338,9 @@ class Each(ParseExpression):
class ParseElementEnhance(ParserElement):
- """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
+ """
+ Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
+ """
def __init__( self, expr, savelist=False ):
super(ParseElementEnhance,self).__init__(savelist)
if isinstance( expr, basestring ):
@@ -2985,10 +3412,15 @@ class ParseElementEnhance(ParserElement):
class FollowedBy(ParseElementEnhance):
- """Lookahead matching of the given parse expression. C{FollowedBy}
+ """
+ Lookahead matching of the given parse expression. C{FollowedBy}
does *not* advance the parsing position within the input string, it only
verifies that the specified parse expression matches at the current
- position. C{FollowedBy} always returns a null token list."""
+ position. C{FollowedBy} always returns a null token list.
+
+ Example::
+ TODO
+ """
def __init__( self, expr ):
super(FollowedBy,self).__init__(expr)
self.mayReturnEmpty = True
@@ -2999,11 +3431,16 @@ class FollowedBy(ParseElementEnhance):
class NotAny(ParseElementEnhance):
- """Lookahead to disallow matching with the given parse expression. C{NotAny}
+ """
+ Lookahead to disallow matching with the given parse expression. C{NotAny}
does *not* advance the parsing position within the input string, it only
verifies that the specified parse expression does *not* match at the current
position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
- always returns a null token list. May be constructed using the '~' operator."""
+ always returns a null token list. May be constructed using the '~' operator.
+
+ Example::
+ TODO
+ """
def __init__( self, expr ):
super(NotAny,self).__init__(expr)
#~ self.leaveWhitespace()
@@ -3027,13 +3464,17 @@ class NotAny(ParseElementEnhance):
class OneOrMore(ParseElementEnhance):
- """Repetition of one or more of the given expression.
+ """
+ Repetition of one or more of the given expression.
- Parameters:
- - expr - expression that must match one or more times
- - stopOn - (default=None) - expression for a terminating sentinel
+ Parameters:
+ - expr - expression that must match one or more times
+ - stopOn - (default=None) - expression for a terminating sentinel
(only required if the sentinel would ordinarily match the repetition
expression)
+
+ Example::
+ TODO
"""
def __init__( self, expr, stopOn=None):
super(OneOrMore, self).__init__(expr)
@@ -3086,13 +3527,17 @@ class OneOrMore(ParseElementEnhance):
return ret
class ZeroOrMore(OneOrMore):
- """Optional repetition of zero or more of the given expression.
+ """
+ Optional repetition of zero or more of the given expression.
- Parameters:
- - expr - expression that must match zero or more times
- - stopOn - (default=None) - expression for a terminating sentinel
+ Parameters:
+ - expr - expression that must match zero or more times
+ - stopOn - (default=None) - expression for a terminating sentinel
(only required if the sentinel would ordinarily match the repetition
expression)
+
+ Example::
+ TODO
"""
def __init__( self, expr, stopOn=None):
super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
@@ -3122,12 +3567,16 @@ class _NullToken(object):
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
- """Optional matching of the given expression.
+ """
+ Optional matching of the given expression.
- Parameters:
- - expr - expression that must match zero or more times
- - default (optional) - value to be returned if the optional expression
+ Parameters:
+ - expr - expression that must match zero or more times
+ - default (optional) - value to be returned if the optional expression
is not found.
+
+ Example::
+ TODO
"""
def __init__( self, expr, default=_optionalNotMatched ):
super(Optional,self).__init__( expr, savelist=False )
@@ -3158,17 +3607,21 @@ class Optional(ParseElementEnhance):
return self.strRepr
class SkipTo(ParseElementEnhance):
- """Token for skipping over all undefined text until the matched expression is found.
+ """
+ Token for skipping over all undefined text until the matched expression is found.
- Parameters:
- - expr - target expression marking the end of the data to be skipped
- - include - (default=False) if True, the target expression is also parsed
+ Parameters:
+ - expr - target expression marking the end of the data to be skipped
+ - include - (default=False) if True, the target expression is also parsed
(the skipped text and target expression are returned as a 2-element list).
- - ignore - (default=None) used to define grammars (typically quoted strings and
+ - ignore - (default=None) used to define grammars (typically quoted strings and
comments) that might contain false matches to the target expression
- - failOn - (default=None) define expressions that are not allowed to be
+ - failOn - (default=None) define expressions that are not allowed to be
included in the skipped test; if found before the target expression is found,
the SkipTo is not a match
+
+ Example::
+ TODO
"""
def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other )
@@ -3231,19 +3684,23 @@ class SkipTo(ParseElementEnhance):
return loc, skipresult
class Forward(ParseElementEnhance):
- """Forward declaration of an expression to be defined later -
- used for recursive grammars, such as algebraic infix notation.
- When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
-
- Note: take care when assigning to C{Forward} not to overlook precedence of operators.
- Specifically, '|' has a lower precedence than '<<', so that::
- fwdExpr << a | b | c
- will actually be evaluated as::
- (fwdExpr << a) | b | c
- thereby leaving b and c out as parseable alternatives. It is recommended that you
- explicitly group the values inserted into the C{Forward}::
- fwdExpr << (a | b | c)
- Converting to use the '<<=' operator instead will avoid this problem.
+ """
+ Forward declaration of an expression to be defined later -
+ used for recursive grammars, such as algebraic infix notation.
+ When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+ Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+ Specifically, '|' has a lower precedence than '<<', so that::
+ fwdExpr << a | b | c
+ will actually be evaluated as::
+ (fwdExpr << a) | b | c
+ thereby leaving b and c out as parseable alternatives. It is recommended that you
+ explicitly group the values inserted into the C{Forward}::
+ fwdExpr << (a | b | c)
+ Converting to use the '<<=' operator instead will avoid this problem.
+
+ Example::
+ TODO
"""
def __init__( self, other=None ):
super(Forward,self).__init__( other, savelist=False )
@@ -3312,15 +3769,29 @@ class _ForwardNoRecurse(Forward):
return "..."
class TokenConverter(ParseElementEnhance):
- """Abstract subclass of C{ParseExpression}, for converting parsed results."""
+ """
+ Abstract subclass of C{ParseExpression}, for converting parsed results.
+ """
def __init__( self, expr, savelist=False ):
super(TokenConverter,self).__init__( expr )#, savelist )
self.saveAsList = False
class Combine(TokenConverter):
- """Converter to concatenate all matching tokens to a single string.
- By default, the matching patterns must also be contiguous in the input string;
- this can be disabled by specifying C{'adjacent=False'} in the constructor.
+ """
+ Converter to concatenate all matching tokens to a single string.
+ By default, the matching patterns must also be contiguous in the input string;
+ this can be disabled by specifying C{'adjacent=False'} in the constructor.
+
+ Example::
+ real = Word(nums) + '.' + Word(nums)
+ print(real.parseString('3.1416')) # -> ['3', '.', '1416']
+ # will also erroneously match the following
+ print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
+
+ real = Combine(Word(nums) + '.' + Word(nums))
+ print(real.parseString('3.1416')) # -> ['3.1416']
+ # no match when there are internal spaces
+ print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
"""
def __init__( self, expr, joinString="", adjacent=True ):
super(Combine,self).__init__( expr )
@@ -3350,7 +3821,19 @@ class Combine(TokenConverter):
return retToks
class Group(TokenConverter):
- """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
+ """
+ Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
+
+ Example::
+ ident = Word(alphas)
+ num = Word(nums)
+ term = ident | num
+ func = ident + Optional(delimitedList(term))
+ print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
+
+ func = ident + Group(Optional(delimitedList(term)))
+ print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
+ """
def __init__( self, expr ):
super(Group,self).__init__( expr )
self.saveAsList = True
@@ -3359,9 +3842,13 @@ class Group(TokenConverter):
return [ tokenlist ]
class Dict(TokenConverter):
- """Converter to return a repetitive expression as a list, but also as a dictionary.
- Each element can also be referenced using the first token in the expression as its key.
- Useful for tabular report scraping when the first column can be used as a item key.
+ """
+ Converter to return a repetitive expression as a list, but also as a dictionary.
+ Each element can also be referenced using the first token in the expression as its key.
+ Useful for tabular report scraping when the first column can be used as a item key.
+
+ Example::
+ TODO
"""
def __init__( self, expr ):
super(Dict,self).__init__( expr )
@@ -3393,7 +3880,12 @@ class Dict(TokenConverter):
class Suppress(TokenConverter):
- """Converter for ignoring the results of a parsed expression."""
+ """
+ Converter for ignoring the results of a parsed expression.
+
+ Example::
+ TODO
+ """
def postParse( self, instring, loc, tokenlist ):
return []
@@ -3402,7 +3894,9 @@ class Suppress(TokenConverter):
class OnlyOnce(object):
- """Wrapper for parse actions, to ensure they are only called once."""
+ """
+ Wrapper for parse actions, to ensure they are only called once.
+ """
def __init__(self, methodCall):
self.callable = _trim_arity(methodCall)
self.called = False
@@ -3416,7 +3910,12 @@ class OnlyOnce(object):
self.called = False
def traceParseAction(f):
- """Decorator for debugging parse actions."""
+ """
+ Decorator for debugging parse actions.
+
+ Example::
+ TODO
+ """
f = _trim_arity(f)
def z(*paArgs):
thisFunc = f.__name__
@@ -3441,12 +3940,17 @@ def traceParseAction(f):
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
- """Helper to define a delimited list of expressions - the delimiter defaults to ','.
- By default, the list elements and delimiters can have intervening whitespace, and
- comments, but this can be overridden by passing C{combine=True} in the constructor.
- If C{combine} is set to C{True}, the matching tokens are returned as a single token
- string, with the delimiters included; otherwise, the matching tokens are returned
- as a list of tokens, with the delimiters suppressed.
+ """
+ Helper to define a delimited list of expressions - the delimiter defaults to ','.
+ By default, the list elements and delimiters can have intervening whitespace, and
+ comments, but this can be overridden by passing C{combine=True} in the constructor.
+ If C{combine} is set to C{True}, the matching tokens are returned as a single token
+ string, with the delimiters included; otherwise, the matching tokens are returned
+ as a list of tokens, with the delimiters suppressed.
+
+ Example::
+ delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+ delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
"""
dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
if combine:
@@ -3455,11 +3959,15 @@ def delimitedList( expr, delim=",", combine=False ):
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def countedArray( expr, intExpr=None ):
- """Helper to define a counted list of expressions.
- This helper defines a pattern of the form::
- integer expr expr expr...
- where the leading integer tells how many expr expressions follow.
- The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
+ """
+ Helper to define a counted list of expressions.
+ This helper defines a pattern of the form::
+ integer expr expr expr...
+ where the leading integer tells how many expr expressions follow.
+ The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
+
+ Example::
+ countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
"""
arrayExpr = Forward()
def countFieldParseAction(s,l,t):
@@ -3484,16 +3992,17 @@ def _flatten(L):
return ret
def matchPreviousLiteral(expr):
- """Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousLiteral(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches a
- previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
- If this is not desired, use C{matchPreviousExpr}.
- Do *not* use with packrat parsing enabled.
+ """
+ Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousLiteral(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches a
+ previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+ If this is not desired, use C{matchPreviousExpr}.
+ Do *not* use with packrat parsing enabled.
"""
rep = Forward()
def copyTokenToRepeater(s,l,t):
@@ -3511,17 +4020,18 @@ def matchPreviousLiteral(expr):
return rep
def matchPreviousExpr(expr):
- """Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousExpr(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches by
- expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
- the expressions are evaluated first, and then compared, so
- C{"1"} is compared with C{"10"}.
- Do *not* use with packrat parsing enabled.
+ """
+ Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousExpr(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches by
+ expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
+ the expressions are evaluated first, and then compared, so
+ C{"1"} is compared with C{"10"}.
+ Do *not* use with packrat parsing enabled.
"""
rep = Forward()
e2 = expr.copy()
@@ -3546,16 +4056,27 @@ def _escapeRegexRangeChars(s):
return _ustr(s)
def oneOf( strs, caseless=False, useRegex=True ):
- """Helper to quickly define a set of alternative Literals, and makes sure to do
- longest-first testing when there is a conflict, regardless of the input order,
- but returns a C{L{MatchFirst}} for best performance.
-
- Parameters:
- - strs - a string of space-delimited literals, or a list of string literals
- - caseless - (default=False) - treat all literals as caseless
- - useRegex - (default=True) - as an optimization, will generate a Regex
+ """
+ Helper to quickly define a set of alternative Literals, and makes sure to do
+ longest-first testing when there is a conflict, regardless of the input order,
+ but returns a C{L{MatchFirst}} for best performance.
+
+ Parameters:
+ - strs - a string of space-delimited literals, or a list of string literals
+ - caseless - (default=False) - treat all literals as caseless
+ - useRegex - (default=True) - as an optimization, will generate a Regex
object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
if creating a C{Regex} raises an exception)
+
+ Example::
+ comp_oper = oneOf("< = > <= >= !=")
+ var = Word(alphas)
+ number = Word(nums)
+ term = var | number
+ comparison_expr = term + comp_oper + term
+ print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
+ prints::
+ [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
"""
if caseless:
isequal = ( lambda a,b: a.upper() == b.upper() )
@@ -3610,27 +4131,36 @@ def oneOf( strs, caseless=False, useRegex=True ):
return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
def dictOf( key, value ):
- """Helper to easily and clearly define a dictionary by specifying the respective patterns
- for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
- in the proper order. The key pattern can include delimiting markers or punctuation,
- as long as they are suppressed, thereby leaving the significant key text. The value
- pattern can include named results, so that the C{Dict} results can include named token
- fields.
+ """
+ Helper to easily and clearly define a dictionary by specifying the respective patterns
+ for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+ in the proper order. The key pattern can include delimiting markers or punctuation,
+ as long as they are suppressed, thereby leaving the significant key text. The value
+ pattern can include named results, so that the C{Dict} results can include named token
+ fields.
+
+ Example::
+ TODO
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
def originalTextFor(expr, asString=True):
- """Helper to return the original, untokenized text for a given expression. Useful to
- restore the parsed fields of an HTML start tag into the raw tag text itself, or to
- revert separate tokens with intervening whitespace back to the original matching
- input text. By default, returns astring containing the original parsed text.
+ """
+ Helper to return the original, untokenized text for a given expression. Useful to
+ restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+ revert separate tokens with intervening whitespace back to the original matching
+ input text. By default, returns astring containing the original parsed text.
- If the optional C{asString} argument is passed as C{False}, then the return value is a
- C{L{ParseResults}} containing any results names that were originally matched, and a
- single token containing the original matched text from the input string. So if
- the expression passed to C{L{originalTextFor}} contains expressions with defined
- results names, you must set C{asString} to C{False} if you want to preserve those
- results name values."""
+ If the optional C{asString} argument is passed as C{False}, then the return value is a
+ C{L{ParseResults}} containing any results names that were originally matched, and a
+ single token containing the original matched text from the input string. So if
+ the expression passed to C{L{originalTextFor}} contains expressions with defined
+ results names, you must set C{asString} to C{False} if you want to preserve those
+ results name values.
+
+ Example::
+ TODO
+ """
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
endlocMarker = locMarker.copy()
endlocMarker.callPreparse = False
@@ -3645,19 +4175,31 @@ def originalTextFor(expr, asString=True):
return matchExpr
def ungroup(expr):
- """Helper to undo pyparsing's default grouping of And expressions, even
- if all but one are non-empty."""
+ """
+ Helper to undo pyparsing's default grouping of And expressions, even
+ if all but one are non-empty.
+ """
return TokenConverter(expr).setParseAction(lambda t:t[0])
def locatedExpr(expr):
- """Helper to decorate a returned token with its starting and ending locations in the input string.
- This helper adds the following results names:
- - locn_start = location where matched expression begins
- - locn_end = location where matched expression ends
- - value = the actual parsed results
-
- Be careful if the input text contains C{<TAB>} characters, you may want to call
- C{L{ParserElement.parseWithTabs}}
+ """
+ Helper to decorate a returned token with its starting and ending locations in the input string.
+ This helper adds the following results names:
+ - locn_start = location where matched expression begins
+ - locn_end = location where matched expression ends
+ - value = the actual parsed results
+
+ Be careful if the input text contains C{<TAB>} characters, you may want to call
+ C{L{ParserElement.parseWithTabs}}
+
+ Example::
+ wd = Word(alphas)
+ for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+ print(match)
+ prints::
+ [[0, 'ljsdf', 5]]
+ [[8, 'lksdjjf', 15]]
+ [[18, 'lkkjj', 23]]
"""
locator = Empty().setParseAction(lambda s,l,t: l)
return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
@@ -3678,21 +4220,25 @@ _charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
- r"""Helper to easily define string ranges for use in Word construction. Borrows
- syntax from regexp '[]' string range definitions::
- srange("[0-9]") -> "0123456789"
- srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
- srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
- The input string must be enclosed in []'s, and the returned string is the expanded
- character set joined into a single string.
- The values enclosed in the []'s may be:
- - a single character
- - an escaped character with a leading backslash (such as C{\-} or C{\]})
- - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
- (C{\0x##} is also supported for backwards compatibility)
- - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
- - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
- - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
+ r"""
+ Helper to easily define string ranges for use in Word construction. Borrows
+ syntax from regexp '[]' string range definitions::
+ srange("[0-9]") -> "0123456789"
+ srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
+ srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+ The input string must be enclosed in []'s, and the returned string is the expanded
+ character set joined into a single string.
+ The values enclosed in the []'s may be:
+ - a single character
+ - an escaped character with a leading backslash (such as C{\-} or C{\]})
+ - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
+ (C{\0x##} is also supported for backwards compatibility)
+ - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
+ - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
+ - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
+
+ Example::
+ TODO
"""
_expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
try:
@@ -3701,8 +4247,12 @@ def srange(s):
return ""
def matchOnlyAtCol(n):
- """Helper method for defining parse actions that require matching at a specific
- column in the input text.
+ """
+ Helper method for defining parse actions that require matching at a specific
+ column in the input text.
+
+ Example::
+ TODO
"""
def verifyCol(strg,locn,toks):
if col(locn,strg) != n:
@@ -3710,23 +4260,42 @@ def matchOnlyAtCol(n):
return verifyCol
def replaceWith(replStr):
- """Helper method for common parse actions that simply return a literal value. Especially
- useful when used with C{L{transformString<ParserElement.transformString>}()}.
+ """
+ Helper method for common parse actions that simply return a literal value. Especially
+ useful when used with C{L{transformString<ParserElement.transformString>}()}.
+
+ Example::
+ num = Word(nums).setParseAction(lambda toks: int(toks[0]))
+ na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
+ term = na | num
+
+ OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
"""
return lambda s,l,t: [replStr]
def removeQuotes(s,l,t):
- """Helper parse action for removing quotation marks from parsed quoted strings.
- To use, add this parse action to quoted string using::
- quotedString.setParseAction( removeQuotes )
+ """
+ Helper parse action for removing quotation marks from parsed quoted strings.
+
+ Example::
+ # by default, quotation marks are included in parsed results
+ quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
+
+ # use removeQuotes to strip quotation marks from parsed results
+ quotedString.setParseAction(removeQuotes)
+ quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
"""
return t[0][1:-1]
def tokenMap(func, *args):
- """Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
- args are passed, they are forwarded to the given function as additional arguments after
- the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
- parsed data to an integer using base 16.
+ """
+ Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
+ args are passed, they are forwarded to the given function as additional arguments after
+ the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+ parsed data to an integer using base 16.
+
+ Example::
+ TODO
"""
def pa(s,l,t):
return [func(tokn, *args) for tokn in t]
@@ -3776,33 +4345,47 @@ def _makeTags(tagStr, xml):
return openTag, closeTag
def makeHTMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
+ """
+ Helper to construct opening and closing tag expressions for HTML, given a tag name
+
+ Example::
+ TODO
+ """
return _makeTags( tagStr, False )
def makeXMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for XML, given a tag name"""
+ """
+ Helper to construct opening and closing tag expressions for XML, given a tag name
+
+ Example::
+ TODO
+ """
return _makeTags( tagStr, True )
def withAttribute(*args,**attrDict):
- """Helper to create a validating parse action to be used with start tags created
- with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
- with a required attribute value, to avoid false matches on common tags such as
- C{<TD>} or C{<DIV>}.
-
- Call C{withAttribute} with a series of attribute names and values. Specify the list
- of filter attributes names and values as:
- - keyword arguments, as in C{(align="right")}, or
- - as an explicit dict with C{**} operator, when an attribute name is also a Python
+ """
+ Helper to create a validating parse action to be used with start tags created
+ with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
+ C{<TD>} or C{<DIV>}.
+
+ Call C{withAttribute} with a series of attribute names and values. Specify the list
+ of filter attributes names and values as:
+ - keyword arguments, as in C{(align="right")}, or
+ - as an explicit dict with C{**} operator, when an attribute name is also a Python
reserved word, as in C{**{"class":"Customer", "align":"right"}}
- - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
- For attribute names with a namespace prefix, you must use the second form. Attribute
- names are matched insensitive to upper/lower case.
+ - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+ For attribute names with a namespace prefix, you must use the second form. Attribute
+ names are matched insensitive to upper/lower case.
- If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
+ If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
- To verify that the attribute exists, but without specifying a value, pass
- C{withAttribute.ANY_VALUE} as the value.
- """
+ To verify that the attribute exists, but without specifying a value, pass
+ C{withAttribute.ANY_VALUE} as the value.
+
+ Example::
+ TODO
+ """
if args:
attrs = args[:]
else:
@@ -3819,9 +4402,13 @@ def withAttribute(*args,**attrDict):
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
- """Simplified version of C{L{withAttribute}} when matching on a div class - made
- difficult because C{class} is a reserved word in Python.
- """
+ """
+ Simplified version of C{L{withAttribute}} when matching on a div class - made
+ difficult because C{class} is a reserved word in Python.
+
+ Example::
+ TODO
+ """
classattr = "%s:class" % namespace if namespace else "class"
return withAttribute(**{classattr : classname})
@@ -3830,30 +4417,34 @@ opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
- """Helper method for constructing grammars of expressions made up of
- operators working in a precedence hierarchy. Operators may be unary or
- binary, left- or right-associative. Parse actions can also be attached
- to operator expressions.
-
- Parameters:
- - baseExpr - expression representing the most basic element for the nested
- - opList - list of tuples, one for each operator precedence level in the
- expression grammar; each tuple is of the form
- (opExpr, numTerms, rightLeftAssoc, parseAction), where:
- - opExpr is the pyparsing expression for the operator;
- may also be a string, which will be converted to a Literal;
- if numTerms is 3, opExpr is a tuple of two expressions, for the
- two operators separating the 3 terms
- - numTerms is the number of terms for this operator (must
- be 1, 2, or 3)
- - rightLeftAssoc is the indicator whether the operator is
- right or left associative, using the pyparsing-defined
- constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- - parseAction is the parse action to be associated with
- expressions matching this operator expression (the
- parse action tuple member may be omitted)
- - lpar - expression for matching left-parentheses (default=Suppress('('))
- - rpar - expression for matching right-parentheses (default=Suppress(')'))
+ """
+ Helper method for constructing grammars of expressions made up of
+ operators working in a precedence hierarchy. Operators may be unary or
+ binary, left- or right-associative. Parse actions can also be attached
+ to operator expressions.
+
+ Parameters:
+ - baseExpr - expression representing the most basic element for the nested
+ - opList - list of tuples, one for each operator precedence level in the
+ expression grammar; each tuple is of the form
+ (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+ - opExpr is the pyparsing expression for the operator;
+ may also be a string, which will be converted to a Literal;
+ if numTerms is 3, opExpr is a tuple of two expressions, for the
+ two operators separating the 3 terms
+ - numTerms is the number of terms for this operator (must
+ be 1, 2, or 3)
+ - rightLeftAssoc is the indicator whether the operator is
+ right or left associative, using the pyparsing-defined
+ constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+ - parseAction is the parse action to be associated with
+ expressions matching this operator expression (the
+ parse action tuple member may be omitted)
+ - lpar - expression for matching left-parentheses (default=Suppress('('))
+ - rpar - expression for matching right-parentheses (default=Suppress(')'))
+
+ Example::
+ TODO
"""
ret = Forward()
lastExpr = baseExpr | ( lpar + ret + rpar )
@@ -3913,25 +4504,29 @@ quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
- """Helper method for defining nested lists enclosed in opening and closing
- delimiters ("(" and ")" are the default).
-
- Parameters:
- - opener - opening character for a nested list (default="("); can also be a pyparsing expression
- - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
- - content - expression for items within the nested lists (default=None)
- - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
-
- If an expression is not provided for the content argument, the nested
- expression will capture all whitespace-delimited content between delimiters
- as a list of separate values.
-
- Use the C{ignoreExpr} argument to define expressions that may contain
- opening or closing characters that should not be treated as opening
- or closing characters for nesting, such as quotedString or a comment
- expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
- The default is L{quotedString}, but if no expressions are to be ignored,
- then pass C{None} for this argument.
+ """
+ Helper method for defining nested lists enclosed in opening and closing
+ delimiters ("(" and ")" are the default).
+
+ Parameters:
+ - opener - opening character for a nested list (default="("); can also be a pyparsing expression
+ - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
+ - content - expression for items within the nested lists (default=None)
+ - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
+
+ If an expression is not provided for the content argument, the nested
+ expression will capture all whitespace-delimited content between delimiters
+ as a list of separate values.
+
+ Use the C{ignoreExpr} argument to define expressions that may contain
+ opening or closing characters that should not be treated as opening
+ or closing characters for nesting, such as quotedString or a comment
+ expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+ The default is L{quotedString}, but if no expressions are to be ignored,
+ then pass C{None} for this argument.
+
+ Example::
+ TODO
"""
if opener == closer:
raise ValueError("opening and closing strings cannot be the same")
@@ -3966,20 +4561,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop
return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
- """Helper method for defining space-delimited indentation blocks, such as
- those used to define block statements in Python source code.
+ """
+ Helper method for defining space-delimited indentation blocks, such as
+ those used to define block statements in Python source code.
- Parameters:
- - blockStatementExpr - expression defining syntax of statement that
+ Parameters:
+ - blockStatementExpr - expression defining syntax of statement that
is repeated within the indented block
- - indentStack - list created by caller to manage indentation stack
+ - indentStack - list created by caller to manage indentation stack
(multiple statementWithIndentedBlock expressions within a single grammar
should share a common indentStack)
- - indent - boolean indicating whether block must be indented beyond the
+ - indent - boolean indicating whether block must be indented beyond the
the current level; set to False for block of left-most statements
(default=True)
- A valid block must contain at least one C{blockStatement}.
+ A valid block must contain at least one C{blockStatement}.
+
+ Example::
+ TODO
"""
def checkPeerIndent(s,l,t):
if l >= len(s): return
@@ -4068,6 +4667,9 @@ class pyparsing_common:
- C{L{convertToDate}}
- C{L{convertToDatetime}}
- C{L{stripHTMLTags}}
+
+ Example::
+ TODO
"""
convertToInteger = tokenMap(int)
@@ -4134,6 +4736,9 @@ class pyparsing_common:
Params -
- fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
+
+ Example::
+ TODO
"""
return lambda s,l,t: datetime.strptime(t[0], fmt).date()
@@ -4144,6 +4749,9 @@ class pyparsing_common:
Params -
- fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
+
+ Example::
+ TODO
"""
return lambda s,l,t: datetime.strptime(t[0], fmt)
@@ -4159,7 +4767,12 @@ class pyparsing_common:
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
@staticmethod
def stripHTMLTags(s, l, tokens):
- """Parse action to remove HTML tags from web page HTML source"""
+ """
+ Parse action to remove HTML tags from web page HTML source
+
+ Example::
+ TODO
+ """
return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":