diff options
Diffstat (limited to 'HowToUsePyparsing.html')
-rw-r--r-- | HowToUsePyparsing.html | 1289 |
1 files changed, 1289 insertions, 0 deletions
diff --git a/HowToUsePyparsing.html b/HowToUsePyparsing.html new file mode 100644 index 0000000..1bd180b --- /dev/null +++ b/HowToUsePyparsing.html @@ -0,0 +1,1289 @@ +<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Docutils 0.8: http://docutils.sourceforge.net/" />
+<title>Using the pyparsing module</title>
+<meta name="author" content="Paul McGuire" />
+<meta name="date" content="July, 2013" />
+<meta name="copyright" content="Copyright © 2003-2013 Paul McGuire." />
+<style type="text/css">
+
+/*
+:Author: David Goodger (goodger@python.org)
+:Id: $Id: html4css1.css 6387 2010-08-13 12:23:41Z milde $
+:Copyright: This stylesheet has been placed in the public domain.
+
+Default cascading style sheet for the HTML output of Docutils.
+
+See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
+customize this style sheet.
+*/
+
+/* used to remove borders from tables and images */
+.borderless, table.borderless td, table.borderless th {
+ border: 0 }
+
+table.borderless td, table.borderless th {
+ /* Override padding for "table.docutils td" with "! important".
+ The right padding separates the table cells. */
+ padding: 0 0.5em 0 0 ! important }
+
+.first {
+ /* Override more specific margin styles with "! important". */
+ margin-top: 0 ! important }
+
+.last, .with-subtitle {
+ margin-bottom: 0 ! important }
+
+.hidden {
+ display: none }
+
+a.toc-backref {
+ text-decoration: none ;
+ color: black }
+
+blockquote.epigraph {
+ margin: 2em 5em ; }
+
+dl.docutils dd {
+ margin-bottom: 0.5em }
+
+object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] {
+ overflow: hidden;
+}
+
+/* Uncomment (and remove this text!) to get bold-faced definition list terms
+dl.docutils dt {
+ font-weight: bold }
+*/
+
+div.abstract {
+ margin: 2em 5em }
+
+div.abstract p.topic-title {
+ font-weight: bold ;
+ text-align: center }
+
+div.admonition, div.attention, div.caution, div.danger, div.error,
+div.hint, div.important, div.note, div.tip, div.warning {
+ margin: 2em ;
+ border: medium outset ;
+ padding: 1em }
+
+div.admonition p.admonition-title, div.hint p.admonition-title,
+div.important p.admonition-title, div.note p.admonition-title,
+div.tip p.admonition-title {
+ font-weight: bold ;
+ font-family: sans-serif }
+
+div.attention p.admonition-title, div.caution p.admonition-title,
+div.danger p.admonition-title, div.error p.admonition-title,
+div.warning p.admonition-title {
+ color: red ;
+ font-weight: bold ;
+ font-family: sans-serif }
+
+/* Uncomment (and remove this text!) to get reduced vertical space in
+ compound paragraphs.
+div.compound .compound-first, div.compound .compound-middle {
+ margin-bottom: 0.5em }
+
+div.compound .compound-last, div.compound .compound-middle {
+ margin-top: 0.5em }
+*/
+
+div.dedication {
+ margin: 2em 5em ;
+ text-align: center ;
+ font-style: italic }
+
+div.dedication p.topic-title {
+ font-weight: bold ;
+ font-style: normal }
+
+div.figure {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+div.footer, div.header {
+ clear: both;
+ font-size: smaller }
+
+div.line-block {
+ display: block ;
+ margin-top: 1em ;
+ margin-bottom: 1em }
+
+div.line-block div.line-block {
+ margin-top: 0 ;
+ margin-bottom: 0 ;
+ margin-left: 1.5em }
+
+div.sidebar {
+ margin: 0 0 0.5em 1em ;
+ border: medium outset ;
+ padding: 1em ;
+ background-color: #ffffee ;
+ width: 40% ;
+ float: right ;
+ clear: right }
+
+div.sidebar p.rubric {
+ font-family: sans-serif ;
+ font-size: medium }
+
+div.system-messages {
+ margin: 5em }
+
+div.system-messages h1 {
+ color: red }
+
+div.system-message {
+ border: medium outset ;
+ padding: 1em }
+
+div.system-message p.system-message-title {
+ color: red ;
+ font-weight: bold }
+
+div.topic {
+ margin: 2em }
+
+h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
+h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
+ margin-top: 0.4em }
+
+h1.title {
+ text-align: center }
+
+h2.subtitle {
+ text-align: center }
+
+hr.docutils {
+ width: 75% }
+
+img.align-left, .figure.align-left, object.align-left {
+ clear: left ;
+ float: left ;
+ margin-right: 1em }
+
+img.align-right, .figure.align-right, object.align-right {
+ clear: right ;
+ float: right ;
+ margin-left: 1em }
+
+img.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left }
+
+.align-center {
+ clear: both ;
+ text-align: center }
+
+.align-right {
+ text-align: right }
+
+/* reset inner alignment in figures */
+div.align-right {
+ text-align: left }
+
+/* div.align-center * { */
+/* text-align: left } */
+
+ol.simple, ul.simple {
+ margin-bottom: 1em }
+
+ol.arabic {
+ list-style: decimal }
+
+ol.loweralpha {
+ list-style: lower-alpha }
+
+ol.upperalpha {
+ list-style: upper-alpha }
+
+ol.lowerroman {
+ list-style: lower-roman }
+
+ol.upperroman {
+ list-style: upper-roman }
+
+p.attribution {
+ text-align: right ;
+ margin-left: 50% }
+
+p.caption {
+ font-style: italic }
+
+p.credits {
+ font-style: italic ;
+ font-size: smaller }
+
+p.label {
+ white-space: nowrap }
+
+p.rubric {
+ font-weight: bold ;
+ font-size: larger ;
+ color: maroon ;
+ text-align: center }
+
+p.sidebar-title {
+ font-family: sans-serif ;
+ font-weight: bold ;
+ font-size: larger }
+
+p.sidebar-subtitle {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+p.topic-title {
+ font-weight: bold }
+
+pre.address {
+ margin-bottom: 0 ;
+ margin-top: 0 ;
+ font: inherit }
+
+pre.literal-block, pre.doctest-block {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+span.classifier {
+ font-family: sans-serif ;
+ font-style: oblique }
+
+span.classifier-delimiter {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+span.interpreted {
+ font-family: sans-serif }
+
+span.option {
+ white-space: nowrap }
+
+span.pre {
+ white-space: pre }
+
+span.problematic {
+ color: red }
+
+span.section-subtitle {
+ /* font-size relative to parent (h1..h6 element) */
+ font-size: 80% }
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px }
+
+table.docinfo {
+ margin: 2em 4em }
+
+table.docutils {
+ margin-top: 0.5em ;
+ margin-bottom: 0.5em }
+
+table.footnote {
+ border-left: solid 1px black;
+ margin-left: 1px }
+
+table.docutils td, table.docutils th,
+table.docinfo td, table.docinfo th {
+ padding-left: 0.5em ;
+ padding-right: 0.5em ;
+ vertical-align: top }
+
+table.docutils th.field-name, table.docinfo th.docinfo-name {
+ font-weight: bold ;
+ text-align: left ;
+ white-space: nowrap ;
+ padding-left: 0 }
+
+h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
+h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
+ font-size: 100% }
+
+ul.auto-toc {
+ list-style-type: none }
+
+</style>
+</head>
+<body>
+<div class="document" id="using-the-pyparsing-module">
+<h1 class="title">Using the pyparsing module</h1>
+<table class="docinfo" frame="void" rules="none">
+<col class="docinfo-name" />
+<col class="docinfo-content" />
+<tbody valign="top">
+<tr><th class="docinfo-name">Author:</th>
+<td>Paul McGuire</td></tr>
+<tr><th class="docinfo-name">Address:</th>
+<td><pre class="address">
+<a class="first last reference external" href="mailto:ptmcg@users.sourceforge.net">ptmcg@users.sourceforge.net</a>
+</pre>
+</td></tr>
+<tr><th class="docinfo-name">Revision:</th>
+<td>2.0.1</td></tr>
+<tr><th class="docinfo-name">Date:</th>
+<td>July, 2013</td></tr>
+<tr><th class="docinfo-name">Copyright:</th>
+<td>Copyright © 2003-2013 Paul McGuire.</td></tr>
+</tbody>
+</table>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">abstract:</th><td class="field-body">This document provides how-to instructions for the
+pyparsing library, an easy-to-use Python module for constructing
+and executing basic text parsers. The pyparsing module is useful
+for evaluating user-definable
+expressions, processing custom application language commands, or
+extracting data from formatted reports.</td>
+</tr>
+</tbody>
+</table>
+<div class="contents topic" id="contents">
+<p class="topic-title first">Contents</p>
+<ul class="auto-toc simple">
+<li><a class="reference internal" href="#steps-to-follow" id="id1">1 Steps to follow</a><ul class="auto-toc">
+<li><a class="reference internal" href="#hello-world" id="id2">1.1 Hello, World!</a></li>
+<li><a class="reference internal" href="#usage-notes" id="id3">1.2 Usage notes</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#classes" id="id4">2 Classes</a><ul class="auto-toc">
+<li><a class="reference internal" href="#classes-in-the-pyparsing-module" id="id5">2.1 Classes in the pyparsing module</a></li>
+<li><a class="reference internal" href="#basic-parserelement-subclasses" id="id6">2.2 Basic ParserElement subclasses</a></li>
+<li><a class="reference internal" href="#expression-subclasses" id="id7">2.3 Expression subclasses</a></li>
+<li><a class="reference internal" href="#expression-operators" id="id8">2.4 Expression operators</a></li>
+<li><a class="reference internal" href="#positional-subclasses" id="id9">2.5 Positional subclasses</a></li>
+<li><a class="reference internal" href="#converter-subclasses" id="id10">2.6 Converter subclasses</a></li>
+<li><a class="reference internal" href="#special-subclasses" id="id11">2.7 Special subclasses</a></li>
+<li><a class="reference internal" href="#other-classes" id="id12">2.8 Other classes</a></li>
+<li><a class="reference internal" href="#exception-classes-and-troubleshooting" id="id13">2.9 Exception classes and Troubleshooting</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#miscellaneous-attributes-and-methods" id="id14">3 Miscellaneous attributes and methods</a><ul class="auto-toc">
+<li><a class="reference internal" href="#helper-methods" id="id15">3.1 Helper methods</a></li>
+<li><a class="reference internal" href="#helper-parse-actions" id="id16">3.2 Helper parse actions</a></li>
+<li><a class="reference internal" href="#common-string-and-token-constants" id="id17">3.3 Common string and token constants</a></li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="steps-to-follow">
+<h1><a class="toc-backref" href="#id1">1 Steps to follow</a></h1>
+<p>To parse an incoming data string, the client code must follow these steps:</p>
+<ol class="arabic simple">
+<li>First define the tokens and patterns to be matched, and assign
+this to a program variable. Optional results names or parsing
+actions can also be defined at this time.</li>
+<li>Call <tt class="docutils literal">parseString()</tt> or <tt class="docutils literal">scanString()</tt> on this variable, passing in
+the string to
+be parsed. During the matching process, whitespace between
+tokens is skipped by default (although this can be changed).
+When token matches occur, any defined parse action methods are
+called.</li>
+<li>Process the parsed results, returned as a list of strings.
+Matching results may also be accessed as named attributes of
+the returned results, if names are defined in the definition of
+the token pattern, using <tt class="docutils literal">setResultsName()</tt>.</li>
+</ol>
+<div class="section" id="hello-world">
+<h2><a class="toc-backref" href="#id2">1.1 Hello, World!</a></h2>
+<p>The following complete Python program will parse the greeting "Hello, World!",
+or any other greeting of the form "&lt;salutation&gt;, &lt;addressee&gt;!":</p>
+<pre class="literal-block">
+from pyparsing import Word, alphas
+
+greet = Word( alphas ) + "," + Word( alphas ) + "!"
+greeting = greet.parseString( "Hello, World!" )
+print greeting
+</pre>
+<p>The parsed tokens are returned in the following form:</p>
+<pre class="literal-block">
+['Hello', ',', 'World', '!']
+</pre>
+</div>
+<div class="section" id="usage-notes">
+<h2><a class="toc-backref" href="#id3">1.2 Usage notes</a></h2>
+<ul>
+<li><p class="first">The pyparsing module can be used to interpret simple command
+strings or algebraic expressions, or can be used to extract data
+from text reports with complicated format and structure ("screen
+or report scraping"). However, it is possible that your defined
+matching patterns may accept invalid inputs. Use pyparsing to
+extract data from strings assumed to be well-formatted.</p>
+</li>
+<li><p class="first">To keep up the readability of your code, use <a class="reference internal" href="#operators">operators</a> such as <tt class="docutils literal">+</tt>, <tt class="docutils literal">|</tt>,
+<tt class="docutils literal">^</tt>, and <tt class="docutils literal">~</tt> to combine expressions. You can also combine
+string literals with ParseExpressions - they will be
+automatically converted to Literal objects. For example:</p>
+<pre class="literal-block">
+integer = Word( nums ) # simple unsigned integer
+variable = Word( alphas, max=1 ) # single letter variable, such as x, z, m, etc.
+arithOp = Word( "+-*/", max=1 ) # arithmetic operators
+equation = variable + "=" + integer + arithOp + integer # will match "x=2+2", etc.
+</pre>
+<p>In the definition of <tt class="docutils literal">equation</tt>, the string <tt class="docutils literal">"="</tt> will get added as
+a <tt class="docutils literal"><span class="pre">Literal("=")</span></tt>, but in a more readable way.</p>
+</li>
+<li><p class="first">The pyparsing module's default behavior is to ignore whitespace. This is the
+case for 99% of all parsers ever written. This allows you to write simple, clean
+grammars, such as the above <tt class="docutils literal">equation</tt>, without having to clutter it up with
+extraneous <tt class="docutils literal">ws</tt> markers. The <tt class="docutils literal">equation</tt> grammar will successfully parse all of the
+following statements:</p>
+<pre class="literal-block">
+x=2+2
+x = 2+2
+a = 10 * 4
+r= 1234/ 100000
+</pre>
+<p>Of course, it is quite simple to extend this example to support more elaborate expressions, with
+nesting with parentheses, floating point numbers, scientific notation, and named constants
+(such as <tt class="docutils literal">e</tt> or <tt class="docutils literal">pi</tt>). See <tt class="docutils literal">fourFn.py</tt>, included in the examples directory.</p>
+</li>
+<li><p class="first">To modify pyparsing's default whitespace skipping, you can use one or
+more of the following methods:</p>
+<ul>
+<li><p class="first">use the static method <tt class="docutils literal">ParserElement.setDefaultWhitespaceChars</tt>
+to override the normal set of whitespace chars (' \t\n\r'). For instance
+when defining a grammar in which newlines are significant, you should
+call <tt class="docutils literal">ParserElement.setDefaultWhitespaceChars(' \t')</tt> to remove
+newline from the set of skippable whitespace characters. Calling
+this method will affect all pyparsing expressions defined afterward.</p>
+</li>
+<li><p class="first">call <tt class="docutils literal">leaveWhitespace()</tt> on individual expressions, to suppress the
+skipping of whitespace before trying to match the expression</p>
+</li>
+<li><p class="first">use <tt class="docutils literal">Combine</tt> to require that successive expressions must be
+adjacent in the input string. For instance, this expression:</p>
+<pre class="literal-block">
+real = Word(nums) + '.' + Word(nums)
+</pre>
+<p>will match "3.14159", but will also match "3 . 12". It will also
+return the matched results as ['3', '.', '14159']. By changing this
+expression to:</p>
+<pre class="literal-block">
+real = Combine( Word(nums) + '.' + Word(nums) )
+</pre>
+<p>it will not match numbers with embedded spaces, and it will return a
+single concatenated string '3.14159' as the parsed token.</p>
+</li>
+</ul>
+</li>
+<li><p class="first">Repetition of expressions can be indicated using the '*' operator. An
+expression may be multiplied by an integer value (to indicate an exact
+repetition count), or by a tuple containing
+two integers, or None and an integer, representing min and max repetitions
+(with None representing no min or no max, depending on whether it is the first or
+second tuple element). See the following examples, where n is used to
+indicate an integer value:</p>
+<ul class="simple">
+<li><tt class="docutils literal">expr*3</tt> is equivalent to <tt class="docutils literal">expr + expr + expr</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(2,3)</span></tt> is equivalent to <tt class="docutils literal">expr + expr + Optional(expr)</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(n,None)</span></tt> or <tt class="docutils literal"><span class="pre">expr*(n,)</span></tt> is equivalent
+to <tt class="docutils literal">expr*n + ZeroOrMore(expr)</tt> (read as "at least n instances of expr")</li>
+<li><tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">expr*(0,n)</span></tt>
+(read as "0 to n instances of expr")</li>
+<li><tt class="docutils literal"><span class="pre">expr*(None,None)</span></tt> is equivalent to <tt class="docutils literal">ZeroOrMore(expr)</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(1,None)</span></tt> is equivalent to <tt class="docutils literal">OneOrMore(expr)</tt></li>
+</ul>
+<p>Note that <tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> does not raise an exception if
+more than n exprs exist in the input stream; that is,
+<tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> does not enforce a maximum number of expr
+occurrences. If this behavior is desired, then write
+<tt class="docutils literal"><span class="pre">expr*(None,n)</span> + ~expr</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">MatchFirst</tt> expressions are matched left-to-right, and the first
+match found will skip all later expressions within, so be sure
+to define less-specific patterns after more-specific patterns.
+If you are not sure which expressions are most specific, use Or
+expressions (defined using the <tt class="docutils literal">^</tt> operator) - they will always
+match the longest expression, although they are more
+compute-intensive.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Or</tt> expressions will evaluate all of the specified subexpressions
+to determine which is the "best" match, that is, which matches
+the longest string in the input data. In case of a tie, the
+left-most expression in the <tt class="docutils literal">Or</tt> list will win.</p>
+</li>
+<li><p class="first">If parsing the contents of an entire file, pass it to the
+<tt class="docutils literal">parseFile</tt> method using:</p>
+<pre class="literal-block">
+expr.parseFile( sourceFile )
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseExceptions</tt> will report the location where an expected token
+or expression failed to match. For example, if we tried to use our
+"Hello, World!" parser to parse "Hello World!" (leaving out the separating
+comma), we would get an exception, with the message:</p>
+<pre class="literal-block">
+pyparsing.ParseException: Expected "," (6), (1,7)
+</pre>
+<p>In the case of complex
+expressions, the reported location may not be exactly where you
+would expect. See more information under <a class="reference internal" href="#parseexception">ParseException</a>.</p>
+</li>
+<li><p class="first">Use the <tt class="docutils literal">Group</tt> class to enclose logical groups of tokens within a
+sublist. This will help organize your results into more
+hierarchical form (the default behavior is to return matching
+tokens as a flat list of matching input strings).</p>
+</li>
+<li><p class="first">Punctuation may be significant for matching, but is rarely of
+much interest in the parsed results. Use the <tt class="docutils literal">suppress()</tt> method
+to keep these tokens from cluttering up your returned lists of
+tokens. For example, <tt class="docutils literal">delimitedList()</tt> matches a succession of
+one or more expressions, separated by delimiters (commas by
+default), but only returns a list of the actual expressions -
+the delimiters are used for parsing, but are suppressed from the
+returned output.</p>
+</li>
+<li><p class="first">Parse actions can be used to convert values from strings to
+other data types (ints, floats, booleans, etc.).</p>
+</li>
+<li><p class="first">Results names are recommended for retrieving tokens from complex
+expressions. It is much easier to access a token using its field
+name than using a positional index, especially if the expression
+contains optional elements. You can also shortcut
+the <tt class="docutils literal">setResultsName</tt> call:</p>
+<pre class="literal-block">
+stats = "AVE:" + realNum.setResultsName("average") + \
+ "MIN:" + realNum.setResultsName("min") + \
+ "MAX:" + realNum.setResultsName("max")
+</pre>
+<p>can now be written as this:</p>
+<pre class="literal-block">
+stats = "AVE:" + realNum("average") + \
+ "MIN:" + realNum("min") + \
+ "MAX:" + realNum("max")
+</pre>
+</li>
+<li><p class="first">Be careful when defining parse actions that modify global variables or
+data structures (as in <tt class="docutils literal">fourFn.py</tt>), especially for low level tokens
+or expressions that may occur within an <tt class="docutils literal">And</tt> expression; an early element
+of an <tt class="docutils literal">And</tt> may match, but the overall expression may fail.</p>
+</li>
+<li><p class="first">Performance of pyparsing may be slow for complex grammars and/or large
+input strings. The <a class="reference external" href="http://psyco.sourceforge.net/">psyco</a> package can be used to improve the speed of the
+pyparsing module with no changes to grammar or program logic - observed
+improvements have been in the 20-50% range.</p>
+</li>
+</ul>
+</div>
+</div>
+<div class="section" id="classes">
+<h1><a class="toc-backref" href="#id4">2 Classes</a></h1>
+<div class="section" id="classes-in-the-pyparsing-module">
+<h2><a class="toc-backref" href="#id5">2.1 Classes in the pyparsing module</a></h2>
+<p><tt class="docutils literal">ParserElement</tt> - abstract base class for all pyparsing classes;
+methods for code to use are:</p>
+<ul>
+<li><p class="first"><tt class="docutils literal">parseString( sourceString, parseAll=False )</tt> - only called once, on the overall
+matching pattern; returns a <a class="reference internal" href="#parseresults">ParseResults</a> object that makes the
+matched tokens available as a list, and optionally as a dictionary,
+or as an object with named attributes; if parseAll is set to True, then
+parseString will raise a ParseException if the grammar does not process
+the complete input string.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">parseFile( sourceFile )</tt> - a convenience function, that accepts an
+input file object or filename. The file contents are passed as a
+string to <tt class="docutils literal">parseString()</tt>. <tt class="docutils literal">parseFile</tt> also supports the <tt class="docutils literal">parseAll</tt> argument.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">scanString( sourceString )</tt> - generator function, used to find and
+extract matching text in the given source string; for each matched text,
+returns a tuple of:</p>
+<ul class="simple">
+<li>matched tokens (packaged as a <a class="reference internal" href="#parseresults">ParseResults</a> object)</li>
+<li>start location of the matched text in the given source string</li>
+<li>end location in the given source string</li>
+</ul>
+<p><tt class="docutils literal">scanString</tt> allows you to scan through the input source string for
+random matches, instead of exhaustively defining the grammar for the entire
+source text (as would be required with <tt class="docutils literal">parseString</tt>).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">transformString( sourceString )</tt> - convenience wrapper function for
+<tt class="docutils literal">scanString</tt>, to process the input source string, and replace matching
+text with the tokens returned from parse actions defined in the grammar
+(see <a class="reference internal" href="#setparseaction">setParseAction</a>).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">searchString( sourceString )</tt> - another convenience wrapper function for
+<tt class="docutils literal">scanString</tt>, returns a list of the matching tokens returned from each
+call to <tt class="docutils literal">scanString</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setName( name )</tt> - associate a short descriptive name for this
+element, useful in displaying exceptions and trace information</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setResultsName( string, listAllMatches=False )</tt> - name to be given
+to tokens matching
+the element; if multiple tokens match within
+a repetition group (such as <tt class="docutils literal">ZeroOrMore</tt> or <tt class="docutils literal">delimitedList</tt>), the
+default is to return only the last matching token - if listAllMatches
+is set to True, then a list of all the matching tokens is returned.
+(New in 1.5.6 - a results name with a trailing '*' character will be
+interpreted as setting listAllMatches to True.)
+Note:
+<tt class="docutils literal">setResultsName</tt> returns a <em>copy</em> of the element so that a single
+basic element can be referenced multiple times and given
+different names within a complex grammar.</p>
+</li>
+</ul>
+<ul id="setparseaction">
+<li><p class="first"><tt class="docutils literal">setParseAction( *fn )</tt> - specify one or more functions to call after successful
+matching of the element; each function is defined as <tt class="docutils literal">fn( s,
+loc, toks )</tt>, where:</p>
+<ul class="simple">
+<li><tt class="docutils literal">s</tt> is the original parse string</li>
+<li><tt class="docutils literal">loc</tt> is the location in the string where matching started</li>
+<li><tt class="docutils literal">toks</tt> is the list of the matched tokens, packaged as a <a class="reference internal" href="#parseresults">ParseResults</a> object</li>
+</ul>
+<p>Multiple functions can be attached to a ParserElement by specifying multiple
+arguments to setParseAction, or by calling setParseAction multiple times.</p>
+<p>Each parse action function can return a modified <tt class="docutils literal">toks</tt> list, to perform conversion, or
+string modifications. For brevity, <tt class="docutils literal">fn</tt> may also be a
+lambda - here is an example of using a parse action to convert matched
+integer tokens from strings to integers:</p>
+<pre class="literal-block">
+intNumber = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] )
+</pre>
+<p>If <tt class="docutils literal">fn</tt> does not modify the <tt class="docutils literal">toks</tt> list, it does not need to return
+anything at all.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setBreak( breakFlag=True )</tt> - if breakFlag is True, calls pdb.set_trace()
+as this expression is about to be parsed</p>
+</li>
+<li><p class="first"><tt class="docutils literal">copy()</tt> - returns a copy of a ParserElement; can be used to use the same
+parse expression in different places in a grammar, with different parse actions
+attached to each</p>
+</li>
+<li><p class="first"><tt class="docutils literal">leaveWhitespace()</tt> - change default behavior of skipping
+whitespace before starting matching (mostly used internally to the
+pyparsing module, rarely used by client code)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setWhitespaceChars( chars )</tt> - define the set of chars to be ignored
+as whitespace before trying to match a specific ParserElement, in place of the
+default set of whitespace (space, tab, newline, and return)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setDefaultWhitespaceChars( chars )</tt> - class-level method to override
+the default set of whitespace chars for all subsequently created ParserElements
+(including copies); useful when defining grammars that treat one or more of the
+default whitespace characters as significant (such as a line-sensitive grammar, to
+omit newline from the list of ignorable whitespace)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">suppress()</tt> - convenience function to suppress the output of the
+given element, instead of wrapping it with a Suppress object.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ignore( expr )</tt> - function to specify parse expression to be
+ignored while matching defined patterns; can be called
+repeatedly to specify multiple expressions; useful to specify
+patterns of comment syntax, for example</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setDebug( dbgFlag=True )</tt> - function to enable/disable tracing output
+when trying to match this element</p>
+</li>
+<li><p class="first"><tt class="docutils literal">validate()</tt> - function to verify that the defined grammar does not
+contain infinitely recursive constructs</p>
+</li>
+</ul>
+<ul class="simple" id="parsewithtabs">
+<li><tt class="docutils literal">parseWithTabs()</tt> - function to override default behavior of converting
+tabs to spaces before parsing the input string; rarely used, except when
+specifying whitespace-significant grammars using the <a class="reference internal" href="#white">White</a> class.</li>
+<li><tt class="docutils literal">enablePackrat()</tt> - a class-level static method to enable a memoizing
+performance enhancement, known as "packrat parsing". Packrat parsing is
+disabled by default, since it may conflict with some user programs that use
+parse actions. To activate the packrat feature, your
+program must call the class method ParserElement.enablePackrat(). If
+your program uses psyco to "compile as you go", you must call
+enablePackrat before calling psyco.full(). If you do not do this,
+Python will crash. For best results, call enablePackrat() immediately
+after importing pyparsing.</li>
+</ul>
+</div>
+<div class="section" id="basic-parserelement-subclasses">
+<h2><a class="toc-backref" href="#id6">2.2 Basic ParserElement subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Literal</tt> - construct with a string to be matched exactly</li>
+<li><tt class="docutils literal">CaselessLiteral</tt> - construct with a string to be matched, but
+without case checking; results are always returned as the
+defining literal, NOT as they are found in the input string</li>
+<li><tt class="docutils literal">Keyword</tt> - similar to Literal, but must be immediately followed by
+whitespace, punctuation, or other non-keyword characters; prevents
+accidental matching of a non-keyword that happens to begin with a
+defined keyword</li>
+<li><tt class="docutils literal">CaselessKeyword</tt> - similar to Keyword, but with caseless matching
+behavior</li>
+</ul>
+<ul id="word">
+<li><p class="first"><tt class="docutils literal">Word</tt> - one or more contiguous characters; construct with a
+string containing the set of allowed initial characters, and an
+optional second string of allowed body characters; for instance,
+a common Word construct is to match a code identifier - in C, a
+valid identifier must start with an alphabetic character or an
+underscore ('_'), followed by a body that can also include numeric
+digits. That is, <tt class="docutils literal">a</tt>, <tt class="docutils literal">i</tt>, <tt class="docutils literal">MAX_LENGTH</tt>, <tt class="docutils literal">_a1</tt>, <tt class="docutils literal">b_109_</tt>, and
+<tt class="docutils literal">plan9FromOuterSpace</tt>
+are all valid identifiers; <tt class="docutils literal">9b7z</tt>, <tt class="docutils literal">$a</tt>, <tt class="docutils literal">.section</tt>, and <tt class="docutils literal">0debug</tt>
+are not. To
+define an identifier using a Word, use either of the following:</p>
+<pre class="literal-block">
+- Word( alphas+"_", alphanums+"_" )
+- Word( srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]") )
+</pre>
+<p>If only one
+string is given, it specifies that the same character set defined
+for the initial character is used for the word body; for instance, to
+define an identifier that can only be composed of capital letters and
+underscores, use:</p>
+<pre class="literal-block">
+- Word( "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" )
+- Word( srange("[A-Z_]") )
+</pre>
+<p>A Word may
+also be constructed with any of the following optional parameters:</p>
+<ul class="simple">
+<li><tt class="docutils literal">min</tt> - indicating a minimum length of matching characters</li>
+<li><tt class="docutils literal">max</tt> - indicating a maximum length of matching characters</li>
+<li><tt class="docutils literal">exact</tt> - indicating an exact length of matching characters</li>
+</ul>
+<p>If exact is specified, it will override any values for min or max.</p>
+<p>New in 1.5.6 - Sometimes you want to define a word using all
+characters in a range except for one or two of them; you can do this
+with the new <tt class="docutils literal">excludeChars</tt> argument. This is helpful if you want to define
+a word with all printables except for a single delimiter character, such
+as '.'. Previously, you would have to create a custom string to pass to <tt class="docutils literal">Word</tt>.
+With this change, you can just create <tt class="docutils literal">Word(printables, <span class="pre">excludeChars='.')</span></tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">CharsNotIn</tt> - similar to <a class="reference internal" href="#word">Word</a>, but matches characters not
+in the given constructor string (accepts only one string for both
+initial and body characters); also supports min, max, and exact
+optional parameters.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Regex</tt> - a powerful construct, that accepts a regular expression
+to be matched at the current parse position; accepts an optional
+flags parameter, corresponding to the flags parameter in the re.compile
+method; if the expression includes named sub-fields, they will be
+represented in the returned <a class="reference internal" href="#parseresults">ParseResults</a></p>
+</li>
+<li><p class="first"><tt class="docutils literal">QuotedString</tt> - supports the definition of custom quoted string
+formats, in addition to pyparsing's built-in dblQuotedString and
+sglQuotedString. QuotedString allows you to specify the following
+parameters:</p>
+<ul class="simple">
+<li><tt class="docutils literal">quoteChar</tt> - string of one or more characters defining the quote delimiting string</li>
+<li><tt class="docutils literal">escChar</tt> - character to escape quotes, typically backslash (default=None)</li>
+<li><tt class="docutils literal">escQuote</tt> - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)</li>
+<li><tt class="docutils literal">multiline</tt> - boolean indicating whether quotes can span multiple lines (default=<tt class="docutils literal">False</tt>)</li>
+<li><tt class="docutils literal">unquoteResults</tt> - boolean indicating whether the matched text should be unquoted (default=<tt class="docutils literal">True</tt>)</li>
+<li><tt class="docutils literal">endQuoteChar</tt> - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)</li>
+</ul>
+</li>
+<li><p class="first"><tt class="docutils literal">SkipTo</tt> - skips ahead in the input string, accepting any
+characters up to the specified pattern; may be constructed with
+the following optional parameters:</p>
+<ul class="simple">
+<li><tt class="docutils literal">include</tt> - if set to true, also consumes the match expression
+(default is <tt class="docutils literal">False</tt>)</li>
+<li><tt class="docutils literal">ignore</tt> - allows the user to specify patterns to not be matched,
+to prevent false matches</li>
+<li><tt class="docutils literal">failOn</tt> - if a literal string or expression is given for this argument, it defines an expression that
+should cause the <tt class="docutils literal">SkipTo</tt> expression to fail, and not skip over that expression</li>
+</ul>
+</li>
+</ul>
+<ul class="simple" id="white">
+<li><tt class="docutils literal">White</tt> - also similar to <a class="reference internal" href="#word">Word</a>, but matches whitespace
+characters. Not usually needed, as whitespace is implicitly
+ignored by pyparsing. However, some grammars are whitespace-sensitive,
+such as those that use leading tabs or spaces to indicate grouping
+or hierarchy. (If matching on tab characters, be sure to call
+<a class="reference internal" href="#parsewithtabs">parseWithTabs</a> on the top-level parse element.)</li>
+<li><tt class="docutils literal">Empty</tt> - a null expression, requiring no characters - will always
+match; useful for debugging and for specialized grammars</li>
+<li><tt class="docutils literal">NoMatch</tt> - opposite of Empty, will never match; useful for debugging
+and for specialized grammars</li>
+</ul>
+</div>
+<div class="section" id="expression-subclasses">
+<h2><a class="toc-backref" href="#id7">2.3 Expression subclasses</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">And</tt> - construct with a list of ParserElements, all of which must
+match for And to match; can also be created using the '+'
+operator; multiple expressions can be Anded together using the '*'
+operator as in:</p>
+<pre class="literal-block">
+ipAddress = Word(nums) + ('.'+Word(nums))*3
+</pre>
+<p>A tuple can be used as the multiplier, indicating a min/max:</p>
+<pre class="literal-block">
+usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2)
+</pre>
+<p>A special form of <tt class="docutils literal">And</tt> is created if the '-' operator is used
+instead of the '+' operator. In the ipAddress example above, if
+no trailing '.' and Word(nums) are found after matching the initial
+Word(nums), then pyparsing will back up in the grammar and try other
+alternatives to ipAddress. However, if ipAddress is defined as:</p>
+<pre class="literal-block">
+strictIpAddress = Word(nums) - ('.'+Word(nums))*3
+</pre>
+<p>then no backing up is done. If the first Word(nums) of strictIpAddress
+is matched, then any mismatch after that will raise a ParseSyntaxException,
+which will halt the parsing process immediately. By careful use of the
+'-' operator, grammars can provide meaningful error messages close to
+the location where the incoming text does not match the specified
+grammar.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Or</tt> - construct with a list of ParserElements, any of which must
+match for Or to match; if more than one expression matches, the
+expression that makes the longest match will be used; can also
+be created using the '^' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">MatchFirst</tt> - construct with a list of ParserElements, any of
+which must match for MatchFirst to match; matching is done
+left-to-right, taking the first expression that matches; can
+also be created using the '|' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Each</tt> - similar to And, in that all of the provided expressions
+must match; however, Each permits matching to be done in any order;
+can also be created using the '&amp;' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Optional</tt> - construct with a ParserElement, but this element is
+not required to match; can be constructed with an optional <tt class="docutils literal">default</tt> argument,
+containing a default string or object to be supplied if the given optional
+parse element is not found in the input string; parse action will only
+be called if a match is found, or if a default is specified</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ZeroOrMore</tt> - similar to Optional, but can be repeated</p>
+</li>
+<li><p class="first"><tt class="docutils literal">OneOrMore</tt> - similar to ZeroOrMore, but at least one match must
+be present</p>
+</li>
+<li><p class="first"><tt class="docutils literal">FollowedBy</tt> - a lookahead expression, requires matching of the given
+expressions, but does not advance the parsing position within the input string</p>
+</li>
+<li><p class="first"><tt class="docutils literal">NotAny</tt> - a negative lookahead expression, prevents matching of named
+expressions, does not advance the parsing position within the input string;
+can also be created using the unary '~' operator</p>
+</li>
+</ul>
+</div>
+<div class="section" id="expression-operators">
+<span id="operators"></span><h2><a class="toc-backref" href="#id8">2.4 Expression operators</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">~</tt> - creates NotAny using the expression after the operator</li>
+<li><tt class="docutils literal">+</tt> - creates And using the expressions before and after the operator</li>
+<li><tt class="docutils literal">|</tt> - creates MatchFirst (first left-to-right match) using the expressions before and after the operator</li>
+<li><tt class="docutils literal">^</tt> - creates Or (longest match) using the expressions before and after the operator</li>
+<li><tt class="docutils literal">&amp;</tt> - creates Each using the expressions before and after the operator</li>
+<li><tt class="docutils literal">*</tt> - creates And by multiplying the expression by the integer operand; if
+expression is multiplied by a 2-tuple, creates an And of (min,max)
+expressions (similar to "{min,max}" form in regular expressions); if
+min is None, interpret as (0,max); if max is None, interpret as
+expr*min + ZeroOrMore(expr)</li>
+<li><tt class="docutils literal">-</tt> - like <tt class="docutils literal">+</tt> but with no backup and retry of alternatives</li>
+<li><tt class="docutils literal">*</tt> - repetition of expression</li>
+<li><tt class="docutils literal">==</tt> - matching expression to string; returns True if the string matches the given expression</li>
+<li><tt class="docutils literal">&lt;&lt;=</tt> - inserts the expression following the operator as the body of the
+Forward expression before the operator (formerly &lt;&lt;, which is now deprecated)</li>
+</ul>
+</div>
+<div class="section" id="positional-subclasses">
+<h2><a class="toc-backref" href="#id9">2.5 Positional subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">StringStart</tt> - matches beginning of the text</li>
+<li><tt class="docutils literal">StringEnd</tt> - matches the end of the text</li>
+<li><tt class="docutils literal">LineStart</tt> - matches beginning of a line (lines delimited by <tt class="docutils literal">\n</tt> characters)</li>
+<li><tt class="docutils literal">LineEnd</tt> - matches the end of a line</li>
+<li><tt class="docutils literal">WordStart</tt> - matches a leading word boundary</li>
+<li><tt class="docutils literal">WordEnd</tt> - matches a trailing word boundary</li>
+</ul>
+</div>
+<div class="section" id="converter-subclasses">
+<h2><a class="toc-backref" href="#id10">2.6 Converter subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Upcase</tt> - converts matched tokens to uppercase (deprecated -
+use <tt class="docutils literal">upcaseTokens</tt> parse action instead)</li>
+<li><tt class="docutils literal">Combine</tt> - joins all matched tokens into a single string, using
+specified joinString (default <tt class="docutils literal"><span class="pre">joinString=""</span></tt>); expects
+all matching tokens to be adjacent, with no intervening
+whitespace (can be overridden by specifying <tt class="docutils literal">adjacent=False</tt> in constructor)</li>
+<li><tt class="docutils literal">Suppress</tt> - clears matched tokens; useful to keep returned
+results from being cluttered with required but uninteresting
+tokens (such as list delimiters)</li>
+</ul>
+</div>
+<div class="section" id="special-subclasses">
+<h2><a class="toc-backref" href="#id11">2.7 Special subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Group</tt> - causes the matched tokens to be enclosed in a list;
+useful in repeated elements like <tt class="docutils literal">ZeroOrMore</tt> and <tt class="docutils literal">OneOrMore</tt> to
+break up matched tokens into groups for each repeated pattern</li>
+<li><tt class="docutils literal">Dict</tt> - like <tt class="docutils literal">Group</tt>, but also constructs a dictionary, using the
+[0]'th elements of all enclosed token lists as the keys, and
+each token list as the value</li>
+<li><tt class="docutils literal">SkipTo</tt> - catch-all matching expression that accepts all characters
+up until the given pattern is found to match; useful for specifying
+incomplete grammars</li>
+<li><tt class="docutils literal">Forward</tt> - placeholder token used to define recursive token
+patterns; when defining the actual expression later in the
+program, insert it into the <tt class="docutils literal">Forward</tt> object using the <tt class="docutils literal">&lt;&lt;=</tt>
+operator (see <tt class="docutils literal">fourFn.py</tt> for an example).</li>
+</ul>
+</div>
+<div class="section" id="other-classes">
+<h2><a class="toc-backref" href="#id12">2.8 Other classes</a></h2>
+<ul id="parseresults">
+<li><p class="first"><tt class="docutils literal">ParseResults</tt> - class used to contain and manage the lists of tokens
+created from parsing the input using the user-defined parse
+expression. ParseResults can be accessed in a number of ways:</p>
+<ul class="simple">
+<li>as a list<ul>
+<li>total list of elements can be found using len()</li>
+<li>individual elements can be found using [0], [1], [-1], etc.</li>
+<li>elements can be deleted using <tt class="docutils literal">del</tt></li>
+<li>the last element (index -1) can be extracted and removed in a single operation
+using <tt class="docutils literal">pop()</tt>, or any element can be extracted and removed
+using <tt class="docutils literal">pop(n)</tt></li>
+</ul>
+</li>
+<li>as a dictionary<ul>
+<li>if <tt class="docutils literal">setResultsName()</tt> is used to name elements within the
+overall parse expression, then these fields can be referenced
+as dictionary elements or as attributes</li>
+<li>the Dict class generates dictionary entries using the data of the
+input text - in addition to ParseResults listed as <tt class="docutils literal">[ [ a1, b1, c1, <span class="pre">...],</span> [ a2, b2, c2, <span class="pre">...]</span> ]</tt>
+it also acts as a dictionary with entries defined as <tt class="docutils literal">{ a1 : [ b1, c1, ... ] }, { a2 : [ b2, c2, ... ] }</tt>;
+this is especially useful when processing tabular data where the first column contains a key
+value for that line of data</li>
+<li>list elements that are deleted using <tt class="docutils literal">del</tt> will still be accessible by their
+dictionary keys</li>
+<li>supports <tt class="docutils literal">get()</tt>, <tt class="docutils literal">items()</tt> and <tt class="docutils literal">keys()</tt> methods, similar to a dictionary</li>
+<li>a keyed item can be extracted and removed using <tt class="docutils literal">pop(key)</tt>. Here
+key must be non-numeric (such as a string), in order to use dict
+extraction instead of list extraction.</li>
+<li>new named elements can be added (in a parse action, for instance), using the same
+syntax as adding an item to a dict (<tt class="docutils literal"><span class="pre">parseResults["X"]="new</span> item"</tt>); named elements can be removed using <tt class="docutils literal">del <span class="pre">parseResults["X"]</span></tt></li>
+</ul>
+</li>
+<li>as a nested list<ul>
+<li>results returned from the Group class are encapsulated within their
+own list structure, so that the tokens can be handled as a hierarchical
+tree</li>
+</ul>
+</li>
+</ul>
+<p>ParseResults can also be converted to an ordinary list of strings
+by calling <tt class="docutils literal">asList()</tt>. Note that this will strip the results of any
+field names that have been defined for any embedded parse elements.
+(The <tt class="docutils literal">pprint</tt> module is especially good at printing out the nested contents
+given by <tt class="docutils literal">asList()</tt>.)</p>
+<p>Finally, ParseResults can be converted to an XML string by calling <tt class="docutils literal">asXML()</tt>. Where
+possible, results will be tagged using the results names defined for the respective
+ParseExpressions. <tt class="docutils literal">asXML()</tt> takes two optional arguments:</p>
+<ul class="simple">
+<li><tt class="docutils literal">doctagname</tt> - for ParseResults that do not have a defined name, this argument
+will wrap the resulting XML in a set of opening and closing tags <tt class="docutils literal">&lt;doctagname&gt;</tt>
+and <tt class="docutils literal">&lt;/doctagname&gt;</tt>.</li>
+<li><tt class="docutils literal">namedItemsOnly</tt> (default=<tt class="docutils literal">False</tt>) - flag to indicate if the generated XML should
+skip items that do not have defined names. If a nested group item is named, then all
+embedded items will be included, whether they have names or not.</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="exception-classes-and-troubleshooting">
+<h2><a class="toc-backref" href="#id13">2.9 Exception classes and Troubleshooting</a></h2>
+<ul id="parseexception">
+<li><p class="first"><tt class="docutils literal">ParseException</tt> - exception returned when a grammar parse fails;
+ParseExceptions have attributes loc, msg, line, lineno, and column; to view the
+text line and location where the reported ParseException occurs, use:</p>
+<pre class="literal-block">
+except ParseException, err:
+ print err.line
+ print " "*(err.column-1) + "^"
+ print err
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">RecursiveGrammarException</tt> - exception returned by <tt class="docutils literal">validate()</tt> if
+the grammar contains a recursive infinite loop, such as:</p>
+<pre class="literal-block">
+badGrammar = Forward()
+goodToken = Literal("A")
+badGrammar &lt;&lt;= Optional(goodToken) + badGrammar
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseFatalException</tt> - exception that parse actions can raise to stop parsing
+immediately. Should be used when a semantic error is found in the input text, such
+as a mismatched XML tag.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseSyntaxException</tt> - subclass of <tt class="docutils literal">ParseFatalException</tt> raised when a
+syntax error is found, based on the use of the '-' operator when defining
+a sequence of expressions in an <tt class="docutils literal">And</tt> expression.</p>
+</li>
+</ul>
+<p>You can also get some insights into the parsing logic using diagnostic parse actions,
+and setDebug(), or test the matching of expression fragments by testing them using
+scanString().</p>
+</div>
+</div>
+<div class="section" id="miscellaneous-attributes-and-methods">
+<h1><a class="toc-backref" href="#id14">3 Miscellaneous attributes and methods</a></h1>
+<div class="section" id="helper-methods">
+<h2><a class="toc-backref" href="#id15">3.1 Helper methods</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">delimitedList( expr, <span class="pre">delim=',')</span></tt> - convenience function for
+matching one or more occurrences of expr, separated by delim.
+By default, the delimiters are suppressed, so the returned results contain
+only the separate list elements. Can optionally specify <tt class="docutils literal">combine=True</tt>,
+indicating that the expressions and delimiters should be returned as one
+combined value (useful for scoped variables, such as "a.b.c", or
+"a::b::c", or paths such as "a/b/c").</p>
+</li>
+<li><p class="first"><tt class="docutils literal">countedArray( expr )</tt> - convenience function for a pattern where a list of
+instances of the given expression are preceded by an integer giving the count of
+elements in the list. Returns an expression that parses the leading integer,
+reads exactly that many expressions, and returns the array of expressions in the
+parse results - the leading integer is suppressed from the results (although it
+is easily reconstructed by using len on the returned array).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">oneOf( string, caseless=False )</tt> - convenience function for quickly declaring an
+alternative set of <tt class="docutils literal">Literal</tt> tokens, by splitting the given string on
+whitespace boundaries. The tokens are sorted so that longer
+matches are attempted first; this ensures that a short token does
+not mask a longer one that starts with the same characters. If <tt class="docutils literal">caseless=True</tt>,
+will create an alternative set of CaselessLiteral tokens.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">dictOf( key, value )</tt> - convenience function for quickly declaring a
+dictionary pattern of <tt class="docutils literal">Dict( ZeroOrMore( Group( key + value ) ) )</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">makeHTMLTags( tagName )</tt> and <tt class="docutils literal">makeXMLTags( tagName )</tt> - convenience
+functions to create definitions of opening and closing tag expressions. Returns
+a pair of expressions, for the corresponding &lt;tag&gt; and &lt;/tag&gt; strings. Includes
+support for attributes in the opening tag, such as &lt;tag attr1="abc"&gt; - attributes
+are returned as keyed tokens in the returned ParseResults. <tt class="docutils literal">makeHTMLTags</tt> is less
+restrictive than <tt class="docutils literal">makeXMLTags</tt>, especially with respect to case sensitivity.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">infixNotation(baseOperand, operatorList)</tt> - (formerly named <tt class="docutils literal">operatorPrecedence</tt>) convenience function to define a
+grammar for parsing infix notation
+expressions with a hierarchical precedence of operators. To use the <tt class="docutils literal">infixNotation</tt>
+helper:</p>
+<ol class="arabic simple">
+<li>Define the base "atom" operand term of the grammar.
+For this simple grammar, the smallest operand is either
+an integer or a variable. This will be the first argument
+to the <tt class="docutils literal">infixNotation</tt> method.</li>
+<li>Define a list of tuples for each level of operator
+precedence. Each tuple is of the form
+<tt class="docutils literal">(opExpr, numTerms, rightLeftAssoc, parseAction)</tt>, where:<ul>
+<li><tt class="docutils literal">opExpr</tt> is the pyparsing expression for the operator;
+may also be a string, which will be converted to a Literal; if
+None, indicates an empty operator, such as the implied
+multiplication operation between 'm' and 'x' in "y = mx + b".
+If <tt class="docutils literal">numTerms</tt> parameter is 3, this must be a 2-tuple containing the 2 delimiting operators.</li>
+<li><tt class="docutils literal">numTerms</tt> is the number of terms for this operator (must
+be 1,2, or 3)</li>
+<li><tt class="docutils literal">rightLeftAssoc</tt> is the indicator whether the operator is
+right or left associative, using the pyparsing-defined
+constants <tt class="docutils literal">opAssoc.RIGHT</tt> and <tt class="docutils literal">opAssoc.LEFT</tt>.</li>
+<li><tt class="docutils literal">parseAction</tt> is the parse action to be associated with
+expressions matching this operator expression (the
+parse action tuple member may be omitted)</li>
+</ul>
+</li>
+<li>Call <tt class="docutils literal">infixNotation</tt> passing the operand expression and
+the operator precedence list, and save the returned value
+as the generated pyparsing expression. You can then use
+this expression to parse input strings, or incorporate it
+into a larger, more complex grammar.</li>
+</ol>
+</li>
+<li><p class="first"><tt class="docutils literal">matchPreviousLiteral</tt> and <tt class="docutils literal">matchPreviousExpr</tt> - function to define an
+expression that matches the same content
+as was parsed in a previous parse expression. For instance:</p>
+<pre class="literal-block">
+first = Word(nums)
+matchExpr = first + ":" + matchPreviousLiteral(first)
+</pre>
+<p>will match "1:1", but not "1:2". Since this matches at the literal
+level, this will also match the leading "1:1" in "1:10".</p>
+<p>In contrast:</p>
+<pre class="literal-block">
+first = Word(nums)
+matchExpr = first + ":" + matchPreviousExpr(first)
+</pre>
+<p>will <em>not</em> match the leading "1:1" in "1:10"; the expressions are
+evaluated first, and then compared, so "1" is compared with "10".</p>
+</li>
+<li><p class="first"><tt class="docutils literal">nestedExpr(opener, closer, content=None, ignoreExpr=quotedString)</tt> - method for defining nested
+lists enclosed in opening and closing delimiters.</p>
+<ul class="simple">
+<li><tt class="docutils literal">opener</tt> - opening character for a nested list (default="("); can also be a pyparsing expression</li>
+<li><tt class="docutils literal">closer</tt> - closing character for a nested list (default=")"); can also be a pyparsing expression</li>
+<li><tt class="docutils literal">content</tt> - expression for items within the nested lists (default=None)</li>
+<li><tt class="docutils literal">ignoreExpr</tt> - expression for ignoring opening and closing delimiters (default=quotedString)</li>
+</ul>
+<p>If an expression is not provided for the content argument, the nested
+expression will capture all whitespace-delimited content between delimiters
+as a list of separate values.</p>
+<p>Use the <tt class="docutils literal">ignoreExpr</tt> argument to define expressions that may contain
+opening or closing characters that should not be treated as opening
+or closing characters for nesting, such as quotedString or a comment
+expression. Specify multiple expressions using an Or or MatchFirst.
+The default is quotedString, but if no expressions are to be ignored,
+then pass None for this argument.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">indentedBlock( statementExpr, indentationStackVar, indent=True)</tt> -
+function to define an indented block of statements, similar to
+indentation-based blocking in Python source code:</p>
+<ul class="simple">
+<li><tt class="docutils literal">statementExpr</tt> - the expression defining a statement that
+will be found in the indented block; a valid indentedBlock
+must contain at least 1 matching statementExpr</li>
+<li><tt class="docutils literal">indentationStackVar</tt> - a Python list variable; this variable
+should be common to all <tt class="docutils literal">indentedBlock</tt> expressions defined
+within the same grammar, and should be reinitialized to [1]
+each time the grammar is to be used</li>
+<li><tt class="docutils literal">indent</tt> - a boolean flag indicating whether the expressions
+within the block must be indented from the current parse
+location; if using indentedBlock to define the left-most
+statements (all starting in column 1), set indent to False</li>
+</ul>
+</li>
+</ul>
+<ul id="originaltextfor">
+<li><p class="first"><tt class="docutils literal">originalTextFor( expr )</tt> - helper function to preserve the originally parsed text, regardless of any
+token processing or conversion done by the contained expression. For instance, the following expression:</p>
+<pre class="literal-block">
+fullName = Word(alphas) + Word(alphas)
+</pre>
+<p>will return the parse of "John Smith" as ['John', 'Smith']. In some applications, the actual name as it
+was given in the input string is what is desired. To do this, use <tt class="docutils literal">originalTextFor</tt>:</p>
+<pre class="literal-block">
+fullName = originalTextFor(Word(alphas) + Word(alphas))
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ungroup( expr )</tt> - function to "ungroup" returned tokens; useful
+to undo the default behavior of And to always group the returned tokens, even
+if there is only one in the list. (New in 1.5.6)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">lineno( loc, string )</tt> - function to give the line number of the
+location within the string; the first line is line 1, newlines
+start new rows</p>
+</li>
+<li><p class="first"><tt class="docutils literal">col( loc, string )</tt> - function to give the column number of the
+location within the string; the first column is column 1,
+newlines reset the column number to 1</p>
+</li>
+<li><p class="first"><tt class="docutils literal">line( loc, string )</tt> - function to retrieve the line of text
+representing <tt class="docutils literal">lineno( loc, string )</tt>; useful when printing out diagnostic
+messages for exceptions</p>
+</li>
+<li><p class="first"><tt class="docutils literal">srange( rangeSpec )</tt> - function to define a string of characters,
+given a string of the form used by regexp string ranges, such as <tt class="docutils literal"><span class="pre">"[0-9]"</span></tt> for
+all numeric digits, <tt class="docutils literal"><span class="pre">"[A-Z_]"</span></tt> for uppercase characters plus underscore, and
+so on (note that rangeSpec does not include support for generic regular
+expressions, just string range specs)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">getTokensEndLoc()</tt> - function to call from within a parse action to get
+the ending location for the matched tokens</p>
+</li>
+<li><p class="first"><tt class="docutils literal">traceParseAction(fn)</tt> - decorator function to debug parse actions. Lists
+each call, called arguments, and return value or exception</p>
+</li>
+</ul>
+</div>
+<div class="section" id="helper-parse-actions">
+<h2><a class="toc-backref" href="#id16">3.2 Helper parse actions</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">removeQuotes</tt> - removes the first and last characters of a quoted string;
+useful to remove the delimiting quotes from quoted strings</p>
+</li>
+<li><p class="first"><tt class="docutils literal">replaceWith(replString)</tt> - returns a parse action that simply returns the
+replString; useful when using transformString, or converting HTML entities, as in:</p>
+<pre class="literal-block">
+nbsp = Literal("&amp;nbsp;").setParseAction( replaceWith("&lt;BLANK&gt;") )
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">keepOriginalText</tt> - (deprecated, use <a class="reference internal" href="#originaltextfor">originalTextFor</a> instead) restores any internal whitespace or suppressed
+text within the tokens for a matched parse
+expression. This is especially useful when defining expressions
+for scanString or transformString applications.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">withAttribute( *args, **kwargs )</tt> - helper to create a validating parse action to be used with start tags created
+with <tt class="docutils literal">makeXMLTags</tt> or <tt class="docutils literal">makeHTMLTags</tt>. Use <tt class="docutils literal">withAttribute</tt> to qualify a starting tag
+with a required attribute value, to avoid false matches on common tags such as
+<tt class="docutils literal">&lt;TD&gt;</tt> or <tt class="docutils literal">&lt;DIV&gt;</tt>.</p>
+<p><tt class="docutils literal">withAttribute</tt> can be called with:</p>
+<ul class="simple">
+<li>keyword arguments, as in <tt class="docutils literal"><span class="pre">(class="Customer",align="right")</span></tt>, or</li>
+<li>a list of name-value tuples, as in <tt class="docutils literal">( ("ns1:class", <span class="pre">"Customer"),</span> <span class="pre">("ns2:align","right")</span> )</tt></li>
+</ul>
+<p>An attribute can be specified to have the special value
+<tt class="docutils literal">withAttribute.ANY_VALUE</tt>, which will match any value - use this to
+ensure that an attribute is present but any attribute value is
+acceptable.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">downcaseTokens</tt> - converts all matched tokens to lowercase</p>
+</li>
+<li><p class="first"><tt class="docutils literal">upcaseTokens</tt> - converts all matched tokens to uppercase</p>
+</li>
+<li><p class="first"><tt class="docutils literal">matchOnlyAtCol( columnNumber )</tt> - a parse action that verifies that
+an expression was matched at a particular column, raising a
+ParseException if matching at a different column number; useful when parsing
+tabular data</p>
+</li>
+</ul>
+</div>
+<div class="section" id="common-string-and-token-constants">
+<h2><a class="toc-backref" href="#id17">3.3 Common string and token constants</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">alphas</tt> - same as <tt class="docutils literal">string.letters</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">nums</tt> - same as <tt class="docutils literal">string.digits</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">alphanums</tt> - a string containing <tt class="docutils literal">alphas + nums</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">alphas8bit</tt> - a string containing alphabetic 8-bit characters:</p>
+<pre class="literal-block">
+ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">printables</tt> - same as <tt class="docutils literal">string.printable</tt>, minus the space (<tt class="docutils literal">' '</tt>) character</p>
+</li>
+<li><p class="first"><tt class="docutils literal">empty</tt> - a global <tt class="docutils literal">Empty()</tt>; will always match</p>
+</li>
+<li><p class="first"><tt class="docutils literal">sglQuotedString</tt> - a string of characters enclosed in single quotes; may
+include whitespace, but not newlines</p>
+</li>
+<li><p class="first"><tt class="docutils literal">dblQuotedString</tt> - a string of characters enclosed in double quotes; may
+include whitespace, but not newlines</p>
+</li>
+<li><p class="first"><tt class="docutils literal">quotedString</tt> - <tt class="docutils literal">sglQuotedString | dblQuotedString</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">cStyleComment</tt> - a comment block delimited by <tt class="docutils literal"><span class="pre">'/*'</span></tt> and <tt class="docutils literal"><span class="pre">'*/'</span></tt> sequences; can span
+multiple lines, but does not support nesting of comments</p>
+</li>
+<li><p class="first"><tt class="docutils literal">htmlComment</tt> - a comment block delimited by <tt class="docutils literal"><span class="pre">'&lt;!--'</span></tt> and <tt class="docutils literal"><span class="pre">'--&gt;'</span></tt> sequences; can span
+multiple lines, but does not support nesting of comments</p>
+</li>
+<li><p class="first"><tt class="docutils literal">commaSeparatedList</tt> - similar to <tt class="docutils literal">delimitedList</tt>, except that the
+list expressions can be any text value, or a quoted string; quoted strings can
+safely include commas without incorrectly breaking the string into two tokens</p>
+</li>
+<li><p class="first"><tt class="docutils literal">restOfLine</tt> - all remaining printable characters up to but not including the next
+newline</p>
+</li>
+</ul>
+</div>
+</div>
+</div>
+</body>
+</html>
|