summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2008-05-12 08:36:20 +0000
committerptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2008-05-12 08:36:20 +0000
commitcc5bc1be1f2651ebcc07ed28c71bffd2206495f1 (patch)
tree932a6fa44b74c65919f98880bb3ef5e6923ee849
parent21e0f3b9c0afeab9b77e52511a1633916823b989 (diff)
downloadpyparsing-cc5bc1be1f2651ebcc07ed28c71bffd2206495f1.tar.gz
Updated PY3K references
Added '-' operator to implement ErrorStop feature (detecting syntax errors in optional grammar branches) git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/src@152 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--CHANGES681
-rw-r--r--pyparsing.py753
2 files changed, 796 insertions, 638 deletions
diff --git a/CHANGES b/CHANGES
index c079c84..6e84229 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,19 +2,116 @@
Change Log
==========
-Version 1.4.11 - ???
+Version 1.5.0 - ???
--------------------
+This version of pyparsing includes work on two long-standing
+FAQs: support for forcing parsing of the complete input string
+(without having to explicitly append StringEnd() to the grammar),
+and a method to improve the mechanism of detecting where syntax
+errors occur in an input string with various optional and
+alternative paths. With these changes (and the past few minor
+updates), I thought it was finally time to bump the minor rev
+number on pyparsing - so 1.5.0 is now available! Read on...
+
+- AT LAST!!! You can now call parseString and have it raise
+ an exception if the expression does not parse the entire
+ input string. This has been an FAQ for a LONG time.
+
+ The parseString method now includes an optional parseAll
+ argument (default=False). If parseAll is set to True, then
+ the given parse expression must parse the entire input
+ string. (This is equivalent to adding StringEnd() to the
+ end of the expression.) The default value is False to
+ retain backward compatibility.
+
+ Inspired by MANY requests over the years, most recently by
+ ecir-hana on the pyparsing wiki!
+
+- Added new operator '-' for composing grammar sequences. '-'
+ behaves just like '+' in creating And expressions, but '-'
+ is used to mark grammar structures that should stop parsing
+ immediately and report a syntax error, rather than just
+ backtracking to the last successful parse and trying another
+ alternative. For instance, running the following code:
+
+ port_definition = Keyword("port") + '=' + Word(nums)
+ entity_definition = Keyword("entity") + "{" + \
+ Optional(port_definition) + "}"
+
+ entity_definition.parseString("entity { port 100 }")
+
+ pyparsing fails to detect the missing '=' in the port definition.
+ But, since this expression is optional, pyparsing then proceeds
+ to try to match the closing '}' of the entity_definition. Not
+ finding it, pyparsing reports that there was no '}' after the '{'
+ character. Instead, we would like pyparsing to parse the 'port'
+ keyword, and if not followed by an equals sign and an integer,
+ to signal this as a syntax error.
+
+ This can now be done simply by changing the port_definition to:
+
+ port_definition = Keyword("port") - '=' + Word(nums)
+
+ Now after successfully parsing 'port', pyparsing must also find
+ an equals sign and an integer, or it will raise a fatal syntax
+ exception.
+
+ By judicious insertion of '-' operators, a pyparsing developer
+ can have their grammar report much more informative syntax error
+ messages.
+
+ Patches and suggestions proposed by several contributors on
+ the pyparsing mailing list and wiki - special thanks to
+ Eike Welk and Thomas/Poldy on the wiki!
+
+- Fixed bug in nestedExpr in which ignored expressions needed
+ to be set off with whitespace. Reported by Stefaan Himpe,
+ nice catch!
+
+- Expanded multiplication of an expression by a tuple, to
+ accept tuple values of None:
+ . expr*(n,None) or expr*(n,) is equivalent
+ to expr*n + ZeroOrMore(expr)
+ (read as "at least n instances of expr")
+ . expr*(None,n) is equivalent to expr*(0,n)
+ (read as "0 to n instances of expr")
+ . expr*(None,None) is equivalent to ZeroOrMore(expr)
+ . expr*(1,None) is equivalent to OneOrMore(expr)
+
+ Note that expr*(None,n) does not raise an exception if
+ more than n exprs exist in the input stream; that is
+ expr*(None,n) does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ expr*(None,n) + ~expr
+
+- Added None as a possible operator for operatorPrecedence.
+ None signifies "no operator", as in multiplying m times x
+ in "y=mx+b".
+
+- Fixed bug in Each, reported by Michael Ramirez, in which the
+ order of terms in the Each affected the parsing of the results.
+ Problem was due to premature grouping of the expressions in
+ the overall Each during grammar construction, before the
+ complete Each was defined. Thanks, Michael!
+
+- Cleaned up Py3K compatibility statements, including exception
+ construction statements, and better equivalence between _ustr
+ and basestring, and __nonzero__ and __bool__.
+
+
+Version 1.4.11 - February, 2008
+-------------------------------
- With help from Robert A. Clark, this version of pyparsing
- is compatible with Python 3.0a3. Thanks for the help,
+ is compatible with Python 3.0a3. Thanks for the help,
Robert!
- Added WordStart and WordEnd positional classes, to support
expressions that must occur at the start or end of a word.
Proposed by piranha on the pyparsing wiki, good idea!
-- Added matchOnlyAtCol helper parser action, to simplify
+- Added matchOnlyAtCol helper parser action, to simplify
parsing log or data files that have optional fields that are
- column dependent. Inspired by a discussion thread with
+ column dependent. Inspired by a discussion thread with
hubritic on comp.lang.python.
- Added withAttribute.ANY_VALUE as a match-all value when using
@@ -24,24 +121,24 @@ Version 1.4.11 - ???
- Added get() method to ParseResults, similar to dict.get().
Suggested by new pyparsing user, Alejandro Dubrovksy, thanks!
-- Added '==' short-cut to see if a given string matches a
+- Added '==' short-cut to see if a given string matches a
pyparsing expression. For instance, you can now write:
integer = Word(nums)
if "123" == integer:
# do something
-
+
print [ x for x in "123 234 asld".split() if x==integer ]
# prints ['123', '234']
- Simplified the use of nestedExpr when using an expression for
the opening or closing delimiters. Now the content expression
- will not have to explicitly negate closing delimiters. Found
+ will not have to explicitly negate closing delimiters. Found
while working with dfinnie on GHOP Task #277, thanks!
- Fixed bug when defining ignorable expressions that are
later enclosed in a wrapper expression (such as ZeroOrMore,
- OneOrMore, etc.) - found while working with Prabhu
+ OneOrMore, etc.) - found while working with Prabhu
Gurumurthy, thanks Prabhu!
- Fixed bug in withAttribute in which keys were automatically
@@ -52,13 +149,13 @@ Version 1.4.11 - ???
XML.
- Changed '<<' operator on Forward to return None, since this
- is really used as a pseudo-assignment operator, not as a
- left-shift operator. By returning None, it is easier to
+ is really used as a pseudo-assignment operator, not as a
+ left-shift operator. By returning None, it is easier to
catch faulty statements such as a << b | c, where precedence
- of operations causes the '|' operation to be performed
+ of operations causes the '|' operation to be performed
*after* inserting b into a, so no alternation is actually
implemented. The correct form is a << (b | c). With this
- change, an error will be reported instead of silently
+ change, an error will be reported instead of silently
clipping the alternative term. (Note: this may break some
existing code, but if it does, the code had a silent bug in
it anyway.) Proposed by wcbarksdale on the pyparsing wiki,
@@ -66,7 +163,7 @@ Version 1.4.11 - ???
- Several unit tests were added to pyparsing's regression
suite, courtesy of the Google Highly-Open Participation
- Contest. Thanks to all who administered and took part in
+ Contest. Thanks to all who administered and took part in
this event!
@@ -74,13 +171,13 @@ Version 1.4.10 - December 9, 2007
---------------------------------
- Fixed bug introduced in v1.4.8, parse actions were called for
intermediate operator levels, not just the deepest matching
- operation level. Again, big thanks to Torsten Marek for
+ operation level. Again, big thanks to Torsten Marek for
helping isolate this problem!
-
+
Version 1.4.9 - December 8, 2007
--------------------------------
-- Added '*' multiplication operator support when creating
+- Added '*' multiplication operator support when creating
grammars, accepting either an integer, or a two-integer
tuple multiplier, as in:
ipAddress = Word(nums) + ('.'+Word(nums))*3
@@ -93,7 +190,7 @@ Version 1.4.9 - December 8, 2007
Thanks to Michael Ramirez for raising this issue.
- Fixed internal bug in ParseResults - when an item was deleted,
- the key indices were not updated. Thanks to Tim Mitchell for
+ the key indices were not updated. Thanks to Tim Mitchell for
posting a bugfix patch to the SF bug tracking system!
- Fixed internal bug in operatorPrecedence - when the results of
@@ -101,13 +198,13 @@ Version 1.4.9 - December 8, 2007
tokens were sent. Reported by Torsten Marek, nice job!
- Added pop() method to ParseResults. If pop is called with an
- integer or with no arguments, it will use list semantics and
- update the ParseResults' list of tokens. If pop is called with
- a non-integer (a string, for instance), then it will use dict
- semantics and update the ParseResults' internal dict.
+ integer or with no arguments, it will use list semantics and
+ update the ParseResults' list of tokens. If pop is called with
+ a non-integer (a string, for instance), then it will use dict
+ semantics and update the ParseResults' internal dict.
Suggested by Donn Ingle, thanks Donn!
-- Fixed quoted string built-ins to accept '\xHH' hex characters
+- Fixed quoted string built-ins to accept '\xHH' hex characters
within the string.
@@ -120,7 +217,7 @@ Version 1.4.8 - October, 2007
- Added withAttribute parse action helper, to simplify creating
filtering parse actions to attach to expressions returned by
makeHTMLTags and makeXMLTags. Use withAttribute to qualify a
- starting tag with one or more required attribute values, to avoid
+ starting tag with one or more required attribute values, to avoid
false matches on common tags such as <TD> or <DIV>.
- Added new examples nested.py and withAttribute.py to demonstrate
@@ -128,10 +225,10 @@ Version 1.4.8 - October, 2007
- Added performance speedup to grammars using operatorPrecedence,
instigated by Stefan Reichör - thanks for the feedback, Stefan!
-
+
- Fixed bug/typo when deleting an element from a ParseResults by
using the element's results name.
-
+
- Fixed whitespace-skipping bug in wrapper classes (such as Group,
Suppress, Combine, etc.) and when using setDebug(), reported by
new pyparsing user dazzawazza on SourceForge, nice job!
@@ -139,11 +236,11 @@ Version 1.4.8 - October, 2007
- Added restriction to prevent defining Word or CharsNotIn expressions
with minimum length of 0 (should use Optional if this is desired),
and enhanced docstrings to reflect this limitation. Issue was
- raised by Joey Tallieu, who submitted a patch with a slightly
+ raised by Joey Tallieu, who submitted a patch with a slightly
different solution. Thanks for taking the initiative, Joey, and
please keep submitting your ideas!
-
-- Fixed bug in makeHTMLTags that did not detect HTML tag attributes
+
+- Fixed bug in makeHTMLTags that did not detect HTML tag attributes
with no '= value' portion (such as "<td nowrap>"), reported by
hamidh on the pyparsing wiki - thanks!
@@ -159,18 +256,18 @@ Version 1.4.7 - July, 2007
stats = "AVE:" + realNum.setResultsName("average") + \
"MIN:" + realNum.setResultsName("min") + \
- "MAX:" + realNum.setResultsName("max")
+ "MAX:" + realNum.setResultsName("max")
can now be written as this:
-
+
stats = "AVE:" + realNum("average") + \
"MIN:" + realNum("min") + \
- "MAX:" + realNum("max")
+ "MAX:" + realNum("max")
The intent behind this change is to make it simpler to define results
names for significant fields within the expression, while keeping
the grammar syntax clean and uncluttered.
-
+
- Fixed bug when packrat parsing is enabled, with cached ParseResults
being updated by subsequent parsing. Reported on the pyparsing
wiki by Kambiz, thanks!
@@ -180,20 +277,20 @@ Version 1.4.7 - July, 2007
- Fixed bug in example simpleBool.py, corrected precedence of "and" vs.
"or" operations.
-
+
- Fixed bug in Dict class, in which keys were converted to strings
- whether they needed to be or not. Have narrowed this logic to
- convert keys to strings only if the keys are ints (which would
+ whether they needed to be or not. Have narrowed this logic to
+ convert keys to strings only if the keys are ints (which would
confuse __getitem__ behavior for list indexing vs. key lookup).
- Added ParserElement method setBreak(), which will invoke the pdb
- module's set_trace() function when this expression is about to be
+ module's set_trace() function when this expression is about to be
parsed.
- Fixed bug in StringEnd in which reading off the end of the input
string raises an exception - should match. Resolved while
answering a question for Shawn on the pyparsing wiki.
-
+
Version 1.4.6 - April, 2007
---------------------------
@@ -202,39 +299,39 @@ Version 1.4.6 - April, 2007
raise ParseFatalException, "unexpected text: 'Spanish Inquisition'"
- Added method getTokensEndLoc(), to be called from within a parse action,
- for those parse actions that need both the starting *and* ending
- location of the parsed tokens within the input text.
+ for those parse actions that need both the starting *and* ending
+ location of the parsed tokens within the input text.
- Enhanced behavior of keepOriginalText so that named parse fields are
preserved, even though tokens are replaced with the original input
text matched by the current expression. Also, cleaned up the stack
traversal to be more robust. Suggested by Tim Arnold - thanks, Tim!
-- Fixed subtle bug in which countedArray (and similar dynamic
+- Fixed subtle bug in which countedArray (and similar dynamic
expressions configured in parse actions) failed to match within Or,
- Each, FollowedBy, or NotAny. Reported by Ralf Vosseler, thanks for
+ Each, FollowedBy, or NotAny. Reported by Ralf Vosseler, thanks for
your patience, Ralf!
-
+
- Fixed Unicode bug in upcaseTokens and downcaseTokens parse actions,
scanString, and default debugging actions; reported (and patch submitted)
by Nikolai Zamkovoi, spasibo!
- Fixed bug when saving a tuple as a named result. The returned
token list gave the proper tuple value, but accessing the result by
- name only gave the first element of the tuple. Reported by
+ name only gave the first element of the tuple. Reported by
Poromenos, nice catch!
- Fixed bug in makeHTMLTags/makeXMLTags, which failed to match tag
attributes with namespaces.
- Fixed bug in SkipTo when setting include=True, to have the skipped-to
- tokens correctly included in the returned data. Reported by gunars on
+ tokens correctly included in the returned data. Reported by gunars on
the pyparsing wiki, thanks!
- Fixed typobug in OnlyOnce.reset method, omitted self argument.
Submitted by eike welk, thanks for the lint-picking!
-- Added performance enhancement to Forward class, suggested by
+- Added performance enhancement to Forward class, suggested by
akkartik on the pyparsing Wiki discussion, nice work!
- Added optional asKeyword to Word constructor, to indicate that the
@@ -247,9 +344,9 @@ Version 1.4.6 - April, 2007
- Added holaMundo.py example, excerpted from Marco Alfonso's blog -
muchas gracias, Marco!
-
+
- Modified internal cyclic references in ParseResults to use weakrefs;
- this should help reduce the memory footprint of large parsing
+ this should help reduce the memory footprint of large parsing
programs, at some cost to performance (3-5%). Suggested by bca48150 on
the pyparsing wiki, thanks!
@@ -261,7 +358,7 @@ Version 1.4.6 - April, 2007
(Suggested by eike welk in response to some unexplained inconsistencies
between parsed location and offsets in the input string.)
-- Cleaned up internal decorators to preserve function names,
+- Cleaned up internal decorators to preserve function names,
docstrings, etc.
@@ -278,21 +375,21 @@ Version 1.4.5 - December, 2006
Jeff Poole, thanks Jeff!)
- Fixed minor bug in makeHTMLTags that did not recognize tag attributes
- with embedded '-' or '_' characters. Also, added support for
- passing expressions to makeHTMLTags and makeXMLTags, and used this
+ with embedded '-' or '_' characters. Also, added support for
+ passing expressions to makeHTMLTags and makeXMLTags, and used this
feature to define the globals anyOpenTag and anyCloseTag.
-
+
- Fixed error in alphas8bit, I had omitted the y-with-umlaut character.
- Added punc8bit string to complement alphas8bit - it contains all the
non-alphabetic, non-blank 8-bit characters.
-
+
- Added commonHTMLEntity expression, to match common HTML "ampersand"
codes, such as "&lt;", "&gt;", "&amp;", "&nbsp;", and "&quot;". This
expression also defines a results name 'entity', which can be used
to extract the entity field (that is, "lt", "gt", etc.). Also added
built-in parse action replaceHTMLEntity, which can be attached to
- commonHTMLEntity to translate "&lt;", "&gt;", "&amp;", "&nbsp;", and
+ commonHTMLEntity to translate "&lt;", "&gt;", "&amp;", "&nbsp;", and
"&quot;" to "<", ">", "&", " ", and '"'.
- Added example, htmlStripper.py, that strips HTML tags and scripts
@@ -307,12 +404,12 @@ Version 1.4.4 - October, 2006
1, 2, or 3 arguments.
- Enhanced parse action normalization to support using classes as
- parse actions; that is, the class constructor is called at parse
- time and the __init__ function is called with 0, 1, 2, or 3
- arguments. If passing a class as a parse action, the __init__
- method must use one of the valid parse action parameter list
- formats. (This technique is useful when using pyparsing to compile
- parsed text into a series of application objects - see the new
+ parse actions; that is, the class constructor is called at parse
+ time and the __init__ function is called with 0, 1, 2, or 3
+ arguments. If passing a class as a parse action, the __init__
+ method must use one of the valid parse action parameter list
+ formats. (This technique is useful when using pyparsing to compile
+ parsed text into a series of application objects - see the new
example simpleBool.py.)
- Fixed bug in ParseResults when setting an item using an integer
@@ -323,9 +420,9 @@ Version 1.4.4 - October, 2006
- Fixed bug when a Combine contained an embedded Forward expression,
reported by cie on the pyparsing wiki - good catch!
-
+
- Fixed listAllMatches bug, when a listAllMatches result was
- nested within another result. (Reported by don pasquale on
+ nested within another result. (Reported by don pasquale on
comp.lang.python, well done!)
- Fixed bug in ParseResults items() method, when returning an item
@@ -339,7 +436,7 @@ Version 1.4.4 - October, 2006
- Optimized re's for cppStyleComment and quotedString for better
re performance - also provided by Ralph Corderoy, thanks!
-- Added new example, indentedGrammarExample.py, showing how to
+- Added new example, indentedGrammarExample.py, showing how to
define a grammar using indentation to show grouping (as Python
does for defining statement nesting). Instigated by an e-mail
discussion with Andrew Dalke, thanks Andrew!
@@ -348,9 +445,9 @@ Version 1.4.4 - October, 2006
with Ralph Corderoy and Paolo Losi), to facilitate definition of
grammars for expressions with unary and binary operators. For
instance, this grammar defines a 6-function arithmetic expression
- grammar, with unary plus and minus, proper operator precedence,and
+ grammar, with unary plus and minus, proper operator precedence, and
right- and left-associativity:
-
+
expr = operatorPrecedence( operand,
[("!", 1, opAssoc.LEFT),
("^", 2, opAssoc.RIGHT),
@@ -365,22 +462,22 @@ Version 1.4.4 - October, 2006
- Added new helpers matchPreviousLiteral and matchPreviousExpr, for
creating adaptive parsing expressions that match the same content
as was parsed in a previous parse expression. For instance:
-
+
first = Word(nums)
matchExpr = first + ":" + matchPreviousLiteral(first)
-
+
will match "1:1", but not "1:2". Since this matches at the literal
level, this will also match the leading "1:1" in "1:10".
-
+
In contrast:
-
+
first = Word(nums)
matchExpr = first + ":" + matchPreviousExpr(first)
-
+
will *not* match the leading "1:1" in "1:10"; the expressions are
evaluated first, and then compared, so "1" is compared with "10".
-- Added keepOriginalText parse action. Sometimes pyparsing's
+- Added keepOriginalText parse action. Sometimes pyparsing's
whitespace-skipping leaves out too much whitespace. Adding this
parse action will restore any internal whitespace for a parse
expression. This is especially useful when defining expressions
@@ -389,10 +486,10 @@ Version 1.4.4 - October, 2006
- Added __add__ method for ParseResults class, to better support
using Python sum built-in for summing ParseResults objects returned
from scanString.
-
+
- Added reset method for the new OnlyOnce class wrapper for parse
actions (to allow a grammar to be used multiple times).
-
+
- Added optional maxMatches argument to scanString and searchString,
to short-circuit scanning after 'n' expression matches are found.
@@ -412,7 +509,7 @@ Version 1.4.3 - July, 2006
- Simplified interface to parse actions that do not require all 3
parse action arguments. Very rarely do parse actions require more
- than just the parsed tokens, yet parse actions still require all
+ than just the parsed tokens, yet parse actions still require all
3 arguments including the string being parsed and the location
within the string where the parse expression was matched. With this
release, parse actions may now be defined to be called as:
@@ -420,17 +517,17 @@ Version 1.4.3 - July, 2006
. fn(locn,tokens)
. fn(tokens)
. fn()
- The setParseAction and addParseAction methods will internally decorate
- the provided parse actions with compatible wrappers to conform to
+ The setParseAction and addParseAction methods will internally decorate
+ the provided parse actions with compatible wrappers to conform to
the full (string,locn,tokens) argument sequence.
- REMOVED SUPPORT FOR RETURNING PARSE LOCATION FROM A PARSE ACTION.
I announced this in March, 2004, and gave a final warning in the last
release. Now you can return a tuple from a parse action, and it will
- be treated like any other return value (i.e., the tuple will be
- substituted for the incoming tokens passed to the parse action,
+ be treated like any other return value (i.e., the tuple will be
+ substituted for the incoming tokens passed to the parse action,
which is useful when trying to parse strings into tuples).
-
+
- Added setFailAction method, taking a callable function fn that
takes the arguments fn(s,loc,expr,err) where:
. s - string being parsed
@@ -444,41 +541,41 @@ Version 1.4.3 - July, 2006
- Added class OnlyOnce as helper wrapper for parse actions. OnlyOnce
only permits a parse action to be called one time, after which
all subsequent calls throw a ParseException.
-
+
- Added traceParseAction decorator to help debug parse actions.
Simply insert "@traceParseAction" ahead of the definition of your
- parse action, and each invocation will be displayed, along with
+ parse action, and each invocation will be displayed, along with
incoming arguments, and returned value.
-
-- Fixed bug when copying ParserElements using copy() or
+
+- Fixed bug when copying ParserElements using copy() or
setResultsName(). (Reported by Dan Thill, great catch!)
-
-- Fixed bug in asXML() where token text contains <, >, and &
+
+- Fixed bug in asXML() where token text contains <, >, and &
characters - generated XML now escapes these as &lt;, &gt; and
&amp;. (Reported by Jacek Sieka, thanks!)
-
-- Fixed bug in SkipTo() when searching for a StringEnd(). (Reported
+
+- Fixed bug in SkipTo() when searching for a StringEnd(). (Reported
by Pete McEvoy, thanks Pete!)
-
+
- Fixed "except Exception" statements, the most critical added as part
of the packrat parsing enhancement. (Thanks, Erick Tryzelaar!)
-
-- Fixed end-of-string infinite looping on LineEnd and StringEnd
+
+- Fixed end-of-string infinite looping on LineEnd and StringEnd
expressions. (Thanks again to Erick Tryzelaar.)
-
+
- Modified setWhitespaceChars to return self, to be consistent with
other ParserElement modifiers. (Suggested by Erick Tryzelaar.)
- Fixed bug/typo in new ParseResults.dump() method.
- Fixed bug in searchString() method, in which only the first token of
- an expression was returned. searchString() now returns a
+ an expression was returned. searchString() now returns a
ParseResults collection of all search matches.
-- Added example program removeLineBreaks.py, a string transformer that
+- Added example program removeLineBreaks.py, a string transformer that
converts text files with hard line-breaks into one with line breaks
only between paragraphs.
-
+
- Added example program listAllMatches.py, to illustrate using the
listAllMatches option when specifying results names (also shows new
support for passing lists to oneOf).
@@ -489,30 +586,30 @@ Version 1.4.3 - July, 2006
- Added example program parseListString.py, which can parse the
string representation of a Python list back into a true list. Taken
- mostly from my PyCon presentation examples, but now with support
+ mostly from my PyCon presentation examples, but now with support
for tuple elements, too!
Version 1.4.2 - April 1, 2006 (No foolin'!)
-------------------------------------------
-- Significant speedup from memoizing nested expressions (a technique
- known as "packrat parsing"), thanks to Chris Lesniewski-Laas! Your
- mileage may vary, but my Verilog parser almost doubled in speed to
+- Significant speedup from memoizing nested expressions (a technique
+ known as "packrat parsing"), thanks to Chris Lesniewski-Laas! Your
+ mileage may vary, but my Verilog parser almost doubled in speed to
over 600 lines/sec!
-
- This speedup may break existing programs that use parse actions that
+
+ This speedup may break existing programs that use parse actions that
have side-effects. For this reason, packrat parsing is disabled when
you first import pyparsing. To activate the packrat feature, your
program must call the class method ParserElement.enablePackrat(). If
- your program uses psyco to "compile as you go", you must call
+ your program uses psyco to "compile as you go", you must call
enablePackrat before calling psyco.full(). If you do not do this,
Python will crash. For best results, call enablePackrat() immediately
after importing pyparsing.
- Added new helper method countedArray(expr), for defining patterns that
- start with a leading integer to indicate the number of array elements,
- followed by that many elements, matching the given expr parse
+ start with a leading integer to indicate the number of array elements,
+ followed by that many elements, matching the given expr parse
expression. For instance, this two-liner:
wordArray = countedArray(Word(alphas))
print wordArray.parseString("3 Practicality beats purity")[0]
@@ -522,7 +619,7 @@ Version 1.4.2 - April 1, 2006 (No foolin'!)
from the length of the returned array.
(Inspired by e-mail discussion with Ralf Vosseler.)
-- Added support for attaching multiple parse actions to a single
+- Added support for attaching multiple parse actions to a single
ParserElement. (Suggested by Dan "Dang" Griffith - nice idea, Dan!)
- Added support for asymmetric quoting characters in the recently-added
@@ -530,27 +627,27 @@ Version 1.4.2 - April 1, 2006 (No foolin'!)
like "<<This is a string in double angle brackets.>>". To define
this custom form of QuotedString, your code would define:
dblAngleQuotedString = QuotedString('<<',endQuoteChar='>>')
- QuotedString also supports escaped quotes, escape character other
+ QuotedString also supports escaped quotes, escape character other
than '\', and multiline.
- Changed the default value returned internally by Optional, so that
- None can be used as a default value. (Suggested by Steven Bethard -
+ None can be used as a default value. (Suggested by Steven Bethard -
I finally saw the light!)
- Added dump() method to ParseResults, to make it easier to list out
and diagnose values returned from calling parseString.
-- A new example, a search query string parser, submitted by Steven
+- A new example, a search query string parser, submitted by Steven
Mooij and Rudolph Froger - a very interesting application, thanks!
-- Added an example that parses the BNF in Python's Grammar file, in
+- Added an example that parses the BNF in Python's Grammar file, in
support of generating Python grammar documentation. (Suggested by
J H Stovall.)
-
+
- A new example, submitted by Tim Cera, of a flexible parser module,
using a simple config variable to adjust parsing for input formats
that have slight variations - thanks, Tim!
-
+
- Added an example for parsing Roman numerals, showing the capability
of parse actions to "compile" Roman numerals into their integer
values during parsing.
@@ -558,29 +655,29 @@ Version 1.4.2 - April 1, 2006 (No foolin'!)
- Added a new docs directory, for additional documentation or help.
Currently, this includes the text and examples from my recent
presentation at PyCon.
-
+
- Fixed another typo in CaselessKeyword, thanks Stefan Behnel.
-- Expanded oneOf to also accept tuples, not just lists. This really
+- Expanded oneOf to also accept tuples, not just lists. This really
should be sufficient...
- Added deprecation warnings when tuple is returned from a parse action.
Looking back, I see that I originally deprecated this feature in March,
- 2004, so I'm guessing people really shouldn't have been using this
- feature - I'll drop it altogether in the next release, which will
- allow users to return a tuple from a parse action (which is really
- handy when trying to reconstuct tuples from a tuple string
+ 2004, so I'm guessing people really shouldn't have been using this
+ feature - I'll drop it altogether in the next release, which will
+ allow users to return a tuple from a parse action (which is really
+ handy when trying to reconstruct tuples from a tuple string
representation!).
Version 1.4.1 - February, 2006
------------------------------
-- Converted generator expression in QuotedString class to list
- comprehension, to retain compatibility with Python 2.3. (Thanks, Titus
+- Converted generator expression in QuotedString class to list
+ comprehension, to retain compatibility with Python 2.3. (Thanks, Titus
Brown for the heads-up!)
- Added searchString() method to ParserElement, as an alternative to
- using "scanString(instring).next()[0][0]" to search through a string
+ using "scanString(instring).next()[0][0]" to search through a string
looking for a substring matching a given parse expression. (Inspired by
e-mail conversation with Dave Feustel.)
@@ -591,10 +688,10 @@ Version 1.4.1 - February, 2006
Titus Brown.)
- Removed lstrip() call from Literal - too aggressive in stripping
- whitespace which may be valid for some grammars. (Point raised by Jacek
- Sieka). Also, made Literal more robust in the event of passing an empty
+ whitespace which may be valid for some grammars. (Point raised by Jacek
+ Sieka). Also, made Literal more robust in the event of passing an empty
string.
-
+
- Fixed bug in replaceWith when returning None.
- Added cautionary documentation for Forward class when assigning a
@@ -606,42 +703,42 @@ Version 1.4.1 - February, 2006
explicitly group the values inserted into the Forward:
fwdExpr << (a | b | c)
(Suggested by Scot Wilcoxon - thanks, Scot!)
-
-
+
+
Version 1.4 - January 18, 2006
------------------------------
-- Added Regex class, to permit definition of complex embedded expressions
- using regular expressions. (Enhancement provided by John Beisley, great
+- Added Regex class, to permit definition of complex embedded expressions
+ using regular expressions. (Enhancement provided by John Beisley, great
job!)
-- Converted implementations of Word, oneOf, quoted string, and comment
- helpers to utilize regular expression matching. Performance improvements
+- Converted implementations of Word, oneOf, quoted string, and comment
+ helpers to utilize regular expression matching. Performance improvements
in the 20-40% range.
-- Added QuotedString class, to support definition of non-standard quoted
+- Added QuotedString class, to support definition of non-standard quoted
strings (Suggested by Guillaume Proulx, thanks!)
-
-- Added CaselessKeyword class, to streamline grammars with, well, caseless
+
+- Added CaselessKeyword class, to streamline grammars with, well, caseless
keywords (Proposed by Stefan Behnel, thanks!)
-
-- Fixed bug in SkipTo, when using an ignoreable expression. (Patch provided
+
+- Fixed bug in SkipTo, when using an ignoreable expression. (Patch provided
by Anonymous, thanks, whoever-you-are!)
-
+
- Fixed typo in NoMatch class. (Good catch, Stefan Behnel!)
-- Fixed minor bug in _makeTags(), using string.printables instead of
+- Fixed minor bug in _makeTags(), using string.printables instead of
pyparsing.printables.
-- Cleaned up some of the expressions created by makeXXXTags helpers, to
+- Cleaned up some of the expressions created by makeXXXTags helpers, to
suppress extraneous <> characters.
-
-- Added some grammar definition-time checking to verify that a grammar is
+
+- Added some grammar definition-time checking to verify that a grammar is
being built using proper ParserElements.
-
+
- Added examples:
- . LAparser.py - linear algebra C preprocessor (submitted by Mike Ellis,
+ . LAparser.py - linear algebra C preprocessor (submitted by Mike Ellis,
thanks Mike!)
- . wordsToNum.py - converts word description of a number back to
+ . wordsToNum.py - converts word description of a number back to
the original number (such as 'one hundred and twenty three' -> 123)
. updated fourFn.py to support unary minus, added BNF comments
@@ -652,28 +749,28 @@ Version 1.3.3 - September 12, 2005
srange. Added greetingInKorean.py example, for a Korean version of
"Hello, World!" using Unicode. (Thanks, June Kim!)
-- Added 'hexnums' string constant (nums+"ABCDEFabcdef") for defining
+- Added 'hexnums' string constant (nums+"ABCDEFabcdef") for defining
hexadecimal value expressions.
-
+
- NOTE: ===THIS CHANGE MAY BREAK EXISTING CODE===
Modified tag and results definitions returned by makeHTMLTags(),
- to better support the looseness of HTML parsing. Tags to be
- parsed are now caseless, and keys generated for tag attributes are
+ to better support the looseness of HTML parsing. Tags to be
+ parsed are now caseless, and keys generated for tag attributes are
now converted to lower case.
-
- Formerly, makeXMLTags("XYZ") would return a tag with results
+
+ Formerly, makeXMLTags("XYZ") would return a tag with results
name of "startXYZ", this has been changed to "startXyz". If this
- tag is matched against '<XYZ Abc="1" DEF="2" ghi="3">', the
+ tag is matched against '<XYZ Abc="1" DEF="2" ghi="3">', the
matched keys formerly would be "Abc", "DEF", and "ghi"; keys are
- now converted to lower case, giving keys of "abc", "def", and
+ now converted to lower case, giving keys of "abc", "def", and
"ghi". These changes were made to try to address the lax
- case sensitivity agreement between start and end tags in many
+ case sensitivity agreement between start and end tags in many
HTML pages.
-
- No changes were made to makeXMLTags(), which assumes more rigorous
+
+ No changes were made to makeXMLTags(), which assumes more rigorous
parsing rules.
-
- Also, cleaned up case-sensitivity bugs in closing tags, and
+
+ Also, cleaned up case-sensitivity bugs in closing tags, and
switched to using Keyword instead of Literal class for tags.
(Thanks, Steve Young, for getting me to look at these in more
detail!)
@@ -682,11 +779,11 @@ Version 1.3.3 - September 12, 2005
which will convert matched text to all uppercase or lowercase,
respectively.
-- Deprecated Upcase class, to be replaced by upcaseTokens parse
+- Deprecated Upcase class, to be replaced by upcaseTokens parse
action.
-
-- Converted messages sent to stderr to use warnings module, such as
- when constructing a Literal with an empty string, one should use
+
+- Converted messages sent to stderr to use warnings module, such as
+ when constructing a Literal with an empty string, one should use
the Empty() class or the empty helper instead.
- Added ' ' (space) as an escapable character within a quoted
@@ -701,69 +798,69 @@ Version 1.3.3 - September 12, 2005
. pythonStyleComment = # ... (to end of line)
-
+
Version 1.3.2 - July 24, 2005
-----------------------------
-- Added Each class as an enhanced version of And. 'Each' requires
- that all given expressions be present, but may occur in any order.
- Special handling is provided to group ZeroOrMore and OneOrMore
- elements that occur out-of-order in the input string. You can also
- construct 'Each' objects by joining expressions with the '&'
- operator. When using the Each class, results names are strongly
- recommended for accessing the matched tokens. (Suggested by Pradam
+- Added Each class as an enhanced version of And. 'Each' requires
+ that all given expressions be present, but may occur in any order.
+ Special handling is provided to group ZeroOrMore and OneOrMore
+ elements that occur out-of-order in the input string. You can also
+ construct 'Each' objects by joining expressions with the '&'
+ operator. When using the Each class, results names are strongly
+ recommended for accessing the matched tokens. (Suggested by Pradam
Amini - thanks, Pradam!)
-
-- Stricter interpretation of 'max' qualifier on Word elements. If the
- 'max' attribute is specified, matching will fail if an input field
- contains more than 'max' consecutive body characters. For example,
- previously, Word(nums,max=3) would match the first three characters
- of '0123456', returning '012' and continuing parsing at '3'. Now,
- when constructed using the max attribute, Word will raise an
+
+- Stricter interpretation of 'max' qualifier on Word elements. If the
+ 'max' attribute is specified, matching will fail if an input field
+ contains more than 'max' consecutive body characters. For example,
+ previously, Word(nums,max=3) would match the first three characters
+ of '0123456', returning '012' and continuing parsing at '3'. Now,
+ when constructed using the max attribute, Word will raise an
exception with this string.
- Cleaner handling of nested dictionaries returned by Dict. No
longer necessary to dereference sub-dictionaries as element [0] of
- their parents.
- === NOTE: THIS CHANGE MAY BREAK SOME EXISTING CODE, BUT ONLY IF
+ their parents.
+ === NOTE: THIS CHANGE MAY BREAK SOME EXISTING CODE, BUT ONLY IF
PARSING NESTED DICTIONARIES USING THE LITTLE-USED DICT CLASS ===
(Prompted by discussion thread on the Python Tutor list, with
contributions from Danny Yoo, Kent Johnson, and original post by
Liam Clarke - thanks all!)
-
+
Version 1.3.1 - June, 2005
----------------------------------
- Added markInputline() method to ParseException, to display the input
text line location of the parsing exception. (Thanks, Stefan Behnel!)
-
-- Added setDefaultKeywordChars(), so that Keyword definitions using a
+
+- Added setDefaultKeywordChars(), so that Keyword definitions using a
custom keyword character set do not all need to add the keywordChars
- constructor argument (similar to setDefaultWhitespaceChars()).
+ constructor argument (similar to setDefaultWhitespaceChars()).
(suggested by rzhanka on the SourceForge pyparsing forum.)
-
+
- Simplified passing debug actions to setDebugAction(). You can now
- pass 'None' for a debug action if you want to take the default
+ pass 'None' for a debug action if you want to take the default
debug behavior. To suppress a particular debug action, you can pass
the pyparsing method nullDebugAction.
-
-- Refactored parse exception classes, moved all behavior to
+
+- Refactored parse exception classes, moved all behavior to
ParseBaseException, and the former ParseException is now a subclass of
ParseBaseException. Added a second subclass, ParseFatalException, as
a subclass of ParseBaseException. User-defined parse actions can raise
- ParseFatalException if a data inconsistency is detected (such as a
+ ParseFatalException if a data inconsistency is detected (such as a
begin-tag/end-tag mismatch), and this will stop all parsing immediately.
(Inspired by e-mail thread with Michele Petrazzo - thanks, Michelle!)
-
+
- Added helper methods makeXMLTags and makeHTMLTags, that simplify the
- definition of XML or HTML tag parse expressions for a given tagname.
- Both functions return a pair of parse expressions, one for the opening
+ definition of XML or HTML tag parse expressions for a given tagname.
+ Both functions return a pair of parse expressions, one for the opening
tag (that is, '<tagname>') and one for the closing tag ('</tagname>').
The opening tag also recognizes any attribute definitions that have
been included in the opening tag, as well as an empty tag (one with a
trailing '/', as in '<BODY/>' which is equivalent to '<BODY></BODY>').
makeXMLTags uses stricter XML syntax for attributes, requiring that they
- be enclosed in double quote characters - makeHTMLTags is more lenient,
+ be enclosed in double quote characters - makeHTMLTags is more lenient,
and accepts single-quoted strings or any contiguous string of characters
up to the next whitespace character or '>' character. Attributes can
be retrieved as dictionary or attribute values of the returned results
@@ -771,13 +868,13 @@ Version 1.3.1 - June, 2005
- Added example minimath2.py, a refinement on fourFn.py that adds
an interactive session and support for variables. (Thanks, Steven Siew!)
-
+
- Added performance improvement, up to 20% reduction! (Found while working
with Wolfgang Borgert on performance tuning of his TTCN3 parser.)
-
+
- And another performance improvement, up to 25%, when using scanString!
(Found while working with Henrik Westlund on his C header file scanner.)
-
+
- Updated UML diagrams to reflect latest class/method changes.
@@ -788,87 +885,87 @@ Version 1.3 - March, 2005
distinguish them from variables or other identifiers that just
happen to start with the same characters as a keyword. For instance,
the input string containing "ifOnlyIfOnly" will match a Literal("if")
- at the beginning and in the middle, but will fail to match a
- Keyword("if"). Keyword("if") will match only strings such as "if only"
- or "if(only)". (Proposed by Wolfgang Borgert, and Berteun Damman
+ at the beginning and in the middle, but will fail to match a
+ Keyword("if"). Keyword("if") will match only strings such as "if only"
+ or "if(only)". (Proposed by Wolfgang Borgert, and Berteun Damman
separately requested this on comp.lang.python - great idea!)
-- Added setWhitespaceChars() method to override the characters to be
+- Added setWhitespaceChars() method to override the characters to be
skipped as whitespace before matching a particular ParseElement. Also
added the class-level method setDefaultWhitespaceChars(), to allow
users to override the default set of whitespace characters (space,
tab, newline, and return) for all subsequently defined ParseElements.
(Inspired by Klaas Hofstra's inquiry on the Sourceforge pyparsing
forum.)
-
-- Added helper parse actions to support some very common parse
+
+- Added helper parse actions to support some very common parse
action use cases:
- . replaceWith(replStr) - replaces the matching tokens with the
- provided replStr replacement string; especially useful with
+ . replaceWith(replStr) - replaces the matching tokens with the
+ provided replStr replacement string; especially useful with
transformString()
. removeQuotes - removes first and last character from string enclosed
in quotes (note - NOT the same as the string strip() method, as only
a single character is removed at each end)
-
-- Added copy() method to ParseElement, to make it easier to define
+
+- Added copy() method to ParseElement, to make it easier to define
different parse actions for the same basic parse expression. (Note, copy
is implicitly called when using setResultsName().)
- (The following changes were posted to CVS as Version 1.2.3 -
+ (The following changes were posted to CVS as Version 1.2.3 -
October-December, 2004)
-
-- Added support for Unicode strings in creating grammar definitions.
+
+- Added support for Unicode strings in creating grammar definitions.
(Big thanks to Gavin Panella!)
- Added constant alphas8bit to include the following 8-bit characters:
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ
-
-- Added srange() function to simplify definition of Word elements, using
+
+- Added srange() function to simplify definition of Word elements, using
regexp-like '[A-Za-z0-9]' syntax. This also simplifies referencing
common 8-bit characters.
-
+
- Fixed bug in Dict when a single element Dict was embedded within another
Dict. (Thanks Andy Yates for catching this one!)
-
+
- Added 'formatted' argument to ParseResults.asXML(). If set to False,
suppresses insertion of whitespace for pretty-print formatting. Default
equals True for backward compatibility.
-
+
- Added setDebugActions() function to ParserElement, to allow user-defined
debugging actions.
-- Added support for escaped quotes (either in \', \", or doubled quote
+- Added support for escaped quotes (either in \', \", or doubled quote
form) to the predefined expressions for quoted strings. (Thanks, Ero
Carrera!)
-
+
- Minor performance improvement (~5%) converting "char in string" tests
to "char in dict". (Suggested by Gavin Panella, cool idea!)
-
+
Version 1.2.2 - September 27, 2004
----------------------------------
- Modified delimitedList to accept an expression as the delimiter, instead
of only accepting strings.
-
-- Modified ParseResults, to convert integer field keys to strings (to
+
+- Modified ParseResults, to convert integer field keys to strings (to
avoid confusion with list access).
- Modified Combine, to convert all embedded tokens to strings before
combining.
-
-- Fixed bug in MatchFirst in which parse actions would be called for
+
+- Fixed bug in MatchFirst in which parse actions would be called for
expressions that only partially match. (Thanks, John Hunter!)
-
-- Fixed bug in fourFn.py example that fixes right-associativity of ^
+
+- Fixed bug in fourFn.py example that fixes right-associativity of ^
operator. (Thanks, Andrea Griffini!)
-- Added class FollowedBy(expression), to look ahead in the input string
+- Added class FollowedBy(expression), to look ahead in the input string
without consuming tokens.
- Added class NoMatch that never matches any input. Can be useful in
debugging, and in very specialized grammars.
-
+
- Added example pgn.py, for parsing chess game files stored in Portable
Game Notation. (Thanks, Alberto Santini!)
@@ -878,33 +975,33 @@ Version 1.2.1 - August 19, 2004
- Added SkipTo(expression) token type, simplifying grammars that only
want to specify delimiting expressions, and want to match any characters
between them.
-
+
- Added helper method dictOf(key,value), making it easier to work with
the Dict class. (Inspired by Pavel Volkovitskiy, thanks!).
-- Added optional argument listAllMatches (default=False) to
+- Added optional argument listAllMatches (default=False) to
setResultsName(). Setting listAllMatches to True overrides the default
modal setting of tokens to results names; instead, the results name
- acts as an accumulator for all matching tokens within the local
+ acts as an accumulator for all matching tokens within the local
repetition group. (Suggested by Amaury Le Leyzour - thanks!)
-
+
- Fixed bug in ParseResults, throwing exception when trying to extract
slice, or make a copy using [:]. (Thanks, Wilson Fowlie!)
-
+
- Fixed bug in transformString() when the input string contains <TAB>'s
(Thanks, Rick Walia!).
-
-- Fixed bug in returning tokens from un-Grouped And's, Or's and
- MatchFirst's, where too many tokens would be included in the results,
+
+- Fixed bug in returning tokens from un-Grouped And's, Or's and
+ MatchFirst's, where too many tokens would be included in the results,
confounding parse actions and returned results.
-
+
- Fixed bug in naming ParseResults returned by And's, Or's, and
MatchFirst's.
- Fixed bug in LineEnd() - matching this token now correctly consumes
and returns the end of line "\n".
-
-- Added a beautiful example for parsing Mozilla calendar files (Thanks,
+
+- Added a beautiful example for parsing Mozilla calendar files (Thanks,
Petri Savolainen!).
- Added support for dynamically modifying Forward expressions during
@@ -915,7 +1012,7 @@ Version 1.2 - 20 June 2004
--------------------------
- Added definition for htmlComment to help support HTML scanning and
parsing.
-
+
- Fixed bug in generating XML for Dict classes, in which trailing item was
duplicated in the output XML.
@@ -927,42 +1024,42 @@ Version 1.2 - 20 June 2004
- Added example urlExtractor.py, as another example of using scanString
and parse actions.
-
+
Version 1.2beta3 - 4 June 2004
------------------------------
- Added White() token type, analogous to Word, to match on whitespace
- characters. Use White in parsers with significant whitespace (such as
- configuration file parsers that use indentation to indicate grouping).
- Construct White with a string containing the whitespace characters to be
- matched. Similar to Word, White also takes optional min, max, and exact
- parameters.
+ characters. Use White in parsers with significant whitespace (such as
+ configuration file parsers that use indentation to indicate grouping).
+ Construct White with a string containing the whitespace characters to be
+ matched. Similar to Word, White also takes optional min, max, and exact
+ parameters.
-- As part of supporting whitespace-signficant parsing, added parseWithTabs()
+- As part of supporting whitespace-signficant parsing, added parseWithTabs()
method to ParserElement, to override the default behavior in parseString
of automatically expanding tabs to spaces. To retain tabs during
- parsing, call parseWithTabs() before calling parseString(), parseFile() or
+ parsing, call parseWithTabs() before calling parseString(), parseFile() or
scanString(). (Thanks, Jean-Guillaume Paradis for catching this, and for
your suggestions on whitespace-significant parsing.)
-
-- Added transformString() method to ParseElement, as a complement to
+
+- Added transformString() method to ParseElement, as a complement to
scanString(). To use transformString, define a grammar and attach a parse
- action to the overall grammar that modifies the returned token list.
- Invoking transformString() on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
+ action to the overall grammar that modifies the returned token list.
+ Invoking transformString() on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
action. transformString() returns the resulting transformed string.
(Note: transformString() does *not* automatically expand tabs to spaces.)
Also added scanExamples.py to the examples directory to show sample uses of
scanString() and transformString().
-
+
- Removed group() method that was introduced in beta2. This turns out NOT to
be equivalent to nesting within a Group() object, and I'd prefer not to sow
more seeds of confusion.
-
+
- Fixed behavior of asXML() where tags for groups were incorrectly duplicated.
(Thanks, Brad Clements!)
-
-- Changed beta version message to display to stderr instead of stdout, to
+
+- Changed beta version message to display to stderr instead of stdout, to
make asXML() easier to use. (Thanks again, Brad.)
@@ -974,48 +1071,48 @@ Version 1.2beta2 - 19 May 2004
token content; these parse actions can simply end without having to specify
'return toks'.
-- *** POSSIBLE API INCOMPATIBILITY *** - Fixed CaselessLiteral bug, where the
- returned token text was not the original string (as stated in the docs),
- but the original string converted to upper case. (Thanks, Dang Griffith!)
+- *** POSSIBLE API INCOMPATIBILITY *** - Fixed CaselessLiteral bug, where the
+ returned token text was not the original string (as stated in the docs),
+ but the original string converted to upper case. (Thanks, Dang Griffith!)
**NOTE: this may break some code that relied on this erroneous behavior.
Users should scan their code for uses of CaselessLiteral.**
-- *** POSSIBLE CODE INCOMPATIBILITY *** - I have renamed the internal
- attributes on ParseResults from 'dict' and 'list' to '__tokdict' and
+- *** POSSIBLE CODE INCOMPATIBILITY *** - I have renamed the internal
+ attributes on ParseResults from 'dict' and 'list' to '__tokdict' and
'__toklist', to avoid collisions with user-defined data fields named 'dict'
and 'list'. Any client code that accesses these attributes directly will
need to be modified. Hopefully the implementation of methods such as keys(),
items(), len(), etc. on ParseResults will make such direct attribute
accesses unnecessary.
-
-- Added asXML() method to ParseResults. This greatly simplifies the process
+
+- Added asXML() method to ParseResults. This greatly simplifies the process
of parsing an input data file and generating XML-structured data.
- Added getName() method to ParseResults. This method is helpful when
a grammar specifies ZeroOrMore or OneOrMore of a MatchFirst or Or
expression, and the parsing code needs to know which expression matched.
(Thanks, Eric van der Vlist, for this idea!)
-
+
- Added items() and values() methods to ParseResults, to better support using
ParseResults as a Dictionary.
-- Added parseFile() as a convenience function to parse the contents of an
+- Added parseFile() as a convenience function to parse the contents of an
entire text file. Accepts either a file name or a file object. (Thanks
again, Dang!)
- Added group() method to And, Or, and MatchFirst, as a short-cut alternative
to enclosing a construct inside a Group object.
-
+
- Extended fourFn.py to support exponentiation, and simple built-in functions.
- Added EBNF parser to examples, including a demo where it parses its own
EBNF! (Thanks to Seo Sanghyeon!)
-
+
- Added Delphi Form parser to examples, dfmparse.py, plus a couple of
sample Delphi forms as tests. (Well done, Dang!)
- Another performance speedup, 5-10%, inspired by Dang! Plus about a 20%
- speedup, by pre-constructing and cacheing exception objects instead of
+ speedup, by pre-constructing and cacheing exception objects instead of
constructing them on the fly.
- Fixed minor bug when specifying oneOf() with 'caseless=True'.
@@ -1025,8 +1122,8 @@ Version 1.2beta2 - 19 May 2004
Version 1.1.2 - 21 Mar 2004
---------------------------
-- Fixed minor bug in scanString(), so that start location is at the start of
- the matched tokens, not at the start of the whitespace before the matched
+- Fixed minor bug in scanString(), so that start location is at the start of
+ the matched tokens, not at the start of the whitespace before the matched
tokens.
- Inclusion of HTML documentation, generated using Epydoc. Reformatted some
@@ -1037,43 +1134,43 @@ Version 1.1.2 - 21 Mar 2004
- And on a process note, I've used the unittest module to define a series of
unit tests, to help avoid the embarrassment of the version 1.1 snafu.
-
+
Version 1.1.1 - 6 Mar 2004
--------------------------
-- Fixed critical bug introduced in 1.1, which broke MatchFirst(!) token
+- Fixed critical bug introduced in 1.1, which broke MatchFirst(!) token
matching.
**THANK YOU, SEO SANGHYEON!!!**
-
-- Added "from future import __generators__" to permit running under
+
+- Added "from future import __generators__" to permit running under
pre-Python 2.3.
- Added example getNTPservers.py, showing how to use pyparsing to extract
a text pattern from the HTML of a web page.
-
-
+
+
Version 1.1 - 3 Mar 2004
-------------------------
-- ***Changed API*** - While testing out parse actions, I found that the value
+- ***Changed API*** - While testing out parse actions, I found that the value
of loc passed in was not the starting location of the matched tokens, but
the location of the next token in the list. With this version, the location
passed to the parse action is now the starting location of the tokens that
matched.
-
- A second part of this change is that the return value of parse actions no
+
+ A second part of this change is that the return value of parse actions no
longer needs to return a tuple containing both the location and the parsed
tokens (which may optionally be modified); parse actions only need to return
- the list of tokens. Parse actions that return a tuple are deprecated; they
- will still work properly for conversion/compatibility, but this behavior will
+ the list of tokens. Parse actions that return a tuple are deprecated; they
+ will still work properly for conversion/compatibility, but this behavior will
be removed in a future version.
-
+
- Added validate() method, to help diagnose infinite recursion in a grammar tree.
validate() is not 100% fool-proof, but it can help track down nasty infinite
- looping due to recursively referencing the same grammar construct without some
+ looping due to recursively referencing the same grammar construct without some
intervening characters.
- Cleaned up default listing of some parse element types, to more closely match
- ordinary BNF. Instead of the form <classname>:[contents-list], some changes
+ ordinary BNF. Instead of the form <classname>:[contents-list], some changes
are:
. And(token1,token2,token3) is "{ token1 token2 token3 }"
. Or(token1,token2,token3) is "{ token1 ^ token2 ^ token3 }"
@@ -1081,13 +1178,13 @@ Version 1.1 - 3 Mar 2004
. Optional(token) is "[ token ]"
. OneOrMore(token) is "{ token }..."
. ZeroOrMore(token) is "[ token ]..."
-
+
- Fixed an infinite loop in oneOf if the input string contains a duplicated
option. (Thanks Brad Clements)
-- Fixed a bug when specifying a results name on an Optional token. (Thanks
+- Fixed a bug when specifying a results name on an Optional token. (Thanks
again, Brad Clements)
-
+
- Fixed a bug introduced in 1.0.6 when I converted quotedString to use
CharsNotIn; I accidentally permitted quoted strings to span newlines. I have
fixed this in this version to go back to the original behavior, in which
@@ -1098,14 +1195,14 @@ Version 1.1 - 3 Mar 2004
Version 1.0.6 - 13 Feb 2004
----------------------------
-- Added CharsNotIn class (Thanks, Lee SangYeong). This is the opposite of
+- Added CharsNotIn class (Thanks, Lee SangYeong). This is the opposite of
Word, in that it is constructed with a set of characters *not* to be matched.
(This enhancement also allowed me to clean up and simplify some of the
definitions for quoted strings, cStyleComment, and restOfLine.)
-
-- **MINOR API CHANGE** - Added joinString argument to the __init__ method of
- Combine (Thanks, Thomas Kalka). joinString defaults to "", but some
- applications might choose some other string to use instead, such as a blank
+
+- **MINOR API CHANGE** - Added joinString argument to the __init__ method of
+ Combine (Thanks, Thomas Kalka). joinString defaults to "", but some
+ applications might choose some other string to use instead, such as a blank
or newline. joinString was inserted as the second argument to __init__,
so if you have code that specifies an adjacent value, without using
'adjacent=', this code will break.
@@ -1127,9 +1224,9 @@ Version 1.0.5 - 19 Jan 2004
- Added scanString() generator method to ParseElement, to support regex-like
pattern-searching
-- Added items() list to ParseResults, to return named results as a
+- Added items() list to ParseResults, to return named results as a
list of (key,value) pairs
-
+
- Fixed memory overflow in asList() for deeply nested ParseResults (Thanks,
Sverrir Valgeirsson)
@@ -1140,8 +1237,8 @@ Version 1.0.4 - 8 Jan 2004
---------------------------
- Added positional tokens StringStart, StringEnd, LineStart, and LineEnd
-- Added commaSeparatedList to pre-defined global token definitions; also added
- commasep.py to the examples directory, to demonstrate the differences between
+- Added commaSeparatedList to pre-defined global token definitions; also added
+ commasep.py to the examples directory, to demonstrate the differences between
parsing comma-separated data and simple line-splitting at commas
- Minor API change: delimitedList does not automatically enclose the
@@ -1149,12 +1246,12 @@ Version 1.0.4 - 8 Jan 2004
also, if invoked using 'combine=True', the list delimiters are also included
in the returned text (good for scoped variables, such as a.b.c or a::b::c, or
for directory paths such as a/b/c)
-
+
- Performance speed-up again, 30-40%
- Added httpServerLogParser.py to examples directory, as this is
a common parsing task
-
+
Version 1.0.3 - 23 Dec 2003
---------------------------
@@ -1166,7 +1263,7 @@ Version 1.0.3 - 23 Dec 2003
Version 1.0.2 - 18 Dec 2003
---------------------------
- **NOTE: Changed API again!!!** (for the last time, I hope)
-
+
+ Renamed module from parsing to pyparsing, to better reflect Python
linkage.
@@ -1177,7 +1274,7 @@ Version 1.0.2 - 18 Dec 2003
Version 1.0.1 - 17 Dec 2003
---------------------------
- **NOTE: Changed API!**
-
+
+ Renamed 'len' argument on Word.__init__() to 'exact'
- Performance speed-up, 10-30%
diff --git a/pyparsing.py b/pyparsing.py
index a7da821..4c88bd6 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -27,18 +27,18 @@ __doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars
-The pyparsing module is an alternative approach to creating and executing simple grammars,
+The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
-don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.
Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
from pyparsing import Word, alphas
-
+
# define grammar of a greeting
- greet = Word( alphas ) + "," + Word( alphas ) + "!"
-
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+
hello = "Hello, World!"
print hello, "->", greet.parseString( hello )
@@ -46,10 +46,10 @@ The program outputs the following::
Hello, World! -> ['Hello', ',', 'World', '!']
-The Python representation of the grammar is quite readable, owing to the self-explanatory
+The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.
-The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
+The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
@@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments
"""
-__version__ = "1.4.12"
-__versionTime__ = "2 May 2008 02:10"
+__version__ = "1.5.0"
+__versionTime__ = "12 May 2008 02:25"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -71,40 +71,51 @@ import sre_constants
import xml.sax.saxutils
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+__all__ = """__version__ __versionTime__ __author__ ParseBaseException ParseException ParseFatalException
+ ParseSyntaxException RecursiveGrammarException ParseResults col lineno line nullDebugAction ParserElement
+ Token Empty NoMatch Literal Keyword CaselessLiteral CaselessKeyword Word Regex QuotedString CharsNotIn
+ White GoToColumn LineStart LineEnd StringStart StringEnd WordStart WordEnd And Or MatchFirst Each
+ FollowedBy NotAny ZeroOrMore
+""".split()
+
"""
Detect if we are running version 3.X and make appropriate changes
Robert A. Clark
"""
if sys.version_info[0] > 2:
- __MAX_INT__ = sys.maxsize
- __BASE_STRING__ = str
+ _PY3K = True
+ _MAX_INT = sys.maxsize
+ basestring = str
else:
- __MAX_INT__ = sys.maxint
- __BASE_STRING__ = basestring
-
-def _ustr(obj):
- """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
- str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
- then < returns the unicode object | encodes it with the default encoding | ... >.
- """
- try:
- # If this works, then _ustr(obj) has the same behaviour as str(obj), so
- # it won't break any existing code.
- return str(obj)
-
- except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
+ _PY3K = False
+ _MAX_INT = sys.maxint
+
+if not _PY3K:
+ def _ustr(obj):
+ """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+ str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+ then < returns the unicode object | encodes it with the default encoding | ... >.
+ """
+ try:
+ # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+ # it won't break any existing code.
+ return str(obj)
+
+ except UnicodeEncodeError:
+ # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
+ # state that "The return value must be a string object". However, does a
+ # unicode object (being a subclass of basestring) count as a "string
+ # object"?
+ # If so, then return a unicode object:
+ return unicode(obj)
+ # Else encode it... but how? There are many choices... :)
+ # Replace unprintables with escape codes?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
+ # Replace unprintables with question marks?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
+ # ...
+else:
+ _ustr = str
def _str2dict(strg):
return dict( [(c,0) for c in strg] )
@@ -112,11 +123,11 @@ def _str2dict(strg):
class _Constants(object):
pass
-
+
alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
-alphanums = alphas + nums
+alphanums = alphas + nums
_bslash = "\\"
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
@@ -124,7 +135,7 @@ class ParseBaseException(Exception):
"""base exception class for all parsing runtime exceptions"""
__slots__ = ( "loc","msg","pstr","parserElement" )
# Performance tuning: we construct a *lot* of these, so keep this
- # constructor as small and fast as possible
+ # constructor as small and fast as possible
def __init__( self, pstr, loc=0, msg=None, elem=None ):
self.loc = loc
if msg is None:
@@ -148,7 +159,7 @@ class ParseBaseException(Exception):
elif( aname == "line" ):
return line( self.loc, self.pstr )
else:
- raise AttributeError, aname
+ raise AttributeError(aname)
def __str__( self ):
return "%s (at char %d), (line:%d, col:%d)" % \
@@ -156,7 +167,7 @@ class ParseBaseException(Exception):
def __repr__( self ):
return _ustr(self)
def markInputline( self, markerString = ">!<" ):
- """Extracts the exception line from the input string, and marks
+ """Extracts the exception line from the input string, and marks
the location of the exception with a special symbol.
"""
line_str = self.line
@@ -174,12 +185,20 @@ class ParseException(ParseBaseException):
- line - returns the line containing the exception text
"""
pass
-
+
class ParseFatalException(ParseBaseException):
"""user-throwable exception thrown when inconsistent parse content
is found; stops all parsing immediately"""
pass
+class ParseSyntaxException(ParseFatalException):
+ """just like ParseFatalException, but thrown internally when an
+ ErrorStop indicates that parsing is to stop immediately because
+ an unbacktrackable syntax error has been found"""
+ def __init__(self, pe):
+ super(ParseSyntaxException, self).__init__(
+ pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
#~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause
#~ pyparsing to reparse the input string:
@@ -197,7 +216,7 @@ class RecursiveGrammarException(Exception):
"""exception thrown by validate() if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
self.parseElementTrace = parseElementList
-
+
def __str__( self ):
return "RecursiveGrammarException: %s" % self.parseElementTrace
@@ -222,7 +241,7 @@ class ParseResults(object):
retobj = object.__new__(cls)
retobj.__doinit = True
return retobj
-
+
# Performance tuning: we construct a *lot* of these, so keep this
# constructor as small and fast as possible
def __init__( self, toklist, name=None, asList=True, modal=True ):
@@ -239,7 +258,7 @@ class ParseResults(object):
# this line is related to debugging the asXML bug
#~ asList = False
-
+
if name:
if not modal:
self.__accumNames[name] = 0
@@ -247,7 +266,7 @@ class ParseResults(object):
name = _ustr(name) # will always return a str, but use _ustr for consistency
self.__name = name
if not toklist in (None,'',[]):
- if isinstance(toklist,__BASE_STRING__):
+ if isinstance(toklist,basestring):
toklist = [ toklist ]
if asList:
if isinstance(toklist,ParseResults):
@@ -282,12 +301,12 @@ class ParseResults(object):
sub = v
if isinstance(sub,ParseResults):
sub.__parent = wkref(self)
-
+
def __delitem__( self, i ):
if isinstance(i,(int,slice)):
mylen = len( self.__toklist )
del self.__toklist[i]
-
+
# convert int to slice
if isinstance(i, int):
if i < 0:
@@ -307,16 +326,16 @@ class ParseResults(object):
def __contains__( self, k ):
return self.__tokdict.has_key(k)
-
+
def __len__( self ): return len( self.__toklist )
def __bool__(self): return len( self.__toklist ) > 0
- def __nonzero__( self ): return self.__bool__()
+ __nonzero__ = __bool__
def __iter__( self ): return iter( self.__toklist )
def __reversed__( self ): return iter( reversed(self.__toklist) )
- def keys( self ):
+ def keys( self ):
"""Returns all named result keys."""
return self.__tokdict.keys()
-
+
def pop( self, index=-1 ):
"""Removes and returns item at specified index (default=last).
Will work with either numeric indices or dict-key indicies."""
@@ -325,8 +344,8 @@ class ParseResults(object):
return ret
def get(self, key, defaultValue=None):
- """Returns named result matching the given key, or if there is no
- such name, then returns the given defaultValue or None if no
+ """Returns named result matching the given key, or if there is no
+ such name, then returns the given defaultValue or None if no
defaultValue is specified."""
if key in self:
return self[key]
@@ -340,12 +359,12 @@ class ParseResults(object):
occurrences = self.__tokdict[name]
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
-
- def items( self ):
+
+ def items( self ):
"""Returns all named result keys and values as a list of tuples."""
return [(k,self[k]) for k in self.__tokdict]
-
- def values( self ):
+
+ def values( self ):
"""Returns all named result values."""
return [ v[-1][0] for v in self.__tokdict.values() ]
@@ -364,7 +383,7 @@ class ParseResults(object):
ret = self.copy()
ret += other
return ret
-
+
def __iadd__( self, other ):
if other.__tokdict:
offset = len(self.__toklist)
@@ -380,7 +399,7 @@ class ParseResults(object):
self.__accumNames.update( other.__accumNames )
del other
return self
-
+
def __repr__( self ):
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
@@ -429,7 +448,7 @@ class ParseResults(object):
ret.__accumNames.update( self.__accumNames )
ret.__name = self.__name
return ret
-
+
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
"""Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
nl = "\n"
@@ -437,28 +456,28 @@ class ParseResults(object):
namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
for v in vlist ] )
nextLevelIndent = indent + " "
-
+
# collapse out indents if formatting is not desired
if not formatted:
indent = ""
nextLevelIndent = ""
nl = ""
-
+
selfTag = None
if doctag is not None:
selfTag = doctag
else:
if self.__name:
selfTag = self.__name
-
+
if not selfTag:
if namedItemsOnly:
return ""
else:
selfTag = "ITEM"
-
+
out += [ nl, indent, "<", selfTag, ">" ]
-
+
worklist = self.__toklist
for i,res in enumerate(worklist):
if isinstance(res,ParseResults):
@@ -486,7 +505,7 @@ class ParseResults(object):
out += [ nl, nextLevelIndent, "<", resTag, ">",
xmlBodyText,
"</", resTag, ">" ]
-
+
out += [ nl, indent, "</", selfTag, ">" ]
return "".join(out)
@@ -496,7 +515,7 @@ class ParseResults(object):
if sub is v:
return k
return None
-
+
def getName(self):
"""Returns the results name for this token expression."""
if self.__name:
@@ -507,13 +526,13 @@ class ParseResults(object):
return par.__lookup(self)
else:
return None
- elif (len(self) == 1 and
+ elif (len(self) == 1 and
len(self.__tokdict) == 1 and
self.__tokdict.values()[0][0][1] in (0,-1)):
return self.__tokdict.keys()[0]
else:
return None
-
+
def dump(self,indent='',depth=0):
"""Diagnostic method for listing out the contents of a ParseResults.
Accepts an optional indent argument so that this string can be embedded
@@ -545,7 +564,7 @@ class ParseResults(object):
self.__parent is not None and self.__parent() or None,
self.__accumNames,
self.__name ) )
-
+
def __setstate__(self,state):
self.__toklist = state[0]
self.__tokdict, \
@@ -575,7 +594,7 @@ def col (loc,strg):
def lineno(loc,strg):
"""Returns current line number within a string, counting newlines as line separators.
The first line is number 1.
-
+
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
@@ -595,13 +614,13 @@ def line( loc, strg ):
return strg[lastCR+1:]
def _defaultStartDebugAction( instring, loc, expr ):
- print ("Match",_ustr(expr),"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+ print ("Match " + _ustr(expr) + " at loc " + loc + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
- print ("Matched",_ustr(expr),"->",toks.asList())
-
+ print ("Matched " + _ustr(expr) + " -> " + toks.asList())
+
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
- print ("Exception raised:", _ustr(exc))
+ print ("Exception raised:" + _ustr(exc))
def nullDebugAction(*args):
"""'Do-nothing' debug action, to suppress debugging output during parsing."""
@@ -610,13 +629,13 @@ def nullDebugAction(*args):
class ParserElement(object):
"""Abstract base level parser element class."""
DEFAULT_WHITE_CHARS = " \n\t\r"
-
+
def setDefaultWhitespaceChars( chars ):
"""Overrides the default whitespace chars
"""
ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
-
+
def __init__( self, savelist=False ):
self.parseAction = list()
self.failAction = None
@@ -659,7 +678,7 @@ class ParserElement(object):
return self
def setResultsName( self, name, listAllMatches=False ):
- """Define name for referencing matching tokens as a nested attribute
+ """Define name for referencing matching tokens as a nested attribute
of the returned parse results.
NOTE: this returns a *copy* of the original ParserElement object;
this is so that the client can define a basic element, such as an
@@ -752,7 +771,7 @@ class ParserElement(object):
pass
return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
-
+
def setParseAction( self, *fns, **kwargs ):
"""Define action to perform when successfully matching parse element definition.
Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
@@ -763,7 +782,7 @@ class ParserElement(object):
If the functions in fns modify the tokens, they can return them as the return
value from fn, and the modified list of tokens will replace the original.
Otherwise, fn does not need to return any value.
-
+
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{parseString}<parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
@@ -781,8 +800,8 @@ class ParserElement(object):
return self
def setFailAction( self, fn ):
- """Define action to perform if parsing fails at this expression.
- Fail acton fn is a callable function that takes the arguments
+ """Define action to perform if parsing fails at this expression.
+ Fail acton fn is a callable function that takes the arguments
fn(s,loc,expr,err) where:
- s = string being parsed
- loc = location where expression match was attempted and failed
@@ -792,7 +811,7 @@ class ParserElement(object):
if it is desired to stop parsing immediately."""
self.failAction = fn
return self
-
+
def _skipIgnorables( self, instring, loc ):
exprsFound = True
while exprsFound:
@@ -809,13 +828,13 @@ class ParserElement(object):
def preParse( self, instring, loc ):
if self.ignoreExprs:
loc = self._skipIgnorables( instring, loc )
-
+
if self.skipWhitespace:
wt = self.whiteChars
instrlen = len(instring)
while loc < instrlen and instring[loc] in wt:
loc += 1
-
+
return loc
def parseImpl( self, instring, loc, doActions=True ):
@@ -862,7 +881,7 @@ class ParserElement(object):
raise ParseException( instring, len(instring), self.errmsg, self )
else:
loc,tokens = self.parseImpl( instring, preloc, doActions )
-
+
tokens = self.postParse( instring, loc, tokens )
retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
@@ -872,9 +891,9 @@ class ParserElement(object):
for fn in self.parseAction:
tokens = fn( instring, tokensStart, retTokens )
if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults )
except ParseException, err:
#~ print "Exception raised in user parse action:", err
@@ -885,9 +904,9 @@ class ParserElement(object):
for fn in self.parseAction:
tokens = fn( instring, tokensStart, retTokens )
if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults )
if debugging:
@@ -898,8 +917,11 @@ class ParserElement(object):
return loc, retTokens
def tryParse( self, instring, loc ):
- return self._parse( instring, loc, doActions=False )[0]
-
+ try:
+ return self._parse( instring, loc, doActions=False )[0]
+ except ParseFatalException, pbe:
+ raise ParseException( instring, loc, self.errmsg, self)
+
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
@@ -925,20 +947,20 @@ class ParserElement(object):
def resetCache():
ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
-
+
_packratEnabled = False
def enablePackrat():
"""Enables "packrat" parsing, which adds memoizing to the parsing logic.
- Repeated parse attempts at the same string location (which happens
- often in many complex grammars) can immediately return a cached value,
+ Repeated parse attempts at the same string location (which happens
+ often in many complex grammars) can immediately return a cached value,
instead of re-executing parsing/validating code. Memoizing is done of
both valid results and parsing exceptions.
-
- This speedup may break existing programs that use parse actions that
+
+ This speedup may break existing programs that use parse actions that
have side-effects. For this reason, packrat parsing is disabled when
you first import pyparsing. To activate the packrat feature, your
program must call the class method ParserElement.enablePackrat(). If
- your program uses psyco to "compile as you go", you must call
+ your program uses psyco to "compile as you go", you must call
enablePackrat before calling psyco.full(). If you do not do this,
Python will crash. For best results, call enablePackrat() immediately
after importing pyparsing.
@@ -950,25 +972,25 @@ class ParserElement(object):
def parseString( self, instring, parseAll=False ):
"""Execute the parse expression with the given string.
- This is the main interface to the client code, once the complete
+ This is the main interface to the client code, once the complete
expression has been built.
-
+
If you want the grammar to require that the entire input string be
successfully parsed, then set parseAll to True (equivalent to ending
the grammar with StringEnd()).
-
+
Note: parseString implicitly calls expandtabs() on the input string,
- in order to report proper column numbers in parse actions.
+ in order to report proper column numbers in parse actions.
If the input string contains tabs and
- the grammar uses parse actions that use the loc argument to index into the
- string being parsed, you can ensure you have a consistent view of the input
+ the grammar uses parse actions that use the loc argument to index into the
+ string being parsed, you can ensure you have a consistent view of the input
string by:
- calling parseWithTabs on your grammar before calling parseString
(see L{I{parseWithTabs}<parseWithTabs>})
- - define your parse action using the full (s,loc,toks) signature, and
+ - define your parse action using the full (s,loc,toks) signature, and
reference the input string using the parse action's s argument
- - explictly expand the tabs in your input string before calling
- parseString
+ - explictly expand the tabs in your input string before calling
+ parseString
"""
ParserElement.resetCache()
if not self.streamlined:
@@ -983,19 +1005,19 @@ class ParserElement(object):
StringEnd()._parse( instring, loc )
return tokens
- def scanString( self, instring, maxMatches=__MAX_INT__ ):
- """Scan the input string for expression matches. Each match will return the
+ def scanString( self, instring, maxMatches=_MAX_INT ):
+ """Scan the input string for expression matches. Each match will return the
matching tokens, start location, and end location. May be called with optional
maxMatches argument, to clip scanning after 'n' matches are found.
-
+
Note that the start and end locations are reported relative to the string
- being parsed. See L{I{parseString}<parseString>} for more information on parsing
+ being parsed. See L{I{parseString}<parseString>} for more information on parsing
strings with embedded tabs."""
if not self.streamlined:
self.streamline()
for e in self.ignoreExprs:
e.streamline()
-
+
if not self.keepTabs:
instring = _ustr(instring).expandtabs()
instrlen = len(instring)
@@ -1014,13 +1036,13 @@ class ParserElement(object):
matches += 1
yield tokens, preloc, nextLoc
loc = nextLoc
-
+
def transformString( self, instring ):
"""Extension to scanString, to modify matching text with modified tokens that may
- be returned from a parse action. To use transformString, define a grammar and
- attach a parse action to it that modifies the returned token list.
- Invoking transformString() on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
+ be returned from a parse action. To use transformString, define a grammar and
+ attach a parse action to it that modifies the returned token list.
+ Invoking transformString() on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
action. transformString() returns the resulting transformed string."""
out = []
lastE = 0
@@ -1040,16 +1062,16 @@ class ParserElement(object):
out.append(instring[lastE:])
return "".join(map(_ustr,out))
- def searchString( self, instring, maxMatches=__MAX_INT__ ):
+ def searchString( self, instring, maxMatches=_MAX_INT ):
"""Another extension to scanString, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
maxMatches argument, to clip searching after 'n' matches are found.
"""
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
-
+
def __add__(self, other ):
"""Implementation of + operator - returns And"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1059,7 +1081,7 @@ class ParserElement(object):
def __radd__(self, other ):
"""Implementation of + operator when left operand is not a ParserElement"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1067,6 +1089,26 @@ class ParserElement(object):
return None
return other + self
+ def __sub__(self, other):
+ """Implementation of - operator, returns And with error stop"""
+ if isinstance( other, basestring ):
+ other = Literal( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return And( [ self, And._ErrorStop(), other ] )
+
+ def __rsub__(self, other ):
+ """Implementation of - operator when left operand is not a ParserElement"""
+ if isinstance( other, basestring ):
+ other = Literal( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other - self
+
def __mul__(self,other):
if isinstance(other,int):
minElements, optElements = other,0
@@ -1094,14 +1136,14 @@ class ParserElement(object):
raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
else:
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
-
+
if minElements < 0:
raise ValueError("cannot multiply ParserElement by negative value")
if optElements < 0:
raise ValueError("second tuple value must be greater or equal to first tuple value")
if minElements == optElements == 0:
raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
-
+
if (optElements):
def makeOptionalList(n):
if n>1:
@@ -1121,13 +1163,13 @@ class ParserElement(object):
else:
ret = And([self]*minElements)
return ret
-
+
def __rmul__(self, other):
return self.__mul__(other)
def __or__(self, other ):
"""Implementation of | operator - returns MatchFirst"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1137,7 +1179,7 @@ class ParserElement(object):
def __ror__(self, other ):
"""Implementation of | operator when left operand is not a ParserElement"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1147,7 +1189,7 @@ class ParserElement(object):
def __xor__(self, other ):
"""Implementation of ^ operator - returns Or"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1157,7 +1199,7 @@ class ParserElement(object):
def __rxor__(self, other ):
"""Implementation of ^ operator when left operand is not a ParserElement"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1167,7 +1209,7 @@ class ParserElement(object):
def __and__(self, other ):
"""Implementation of & operator - returns Each"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1177,7 +1219,7 @@ class ParserElement(object):
def __rand__(self, other ):
"""Implementation of & operator when left operand is not a ParserElement"""
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
@@ -1204,7 +1246,7 @@ class ParserElement(object):
return Suppress( self )
def leaveWhitespace( self ):
- """Disables the skipping of whitespace before matching the characters in the
+ """Disables the skipping of whitespace before matching the characters in the
ParserElement's defined pattern. This is normally only used internally by
the pyparsing module, but may be needed in some whitespace-sensitive grammars.
"""
@@ -1218,16 +1260,16 @@ class ParserElement(object):
self.whiteChars = chars
self.copyDefaultWhiteChars = False
return self
-
+
def parseWithTabs( self ):
"""Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
- Must be called before parseString when the input grammar contains elements that
+ Must be called before parseString when the input grammar contains elements that
match <TAB> characters."""
self.keepTabs = True
return self
-
+
def ignore( self, other ):
- """Define expression to be ignored (e.g., comments) while doing pattern
+ """Define expression to be ignored (e.g., comments) while doing pattern
matching; may be called repeatedly, to define multiple comment or other
ignorable patterns.
"""
@@ -1240,8 +1282,8 @@ class ParserElement(object):
def setDebugActions( self, startAction, successAction, exceptionAction ):
"""Enable display of debugging messages while doing pattern matching."""
- self.debugActions = (startAction or _defaultStartDebugAction,
- successAction or _defaultSuccessDebugAction,
+ self.debugActions = (startAction or _defaultStartDebugAction,
+ successAction or _defaultSuccessDebugAction,
exceptionAction or _defaultExceptionDebugAction)
self.debug = True
return self
@@ -1260,15 +1302,15 @@ class ParserElement(object):
def __repr__( self ):
return _ustr(self)
-
+
def streamline( self ):
self.streamlined = True
self.strRepr = None
return self
-
+
def checkRecursion( self, parseElementList ):
pass
-
+
def validate( self, validateTrace=[] ):
"""Check defined expressions for valid structure, check for infinite recursive definitions."""
self.checkRecursion( [] )
@@ -1288,16 +1330,16 @@ class ParserElement(object):
def getException(self):
return ParseException("",0,self.errmsg,self)
-
+
def __getattr__(self,aname):
if aname == "myException":
self.myException = ret = self.getException();
return ret;
else:
- raise AttributeError, "no such attribute " + aname
-
+ raise AttributeError("no such attribute " + aname)
+
def __eq__(self,other):
- if isinstance(other, __BASE_STRING__):
+ if isinstance(other, basestring):
try:
(self + StringEnd()).parseString(_ustr(other))
return True
@@ -1305,7 +1347,7 @@ class ParserElement(object):
return False
else:
return super(ParserElement,self)==other
-
+
def __hash__(self):
return hash(id(self))
@@ -1324,7 +1366,7 @@ class Token(ParserElement):
self.errmsg = "Expected " + self.name
#s.myException.msg = self.errmsg
return s
-
+
class Empty(Token):
"""An empty token, will always match."""
@@ -1344,7 +1386,7 @@ class NoMatch(Token):
self.mayIndexError = False
self.errmsg = "Unmatchable token"
#self.myException.msg = self.errmsg
-
+
def parseImpl( self, instring, loc, doActions=True ):
exc = self.myException
exc.loc = loc
@@ -1361,7 +1403,7 @@ class Literal(Token):
try:
self.firstMatchChar = matchString[0]
except IndexError:
- warnings.warn("null string passed to Literal; use Empty() instead",
+ warnings.warn("null string passed to Literal; use Empty() instead",
SyntaxWarning, stacklevel=2)
self.__class__ = Empty
self.name = '"%s"' % _ustr(self.match)
@@ -1386,7 +1428,7 @@ class Literal(Token):
_L = Literal
class Keyword(Token):
- """Token to exactly match a specified string as a keyword, that is, it must be
+ """Token to exactly match a specified string as a keyword, that is, it must be
immediately followed by a non-keyword character. Compare with Literal::
Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
@@ -1396,7 +1438,7 @@ class Keyword(Token):
matching, default is False.
"""
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
-
+
def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
super(Keyword,self).__init__()
self.match = matchString
@@ -1404,7 +1446,7 @@ class Keyword(Token):
try:
self.firstMatchChar = matchString[0]
except IndexError:
- warnings.warn("null string passed to Keyword; use Empty() instead",
+ warnings.warn("null string passed to Keyword; use Empty() instead",
SyntaxWarning, stacklevel=2)
self.name = '"%s"' % self.match
self.errmsg = "Expected " + self.name
@@ -1434,17 +1476,17 @@ class Keyword(Token):
exc.loc = loc
exc.pstr = instring
raise exc
-
+
def copy(self):
c = super(Keyword,self).copy()
c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
return c
-
+
def setDefaultKeywordChars( chars ):
"""Overrides the default Keyword chars
"""
Keyword.DEFAULT_KEYWORD_CHARS = chars
- setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+ setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal):
@@ -1502,18 +1544,18 @@ class Word(Token):
else:
self.bodyCharsOrig = initChars
self.bodyChars = _str2dict(initChars)
-
+
self.maxSpecified = max > 0
-
+
if min < 1:
- raise ValueError, "cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted"
+ raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
self.minLen = min
if max > 0:
self.maxLen = max
else:
- self.maxLen = __MAX_INT__
+ self.maxLen = _MAX_INT
if exact > 0:
self.maxLen = exact
@@ -1524,7 +1566,7 @@ class Word(Token):
#self.myException.msg = self.errmsg
self.mayIndexError = False
self.asKeyword = asKeyword
-
+
if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
if self.bodyCharsOrig == self.initCharsOrig:
self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
@@ -1542,7 +1584,7 @@ class Word(Token):
self.re = re.compile( self.reString )
except:
self.re = None
-
+
def parseImpl( self, instring, loc, doActions=True ):
if self.re:
result = self.re.match(instring,loc)
@@ -1551,10 +1593,10 @@ class Word(Token):
exc.loc = loc
exc.pstr = instring
raise exc
-
+
loc = result.end()
return loc,result.group()
-
+
if not(instring[ loc ] in self.initChars):
#~ raise ParseException( instring, loc, self.errmsg )
exc = self.myException
@@ -1569,7 +1611,7 @@ class Word(Token):
maxloc = min( maxloc, instrlen )
while loc < maxloc and instring[loc] in bodychars:
loc += 1
-
+
throwException = False
if loc - start < self.minLen:
throwException = True
@@ -1594,15 +1636,15 @@ class Word(Token):
except:
pass
-
+
if self.strRepr is None:
-
+
def charsAsStr(s):
if len(s)>4:
return s[:4]+"..."
else:
return s
-
+
if ( self.initCharsOrig != self.bodyCharsOrig ):
self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
else:
@@ -1618,19 +1660,19 @@ class Regex(Token):
def __init__( self, pattern, flags=0):
"""The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
super(Regex,self).__init__()
-
+
if len(pattern) == 0:
- warnings.warn("null string passed to Regex; use Empty() instead",
+ warnings.warn("null string passed to Regex; use Empty() instead",
SyntaxWarning, stacklevel=2)
-
+
self.pattern = pattern
self.flags = flags
-
+
try:
self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern
except sre_constants.error,e:
- warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+ warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
SyntaxWarning, stacklevel=2)
raise
@@ -1639,7 +1681,7 @@ class Regex(Token):
#self.myException.msg = self.errmsg
self.mayIndexError = False
self.mayReturnEmpty = True
-
+
def parseImpl( self, instring, loc, doActions=True ):
result = self.re.match(instring,loc)
if not result:
@@ -1647,7 +1689,7 @@ class Regex(Token):
exc.loc = loc
exc.pstr = instring
raise exc
-
+
loc = result.end()
d = result.groupdict()
ret = ParseResults(result.group())
@@ -1655,16 +1697,16 @@ class Regex(Token):
for k in d:
ret[k] = d[k]
return loc,ret
-
+
def __str__( self ):
try:
return super(Regex,self).__str__()
except:
pass
-
+
if self.strRepr is None:
self.strRepr = "Re:(%s)" % repr(self.pattern)
-
+
return self.strRepr
@@ -1682,13 +1724,13 @@ class QuotedString(Token):
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
"""
super(QuotedString,self).__init__()
-
+
# remove white space from quote chars - wont work anyway
quoteChar = quoteChar.strip()
if len(quoteChar) == 0:
warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
raise SyntaxError()
-
+
if endQuoteChar is None:
endQuoteChar = quoteChar
else:
@@ -1696,7 +1738,7 @@ class QuotedString(Token):
if len(endQuoteChar) == 0:
warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
raise SyntaxError()
-
+
self.quoteChar = quoteChar
self.quoteCharLen = len(quoteChar)
self.firstQuoteChar = quoteChar[0]
@@ -1705,7 +1747,7 @@ class QuotedString(Token):
self.escChar = escChar
self.escQuote = escQuote
self.unquoteResults = unquoteResults
-
+
if multiline:
self.flags = re.MULTILINE | re.DOTALL
self.pattern = r'%s(?:[^%s%s]' % \
@@ -1721,7 +1763,7 @@ class QuotedString(Token):
if len(self.endQuoteChar) > 1:
self.pattern += (
'|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
- _escapeRegexRangeChars(self.endQuoteChar[i]))
+ _escapeRegexRangeChars(self.endQuoteChar[i]))
for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
)
if escQuote:
@@ -1730,12 +1772,12 @@ class QuotedString(Token):
self.pattern += (r'|(?:%s.)' % re.escape(escChar))
self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
-
+
try:
self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern
except sre_constants.error,e:
- warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+ warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
SyntaxWarning, stacklevel=2)
raise
@@ -1744,7 +1786,7 @@ class QuotedString(Token):
#self.myException.msg = self.errmsg
self.mayIndexError = False
self.mayReturnEmpty = True
-
+
def parseImpl( self, instring, loc, doActions=True ):
result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
if not result:
@@ -1752,16 +1794,16 @@ class QuotedString(Token):
exc.loc = loc
exc.pstr = instring
raise exc
-
+
loc = result.end()
ret = result.group()
-
+
if self.unquoteResults:
-
+
# strip off quotes
ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
-
- if isinstance(ret,__BASE_STRING__):
+
+ if isinstance(ret,basestring):
# replace escaped characters
if self.escChar:
ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
@@ -1771,22 +1813,22 @@ class QuotedString(Token):
ret = ret.replace(self.escQuote, self.endQuoteChar)
return loc, ret
-
+
def __str__( self ):
try:
return super(QuotedString,self).__str__()
except:
pass
-
+
if self.strRepr is None:
self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
-
+
return self.strRepr
class CharsNotIn(Token):
"""Token for matching words composed of characters *not* in a given set.
- Defined with string containing all disallowed characters, and an optional
+ Defined with string containing all disallowed characters, and an optional
minimum, maximum, and/or exact length. The default value for min is 1 (a
minimum value < 1 is not valid); the default values for max and exact
are 0, meaning no maximum or exact length restriction.
@@ -1795,21 +1837,21 @@ class CharsNotIn(Token):
super(CharsNotIn,self).__init__()
self.skipWhitespace = False
self.notChars = notChars
-
+
if min < 1:
- raise ValueError, "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted"
+ raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
self.minLen = min
if max > 0:
self.maxLen = max
else:
- self.maxLen = __MAX_INT__
+ self.maxLen = _MAX_INT
if exact > 0:
self.maxLen = exact
self.minLen = exact
-
+
self.name = _ustr(self)
self.errmsg = "Expected " + self.name
self.mayReturnEmpty = ( self.minLen == 0 )
@@ -1823,7 +1865,7 @@ class CharsNotIn(Token):
exc.loc = loc
exc.pstr = instring
raise exc
-
+
start = loc
loc += 1
notchars = self.notChars
@@ -1852,7 +1894,7 @@ class CharsNotIn(Token):
self.strRepr = "!W:(%s...)" % self.notChars[:4]
else:
self.strRepr = "!W:(%s)" % self.notChars
-
+
return self.strRepr
class White(Token):
@@ -1883,12 +1925,12 @@ class White(Token):
if max > 0:
self.maxLen = max
else:
- self.maxLen = __MAX_INT__
+ self.maxLen = _MAX_INT
if exact > 0:
self.maxLen = exact
self.minLen = exact
-
+
def parseImpl( self, instring, loc, doActions=True ):
if not(instring[ loc ] in self.matchWhite):
#~ raise ParseException( instring, loc, self.errmsg )
@@ -1975,7 +2017,7 @@ class LineEnd(_PositionToken):
self.setWhitespaceChars( " \t" )
self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg
-
+
def parseImpl( self, instring, loc, doActions=True ):
if loc<len(instring):
if instring[loc] == "\n":
@@ -2000,7 +2042,7 @@ class StringStart(_PositionToken):
super(StringStart,self).__init__()
self.errmsg = "Expected start of text"
#self.myException.msg = self.errmsg
-
+
def parseImpl( self, instring, loc, doActions=True ):
if loc != 0:
# see if entire string up to here is just whitespace and ignoreables
@@ -2018,7 +2060,7 @@ class StringEnd(_PositionToken):
super(StringEnd,self).__init__()
self.errmsg = "Expected end of text"
#self.myException.msg = self.errmsg
-
+
def parseImpl( self, instring, loc, doActions=True ):
if loc < len(instring):
#~ raise ParseException( instring, loc, "Expected end of text" )
@@ -2037,17 +2079,17 @@ class StringEnd(_PositionToken):
raise exc
class WordStart(_PositionToken):
- """Matches if the current position is at the beginning of a Word, and
- is not preceded by any character in a given set of wordChars
- (default=printables). To emulate the \b behavior of regular expressions,
- use WordStart(alphanums). WordStart will also match at the beginning of
+ """Matches if the current position is at the beginning of a Word, and
+ is not preceded by any character in a given set of wordChars
+ (default=printables). To emulate the \b behavior of regular expressions,
+ use WordStart(alphanums). WordStart will also match at the beginning of
the string being parsed, or at the beginning of a line.
"""
def __init__(self, wordChars = printables):
super(WordStart,self).__init__()
self.wordChars = _str2dict(wordChars)
self.errmsg = "Not at the start of a word"
-
+
def parseImpl(self, instring, loc, doActions=True ):
if loc != 0:
if (instring[loc-1] in self.wordChars or
@@ -2059,10 +2101,10 @@ class WordStart(_PositionToken):
return loc, []
class WordEnd(_PositionToken):
- """Matches if the current position is at the end of a Word, and
- is not followed by any character in a given set of wordChars
- (default=printables). To emulate the \b behavior of regular expressions,
- use WordEnd(alphanums). WordEnd will also match at the end of
+ """Matches if the current position is at the end of a Word, and
+ is not followed by any character in a given set of wordChars
+ (default=printables). To emulate the \b behavior of regular expressions,
+ use WordEnd(alphanums). WordEnd will also match at the end of
the string being parsed, or at the end of a line.
"""
def __init__(self, wordChars = printables):
@@ -2070,7 +2112,7 @@ class WordEnd(_PositionToken):
self.wordChars = _str2dict(wordChars)
self.skipWhitespace = False
self.errmsg = "Not at the end of a word"
-
+
def parseImpl(self, instring, loc, doActions=True ):
instrlen = len(instring)
if instrlen>0 and loc<instrlen:
@@ -2090,7 +2132,7 @@ class ParseExpression(ParserElement):
super(ParseExpression,self).__init__(savelist)
if isinstance( exprs, list ):
self.exprs = exprs
- elif isinstance( exprs, __BASE_STRING__ ):
+ elif isinstance( exprs, basestring ):
self.exprs = [ Literal( exprs ) ]
else:
self.exprs = [ exprs ]
@@ -2130,7 +2172,7 @@ class ParseExpression(ParserElement):
return super(ParseExpression,self).__str__()
except:
pass
-
+
if self.strRepr is None:
self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
return self.strRepr
@@ -2170,7 +2212,7 @@ class ParseExpression(ParserElement):
def setResultsName( self, name, listAllMatches=False ):
ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
return ret
-
+
def validate( self, validateTrace=[] ):
tmp = validateTrace[:]+[self]
for e in self.exprs:
@@ -2182,6 +2224,13 @@ class And(ParseExpression):
Expressions may be separated by whitespace.
May be constructed using the '+' operator.
"""
+
+ class _ErrorStop(Empty):
+ def __new__(cls,*args,**kwargs):
+ return And._ErrorStop.instance
+ _ErrorStop.instance = Empty()
+ _ErrorStop.instance.leaveWhitespace()
+
def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist)
self.mayReturnEmpty = True
@@ -2197,33 +2246,45 @@ class And(ParseExpression):
# pass False as last arg to _parse for first element, since we already
# pre-parsed the string as part of our And pre-parsing
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+ errorStop = False
for e in self.exprs[1:]:
- loc, exprtokens = e._parse( instring, loc, doActions )
+ if e is And._ErrorStop.instance:
+ errorStop = True
+ continue
+ if errorStop:
+ try:
+ loc, exprtokens = e._parse( instring, loc, doActions )
+ except ParseException, pe:
+ raise ParseSyntaxException(pe)
+ except IndexError, ie:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ else:
+ loc, exprtokens = e._parse( instring, loc, doActions )
if exprtokens or exprtokens.keys():
resultlist += exprtokens
return loc, resultlist
def __iadd__(self, other ):
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
return self.append( other ) #And( [ self, other ] )
-
+
def checkRecursion( self, parseElementList ):
subRecCheckList = parseElementList[:] + [ self ]
for e in self.exprs:
e.checkRecursion( subRecCheckList )
if not e.mayReturnEmpty:
break
-
+
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
-
+
return self.strRepr
-
+
class Or(ParseExpression):
"""Requires that at least one ParseExpression is found.
@@ -2237,7 +2298,7 @@ class Or(ParseExpression):
if e.mayReturnEmpty:
self.mayReturnEmpty = True
break
-
+
def parseImpl( self, instring, loc, doActions=True ):
maxExcLoc = -1
maxMatchLoc = -1
@@ -2256,7 +2317,7 @@ class Or(ParseExpression):
if loc2 > maxMatchLoc:
maxMatchLoc = loc2
maxMatchExp = e
-
+
if maxMatchLoc < 0:
if self.exprs:
raise maxException
@@ -2266,19 +2327,19 @@ class Or(ParseExpression):
return maxMatchExp._parse( instring, loc, doActions )
def __ixor__(self, other ):
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
return self.append( other ) #Or( [ self, other ] )
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
-
+
return self.strRepr
-
+
def checkRecursion( self, parseElementList ):
subRecCheckList = parseElementList[:] + [ self ]
for e in self.exprs:
@@ -2300,7 +2361,7 @@ class MatchFirst(ParseExpression):
break
else:
self.mayReturnEmpty = True
-
+
def parseImpl( self, instring, loc, doActions=True ):
maxExcLoc = -1
for e in self.exprs:
@@ -2324,19 +2385,19 @@ class MatchFirst(ParseExpression):
raise ParseException(instring, loc, "no defined alternatives to match", self)
def __ior__(self, other ):
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal( other )
return self.append( other ) #MatchFirst( [ self, other ] )
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
-
+
return self.strRepr
-
+
def checkRecursion( self, parseElementList ):
subRecCheckList = parseElementList[:] + [ self ]
for e in self.exprs:
@@ -2388,7 +2449,7 @@ class Each(ParseExpression):
tmpOpt.remove(e)
if len(failed) == len(tmpExprs):
keepMatching = False
-
+
if tmpReqd:
missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
@@ -2397,7 +2458,7 @@ class Each(ParseExpression):
for e in matchOrder:
loc,results = e._parse(instring,loc,doActions)
resultlist.append(results)
-
+
finalResults = ParseResults([])
for r in resultlist:
dups = {}
@@ -2414,12 +2475,12 @@ class Each(ParseExpression):
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
-
+
return self.strRepr
-
+
def checkRecursion( self, parseElementList ):
subRecCheckList = parseElementList[:] + [ self ]
for e in self.exprs:
@@ -2430,7 +2491,7 @@ class ParseElementEnhance(ParserElement):
"""Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
super(ParseElementEnhance,self).__init__(savelist)
- if isinstance( expr, __BASE_STRING__ ):
+ if isinstance( expr, basestring ):
expr = Literal(expr)
self.expr = expr
self.strRepr = None
@@ -2448,7 +2509,7 @@ class ParseElementEnhance(ParserElement):
return self.expr._parse( instring, loc, doActions, callPreParse=False )
else:
raise ParseException("",loc,self.errmsg,self)
-
+
def leaveWhitespace( self ):
self.skipWhitespace = False
self.expr = self.expr.copy()
@@ -2480,19 +2541,19 @@ class ParseElementEnhance(ParserElement):
subRecCheckList = parseElementList[:] + [ self ]
if self.expr is not None:
self.expr.checkRecursion( subRecCheckList )
-
+
def validate( self, validateTrace=[] ):
tmp = validateTrace[:]+[self]
if self.expr is not None:
self.expr.validate(tmp)
self.checkRecursion( [] )
-
+
def __str__( self ):
try:
return super(ParseElementEnhance,self).__str__()
except:
pass
-
+
if self.strRepr is None and self.expr is not None:
self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
return self.strRepr
@@ -2500,13 +2561,13 @@ class ParseElementEnhance(ParserElement):
class FollowedBy(ParseElementEnhance):
"""Lookahead matching of the given parse expression. FollowedBy
- does *not* advance the parsing position within the input string, it only
- verifies that the specified parse expression matches at the current
+ does *not* advance the parsing position within the input string, it only
+ verifies that the specified parse expression matches at the current
position. FollowedBy always returns a null token list."""
def __init__( self, expr ):
super(FollowedBy,self).__init__(expr)
self.mayReturnEmpty = True
-
+
def parseImpl( self, instring, loc, doActions=True ):
self.expr.tryParse( instring, loc )
return loc, []
@@ -2514,9 +2575,9 @@ class FollowedBy(ParseElementEnhance):
class NotAny(ParseElementEnhance):
"""Lookahead to disallow matching with the given parse expression. NotAny
- does *not* advance the parsing position within the input string, it only
- verifies that the specified parse expression does *not* match at the current
- position. Also, NotAny does *not* skip over leading whitespace. NotAny
+ does *not* advance the parsing position within the input string, it only
+ verifies that the specified parse expression does *not* match at the current
+ position. Also, NotAny does *not* skip over leading whitespace. NotAny
always returns a null token list. May be constructed using the '~' operator."""
def __init__( self, expr ):
super(NotAny,self).__init__(expr)
@@ -2525,7 +2586,7 @@ class NotAny(ParseElementEnhance):
self.mayReturnEmpty = True
self.errmsg = "Found unwanted token, "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self)
-
+
def parseImpl( self, instring, loc, doActions=True ):
try:
self.expr.tryParse( instring, loc )
@@ -2542,10 +2603,10 @@ class NotAny(ParseElementEnhance):
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "~{" + _ustr(self.expr) + "}"
-
+
return self.strRepr
@@ -2554,7 +2615,7 @@ class ZeroOrMore(ParseElementEnhance):
def __init__( self, expr ):
super(ZeroOrMore,self).__init__(expr)
self.mayReturnEmpty = True
-
+
def parseImpl( self, instring, loc, doActions=True ):
tokens = []
try:
@@ -2576,17 +2637,17 @@ class ZeroOrMore(ParseElementEnhance):
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "[" + _ustr(self.expr) + "]..."
-
+
return self.strRepr
-
+
def setResultsName( self, name, listAllMatches=False ):
ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
ret.saveAsList = True
return ret
-
+
class OneOrMore(ParseElementEnhance):
"""Repetition of one or more of the given expression."""
@@ -2611,12 +2672,12 @@ class OneOrMore(ParseElementEnhance):
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "{" + _ustr(self.expr) + "}..."
-
+
return self.strRepr
-
+
def setResultsName( self, name, listAllMatches=False ):
ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
ret.saveAsList = True
@@ -2652,17 +2713,17 @@ class Optional(ParseElementEnhance):
def __str__( self ):
if hasattr(self,"name"):
return self.name
-
+
if self.strRepr is None:
self.strRepr = "[" + _ustr(self.expr) + "]"
-
+
return self.strRepr
class SkipTo(ParseElementEnhance):
"""Token for skipping over all undefined text until the matched expression is found.
If include is set to true, the matched expression is also consumed. The ignore
- argument is used to define grammars (typically quoted strings and comments) that
+ argument is used to define grammars (typically quoted strings and comments) that
might contain false matches.
"""
def __init__( self, other, include=False, ignore=None ):
@@ -2707,7 +2768,7 @@ class Forward(ParseElementEnhance):
"""Forward declaration of an expression to be defined later -
used for recursive grammars, such as algebraic infix notation.
When the expression is known, it is assigned to the Forward variable using the '<<' operator.
-
+
Note: take care when assigning to Forward not to overlook precedence of operators.
Specifically, '|' has a lower precedence than '<<', so that::
fwdExpr << a | b | c
@@ -2721,7 +2782,7 @@ class Forward(ParseElementEnhance):
super(Forward,self).__init__( other, savelist=False )
def __lshift__( self, other ):
- if isinstance( other, __BASE_STRING__ ):
+ if isinstance( other, basestring ):
other = Literal(other)
self.expr = other
self.mayReturnEmpty = other.mayReturnEmpty
@@ -2730,7 +2791,7 @@ class Forward(ParseElementEnhance):
self.mayReturnEmpty = self.expr.mayReturnEmpty
self.setWhitespaceChars( self.expr.whiteChars )
self.skipWhitespace = self.expr.skipWhitespace
- self.saveAsList = self.expr.saveAsList
+ self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs)
return None
@@ -2741,31 +2802,31 @@ class Forward(ParseElementEnhance):
def streamline( self ):
if not self.streamlined:
self.streamlined = True
- if self.expr is not None:
+ if self.expr is not None:
self.expr.streamline()
return self
def validate( self, validateTrace=[] ):
if self not in validateTrace:
tmp = validateTrace[:]+[self]
- if self.expr is not None:
+ if self.expr is not None:
self.expr.validate(tmp)
- self.checkRecursion([])
-
+ self.checkRecursion([])
+
def __str__( self ):
if hasattr(self,"name"):
return self.name
self.__class__ = _ForwardNoRecurse
try:
- if self.expr is not None:
+ if self.expr is not None:
retString = _ustr(self.expr)
else:
retString = "None"
finally:
self.__class__ = Forward
return "Forward: "+retString
-
+
def copy(self):
if self.expr is not None:
return super(Forward,self).copy()
@@ -2777,7 +2838,7 @@ class Forward(ParseElementEnhance):
class _ForwardNoRecurse(Forward):
def __str__( self ):
return "..."
-
+
class TokenConverter(ParseElementEnhance):
"""Abstract subclass of ParseExpression, for converting parsed results."""
def __init__( self, expr, savelist=False ):
@@ -2788,9 +2849,9 @@ class Upcase(TokenConverter):
"""Converter to upper case all matching tokens."""
def __init__(self, *args):
super(Upcase,self).__init__(*args)
- warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
+ warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
DeprecationWarning,stacklevel=2)
-
+
def postParse( self, instring, loc, tokenlist ):
return map( string.upper, tokenlist )
@@ -2834,7 +2895,7 @@ class Group(TokenConverter):
def postParse( self, instring, loc, tokenlist ):
return [ tokenlist ]
-
+
class Dict(TokenConverter):
"""Converter to return a repetitive expression as a list, but also as a dictionary.
Each element can also be referenced using the first token in the expression as its key.
@@ -2846,7 +2907,7 @@ class Dict(TokenConverter):
def postParse( self, instring, loc, tokenlist ):
for i,tok in enumerate(tokenlist):
- if len(tok) == 0:
+ if len(tok) == 0:
continue
ikey = tok[0]
if isinstance(ikey,int):
@@ -2873,7 +2934,7 @@ class Suppress(TokenConverter):
"""Converter for ignoring the results of a parsed expression."""
def postParse( self, instring, loc, tokenlist ):
return []
-
+
def suppress( self ):
return self
@@ -2913,13 +2974,13 @@ def traceParseAction(f):
except AttributeError:
pass
return z
-
+
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
"""Helper to define a delimited list of expressions - the delimiter defaults to ','.
- By default, the list elements and delimiters can have intervening whitespace, and
+ By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing 'combine=True' in the constructor.
If combine is set to True, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned
@@ -2957,8 +3018,8 @@ def matchPreviousLiteral(expr):
first = Word(nums)
second = matchPreviousLiteral(first)
matchExpr = first + ":" + second
- will match "1:1", but not "1:2". Because this matches a
- previous literal, will also match the leading "1:1" in "1:10".
+ will match "1:1", but not "1:2". Because this matches a
+ previous literal, will also match the leading "1:1" in "1:10".
If this is not desired, use matchPreviousExpr.
Do *not* use with packrat parsing enabled.
"""
@@ -2975,7 +3036,7 @@ def matchPreviousLiteral(expr):
rep << Empty()
expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
return rep
-
+
def matchPreviousExpr(expr):
"""Helper to define an expression that is indirectly defined from
the tokens matched in a previous expression, that is, it looks
@@ -3001,7 +3062,7 @@ def matchPreviousExpr(expr):
rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
return rep
-
+
def _escapeRegexRangeChars(s):
#~ escape these chars: ^-]
for c in r"\^-]":
@@ -3009,12 +3070,12 @@ def _escapeRegexRangeChars(s):
s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t")
return _ustr(s)
-
+
def oneOf( strs, caseless=False, useRegex=True ):
- """Helper to quickly define a set of alternative Literals, and makes sure to do
- longest-first testing when there is a conflict, regardless of the input order,
- but returns a MatchFirst for best performance.
-
+ """Helper to quickly define a set of alternative Literals, and makes sure to do
+ longest-first testing when there is a conflict, regardless of the input order,
+ but returns a MatchFirst for best performance.
+
Parameters:
- strs - a string of space-delimited literals, or a list of string literals
- caseless - (default=False) - treat all literals as caseless
@@ -3030,15 +3091,15 @@ def oneOf( strs, caseless=False, useRegex=True ):
isequal = ( lambda a,b: a == b )
masks = ( lambda a,b: b.startswith(a) )
parseElementClass = Literal
-
+
if isinstance(strs,(list,tuple)):
symbols = strs[:]
- elif isinstance(strs,__BASE_STRING__):
+ elif isinstance(strs,basestring):
symbols = strs.split()
else:
warnings.warn("Invalid argument to oneOf, expected string or list",
SyntaxWarning, stacklevel=2)
-
+
i = 0
while i < len(symbols)-1:
cur = symbols[i]
@@ -3074,7 +3135,7 @@ def dictOf( key, value ):
for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
in the proper order. The key pattern can include delimiting markers or punctuation,
as long as they are suppressed, thereby leaving the significant key text. The value
- pattern can include named results, so that the Dict results can include named token
+ pattern can include named results, so that the Dict results can include named token
fields.
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
@@ -3095,14 +3156,14 @@ _charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
-
+
def srange(s):
r"""Helper to easily define string ranges for use in Word construction. Borrows
syntax from regexp '[]' string range definitions::
srange("[0-9]") -> "0123456789"
srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
- The input string must be enclosed in []'s, and the returned string is the expanded
+ The input string must be enclosed in []'s, and the returned string is the expanded
character set joined into a single string.
The values enclosed in the []'s may be::
a single character
@@ -3117,17 +3178,17 @@ def srange(s):
except:
return ""
-def matchOnlyAtCol(n):
- """Helper method for defining parse actions that require matching at a specific
+def matchOnlyAtCol(n):
+ """Helper method for defining parse actions that require matching at a specific
column in the input text.
"""
- def verifyCol(strg,locn,toks):
- if col(locn,strg) != n:
- raise ParseException(strg,locn,"matched token not at column %d" % n)
+ def verifyCol(strg,locn,toks):
+ if col(locn,strg) != n:
+ raise ParseException(strg,locn,"matched token not at column %d" % n)
return verifyCol
def replaceWith(replStr):
- """Helper method for common parse actions that simply return a literal value. Especially
+ """Helper method for common parse actions that simply return a literal value. Especially
useful when used with transformString().
"""
def _replFunc(*args):
@@ -3155,13 +3216,13 @@ def keepOriginalText(s,startLoc,t):
try:
endloc = getTokensEndLoc()
except ParseException:
- raise ParseFatalException, "incorrect usage of keepOriginalText - may only be called as a parse action"
+ raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
del t[:]
t += ParseResults(s[startLoc:endloc])
return t
def getTokensEndLoc():
- """Method to be called from within a parse action to determine the end
+ """Method to be called from within a parse action to determine the end
location of the parsed tokens."""
import inspect
fstack = inspect.stack()
@@ -3172,18 +3233,18 @@ def getTokensEndLoc():
endloc = f[0].f_locals["loc"]
return endloc
else:
- raise ParseFatalException, "incorrect usage of getTokensEndLoc - may only be called from within a parse action"
+ raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
finally:
del fstack
def _makeTags(tagStr, xml):
"""Internal helper to construct opening and closing tag expressions, given a tag name"""
- if isinstance(tagStr,__BASE_STRING__):
+ if isinstance(tagStr,basestring):
resname = tagStr
tagStr = Keyword(tagStr, caseless=not xml)
else:
resname = tagStr.name
-
+
tagAttrName = Word(alphas,alphanums+"_-:")
if (xml):
tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
@@ -3198,10 +3259,10 @@ def _makeTags(tagStr, xml):
Optional( Suppress("=") + tagAttrValue ) ))) + \
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
closeTag = Combine(_L("</") + tagStr + ">")
-
+
openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
-
+
return openTag, closeTag
def makeHTMLTags(tagStr):
@@ -3213,18 +3274,18 @@ def makeXMLTags(tagStr):
return _makeTags( tagStr, True )
def withAttribute(*args,**attrDict):
- """Helper to create a validating parse action to be used with start tags created
- with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
- with a required attribute value, to avoid false matches on common tags such as
+ """Helper to create a validating parse action to be used with start tags created
+ with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
<TD> or <DIV>.
- Call withAttribute with a series of attribute names and values. Specify the list
+ Call withAttribute with a series of attribute names and values. Specify the list
of filter attributes names and values as:
- keyword arguments, as in (class="Customer",align="right"), or
- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
For attribute names with a namespace prefix, you must use the second form. Attribute
names are matched insensitive to upper/lower case.
-
+
To verify that the attribute exists, but without specifying a value, pass
withAttribute.ANY_VALUE as the value.
"""
@@ -3238,7 +3299,7 @@ def withAttribute(*args,**attrDict):
if attrName not in tokens:
raise ParseException(s,l,"no matching attribute " + attrName)
if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
- raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+ raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
(attrName, tokens[attrName], attrValue))
return pa
withAttribute.ANY_VALUE = object()
@@ -3248,26 +3309,26 @@ opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
- """Helper method for constructing grammars of expressions made up of
+ """Helper method for constructing grammars of expressions made up of
operators working in a precedence hierarchy. Operators may be unary or
binary, left- or right-associative. Parse actions can also be attached
to operator expressions.
-
+
Parameters:
- - baseExpr - expression representing the most basic element for the nested
- - opList - list of tuples, one for each operator precedence level in the
+ - baseExpr - expression representing the most basic element for the nested
+ - opList - list of tuples, one for each operator precedence level in the
expression grammar; each tuple is of the form
(opExpr, numTerms, rightLeftAssoc, parseAction), where:
- opExpr is the pyparsing expression for the operator;
may also be a string, which will be converted to a Literal;
- if numTerms is 3, opExpr is a tuple of two expressions, for the
+ if numTerms is 3, opExpr is a tuple of two expressions, for the
two operators separating the 3 terms
- numTerms is the number of terms for this operator (must
be 1, 2, or 3)
- rightLeftAssoc is the indicator whether the operator is
right or left associative, using the pyparsing-defined
constants opAssoc.RIGHT and opAssoc.LEFT.
- - parseAction is the parse action to be associated with
+ - parseAction is the parse action to be associated with
expressions matching this operator expression (the
parse action tuple member may be omitted)
"""
@@ -3277,7 +3338,7 @@ def operatorPrecedence( baseExpr, opList ):
opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
if arity == 3:
if opExpr is None or len(opExpr) != 2:
- raise ValueError, "if numterms=3, opExpr must be a tuple or list of two expressions"
+ raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
opExpr1, opExpr2 = opExpr
thisExpr = Forward()#.setName("expr%d" % i)
if rightLeftAssoc == opAssoc.LEFT:
@@ -3292,13 +3353,13 @@ def operatorPrecedence( baseExpr, opList ):
matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
else:
- raise ValueError, "operator must be unary (1), binary (2), or ternary (3)"
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
elif rightLeftAssoc == opAssoc.RIGHT:
if arity == 1:
# try to avoid LR with this extra test
if not isinstance(opExpr, Optional):
opExpr = Optional(opExpr)
- matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+ matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
elif arity == 2:
if opExpr is not None:
matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
@@ -3308,9 +3369,9 @@ def operatorPrecedence( baseExpr, opList ):
matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
else:
- raise ValueError, "operator must be unary (1), binary (2), or ternary (3)"
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
else:
- raise ValueError, "operator must indicate right or left associativity"
+ raise ValueError("operator must indicate right or left associativity")
if pa:
matchExpr.setParseAction( pa )
thisExpr << ( matchExpr | lastExpr )
@@ -3326,18 +3387,18 @@ unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
"""Helper method for defining nested lists enclosed in opening and closing
delimiters ("(" and ")" are the default).
-
+
Parameters:
- opener - opening character for a nested list (default="("); can also be a pyparsing expression
- closer - closing character for a nested list (default=")"); can also be a pyparsing expression
- content - expression for items within the nested lists (default=None)
- ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
-
+
If an expression is not provided for the content argument, the nested
expression will capture all whitespace-delimited content between delimiters
as a list of separate values.
-
- Use the ignoreExpr argument to define expressions that may contain
+
+ Use the ignoreExpr argument to define expressions that may contain
opening or closing characters that should not be treated as opening
or closing characters for nesting, such as quotedString or a comment
expression. Specify multiple expressions using an Or or MatchFirst.
@@ -3347,9 +3408,9 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
if opener == closer:
raise ValueError("opening and closing strings cannot be the same")
if content is None:
- if isinstance(opener,__BASE_STRING__) and isinstance(closer,__BASE_STRING__):
+ if isinstance(opener,basestring) and isinstance(closer,basestring):
if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
+ content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
@@ -3370,7 +3431,7 @@ anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
-
+
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
@@ -3382,8 +3443,8 @@ cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
-_commasepitem = Combine(OneOrMore(Word(_noncomma) +
- Optional( Word(" \t") +
+_commasepitem = Combine(OneOrMore(Word(_noncomma) +
+ Optional( Word(" \t") +
~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
@@ -3391,16 +3452,16 @@ commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, defa
if __name__ == "__main__":
def test( teststring ):
- print (teststring,"->",)
try:
tokens = simpleSQL.parseString( teststring )
tokenlist = tokens.asList()
- print (tokenlist)
- print ("tokens = ", tokens)
- print ("tokens.columns =", tokens.columns)
- print ("tokens.tables =", tokens.tables)
+ print (teststring + "->" + str(tokenlist))
+ print ("tokens = " + str(tokens))
+ print ("tokens.columns = " + str(tokens.columns))
+ print ("tokens.tables = " + str(tokens.tables))
print (tokens.asXML("SQL",True))
except ParseException,err:
+ print (teststring + "->")
print (err.line)
print (" "*(err.column-1) + "^")
print (err)
@@ -3418,7 +3479,7 @@ if __name__ == "__main__":
( '*' | columnNameList ).setResultsName( "columns" ) + \
fromToken + \
tableNameList.setResultsName( "tables" ) )
-
+
test( "SELECT * from XYZZY, ABC" )
test( "select * from SYS.XYZZY" )
test( "Select A from Sys.dual" )