summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul McGuire <ptmcg@austin.rr.com> 2019-10-13 13:15:18 -0500
committer Paul McGuire <ptmcg@austin.rr.com> 2019-10-13 13:15:18 -0500
commit 79379431f2382739b53041c83d485140eaf5207c (patch)
tree 0079b142c04abe2ec5453b1fe2d1389ccd94424b
parent a64494dd3ce957c5bdd9ec2d7114c9de88af7c28 (diff)
parent 63f49e9f5fca11d4168eb01ada8c049ef83c5299 (diff)
download pyparsing-git-79379431f2382739b53041c83d485140eaf5207c.tar.gz
Merge remote-tracking branch 'origin/pyparsing_2.4.x' into pyparsing_2.4.x
# Conflicts: # CHANGES # docs/HowToUsePyparsing.rst # pyparsing.py
-rw-r--r-- CHANGES 108
-rw-r--r-- MANIFEST.in 8
-rw-r--r-- README.rst 44
-rw-r--r-- docs/HowToUsePyparsing.rst 103
-rw-r--r-- pyparsing.py 53
-rw-r--r-- unitTests.py 29
6 files changed, 238 insertions, 107 deletions
diff --git a/CHANGES b/CHANGES
index 328e0c4..22e2a30 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,26 +2,20 @@
Change Log
==========
-Version 2.4.2a - July, 2019
----------------------------
-It turns out I got the meaning of `[...]` absolutely backwards,
-so I've deleted 2.4.1 and am repushing this release as 2.4.2a
-for people to give it a try before I call it ready to go.
+Version 2.4.3 - September, 2019
+-------------------------------
+- Fixed a bug in ParserElement.__eq__ that would for some parsers
+ create a recursion error at parser definition time. Thanks to
+ Michael Clerx for the assist. (Addresses issue #123)
-The `expr[...]` notation was pushed out to be synonymous with
-`OneOrMore(expr)`, but this is really counter to most Python
-notations (and even other internal pyparsing notations as well).
+- Backport from pyparsing 3.0.0 of __diag__.enable_all_warnings().
-It also seems that I introduced an ugly bug in the changes made
-to Or, so 2.4.1 really needs to be unreleased. So sorry,
-everyone!
-(Updated)
-- A new shorthand notation has been added for repetition
- expressions: expr[min, max], with '...' valid as a min
- or max value:
- - expr[...] and expr[0, ...] are equivalent to
- ZeroOrMore(expr)
+Version 2.4.2 - July, 2019
+--------------------------
+- Updated the shorthand notation that has been added for repetition
+ expressions: expr[min, max], with '...' valid as a min or max value:
+ - expr[...] and expr[0, ...] are equivalent to ZeroOrMore(expr)
- expr[1, ...] is equivalent to OneOrMore(expr)
- expr[n, ...] or expr[n,] is equivalent
to expr*n + ZeroOrMore(expr)
@@ -32,13 +26,91 @@ everyone!
if more than n exprs exist in the input stream. If this
behavior is desired, then write expr[..., n] + ~expr.
+ Better interpretation of [...] as ZeroOrMore raised by crowsonkb,
+ thanks for keeping me in line!
+
+ If upgrading from 2.4.1 or 2.4.1.1 and you have used `expr[...]`
+ for `OneOrMore(expr)`, it must be updated to `expr[1, ...]`.
+
- The defaults on all the `__diag__` switches have been set to False,
to avoid getting alarming warnings. To use these diagnostics, set
- them to True after importing pyparsing. Example:
+ them to True after importing pyparsing.
+
+ Example:
import pyparsing as pp
pp.__diag__.warn_multiple_tokens_in_named_alternation = True
+- Fixed bug introduced by the use of __getitem__ for repetition,
+ overlooking Python's legacy implementation of iteration
+ by sequentially calling __getitem__ with increasing numbers until
+ getting an IndexError. Found during investigation of problem
+ reported by murlock, merci!
+
+
+Version 2.4.2a1 - July, 2019
+----------------------------
+It turns out I got the meaning of `[...]` absolutely backwards,
+so I've deleted 2.4.1 and am repushing this release as 2.4.2a1
+for people to give it a try before I can call it ready to go.
+
+The `expr[...]` notation was pushed out to be synonymous with
+`OneOrMore(expr)`, but this is really counter to most Python
+notations (and even other internal pyparsing notations as well).
+It should have been defined to be equivalent to ZeroOrMore(expr).
+
+- Changed [...] to emit ZeroOrMore instead of OneOrMore.
+
+- Removed code that treats ParserElements like iterables.
+
+- Change all __diag__ switches to False.
+
+
+Version 2.4.1.1 - July 24, 2019
+-------------------------------
+This is a re-release of version 2.4.1 to restore the release history
+in PyPI, since the 2.4.1 release was deleted.
+
+There are 3 known issues in this release, which are fixed in
+the upcoming 2.4.2:
+
+- API change adding support for `expr[...]` - the original
+ code in 2.4.1 incorrectly implemented this as OneOrMore.
+ Code using this feature under this release should explicitly
+ use `expr[0, ...]` for ZeroOrMore and `expr[1, ...]` for
+ OneOrMore. In 2.4.2 you will be able to write `expr[...]`
+ equivalent to `ZeroOrMore(expr)`.
+
+- Bug if composing And, Or, MatchFirst, or Each expressions
+ using a single expression instead of a list. This only affects
+ code which uses explicit expression construction using the And, Or, etc.
+ classes instead of using overloaded operators '+', '^', and
+ so on. If constructing an And using a single expression,
+ you may get an error that "cannot multiply ParserElement by
+ 0 or (0, 0)" or a Python `IndexError`. Change code like
+
+ cmd = Or(Word(alphas))
+
+ to
+
+ cmd = Or([Word(alphas)])
+
+ (Note that this is not the recommended style for constructing
+ Or expressions.)
+
+- Some newly-added `__diag__` switches are enabled by default,
+ which may give rise to noisy user warnings for existing parsers.
+ You can disable them using:
+
+ import pyparsing as pp
+ pp.__diag__.warn_multiple_tokens_in_named_alternation = False
+ pp.__diag__.warn_ungrouped_named_tokens_in_collection = False
+ pp.__diag__.warn_name_set_on_empty_Forward = False
+ pp.__diag__.warn_on_multiple_string_args_to_oneof = False
+ pp.__diag__.enable_debug_on_named_expressions = False
+
+ In 2.4.2 these will all be set to False by default.
+
Version 2.4.1 - July, 2019
--------------------------
diff --git a/MANIFEST.in b/MANIFEST.in
index a13fe7f..48d9e1a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,8 +1,8 @@
include pyparsing.py
-include HowToUsePyparsing.html pyparsingClassDiagram.*
-include README.md CODE_OF_CONDUCT.md CHANGES LICENSE
-include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html examples/*.h
+include HowToUsePyparsing.rst pyparsingClassDiagram.*
+include README.md CODE_OF_CONDUCT.rst CHANGES LICENSE CONTRIBUTING.md modules.rst
+include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html examples/*.h examples/*.g examples/statemachine/*
recursive-include docs *
prune docs/_build/*
recursive-include test *
-include simple_unit_tests.py unitTests.py
+include setup.py simple_unit_tests.py unitTests.py
diff --git a/README.rst b/README.rst
index 0d702d7..dca0a71 100644
--- a/README.rst
+++ b/README.rst
@@ -1,5 +1,5 @@
-PyParsing – A Python Parsing Module
-===================================
+PyParsing -- A Python Parsing Module
+====================================
|Build Status|
@@ -12,45 +12,63 @@ use of regular expressions. The pyparsing module provides a library of
classes that client code uses to construct the grammar directly in
Python code.
-Here is a program to parse “Hello, World!” (or any greeting of the form
-“salutation, addressee!”):
+*[Since first writing this description of pyparsing in late 2003, this
+technique for developing parsers has become more widespread, under the
+name Parsing Expression Grammars - PEGs. See more information on PEGs at*
+https://en.wikipedia.org/wiki/Parsing_expression_grammar *.]*
+
+Here is a program to parse ``"Hello, World!"`` (or any greeting of the form
+``"salutation, addressee!"``):
.. code:: python
from pyparsing import Word, alphas
- greet = Word( alphas ) + "," + Word( alphas ) + "!"
+ greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
- print(hello, "->", greet.parseString( hello ))
+ print(hello, "->", greet.parseString(hello))
The program outputs the following::
Hello, World! -> ['Hello', ',', 'World', '!']
The Python representation of the grammar is quite readable, owing to the
-self-explanatory class names, and the use of ‘+’, ‘\|’ and ‘^’ operator
+self-explanatory class names, and the use of '+', '|' and '^' operator
definitions.
-The parsed results returned from parseString() can be accessed as a
+The parsed results returned from ``parseString()`` can be accessed as a
nested list, a dictionary, or an object with named attributes.
The pyparsing module handles some of the problems that are typically
-vexing when writing text parsers: - extra or missing whitespace (the
-above program will also handle “Hello,World!”, “Hello , World !”, etc.)
-- quoted strings - embedded comments
+vexing when writing text parsers:
+
+- extra or missing whitespace (the above program will also handle ``"Hello,World!"``, ``"Hello , World !"``, etc.)
+- quoted strings
+- embedded comments
The examples directory includes a simple SQL parser, simple CORBA IDL
parser, a config file parser, a chemical formula parser, and a four-
function algebraic notation parser, among many others.
+Documentation
+=============
+
+There are many examples in the online docstrings of the classes
+and methods in pyparsing. You can find them compiled into online docs
+at https://pyparsing-docs.readthedocs.io/en/latest/. Additional
+documentation resources and project info are listed in the online
+GitHub wiki, at https://github.com/pyparsing/pyparsing/wiki. An
+entire directory of examples is at
+https://github.com/pyparsing/pyparsing/tree/master/examples.
+
License
=======
- MIT License. See header of pyparsing.py
+MIT License. See header of pyparsing.py
History
=======
- See CHANGES file.
+See CHANGES file.
.. |Build Status| image:: https://travis-ci.org/pyparsing/pyparsing.svg?branch=master
:target: https://travis-ci.org/pyparsing/pyparsing
diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst
index 3e9e1f8..4a7cfb8 100644
--- a/docs/HowToUsePyparsing.rst
+++ b/docs/HowToUsePyparsing.rst
@@ -148,9 +148,9 @@ Usage notes
- ``expr[... ,n]`` is equivalent to ``expr*(0, n)``
(read as "0 to n instances of expr")
- - ``expr[...]`` is equivalent to ``ZeroOrMore(expr)``
+ - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
- - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)``
+ - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
Note that ``expr[..., n]`` does not raise an exception if
more than n exprs exist in the input stream; that is,
@@ -174,7 +174,7 @@ Usage notes
- If parsing the contents of an entire file, pass it to the
``parseFile`` method using::
- expr.parseFile( sourceFile )
+ expr.parseFile(sourceFile)
- ``ParseExceptions`` will report the location where an expected token
or expression failed to match. For example, if we tried to use our
@@ -210,15 +210,15 @@ Usage notes
contains optional elements. You can also shortcut
the ``setResultsName`` call::
- stats = "AVE:" + realNum.setResultsName("average") + \
- "MIN:" + realNum.setResultsName("min") + \
- "MAX:" + realNum.setResultsName("max")
+ stats = ("AVE:" + realNum.setResultsName("average")
+ + "MIN:" + realNum.setResultsName("min")
+ + "MAX:" + realNum.setResultsName("max"))
can now be written as this::
- stats = "AVE:" + realNum("average") + \
- "MIN:" + realNum("min") + \
- "MAX:" + realNum("max")
+ stats = ("AVE:" + realNum("average")
+ + "MIN:" + realNum("min")
+ + "MAX:" + realNum("max"))
- Be careful when defining parse actions that modify global variables or
data structures (as in ``fourFn.py``), especially for low level tokens
@@ -235,18 +235,18 @@ Classes in the pyparsing module
``ParserElement`` - abstract base class for all pyparsing classes;
methods for code to use are:
-- ``parseString( sourceString, parseAll=False )`` - only called once, on the overall
+- ``parseString(sourceString, parseAll=False)`` - only called once, on the overall
matching pattern; returns a ParseResults_ object that makes the
matched tokens available as a list, and optionally as a dictionary,
or as an object with named attributes; if parseAll is set to True, then
parseString will raise a ParseException if the grammar does not process
the complete input string.
-- ``parseFile( sourceFile )`` - a convenience function, that accepts an
+- ``parseFile(sourceFile)`` - a convenience function, that accepts an
input file object or filename. The file contents are passed as a
string to ``parseString()``. ``parseFile`` also supports the ``parseAll`` argument.
-- ``scanString( sourceString )`` - generator function, used to find and
+- ``scanString(sourceString)`` - generator function, used to find and
extract matching text in the given source string; for each matched text,
returns a tuple of:
@@ -260,19 +260,19 @@ methods for code to use are:
random matches, instead of exhaustively defining the grammar for the entire
source text (as would be required with ``parseString``).
-- ``transformString( sourceString )`` - convenience wrapper function for
+- ``transformString(sourceString)`` - convenience wrapper function for
``scanString``, to process the input source string, and replace matching
text with the tokens returned from parse actions defined in the grammar
(see setParseAction_).
-- ``searchString( sourceString )`` - another convenience wrapper function for
+- ``searchString(sourceString)`` - another convenience wrapper function for
``scanString``, returns a list of the matching tokens returned from each
call to ``scanString``.
-- ``setName( name )`` - associate a short descriptive name for this
+- ``setName(name)`` - associate a short descriptive name for this
element, useful in displaying exceptions and trace information
-- ``setResultsName( string, listAllMatches=False )`` - name to be given
+- ``setResultsName(string, listAllMatches=False)`` - name to be given
to tokens matching
the element; if multiple tokens within
a repetition group (such as ``ZeroOrMore`` or ``delimitedList``) the
@@ -287,9 +287,8 @@ methods for code to use are:
.. _setParseAction:
-- ``setParseAction( *fn )`` - specify one or more functions to call after successful
- matching of the element; each function is defined as ``fn( s,
- loc, toks )``, where:
+- ``setParseAction(*fn)`` - specify one or more functions to call after successful
+ matching of the element; each function is defined as ``fn(s, loc, toks)``, where:
- ``s`` is the original parse string
@@ -305,12 +304,12 @@ methods for code to use are:
lambda - here is an example of using a parse action to convert matched
integer tokens from strings to integers::
- intNumber = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] )
+ intNumber = Word(nums).setParseAction(lambda s,l,t: [int(t[0])])
If ``fn`` does not modify the ``toks`` list, it does not need to return
anything at all.
-- ``setBreak( breakFlag=True )`` - if breakFlag is True, calls pdb.set_break()
+- ``setBreak(breakFlag=True)`` - if breakFlag is True, calls pdb.set_trace()
as this expression is about to be parsed
- ``copy()`` - returns a copy of a ParserElement; can be used to use the same
@@ -321,11 +320,11 @@ methods for code to use are:
whitespace before starting matching (mostly used internally to the
pyparsing module, rarely used by client code)
-- ``setWhitespaceChars( chars )`` - define the set of chars to be ignored
+- ``setWhitespaceChars(chars)`` - define the set of chars to be ignored
as whitespace before trying to match a specific ParserElement, in place of the
default set of whitespace (space, tab, newline, and return)
-- ``setDefaultWhitespaceChars( chars )`` - class-level method to override
+- ``setDefaultWhitespaceChars(chars)`` - class-level method to override
the default set of whitespace chars for all subsequently created ParserElements
(including copies); useful when defining grammars that treat one or more of the
default whitespace characters as significant (such as a line-sensitive grammar, to
@@ -334,12 +333,12 @@ methods for code to use are:
- ``suppress()`` - convenience function to suppress the output of the
given element, instead of wrapping it with a Suppress object.
-- ``ignore( expr )`` - function to specify parse expression to be
+- ``ignore(expr)`` - function to specify parse expression to be
ignored while matching defined patterns; can be called
repeatedly to specify multiple expressions; useful to specify
patterns of comment syntax, for example
-- ``setDebug( dbgFlag=True )`` - function to enable/disable tracing output
+- ``setDebug(dbgFlag=True)`` - function to enable/disable tracing output
when trying to match this element
- ``validate()`` - function to verify that the defined grammar does not
@@ -390,8 +389,8 @@ Basic ParserElement subclasses
are not. To
define an identifier using a Word, use either of the following::
- - Word( alphas+"_", alphanums+"_" )
- - Word( srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]") )
+ - Word(alphas+"_", alphanums+"_")
+ - Word(srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]"))
If only one
string given, it specifies that the same character set defined
@@ -399,8 +398,8 @@ Basic ParserElement subclasses
define an identifier that can only be composed of capital letters and
underscores, use::
- - Word( "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" )
- - Word( srange("[A-Z_]") )
+ - Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ_")
+ - Word(srange("[A-Z_]"))
A Word may
also be constructed with any of the following optional parameters:
@@ -485,11 +484,11 @@ Expression subclasses
operator; multiple expressions can be Anded together using the '*'
operator as in::
- ipAddress = Word(nums) + ('.'+Word(nums))*3
+ ipAddress = Word(nums) + ('.' + Word(nums)) * 3
A tuple can be used as the multiplier, indicating a min/max::
- usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2)
+ usPhoneNumber = Word(nums) + ('-' + Word(nums)) * (1,2)
A special form of ``And`` is created if the '-' operator is used
instead of the '+' operator. In the ipAddress example above, if
@@ -664,7 +663,7 @@ Other classes
extraction instead of list extraction.
- new named elements can be added (in a parse action, for instance), using the same
- syntax as adding an item to a dict (``parseResults["X"]="new item"``); named elements can be removed using ``del parseResults["X"]``
+ syntax as adding an item to a dict (``parseResults["X"] = "new item"``); named elements can be removed using ``del parseResults["X"]``
- as a nested list
@@ -694,7 +693,7 @@ Exception classes and Troubleshooting
except ParseException, err:
print err.line
- print " "*(err.column-1) + "^"
+ print " " * (err.column - 1) + "^"
print err
- ``RecursiveGrammarException`` - exception returned by ``validate()`` if
@@ -723,7 +722,7 @@ Miscellaneous attributes and methods
Helper methods
--------------
-- ``delimitedList( expr, delim=',')`` - convenience function for
+- ``delimitedList(expr, delim=',')`` - convenience function for
matching one or more occurrences of expr, separated by delim.
By default, the delimiters are suppressed, so the returned results contain
only the separate list elements. Can optionally specify ``combine=True``,
@@ -731,32 +730,32 @@ Helper methods
combined value (useful for scoped variables, such as ``"a.b.c"``, or
``"a::b::c"``, or paths such as ``"a/b/c"``).
-- ``countedArray( expr )`` - convenience function for a pattern where an list of
+- ``countedArray(expr)`` - convenience function for a pattern where a list of
instances of the given expression are preceded by an integer giving the count of
elements in the list. Returns an expression that parses the leading integer,
reads exactly that many expressions, and returns the array of expressions in the
parse results - the leading integer is suppressed from the results (although it
is easily reconstructed by using len on the returned array).
-- ``oneOf( string, caseless=False )`` - convenience function for quickly declaring an
+- ``oneOf(string, caseless=False)`` - convenience function for quickly declaring an
alternative set of ``Literal`` tokens, by splitting the given string on
whitespace boundaries. The tokens are sorted so that longer
matches are attempted first; this ensures that a short token does
not mask a longer one that starts with the same characters. If ``caseless=True``,
will create an alternative set of CaselessLiteral tokens.
-- ``dictOf( key, value )`` - convenience function for quickly declaring a
- dictionary pattern of ``Dict( ZeroOrMore( Group( key + value ) ) )``.
+- ``dictOf(key, value)`` - convenience function for quickly declaring a
+ dictionary pattern of ``Dict(ZeroOrMore(Group(key + value)))``.
-- ``makeHTMLTags( tagName )`` and ``makeXMLTags( tagName )`` - convenience
+- ``makeHTMLTags(tagName)`` and ``makeXMLTags(tagName)`` - convenience
functions to create definitions of opening and closing tag expressions. Returns
a pair of expressions, for the corresponding <tag> and </tag> strings. Includes
support for attributes in the opening tag, such as <tag attr1="abc"> - attributes
are returned as keyed tokens in the returned ParseResults. ``makeHTMLTags`` is less
restrictive than ``makeXMLTags``, especially with respect to case sensitivity.
-- ``infixNotation(baseOperand, operatorList)`` - (formerly named ``operatorPrecedence``) convenience function to define a
- grammar for parsing infix notation
+- ``infixNotation(baseOperand, operatorList)`` - (formerly named ``operatorPrecedence``)
+ convenience function to define a grammar for parsing infix notation
expressions with a hierarchical precedence of operators. To use the ``infixNotation``
helper:
@@ -832,7 +831,7 @@ Helper methods
then pass None for this argument.
-- ``indentedBlock( statementExpr, indentationStackVar, indent=True)`` -
+- ``indentedBlock(statementExpr, indentationStackVar, indent=True)`` -
function to define an indented block of statements, similar to
indentation-based blocking in Python source code:
@@ -852,7 +851,7 @@ Helper methods
.. _originalTextFor:
-- ``originalTextFor( expr )`` - helper function to preserve the originally parsed text, regardless of any
+- ``originalTextFor(expr)`` - helper function to preserve the originally parsed text, regardless of any
token processing or conversion done by the contained expression. For instance, the following expression::
fullName = Word(alphas) + Word(alphas)
@@ -862,23 +861,23 @@ Helper methods
fullName = originalTextFor(Word(alphas) + Word(alphas))
-- ``ungroup( expr )`` - function to "ungroup" returned tokens; useful
+- ``ungroup(expr)`` - function to "ungroup" returned tokens; useful
to undo the default behavior of And to always group the returned tokens, even
if there is only one in the list. (New in 1.5.6)
-- ``lineno( loc, string )`` - function to give the line number of the
+- ``lineno(loc, string)`` - function to give the line number of the
location within the string; the first line is line 1, newlines
start new rows
-- ``col( loc, string )`` - function to give the column number of the
+- ``col(loc, string)`` - function to give the column number of the
location within the string; the first column is column 1,
newlines reset the column number to 1
-- ``line( loc, string )`` - function to retrieve the line of text
- representing ``lineno( loc, string )``; useful when printing out diagnostic
+- ``line(loc, string)`` - function to retrieve the line of text
+ representing ``lineno(loc, string)``; useful when printing out diagnostic
messages for exceptions
-- ``srange( rangeSpec )`` - function to define a string of characters,
+- ``srange(rangeSpec)`` - function to define a string of characters,
given a string of the form used by regexp string ranges, such as ``"[0-9]"`` for
all numeric digits, ``"[A-Z_]"`` for uppercase characters plus underscore, and
so on (note that rangeSpec does not include support for generic regular
@@ -915,9 +914,9 @@ Helper parse actions
``withAttribute`` can be called with:
- - keyword arguments, as in ``(class="Customer",align="right")``, or
+ - keyword arguments, as in ``(class="Customer", align="right")``, or
- - a list of name-value tuples, as in ``( ("ns1:class", "Customer"), ("ns2:align","right") )``
+ - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
An attribute can be specified to have the special value
``withAttribute.ANY_VALUE``, which will match any value - use this to
@@ -928,7 +927,7 @@ Helper parse actions
- ``upcaseTokens`` - converts all matched tokens to uppercase
-- ``matchOnlyAtCol( columnNumber )`` - a parse action that verifies that
+- ``matchOnlyAtCol(columnNumber)`` - a parse action that verifies that
an expression was matched at a particular column, raising a
ParseException if matching at a different column number; useful when parsing
tabular data
diff --git a/pyparsing.py b/pyparsing.py
index 05fb177..ffbe78b 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -95,8 +95,8 @@ classes inherit from. Use the docstrings for examples of how to:
namespace class
"""
-__version__ = "2.4.2a1"
-__versionTime__ = "24 Jul 2019 01:26 UTC"
+__version__ = "2.4.3"
+__versionTime__ = "25 Sep 2019 23:51 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -185,7 +185,13 @@ __diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
-# ~ sys.stderr.write("testing pyparsing module, version %s, %s\n" % (__version__, __versionTime__))
+def _enable_all_warnings():
+ __diag__.warn_multiple_tokens_in_named_alternation = True
+ __diag__.warn_ungrouped_named_tokens_in_collection = True
+ __diag__.warn_name_set_on_empty_Forward = True
+ __diag__.warn_on_multiple_string_args_to_oneof = True
+__diag__.enable_all_warnings = _enable_all_warnings
+
__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
@@ -2348,6 +2354,11 @@ class ParserElement(object):
"""
return NotAny(self)
+ def __iter__(self):
+ # must implement __iter__ to override legacy use of sequential access to __getitem__ to
+ # iterate over a sequence
+ raise TypeError('%r object is not iterable' % self.__class__.__name__)
+
def __getitem__(self, key):
"""
use ``[]`` indexing notation as a short form for expression repetition:
@@ -2556,15 +2567,13 @@ class ParserElement(object):
raise exc
def __eq__(self, other):
- if isinstance(other, ParserElement):
- if PY_3:
- self is other or super(ParserElement, self).__eq__(other)
- else:
- return self is other or vars(self) == vars(other)
+ if self is other:
+ return True
elif isinstance(other, basestring):
return self.matches(other)
- else:
- return super(ParserElement, self) == other
+ elif isinstance(other, ParserElement):
+ return vars(self) == vars(other)
+ return False
def __ne__(self, other):
return not (self == other)
@@ -3838,6 +3847,8 @@ class ParseExpression(ParserElement):
if isinstance(exprs, basestring):
self.exprs = [self._literalStringClass(exprs)]
+ elif isinstance(exprs, ParserElement):
+ self.exprs = [exprs]
elif isinstance(exprs, Iterable):
exprs = list(exprs)
# if sequence of strings provided, wrap with Literal
@@ -3991,15 +4002,17 @@ class And(ParseExpression):
def streamline(self):
# collapse any _PendingSkip's
- if any(isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1]):
- for i, e in enumerate(self.exprs[:-1]):
- if e is None:
- continue
- if (isinstance(e, ParseExpression)
- and isinstance(e.exprs[-1], _PendingSkip)):
- e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
- self.exprs[i + 1] = None
- self.exprs = [e for e in self.exprs if e is not None]
+ if self.exprs:
+ if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
+ for e in self.exprs[:-1]):
+ for i, e in enumerate(self.exprs[:-1]):
+ if e is None:
+ continue
+ if (isinstance(e, ParseExpression)
+ and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
+ e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
+ self.exprs[i + 1] = None
+ self.exprs = [e for e in self.exprs if e is not None]
super(And, self).streamline()
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
@@ -5495,7 +5508,7 @@ def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
# ~ print (strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols]))
try:
if len(symbols) == len("".join(symbols)):
- return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
+ return Regex("[%s]" % "".join(_collapseAndEscapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
else:
return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols))
except Exception:
diff --git a/unitTests.py b/unitTests.py
index 725622d..40dd58b 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -4670,6 +4670,34 @@ class EnableDebugOnNamedExpressionsTest(ParseTestCase):
"using enable_debug_on_named_expressions")
+class UndesirableButCommonPracticesTest(ParseTestCase):
+ def runTest(self):
+ import pyparsing as pp
+ ppc = pp.pyparsing_common
+
+ # These are valid constructs; they are not encouraged, but
+ # there is apparently a lot of code out there using these
+ # coding styles.
+ #
+ # Even though they are not encouraged, we shouldn't break them.
+
+ # Create an And using a list of expressions instead of using '+' operator
+ expr = pp.And([pp.Word('abc'), pp.Word('123')])
+ expr.runTests("""
+ aaa 333
+ b 1
+ ababab 32123
+ """)
+
+ # Passing a single expression to a ParseExpression, when it really wants a sequence
+ expr = pp.Or(pp.Or(ppc.integer))
+ expr.runTests("""
+ 123
+ 456
+ abc
+ """)
+
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
@@ -4930,4 +4958,5 @@ if __name__ == '__main__':
BUFFER_OUTPUT = False
result = testRunner.run(makeTestSuiteTemp(testclasses))
+ sys.stdout.flush()
exit(0 if result.wasSuccessful() else 1)