diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-10-13 13:15:18 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-10-13 13:15:18 -0500 |
commit | 79379431f2382739b53041c83d485140eaf5207c (patch) | |
tree | 0079b142c04abe2ec5453b1fe2d1389ccd94424b | |
parent | a64494dd3ce957c5bdd9ec2d7114c9de88af7c28 (diff) | |
parent | 63f49e9f5fca11d4168eb01ada8c049ef83c5299 (diff) | |
download | pyparsing-git-79379431f2382739b53041c83d485140eaf5207c.tar.gz |
Merge remote-tracking branch 'origin/pyparsing_2.4.x' into pyparsing_2.4.x
# Conflicts:
# CHANGES
# docs/HowToUsePyparsing.rst
# pyparsing.py
-rw-r--r-- | CHANGES | 108 | ||||
-rw-r--r-- | MANIFEST.in | 8 | ||||
-rw-r--r-- | README.rst | 44 | ||||
-rw-r--r-- | docs/HowToUsePyparsing.rst | 103 | ||||
-rw-r--r-- | pyparsing.py | 53 | ||||
-rw-r--r-- | unitTests.py | 29 |
6 files changed, 238 insertions, 107 deletions
@@ -2,26 +2,20 @@ Change Log ========== -Version 2.4.2a - July, 2019 ---------------------------- -It turns out I got the meaning of `[...]` absolutely backwards, -so I've deleted 2.4.1 and am repushing this release as 2.4.2a -for people to give it a try before I call it ready to go. +Version 2.4.3 - September, 2019 +------------------------------- +- Fixed a bug in ParserElement.__eq__ that would for some parsers + create a recursion error at parser definition time. Thanks to + Michael Clerx for the assist. (Addresses issue #123) -The `expr[...]` notation was pushed out to be synonymous with -`OneOrMore(expr)`, but this is really counter to most Python -notations (and even other internal pyparsing notations as well). +- Backport from pyparsing 3.0.0 of __diag__.enable_all_warnings(). -It also seems that I introduced an ugly bug in the changes made -to Or, so 2.4.1 really needs to be unreleased. So sorry, -everyone! -(Updated) -- A new shorthand notation has been added for repetition - expressions: expr[min, max], with '...' valid as a min - or max value: - - expr[...] and expr[0, ...] are equivalent to - ZeroOrMore(expr) +Version 2.4.2 - July, 2019 +-------------------------- +- Updated the shorthand notation that has been added for repetition + expressions: expr[min, max], with '...' valid as a min or max value: + - expr[...] and expr[0, ...] are equivalent to ZeroOrMore(expr) - expr[1, ...] is equivalent to OneOrMore(expr) - expr[n, ...] or expr[n,] is equivalent to expr*n + ZeroOrMore(expr) @@ -32,13 +26,91 @@ everyone! if more than n exprs exist in the input stream. If this behavior is desired, then write expr[..., n] + ~expr. + Better interpretation of [...] as ZeroOrMore raised by crowsonkb, + thanks for keeping me in line! + + If upgrading from 2.4.1 or 2.4.1.1 and you have used `expr[...]` + for `OneOrMore(expr)`, it must be updated to `expr[1, ...]`. 
+ - The defaults on all the `__diag__` switches have been set to False, to avoid getting alarming warnings. To use these diagnostics, set - them to True after importing pyparsing. Example: + them to True after importing pyparsing. + + Example: import pyparsing as pp pp.__diag__.warn_multiple_tokens_in_named_alternation = True +- Fixed bug introduced by the use of __getitem__ for repetition, + overlooking Python's legacy implementation of iteration + by sequentially calling __getitem__ with increasing numbers until + getting an IndexError. Found during investigation of problem + reported by murlock, merci! + + +Version 2.4.2a1 - July, 2019 +---------------------------- +It turns out I got the meaning of `[...]` absolutely backwards, +so I've deleted 2.4.1 and am repushing this release as 2.4.2a1 +for people to give it a try before I can call it ready to go. + +The `expr[...]` notation was pushed out to be synonymous with +`OneOrMore(expr)`, but this is really counter to most Python +notations (and even other internal pyparsing notations as well). +It should have been defined to be equivalent to ZeroOrMore(expr). + +- Changed [...] to emit ZeroOrMore instead of OneOrMore. + +- Removed code that treats ParserElements like iterables. + +- Change all __diag__ switches to False. + + +Version 2.4.1.1 - July 24, 2019 +------------------------------- +This is a re-release of version 2.4.1 to restore the release history +in PyPI, since the 2.4.1 release was deleted. + +There are 3 known issues in this release, which are fixed in +the upcoming 2.4.2: + +- API change adding support for `expr[...]` - the original + code in 2.4.1 incorrectly implemented this as OneOrMore. + Code using this feature under this release should explicitly + use `expr[0, ...]` for ZeroOrMore and `expr[1, ...]` for + OneOrMore. In 2.4.2 you will be able to write `expr[...]` + equivalent to `ZeroOrMore(expr)`. + +- Bug if composing And, Or, MatchFirst, or Each expressions + using an expression. 
This only affects code which uses + explicit expression construction using the And, Or, etc. + classes instead of using overloaded operators '+', '^', and + so on. If constructing an And using a single expression, + you may get an error that "cannot multiply ParserElement by + 0 or (0, 0)" or a Python `IndexError`. Change code like + + cmd = Or(Word(alphas)) + + to + + cmd = Or([Word(alphas)]) + + (Note that this is not the recommended style for constructing + Or expressions.) + +- Some newly-added `__diag__` switches are enabled by default, + which may give rise to noisy user warnings for existing parsers. + You can disable them using: + + import pyparsing as pp + pp.__diag__.warn_multiple_tokens_in_named_alternation = False + pp.__diag__.warn_ungrouped_named_tokens_in_collection = False + pp.__diag__.warn_name_set_on_empty_Forward = False + pp.__diag__.warn_on_multiple_string_args_to_oneof = False + pp.__diag__.enable_debug_on_named_expressions = False + + In 2.4.2 these will all be set to False by default. 
+ Version 2.4.1 - July, 2019 -------------------------- diff --git a/MANIFEST.in b/MANIFEST.in index a13fe7f..48d9e1a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,8 @@ include pyparsing.py -include HowToUsePyparsing.html pyparsingClassDiagram.* -include README.md CODE_OF_CONDUCT.md CHANGES LICENSE -include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html examples/*.h +include HowToUsePyparsing.rst pyparsingClassDiagram.* +include README.md CODE_OF_CONDUCT.rst CHANGES LICENSE CONTRIBUTING.md modules.rst +include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html examples/*.h examples/*.g examples/statemachine/* recursive-include docs * prune docs/_build/* recursive-include test * -include simple_unit_tests.py unitTests.py +include setup.py simple_unit_tests.py unitTests.py @@ -1,5 +1,5 @@ -PyParsing – A Python Parsing Module -=================================== +PyParsing -- A Python Parsing Module +==================================== |Build Status| @@ -12,45 +12,63 @@ use of regular expressions. The pyparsing module provides a library of classes that client code uses to construct the grammar directly in Python code. -Here is a program to parse “Hello, World!” (or any greeting of the form -“salutation, addressee!”): +*[Since first writing this description of pyparsing in late 2003, this +technique for developing parsers has become more widespread, under the +name Parsing Expression Grammars - PEGs. See more information on PEGs at* +https://en.wikipedia.org/wiki/Parsing_expression_grammar *.]* + +Here is a program to parse ``"Hello, World!"`` (or any greeting of the form +``"salutation, addressee!"``): .. code:: python from pyparsing import Word, alphas - greet = Word( alphas ) + "," + Word( alphas ) + "!" + greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" 
- print(hello, "->", greet.parseString( hello )) + print(hello, "->", greet.parseString(hello)) The program outputs the following:: Hello, World! -> ['Hello', ',', 'World', '!'] The Python representation of the grammar is quite readable, owing to the -self-explanatory class names, and the use of ‘+’, ‘\|’ and ‘^’ operator +self-explanatory class names, and the use of '+', '|' and '^' operator definitions. -The parsed results returned from parseString() can be accessed as a +The parsed results returned from ``parseString()`` can be accessed as a nested list, a dictionary, or an object with named attributes. The pyparsing module handles some of the problems that are typically -vexing when writing text parsers: - extra or missing whitespace (the -above program will also handle “Hello,World!”, “Hello , World !”, etc.) -- quoted strings - embedded comments +vexing when writing text parsers: + +- extra or missing whitespace (the above program will also handle ``"Hello,World!"``, ``"Hello , World !"``, etc.) +- quoted strings +- embedded comments The examples directory includes a simple SQL parser, simple CORBA IDL parser, a config file parser, a chemical formula parser, and a four- function algebraic notation parser, among many others. +Documentation +============= + +There are many examples in the online docstrings of the classes +and methods in pyparsing. You can find them compiled into online docs +at https://pyparsing-docs.readthedocs.io/en/latest/. Additional +documentation resources and project info are listed in the online +GitHub wiki, at https://github.com/pyparsing/pyparsing/wiki. An +entire directory of examples is at +https://github.com/pyparsing/pyparsing/tree/master/examples. + License ======= - MIT License. See header of pyparsing.py +MIT License. See header of pyparsing.py History ======= - See CHANGES file. +See CHANGES file. .. 
|Build Status| image:: https://travis-ci.org/pyparsing/pyparsing.svg?branch=master :target: https://travis-ci.org/pyparsing/pyparsing diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index 3e9e1f8..4a7cfb8 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -148,9 +148,9 @@ Usage notes - ``expr[... ,n]`` is equivalent to ``expr*(0, n)`` (read as "0 to n instances of expr") - - ``expr[...]`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` - - ``expr[0, ...]`` is equivalent to ``ZeroOrMore(expr)`` + - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` Note that ``expr[..., n]`` does not raise an exception if more than n exprs exist in the input stream; that is, @@ -174,7 +174,7 @@ Usage notes - If parsing the contents of an entire file, pass it to the ``parseFile`` method using:: - expr.parseFile( sourceFile ) + expr.parseFile(sourceFile) - ``ParseExceptions`` will report the location where an expected token or expression failed to match. For example, if we tried to use our @@ -210,15 +210,15 @@ Usage notes contains optional elements. 
You can also shortcut the ``setResultsName`` call:: - stats = "AVE:" + realNum.setResultsName("average") + \ - "MIN:" + realNum.setResultsName("min") + \ - "MAX:" + realNum.setResultsName("max") + stats = ("AVE:" + realNum.setResultsName("average") + + "MIN:" + realNum.setResultsName("min") + + "MAX:" + realNum.setResultsName("max")) can now be written as this:: - stats = "AVE:" + realNum("average") + \ - "MIN:" + realNum("min") + \ - "MAX:" + realNum("max") + stats = ("AVE:" + realNum("average") + + "MIN:" + realNum("min") + + "MAX:" + realNum("max")) - Be careful when defining parse actions that modify global variables or data structures (as in ``fourFn.py``), especially for low level tokens @@ -235,18 +235,18 @@ Classes in the pyparsing module ``ParserElement`` - abstract base class for all pyparsing classes; methods for code to use are: -- ``parseString( sourceString, parseAll=False )`` - only called once, on the overall +- ``parseString(sourceString, parseAll=False)`` - only called once, on the overall matching pattern; returns a ParseResults_ object that makes the matched tokens available as a list, and optionally as a dictionary, or as an object with named attributes; if parseAll is set to True, then parseString will raise a ParseException if the grammar does not process the complete input string. -- ``parseFile( sourceFile )`` - a convenience function, that accepts an +- ``parseFile(sourceFile)`` - a convenience function, that accepts an input file object or filename. The file contents are passed as a string to ``parseString()``. ``parseFile`` also supports the ``parseAll`` argument. 
-- ``scanString( sourceString )`` - generator function, used to find and +- ``scanString(sourceString)`` - generator function, used to find and extract matching text in the given source string; for each matched text, returns a tuple of: @@ -260,19 +260,19 @@ methods for code to use are: random matches, instead of exhaustively defining the grammar for the entire source text (as would be required with ``parseString``). -- ``transformString( sourceString )`` - convenience wrapper function for +- ``transformString(sourceString)`` - convenience wrapper function for ``scanString``, to process the input source string, and replace matching text with the tokens returned from parse actions defined in the grammar (see setParseAction_). -- ``searchString( sourceString )`` - another convenience wrapper function for +- ``searchString(sourceString)`` - another convenience wrapper function for ``scanString``, returns a list of the matching tokens returned from each call to ``scanString``. -- ``setName( name )`` - associate a short descriptive name for this +- ``setName(name)`` - associate a short descriptive name for this element, useful in displaying exceptions and trace information -- ``setResultsName( string, listAllMatches=False )`` - name to be given +- ``setResultsName(string, listAllMatches=False)`` - name to be given to tokens matching the element; if multiple tokens within a repetition group (such as ``ZeroOrMore`` or ``delimitedList``) the @@ -287,9 +287,8 @@ methods for code to use are: .. 
_setParseAction: -- ``setParseAction( *fn )`` - specify one or more functions to call after successful - matching of the element; each function is defined as ``fn( s, - loc, toks )``, where: +- ``setParseAction(*fn)`` - specify one or more functions to call after successful + matching of the element; each function is defined as ``fn(s, loc, toks)``, where: - ``s`` is the original parse string @@ -305,12 +304,12 @@ methods for code to use are: lambda - here is an example of using a parse action to convert matched integer tokens from strings to integers:: - intNumber = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] ) + intNumber = Word(nums).setParseAction(lambda s,l,t: [int(t[0])]) If ``fn`` does not modify the ``toks`` list, it does not need to return anything at all. -- ``setBreak( breakFlag=True )`` - if breakFlag is True, calls pdb.set_break() +- ``setBreak(breakFlag=True)`` - if breakFlag is True, calls pdb.set_trace() as this expression is about to be parsed - ``copy()`` - returns a copy of a ParserElement; can be used to use the same @@ -321,11 +320,11 @@ methods for code to use are: whitespace before starting matching (mostly used internally to the pyparsing module, rarely used by client code) -- ``setWhitespaceChars( chars )`` - define the set of chars to be ignored +- ``setWhitespaceChars(chars)`` - define the set of chars to be ignored as whitespace before trying to match a specific ParserElement, in place of the default set of whitespace (space, tab, newline, and return) -- ``setDefaultWhitespaceChars( chars )`` - class-level method to override +- ``setDefaultWhitespaceChars(chars)`` - class-level method to override the default set of whitespace chars for all subsequently created ParserElements (including copies); useful when defining grammars that treat one or more of the default whitespace characters as significant (such as a line-sensitive grammar, to @@ -334,12 +333,12 @@ methods for code to use are: - ``suppress()`` - convenience function to 
suppress the output of the given element, instead of wrapping it with a Suppress object. -- ``ignore( expr )`` - function to specify parse expression to be +- ``ignore(expr)`` - function to specify parse expression to be ignored while matching defined patterns; can be called repeatedly to specify multiple expressions; useful to specify patterns of comment syntax, for example -- ``setDebug( dbgFlag=True )`` - function to enable/disable tracing output +- ``setDebug(dbgFlag=True)`` - function to enable/disable tracing output when trying to match this element - ``validate()`` - function to verify that the defined grammar does not @@ -390,8 +389,8 @@ Basic ParserElement subclasses are not. To define an identifier using a Word, use either of the following:: - - Word( alphas+"_", alphanums+"_" ) - - Word( srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]") ) + - Word(alphas+"_", alphanums+"_") + - Word(srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]")) If only one string given, it specifies that the same character set defined @@ -399,8 +398,8 @@ Basic ParserElement subclasses define an identifier that can only be composed of capital letters and underscores, use:: - - Word( "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" ) - - Word( srange("[A-Z_]") ) + - Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ_") + - Word(srange("[A-Z_]")) A Word may also be constructed with any of the following optional parameters: @@ -485,11 +484,11 @@ Expression subclasses operator; multiple expressions can be Anded together using the '*' operator as in:: - ipAddress = Word(nums) + ('.'+Word(nums))*3 + ipAddress = Word(nums) + ('.' + Word(nums)) * 3 A tuple can be used as the multiplier, indicating a min/max:: - usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2) + usPhoneNumber = Word(nums) + ('-' + Word(nums)) * (1,2) A special form of ``And`` is created if the '-' operator is used instead of the '+' operator. In the ipAddress example above, if @@ -664,7 +663,7 @@ Other classes extraction instead of list extraction. 
- new named elements can be added (in a parse action, for instance), using the same - syntax as adding an item to a dict (``parseResults["X"]="new item"``); named elements can be removed using ``del parseResults["X"]`` + syntax as adding an item to a dict (``parseResults["X"] = "new item"``); named elements can be removed using ``del parseResults["X"]`` - as a nested list @@ -694,7 +693,7 @@ Exception classes and Troubleshooting except ParseException, err: print err.line - print " "*(err.column-1) + "^" + print " " * (err.column - 1) + "^" print err - ``RecursiveGrammarException`` - exception returned by ``validate()`` if @@ -723,7 +722,7 @@ Miscellaneous attributes and methods Helper methods -------------- -- ``delimitedList( expr, delim=',')`` - convenience function for +- ``delimitedList(expr, delim=',')`` - convenience function for matching one or more occurrences of expr, separated by delim. By default, the delimiters are suppressed, so the returned results contain only the separate list elements. Can optionally specify ``combine=True``, @@ -731,32 +730,32 @@ Helper methods combined value (useful for scoped variables, such as ``"a.b.c"``, or ``"a::b::c"``, or paths such as ``"a/b/c"``). -- ``countedArray( expr )`` - convenience function for a pattern where an list of +- ``countedArray(expr)`` - convenience function for a pattern where a list of instances of the given expression are preceded by an integer giving the count of elements in the list. Returns an expression that parses the leading integer, reads exactly that many expressions, and returns the array of expressions in the parse results - the leading integer is suppressed from the results (although it is easily reconstructed by using len on the returned array). 
-- ``oneOf( string, caseless=False )`` - convenience function for quickly declaring an +- ``oneOf(string, caseless=False)`` - convenience function for quickly declaring an alternative set of ``Literal`` tokens, by splitting the given string on whitespace boundaries. The tokens are sorted so that longer matches are attempted first; this ensures that a short token does not mask a longer one that starts with the same characters. If ``caseless=True``, will create an alternative set of CaselessLiteral tokens. -- ``dictOf( key, value )`` - convenience function for quickly declaring a - dictionary pattern of ``Dict( ZeroOrMore( Group( key + value ) ) )``. +- ``dictOf(key, value)`` - convenience function for quickly declaring a + dictionary pattern of ``Dict(ZeroOrMore(Group(key + value)))``. -- ``makeHTMLTags( tagName )`` and ``makeXMLTags( tagName )`` - convenience +- ``makeHTMLTags(tagName)`` and ``makeXMLTags(tagName)`` - convenience functions to create definitions of opening and closing tag expressions. Returns a pair of expressions, for the corresponding <tag> and </tag> strings. Includes support for attributes in the opening tag, such as <tag attr1="abc"> - attributes are returned as keyed tokens in the returned ParseResults. ``makeHTMLTags`` is less restrictive than ``makeXMLTags``, especially with respect to case sensitivity. -- ``infixNotation(baseOperand, operatorList)`` - (formerly named ``operatorPrecedence``) convenience function to define a - grammar for parsing infix notation +- ``infixNotation(baseOperand, operatorList)`` - (formerly named ``operatorPrecedence``) + convenience function to define a grammar for parsing infix notation expressions with a hierarchical precedence of operators. To use the ``infixNotation`` helper: @@ -832,7 +831,7 @@ Helper methods then pass None for this argument. 
-- ``indentedBlock( statementExpr, indentationStackVar, indent=True)`` - +- ``indentedBlock(statementExpr, indentationStackVar, indent=True)`` - function to define an indented block of statements, similar to indentation-based blocking in Python source code: @@ -852,7 +851,7 @@ Helper methods .. _originalTextFor: -- ``originalTextFor( expr )`` - helper function to preserve the originally parsed text, regardless of any +- ``originalTextFor(expr)`` - helper function to preserve the originally parsed text, regardless of any token processing or conversion done by the contained expression. For instance, the following expression:: fullName = Word(alphas) + Word(alphas) @@ -862,23 +861,23 @@ Helper methods fullName = originalTextFor(Word(alphas) + Word(alphas)) -- ``ungroup( expr )`` - function to "ungroup" returned tokens; useful +- ``ungroup(expr)`` - function to "ungroup" returned tokens; useful to undo the default behavior of And to always group the returned tokens, even if there is only one in the list. 
(New in 1.5.6) -- ``lineno( loc, string )`` - function to give the line number of the +- ``lineno(loc, string)`` - function to give the line number of the location within the string; the first line is line 1, newlines start new rows -- ``col( loc, string )`` - function to give the column number of the +- ``col(loc, string)`` - function to give the column number of the location within the string; the first column is column 1, newlines reset the column number to 1 -- ``line( loc, string )`` - function to retrieve the line of text - representing ``lineno( loc, string )``; useful when printing out diagnostic +- ``line(loc, string)`` - function to retrieve the line of text + representing ``lineno(loc, string)``; useful when printing out diagnostic messages for exceptions -- ``srange( rangeSpec )`` - function to define a string of characters, +- ``srange(rangeSpec)`` - function to define a string of characters, given a string of the form used by regexp string ranges, such as ``"[0-9]"`` for all numeric digits, ``"[A-Z_]"`` for uppercase characters plus underscore, and so on (note that rangeSpec does not include support for generic regular @@ -915,9 +914,9 @@ Helper parse actions ``withAttribute`` can be called with: - - keyword arguments, as in ``(class="Customer",align="right")``, or + - keyword arguments, as in ``(class="Customer", align="right")``, or - - a list of name-value tuples, as in ``( ("ns1:class", "Customer"), ("ns2:align","right") )`` + - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` An attribute can be specified to have the special value ``withAttribute.ANY_VALUE``, which will match any value - use this to @@ -928,7 +927,7 @@ Helper parse actions - ``upcaseTokens`` - converts all matched tokens to uppercase -- ``matchOnlyAtCol( columnNumber )`` - a parse action that verifies that +- ``matchOnlyAtCol(columnNumber)`` - a parse action that verifies that an expression was matched at a particular column, raising a 
ParseException if matching at a different column number; useful when parsing tabular data diff --git a/pyparsing.py b/pyparsing.py index 05fb177..ffbe78b 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -95,8 +95,8 @@ classes inherit from. Use the docstrings for examples of how to: namespace class """ -__version__ = "2.4.2a1" -__versionTime__ = "24 Jul 2019 01:26 UTC" +__version__ = "2.4.3" +__versionTime__ = "25 Sep 2019 23:51 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" import string @@ -185,7 +185,13 @@ __diag__.warn_name_set_on_empty_Forward = False __diag__.warn_on_multiple_string_args_to_oneof = False __diag__.enable_debug_on_named_expressions = False -# ~ sys.stderr.write("testing pyparsing module, version %s, %s\n" % (__version__, __versionTime__)) +def _enable_all_warnings(): + __diag__.warn_multiple_tokens_in_named_alternation = True + __diag__.warn_ungrouped_named_tokens_in_collection = True + __diag__.warn_name_set_on_empty_Forward = True + __diag__.warn_on_multiple_string_args_to_oneof = True +__diag__.enable_all_warnings = _enable_all_warnings + __all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__', 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', @@ -2348,6 +2354,11 @@ class ParserElement(object): """ return NotAny(self) + def __iter__(self): + # must implement __iter__ to override legacy use of sequential access to __getitem__ to + # iterate over a sequence + raise TypeError('%r object is not iterable' % self.__class__.__name__) + def __getitem__(self, key): """ use ``[]`` indexing notation as a short form for expression repetition: @@ -2556,15 +2567,13 @@ class ParserElement(object): raise exc def __eq__(self, other): - if isinstance(other, ParserElement): - if PY_3: - self is other or super(ParserElement, self).__eq__(other) - else: - return self is other or vars(self) == vars(other) + if self is other: + return True elif isinstance(other, basestring): 
return self.matches(other) - else: - return super(ParserElement, self) == other + elif isinstance(other, ParserElement): + return vars(self) == vars(other) + return False def __ne__(self, other): return not (self == other) @@ -3838,6 +3847,8 @@ class ParseExpression(ParserElement): if isinstance(exprs, basestring): self.exprs = [self._literalStringClass(exprs)] + elif isinstance(exprs, ParserElement): + self.exprs = [exprs] elif isinstance(exprs, Iterable): exprs = list(exprs) # if sequence of strings provided, wrap with Literal @@ -3991,15 +4002,17 @@ class And(ParseExpression): def streamline(self): # collapse any _PendingSkip's - if any(isinstance(e, ParseExpression) and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1]): - for i, e in enumerate(self.exprs[:-1]): - if e is None: - continue - if (isinstance(e, ParseExpression) - and isinstance(e.exprs[-1], _PendingSkip)): - e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] - self.exprs[i + 1] = None - self.exprs = [e for e in self.exprs if e is not None] + if self.exprs: + if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip) + for e in self.exprs[:-1]): + for i, e in enumerate(self.exprs[:-1]): + if e is None: + continue + if (isinstance(e, ParseExpression) + and e.exprs and isinstance(e.exprs[-1], _PendingSkip)): + e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] + self.exprs[i + 1] = None + self.exprs = [e for e in self.exprs if e is not None] super(And, self).streamline() self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) @@ -5495,7 +5508,7 @@ def oneOf(strs, caseless=False, useRegex=True, asKeyword=False): # ~ print (strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols])) try: if len(symbols) == len("".join(symbols)): - return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols)) + return Regex("[%s]" % "".join(_collapseAndEscapeRegexRangeChars(sym) for sym in symbols)).setName(' | 
'.join(symbols)) else: return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols)) except Exception: diff --git a/unitTests.py b/unitTests.py index 725622d..40dd58b 100644 --- a/unitTests.py +++ b/unitTests.py @@ -4670,6 +4670,34 @@ class EnableDebugOnNamedExpressionsTest(ParseTestCase): "using enable_debug_on_named_expressions") +class UndesirableButCommonPracticesTest(ParseTestCase): + def runTest(self): + import pyparsing as pp + ppc = pp.pyparsing_common + + # While these are valid constructs, and they are not encouraged + # there is apparently a lot of code out there using these + # coding styles. + # + # Even though they are not encouraged, we shouldn't break them. + + # Create an And using a list of expressions instead of using '+' operator + expr = pp.And([pp.Word('abc'), pp.Word('123')]) + expr.runTests(""" + aaa 333 + b 1 + ababab 32123 + """) + + # Passing a single expression to a ParseExpression, when it really wants a sequence + expr = pp.Or(pp.Or(ppc.integer)) + expr.runTests(""" + 123 + 456 + abc + """) + + class MiscellaneousParserTests(ParseTestCase): def runTest(self): @@ -4930,4 +4958,5 @@ if __name__ == '__main__': BUFFER_OUTPUT = False result = testRunner.run(makeTestSuiteTemp(testclasses)) + sys.stdout.flush() exit(0 if result.wasSuccessful() else 1) |