diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-07-09 15:00:28 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-07-09 15:00:28 -0500 |
commit | b5cf93e096bb36bd9227a9fe3bede0a076123b9d (patch) | |
tree | e585b188c7818d8c53b6d721f39a83ef225b121f | |
parent | 1016f59d3f302aae71250a81acd0f6a2dc37c4f5 (diff) | |
download | pyparsing-git-b5cf93e096bb36bd9227a9fe3bede0a076123b9d.tar.gz |
Fix delimited_list bug (Issue #408)
-rw-r--r-- | CHANGES | 32 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/helpers.py | 30 | ||||
-rw-r--r-- | tests/test_unit.py | 108 |
4 files changed, 155 insertions, 17 deletions
@@ -23,22 +23,11 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit "{" + (Literal("A") | Literal("a") | "") + "}" Some related changes implemented as part of this work: - - Literal("") now internally generates an Empty() (and no longer raises an exception) - - Empty is now a subclass of Literal + - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception) + - `Empty` is now a subclass of `Literal` Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly. -- Added new builtin `python_quoted_string`, which will match any form - of single-line or multiline quoted strings defined in Python. (Inspired - by discussion with Andreas Schörgenhumer in Issue #421.) - -- Fixed bug in `Word` when `max=2`. Also added performance enhancement - when specifying `exact` argument. Reported in issue #409 by - panda-34, nice catch! - -- `Word` arguments are now validated if `min` and `max` are both - given, that `min` <= `max`; raises `ValueError` if values are invalid. - - Extended `expr[]` notation for repetition of `expr` to accept a slice, where the slice's stop value indicates a `stop_on` expression: @@ -57,16 +46,31 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit ['BEGIN', ['aaa', 'bbb', 'ccc'], 'END'] +- Added new builtin `python_quoted_string`, which will match any form + of single-line or multiline quoted strings defined in Python. (Inspired + by discussion with Andreas Schörgenhumer in Issue #421.) + - Added bool `embed` argument to `ParserElement.create_diagram()`. When passed as True, the resulting diagram will omit the `<DOCTYPE>`, `<HEAD>`, and `<BODY>` tags so that it can be embedded in other HTML source. (Useful when embedding a call to `create_diagram()` in a PyScript HTML page.) +- Fixed bug in `Word` when `max=2`. Also added performance enhancement + when specifying `exact` argument. Reported in issue #409 by + panda-34, nice catch! + +- `Word` arguments are now validated if `min` and `max` are both + given, that `min` <= `max`; raises `ValueError` if values are invalid. + +- Fixed bug in `delimited_list`, where sub-expressions within the given + expr might not get assigned names or parse actions. Raised in Issue + #408 by Mostafa Razi, nice catch, thanks! + - Fixed bug in srange, when parsing escaped '/' and '\' inside a range set. -- Fixed exception messages for some ParserElements with custom names, +- Fixed exception messages for some `ParserElements` with custom names, which instead showed their contained expression names. - Multiple added and corrected type annotations. With much help from diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index ffe89d0..622fce5 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 10, "final", 0) -__version_time__ = "05 Jul 2022 01:03 UTC" +__version_time__ = "09 Jul 2022 19:45 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index 802389c..ecb382b 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -1,6 +1,7 @@ # helpers.py import html.entities import re +import sys import typing from . import __diag__ @@ -46,7 +47,34 @@ def delimited_list( expr = ParserElement._literalStringClass(expr) expr = typing.cast(ParserElement, expr) - expr_copy = expr.copy().streamline() + def make_deep_name_copy(expr): + from collections import deque + MAX_EXPRS = sys.getrecursionlimit() + seen = set() + to_visit = deque([(None, expr)]) + cpy = None + num_exprs = 0 + while to_visit and num_exprs < MAX_EXPRS: + parent, cur = to_visit.pop() + num_exprs += 1 + if cur in seen: + continue + seen.add(cur) + cur = cur.copy() + if parent is None: + cpy = cur + else: + if hasattr(parent, "expr"): + parent.expr = cur + elif hasattr(parent, "exprs"): + parent.exprs.append(cur) + + to_visit.extend((cur, sub) for sub in cur.recurse()[::-1]) + getattr(cur, "exprs", []).clear() + + return cpy + + expr_copy = make_deep_name_copy(expr).streamline() dlName = f"{expr_copy} [{delim} {expr_copy}]...{f' [{delim}]' if allow_trailing_delim else ''}" if not combine: diff --git a/tests/test_unit.py b/tests/test_unit.py index 59e549a..997a4be 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -8262,7 +8262,26 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): print(bool_constant) print(bool_constant.streamline()) print(bool_list2) - self.assertEqual("bool [, bool]...", str(bool_list2)) + with self.subTest(): + self.assertEqual("bool [, bool]...", str(bool_list2)) + + with self.subTest(): + street_address = pp.common.integer.set_name("integer") + pp.Word(pp.alphas)[1, ...].set_name("street_name") + self.assertEqual( + "{integer street_name} [, {integer street_name}]...", + str(pp.delimitedList(street_address)) + ) + + with self.subTest(): + operand = pp.Char(pp.alphas).set_name("var") + math = pp.infixNotation(operand, + [ + (pp.one_of("+ -"), 2, pp.opAssoc.LEFT), + ]) + self.assertEqual( + "Forward: + | - term [, Forward: + | - term]...", + str(pp.delimitedList(math)) + ) def testDelimitedListOfStrLiterals(self): expr = pp.delimitedList("ABC") @@ -8293,6 +8312,93 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expr, source, [s.strip() for s in source.split(",")] ) + def testDelimitedListParseActions1(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + stmt = pp.delimited_list(pp.Opt(dotted_vars)) + # stmt = delimited_list(dotted_vars) + # stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + + def testDelimitedListParseActions2(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + # stmt = delimited_list(Opt(dotted_vars)) + stmt = pp.delimited_list(dotted_vars) + # stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + + def testDelimitedListParseActions3(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + # stmt = delimited_list(Opt(dotted_vars)) + # stmt = delimited_list(dotted_vars) + stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + def testEnableDebugOnNamedExpressions(self): """ - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent |