diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-05-18 23:44:36 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-05-18 23:44:36 -0500 |
commit | dbe71461b5a56967ff0abf79ce7c8b0eddb75a66 (patch) | |
tree | d1a597f9dc7562badc327b31d983dc7c2fbdf045 | |
parent | 8195b5650a647e7449aecd2e898ab7d0bb1ca6ed (diff) | |
download | pyparsing-git-dbe71461b5a56967ff0abf79ce7c8b0eddb75a66.tar.gz |
Add support for slice in expr[] notation, to pass stop_on repetition sentinel
-rw-r--r-- | CHANGES | 13 |
-rw-r--r-- | pyparsing/__init__.py | 2 |
-rw-r--r-- | pyparsing/core.py | 20 |
-rw-r--r-- | tests/test_unit.py | 25 |
4 files changed, 48 insertions, 12 deletions
@@ -4,6 +4,19 @@ Change Log Version 3.0.10 - (in development) --------------------------------- +- Extended `expr[]` notation to accept a slice, indicating a `stop_on` + expression: + + test = "BEGIN aaa bbb ccc END" + BEGIN, END = map(Keyword, "BEGIN END".split()) + body_word = Word(alphas) + expr = BEGIN + Group(body_word[...: END]) + END + print(expr.parse_string(test).as_list()) + + Prints: + + ['BEGIN', ['aaa', 'bbb', 'ccc'], 'END'] + - Fixed bug in srange, when parsing escaped '/' and '\' inside a range set. diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index b2333ee..485f471 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -129,7 +129,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 10, "final", 0) -__version_time__ = "14 May 2022 07:35 UTC" +__version_time__ = "19 May 2022 04:43 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/core.py b/pyparsing/core.py index 13ff51b..4638d19 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -1639,8 +1639,23 @@ class ParserElement(ABC): Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception if more than ``n`` ``expr``s exist in the input stream. If this behavior is desired, then write ``expr[..., n] + ~expr``. 
+ + For repetition with a stop_on expression, use slice notation: + + - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)`` + - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)`` + """ + stop_on_defined = False + stop_on = NoMatch() + if isinstance(key, slice): + key, stop_on = key.start, key.stop + stop_on_defined = True + elif isinstance(key, tuple) and isinstance(key[-1], slice): + key, stop_on = (key[0], key[1].start), key[1].stop + stop_on_defined = True + # convert single arg keys to tuples try: if isinstance(key, str_type): @@ -1658,6 +1673,11 @@ class ParserElement(ABC): # clip to 2 elements ret = self * tuple(key[:2]) + ret = typing.cast(_MultipleMatch, ret) + + if stop_on_defined: + ret.stopOn(stop_on) + return ret def __call__(self, name: str = None) -> "ParserElement": diff --git a/tests/test_unit.py b/tests/test_unit.py index e11524f..c070c52 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -5728,10 +5728,14 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expr, test, "Did not successfully stop on ending expression %r" % ender ) - expr = BEGIN + body_word[...].stopOn(ender) + END - self.assertEqual( - expr, test, "Did not successfully stop on ending expression %r" % ender - ) + expr = BEGIN + body_word[1, ...].stopOn(ender) + END + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) + + expr = BEGIN + body_word[1, ...: ender] + END + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) + + expr = BEGIN + body_word[(1, ...): ender] + END + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) number = pp.Word(pp.nums + ",.()").setName("number with optional commas") parser = pp.OneOrMore(pp.Word(pp.alphanums + "-/."), stopOn=number)( @@ -5751,14 
+5755,13 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): body_word = pp.Word(pp.alphas).setName("word") for ender in (END, "END", pp.CaselessKeyword("END")): expr = BEGIN + pp.ZeroOrMore(body_word, stopOn=ender) + END - self.assertEqual( - expr, test, "Did not successfully stop on ending expression %r" % ender - ) + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) - expr = BEGIN + body_word[0, ...].stopOn(ender) + END - self.assertEqual( - expr, test, "Did not successfully stop on ending expression %r" % ender - ) + expr = BEGIN + body_word[...].stopOn(ender) + END + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) + + expr = BEGIN + body_word[...: ender] + END + self.assertParseAndCheckList(expr, test, test.split(), "Did not successfully stop on ending expression %r" % ender) def testNestedAsDict(self): |