diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-06-10 00:51:04 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-06-10 00:51:04 -0500 |
commit | 7ec34f497ebc9cbfd51ffeb5cce569133eb7c3c1 (patch) | |
tree | 4591d2b71cdf950779e8a46bbd019121bfe9eaed | |
parent | 8782a9c652bef352d085cf35a4b3195ce9d0faed (diff) | |
download | pyparsing-git-7ec34f497ebc9cbfd51ffeb5cce569133eb7c3c1.tar.gz |
Fix Word(max=2) (issue #409); create re for Word(exact=n) exprs; validate that min <= max if both given
-rw-r--r-- | CHANGES | 7 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/core.py | 54 | ||||
-rw-r--r-- | tests/test_unit.py | 52 |
4 files changed, 92 insertions, 23 deletions
@@ -4,6 +4,13 @@ Change Log Version 3.0.10 - (in development) --------------------------------- +- Fixed bug in `Word` when `max=2`. Also added performance enhancement + when specifying `exact` argument. Reported in issue #409 by + panda-34, nice catch! + +- `Word` arguments are now validated if `min` and `max` are both + given, that `min` <= `max`; raises `ValueError` if values are invalid. + - Extended `expr[]` notation for repetition of expr to accept a slice, where the slice's stop value indicates a `stop_on` expression: diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 3d69f2a..d26557f 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 10, "final", 0) -__version_time__ = "30 May 2022 23:00 UTC" +__version_time__ = "10 Jun 2022 05:40 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/core.py b/pyparsing/core.py index 8cdde2c..fed67f7 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -2722,6 +2722,11 @@ class Word(Token): "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" ) + if self.maxSpecified and min > max: + raise ValueError( + f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})" + ) + self.minLen = min if max > 0: @@ -2730,6 +2735,7 @@ class Word(Token): self.maxLen = _MAX_INT if exact > 0: + min = max = exact self.maxLen = exact self.minLen = exact @@ -2738,39 +2744,43 @@ class Word(Token): self.asKeyword = asKeyword # see if we can make a regex for this Word - if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0): + if " " not in (self.initChars | self.bodyChars): + if len(self.initChars) == 1: + re_leading_fragment = re.escape(self.initCharsOrig) + else: + re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]" + if self.bodyChars == self.initChars: if max == 0: repeat = "+" elif max == 1: repeat = "" else: - repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}" - self.reString = ( - f"[{_collapse_string_to_ranges(self.initChars)}]{repeat}" - ) - elif len(self.initChars) == 1: - if max == 0: - repeat = "*" - else: - repeat = f"{{0,{max - 1}}}" - self.reString = ( - f"{re.escape(self.initCharsOrig)}" - f"[{_collapse_string_to_ranges(self.bodyChars)}]" - f"{repeat}" - ) + if self.minLen != self.maxLen: + repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}" + else: + repeat = f"{{{self.minLen}}}" + self.reString = f"{re_leading_fragment}{repeat}" else: - if max == 0: - repeat = "*" - elif max == 2: + if max == 1: + re_body_fragment = "" repeat = "" else: - repeat = f"{{0,{max - 1}}}" + re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]" + if max == 0: + repeat = "*" + elif max == 2: + repeat = "?" if min <= 1 else "" + else: + if min != max: + repeat = f"{{{min - 1 if min > 0 else 0},{max - 1}}}" + else: + repeat = f"{{{min - 1 if min > 0 else 0}}}" + self.reString = ( - f"[{_collapse_string_to_ranges(self.initChars)}]" - f"[{_collapse_string_to_ranges(self.bodyChars)}]" - f"{repeat}" + f"{re_leading_fragment}" f"{re_body_fragment}" f"{repeat}" ) + if self.asKeyword: self.reString = rf"\b{self.reString}\b" diff --git a/tests/test_unit.py b/tests/test_unit.py index 570b8e1..1548959 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -4708,6 +4708,58 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): if fails: self.fail(f"{','.join(str(f) for f in fails)} failed to match") + def testWordMinMaxExactArgs(self): + for minarg in range(1, 9): + for maxarg in range(minarg, 10): + with self.subTest(minarg=minarg, maxarg=maxarg): + expr = pp.Word("AB", pp.nums, min=minarg, max=maxarg) + print(minarg, maxarg, expr.reString, end=" ") + trailing = expr.reString.rpartition("]")[-1] + expected_special = { + (1, 1): "", + (1, 2): "?", + (2, 2): "", + } + expected_default = ( + f"{{{minarg - 1}}}" + if minarg == maxarg + else f"{{{minarg - 1},{maxarg - 1}}}" + ) + expected = expected_special.get((minarg, maxarg), expected_default) + + print(trailing == expected) + + self.assertEqual(trailing, expected) + + self.assertParseAndCheckList( + expr + pp.restOfLine.suppress(), + "A1234567890", + ["A1234567890"[:maxarg]], + ) + + for exarg in range(1, 9): + with self.subTest(exarg=exarg): + expr = pp.Word("AB", pp.nums, exact=exarg) + print(exarg, expr.reString, end=" ") + trailing = expr.reString.rpartition("]")[-1] + if exarg < 3: + expected = "" + else: + expected = f"{{{exarg - 1}}}" + print(trailing == expected) + + self.assertEqual(trailing, expected) + + self.assertParseAndCheckList( + expr + pp.restOfLine.suppress(), + "A1234567890", + ["A1234567890"[:exarg]], + ) + + def testInvalidMinMaxArgs(self): + with self.assertRaises(ValueError): + wd = pp.Word(min=2, max=1) + def testWordExclude(self): allButPunc = pp.Word(pp.printables, excludeChars=".,:;-_!?") |