summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2022-06-10 00:51:04 -0500
committer ptmcg <ptmcg@austin.rr.com> 2022-06-10 00:51:04 -0500
commit 7ec34f497ebc9cbfd51ffeb5cce569133eb7c3c1 (patch)
tree 4591d2b71cdf950779e8a46bbd019121bfe9eaed
parent 8782a9c652bef352d085cf35a4b3195ce9d0faed (diff)
download pyparsing-git-7ec34f497ebc9cbfd51ffeb5cce569133eb7c3c1.tar.gz
Fix Word(max=2) (issue #409); create re for Word(exact=n) exprs; validate that min <= max if both given
-rw-r--r-- CHANGES 7
-rw-r--r-- pyparsing/__init__.py 2
-rw-r--r-- pyparsing/core.py 54
-rw-r--r-- tests/test_unit.py 52
4 files changed, 92 insertions, 23 deletions
diff --git a/CHANGES b/CHANGES
index b7f2b90..46d6903 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,13 @@ Change Log
Version 3.0.10 - (in development)
---------------------------------
+- Fixed bug in `Word` when `max=2`. Also added performance enhancement
+ when specifying `exact` argument. Reported in issue #409 by
+ panda-34, nice catch!
+
+- `Word` arguments are now validated if `min` and `max` are both
+ given, that `min` <= `max`; raises `ValueError` if values are invalid.
+
- Extended `expr[]` notation for repetition of expr to accept a
slice, where the slice's stop value indicates a `stop_on`
expression:
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index 3d69f2a..d26557f 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
__version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "30 May 2022 23:00 UTC"
+__version_time__ = "10 Jun 2022 05:40 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 8cdde2c..fed67f7 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -2722,6 +2722,11 @@ class Word(Token):
"cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
)
+ if self.maxSpecified and min > max:
+ raise ValueError(
+ f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
+ )
+
self.minLen = min
if max > 0:
@@ -2730,6 +2735,7 @@ class Word(Token):
self.maxLen = _MAX_INT
if exact > 0:
+ min = max = exact
self.maxLen = exact
self.minLen = exact
@@ -2738,39 +2744,43 @@ class Word(Token):
self.asKeyword = asKeyword
# see if we can make a regex for this Word
- if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
+ if " " not in (self.initChars | self.bodyChars):
+ if len(self.initChars) == 1:
+ re_leading_fragment = re.escape(self.initCharsOrig)
+ else:
+ re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"
+
if self.bodyChars == self.initChars:
if max == 0:
repeat = "+"
elif max == 1:
repeat = ""
else:
- repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
- self.reString = (
- f"[{_collapse_string_to_ranges(self.initChars)}]{repeat}"
- )
- elif len(self.initChars) == 1:
- if max == 0:
- repeat = "*"
- else:
- repeat = f"{{0,{max - 1}}}"
- self.reString = (
- f"{re.escape(self.initCharsOrig)}"
- f"[{_collapse_string_to_ranges(self.bodyChars)}]"
- f"{repeat}"
- )
+ if self.minLen != self.maxLen:
+ repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
+ else:
+ repeat = f"{{{self.minLen}}}"
+ self.reString = f"{re_leading_fragment}{repeat}"
else:
- if max == 0:
- repeat = "*"
- elif max == 2:
+ if max == 1:
+ re_body_fragment = ""
repeat = ""
else:
- repeat = f"{{0,{max - 1}}}"
+ re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
+ if max == 0:
+ repeat = "*"
+ elif max == 2:
+ repeat = "?" if min <= 1 else ""
+ else:
+ if min != max:
+ repeat = f"{{{min - 1 if min > 0 else 0},{max - 1}}}"
+ else:
+ repeat = f"{{{min - 1 if min > 0 else 0}}}"
+
self.reString = (
- f"[{_collapse_string_to_ranges(self.initChars)}]"
- f"[{_collapse_string_to_ranges(self.bodyChars)}]"
- f"{repeat}"
+ f"{re_leading_fragment}" f"{re_body_fragment}" f"{repeat}"
)
+
if self.asKeyword:
self.reString = rf"\b{self.reString}\b"
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 570b8e1..1548959 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -4708,6 +4708,58 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
if fails:
self.fail(f"{','.join(str(f) for f in fails)} failed to match")
+ def testWordMinMaxExactArgs(self):
+ for minarg in range(1, 9):
+ for maxarg in range(minarg, 10):
+ with self.subTest(minarg=minarg, maxarg=maxarg):
+ expr = pp.Word("AB", pp.nums, min=minarg, max=maxarg)
+ print(minarg, maxarg, expr.reString, end=" ")
+ trailing = expr.reString.rpartition("]")[-1]
+ expected_special = {
+ (1, 1): "",
+ (1, 2): "?",
+ (2, 2): "",
+ }
+ expected_default = (
+ f"{{{minarg - 1}}}"
+ if minarg == maxarg
+ else f"{{{minarg - 1},{maxarg - 1}}}"
+ )
+ expected = expected_special.get((minarg, maxarg), expected_default)
+
+ print(trailing == expected)
+
+ self.assertEqual(trailing, expected)
+
+ self.assertParseAndCheckList(
+ expr + pp.restOfLine.suppress(),
+ "A1234567890",
+ ["A1234567890"[:maxarg]],
+ )
+
+ for exarg in range(1, 9):
+ with self.subTest(exarg=exarg):
+ expr = pp.Word("AB", pp.nums, exact=exarg)
+ print(exarg, expr.reString, end=" ")
+ trailing = expr.reString.rpartition("]")[-1]
+ if exarg < 3:
+ expected = ""
+ else:
+ expected = f"{{{exarg - 1}}}"
+ print(trailing == expected)
+
+ self.assertEqual(trailing, expected)
+
+ self.assertParseAndCheckList(
+ expr + pp.restOfLine.suppress(),
+ "A1234567890",
+ ["A1234567890"[:exarg]],
+ )
+
+ def testInvalidMinMaxArgs(self):
+ with self.assertRaises(ValueError):
+ wd = pp.Word(min=2, max=1)
+
def testWordExclude(self):
allButPunc = pp.Word(pp.printables, excludeChars=".,:;-_!?")