diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-12-11 10:56:32 -0600 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-12-11 10:56:32 -0600 |
commit | bcaeea1fa924a66a5b13c67cd830621e4eff4214 (patch) | |
tree | 405ee5a30ffb7ec292242e3828957dfbca430aec | |
parent | 8c72861e1ca49507f5cc201295c061d0e49e1bfa (diff) | |
download | pyparsing-git-bcaeea1fa924a66a5b13c67cd830621e4eff4214.tar.gz |
Added DelimitedList class, for better handling of naming and diagramming (replaces delimited_list function)
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/core.py | 65 | ||||
-rw-r--r-- | pyparsing/helpers.py | 115 |
3 files changed, 98 insertions, 84 deletions
```diff
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index c27052e..30ecbd5 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
 
 
 __version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "06 Nov 2022 23:07 UTC"
+__version_time__ = "11 Dec 2022 16:48 UTC"
 __version__ = __version_info__.__version__
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 320d882..5c845ae 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -5049,6 +5049,71 @@ class ZeroOrMore(_MultipleMatch):
         return "[" + str(self.expr) + "]..."
 
 
+class DelimitedList(ParseElementEnhance):
+    def __init__(
+        self,
+        expr: Union[str, ParserElement],
+        delim: Union[str, ParserElement] = ",",
+        combine: bool = False,
+        min: typing.Optional[int] = None,
+        max: typing.Optional[int] = None,
+        *,
+        allow_trailing_delim: bool = False,
+    ):
+        """Helper to define a delimited list of expressions - the delimiter
+        defaults to ','. By default, the list elements and delimiters can
+        have intervening whitespace, and comments, but this can be
+        overridden by passing ``combine=True`` in the constructor. If
+        ``combine`` is set to ``True``, the matching tokens are
+        returned as a single token string, with the delimiters included;
+        otherwise, the matching tokens are returned as a list of tokens,
+        with the delimiters suppressed.
+
+        If ``allow_trailing_delim`` is set to True, then the list may end with
+        a delimiter.
+
+        Example::
+
+            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+        """
+        if isinstance(expr, str_type):
+            expr = ParserElement._literalStringClass(expr)
+        expr = typing.cast(ParserElement, expr)
+
+        if min is not None:
+            if min < 1:
+                raise ValueError("min must be greater than 0")
+        if max is not None:
+            if min is not None and max < min:
+                raise ValueError("max must be greater than, or equal to min")
+
+        self.content = expr
+        self.raw_delim = str(delim)
+        self.delim = delim
+        self.combine = combine
+        if not combine:
+            self.delim = Suppress(delim)
+        self.min = min or 1
+        self.max = max
+        self.allow_trailing_delim = allow_trailing_delim
+
+        delim_list_expr = self.content + (self.delim + self.content) * (
+            self.min - 1,
+            None if self.max is None else self.max - 1,
+        )
+        if self.allow_trailing_delim:
+            delim_list_expr += Opt(self.delim)
+
+        if self.combine:
+            delim_list_expr = Combine(delim_list_expr)
+
+        super().__init__(delim_list_expr, savelist=True)
+
+    def _generateDefaultName(self) -> str:
+        return "{0} [{1} {0}]...".format(self.content.streamline(), self.raw_delim)
+
+
 class _NullToken:
     def __bool__(self):
         return False
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index 4b2655f..dcb6249 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -17,89 +17,6 @@ from .util import (
 #
 # global helpers
 #
-def delimited_list(
-    expr: Union[str, ParserElement],
-    delim: Union[str, ParserElement] = ",",
-    combine: bool = False,
-    min: typing.Optional[int] = None,
-    max: typing.Optional[int] = None,
-    *,
-    allow_trailing_delim: bool = False,
-) -> ParserElement:
-    """Helper to define a delimited list of expressions - the delimiter
-    defaults to ','. By default, the list elements and delimiters can
-    have intervening whitespace, and comments, but this can be
-    overridden by passing ``combine=True`` in the constructor. If
-    ``combine`` is set to ``True``, the matching tokens are
-    returned as a single token string, with the delimiters included;
-    otherwise, the matching tokens are returned as a list of tokens,
-    with the delimiters suppressed.
-
-    If ``allow_trailing_delim`` is set to True, then the list may end with
-    a delimiter.
-
-    Example::
-
-        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
-        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
-    """
-    if isinstance(expr, str_type):
-        expr = ParserElement._literalStringClass(expr)
-    expr = typing.cast(ParserElement, expr)
-
-    def make_deep_name_copy(expr):
-        from collections import deque
-
-        MAX_EXPRS = sys.getrecursionlimit()
-        seen = set()
-        to_visit = deque([(None, expr)])
-        cpy = None
-        num_exprs = 0
-        while to_visit and num_exprs < MAX_EXPRS:
-            parent, cur = to_visit.pop()
-            num_exprs += 1
-            if id(cur) in seen:
-                continue
-            seen.add(id(cur))
-            cur = cur.copy()
-            if parent is None:
-                cpy = cur
-            else:
-                if hasattr(parent, "expr"):
-                    parent.expr = cur
-                elif hasattr(parent, "exprs"):
-                    parent.exprs.append(cur)
-
-            to_visit.extend((cur, sub) for sub in cur.recurse()[::-1])
-            getattr(cur, "exprs", []).clear()
-
-        return cpy
-
-    expr_copy = make_deep_name_copy(expr).streamline()
-    dlName = f"{expr_copy} [{delim} {expr_copy}]...{f' [{delim}]' if allow_trailing_delim else ''}"
-
-    if not combine:
-        delim = Suppress(delim)
-
-    if min is not None:
-        if min < 1:
-            raise ValueError("min must be greater than 0")
-        min -= 1
-    if max is not None:
-        if min is not None and max <= min:
-            raise ValueError("max must be greater than, or equal to min")
-        max -= 1
-    delimited_list_expr: ParserElement = expr + (delim + expr)[min, max]
-
-    if allow_trailing_delim:
-        delimited_list_expr += Opt(delim)
-
-    if combine:
-        return Combine(delimited_list_expr).set_name(dlName)
-    else:
-        return delimited_list_expr.set_name(dlName)
-
-
 def counted_array(
     expr: ParserElement,
     int_expr: typing.Optional[ParserElement] = None,
@@ -1111,6 +1028,38 @@ _builtin_exprs: List[ParserElement] = [
 ]
 
 
+# compatibility function, superseded by DelimitedList class
+def delimited_list(
+    expr: Union[str, ParserElement],
+    delim: Union[str, ParserElement] = ",",
+    combine: bool = False,
+    min: typing.Optional[int] = None,
+    max: typing.Optional[int] = None,
+    *,
+    allow_trailing_delim: bool = False,
+) -> ParserElement:
+    """Helper to define a delimited list of expressions - the delimiter
+    defaults to ','. By default, the list elements and delimiters can
+    have intervening whitespace, and comments, but this can be
+    overridden by passing ``combine=True`` in the constructor. If
+    ``combine`` is set to ``True``, the matching tokens are
+    returned as a single token string, with the delimiters included;
+    otherwise, the matching tokens are returned as a list of tokens,
+    with the delimiters suppressed.
+
+    If ``allow_trailing_delim`` is set to True, then the list may end with
+    a delimiter.
+
+    Example::
+
+        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+    """
+    return DelimitedList(
+        expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
+    )
+
+
 # pre-PEP8 compatible names
 # fmt: off
 opAssoc = OpAssoc
```