diff options
author | ptmcg <ptmcg@austin.rr.com> | 2021-10-27 06:31:23 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2021-10-27 06:31:23 -0500 |
commit | 1be3b5398f8f8d57811712c39b57228bdffe6032 (patch) | |
tree | 168eb35020d43317bb3029bf86451f7f81092c04 /pyparsing/core.py | |
parent | 4ab17bb55d1ba72adef66c01232711d421650767 (diff) | |
download | pyparsing-git-1be3b5398f8f8d57811712c39b57228bdffe6032.tar.gz |
Fixed IndentedBlock
Diffstat (limited to 'pyparsing/core.py')
-rw-r--r-- | pyparsing/core.py | 80 |
1 files changed, 80 insertions, 0 deletions
```diff
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 775c7b4..9370101 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -3753,6 +3753,24 @@ class And(ParseExpression):
                 self.exprs = [e for e in self.exprs if e is not None]
 
         super().streamline()
+
+        # link any IndentedBlocks to the prior expression
+        for prev, cur in zip(self.exprs, self.exprs[1:]):
+            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
+            # (but watch out for recursive grammar)
+            seen = set()
+            while cur:
+                if id(cur) in seen:
+                    break
+                seen.add(id(cur))
+                if isinstance(cur, IndentedBlock):
+                    prev.add_parse_action(
+                        lambda s, l, t: setattr(cur, "parent_anchor", col(l, s))
+                    )
+                    break
+                subs = cur.recurse()
+                cur = next(iter(subs), None)
+
         self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
         return self
 
@@ -4305,6 +4323,68 @@ class ParseElementEnhance(ParserElement):
     leaveWhitespace = leave_whitespace
 
 
+class IndentedBlock(ParseElementEnhance):
+    """
+    Expression to match one or more expressions at a given indentation level.
+    Useful for parsing text where structure is implied by indentation (like Python source code).
+    """
+
+    class _Indent(Empty):
+        def __init__(self, ref_col: int):
+            super().__init__()
+            self.errmsg = "expected indent at column {}".format(ref_col)
+            self.add_condition(lambda s, l, t: col(l, s) == ref_col)
+
+    class _IndentGreater(Empty):
+        def __init__(self, ref_col: int):
+            super().__init__()
+            self.errmsg = "expected indent at column greater than {}".format(ref_col)
+            self.add_condition(lambda s, l, t: col(l, s) > ref_col)
+
+    def __init__(self, expr: ParserElement, *, recursive: bool = False, grouped=True):
+        super().__init__(expr, savelist=True)
+        # if recursive:
+        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
+        self._recursive = recursive
+        self._grouped = grouped
+        self.parent_anchor = 1
+
+    def parseImpl(self, instring, loc, doActions=True):
+        # advance parse position to non-whitespace by using an Empty()
+        # this should be the column to be used for all subsequent indented lines
+        anchor_loc = Empty().preParse(instring, loc)
+
+        # see if self.expr matches at the current location - if not it will raise an exception
+        # and no further work is necessary
+        self.expr.try_parse(instring, anchor_loc, doActions)
+
+        indent_col = col(anchor_loc, instring)
+        peer_detect_expr = self._Indent(indent_col)
+
+        inner_expr = Empty() + peer_detect_expr + self.expr
+        if self._recursive:
+            sub_indent = self._IndentGreater(indent_col)
+            nested_block = IndentedBlock(
+                self.expr, recursive=self._recursive, grouped=self._grouped
+            )
+            nested_block.set_debug(self.debug)
+            nested_block.parent_anchor = indent_col
+            inner_expr += Opt(sub_indent + nested_block)
+
+        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
+        block = OneOrMore(inner_expr)
+
+        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()
+
+        if self._grouped:
+            wrapper = Group
+        else:
+            wrapper = lambda expr: expr
+        return (wrapper(block) + Optional(trailing_undent)).parseImpl(
+            instring, anchor_loc, doActions
+        )
+
+
 class AtStringStart(ParseElementEnhance):
     """Matches if expression matches at the beginning of the parse string::
 
```