summaryrefslogtreecommitdiff
path: root/pyparsing/core.py
diff options
context:
space:
mode:
author: ptmcg <ptmcg@austin.rr.com> 2021-10-27 06:31:23 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2021-10-27 06:31:23 -0500
commit: 1be3b5398f8f8d57811712c39b57228bdffe6032 (patch)
tree: 168eb35020d43317bb3029bf86451f7f81092c04 /pyparsing/core.py
parent: 4ab17bb55d1ba72adef66c01232711d421650767 (diff)
download: pyparsing-git-1be3b5398f8f8d57811712c39b57228bdffe6032.tar.gz
Fixed IndentedBlock
Diffstat (limited to 'pyparsing/core.py')
-rw-r--r-- pyparsing/core.py 80
1 files changed, 80 insertions, 0 deletions
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 775c7b4..9370101 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -3753,6 +3753,24 @@ class And(ParseExpression):
self.exprs = [e for e in self.exprs if e is not None]
super().streamline()
+
+ # link any IndentedBlocks to the prior expression
+ for prev, cur in zip(self.exprs, self.exprs[1:]):
+ # traverse cur or any first embedded expr of cur looking for an IndentedBlock
+ # (but watch out for recursive grammar)
+ seen = set()
+ while cur:
+ if id(cur) in seen:
+ break
+ seen.add(id(cur))
+ if isinstance(cur, IndentedBlock):
+ prev.add_parse_action(
+ lambda s, l, t: setattr(cur, "parent_anchor", col(l, s))
+ )
+ break
+ subs = cur.recurse()
+ cur = next(iter(subs), None)
+
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
return self
@@ -4305,6 +4323,68 @@ class ParseElementEnhance(ParserElement):
leaveWhitespace = leave_whitespace
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more occurrences of an expression, all of which
    begin at the same indentation column.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Parameters:

    - ``expr`` - the expression to be matched for each entry of the block
    - ``recursive`` - (default= ``False``) if true, each matched entry may be
      followed by a nested ``IndentedBlock`` of the same ``expr`` that starts at
      a greater column
    - ``grouped`` - (default= ``True``) if true, the matched block is wrapped
      in a ``Group``
    """

    class _Indent(Empty):
        """Empty-based check that the current column is exactly ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = "expected indent at column {}".format(ref_col)
            self.add_condition(lambda text, pos, toks: col(pos, text) == ref_col)

    class _IndentGreater(Empty):
        """Empty-based check that the current column is beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = "expected indent at column greater than {}".format(ref_col)
            self.add_condition(lambda text, pos, toks: col(pos, text) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # Column that an optional trailing "undent" after the block is compared
        # against (see parseImpl); starts at 1 and can be reassigned from
        # outside, as parseImpl itself does for the nested blocks it creates.
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match a block of ``self.expr`` entries aligned at the column of the
        first non-whitespace position at or after ``loc``.

        The expression for the block is assembled on every call, because the
        reference column is only known once the starting position has been
        located. The call returns whatever the assembled expression's own
        ``parseImpl`` returns, and propagates any exception raised while
        probing or parsing.
        """
        # An Empty()'s preParse moves past leading whitespace; the position it
        # lands on defines the column every peer entry of this block must share.
        start_loc = Empty().preParse(instring, loc)

        # Probe for a first match of the body expression - a failure raises
        # right here, before any of the helper expressions below are built.
        self.expr.try_parse(instring, start_loc, doActions)

        block_col = col(start_loc, instring)
        entry = Empty() + self._Indent(block_col) + self.expr

        if self._recursive:
            # An entry may be followed by a deeper block of the same expression;
            # that child treats this block's column as its parent anchor.
            deeper = self._IndentGreater(block_col)
            child = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child.set_debug(self.debug)
            child.parent_anchor = block_col
            entry += Opt(deeper + child)

        entry.set_name(f"inner {hex(id(entry))[-4:].upper()}@{block_col}")
        entries = OneOrMore(entry)

        # After the block, optionally accept a return to the parent's column
        # or the end of the input string.
        undent = self._Indent(self.parent_anchor) | StringEnd()

        wrap = Group if self._grouped else (lambda expr: expr)
        whole_block = wrap(entries) + Optional(undent)
        return whole_block.parseImpl(instring, start_loc, doActions)
+
+
class AtStringStart(ParseElementEnhance):
"""Matches if expression matches at the beginning of the parse
string::