Diagram Improvements Episode III: Revenge of the Setuptools (#224)

* Add diagram documentation, add more diagram tests, allow more customization of diagrams * Remove accidental edit of unrelated documentation * Add diagram package * Add jinja file to manifest * Add to bdist also * package_data * Railroad improvements * Partial rewrite * Update * Use partials everywhere so we can edit the tree before it's constructed * Rewrite the diagram generator to not duplicate any content; use monospaced font for titles * Small documentation change * Revert back to Python 3.5 type hints, fix a small bug
author: Michael Milton <ttmigueltt@gmail.com> 2020-06-23 03:31:59 +1000
committer: GitHub <noreply@github.com> 2020-06-22 12:31:59 -0500
commit: 2d1163471e0bef2c527e5f959d78dd1605a51183 (patch)
tree: b6a0a6cd1bb8b4371c964a192211435770aafa38
parent: 6fc0f329eec924c760877eaddb0abb164a861267 (diff)
download: pyparsing-git-2d1163471e0bef2c527e5f959d78dd1605a51183.tar.gz
5 files changed, 324 insertions, 84 deletions
diff --git a/MANIFEST.in b/MANIFEST.in
index 7055114..4a0e687 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 include pyparsing.py
+include pyparsing/diagram/*.jinja2
 include HowToUsePyparsing.rst pyparsingClassDiagram.*
 include README.md CODE_OF_CONDUCT.rst CHANGES LICENSE CONTRIBUTING.md modules.rst
 include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html examples/*.h examples/*.g examples/statemachine/*
diff --git a/MANIFEST.in_bdist b/MANIFEST.in_bdist
index 57ce301..95ce807 100644
--- a/MANIFEST.in_bdist
+++ b/MANIFEST.in_bdist
@@ -1,4 +1,5 @@
 include pyparsing.py
+include pyparsing/diagram/*.jinja2
 include HowToUsePyparsing.html pyparsingClassDiagram.*
 include README CHANGES LICENSE
 include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html
diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py
index ec354a0..85ed314 100644
--- a/pyparsing/diagram/__init__.py
+++ b/pyparsing/diagram/__init__.py
@@ -1,21 +1,73 @@
 import railroad
 import pyparsing
 from pkg_resources import resource_filename
-import typing
+from typing import (
+    Union,
+    List,
+    Optional,
+    NamedTuple,
+    Generic,
+    TypeVar,
+    Any,
+    Dict,
+    Callable,
+)
 from jinja2 import Template
 from io import StringIO
+import inspect
 
 with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
     template = Template(fp.read())
 
 # Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
-NamedDiagram = typing.NamedTuple(
-    "NamedDiagram", [("name", str), ("diagram", typing.Optional[railroad.DiagramItem])]
+NamedDiagram = NamedTuple(
+    "NamedDiagram",
+    [("name", str), ("diagram", Optional[railroad.DiagramItem]), ("index", int)],
 )
 """
 A simple structure for associating a name with a railroad diagram
 """
 
+T = TypeVar("T")
+
+
+class EditablePartial(Generic[T]):
+    """
+    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
+    constructed.
+    """
+
+    # We need this here because the railroad constructors actually transform the data, so can't be called until the
+    # entire tree is assembled
+
+    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
+        self.func = func
+        self.args = args
+        self.kwargs = kwargs
+
+    @classmethod
+    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
+        """
+        If you call this function in the same way that you would call the constructor, it will store the arguments
+        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
+        """
+        return EditablePartial(func=func, args=list(args), kwargs=kwargs)
+
+    def __call__(self) -> T:
+        """
+        Evaluate the partial and return the result
+        """
+        args = self.args.copy()
+        kwargs = self.kwargs.copy()
+
+        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
+        # args=['list', 'of', 'things'])
+        arg_spec = inspect.getfullargspec(self.func)
+        if arg_spec.varargs in self.kwargs:
+            args += kwargs.pop(arg_spec.varargs)
+
+        return self.func(*args, **kwargs)
+
 
 def get_name(element: pyparsing.ParserElement, default: str = None) -> str:
     """
@@ -30,7 +82,7 @@ def get_name(element: pyparsing.ParserElement, default: str = None) -> str:
     return getattr(element, "name", default)
 
 
-def railroad_to_html(diagrams: typing.List[NamedDiagram], **kwargs) -> str:
+def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
     """
     Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
     :params kwargs: kwargs to be passed in to the template
@@ -39,28 +91,55 @@ def railroad_to_html(diagrams: typing.List[NamedDiagram], **kwargs) -> str:
     for diagram in diagrams:
         io = StringIO()
         diagram.diagram.writeSvg(io.write)
-        data.append({"title": diagram.name, "text": "", "svg": io.getvalue()})
+        title = diagram.name
+        if diagram.index == 0:
+            title += " (root)"
+        data.append({"title": title, "text": "", "svg": io.getvalue()})
 
     return template.render(diagrams=data, **kwargs)
 
 
+def resolve_partial(partial: "EditablePartial[T]") -> T:
+    """
+    Recursively resolves a collection of Partials into whatever type they are
+    """
+    if isinstance(partial, EditablePartial):
+        partial.args = resolve_partial(partial.args)
+        partial.kwargs = resolve_partial(partial.kwargs)
+        return partial()
+    elif isinstance(partial, list):
+        return [resolve_partial(x) for x in partial]
+    elif isinstance(partial, dict):
+        return {key: resolve_partial(x) for key, x in partial.items()}
+    else:
+        return partial
+
+
 def to_railroad(
-    element: pyparsing.ParserElement, diagram_kwargs: dict = {}
-) -> typing.List[NamedDiagram]:
+    element: pyparsing.ParserElement,
+    diagram_kwargs: dict = {},
+    vertical: Union[int, bool] = 5,
+) -> List[NamedDiagram]:
     """
     Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
     creation if you want to access the Railroad tree before it is converted to HTML
     :param diagram_kwargs: kwargs to pass to the Diagram() constructor
     """
-    diagram_element, subdiagrams = _to_diagram_element(element)
-    diagram = NamedDiagram(
-        get_name(element, "Grammar"),
-        railroad.Diagram(diagram_element, **diagram_kwargs),
-    )
-    return [diagram, *subdiagrams.values()]
+    # Convert the whole tree underneath the root
+    lookup = ConverterState(diagram_kwargs=diagram_kwargs)
+    _to_diagram_element(element, lookup=lookup, parent=None, vertical=vertical)
+
+    # Convert the root if it hasn't been already
+    root_id = id(element)
+    if root_id in lookup.first:
+        lookup.first[root_id].mark_for_extraction(root_id, lookup)
+
+    # Now that we're finished, we can convert from intermediate structures into Railroad elements
+    resolved = [resolve_partial(partial) for partial in lookup.diagrams.values()]
+    return sorted(resolved, key=lambda diag: diag.index)
 
 
-def _should_vertical(specification: typing.Tuple[int, bool], count: int) -> bool:
+def _should_vertical(specification: Union[int, bool], count: int) -> bool:
     """
     Returns true if we should return a vertical list of elements
     """
@@ -72,96 +151,257 @@ def _should_vertical(specification: typing.Tuple[int, bool], count: int) -> bool
         raise Exception()
 
 
+class ElementState:
+    """
+    State recorded for an individual pyparsing Element
+    """
+
+    # Note: this should be a dataclass, but we have to support Python 3.5
+    def __init__(
+        self,
+        element: pyparsing.ParserElement,
+        converted: EditablePartial,
+        parent: EditablePartial,
+        number: int = None,
+        name: str = None,
+        index: Optional[int] = None,
+    ):
+        #: The pyparsing element that this represents
+        self.element = element  # type: pyparsing.ParserElement
+        #: The name of the element
+        self.name = name  # type: str
+        #: The output Railroad element in an unconverted state
+        self.converted = converted  # type: EditablePartial
+        #: The parent Railroad element, which we store so that we can extract this if it's duplicated
+        self.parent = parent  # type: EditablePartial
+        #: The diagram number of this, when it gets turned into a diagram. This is only set when we know it's going to
+        # be extracted into a new diagram
+        self.number = number  # type: int
+        #: The index of this inside its parent
+        self.parent_index = index  # type: Optional[int]
+        #: If true, we should extract this out into a subdiagram
+        self.extract = False  # type: bool
+        #: If true, all of this element's chilren have been filled out
+        self.complete = False  # type: bool
+
+    def mark_for_extraction(self, el_id: int, state: "ConverterState"):
+        """
+        Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
+        """
+        self.extract = True
+
+        if self.number is None:
+            if self.parent is None:
+                self.number = 0
+            else:
+                self.number = state.generate_index()
+
+        # Set the name
+        if not self.name:
+            if hasattr(self.element, "name") and self.element.name:
+                self.name = self.element.name
+            else:
+                unnamed_number = 1 if self.parent is None else state.generate_unnamed()
+                self.name = "Unnamed {}".format(unnamed_number)
+
+        # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
+        # to be added
+        if self.complete:
+            state.extract_into_diagram(el_id)
+
+
+class ConverterState:
+    """
+    Stores some state that persists between recursions into the element tree
+    """
+
+    def __init__(self, diagram_kwargs: dict = {}):
+        #: A dictionary mapping ParserElement IDs to state relating to them
+        self.first = {}  # type:  Dict[int, ElementState]
+        #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
+        self.diagrams = {}  # type: Dict[int, EditablePartial[NamedDiagram]]
+        #: The index of the next unnamed element
+        self.unnamed_index = 1  # type:  int
+        #: The index of the next element. This is used for sorting
+        self.index = 0  # type:  int
+        #: Shared kwargs that are used to customize the construction of diagrams
+        self.diagram_kwargs = diagram_kwargs  # type:  dict
+
+    def generate_unnamed(self) -> int:
+        """
+        Generate a number used in the name of an otherwise unnamed diagram
+        """
+        self.unnamed_index += 1
+        return self.unnamed_index
+
+    def generate_index(self) -> int:
+        """
+        Generate a number used to index a diagram
+        """
+        self.index += 1
+        return self.index
+
+    def extract_into_diagram(self, el_id: int):
+        """
+        Used when we encounter the same token twice in the same tree. When this happens, we replace all instances of that
+        token with a terminal, and create a new subdiagram for the token
+        """
+        position = self.first[el_id]
+
+        # Replace the original definition of this element with a regular block
+        if position.parent:
+            ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
+            if "item" in position.parent.kwargs:
+                position.parent.kwargs["item"] = ret
+            else:
+                position.parent.kwargs["items"][position.parent_index] = ret
+
+        self.diagrams[el_id] = EditablePartial.from_call(
+            NamedDiagram,
+            name=position.name,
+            diagram=EditablePartial.from_call(
+                railroad.Diagram, position.converted, **self.diagram_kwargs
+            ),
+            index=position.number,
+        )
+        del self.first[el_id]
+
+
+def _worth_extracting(children: List[pyparsing.ParserElement]) -> bool:
+    """
+    Returns true if the element with these children is worth having its own element. Simply, if any of its children
+    themselves have children, then its complex enough to extract
+    """
+    return any(
+        [hasattr(child, "expr") or hasattr(child, "exprs") for child in children]
+    )
+
+
+def _element_children(
+    element: pyparsing.ParserElement,
+) -> List[Union[str, pyparsing.ParserElement]]:
+    """
+    Converts the nebulous list of child elements into a single list objects for easy use
+    """
+    if hasattr(element, "exprs"):
+        return list(element.exprs)
+    elif hasattr(element, "expr"):
+        return [element.expr]
+    else:
+        return []
+
+
 def _to_diagram_element(
     element: pyparsing.ParserElement,
-    diagrams=None,
-    vertical: typing.Union[int, bool] = 5,
-    diagram_kwargs: dict = {},
-) -> typing.Tuple[railroad.DiagramItem, typing.Dict[int, NamedDiagram]]:
+    parent: Optional[EditablePartial],
+    lookup: ConverterState = None,
+    vertical: Union[int, bool] = 5,
+    index: int = 0,
+) -> Optional[EditablePartial]:
     """
     Recursively converts a PyParsing Element to a railroad Element
+    :param lookup: The shared converter state that keeps track of useful things
+    :param index: The index of this element within the parent
+    :param parent: The parent of this element in the output tree
     :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
     it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
     do so
-    :returns: A tuple, where the first item is the converted version of the input element, and the second item is a
-    list of extra diagrams that also need to be displayed in order to represent recursive grammars
+    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
     """
-    if diagrams is None:
-        diagrams = {}
-    else:
-        # We don't want to be modifying the parent's version of the dict, although we do use it as a foundation
-        diagrams = diagrams.copy()
-
-    # Convert the nebulous list of child elements into a single list objects for easy use
-    if hasattr(element, "exprs"):
-        exprs = element.exprs
-    elif hasattr(element, "expr"):
-        exprs = [element.expr]
-    else:
-        exprs = []
+    exprs = _element_children(element)
 
     name = get_name(element)
+    # Python's id() is used to provide a unique identifier for elements
+    el_id = id(element)
 
-    if isinstance(element, pyparsing.Forward):
-        # If we encounter a forward reference, we have to split the diagram in two and return a new diagram which
-        # represents the forward reference on its own
+    if el_id in lookup.first:
+        # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
+        # so we have to extract it into a new diagram.
+        looked_up = lookup.first[el_id]
+        looked_up.mark_for_extraction(el_id, lookup)
+        return EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
 
-        # Python's id() is used to provide a unique identifier for elements
-        el_id = id(element)
-        if el_id in diagrams:
-            name = diagrams[el_id].name
-        else:
-            # If the Forward has no real name, we name it Group N to at least make it unique
-            count = len(diagrams) + 1
-            name = get_name(element, "Group {}".format(count))
-            # We have to first put in a placeholder so that, if we encounter this element deeper down in the tree,
-            # we won't have an infinite loop
-            diagrams[el_id] = NamedDiagram(name=name, diagram=None)
-
-            # At this point we create a new subdiagram, and add it to the dictionary of diagrams
-            forward_element, forward_diagrams = _to_diagram_element(exprs[0], diagrams)
-            diagram = railroad.Diagram(forward_element, **diagram_kwargs)
-            diagrams.update(forward_diagrams)
-            diagrams[el_id] = diagrams[el_id]._replace(diagram=diagram)
-            diagram.format(20)
-
-        # Here we just use the element's name as a placeholder for the recursive grammar which is defined separately
-        ret = railroad.NonTerminal(text=name)
-    else:
-        # If we don't encounter a Forward, we can continue to recurse into the tree
+    elif el_id in lookup.diagrams:
+        # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
+        # just put in a marker element that refers to the sub-diagram
+        return EditablePartial.from_call(
+            railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
+        )
 
+    else:
         # Recursively convert child elements
-        children = []
-        for expr in exprs:
-            item, subdiagrams = _to_diagram_element(expr, diagrams)
-            children.append(item)
-            diagrams.update(subdiagrams)
-
         # Here we find the most relevant Railroad element for matching pyparsing Element
+        # We use ``items=None`` here to hold the place for where the child elements will go once created
         if isinstance(element, pyparsing.And):
-            if _should_vertical(vertical, len(children)):
-                ret = railroad.Stack(*children)
+            if _should_vertical(vertical, len(exprs)):
+                ret = EditablePartial.from_call(railroad.Stack, items=[])
             else:
-                ret = railroad.Sequence(*children)
+                ret = EditablePartial.from_call(railroad.Sequence, items=[])
         elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
-            if _should_vertical(vertical, len(children)):
-                ret = railroad.HorizontalChoice(*children)
+            if _should_vertical(vertical, len(exprs)):
+                ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
             else:
-                ret = railroad.Choice(0, *children)
+                ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
         elif isinstance(element, pyparsing.Optional):
-            ret = railroad.Optional(children[0])
+            ret = EditablePartial.from_call(railroad.Optional, item="")
         elif isinstance(element, pyparsing.OneOrMore):
-            ret = railroad.OneOrMore(children[0])
+            ret = EditablePartial.from_call(railroad.OneOrMore, item="")
         elif isinstance(element, pyparsing.ZeroOrMore):
-            ret = railroad.ZeroOrMore(children[0])
+            ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
         elif isinstance(element, pyparsing.Group):
             # Generally there isn't any merit in labelling a group as a group if it doesn't have a custom name
-            ret = railroad.Group(children[0], label=get_name(element, ""))
+            if name != "Group":
+                ret = EditablePartial.from_call(railroad.Group, item=None, label=name)
+            else:
+                ret = EditablePartial.from_call(railroad.Group, item=None, label="")
+        elif isinstance(element, pyparsing.Empty) and name == "Empty":
+            # Skip unnamed "Empty" elements
+            ret = None
         elif len(exprs) > 1:
-            ret = railroad.Sequence(children[0])
+            ret = EditablePartial.from_call(railroad.Sequence, items=[])
         elif len(exprs) > 0:
-            ret = railroad.Group(children[0], label=name)
+            ret = EditablePartial.from_call(railroad.Group, item="", label=name)
         else:
-            ret = railroad.Terminal(name)
+            ret = EditablePartial.from_call(railroad.Terminal, name)
+
+        # Indicate this element's position in the tree so we can extract it if necessary
+        if _worth_extracting(exprs):
+            lookup.first[el_id] = ElementState(
+                element=element, converted=ret, parent=parent, index=index
+            )
+
+        i = 0
+        for expr in exprs:
+            # Add a placeholder index in case we have to extract the child before we even add it to the parent
+            if "items" in ret.kwargs:
+                ret.kwargs["items"].insert(i, None)
 
-    return ret, diagrams
+            item = _to_diagram_element(
+                expr, parent=ret, lookup=lookup, vertical=vertical, index=i
+            )
+
+            # Some elements don't need to be shown in the diagram
+            if item is not None:
+                if "item" in ret.kwargs:
+                    ret.kwargs["item"] = item
+                elif "items" in ret.kwargs:
+                    # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
+                    if ret.kwargs["items"][i] is None:
+                        ret.kwargs["items"][i] = item
+                    i += 1
+
+        # Mark this element as "complete", ie it has all of its children
+        if el_id in lookup.first:
+            lookup.first[el_id].complete = True
+
+        if (
+            el_id in lookup.first
+            and lookup.first[el_id].extract
+            and lookup.first[el_id].complete
+        ):
+            lookup.extract_into_diagram(el_id)
+            return EditablePartial.from_call(
+                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
+            )
+        else:
+            return ret
diff --git a/pyparsing/diagram/template.jinja2 b/pyparsing/diagram/template.jinja2
index ff09bcd..d2219fb 100644
--- a/pyparsing/diagram/template.jinja2
+++ b/pyparsing/diagram/template.jinja2
@@ -2,10 +2,9 @@
 <html>
 <head>
     {% if not head %}
-        <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300&display=swap" rel="stylesheet">
         <style type="text/css">
             .railroad-heading {
-                font-family: 'Roboto', sans-serif;
+                font-family: monospace;
             }
         </style>
     {% else %}
diff --git a/setup.py b/setup.py
index 9a64c9b..1cd3917 100644
--- a/setup.py
+++ b/setup.py
@@ -21,9 +21,7 @@ README_name = __file__.replace("setup.py", "README.rst")
 with io.open(README_name, encoding="utf8") as README:
     pyparsing_main_doc = README.read()
 
-packages = [
-    "pyparsing",
-]
+packages = ["pyparsing", "pyparsing.diagram"]
 
 setup(  # Distribution meta-data
     name="pyparsing",
@@ -39,6 +37,7 @@ setup(  # Distribution meta-data
     packages=packages,
     python_requires=">=3.5",
     extras_require={"diagrams": ["railroad-diagrams", "jinja2"],},
+    package_data={"pyparsing.diagram": ["*.jinja2"]},
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Developers",
author	Michael Milton <ttmigueltt@gmail.com>	2020-06-23 03:31:59 +1000
committer	GitHub <noreply@github.com>	2020-06-22 12:31:59 -0500
commit	2d1163471e0bef2c527e5f959d78dd1605a51183 (patch)
tree	b6a0a6cd1bb8b4371c964a192211435770aafa38
parent	6fc0f329eec924c760877eaddb0abb164a861267 (diff)
download	pyparsing-git-2d1163471e0bef2c527e5f959d78dd1605a51183.tar.gz