Railroad Diagrams (#218)

* Basic framework * Initial effort * Clean up and document code * jinja newline * Pre-commit, and add extras to tox * We can't use the class type-annotations syntax in Python 3.5
author: Michael Milton <ttmigueltt@gmail.com> 2020-06-01 03:34:41 +1000
committer: GitHub <noreply@github.com> 2020-05-31 12:34:41 -0500
commit: 58c171bb5077f615dc36fc55f470a462e56da891 (patch)
tree: 9aa19d80ccfa85195c68064900b89ccf972eca04
parent: 2952e92bcc4990580dee6f1d83b591700bc1fdc3 (diff)
download: pyparsing-git-58c171bb5077f615dc36fc55f470a462e56da891.tar.gz
5 files changed, 211 insertions, 0 deletions
diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py
new file mode 100644
index 0000000..4721c78
--- /dev/null
+++ b/pyparsing/diagram/__init__.py
@@ -0,0 +1,161 @@
+import railroad
+import pyparsing
+from pkg_resources import resource_filename
+import typing
+from jinja2 import Template
+from io import StringIO
+# Load the page template (the "template.jinja2" resource of this package) once, when the module is imported
+with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
+    template = Template(fp.read())
+
+# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
+NamedDiagram = typing.NamedTuple(
+    "NamedDiagram", [("name", str), ("diagram", typing.Optional[railroad.DiagramItem])]
+)
+"""
+A simple structure for associating a name with a railroad diagram
+"""
+
+
+def get_name(element: pyparsing.ParserElement, default: str = None) -> str:
+    """
+    Returns a human readable string for a parser element: the element's `name` attribute if it has one that is not
+    None, otherwise `default`. When `default` is omitted, the fallback is the name of the element's type
+    """
+    name = getattr(element, "name", None)
+    if name is not None:
+        return name
+    # getattr's own default only covers a missing attribute; handling the fallback here also covers `name = None`,
+    # so that callers never get None back
+    return element.__class__.__name__ if default is None else default
+
+
+def railroad_to_html(diagrams: typing.List[NamedDiagram]) -> str:
+    """
+    Render every NamedDiagram in the list as SVG and return one HTML string, built from the template, that shows them
+    """
+    rendered = []
+    for named in diagrams:
+        buffer = StringIO()
+        # writeSvg is handed the buffer's write method, so the SVG text is collected in memory
+        named.diagram.writeSvg(buffer.write)
+        rendered.append(dict(title=named.name, text="", svg=buffer.getvalue()))
+    return template.render(diagrams=rendered)
+
+
+def to_railroad(element: pyparsing.ParserElement) -> typing.List[NamedDiagram]:
+    """
+    Build the railroad diagrams for a pyparsing element tree: the diagram of `element` itself comes first, followed
+    by the extra diagrams collected during conversion. This is the recommended entrypoint to diagram creation if you
+    want to access the Railroad tree before it is converted to HTML
+    """
+    root_item, extras = _to_diagram_element(element)
+    root_name = get_name(element, "Grammar")
+    main = NamedDiagram(name=root_name, diagram=railroad.Diagram(root_item))
+    return [main] + list(extras.values())
+
+
+def _should_vertical(specification: typing.Union[int, bool], count: int) -> bool:
+    """
+    Returns true if `count` elements should be laid out vertically: a bool is returned as-is (checked first, since
+    bool subclasses int), an int is the count at which we go vertical, and anything else raises TypeError
+    """
+    if isinstance(specification, bool):
+        return specification
+    if isinstance(specification, int):
+        return count >= specification
+    raise TypeError("vertical must be an int or a bool, not {!r}".format(specification))
+
+
+def _to_diagram_element(
+    element: pyparsing.ParserElement,
+    diagrams=None,
+    vertical: typing.Union[int, bool] = 5,
+) -> typing.Tuple[railroad.DiagramItem, typing.Dict[int, NamedDiagram]]:
+    """
+    Recursively converts a PyParsing Element to a railroad Element
+    :param diagrams: The extra diagrams found so far, keyed by the id() of their Forward; copied, never modified
+    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
+    it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
+    do so. The same setting is used for every nested element
+    :returns: A tuple, where the first item is the converted version of the input element, and the second item is a
+    dict of extra diagrams that also need to be displayed in order to represent recursive grammars
+    """
+    # We don't want to be modifying the parent's version of the dict, although we do use it as a foundation
+    diagrams = {} if diagrams is None else diagrams.copy()
+
+    # Convert the nebulous list of child elements into a single list of objects for easy use
+    if hasattr(element, "exprs"):
+        exprs = element.exprs
+    elif hasattr(element, "expr"):
+        exprs = [element.expr]
+    else:
+        exprs = []
+
+    name = get_name(element)
+
+    if isinstance(element, pyparsing.Forward):
+        # If we encounter a forward reference, we have to split the diagram in two and return a new diagram which
+        # represents the forward reference on its own
+
+        # Python's id() is used to provide a unique identifier for elements
+        el_id = id(element)
+        if el_id in diagrams:
+            name = diagrams[el_id].name
+        else:
+            # If the Forward has no real name, we name it Group N to at least make it unique
+            count = len(diagrams) + 1
+            name = get_name(element, "Group {}".format(count))
+            # We have to first put in a placeholder so that, if we encounter this element deeper down in the tree,
+            # we won't have an infinite loop
+            diagrams[el_id] = NamedDiagram(name=name, diagram=None)
+
+            # At this point we create a new subdiagram, and add it to the dictionary of diagrams
+            sub_item, sub_diagrams = _to_diagram_element(exprs[0], diagrams, vertical)
+            diagram = railroad.Diagram(sub_item)
+            diagrams.update(sub_diagrams)
+            diagrams[el_id] = diagrams[el_id]._replace(diagram=diagram)
+            diagram.format(20)
+
+        # Here we just use the element's name as a placeholder for the recursive grammar which is defined separately
+        ret = railroad.NonTerminal(text=name)
+    else:
+        # If we don't encounter a Forward, we can continue to recurse into the tree
+
+        # Recursively convert child elements
+        children = []
+        for expr in exprs:
+            item, subdiagrams = _to_diagram_element(expr, diagrams, vertical)
+            children.append(item)
+            diagrams.update(subdiagrams)
+
+        # Here we find the most relevant Railroad element for matching pyparsing Element. Stack and Choice are the
+        # vertical layouts, Sequence and HorizontalChoice the horizontal ones
+        if isinstance(element, pyparsing.And):
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.Stack(*children)
+            else:
+                ret = railroad.Sequence(*children)
+        elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.Choice(0, *children)
+            else:
+                ret = railroad.HorizontalChoice(*children)
+        elif isinstance(element, pyparsing.Optional):
+            ret = railroad.Optional(children[0])
+        elif isinstance(element, pyparsing.OneOrMore):
+            ret = railroad.OneOrMore(children[0])
+        elif isinstance(element, pyparsing.ZeroOrMore):
+            ret = railroad.ZeroOrMore(children[0])
+        elif isinstance(element, pyparsing.Group):
+            # Generally there isn't any merit in labelling a group as a group if it doesn't have a custom name
+            ret = railroad.Group(children[0], label=get_name(element, ""))
+        elif len(exprs) > 1:
+            # Any other element with several children: show all of them, not just the first
+            ret = railroad.Sequence(*children)
+        elif len(exprs) > 0:
+            ret = railroad.Group(children[0], label=name)
+        else:
+            ret = railroad.Terminal(name)
+
+    return ret, diagrams
diff --git a/pyparsing/diagram/template.jinja2 b/pyparsing/diagram/template.jinja2
new file mode 100644
index 0000000..0f62426
--- /dev/null
+++ b/pyparsing/diagram/template.jinja2
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<body>
+{% for diagram in diagrams %}{# the title is a plain string, so it is escaped; the svg is markup, inserted as-is #}
+    <h1>{{ diagram.title|e }}</h1>
+    <div>{{ diagram.text }}</div>
+    {{ diagram.svg }}
+{% endfor %}
+</body>
+</html>
diff --git a/setup.py b/setup.py
index 910831f..b8208fd 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ setup(  # Distribution meta-data
     license="MIT License",
     packages=packages,
     python_requires=">=3.5",
+    extras_require={"diagrams": ["railroad-diagrams", "jinja2"],},
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Developers",
diff --git a/tests/test_diagram.py b/tests/test_diagram.py
new file mode 100644
index 0000000..843228b
--- /dev/null
+++ b/tests/test_diagram.py
@@ -0,0 +1,38 @@
+import unittest
+from examples.jsonParser import jsonObject
+from examples.simpleBool import boolExpr
+from pyparsing.diagram import to_railroad, railroad_to_html
+import tempfile
+import os
+
+
+class TestRailroadDiagrams(unittest.TestCase):
+    def railroad_debug(self) -> bool:
+        """
+        Returns True if we're in debug mode, i.e. the RAILROAD_DEBUG environment variable is set and not empty
+        """
+        return bool(os.environ.get("RAILROAD_DEBUG"))
+
+    def get_temp(self):
+        """
+        Returns an appropriate temporary file for writing a railroad diagram. In debug mode the file is kept
+        """
+        return tempfile.NamedTemporaryFile(
+            delete=not self.railroad_debug(), mode="w", encoding="utf-8", suffix=".html"
+        )
+
+    def check_diagram(self, grammar):
+        """Converts a grammar to HTML, checks it contains an SVG, and writes it to a temporary file"""
+        with self.get_temp() as temp:
+            diagrams = to_railroad(grammar)
+            html = railroad_to_html(diagrams)
+            self.assertIn("<svg", html)
+            temp.write(html)
+            if self.railroad_debug():
+                print(temp.name)
+
+    def test_bool_expr(self):
+        self.check_diagram(boolExpr)
+
+    def test_json(self):
+        self.check_diagram(jsonObject)
diff --git a/tox.ini b/tox.ini
index 5bf8404..b227547 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,7 @@ envlist =
 
 [testenv]
 deps=coverage
+extras=diagrams
 commands=
     coverage run --parallel --branch -m unittest
author	Michael Milton <ttmigueltt@gmail.com>	2020-06-01 03:34:41 +1000
committer	GitHub <noreply@github.com>	2020-05-31 12:34:41 -0500
commit	58c171bb5077f615dc36fc55f470a462e56da891 (patch)
tree	9aa19d80ccfa85195c68064900b89ccf972eca04
parent	2952e92bcc4990580dee6f1d83b591700bc1fdc3 (diff)
download	pyparsing-git-58c171bb5077f615dc36fc55f470a462e56da891.tar.gz