summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Milton <ttmigueltt@gmail.com> 2020-06-01 03:34:41 +1000
committer GitHub <noreply@github.com> 2020-05-31 12:34:41 -0500
commit 58c171bb5077f615dc36fc55f470a462e56da891 (patch)
tree 9aa19d80ccfa85195c68064900b89ccf972eca04
parent 2952e92bcc4990580dee6f1d83b591700bc1fdc3 (diff)
download pyparsing-git-58c171bb5077f615dc36fc55f470a462e56da891.tar.gz
Railroad Diagrams (#218)
* Basic framework * Initial effort * Clean up and document code * jinja newline * Pre-commit, and add extras to tox * We can't use the class type-annotations syntax in Python 3.5
-rw-r--r-- pyparsing/diagram/__init__.py 161
-rw-r--r-- pyparsing/diagram/template.jinja2 10
-rw-r--r-- setup.py 1
-rw-r--r-- tests/test_diagram.py 38
-rw-r--r-- tox.ini 1
5 files changed, 211 insertions, 0 deletions
diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py
new file mode 100644
index 0000000..4721c78
--- /dev/null
+++ b/pyparsing/diagram/__init__.py
@@ -0,0 +1,161 @@
+import railroad
+import pyparsing
+from pkg_resources import resource_filename
+import typing
+from jinja2 import Template
+from io import StringIO
+
+# Compile the bundled Jinja2 template once at import time; railroad_to_html reuses it for every render
+_template_path = resource_filename(__name__, "template.jinja2")
+with open(_template_path, encoding="utf-8") as template_file:
+    template = Template(template_file.read())
+
+# A simple structure for associating a name with a railroad diagram.
+# `diagram` is Optional because a placeholder entry (diagram=None) is stored while a recursive
+# grammar is still being converted.
+# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
+NamedDiagram = typing.NamedTuple(
+    "NamedDiagram",
+    [
+        ("name", str),
+        ("diagram", typing.Optional[railroad.DiagramItem]),
+    ],
+)
+
+
+def get_name(
+    element: pyparsing.ParserElement, default: typing.Optional[str] = None
+) -> str:
+    """
+    Returns a human readable string for a parser element. By default it will first check the element's `name` attribute
+    for a user-defined string, and will fall back to the element type name if this doesn't exist. However, the fallback
+    value can be customized
+    :param element: The parser element to describe
+    :param default: The string to return when the element has no usable name. If None, the element's class name is
+        used instead
+    :returns: The element's name, or the fallback value
+    """
+    name = getattr(element, "name", None)
+    # A `name` attribute that exists but is None is treated the same as a missing one, so that we always
+    # return a string as promised by the signature
+    if name is not None:
+        return name
+
+    if default is None:
+        return element.__class__.__name__
+    return default
+
+
+def railroad_to_html(diagrams: typing.List[NamedDiagram]) -> str:
+    """
+    Render a list of NamedDiagram as one HTML document, with one titled SVG section per diagram
+    :param diagrams: The diagrams to render, in the order they should appear on the page
+    :returns: The rendered HTML as a string
+    """
+
+    def render_svg(item):
+        # writeSvg emits its output through a write callback, so we collect it in an in-memory buffer
+        buffer = StringIO()
+        item.diagram.writeSvg(buffer.write)
+        return buffer.getvalue()
+
+    sections = [
+        {"title": item.name, "text": "", "svg": render_svg(item)} for item in diagrams
+    ]
+    return template.render(diagrams=sections)
+
+
+def to_railroad(element: pyparsing.ParserElement) -> typing.List[NamedDiagram]:
+    """
+    Convert a pyparsing element tree into a list of diagrams. Use this entrypoint if you want to work with the
+    Railroad tree before it is converted to HTML
+    :param element: The root element of the grammar
+    :returns: The diagram for the root element (named "Grammar" if the element has no name), followed by the
+        sub-diagrams that were split out while converting it
+    """
+    root_item, extra_diagrams = _to_diagram_element(element)
+    root_name = get_name(element, "Grammar")
+    result = [NamedDiagram(root_name, railroad.Diagram(root_item))]
+    result.extend(extra_diagrams.values())
+    return result
+
+
+def _should_vertical(specification: typing.Union[int, bool], count: int) -> bool:
+    """
+    Returns true if we should return a vertical list of elements
+    :param specification: Either a bool, which is returned unchanged, or an integer threshold
+    :param count: The number of elements in the list
+    :returns: For an integer specification, True once `count` reaches the threshold
+    :raises TypeError: If the specification is neither a bool nor an int
+    """
+    # bool must be tested first, because bool is a subclass of int
+    if isinstance(specification, bool):
+        return specification
+    if isinstance(specification, int):
+        return count >= specification
+    raise TypeError(
+        "vertical specification must be a bool or an int, not {}".format(
+            type(specification).__name__
+        )
+    )
+
+
+def _to_diagram_element(
+    element: pyparsing.ParserElement,
+    diagrams=None,
+    vertical: typing.Union[int, bool] = 5,
+) -> typing.Tuple[railroad.DiagramItem, typing.Dict[int, NamedDiagram]]:
+    """
+    Recursively converts a PyParsing Element to a railroad Element
+    :param element: The pyparsing element to convert
+    :param diagrams: The sub-diagrams found so far, keyed by the id() of the Forward they belong to. The dictionary
+        that is passed in is copied, never modified
+    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
+        it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
+        do so
+    :returns: A tuple, where the first item is the converted version of the input element, and the second item is a
+        dictionary of extra diagrams that also need to be displayed in order to represent recursive grammars
+    """
+    if diagrams is None:
+        diagrams = {}
+    else:
+        # We don't want to be modifying the parent's version of the dict, although we do use it as a foundation
+        diagrams = diagrams.copy()
+
+    # Convert the nebulous list of child elements into a single list of objects for easy use
+    if hasattr(element, "exprs"):
+        exprs = element.exprs
+    elif hasattr(element, "expr"):
+        exprs = [element.expr]
+    else:
+        exprs = []
+
+    name = get_name(element)
+
+    if isinstance(element, pyparsing.Forward):
+        # If we encounter a forward reference, we have to split the diagram in two and return a new diagram which
+        # represents the forward reference on its own
+
+        # Python's id() is used to provide a unique identifier for elements
+        el_id = id(element)
+        if el_id in diagrams:
+            # We have already seen this Forward (or are in the middle of converting it), so only reuse its name
+            name = diagrams[el_id].name
+        else:
+            # If the Forward has no real name, we name it Group N to at least make it unique
+            count = len(diagrams) + 1
+            name = get_name(element, "Group {}".format(count))
+            # We have to first put in a placeholder so that, if we encounter this element deeper down in the tree,
+            # we won't have an infinite loop
+            diagrams[el_id] = NamedDiagram(name=name, diagram=None)
+
+            # At this point we create a new subdiagram, and add it to the dictionary of diagrams
+            forward_element, forward_diagrams = _to_diagram_element(
+                exprs[0], diagrams, vertical
+            )
+            diagram = railroad.Diagram(forward_element)
+            # forward_diagrams still holds our placeholder, so the real diagram must be stored after the update
+            diagrams.update(forward_diagrams)
+            diagrams[el_id] = diagrams[el_id]._replace(diagram=diagram)
+            diagram.format(20)
+
+        # Here we just use the element's name as a placeholder for the recursive grammar which is defined separately
+        ret = railroad.NonTerminal(text=name)
+    else:
+        # If we don't encounter a Forward, we can continue to recurse into the tree
+
+        # Recursively convert child elements, passing `vertical` down so that the caller's setting applies to
+        # the whole tree and not just to the root element
+        children = []
+        for expr in exprs:
+            item, subdiagrams = _to_diagram_element(expr, diagrams, vertical)
+            children.append(item)
+            diagrams.update(subdiagrams)
+
+        # Here we find the most relevant Railroad element for matching pyparsing Element
+        if isinstance(element, pyparsing.And):
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.Stack(*children)
+            else:
+                ret = railroad.Sequence(*children)
+        elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
+            # NOTE(review): the "vertical" case selects HorizontalChoice here, which looks inverted relative to
+            # the documentation of `vertical` - confirm whether this is intended
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.HorizontalChoice(*children)
+            else:
+                ret = railroad.Choice(0, *children)
+        elif isinstance(element, pyparsing.Optional):
+            ret = railroad.Optional(children[0])
+        elif isinstance(element, pyparsing.OneOrMore):
+            ret = railroad.OneOrMore(children[0])
+        elif isinstance(element, pyparsing.ZeroOrMore):
+            ret = railroad.ZeroOrMore(children[0])
+        elif isinstance(element, pyparsing.Group):
+            # Generally there isn't any merit in labelling a group as a group if it doesn't have a custom name
+            ret = railroad.Group(children[0], label=get_name(element, ""))
+        elif len(exprs) > 1:
+            # Any other element with several children: show all of them, not only the first
+            ret = railroad.Sequence(*children)
+        elif len(exprs) > 0:
+            ret = railroad.Group(children[0], label=name)
+        else:
+            ret = railroad.Terminal(name)
+
+    return ret, diagrams
diff --git a/pyparsing/diagram/template.jinja2 b/pyparsing/diagram/template.jinja2
new file mode 100644
index 0000000..0f62426
--- /dev/null
+++ b/pyparsing/diagram/template.jinja2
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<body>
+{% for diagram in diagrams %}
+ <h1>{{ diagram.title }}</h1>
+ <div>{{ diagram.text }}</div>
+ {{ diagram.svg }}
+{% endfor %}
+</body>
+</html>
diff --git a/setup.py b/setup.py
index 910831f..b8208fd 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ setup( # Distribution meta-data
license="MIT License",
packages=packages,
python_requires=">=3.5",
+ extras_require={"diagrams": ["railroad-diagrams", "jinja2"],},
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
diff --git a/tests/test_diagram.py b/tests/test_diagram.py
new file mode 100644
index 0000000..843228b
--- /dev/null
+++ b/tests/test_diagram.py
@@ -0,0 +1,38 @@
+import unittest
+from examples.jsonParser import jsonObject
+from examples.simpleBool import boolExpr
+from pyparsing.diagram import to_railroad, railroad_to_html
+import tempfile
+import os
+
+
+class TestRailroadDiagrams(unittest.TestCase):
+    """
+    Smoke tests which render the example grammars to HTML. Set the RAILROAD_DEBUG environment variable to keep
+    the generated files and print their paths
+    """
+
+    def railroad_debug(self) -> bool:
+        """
+        Returns True if we're in debug mode, i.e. RAILROAD_DEBUG is set to a non-empty value
+        """
+        # The raw environment value is a string, so convert it to honour the declared return type
+        return bool(os.environ.get("RAILROAD_DEBUG", False))
+
+    def get_temp(self):
+        """
+        Returns an appropriate temporary file for writing a railroad diagram. The file is only kept after it is
+        closed when we're in debug mode
+        """
+        return tempfile.NamedTemporaryFile(
+            delete=not self.railroad_debug(), mode="w", encoding="utf-8", suffix=".html"
+        )
+
+    def _render_to_temp(self, grammar):
+        """
+        Converts the grammar to HTML, checks that a diagram was produced, and writes it to a temporary file
+        """
+        html = railroad_to_html(to_railroad(grammar))
+        self.assertIn("<svg", html)
+
+        with self.get_temp() as temp:
+            temp.write(html)
+
+            if self.railroad_debug():
+                print(temp.name)
+
+    def test_bool_expr(self):
+        self._render_to_temp(boolExpr)
+
+    def test_json(self):
+        self._render_to_temp(jsonObject)
diff --git a/tox.ini b/tox.ini
index 5bf8404..b227547 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,7 @@ envlist =
[testenv]
deps=coverage
+extras=diagrams
commands=
coverage run --parallel --branch -m unittest