diff options
author | Michael Milton <ttmigueltt@gmail.com> | 2020-06-01 03:34:41 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-31 12:34:41 -0500 |
commit | 58c171bb5077f615dc36fc55f470a462e56da891 (patch) | |
tree | 9aa19d80ccfa85195c68064900b89ccf972eca04 | |
parent | 2952e92bcc4990580dee6f1d83b591700bc1fdc3 (diff) | |
download | pyparsing-git-58c171bb5077f615dc36fc55f470a462e56da891.tar.gz |
Railroad Diagrams (#218)
* Basic framework
* Initial effort
* Clean up and document code
* jinja newline
* Pre-commit, and add extras to tox
* We can't use the class type-annotations syntax in Python 3.5
-rw-r--r-- | pyparsing/diagram/__init__.py | 193 | ||||
-rw-r--r-- | pyparsing/diagram/template.jinja2 | 11 | ||||
-rw-r--r-- | setup.py | 1 | ||||
-rw-r--r-- | tests/test_diagram.py | 44 | ||||
-rw-r--r-- | tox.ini | 1 |
5 files changed, 250 insertions, 0 deletions
diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py
new file mode 100644
--- /dev/null
+++ b/pyparsing/diagram/__init__.py
@@ -0,0 +1,193 @@
+import railroad
+import pyparsing
+from pkg_resources import resource_filename
+import typing
+from jinja2 import Template
+from io import StringIO
+
+with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
+    template = Template(fp.read())
+
+# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
+NamedDiagram = typing.NamedTuple(
+    "NamedDiagram", [("name", str), ("diagram", typing.Optional[railroad.DiagramItem])]
+)
+"""
+A simple structure for associating a name with a railroad diagram
+"""
+
+
+def get_name(
+    element: pyparsing.ParserElement, default: typing.Optional[str] = None
+) -> str:
+    """
+    Returns a human readable string for a parser element. By default it will first check the element's `name` attribute
+    for a user-defined string, and will fall back to the element type name if this doesn't exist. However, the fallback
+    value can be customized
+    :param element: The parser element to name
+    :param default: The fallback to use instead of the element type name
+    :returns: The element's `name` if it has one that isn't None, otherwise the fallback
+    """
+    if default is None:
+        default = element.__class__.__name__
+
+    # A `name` attribute that exists but is None must not shadow the fallback, or we would return None
+    name = getattr(element, "name", None)
+    return default if name is None else name
+
+
+def railroad_to_html(diagrams: typing.List[NamedDiagram]) -> str:
+    """
+    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
+    :param diagrams: The diagrams to render. None of them may be a placeholder whose `diagram` is None
+    :returns: The HTML document produced by rendering template.jinja2
+    """
+    data = []
+    for diagram in diagrams:
+        # writeSvg() hands the markup to a callback piece by piece, so collect it in an in-memory buffer
+        svg = StringIO()
+        diagram.diagram.writeSvg(svg.write)
+        data.append({"title": diagram.name, "text": "", "svg": svg.getvalue()})
+
+    return template.render(diagrams=data)
+
+
+def to_railroad(
+    element: pyparsing.ParserElement, vertical: typing.Union[int, bool] = 5
+) -> typing.List[NamedDiagram]:
+    """
+    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
+    creation if you want to access the Railroad tree before it is converted to HTML
+    :param element: The root of the grammar to convert
+    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
+        it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
+        do so
+    :returns: The diagram for `element` itself, followed by one diagram for each Forward found in the tree
+    """
+    diagram_element, subdiagrams = _to_diagram_element(element, vertical=vertical)
+    diagram = NamedDiagram(
+        get_name(element, "Grammar"), railroad.Diagram(diagram_element)
+    )
+    return [diagram, *subdiagrams.values()]
+
+
+def _should_vertical(specification: typing.Union[int, bool], count: int) -> bool:
+    """
+    Returns true if we should return a vertical list of elements
+    :param specification: True to always go vertical, False to never do so, or the minimum number of elements at
+        which we go vertical
+    :param count: The number of elements in the list
+    :raises TypeError: If the specification is neither a bool nor an int
+    """
+    # bool has to be tested first, because every bool is also an int
+    if isinstance(specification, bool):
+        return specification
+    elif isinstance(specification, int):
+        return count >= specification
+    else:
+        raise TypeError(
+            "vertical must be a bool or an int, not {}".format(
+                type(specification).__name__
+            )
+        )
+
+
+def _to_diagram_element(
+    element: pyparsing.ParserElement,
+    diagrams=None,
+    vertical: typing.Union[int, bool] = 5,
+) -> typing.Tuple[railroad.DiagramItem, typing.Dict[int, NamedDiagram]]:
+    """
+    Recursively converts a PyParsing Element to a railroad Element
+    :param element: The element to convert
+    :param diagrams: The diagrams found so far, keyed by the id() of the Forward that each one represents. This
+        dictionary is copied rather than modified
+    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
+        it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
+        do so
+    :returns: A tuple, where the first item is the converted version of the input element, and the second item is a
+        dictionary of extra diagrams that also need to be displayed in order to represent recursive grammars
+    """
+    if diagrams is None:
+        diagrams = {}
+    else:
+        # We don't want to be modifying the parent's version of the dict, although we do use it as a foundation
+        diagrams = diagrams.copy()
+
+    # Convert the nebulous list of child elements into a single list of objects for easy use
+    if hasattr(element, "exprs"):
+        exprs = element.exprs
+    elif hasattr(element, "expr"):
+        exprs = [element.expr]
+    else:
+        exprs = []
+
+    name = get_name(element)
+
+    if isinstance(element, pyparsing.Forward):
+        # If we encounter a forward reference, we have to split the diagram in two and return a new diagram which
+        # represents the forward reference on its own
+
+        # Python's id() is used to provide a unique identifier for elements
+        el_id = id(element)
+        if el_id in diagrams:
+            name = diagrams[el_id].name
+        else:
+            # If the Forward has no real name, we name it Group N to at least make it unique
+            count = len(diagrams) + 1
+            name = get_name(element, "Group {}".format(count))
+            # We have to first put in a placeholder so that, if we encounter this element deeper down in the tree,
+            # we won't have an infinite loop
+            diagrams[el_id] = NamedDiagram(name=name, diagram=None)
+
+            # At this point we create a new subdiagram, and add it to the dictionary of diagrams
+            forward_element, forward_diagrams = _to_diagram_element(
+                exprs[0], diagrams, vertical
+            )
+            diagram = railroad.Diagram(forward_element)
+            diagrams.update(forward_diagrams)
+            diagrams[el_id] = diagrams[el_id]._replace(diagram=diagram)
+            diagram.format(20)
+
+        # Here we just use the element's name as a placeholder for the recursive grammar which is defined separately
+        ret = railroad.NonTerminal(text=name)
+    else:
+        # If we don't encounter a Forward, we can continue to recurse into the tree
+
+        # Recursively convert child elements
+        children = []
+        for expr in exprs:
+            item, subdiagrams = _to_diagram_element(expr, diagrams, vertical)
+            children.append(item)
+            diagrams.update(subdiagrams)
+
+        # Here we find the most relevant Railroad element for matching pyparsing Element
+        if isinstance(element, pyparsing.And):
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.Stack(*children)
+            else:
+                ret = railroad.Sequence(*children)
+        elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
+            # Choice stacks its alternatives vertically, HorizontalChoice lays them out side by side
+            if _should_vertical(vertical, len(children)):
+                ret = railroad.Choice(0, *children)
+            else:
+                ret = railroad.HorizontalChoice(*children)
+        elif isinstance(element, pyparsing.Optional):
+            ret = railroad.Optional(children[0])
+        elif isinstance(element, pyparsing.OneOrMore):
+            ret = railroad.OneOrMore(children[0])
+        elif isinstance(element, pyparsing.ZeroOrMore):
+            ret = railroad.ZeroOrMore(children[0])
+        elif isinstance(element, pyparsing.Group):
+            # Generally there isn't any merit in labelling a group as a group if it doesn't have a custom name
+            ret = railroad.Group(children[0], label=get_name(element, ""))
+        elif len(exprs) > 1:
+            # A generic element with several children is shown as a sequence of all of them
+            ret = railroad.Sequence(*children)
+        elif len(exprs) > 0:
+            ret = railroad.Group(children[0], label=name)
+        else:
+            ret = railroad.Terminal(name)
+
+    return ret, diagrams
diff --git a/pyparsing/diagram/template.jinja2 b/pyparsing/diagram/template.jinja2
new file mode 100644
--- /dev/null
+++ b/pyparsing/diagram/template.jinja2
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html>
+<body>
+{# Titles and text are plain strings and get escaped; the SVG is markup and is emitted as-is #}
+{% for diagram in diagrams %}
+    <h1>{{ diagram.title | e }}</h1>
+    <div>{{ diagram.text | e }}</div>
+    {{ diagram.svg }}
+{% endfor %}
+</body>
+</html>
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ setup(  # Distribution meta-data
     license="MIT License",
     packages=packages,
     python_requires=">=3.5",
+    extras_require={"diagrams": ["railroad-diagrams", "jinja2"],},
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Developers",
diff --git a/tests/test_diagram.py b/tests/test_diagram.py
new file mode 100644
--- /dev/null
+++ b/tests/test_diagram.py
@@ -0,0 +1,44 @@
+import unittest
+from examples.jsonParser import jsonObject
+from examples.simpleBool import boolExpr
+from pyparsing.diagram import to_railroad, railroad_to_html
+import tempfile
+import os
+
+
+class TestRailroadDiagrams(unittest.TestCase):
+    def railroad_debug(self) -> bool:
+        """
+        Returns True if we're in debug mode
+        """
+        return bool(os.environ.get("RAILROAD_DEBUG"))
+
+    def get_temp(self):
+        """
+        Returns an appropriate temporary file for writing a railroad diagram
+        """
+        return tempfile.NamedTemporaryFile(
+            delete=not self.railroad_debug(), mode="w", encoding="utf-8", suffix=".html"
+        )
+
+    def check_grammar(self, grammar):
+        """
+        Converts a grammar to HTML, writes it to a temporary file, and checks that diagrams were produced
+        """
+        diagrams = to_railroad(grammar)
+        self.assertTrue(diagrams)
+
+        html = railroad_to_html(diagrams)
+        self.assertIn("<svg", html)
+
+        with self.get_temp() as temp:
+            temp.write(html)
+
+            if self.railroad_debug():
+                print(temp.name)
+
+    def test_bool_expr(self):
+        self.check_grammar(boolExpr)
+
+    def test_json(self):
+        self.check_grammar(jsonObject)
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,7 @@ envlist =
 
 [testenv]
 deps=coverage
+extras=diagrams
 commands=
     coverage run --parallel --branch -m unittest
 