summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author Michael White <mikewhite22@yahoo.com> 2017-02-02 20:02:20 -0800
committer Eli Bendersky <eliben@users.noreply.github.com> 2017-02-02 20:02:20 -0800
commit 18c284431f7a50ade94950970a67d4afbf6b9084 (patch)
tree 2d412c2aecfdae782c27772e426187d896ee3748 /examples
parent 6d45ff70f30ba8a5be8fd7aa8ab020360a8f9e9d (diff)
download pycparser-18c284431f7a50ade94950970a67d4afbf6b9084.tar.gz
dump and load as json (#163)
* ast to json working
* Now roundtrippable
* Serialize all attrs to json. Handle coords attr which was silently dropped previously.
* Documentation and comment fixes.
* Minor comment tweak.
Diffstat (limited to 'examples')
-rw-r--r-- examples/c_json.py 203
1 files changed, 203 insertions, 0 deletions
diff --git a/examples/c_json.py b/examples/c_json.py
new file mode 100644
index 0000000..3bfef5b
--- /dev/null
+++ b/examples/c_json.py
@@ -0,0 +1,203 @@
+#------------------------------------------------------------------------------
+# pycparser: c_json.py
+#
+# by Michael White (@mypalmike)
+#
+# This example includes functions to serialize and deserialize an ast
+# to and from json format. Serializing involves walking the ast and converting
+# each node from a python Node object into a python dict. Deserializing
+# involves the opposite conversion, walking the tree formed by the
+# dict and converting each dict into the specific Node object it represents.
+# The dict itself is serialized and deserialized using the python json module.
+#
+# The dict representation is a fairly direct transformation of the object
+# attributes. Each node in the dict gets one metadata field referring to the
+# specific node class name, _nodetype. Each local attribute (i.e. not linking
+# to child nodes) has a string value or array of string values. Each child
+# attribute is either another dict or an array of dicts, exactly as in the
+# Node object representation. The "coord" attribute, representing the
+# node's location within the source code, is serialized/deserialized from
+# a Coord object into a string of the format "filename:line[:column]".
+#
+# Example TypeDecl node, with IdentifierType child node, represented as a dict:
+#     "type": {
+#         "_nodetype": "TypeDecl",
+#         "coord": "c_files/funky.c:8",
+#         "declname": "o",
+#         "quals": [],
+#         "type": {
+#             "_nodetype": "IdentifierType",
+#             "coord": "c_files/funky.c:8",
+#             "names": [
+#                 "char"
+#             ]
+#         }
+#     }
+#------------------------------------------------------------------------------
+from __future__ import print_function
+
+import json
+import sys
+import re
+
+# This is not required if you've installed pycparser into
+# your site-packages/ with setup.py
+#
+sys.path.extend(['.', '..'])
+
+from pycparser import parse_file, c_ast
+from pycparser.plyparser import Coord
+
+
+# Matches an array-style child name such as 'block_items[1]', capturing the
+# attribute name and the index text. Written as a raw string so that '\['
+# reaches the regex engine as an escape instead of being an invalid Python
+# string escape.
+RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
+# Matches names that start with '__' and contain a later '__'; used to
+# filter such entries out of a Node class's __slots__.
+RE_INTERNAL_ATTR = re.compile(r'__.*__')
+
+
+class CJsonError(Exception):
+    """ Error raised by this module when an ast cannot be serialized. """
+
+
+def memodict(fn):
+    """ Memoizing decorator for a function of one hashable argument.
+
+        The cache is a dict subclass whose __missing__ hook calls fn the
+        first time a key is looked up and stores the result. The object
+        returned in place of fn is that cache's bound __getitem__.
+    """
+    class _Cache(dict):
+        def __missing__(self, arg):
+            value = fn(arg)
+            self[arg] = value
+            return value
+
+    cache = _Cache()
+    return cache.__getitem__
+
+
+@memodict
+def child_attrs_of(klass):
+    """
+    Return the set of child attribute names of a Node class.
+
+    These are the entries of klass.__slots__ that do not match
+    RE_INTERNAL_ATTR and are not listed in klass.attr_names.
+    Memoized so the filtering runs once per class.
+
+    """
+    slot_names = {name for name in klass.__slots__
+                  if RE_INTERNAL_ATTR.match(name) is None}
+    return slot_names.difference(klass.attr_names)
+
+
+def to_dict(node):
+    """ Recursively convert an ast node into its dict representation.
+
+        The result holds '_nodetype' (the node's class name), one entry per
+        name in the class's attr_names, 'coord' (str(node.coord), or None
+        when the coord is falsy) and one entry per child attribute: a dict
+        for a single child, a list of dicts for an array of children, and
+        None for any child attribute that node.children() did not yield.
+
+        Raises CJsonError if array children are not yielded in index order.
+    """
+    node_class = node.__class__
+
+    # Metadata first, then the local (non-child) attributes.
+    result = {'_nodetype': node_class.__name__}
+    for attr_name in node_class.attr_names:
+        result[attr_name] = getattr(node, attr_name)
+
+    # Coord object is stored in its string form.
+    result['coord'] = str(node.coord) if node.coord else None
+
+    # Child names are either simple (e.g. 'value') or indexed
+    # (e.g. 'block_items[1]').
+    for child_name, child in node.children():
+        array_match = RE_CHILD_ARRAY.match(child_name)
+        if array_match is None:
+            result[child_name] = to_dict(child)
+            continue
+
+        array_name, index_text = array_match.groups()
+        position = int(index_text)
+        # Indexed children are expected in order, so the index must equal
+        # the number of siblings collected so far.
+        siblings = result.setdefault(array_name, [])
+        if position != len(siblings):
+            raise CJsonError('Internal ast error. Array {} out of order. '
+                             'Expected index {}, got {}'.format(
+                                 array_name, len(siblings), position))
+        siblings.append(to_dict(child))
+
+    # Child attributes that produced no entry above are emitted as None.
+    for child_attr in child_attrs_of(node_class):
+        result.setdefault(child_attr, None)
+
+    return result
+
+
+def to_json(node, **kwargs):
+    """ Serialize an ast node to a json string.
+
+        Extra keyword arguments are forwarded to json.dumps.
+    """
+    as_dict = to_dict(node)
+    return json.dumps(as_dict, **kwargs)
+
+
+def file_to_dict(filename):
+    """ Parse a C file (with use_cpp=True) and return its ast as a dict """
+    return to_dict(parse_file(filename, use_cpp=True))
+
+
+def file_to_json(filename, **kwargs):
+    """ Parse a C file (with use_cpp=True) and return its ast as json.
+
+        Extra keyword arguments are forwarded to to_json.
+    """
+    return to_json(parse_file(filename, use_cpp=True), **kwargs)
+
+
+def _parse_coord(coord_str):
+    """ Parse coord string (file:line[:column]) into Coord object.
+
+        Returns None when coord_str is None. The line and column fields
+        are converted to int when they hold a number, so the rebuilt Coord
+        carries numeric positions instead of the text fragments produced
+        by splitting the string. A missing field becomes None.
+    """
+    if coord_str is None:
+        return None
+
+    def as_number(field):
+        # Absent (None) or non-numeric fields are passed through unchanged.
+        try:
+            return int(field)
+        except (TypeError, ValueError):
+            return field
+
+    # Pad so that a missing line and/or column unpacks as None.
+    fields = coord_str.split(':') + [None] * 3
+    filename, line, column = fields[:3]
+    return Coord(filename, as_number(line), as_number(column))
+
+
+def _convert_to_obj(value):
+    """
+    Convert a value from the dict representation into its ast form.
+
+    A dict is turned into a node via from_dict, a list is converted
+    element by element, and anything else is returned unchanged.
+    isinstance is used so that dict and list subclasses (for example an
+    OrderedDict) are converted as well rather than passed through.
+    Note: Mutually recursive with from_dict.
+
+    """
+    if isinstance(value, dict):
+        return from_dict(value)
+    if isinstance(value, list):
+        return [_convert_to_obj(item) for item in value]
+    # Plain value (string, None, ...): nothing to convert.
+    return value
+
+
+def from_dict(node_dict):
+    """ Recursively build an ast from dict representation.
+
+        node_dict must contain a '_nodetype' key naming a class in c_ast.
+        Every other entry is passed to that class as a keyword argument:
+        'coord' is parsed with _parse_coord, all other values go through
+        _convert_to_obj. The input dict is left unmodified, so it can be
+        converted again or serialized afterwards.
+
+        Raises KeyError if '_nodetype' is missing and AttributeError if it
+        names nothing in c_ast.
+    """
+    # Read the class name without popping it; popping would strip the
+    # metadata from the caller's dict.
+    class_name = node_dict['_nodetype']
+    klass = getattr(c_ast, class_name)
+
+    # Create a new dict containing the key-value pairs which we can pass
+    # to node constructors.
+    objs = {}
+    for key, value in node_dict.items():
+        if key == '_nodetype':
+            continue
+        if key == 'coord':
+            objs[key] = _parse_coord(value)
+        else:
+            objs[key] = _convert_to_obj(value)
+
+    # Use keyword parameters, which works thanks to beautifully consistent
+    # ast Node initializers.
+    return klass(**objs)
+
+
+def from_json(ast_json):
+    """ Decode a json string and build the ast it represents """
+    decoded = json.loads(ast_json)
+    return from_dict(decoded)
+
+
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+    if len(sys.argv) <= 1:
+        print("Please provide a filename as argument")
+    else:
+        # Some test code...
+        # Round trip C -> ast -> dict -> ast -> json, then print the json.
+        parsed_dict = file_to_dict(sys.argv[1])
+        rebuilt_ast = from_dict(parsed_dict)
+        print(to_json(rebuilt_ast, sort_keys=True, indent=4))