diff options
author | Michael White <mikewhite22@yahoo.com> | 2017-02-02 20:02:20 -0800 |
---|---|---|
committer | Eli Bendersky <eliben@users.noreply.github.com> | 2017-02-02 20:02:20 -0800 |
commit | 18c284431f7a50ade94950970a67d4afbf6b9084 (patch) | |
tree | 2d412c2aecfdae782c27772e426187d896ee3748 /examples | |
parent | 6d45ff70f30ba8a5be8fd7aa8ab020360a8f9e9d (diff) | |
download | pycparser-18c284431f7a50ade94950970a67d4afbf6b9084.tar.gz |
dump and load as json (#163)
* ast to json working
* Now roundtrippable
* Serialize all attrs to json. Handle coords attr which was silently dropped previously.
* Documentation and comment fixes.
* Minor comment tweak.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/c_json.py | 203 |
1 files changed, 203 insertions, 0 deletions
diff --git a/examples/c_json.py b/examples/c_json.py new file mode 100644 index 0000000..3bfef5b --- /dev/null +++ b/examples/c_json.py @@ -0,0 +1,203 @@ +#------------------------------------------------------------------------------ +# pycparser: c_json.py +# +# by Michael White (@mypalmike) +# +# This example includes functions to serialize and deserialize an ast +# to and from json format. Serializing involves walking the ast and converting +# each node from a python Node object into a python dict. Deserializing +# involves the opposite conversion, walking the tree formed by the +# dict and converting each dict into the specific Node object it represents. +# The dict itself is serialized and deserialized using the python json module. +# +# The dict representation is a fairly direct transformation of the object +# attributes. Each node in the dict gets one metadata field referring to the +# specific node class name, _nodetype. Each local attribute (i.e. not linking +# to child nodes) has a string value or array of string values. Each child +# attribute is either another dict or an array of dicts, exactly as in the +# Node object representation. The "coord" attribute, representing the +# node's location within the source code, is serialized/deserialized from +# a Coord object into a string of the format "filename:line[:column]". 
#
# Example TypeDecl node, with IdentifierType child node, represented as a dict:
#     "type": {
#         "_nodetype": "TypeDecl",
#         "coord": "c_files/funky.c:8",
#         "declname": "o",
#         "quals": [],
#         "type": {
#             "_nodetype": "IdentifierType",
#             "coord": "c_files/funky.c:8",
#             "names": [
#                 "char"
#             ]
#         }
#     }
#------------------------------------------------------------------------------
from __future__ import print_function

import json
import sys
import re

# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord


# Child names reported by Node.children() are either simple ('value') or
# indexed ('block_items[1]'). Raw string: '\[' in a plain literal is an
# invalid escape sequence.
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')

# Matches slot names that start with a dunder-style name (e.g. __weakref__).
RE_INTERNAL_ATTR = re.compile('__.*__')

# "filename:line[:column]" with numeric line/column. The filename part is
# matched lazily and the pattern is anchored at the end, so colons inside
# the filename (e.g. a drive letter) stay part of the filename.
RE_COORD = re.compile(r'(.*?):(\d+)(?::(\d+))?$')


class CJsonError(Exception):
    """ Raised when the ast cannot be converted to the dict representation. """
    pass


def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument.

        The cache is a dict subclass whose __missing__ hook calls fn on the
        first lookup of a key and stores the result; the decorated name is
        bound to that dict's __getitem__.
    """
    class _MemoDict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return _MemoDict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation

    The result is every name in klass.__slots__ that does not look like an
    internal dunder name, minus the names listed in klass.attr_names.
    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = {slot for slot in klass.__slots__
                 if not RE_INTERNAL_ATTR.match(slot)}
    return all_attrs - non_child_attrs


def to_dict(node):
    """ Recursively convert an ast into dict representation.

        Raises CJsonError if indexed children of the same array are not
        reported in consecutive order starting at 0.
    """
    klass = node.__class__

    # Metadata
    result = {'_nodetype': klass.__name__}

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    result['coord'] = str(node.coord) if node.coord else None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays
        # (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            siblings = result.setdefault(array_name, [])
            if array_index != len(siblings):
                raise CJsonError('Internal ast error. Array {} out of order. '
                    'Expected index {}, got {}'.format(
                    array_name, len(siblings), array_index))
            siblings.append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        result.setdefault(child_attr, None)

    return result


def to_json(node, **kwargs):
    """ Convert ast node to json string.

        Extra keyword arguments are forwarded to json.dumps.
    """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    """ Load C file into dict representation of ast """
    ast = parse_file(filename, use_cpp=True)
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast.

        Extra keyword arguments are forwarded to json.dumps.
    """
    ast = parse_file(filename, use_cpp=True)
    return to_json(ast, **kwargs)


def _parse_coord(coord_str):
    """ Parse coord string (file:line[:column]) into Coord object.

        Returns None when coord_str is None. Numeric line/column parts are
        converted to int. Strings that do not end in ':<digits>' fall back
        to a plain split on ':' with the parts kept as strings.
    """
    if coord_str is None:
        return None

    match = RE_COORD.match(coord_str)
    if match:
        filename, line, column = match.groups()
        if column is not None:
            column = int(column)
        return Coord(filename, int(line), column)

    # Fallback for strings without a numeric line (e.g. just a filename):
    # pad with None so missing parts are passed to Coord as None.
    vals = coord_str.split(':')
    vals.extend([None] * 3)
    filename, line, column = vals[:3]
    return Coord(filename, line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.

    Dicts become nodes, lists are converted element-wise, and anything else
    is returned unchanged. isinstance is used so that dict/list subclasses
    are converted as well.
    """
    if isinstance(value, dict):
        return from_dict(value)
    if isinstance(value, list):
        return [_convert_to_obj(item) for item in value]
    # String (or other plain value)
    return value


def from_dict(node_dict):
    """ Recursively build an ast from dict representation.

        node_dict is not modified. Raises KeyError if the '_nodetype' key is
        missing and AttributeError if it does not name an attribute of c_ast.
    """
    # Read the metadata key without popping it, so the caller's dict (and
    # every nested dict) can be reused after this call.
    class_name = node_dict['_nodetype']

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))


#------------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Some test code...
        # Do trip from C -> ast -> dict -> ast -> json, then print.
        ast_dict = file_to_dict(sys.argv[1])
        ast = from_dict(ast_dict)
        print(to_json(ast, sort_keys=True, indent=4))
    else:
        print("Please provide a filename as argument")