#------------------------------------------------------------------------------
# pycparser: c_json.py
#
# by Michael White (@mypalmike)
#
# This example includes functions to serialize and deserialize an ast
# to and from json format. Serializing involves walking the ast and converting
# each node from a python Node object into a python dict. Deserializing
# involves the opposite conversion, walking the tree formed by the
# dict and converting each dict into the specific Node object it represents.
# The dict itself is serialized and deserialized using the python json module.
#
# The dict representation is a fairly direct transformation of the object
# attributes. Each node in the dict gets one metadata field referring to the
# specific node class name, _nodetype. Each local attribute (i.e. not linking
# to child nodes) has a string value or array of string values. Each child
# attribute is either another dict or an array of dicts, exactly as in the
# Node object representation. The "coord" attribute, representing the
# node's location within the source code, is serialized/deserialized from
# a Coord object into a string of the format "filename:line[:column]".
#
# Example TypeDecl node, with IdentifierType child node, represented as a dict:
#     "type": {
#         "_nodetype": "TypeDecl",
#         "coord": "c_files/funky.c:8",
#         "declname": "o",
#         "quals": [],
#         "type": {
#             "_nodetype": "IdentifierType",
#             "coord": "c_files/funky.c:8",
#             "names": [
#                 "char"
#             ]
#         }
#     }
#------------------------------------------------------------------------------
from __future__ import print_function

import json
import sys
import re

# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord


# Matches array-style child names such as 'block_items[1]'.
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches names starting with a dunder (e.g. '__weakref__') in __slots__.
RE_INTERNAL_ATTR = re.compile('__.*__')
# Matches "filename:line[:column]", anchored on the right so that colons
# inside the filename (e.g. a Windows drive letter) stay in the filename.
RE_COORD = re.compile(r'^(.*?):(\d+)(?::(\d+))?$')


class CJsonError(Exception):
    """ Raised when an ast cannot be converted to its dict representation. """
    pass


def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument.

        The returned callable is the bound __getitem__ of a dict subclass
        whose __missing__ computes fn(key) and stores it, so fn runs at most
        once per distinct (hashable) argument.
    """
    class _MemoDict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return _MemoDict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation
    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = {name for name in klass.__slots__
                 if not RE_INTERNAL_ATTR.match(name)}
    return all_attrs - non_child_attrs


def to_dict(node):
    """ Recursively convert an ast into dict representation.

        The result holds '_nodetype' (the node's class name), one entry per
        name in the class's attr_names, 'coord' (str(node.coord) or None) and
        one entry per child attribute (a dict, a list of dicts, or None when
        the child is absent).

        Raises CJsonError if array-style children are not reported in
        consecutive index order.
    """
    klass = node.__class__

    result = {}

    # Metadata
    result['_nodetype'] = klass.__name__

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    if node.coord:
        result['coord'] = str(node.coord)
    else:
        result['coord'] = None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays
        # (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            items = result.setdefault(array_name, [])
            if array_index != len(items):
                raise CJsonError('Internal ast error. Array {} out of order. '
                                 'Expected index {}, got {}'.format(
                                     array_name, len(items), array_index))
            items.append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result


def to_json(node, **kwargs):
    """ Convert ast node to json string.

        Extra keyword arguments are passed straight to json.dumps.
    """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    """ Load C file into dict representation of ast """
    ast = parse_file(filename, use_cpp=True)
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast.

        Extra keyword arguments are passed straight to json.dumps.
    """
    return json.dumps(file_to_dict(filename), **kwargs)


def _parse_coord(coord_str):
    """ Parse coord string (file:line[:column]) into Coord object.

        Returns None when coord_str is None. The string is matched from the
        right, so a filename that itself contains ':' is kept intact, and the
        line/column are converted to ints. Strings that do not end in a
        numeric line fall back to a plain split on ':' with the pieces passed
        through unchanged.
    """
    if coord_str is None:
        return None

    match = RE_COORD.match(coord_str)
    if match:
        filename, line, column = match.groups()
        if column is not None:
            column = int(column)
        return Coord(filename, int(line), column)

    # Fallback for strings without a trailing numeric line: split naively
    # and pad with None so the three-way unpacking always succeeds.
    vals = coord_str.split(':')
    vals.extend([None] * 3)
    filename, line, column = vals[:3]
    return Coord(filename, line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.
    """
    # isinstance (rather than an exact type comparison) so that dict/list
    # subclasses, e.g. an OrderedDict from json.loads, are converted too.
    if isinstance(value, dict):
        return from_dict(value)
    if isinstance(value, list):
        return [_convert_to_obj(item) for item in value]
    # String (or None): used as-is.
    return value


def from_dict(node_dict):
    """ Recursively build an ast from dict representation.

        node_dict is left unmodified, so the same dict can be converted or
        serialized again afterwards.

        Raises KeyError if node_dict has no '_nodetype' entry and
        AttributeError if c_ast has no attribute with that name.
    """
    class_name = node_dict['_nodetype']
    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))


#------------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Some test code...
        # Do trip from C -> ast -> dict -> ast -> json, then print.
        ast_dict = file_to_dict(sys.argv[1])
        ast = from_dict(ast_dict)
        print(to_json(ast, sort_keys=True, indent=4))
    else:
        print("Please provide a filename as argument")