From 00de62cdc817fd1b92dd5d3e38d2a248a1613507 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 18 Apr 2015 06:55:42 -0700 Subject: Decrease memory usage of pycparser by using __slots__ in AST nodes. In general, there's no need to dynamically adjust the attributes of AST nodes. So __slots__ seems suitable. This reduces the memory usage of the test case reported in issue #72 from 21MB to 17.5MB and should reduce the amount of space consumed by AST nodes in general. --- pycparser/_ast_gen.py | 7 +++- pycparser/_c_ast.cfg | 5 +-- pycparser/c_ast.py | 88 +++++++++++++++++++++++++++++++++++++++------------ pycparser/c_parser.py | 2 ++ z.py | 28 ++++++++++++++-- 5 files changed, 102 insertions(+), 28 deletions(-) diff --git a/pycparser/_ast_gen.py b/pycparser/_ast_gen.py index 307ece5..654fad6 100644 --- a/pycparser/_ast_gen.py +++ b/pycparser/_ast_gen.py @@ -92,10 +92,14 @@ class NodeCfg(object): if self.all_entries: args = ', '.join(self.all_entries) + slots = ', '.join("'{0}'".format(e) for e in self.all_entries) + slots += ", 'coord'" arglist = '(self, %s, coord=None)' % args else: + slots = "'coord'," arglist = '(self, coord=None)' + src += " __slots__ = (%s)\n" % slots src += " def __init__%s:\n" % arglist for name in self.all_entries + ['coord']: @@ -128,7 +132,7 @@ class NodeCfg(object): return src def _gen_attr_names(self): - src = " attr_names = (" + ''.join("%r," % nm for nm in self.attr) + ')' + src = " attr_names = (" + ''.join("%r, " % nm for nm in self.attr) + ')' return src @@ -157,6 +161,7 @@ import sys class Node(object): + __slots__ = () """ Abstract base class for AST nodes. """ def children(self): diff --git a/pycparser/_c_ast.cfg b/pycparser/_c_ast.cfg index b9fce05..5323e57 100644 --- a/pycparser/_c_ast.cfg +++ b/pycparser/_c_ast.cfg @@ -177,7 +177,7 @@ TypeDecl: [declname, quals, type*] # Typedef: [name, quals, storage, type*] -Typename: [quals, type*] +Typename: [name, quals, type*] UnaryOp: [op, expr*] @@ -187,6 +187,3 @@ UnaryOp: [op, expr*] Union: [name, decls**] While: [cond*, stmt*] - - - diff --git a/pycparser/c_ast.py b/pycparser/c_ast.py index 212019a..04a6e5e 100644 --- a/pycparser/c_ast.py +++ b/pycparser/c_ast.py @@ -20,6 +20,7 @@ import sys class Node(object): + __slots__ = () """ Abstract base class for AST nodes. """ def children(self): @@ -127,6 +128,7 @@ class NodeVisitor(object): class ArrayDecl(Node): + __slots__ = ('type', 'dim', 'dim_quals', 'coord') def __init__(self, type, dim, dim_quals, coord=None): self.type = type self.dim = dim @@ -139,9 +141,10 @@ class ArrayDecl(Node): if self.dim is not None: nodelist.append(("dim", self.dim)) return tuple(nodelist) - attr_names = ('dim_quals',) + attr_names = ('dim_quals', ) class ArrayRef(Node): + __slots__ = ('name', 'subscript', 'coord') def __init__(self, name, subscript, coord=None): self.name = name self.subscript = subscript @@ -156,6 +159,7 @@ class ArrayRef(Node): attr_names = () class Assignment(Node): + __slots__ = ('op', 'lvalue', 'rvalue', 'coord') def __init__(self, op, lvalue, rvalue, coord=None): self.op = op self.lvalue = lvalue @@ -168,9 +172,10 @@ class Assignment(Node): if self.rvalue is not None: nodelist.append(("rvalue", self.rvalue)) return tuple(nodelist) - attr_names = ('op',) + attr_names = ('op', ) class BinaryOp(Node): + __slots__ = ('op', 'left', 'right', 'coord') def __init__(self, op, left, right, coord=None): self.op = op self.left = left @@ -183,9 +188,10 @@ class BinaryOp(Node): if self.right is not None: nodelist.append(("right", self.right)) return tuple(nodelist) - attr_names = ('op',) + attr_names = ('op', ) class Break(Node): + __slots__ = ('coord',) def __init__(self, coord=None): self.coord = coord @@ -195,6 +201,7 @@ class Break(Node): attr_names = () class Case(Node): + __slots__ = ('expr', 'stmts', 'coord') def __init__(self, expr, stmts, coord=None): self.expr = expr self.stmts = stmts @@ -210,6 +217,7 @@ class Case(Node): attr_names = () class Cast(Node): + __slots__ = ('to_type', 'expr', 'coord') def __init__(self, to_type, expr, coord=None): self.to_type = to_type self.expr = expr @@ -224,6 +232,7 @@ class Cast(Node): attr_names = () class Compound(Node): + __slots__ = ('block_items', 'coord') def __init__(self, block_items, coord=None): self.block_items = block_items self.coord = coord @@ -237,6 +246,7 @@ class Compound(Node): attr_names = () class CompoundLiteral(Node): + __slots__ = ('type', 'init', 'coord') def __init__(self, type, init, coord=None): self.type = type self.init = init @@ -251,6 +261,7 @@ class CompoundLiteral(Node): attr_names = () class Constant(Node): + __slots__ = ('type', 'value', 'coord') def __init__(self, type, value, coord=None): self.type = type self.value = value @@ -260,9 +271,10 @@ class Constant(Node): nodelist = [] return tuple(nodelist) - attr_names = ('type','value',) + attr_names = ('type', 'value', ) class Continue(Node): + __slots__ = ('coord',) def __init__(self, coord=None): self.coord = coord @@ -272,6 +284,7 @@ class Continue(Node): attr_names = () class Decl(Node): + __slots__ = ('name', 'quals', 'storage', 'funcspec', 'type', 'init', 'bitsize', 'coord') def __init__(self, name, quals, storage, funcspec, type, init, bitsize, coord=None): self.name = name self.quals = quals @@ -289,9 +302,10 @@ class Decl(Node): if self.bitsize is not None: nodelist.append(("bitsize", self.bitsize)) return tuple(nodelist) - attr_names = ('name','quals','storage','funcspec',) + attr_names = ('name', 'quals', 'storage', 'funcspec', ) class DeclList(Node): + __slots__ = ('decls', 'coord') def __init__(self, decls, coord=None): self.decls = decls self.coord = coord @@ -305,6 +319,7 @@ class DeclList(Node): attr_names = () class Default(Node): + __slots__ = ('stmts', 'coord') def __init__(self, stmts, coord=None): self.stmts = stmts self.coord = coord @@ -318,6 +333,7 @@ class Default(Node): attr_names = () class DoWhile(Node): + __slots__ = ('cond', 'stmt', 'coord') def __init__(self, cond, stmt, coord=None): self.cond = cond self.stmt = stmt @@ -332,6 +348,7 @@ class DoWhile(Node): attr_names = () class EllipsisParam(Node): + __slots__ = ('coord',) def __init__(self, coord=None): self.coord = coord @@ -341,6 +358,7 @@ class EllipsisParam(Node): attr_names = () class EmptyStatement(Node): + __slots__ = ('coord',) def __init__(self, coord=None): self.coord = coord @@ -350,6 +368,7 @@ class EmptyStatement(Node): attr_names = () class Enum(Node): + __slots__ = ('name', 'values', 'coord') def __init__(self, name, values, coord=None): self.name = name self.values = values @@ -360,9 +379,10 @@ class Enum(Node): if self.values is not None: nodelist.append(("values", self.values)) return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class Enumerator(Node): + __slots__ = ('name', 'value', 'coord') def __init__(self, name, value, coord=None): self.name = name self.value = value @@ -373,9 +393,10 @@ class Enumerator(Node): if self.value is not None: nodelist.append(("value", self.value)) return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class EnumeratorList(Node): + __slots__ = ('enumerators', 'coord') def __init__(self, enumerators, coord=None): self.enumerators = enumerators self.coord = coord @@ -389,6 +410,7 @@ class EnumeratorList(Node): attr_names = () class ExprList(Node): + __slots__ = ('exprs', 'coord') def __init__(self, exprs, coord=None): self.exprs = exprs self.coord = coord @@ -402,6 +424,7 @@ class ExprList(Node): attr_names = () class FileAST(Node): + __slots__ = ('ext', 'coord') def __init__(self, ext, coord=None): self.ext = ext self.coord = coord @@ -415,6 +438,7 @@ class FileAST(Node): attr_names = () class For(Node): + __slots__ = ('init', 'cond', 'next', 'stmt', 'coord') def __init__(self, init, cond, next, stmt, coord=None): self.init = init self.cond = cond @@ -433,6 +457,7 @@ class For(Node): attr_names = () class FuncCall(Node): + __slots__ = ('name', 'args', 'coord') def __init__(self, name, args, coord=None): self.name = name self.args = args @@ -447,6 +472,7 @@ class FuncCall(Node): attr_names = () class FuncDecl(Node): + __slots__ = ('args', 'type', 'coord') def __init__(self, args, type, coord=None): self.args = args self.type = type @@ -461,6 +487,7 @@ class FuncDecl(Node): attr_names = () class FuncDef(Node): + __slots__ = ('decl', 'param_decls', 'body', 'coord') def __init__(self, decl, param_decls, body, coord=None): self.decl = decl self.param_decls = param_decls @@ -478,6 +505,7 @@ class FuncDef(Node): attr_names = () class Goto(Node): + __slots__ = ('name', 'coord') def __init__(self, name, coord=None): self.name = name self.coord = coord @@ -486,9 +514,10 @@ class Goto(Node): nodelist = [] return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class ID(Node): + __slots__ = ('name', 'coord') def __init__(self, name, coord=None): self.name = name self.coord = coord @@ -497,9 +526,10 @@ class ID(Node): nodelist = [] return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class IdentifierType(Node): + __slots__ = ('names', 'coord') def __init__(self, names, coord=None): self.names = names self.coord = coord @@ -508,9 +538,10 @@ class IdentifierType(Node): nodelist = [] return tuple(nodelist) - attr_names = ('names',) + attr_names = ('names', ) class If(Node): + __slots__ = ('cond', 'iftrue', 'iffalse', 'coord') def __init__(self, cond, iftrue, iffalse, coord=None): self.cond = cond self.iftrue = iftrue @@ -527,6 +558,7 @@ class If(Node): attr_names = () class InitList(Node): + __slots__ = ('exprs', 'coord') def __init__(self, exprs, coord=None): self.exprs = exprs self.coord = coord @@ -540,6 +572,7 @@ class InitList(Node): attr_names = () class Label(Node): + __slots__ = ('name', 'stmt', 'coord') def __init__(self, name, stmt, coord=None): self.name = name self.stmt = stmt @@ -550,9 +583,10 @@ class Label(Node): if self.stmt is not None: nodelist.append(("stmt", self.stmt)) return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class NamedInitializer(Node): + __slots__ = ('name', 'expr', 'coord') def __init__(self, name, expr, coord=None): self.name = name self.expr = expr @@ -568,6 +602,7 @@ class NamedInitializer(Node): attr_names = () class ParamList(Node): + __slots__ = ('params', 'coord') def __init__(self, params, coord=None): self.params = params self.coord = coord @@ -581,6 +616,7 @@ class ParamList(Node): attr_names = () class PtrDecl(Node): + __slots__ = ('quals', 'type', 'coord') def __init__(self, quals, type, coord=None): self.quals = quals self.type = type @@ -591,9 +627,10 @@ class PtrDecl(Node): if self.type is not None: nodelist.append(("type", self.type)) return tuple(nodelist) - attr_names = ('quals',) + attr_names = ('quals', ) class Return(Node): + __slots__ = ('expr', 'coord') def __init__(self, expr, coord=None): self.expr = expr self.coord = coord @@ -606,6 +643,7 @@ class Return(Node): attr_names = () class Struct(Node): + __slots__ = ('name', 'decls', 'coord') def __init__(self, name, decls, coord=None): self.name = name self.decls = decls @@ -617,9 +655,10 @@ class Struct(Node): nodelist.append(("decls[%d]" % i, child)) return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class StructRef(Node): + __slots__ = ('name', 'type', 'field', 'coord') def __init__(self, name, type, field, coord=None): self.name = name self.type = type @@ -632,9 +671,10 @@ class StructRef(Node): if self.field is not None: nodelist.append(("field", self.field)) return tuple(nodelist) - attr_names = ('type',) + attr_names = ('type', ) class Switch(Node): + __slots__ = ('cond', 'stmt', 'coord') def __init__(self, cond, stmt, coord=None): self.cond = cond self.stmt = stmt @@ -649,6 +689,7 @@ class Switch(Node): attr_names = () class TernaryOp(Node): + __slots__ = ('cond', 'iftrue', 'iffalse', 'coord') def __init__(self, cond, iftrue, iffalse, coord=None): self.cond = cond self.iftrue = iftrue @@ -665,6 +706,7 @@ class TernaryOp(Node): attr_names = () class TypeDecl(Node): + __slots__ = ('declname', 'quals', 'type', 'coord') def __init__(self, declname, quals, type, coord=None): self.declname = declname self.quals = quals @@ -676,9 +718,10 @@ class TypeDecl(Node): if self.type is not None: nodelist.append(("type", self.type)) return tuple(nodelist) - attr_names = ('declname','quals',) + attr_names = ('declname', 'quals', ) class Typedef(Node): + __slots__ = ('name', 'quals', 'storage', 'type', 'coord') def __init__(self, name, quals, storage, type, coord=None): self.name = name self.quals = quals @@ -691,10 +734,12 @@ class Typedef(Node): if self.type is not None: nodelist.append(("type", self.type)) return tuple(nodelist) - attr_names = ('name','quals','storage',) + attr_names = ('name', 'quals', 'storage', ) class Typename(Node): - def __init__(self, quals, type, coord=None): + __slots__ = ('name', 'quals', 'type', 'coord') + def __init__(self, name, quals, type, coord=None): + self.name = name self.quals = quals self.type = type self.coord = coord @@ -704,9 +749,10 @@ class Typename(Node): if self.type is not None: nodelist.append(("type", self.type)) return tuple(nodelist) - attr_names = ('quals',) + attr_names = ('name', 'quals', ) class UnaryOp(Node): + __slots__ = ('op', 'expr', 'coord') def __init__(self, op, expr, coord=None): self.op = op self.expr = expr @@ -717,9 +763,10 @@ class UnaryOp(Node): if self.expr is not None: nodelist.append(("expr", self.expr)) return tuple(nodelist) - attr_names = ('op',) + attr_names = ('op', ) class Union(Node): + __slots__ = ('name', 'decls', 'coord') def __init__(self, name, decls, coord=None): self.name = name self.decls = decls @@ -731,9 +778,10 @@ class Union(Node): nodelist.append(("decls[%d]" % i, child)) return tuple(nodelist) - attr_names = ('name',) + attr_names = ('name', ) class While(Node): + __slots__ = ('cond', 'stmt', 'coord') def __init__(self, cond, stmt, coord=None): self.cond = cond self.stmt = stmt diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py index 643b88c..4eb26b6 100644 --- a/pycparser/c_parser.py +++ b/pycparser/c_parser.py @@ -1124,6 +1124,7 @@ class CParser(PLYParser): # else: decl = c_ast.Typename( + name='', quals=spec['qual'], type=p[2] or c_ast.TypeDecl(None, None, None), coord=self._coord(p.lineno(2))) @@ -1195,6 +1196,7 @@ class CParser(PLYParser): #~ print '==========' typename = c_ast.Typename( + name='', quals=p[1]['qual'], type=p[2] or c_ast.TypeDecl(None, None, None), coord=self._coord(p.lineno(2))) diff --git a/z.py b/z.py index 9031e03..d881240 100644 --- a/z.py +++ b/z.py @@ -1,4 +1,5 @@ import sys +from pycparser import parse_file from pycparser.c_ast import * from pycparser.c_parser import CParser, Coord, ParseError from pycparser.c_lexer import CLexer @@ -74,12 +75,33 @@ class NodeVisitor(object): self.current_parent = oldparent +def memprofile(): + import resource + import tracemalloc + + tracemalloc.start() + + ast = parse_file('/tmp/197.c') + + print('Memory usage: %s (kb)' % + resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) + + snapshot = tracemalloc.take_snapshot() + print("[ tracemalloc stats ]") + for stat in snapshot.statistics('lineno')[:20]: + print(stat) + + if __name__ == "__main__": source_code = r'''void foo() { L"hi" L"there"; } ''' - parser = CParser() - ast = parser.parse(source_code, filename='zz') - ast.show(showcoord=True, attrnames=True, nodenames=True) + memprofile() + + #parser = CParser() + #ast = parser.parse(source_code, filename='zz') + #ast.show(showcoord=True, attrnames=True, nodenames=True) + + -- cgit v1.2.1