diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2011-08-02 22:35:48 -0400 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2011-08-02 22:35:48 -0400 |
commit | 3447a59bd5570dde4e1ae355aca06a4b9bf4413b (patch) | |
tree | 5ee8ca7fe830d5da8241e855af0b580b8675e9d6 /lab | |
parent | 37850e04b59675a7292f8b6b810c932be0b4939d (diff) | |
download | python-coveragepy-git-3447a59bd5570dde4e1ae355aca06a4b9bf4413b.tar.gz |
Split out and improve the ad-hoc parsing and disassembly tool
Diffstat (limited to 'lab')
-rw-r--r-- | lab/disgen.py | 260 | ||||
-rw-r--r-- | lab/parser.py | 177 |
2 files changed, 437 insertions, 0 deletions
# ---------------------------------------------------------------------------
# lab/disgen.py  (new file, mode 100644, index 00000000..82844eac)
# ---------------------------------------------------------------------------
"""Disassembler of Python byte code into mnemonics."""

# Adapted from stdlib dis.py, but returns structured information
# instead of printing to stdout.
#
# NOTE: like the stdlib module it was adapted from, this code targets the
# Python 2 byte code format: it indexes `co_code` as a str, assumes 2-byte
# opcode arguments, and uses `types.ClassType` / `types.InstanceType`.

import sys
import types
import collections

from opcode import *
from opcode import __all__ as _opcodes_all

__all__ = ["dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels"] + _opcodes_all
del _opcodes_all

_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              types.ClassType, type)


def dis(x=None):
    """Print the disassembly of `x` to stdout, one instruction per line.

    `x` is anything `disgen` accepts.  A blank line is printed before the
    first instruction of every source line except the very first one.

    """
    for disline in disgen(x):
        if disline.first and disline.offset > 0:
            print("")
        print(format_dis_line(disline))


def format_dis_line(disline):
    """Format one `DisLine` as a single line of text.

    The source line number is shown only on the first instruction of a
    source line, and ">>" marks instructions that are jump targets.  The
    blank placeholders are as wide as the values they stand in for, so the
    columns stay aligned.

    """
    if disline.first:
        lineno = "%3d" % disline.lineno
    else:
        lineno = "   "
    if disline.target:
        label = ">>"
    else:
        label = "  "
    if disline.oparg is not None:
        oparg = repr(disline.oparg)
    else:
        oparg = ""
    return "%s %s %4r %-20s %5s %s" % (
        lineno, label, disline.offset, disline.opcode, oparg, disline.argstr
        )


def disgen(x=None):
    """Disassemble methods, functions, or code.

    With no argument, disassemble the last traceback.

    Returns an iterable of `DisLine` tuples.  Raises TypeError for objects
    this generator-based disassembler cannot handle: anything with a
    `__dict__` (classes, modules), raw byte code strings, and any other
    object without a `co_code` attribute.

    """
    if x is None:
        return distb()
    # Unwrap instances, bound/unbound methods and functions down to the
    # underlying code object.
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, 'func_code'):
        x = x.func_code
    if hasattr(x, '__dict__'):
        # The stdlib version printed each member here.  That can't be
        # expressed as a stream of DisLines, so refuse explicitly rather
        # than relying on an assert that disappears under -O.
        raise TypeError(
            "disgen can't disassemble %s objects member by member"
            % type(x).__name__
            )
    elif hasattr(x, 'co_code'):
        return disassemble(x)
    elif isinstance(x, str):
        # disassemble_string prints instead of yielding DisLines.
        raise TypeError(
            "disgen can't disassemble raw byte code strings; "
            "use disassemble_string"
            )
    else:
        raise TypeError(
            "don't know how to disassemble %s objects" % type(x).__name__
            )


def distb(tb=None):
    """Disassemble a traceback (default: last traceback).

    Walks to the innermost frame of the traceback and returns the
    disassembly of that frame's code object.  Raises RuntimeError if no
    traceback is given and `sys.last_traceback` is not set.

    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble")
    while tb.tb_next:
        tb = tb.tb_next
    return disassemble(tb.tb_frame.f_code, tb.tb_lasti)


# One disassembled instruction:
#   lineno  - source line number the instruction belongs to
#   first   - true if this instruction starts that source line
#   target  - true if this offset is a jump target
#   offset  - byte offset of the instruction in the code string
#   opcode  - opcode name (from opcode.opname)
#   oparg   - integer argument, or None if the opcode takes none
#   argstr  - parenthesized, human-readable form of the argument, or ""
DisLine = collections.namedtuple(
    'DisLine',
    "lineno first target offset opcode oparg argstr"
    )


def disassemble(co, lasti=-1):
    """Disassemble a code object.

    Generates one `DisLine` per instruction in `co`.  `lasti` is accepted
    for compatibility with the stdlib signature but is not used.

    """
    code = co.co_code
    # A set, because membership is tested once per instruction.
    labels = set(findlabels(code))
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None

    # findlinestarts always reports offset 0 first.
    lineno = linestarts[0]

    while i < n:
        op = ord(code[i])
        first = i in linestarts
        if first:
            lineno = linestarts[i]

        target = i in labels
        offset = i
        opcode = opname[op]
        i = i+1
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian argument, plus any high bits carried
            # over from a preceding EXTENDED_ARG instruction.
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = oparg*65536
            if op in hasconst:
                argstr = '(' + repr(co.co_consts[oparg]) + ')'
            elif op in hasname:
                argstr = '(' + co.co_names[oparg] + ')'
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                argstr = '(to ' + repr(i + oparg) + ')'
            elif op in haslocal:
                argstr = '(' + co.co_varnames[oparg] + ')'
            elif op in hascompare:
                argstr = '(' + cmp_op[oparg] + ')'
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                argstr = '(' + free[oparg] + ')'
            else:
                argstr = ""
        else:
            oparg = None
            argstr = ""
        yield DisLine(
            lineno=lineno, first=first, target=target, offset=offset,
            opcode=opcode, oparg=oparg, argstr=argstr
            )


def disassemble_string(code, lasti=-1, varnames=None, names=None,
                       constants=None):
    """Print the disassembly of a raw byte code string to stdout.

    `lasti` is the offset to mark with "-->".  `varnames`, `names` and
    `constants` are optional tables used to show symbolic arguments; when
    one is missing the numeric argument is shown in parentheses instead.

    """
    labels = findlabels(code)
    n = len(code)
    i = 0
    while i < n:
        op = ord(code[i])
        # Collect the columns and print them joined by single spaces.
        parts = []
        if i == lasti:
            parts.append('-->')
        else:
            parts.append('   ')
        if i in labels:
            parts.append('>>')
        else:
            parts.append('  ')
        parts.append(repr(i).rjust(4))
        parts.append(opname[op].ljust(15))
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            parts.append(repr(oparg).rjust(5))
            if op in hasconst:
                if constants:
                    parts.append('(' + repr(constants[oparg]) + ')')
                else:
                    parts.append('(%d)' % oparg)
            elif op in hasname:
                if names is not None:
                    parts.append('(' + names[oparg] + ')')
                else:
                    parts.append('(%d)' % oparg)
            elif op in hasjrel:
                parts.append('(to ' + repr(i + oparg) + ')')
            elif op in haslocal:
                if varnames:
                    parts.append('(' + varnames[oparg] + ')')
                else:
                    parts.append('(%d)' % oparg)
            elif op in hascompare:
                parts.append('(' + cmp_op[oparg] + ')')
        print(" ".join(parts))


disco = disassemble                     # XXX For backwards compatibility


def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order they are
    first seen.

    """
    labels = []
    n = len(code)
    i = 0
    while i < n:
        op = ord(code[i])
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            label = -1
            if op in hasjrel:
                # Relative to the instruction after this one.
                label = i+oparg
            elif op in hasjabs:
                label = oparg
            if label >= 0:
                if label not in labels:
                    labels.append(label)
    return labels


def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.
    `code` is a code object; its `co_lnotab` holds alternating byte-offset
    and line-number increments.

    """
    byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
    line_increments = [ord(c) for c in code.co_lnotab[1::2]]

    lastlineno = None
    lineno = code.co_firstlineno
    addr = 0
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            # Only report a line once the byte offset actually advances,
            # and only when the line number changed since the last report.
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)


def _test():
    """Simple test program to disassemble a file.

    Reads Python source from the file named on the command line, or from
    stdin if the argument is missing, empty, or "-".

    """
    if len(sys.argv) > 2:
        sys.stderr.write("usage: python disgen.py [-|file]\n")
        sys.exit(2)
    fn = None
    if len(sys.argv) > 1:
        fn = sys.argv[1]
    if not fn or fn == "-":
        source = sys.stdin.read()
        fn = "<stdin>"
    else:
        # `with` guarantees the file is closed even if read() fails.
        with open(fn) as f:
            source = f.read()
    code = compile(source, fn, "exec")
    dis(code)


if __name__ == "__main__":
    _test()


# ---------------------------------------------------------------------------
# lab/parser.py  (new file, mode 100644, index 00000000..67c41e80)
# ---------------------------------------------------------------------------
"""Parser.py: a main for invoking code in coverage/parser.py"""

import glob
import os
import sys
from optparse import OptionParser

import disgen

from coverage.misc import CoverageException
from coverage.parser import ByteParser, CodeParser


class AdHocMain(object):
    """An ad-hoc main for code parsing experiments."""

    def main(self, args):
        """A main function for trying the code from the command line.

        `args` is the list of command-line arguments, without the program
        name.

        """
        parser = OptionParser()
        parser.add_option(
            "-c", action="store_true", dest="chunks",
            help="Show basic block chunks"
            )
        parser.add_option(
            "-d", action="store_true", dest="dis",
            help="Disassemble"
            )
        parser.add_option(
            "-R", action="store_true", dest="recursive",
            help="Recurse to find source files"
            )
        parser.add_option(
            "-s", action="store_true", dest="source",
            help="Show analyzed source"
            )
        parser.add_option(
            "-t", action="store_true", dest="tokens",
            help="Show tokens"
            )

        # Parse the arguments we were handed, not whatever is in sys.argv.
        options, args = parser.parse_args(args)
        if options.recursive:
            if args:
                root = args[0]
            else:
                root = "."
            for dirpath, _, _ in os.walk(root):
                for f in glob.glob(dirpath + "/*.py"):
                    self.adhoc_one_file(options, f)
        else:
            if not args:
                # parser.error prints the usage message and exits.
                parser.error("no source file specified")
            self.adhoc_one_file(options, args[0])

    def adhoc_one_file(self, options, filename):
        """Process just one file.

        Depending on `options`, prints the disassembly, the chunks and arcs,
        the token stream, and/or the annotated source of `filename`.

        """
        if options.dis or options.chunks:
            try:
                bp = ByteParser(filename=filename)
            except CoverageException:
                _, err, _ = sys.exc_info()
                print("%s" % (err,))
                return

        if options.dis:
            print("Main code:")
            self.disassemble(bp)

        if options.chunks:
            chunks = bp._all_chunks()
            if options.recursive:
                print("%6d: %s" % (len(chunks), filename))
            else:
                print("Chunks: %r" % chunks)
                arcs = bp._all_arcs()
                print("Arcs: %r" % sorted(arcs))

        if options.source or options.tokens:
            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
            cp.show_tokens = options.tokens
            cp._raw_parse()

            if options.source:
                if options.chunks:
                    arc_width, arc_chars = self.arc_ascii_art(arcs)
                else:
                    arc_width, arc_chars = 0, {}

                exit_counts = cp.exit_counts()

                for i, ltext in enumerate(cp.lines):
                    lineno = i+1
                    # One marker column each: statement start, exit count,
                    # docstring/classdef, excluded.
                    m0 = m1 = m2 = m3 = ' '
                    if lineno in cp.statement_starts:
                        m0 = '-'
                    exits = exit_counts.get(lineno, 0)
                    if exits > 1:
                        m1 = str(exits)
                    if lineno in cp.docstrings:
                        m2 = '"'
                    if lineno in cp.classdefs:
                        m2 = 'C'
                    if lineno in cp.excluded:
                        m3 = 'x'
                    a = arc_chars.get(lineno, '').ljust(arc_width)
                    print("%4d %s%s%s%s%s %s" %
                        (lineno, m0, m1, m2, m3, a, ltext)
                        )

    def disassemble(self, byte_parser):
        """Disassemble code, for ad-hoc experimenting.

        Prints the disassembly of every child parser of `byte_parser`.
        Source text is interleaved when available, and instructions that
        start a chunk are followed by that chunk's exits.

        """
        for bp in byte_parser.child_parsers():
            chunks = bp._split_into_chunks()
            chunkd = dict((chunk.byte, chunk) for chunk in chunks)
            if bp.text:
                srclines = bp.text.splitlines()
            else:
                srclines = None
            print("\n%s: " % bp.code)
            for disline in disgen.disgen(bp.code):
                if disline.first:
                    if srclines:
                        # Show the source line, pushed out to column 80.
                        print("%80s%s" % ("", srclines[disline.lineno-1]))
                    elif disline.offset > 0:
                        print("")
                line = disgen.format_dis_line(disline)
                chunk = chunkd.get(disline.offset)
                if chunk:
                    exits = " ".join(str(e) for e in sorted(chunk.exits))
                    chunkstr = ": %s" % exits
                else:
                    chunkstr = ""
                print("%-70s%s" % (line, chunkstr))

        print("")

    def arc_ascii_art(self, arcs):
        """Draw arcs as ascii art.

        Returns a width of characters needed to draw all the arcs, and a
        dictionary mapping line numbers to ascii strings to draw for that line.

        """
        arc_chars = {}
        for lfrom, lto in sorted(arcs):
            if lfrom < 0:
                # A negative source line is drawn as an entry marker.
                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
            elif lto < 0:
                # A negative destination line is drawn as an exit marker.
                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
            else:
                if lfrom == lto - 1:
                    # Don't show obvious arcs.
                    continue
                if lfrom < lto:
                    l1, l2 = lfrom, lto
                else:
                    l1, l2 = lto, lfrom
                # Draw this arc in the first column that is free on every
                # line it spans.
                w = max([
                    len(arc_chars.get(line, '')) for line in range(l1, l2+1)
                    ])
                for line in range(l1, l2+1):
                    if line == lfrom:
                        ch = '<'
                    elif line == lto:
                        ch = '>'
                    else:
                        ch = '|'
                    arc_chars[line] = arc_chars.get(line, '').ljust(w) + ch

        if arc_chars:
            arc_width = max([len(a) for a in arc_chars.values()])
        else:
            arc_width = 0

        return arc_width, arc_chars


if __name__ == '__main__':
    AdHocMain().main(sys.argv[1:])