# From 4910434d33d0928374bf966c00c07feda5b32d77 Mon Sep 17 00:00:00 2001
# From: Ned Batchelder
# Date: Thu, 2 Apr 2009 19:42:04 -0400
# Subject: A lab directory for experiments in progress.
# ---
#  lab/hack_pyc.py     |  82 +++++++++++++++++++++++++++++++++++
#  lab/lnotab.py       | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++
#  lab/sample.py       |   5 +++
#  lab/show_pyc.py     |  64 +++++++++++++++++++++++++++
#  lab/trace_sample.py |  28 ++++++++++++
#  5 files changed, 301 insertions(+)
#  create mode 100644 lab/hack_pyc.py
#  create mode 100644 lab/lnotab.py
#  create mode 100644 lab/sample.py
#  create mode 100644 lab/show_pyc.py
#  create mode 100644 lab/trace_sample.py
# (limited to 'lab')
#
# diff --git a/lab/hack_pyc.py b/lab/hack_pyc.py
# new file mode 100644
# index 00000000..e8992b96
# --- /dev/null
# +++ b/lab/hack_pyc.py
# @@ -0,0 +1,82 @@
""" Wicked hack to get .pyc files to do bytecode tracing instead of
    line tracing.
"""

import marshal, new, opcode, sys, types

from lnotab import lnotab_numbers, lnotab_string

class PycFile:
    """The three parts of a .pyc file as this module reads and writes them:
    4 bytes kept in `magic`, 4 bytes kept in `modtime`, and a marshalled
    object kept in `code`.
    """

    def read(self, f):
        """Load `magic`, `modtime` and `code` from `f`.

        `f` is either a file name or an object with a `read` method.  A file
        opened here from a name is closed again before returning; a file
        object passed in by the caller is left open.
        """
        opened_here = isinstance(f, basestring)
        if opened_here:
            f = open(f, "rb")
        try:
            self.magic = f.read(4)
            self.modtime = f.read(4)
            self.code = marshal.load(f)
        finally:
            if opened_here:
                f.close()

    def write(self, f):
        """Write `magic`, `modtime` and `code` to `f`.

        `f` is either a file name or an object with a `write` method.  A file
        opened here from a name is closed (and so flushed) before returning;
        a file object passed in by the caller is left open.
        """
        opened_here = isinstance(f, basestring)
        if opened_here:
            f = open(f, "wb")
        try:
            f.write(self.magic)
            f.write(self.modtime)
            marshal.dump(self.code, f)
        finally:
            if opened_here:
                f.close()

    def hack_line_numbers(self):
        """Replace `self.code` with its line-number-hacked equivalent."""
        self.code = hack_line_numbers(self.code)

def hack_line_numbers(code):
    """ Replace a code object's line number information to claim that every
        byte of the bytecode is a new source line.  Returns a new code
        object.  Also recurses to hack the line numbers in nested code objects.
    """

    # Create a new lnotab table.  Each opcode is claimed to be at
    # 100000000 + 1000*lineno + (opcode number within line), so for example,
    # the opcodes on source line 12 will be given new line numbers 100012000,
    # 100012001, 100012002, etc.
    old_num = list(lnotab_numbers(code.co_lnotab, code.co_firstlineno))
    n_bytes = len(code.co_code)
    new_num = []
    line = 0
    opnum_in_line = 0
    i_byte = 0
    while i_byte < n_bytes:
        # Reaching the byte offset of the next original entry means a new
        # source line starts here: switch to it and restart the numbering.
        if old_num and i_byte == old_num[0][0]:
            line = old_num.pop(0)[1]
            opnum_in_line = 0
        new_num.append((i_byte, 100000000 + 1000*line + opnum_in_line))
        # Opcodes at or above HAVE_ARGUMENT are stepped over as 3 bytes,
        # the others as 1 byte.
        if ord(code.co_code[i_byte]) >= opcode.HAVE_ARGUMENT:
            i_byte += 3
        else:
            i_byte += 1
        opnum_in_line += 1

    # new_num is a list of pairs, (byteoff, lineoff).  Turn it into an lnotab.
    # The first line number is set one below the first entry.
    new_firstlineno = new_num[0][1]-1
    new_lnotab = lnotab_string(new_num, new_firstlineno)

    # Recurse into code constants in this code object.
    new_consts = []
    for const in code.co_consts:
        if isinstance(const, types.CodeType):
            new_consts.append(hack_line_numbers(const))
        else:
            new_consts.append(const)

    # Create a new code object, just like the old one, except with new
    # line numbers.  co_freevars and co_cellvars have to be carried over
    # too, otherwise code that uses closures is rebuilt without them.
    new_code = new.code(
        code.co_argcount, code.co_nlocals, code.co_stacksize, code.co_flags,
        code.co_code, tuple(new_consts), code.co_names, code.co_varnames,
        code.co_filename, code.co_name, new_firstlineno, new_lnotab,
        code.co_freevars, code.co_cellvars
        )

    return new_code

def hack_file(f):
    """Read the .pyc `f`, hack its line numbers, and write it back to `f`."""
    pyc = PycFile()
    pyc.read(f)
    pyc.hack_line_numbers()
    pyc.write(f)

if __name__ == '__main__':
    hack_file(sys.argv[1])

# diff --git a/lab/lnotab.py b/lab/lnotab.py
# new file mode 100644
# index 00000000..230e42bb
# --- /dev/null
# +++ b/lab/lnotab.py
# @@ -0,0 +1,122 @@
# Comment copied from Python/compile.c:
#
# All about a_lnotab.
#
# c_lnotab is an array of unsigned bytes disguised as a Python string.
# It is used to map bytecode offsets to source code line #s (when needed
# for tracebacks).
#
# The array is conceptually a list of
#     (bytecode offset increment, line number increment)
# pairs.
# The details are important and delicate, best illustrated by example:
#
#     byte code offset    source code line number
#         0                   1
#         6                   2
#        50                   7
#       350                 307
#       361                 308
#
# The first trick is that these numbers aren't stored, only the increments
# from one row to the next (this doesn't really work, but it's a start):
#
#     0, 1,  6, 1,  44, 5,  300, 300,  11, 1
#
# The second trick is that an unsigned byte can't hold negative values, or
# values larger than 255, so (a) there's a deep assumption that byte code
# offsets and their corresponding line #s both increase monotonically, and (b)
# if at least one column jumps by more than 255 from one row to the next, more
# than one pair is written to the table. In case #b, there's no way to know
# from looking at the table later how many were written.  That's the delicate
# part.  A user of c_lnotab desiring to find the source line number
# corresponding to a bytecode address A should do something like this
#
#     lineno = addr = 0
#     for addr_incr, line_incr in c_lnotab:
#         addr += addr_incr
#         if addr > A:
#             return lineno
#         lineno += line_incr
#
# In order for this to work, when the addr field increments by more than 255,
# the line # increment in each pair generated must be 0 until the remaining addr
# increment is < 256.  So, in the example above, assemble_lnotab (it used
# to be called com_set_lineno) should not (as was actually done until 2.2)
# expand 300, 300 to 255, 255, 45, 45,
#             but to 255,   0, 45, 255, 0, 45.
#

def lnotab(pairs, first_lineno=0):
    """Yields byte integers representing the pairs of integers passed in.

    `pairs` is an iterable of (byte offset, line number) pairs in which
    neither column ever decreases, and `first_lineno` is the line number the
    first line delta is measured from.  The values come out alternating
    byte delta, line delta.  A delta larger than 255 is spread over extra
    entries so every yielded value fits in an unsigned byte.  An empty
    `pairs` yields nothing.
    """
    cur_byte, cur_line = 0, first_lineno
    for byte_off, line_off in pairs:
        byte_delta = byte_off - cur_byte
        line_delta = line_off - cur_line
        # Neither column may go backwards.  On the first pair the line check
        # is also what enforces first_lineno <= the first line number.
        assert byte_delta >= 0
        assert line_delta >= 0
        # Use up an oversized byte delta first, in (255, 0) entries, so the
        # line number does not move until the full byte offset is reached.
        while byte_delta > 255:
            yield 255 # byte
            yield 0   # line
            byte_delta -= 255
        yield byte_delta
        # The pending byte delta is now on the wire; an oversized line delta
        # completes that entry with 255 and opens a new one with byte delta 0.
        while line_delta > 255:
            yield 255 # line
            yield 0   # byte
            line_delta -= 255
        yield line_delta
        cur_byte, cur_line = byte_off, line_off

def lnotab_string(pairs, first_lineno=0):
    """Return the values yielded by `lnotab` as a string, one character
    per value.
    """
    return "".join(chr(b) for b in lnotab(pairs, first_lineno))

def byte_pairs(lnotab):
    """Yield pairs of integers from a string.

    Each pair is the ordinals of two consecutive characters.  A string of
    odd length raises IndexError when its last, unpaired character is reached.
    """
    for i in range(0, len(lnotab), 2):
        yield ord(lnotab[i]), ord(lnotab[i+1])

def lnotab_numbers(lnotab, first_lineno=0):
    """Yields the byte, line offset pairs from a packed lnotab string."""

    last_line = None
    cur_byte, cur_line = 0, first_lineno
    for byte_delta, line_delta in byte_pairs(lnotab):
        if byte_delta:
            # The byte offset is about to move on, so the line accumulated so
            # far belongs to cur_byte.  Report it unless that line was the
            # last one reported (the middle of a split byte delta).
            if cur_line != last_line:
                yield cur_byte, cur_line
                last_line = cur_line
            cur_byte += byte_delta
        cur_line += line_delta
    # Whatever is pending after the last entry is the final pair.
    if cur_line != last_line:
        yield cur_byte, cur_line


## Tests

def same_list(a, b):
    """Assert that the iterable `a` produces exactly the list `b`."""
    a = list(a)
    assert a == b

def test_simple():
    same_list(lnotab([(0,1)]), [0, 1])
    same_list(lnotab([(0,1), (6, 2)]), [0, 1, 6, 1])

def test_starting_above_one():
    same_list(lnotab([(0,100), (6,101)]), [0, 100, 6, 1])
    same_list(lnotab([(0,100), (6,101)], 50), [0, 50, 6, 1])

def test_large_gaps():
    same_list(lnotab([(0,1), (300, 300)]), [0, 1, 255, 0, 45, 255, 0, 44])
    same_list(lnotab([(0,1), (255, 300)]), [0, 1, 255, 255, 0, 44])
    same_list(lnotab([(0,1), (255, 256)]), [0, 1, 255, 255])

def test_strings():
    assert lnotab_string([(0,1), (6, 2)]) == "\x00\x01\x06\x01"
    assert lnotab_string([(0,1), (300, 300)]) == "\x00\x01\xff\x00\x2d\xff\x00\x2c"
def test_numbers():
    same_list(lnotab_numbers("\x00\x01\x06\x01"), [(0,1), (6,2)])
    same_list(lnotab_numbers("\x00\x01\xff\x00\x2d\xff\x00\x2c"), [(0,1), (300, 300)])

def test_numbers_firstlineno():
    same_list(lnotab_numbers("\x00\x01\xff\x00\x2d\xff\x00\x2c", 10), [(0,11), (300, 310)])

# diff --git a/lab/sample.py b/lab/sample.py
# new file mode 100644
# index 00000000..cf4f6dcf
# --- /dev/null
# +++ b/lab/sample.py
# @@ -0,0 +1,5 @@
a, b = 1, 0
# fn is not defined anywhere in view; with a == 1 the `or` chain stops
# before it would be called.
if a or b or fn():
    # Hey
    a = 3
d = 4
# \ No newline at end of file

# diff --git a/lab/show_pyc.py b/lab/show_pyc.py
# new file mode 100644
# index 00000000..a0834e88
# --- /dev/null
# +++ b/lab/show_pyc.py
# @@ -0,0 +1,64 @@
import dis, marshal, struct, sys, time, types

# The prints below use the single-argument parenthesised form, which produces
# the same output under the Python 2 print statement.

def show_pyc_file(fname):
    """Print the magic number, modification date and code of the .pyc file
    named `fname`.
    """
    f = open(fname, "rb")
    try:
        magic = f.read(4)
        moddate = f.read(4)
        # '<L' is a 4-byte little-endian unsigned value on every platform.
        # A bare 'L' uses the native size, which is 8 bytes on some systems
        # and then rejects this 4-byte field.
        modtime = time.asctime(time.localtime(struct.unpack('<L', moddate)[0]))
        print("magic %s" % (magic.encode('hex')))
        print("moddate %s (%s)" % (moddate.encode('hex'), modtime))
        code = marshal.load(f)
    finally:
        f.close()
    show_code(code)

def show_py_file(fname):
    """Compile the source file named `fname` and print the resulting code."""
    f = open(fname)
    try:
        text = f.read().replace('\r\n', '\n')
    finally:
        f.close()
    code = compile(text, fname, "exec")
    show_code(code)

def show_code(code, indent=''):
    """Print the attributes of the code object `code`, each line prefixed
    with `indent`, recursing into code objects found among its constants.
    """
    print("%scode" % indent)
    indent += ' '
    print("%sargcount %d" % (indent, code.co_argcount))
    print("%snlocals %d" % (indent, code.co_nlocals))
    print("%sstacksize %d" % (indent, code.co_stacksize))
    print("%sflags %04x" % (indent, code.co_flags))
    show_hex("code", code.co_code, indent=indent)
    dis.disassemble(code)
    print("%sconsts" % indent)
    for const in code.co_consts:
        if isinstance(const, types.CodeType):
            show_code(const, indent+' ')
        else:
            print(" %s%r" % (indent, const))
    print("%snames %r" % (indent, code.co_names))
    print("%svarnames %r" % (indent, code.co_varnames))
    print("%sfreevars %r" % (indent, code.co_freevars))
    print("%scellvars %r" % (indent, code.co_cellvars))
    print("%sfilename %r" % (indent, code.co_filename))
    print("%sname %r" % (indent, code.co_name))
    print("%sfirstlineno %d" % (indent, code.co_firstlineno))
    show_hex("lnotab", code.co_lnotab, indent=indent)

def show_hex(label, h, indent):
    """Print `h` hex-encoded under `label`: on the label's line when the hex
    text is shorter than 60 characters, otherwise in 60-character rows below it.
    """
    h = h.encode('hex')
    if len(h) < 60:
        print("%s%s %s" % (indent, label, h))
    else:
        print("%s%s" % (indent, label))
        for i in range(0, len(h), 60):
            print("%s %s" % (indent, h[i:i+60]))

def show_file(fname):
    """Show `fname` as a .pyc or a .py file, chosen by how its name ends."""
    if fname.endswith('pyc'):
        show_pyc_file(fname)
    elif fname.endswith('py'):
        show_py_file(fname)
    else:
        print("Odd file: %s" % fname)

def main(args):
    """Show every file named in `args`."""
    for a in args:
        show_file(a)

if __name__ == '__main__':
    main(sys.argv[1:])

# diff --git a/lab/trace_sample.py b/lab/trace_sample.py
# new file mode 100644
# index 00000000..6d616a51
# --- /dev/null
# +++ b/lab/trace_sample.py
# @@ -0,0 +1,28 @@
import os, sys

global nest
nest = 0

def trace(frame, event, arg):
    """Trace function: print one line per event, indented by the current
    call nesting, and return itself so that it keeps being called.
    """
    #if event == 'line':
    global nest

    print("%s%s %s %d (%r)" % (
        " " * nest,
        event,
        os.path.basename(frame.f_code.co_filename),
        frame.f_lineno,
        arg
        ))

    # Indent further inside each call, and back out again on return.
    if event == 'call':
        nest += 1
    if event == 'return':
        nest -= 1

    return trace

sys.settrace(trace)

import sample
#import littleclass
# -- cgit v1.2.1