author      Ned Batchelder <ned@nedbatchelder.com>  2009-10-10 10:43:06 -0400
committer   Ned Batchelder <ned@nedbatchelder.com>  2009-10-10 10:43:06 -0400
commit      02b1caf65a6a5786c580c35d3fc2ecd099f0eda1 (patch)
tree        08ecf7f91e221b4cf18d5a9582226b87d7732948 /coverage
parent      4f1638ecff2b6279179975a8e54045b2f06735d9 (diff)
download    python-coveragepy-git-02b1caf65a6a5786c580c35d3fc2ecd099f0eda1.tar.gz
Continued hacking on branch coverage.
Diffstat (limited to 'coverage')
-rw-r--r--  coverage/collector.py |  84
-rw-r--r--  coverage/parser.py    | 133
2 files changed, 169 insertions, 48 deletions
diff --git a/coverage/collector.py b/coverage/collector.py
index 8e304c1f..87441ff9 100644
--- a/coverage/collector.py
+++ b/coverage/collector.py
@@ -77,6 +77,83 @@ class PyTracer:
         sys.settrace(None)
 
 
+class PyBranchTracer:
+    """Python implementation of the raw data tracer."""
+
+    # Because of poor implementations of trace-function-manipulating tools,
+    # the Python trace function must be kept very simple. In particular, there
+    # must be only one function ever set as the trace function, both through
+    # sys.settrace, and as the return value from the trace function. Put
+    # another way, the trace function must always return itself. It cannot
+    # swap in other functions, or return None to avoid tracing a particular
+    # frame.
+    #
+    # The trace manipulator that introduced this restriction is DecoratorTools,
+    # which sets a trace function, and then later restores the pre-existing one
+    # by calling sys.settrace with a function it found in the current frame.
+    #
+    # Systems that use DecoratorTools (or similar trace manipulations) must use
+    # PyTracer to get accurate results. The command-line --timid argument is
+    # used to force the use of this tracer.
+
+    def __init__(self):
+        self.data = None
+        self.should_trace = None
+        self.should_trace_cache = None
+        self.cur_filename = None
+        self.last_line = 0
+        self.filename_stack = []
+        self.last_exc_back = None
+        self.branch = False
+
+    def _trace(self, frame, event, arg_unused):
+        """The trace function passed to sys.settrace."""
+
+        #print "trace event: %s %r @%d" % (
+        #           event, frame.f_code.co_filename, frame.f_lineno)
+
+        if self.last_exc_back:
+            if frame == self.last_exc_back:
+                # Someone forgot a return event.
+                if self.cur_filename:
+                    self.data['arcs'][(self.cur_filename, self.last_line, 0)] = True
+                self.cur_filename, self.last_line = self.filename_stack.pop()
+            self.last_exc_back = None
+
+        if event == 'call':
+            # Entering a new function context. Decide if we should trace
+            # in this file.
+            self.filename_stack.append((self.cur_filename, self.last_line))
+            filename = frame.f_code.co_filename
+            tracename = self.should_trace(filename, frame)
+            self.cur_filename = tracename
+            self.last_line = 0
+        elif event == 'line':
+            # Record an executed line.
+            if self.cur_filename:
+                self.data[(self.cur_filename, frame.f_lineno)] = True
+                self.data['arcs'][(self.cur_filename, self.last_line, frame.f_lineno)] = True
+            self.last_line = frame.f_lineno
+        elif event == 'return':
+            if self.cur_filename:
+                self.data['arcs'][(self.cur_filename, self.last_line, 0)] = True
+            # Leaving this function, pop the filename stack.
+            self.cur_filename, self.last_line = self.filename_stack.pop()
+        elif event == 'exception':
+            self.last_exc_back = frame.f_back
+        return self._trace
+
+    def start(self):
+        """Start this Tracer."""
+        assert self.branch
+        self.data['arcs'] = {}
+        sys.settrace(self._trace)
+
+    def stop(self):
+        """Stop this Tracer."""
+        sys.settrace(None)
+
+
 class Collector:
     """Collects trace data.
 
@@ -115,7 +192,9 @@ class Collector:
         self.should_trace = should_trace
         self.branch = branch
         self.reset()
-        if timid or branch:
+        if branch:
+            self._trace_class = PyBranchTracer
+        elif timid:
             # Being timid: use the simple Python trace function.
             self._trace_class = PyTracer
         else:
@@ -205,4 +284,7 @@ class Collector:
 
     def data_points(self):
         """Return the (filename, lineno) pairs collected."""
+        if 'arcs' in self.data:
+            import pprint
+            pprint.pprint(self.data['arcs'])
         return self.data.keys()
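Note: the new PyBranchTracer stores execution arcs as (filename, previous line, current line) keys under data['arcs'], with 0 standing in for entry into and return from a function. A minimal stand-alone sketch of the same idea (toy names — tracer, arcs, branchy — invented for illustration, not coverage.py's API) shows what one traced call records:

import sys

# Toy arc recorder: collect (filename, from_line, to_line) arcs for one
# traced call, with 0 marking entry and return, mirroring the shape of
# PyBranchTracer's data['arcs'] keys.
arcs = {}
last_line = [0]

def tracer(frame, event, arg):
    filename = frame.f_code.co_filename
    if event == 'line':
        arcs[(filename, last_line[0], frame.f_lineno)] = True
        last_line[0] = frame.f_lineno
    elif event == 'return':
        arcs[(filename, last_line[0], 0)] = True
    return tracer

def branchy(x):
    if x > 0:
        return 1
    return -1

sys.settrace(tracer)
branchy(5)
sys.settrace(None)

for arc in sorted(arcs):
    print(arc)

Running it prints one arc per line-to-line transition, including the entry and exit arcs that use 0.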
diff --git a/coverage/parser.py b/coverage/parser.py
index 905596be..7f50a9bc 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,12 +1,12 @@
 """Code parsing for Coverage."""
 
-import re, sys, token, tokenize, types
+import opcode, re, sys, token, tokenize, types
 
-from coverage.misc import nice_pair, CoverageException
 from coverage.backward import set, StringIO # pylint: disable-msg=W0622
+from coverage.bytecode import ByteCodes, CodeObjects
+from coverage.misc import nice_pair, CoverageException
 
-
 class CodeParser:
     """Parse code to find executable lines, excluded lines, etc."""
 
@@ -28,8 +28,6 @@ class CodeParser:
         # The line numbers that start statements.
         self.statement_starts = set()
 
-        self.bytes_lines = []
-
     # Getting numbers from the lnotab value changed in Py3.0.
     if sys.hexversion >= 0x03000000:
         def _lnotab_increments(self, lnotab):
@@ -40,32 +38,32 @@ class CodeParser:
             """Return a list of ints from the lnotab string in 2.x"""
             return [ord(c) for c in lnotab]
 
-    def _find_statement_starts(self, code):
-        """Find the starts of statements in compiled code.
+    def _bytes_lines(self, code):
+        """Map byte offsets to line numbers in `code`.
 
-        Uses co_lnotab described in Python/compile.c to find line numbers that
-        start statements, adding them to `self.statement_starts`.
+        Uses co_lnotab described in Python/compile.c to map byte offsets to
+        line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
 
         """
         # Adapted from dis.py in the standard library.
         byte_increments = self._lnotab_increments(code.co_lnotab[0::2])
         line_increments = self._lnotab_increments(code.co_lnotab[1::2])
 
+        bytes_lines = []
        last_line_num = None
         line_num = code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    self.bytes_lines.append((byte_num, line_num))
-                    self.statement_starts.add(line_num)
+                    bytes_lines.append((byte_num, line_num))
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            self.bytes_lines.append((byte_num, line_num))
-            self.statement_starts.add(line_num)
-
+            bytes_lines.append((byte_num, line_num))
+        return bytes_lines
+
     def _find_statements(self, code):
         """Find the statements in `code`.
 
@@ -76,7 +74,9 @@ class CodeParser:
         # Adapted from trace.py in the standard library.
 
        # Get all of the lineno information from this code.
-        self._find_statement_starts(code)
+        bytes_lines = self._bytes_lines(code)
+        for b, l in bytes_lines:
+            self.statement_starts.add(l)
 
         # Check the constants for references to other code objects.
         for c in code.co_consts:
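Aside: the renamed _bytes_lines() decodes co_lnotab into (byte offset, line number) pairs instead of filling statement_starts directly. As a quick stand-alone cross-check (not part of this commit; the function name sample is invented for illustration), the standard library's dis.findlinestarts() reports the same kind of pairs for a code object, and keeps working on newer CPythons that store the line table differently:

import dis

def sample(a, b):
    total = a + b
    if total > 10:
        total -= 10
    return total

# dis.findlinestarts() yields (byte offset, line number) pairs, the same
# mapping _bytes_lines() builds by walking co_lnotab by hand.
for offset, lineno in dis.findlinestarts(sample.__code__):
    print(offset, lineno)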
@@ -220,43 +220,83 @@ class CodeParser:
 
         return lines, excluded_lines, self.multiline
 
-    def _find_byte_chunks(self, code):
-        import opcode
-
-        code = code.co_code
-        #labels = findlabels(code)
-        #linestarts = dict(findlinestarts(co))
-        n = len(code)
-        i = 0
-        extended_arg = 0
-        free = None
-        while i < n:
-            c = code[i]
-            op = ord(c)
-            print repr(i).rjust(4),
-            print opcode.opname[op].ljust(20)
-            i = i+1
-            if op >= opcode.HAVE_ARGUMENT:
-                oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
-                extended_arg = 0
-                i = i+2
-                if op == opcode.EXTENDED_ARG:
-                    extended_arg = oparg*65536L
-
     def _disassemble(self, code):
         """Disassemble code, for ad-hoc experimenting."""
         import dis
 
-        dis.dis(code)
-
-        for c in code.co_consts:
-            if isinstance(c, types.CodeType):
-                # Found another code object, so recurse into it.
-                print("\n%s:" % c)
-                self._disassemble(c)
+
+        for codeobj in CodeObjects(code):
+            print("\n%s: " % codeobj)
+            dis.dis(codeobj)
+            print("Bytes lines: %r" % self._bytes_lines(codeobj))
+            print("Jumps: %r %r" % self._find_byte_jumps(codeobj))
+            print(self._split_into_chunks(codeobj))
         print("")
 
+    def _line_for_byte(self, bytes_lines, byte):
+        last_line = 0
+        for b, l in bytes_lines:
+            if b == byte:
+                return l
+            elif b > byte:
+                return last_line
+            else:
+                last_line = l
+        return last_line
+
+    def _find_byte_jumps(self, code):
+        byte_jumps = [(bc.offset, bc.jump_to) for bc in ByteCodes(code.co_code) if bc.jump_to >= 0]
+        bytes_lines = self._bytes_lines(code)
+        line_jumps = [(self._line_for_byte(bytes_lines, b0), self._line_for_byte(bytes_lines, b1)) for b0, b1 in byte_jumps]
+        return byte_jumps, line_jumps
+
+    _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'RETURN_VALUE']])
+
+    def _split_into_chunks(self, code):
+        class Chunk(object):
+            def __init__(self, byte, line=0):
+                self.byte = byte
+                self.line = line
+                self.exits = set()
+
+            def __repr__(self):
+                return "<%d:%d %r>" % (self.byte, self.line, list(self.exits))
+
+        chunks = []
+        chunk = None
+        bytes_lines_map = dict(self._bytes_lines(code))
+
+        for bc in ByteCodes(code.co_code):
+            # Maybe have to start a new block
+            if bc.offset in bytes_lines_map:
+                if chunk:
+                    chunk.exits.add(bc.offset)
+                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
+                chunks.append(chunk)
+
+            if not chunk:
+                chunk = Chunk(bc.offset)
+                chunks.append(chunk)
+
+            if bc.jump_to >= 0:
+                chunk.exits.add(bc.jump_to)
+
+            if bc.op in self._chunk_enders:
+                chunk = None
+
+        warnings = []
+        # Find anonymous chunks (not associated with a line number), and find
+        # the numbered chunks that jump to them.
+        for ch in chunks:
+            if not ch.line:
+                jumpers = [c for c in chunks if ch.line in c.exits]
+                if len(jumpers) > 1:
+                    warnings.append("Chunk at %d has %d jumpers" % (ch.byte, len(jumpers)))
+
+        return warnings, chunks
+
     def adhoc_main(self, args):
         """A main function for trying the code from the command line."""
 
@@ -285,12 +325,11 @@ class CodeParser:
             self._disassemble(code)
 
         if options.chunks:
-            self._find_byte_chunks(code)
+            self._split_into_chunks(code)
 
         self.show_tokens = options.tokens
         self._raw_parse(filename=filename, exclude=r"no\s*cover")
 
-        print self.bytes_lines
         for i, ltext in enumerate(self.lines):
            lineno = i+1
             m0 = m1 = m2 = ' '
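Aside: _split_into_chunks() relies on the ByteCodes and CodeObjects helpers from the new coverage.bytecode module, which are not shown in this diff. The sketch below is a rough stand-alone illustration of the same chunking idea using only the standard library's dis module; the helper names (split_into_chunks, branchy) are invented here, and opcode names and offsets vary across CPython versions, so treat it as an approximation rather than the committed algorithm:

import dis

def split_into_chunks(code):
    """Group bytecode into chunks that begin at a line start or a jump target.

    Returns a list of (start_offset, line_or_0, exit_offsets) tuples, roughly
    the information the committed Chunk objects carry.
    """
    line_starts = dict(dis.findlinestarts(code))
    chunks = []
    current = None
    for instr in dis.get_instructions(code):
        if instr.offset in line_starts or current is None:
            if current is not None:
                current[2].add(instr.offset)      # fall-through into the new chunk
            current = (instr.offset, line_starts.get(instr.offset, 0), set())
            chunks.append(current)
        if instr.opcode in dis.hasjabs or instr.opcode in dis.hasjrel:
            current[2].add(instr.argval)          # argval is the resolved jump target
        if instr.opname in ('RETURN_VALUE', 'RETURN_CONST', 'JUMP_ABSOLUTE'):
            current = None                        # chunk ends at a return or absolute jump
    return chunks

def branchy(x):
    if x > 0:
        return 1
    return -1

for start, line, exits in split_into_chunks(branchy.__code__):
    print(start, line, sorted(exits))

Each printed tuple is one chunk: where it starts, which source line it belongs to (0 for anonymous chunks), and the byte offsets it can exit to, which is the raw material for deciding which line-to-line arcs are possible.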