Diffstat (limited to 'coverage/parser.py')
-rw-r--r--   coverage/parser.py   233
1 file changed, 128 insertions, 105 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index f868d357..2d777a5d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,9 +1,10 @@
 """Code parsing for Coverage."""
 
-import opcode, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
 
 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
-from coverage.backward import open_source
+from coverage.backward import open_source, range    # pylint: disable=W0622
+from coverage.backward import reversed              # pylint: disable=W0622
 from coverage.bytecode import ByteCodes, CodeObjects
 from coverage.misc import nice_pair, expensive, join_regex
 from coverage.misc import CoverageException, NoSource, NotPython
@@ -32,7 +33,7 @@ class CodeParser(object):
             except IOError:
                 _, err, _ = sys.exc_info()
                 raise NoSource(
-                    "No source for code: %r: %s" % (self.filename, err)
+                    "No source for code: '%s': %s" % (self.filename, err)
                     )
 
         # Scrap the BOM if it exists.
@@ -108,7 +109,7 @@ class CodeParser(object):
 
         tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:                # pragma: no cover
+            if self.show_tokens:                # pragma: not covered
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
@@ -134,8 +135,7 @@ class CodeParser(object):
                 # (a trick from trace.py in the stdlib.) This works for
                 # 99.9999% of cases.  For the rest (!) see:
                 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
-                for i in range(slineno, elineno+1):
-                    self.docstrings.add(i)
+                self.docstrings.update(range(slineno, elineno+1))
             elif toktype == token.NEWLINE:
                 if first_line is not None and elineno != first_line:
                     # We're at the end of a line, and we've ended on a
@@ -203,7 +203,15 @@ class CodeParser(object):
         statements.
 
         """
-        self._raw_parse()
+        try:
+            self._raw_parse()
+        except (tokenize.TokenError, IndentationError):
+            _, tokerr, _ = sys.exc_info()
+            msg, lineno = tokerr.args
+            raise NotPython(
+                "Couldn't parse '%s' as Python source: '%s' at %s" %
+                    (self.filename, msg, lineno)
+                )
 
         excluded_lines = self.first_lines(self.excluded)
         ignore = excluded_lines + list(self.docstrings)
@@ -262,8 +270,8 @@ class CodeParser(object):
 ## Opcodes that guide the ByteParser.
 
 def _opcode(name):
-    """Return the opcode by name from the opcode module."""
-    return opcode.opmap[name]
+    """Return the opcode by name from the dis module."""
+    return dis.opmap[name]
 
 def _opcode_set(*names):
     """Return a set of opcodes by the names in `names`."""
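
The switch from the `opcode` module to `dis` works because `dis` exposes the same
name-to-number opcode tables. A minimal sketch of what the patched `_opcode` and
`_opcode_set` helpers rely on (illustration only, not part of the patch; the exact
numbers printed vary by interpreter version):

    import dis

    # dis.opmap maps opcode names to numbers; dis.opname maps numbers back to names.
    ret = dis.opmap["RETURN_VALUE"]
    print(ret, dis.opname[ret])

    def opcode_set(*names):
        """Rough equivalent of the module's _opcode_set helper."""
        return set(dis.opmap[name] for name in names)

    print(opcode_set("JUMP_FORWARD", "RETURN_VALUE"))
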
@@ -362,52 +370,60 @@ class ByteParser(object):
     # Getting numbers from the lnotab value changed in Py3.0.
     if sys.version_info >= (3, 0):
         def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab bytes in 3.x"""
-            return list(lnotab)
+            """Produce ints from the lnotab bytes in 3.x"""
+            # co_lnotab is a bytes object, which iterates as ints.
+            return lnotab
     else:
         def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab string in 2.x"""
-            return [ord(c) for c in lnotab]
+            """Produce ints from the lnotab string in 2.x"""
+            for c in lnotab:
+                yield ord(c)
 
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
 
         Uses co_lnotab described in Python/compile.c to map byte offsets to
-        line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
+        line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
+
+        Only byte offsets that correspond to line numbers are included in the
+        results.
 
         """
         # Adapted from dis.py in the standard library.
         byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
         line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
 
-        bytes_lines = []
         last_line_num = None
         line_num = self.code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    bytes_lines.append((byte_num, line_num))
+                    yield (byte_num, line_num)
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            bytes_lines.append((byte_num, line_num))
-        return bytes_lines
+            yield (byte_num, line_num)
 
     def _find_statements(self):
         """Find the statements in `self.code`.
 
-        Return a set of line numbers that start statements.  Recurses into all
-        code objects reachable from `self.code`.
+        Produce a sequence of line numbers that start statements.  Recurses
+        into all code objects reachable from `self.code`.
 
         """
-        stmts = set()
         for bp in self.child_parsers():
             # Get all of the lineno information from this code.
             for _, l in bp._bytes_lines():
-                stmts.add(l)
-        return stmts
+                yield l
+
+    def _block_stack_repr(self, block_stack):
+        """Get a string version of `block_stack`, for debugging."""
+        blocks = ", ".join(
+            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+        )
+        return "[" + blocks + "]"
 
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
@@ -418,10 +434,11 @@ class ByteParser(object):
         Returns a list of `Chunk` objects.
 
         """
-        # The list of chunks so far, and the one we're working on.
         chunks = []
         chunk = None
+
+        # A dict mapping byte offsets of line starts to the line numbers.
         bytes_lines_map = dict(self._bytes_lines())
 
         # The block stack: loops and try blocks get pushed here for the
@@ -436,24 +453,37 @@ class ByteParser(object):
         # We have to handle the last two bytecodes specially.
         ult = penult = None
 
+        # Get a set of all of the jump-to points.
+        jump_to = set()
+        for bc in ByteCodes(self.code.co_code):
+            if bc.jump_to >= 0:
+                jump_to.add(bc.jump_to)
+
+        chunk_lineno = 0
+
+        # Walk the byte codes building chunks.
         for bc in ByteCodes(self.code.co_code):
             # Maybe have to start a new chunk
+            start_new_chunk = False
+            first_chunk = False
             if bc.offset in bytes_lines_map:
                 # Start a new chunk for each source line number.
-                if chunk:
-                    chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
-                chunks.append(chunk)
+                start_new_chunk = True
+                chunk_lineno = bytes_lines_map[bc.offset]
+                first_chunk = True
+            elif bc.offset in jump_to:
+                # To make chunks have a single entrance, we have to make a new
+                # chunk when we get to a place some bytecode jumps to.
+                start_new_chunk = True
             elif bc.op in OPS_CHUNK_BEGIN:
                 # Jumps deserve their own unnumbered chunk.  This fixes
                 # problems with jumps to jumps getting confused.
+                start_new_chunk = True
+
+            if not chunk or start_new_chunk:
                 if chunk:
                     chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset)
-                chunks.append(chunk)
-
-            if not chunk:
-                chunk = Chunk(bc.offset)
+                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                 chunks.append(chunk)
 
             # Look at the opcode
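
For context on the data the chunk builder is keyed on (not part of the patch): the
(offset, line) pairs that the generator version of `_bytes_lines` decodes from
`co_lnotab` have the same shape as what the stdlib's `dis.findlinestarts` yields, so
a quick way to see the mapping behind `bytes_lines_map` is:

    import dis

    def sample(x):
        if x:
            return 1
        return 2

    # Each pair is (byte offset of the first bytecode of a line, line number).
    for offset, lineno in dis.findlinestarts(sample.__code__):
        print(offset, lineno)
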
@@ -482,15 +512,11 @@ class ByteParser(object):
                     chunk.exits.add(block_stack[-1][1])
                 chunk = None
             if bc.op == OP_END_FINALLY:
-                if block_stack:
-                    # A break that goes through a finally will jump to whatever
-                    # block is on top of the stack.
-                    chunk.exits.add(block_stack[-1][1])
                 # For the finally clause we need to find the closest exception
                 # block, and use its jump target as an exit.
-                for iblock in range(len(block_stack)-1, -1, -1):
-                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
-                        chunk.exits.add(block_stack[iblock][1])
+                for block in reversed(block_stack):
+                    if block[0] in OPS_EXCEPT_BLOCKS:
+                        chunk.exits.add(block[1])
                         break
             if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                 # This is an except clause.  We want to overlook the next
@@ -516,23 +542,33 @@ class ByteParser(object):
                             last_chunk = chunks[-1]
                             last_chunk.exits.remove(ex)
                             last_chunk.exits.add(penult.offset)
-                            chunk = Chunk(penult.offset)
+                            chunk = Chunk(
+                                penult.offset, last_chunk.line, False
+                            )
                             chunk.exits.add(ex)
                             chunks.append(chunk)
 
         # Give all the chunks a length.
-        chunks[-1].length = bc.next_offset - chunks[-1].byte
+        chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
        for i in range(len(chunks)-1):
             chunks[i].length = chunks[i+1].byte - chunks[i].byte
 
+        #self.validate_chunks(chunks)
         return chunks
 
+    def validate_chunks(self, chunks):
+        """Validate the rule that chunks have a single entrance."""
+        # starts is the entrances to the chunks
+        starts = set([ch.byte for ch in chunks])
+        for ch in chunks:
+            assert all((ex in starts or ex < 0) for ex in ch.exits)
+
     def _arcs(self):
         """Find the executable arcs in the code.
 
-        Returns a set of pairs, (from,to).  From and to are integer line
-        numbers.  If from is < 0, then the arc is an entrance into the code
-        object.  If to is < 0, the arc is an exit from the code object.
+        Yields pairs: (from,to).  From and to are integer line numbers.  If
+        from is < 0, then the arc is an entrance into the code object.  If to
+        is < 0, the arc is an exit from the code object.
 
         """
         chunks = self._split_into_chunks()
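
A toy illustration (using a hypothetical stand-in class, not coverage.py's `Chunk`)
of the single-entrance rule that the new `validate_chunks` asserts: every exit of
every chunk must be either the starting byte of some chunk or a negative
pseudo-offset meaning the code object is left:

    class FakeChunk(object):
        """Minimal stand-in for parser.Chunk, just enough for the invariant."""
        def __init__(self, byte, exits):
            self.byte = byte
            self.exits = set(exits)

    # Chunks starting at byte offsets 0, 6, and 13; -1 marks an exit from the code.
    chunks = [FakeChunk(0, [6]), FakeChunk(6, [13, -1]), FakeChunk(13, [-1])]

    starts = set(ch.byte for ch in chunks)
    for ch in chunks:
        assert all((ex in starts or ex < 0) for ex in ch.exits)
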
@@ -540,65 +576,43 @@ class ByteParser(object):
         # A map from byte offsets to chunks jumped into.
         byte_chunks = dict([(c.byte, c) for c in chunks])
 
-        # Build a map from byte offsets to actual lines reached.
-        byte_lines = {}
-        bytes_to_add = set([c.byte for c in chunks])
+        # There's always an entrance at the first chunk.
+        yield (-1, byte_chunks[0].line)
 
-        while bytes_to_add:
-            byte_to_add = bytes_to_add.pop()
-            if byte_to_add in byte_lines or byte_to_add < 0:
+        # Traverse from the first chunk in each line, and yield arcs where
+        # the trace function will be invoked.
+        for chunk in chunks:
+            if not chunk.first:
                 continue
 
-            # Which lines does this chunk lead to?
-            bytes_considered = set()
-            bytes_to_consider = [byte_to_add]
-            lines = set()
-
-            while bytes_to_consider:
-                byte = bytes_to_consider.pop()
-                bytes_considered.add(byte)
-
-                # Find chunk for byte
-                try:
-                    ch = byte_chunks[byte]
-                except KeyError:
-                    for ch in chunks:
-                        if ch.byte <= byte < ch.byte+ch.length:
-                            break
-                    else:
-                        # No chunk for this byte!
-                        raise Exception("Couldn't find chunk @ %d" % byte)
-                    byte_chunks[byte] = ch
-
-                if ch.line:
-                    lines.add(ch.line)
-                else:
-                    for ex in ch.exits:
-                        if ex < 0:
-                            lines.add(ex)
-                        elif ex not in bytes_considered:
-                            bytes_to_consider.append(ex)
-
-                bytes_to_add.update(ch.exits)
-
-            byte_lines[byte_to_add] = lines
-
-        # Figure out for each chunk where the exits go.
-        arcs = set()
-        for chunk in chunks:
-            if chunk.line:
-                for ex in chunk.exits:
+            chunks_considered = set()
+            chunks_to_consider = [chunk]
+            while chunks_to_consider:
+                # Get the chunk we're considering, and make sure we don't
+                # consider it again
+                this_chunk = chunks_to_consider.pop()
+                chunks_considered.add(this_chunk)
+
+                # For each exit, add the line number if the trace function
+                # would be triggered, or add the chunk to those being
+                # considered if not.
+                for ex in this_chunk.exits:
                     if ex < 0:
-                        exit_lines = [ex]
+                        yield (chunk.line, ex)
                     else:
-                        exit_lines = byte_lines[ex]
-                    for exit_line in exit_lines:
-                        if chunk.line != exit_line:
-                            arcs.add((chunk.line, exit_line))
-        for line in byte_lines[0]:
-            arcs.add((-1, line))
-
-        return arcs
+                        next_chunk = byte_chunks[ex]
+                        if next_chunk in chunks_considered:
+                            continue
+
+                        # The trace function is invoked if visiting the first
+                        # bytecode in a line, or if the transition is a
+                        # backward jump.
+                        backward_jump = next_chunk.byte < this_chunk.byte
+                        if next_chunk.first or backward_jump:
+                            if next_chunk.line != chunk.line:
+                                yield (chunk.line, next_chunk.line)
+                        else:
+                            chunks_to_consider.append(next_chunk)
 
     def _all_chunks(self):
         """Returns a list of `Chunk` objects for this code and its children.
@@ -626,11 +640,11 @@ class ByteParser(object):
 
 
 class Chunk(object):
-    """A sequence of bytecodes with a single entrance.
+    """A sequence of byte codes with a single entrance.
 
     To analyze byte code, we have to divide it into chunks, sequences of byte
-    codes such that each basic block has only one entrance, the first
-    instruction in the block.
+    codes such that each chunk has only one entrance, the first instruction in
+    the block.
 
     This is almost the CS concept of `basic block`_, except that we're willing
     to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -638,17 +652,26 @@ class Chunk(object):
 
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 
+    `line` is the source line number containing this chunk.
+
+    `first` is true if this is the first chunk in the source line.
+
     An exit < 0 means the chunk can leave the code (return).  The exit is the
     negative of the starting line number of the code block.
 
     """
-    def __init__(self, byte, line=0):
+    def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
+        self.first = first
         self.length = 0
         self.exits = set()
 
     def __repr__(self):
-        return "<%d+%d @%d %r>" % (
-            self.byte, self.length, self.line, list(self.exits)
+        if self.first:
+            bang = "!"
+        else:
+            bang = ""
+        return "<%d+%d @%d%s %r>" % (
+            self.byte, self.length, self.line, bang, list(self.exits)
             )
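
The comment about when the trace function fires ("visiting the first bytecode in a
line, or ... a backward jump") can be seen directly with `sys.settrace`: each loop
iteration's backward jump to the `for` line produces a fresh line event, which is
exactly the transition the new `_arcs` traversal records. A small standalone demo,
not coverage.py code:

    import sys

    def tracer(frame, event, arg):
        if event == "line":
            print("line event at line", frame.f_lineno)
        return tracer

    def loop():
        total = 0
        for i in range(3):      # re-reported on each backward jump
            total += i
        return total

    sys.settrace(tracer)
    loop()
    sys.settrace(None)
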
