summaryrefslogtreecommitdiff
path: root/coverage/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/parser.py')
-rw-r--r--coverage/parser.py233
1 files changed, 128 insertions, 105 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index f868d357..2d777a5d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,9 +1,10 @@
"""Code parsing for Coverage."""
-import opcode, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
-from coverage.backward import open_source
+from coverage.backward import open_source, range # pylint: disable=W0622
+from coverage.backward import reversed # pylint: disable=W0622
from coverage.bytecode import ByteCodes, CodeObjects
from coverage.misc import nice_pair, expensive, join_regex
from coverage.misc import CoverageException, NoSource, NotPython
@@ -32,7 +33,7 @@ class CodeParser(object):
except IOError:
_, err, _ = sys.exc_info()
raise NoSource(
- "No source for code: %r: %s" % (self.filename, err)
+ "No source for code: '%s': %s" % (self.filename, err)
)
# Scrap the BOM if it exists.
@@ -108,7 +109,7 @@ class CodeParser(object):
tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
- if self.show_tokens: # pragma: no cover
+ if self.show_tokens: # pragma: not covered
print("%10s %5s %-20r %r" % (
tokenize.tok_name.get(toktype, toktype),
nice_pair((slineno, elineno)), ttext, ltext
@@ -134,8 +135,7 @@ class CodeParser(object):
# (a trick from trace.py in the stdlib.) This works for
# 99.9999% of cases. For the rest (!) see:
# http://stackoverflow.com/questions/1769332/x/1769794#1769794
- for i in range(slineno, elineno+1):
- self.docstrings.add(i)
+ self.docstrings.update(range(slineno, elineno+1))
elif toktype == token.NEWLINE:
if first_line is not None and elineno != first_line:
# We're at the end of a line, and we've ended on a
@@ -203,7 +203,15 @@ class CodeParser(object):
statements.
"""
- self._raw_parse()
+ try:
+ self._raw_parse()
+ except (tokenize.TokenError, IndentationError):
+ _, tokerr, _ = sys.exc_info()
+ msg, lineno = tokerr.args
+ raise NotPython(
+ "Couldn't parse '%s' as Python source: '%s' at %s" %
+ (self.filename, msg, lineno)
+ )
excluded_lines = self.first_lines(self.excluded)
ignore = excluded_lines + list(self.docstrings)
@@ -262,8 +270,8 @@ class CodeParser(object):
## Opcodes that guide the ByteParser.
def _opcode(name):
- """Return the opcode by name from the opcode module."""
- return opcode.opmap[name]
+ """Return the opcode by name from the dis module."""
+ return dis.opmap[name]
def _opcode_set(*names):
"""Return a set of opcodes by the names in `names`."""
@@ -362,52 +370,60 @@ class ByteParser(object):
# Getting numbers from the lnotab value changed in Py3.0.
if sys.version_info >= (3, 0):
def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab bytes in 3.x"""
- return list(lnotab)
+ """Produce ints from the lnotab bytes in 3.x"""
+ # co_lnotab is a bytes object, which iterates as ints.
+ return lnotab
else:
def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab string in 2.x"""
- return [ord(c) for c in lnotab]
+ """Produce ints from the lnotab string in 2.x"""
+ for c in lnotab:
+ yield ord(c)
def _bytes_lines(self):
"""Map byte offsets to line numbers in `code`.
Uses co_lnotab described in Python/compile.c to map byte offsets to
- line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
+ line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
+
+ Only byte offsets that correspond to line numbers are included in the
+ results.
"""
# Adapted from dis.py in the standard library.
byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
- bytes_lines = []
last_line_num = None
line_num = self.code.co_firstlineno
byte_num = 0
for byte_incr, line_incr in zip(byte_increments, line_increments):
if byte_incr:
if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
+ yield (byte_num, line_num)
last_line_num = line_num
byte_num += byte_incr
line_num += line_incr
if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
- return bytes_lines
+ yield (byte_num, line_num)
def _find_statements(self):
"""Find the statements in `self.code`.
- Return a set of line numbers that start statements. Recurses into all
- code objects reachable from `self.code`.
+ Produce a sequence of line numbers that start statements. Recurses
+ into all code objects reachable from `self.code`.
"""
- stmts = set()
for bp in self.child_parsers():
# Get all of the lineno information from this code.
for _, l in bp._bytes_lines():
- stmts.add(l)
- return stmts
+ yield l
+
+ def _block_stack_repr(self, block_stack):
+ """Get a string version of `block_stack`, for debugging."""
+ blocks = ", ".join(
+ ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+ )
+ return "[" + blocks + "]"
def _split_into_chunks(self):
"""Split the code object into a list of `Chunk` objects.
@@ -418,10 +434,11 @@ class ByteParser(object):
Returns a list of `Chunk` objects.
"""
-
# The list of chunks so far, and the one we're working on.
chunks = []
chunk = None
+
+ # A dict mapping byte offsets of line starts to the line numbers.
bytes_lines_map = dict(self._bytes_lines())
# The block stack: loops and try blocks get pushed here for the
@@ -436,24 +453,37 @@ class ByteParser(object):
# We have to handle the last two bytecodes specially.
ult = penult = None
+ # Get a set of all of the jump-to points.
+ jump_to = set()
+ for bc in ByteCodes(self.code.co_code):
+ if bc.jump_to >= 0:
+ jump_to.add(bc.jump_to)
+
+ chunk_lineno = 0
+
+ # Walk the byte codes building chunks.
for bc in ByteCodes(self.code.co_code):
# Maybe have to start a new chunk
+ start_new_chunk = False
+ first_chunk = False
if bc.offset in bytes_lines_map:
# Start a new chunk for each source line number.
- if chunk:
- chunk.exits.add(bc.offset)
- chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
- chunks.append(chunk)
+ start_new_chunk = True
+ chunk_lineno = bytes_lines_map[bc.offset]
+ first_chunk = True
+ elif bc.offset in jump_to:
+ # To make chunks have a single entrance, we have to make a new
+ # chunk when we get to a place some bytecode jumps to.
+ start_new_chunk = True
elif bc.op in OPS_CHUNK_BEGIN:
# Jumps deserve their own unnumbered chunk. This fixes
# problems with jumps to jumps getting confused.
+ start_new_chunk = True
+
+ if not chunk or start_new_chunk:
if chunk:
chunk.exits.add(bc.offset)
- chunk = Chunk(bc.offset)
- chunks.append(chunk)
-
- if not chunk:
- chunk = Chunk(bc.offset)
+ chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
chunks.append(chunk)
# Look at the opcode
@@ -482,15 +512,11 @@ class ByteParser(object):
chunk.exits.add(block_stack[-1][1])
chunk = None
if bc.op == OP_END_FINALLY:
- if block_stack:
- # A break that goes through a finally will jump to whatever
- # block is on top of the stack.
- chunk.exits.add(block_stack[-1][1])
# For the finally clause we need to find the closest exception
# block, and use its jump target as an exit.
- for iblock in range(len(block_stack)-1, -1, -1):
- if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
- chunk.exits.add(block_stack[iblock][1])
+ for block in reversed(block_stack):
+ if block[0] in OPS_EXCEPT_BLOCKS:
+ chunk.exits.add(block[1])
break
if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
# This is an except clause. We want to overlook the next
@@ -516,23 +542,33 @@ class ByteParser(object):
last_chunk = chunks[-1]
last_chunk.exits.remove(ex)
last_chunk.exits.add(penult.offset)
- chunk = Chunk(penult.offset)
+ chunk = Chunk(
+ penult.offset, last_chunk.line, False
+ )
chunk.exits.add(ex)
chunks.append(chunk)
# Give all the chunks a length.
- chunks[-1].length = bc.next_offset - chunks[-1].byte
+ chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
for i in range(len(chunks)-1):
chunks[i].length = chunks[i+1].byte - chunks[i].byte
+ #self.validate_chunks(chunks)
return chunks
+ def validate_chunks(self, chunks):
+ """Validate the rule that chunks have a single entrance."""
+ # starts is the entrances to the chunks
+ starts = set([ch.byte for ch in chunks])
+ for ch in chunks:
+ assert all((ex in starts or ex < 0) for ex in ch.exits)
+
def _arcs(self):
"""Find the executable arcs in the code.
- Returns a set of pairs, (from,to). From and to are integer line
- numbers. If from is < 0, then the arc is an entrance into the code
- object. If to is < 0, the arc is an exit from the code object.
+ Yields pairs: (from,to). From and to are integer line numbers. If
+ from is < 0, then the arc is an entrance into the code object. If to
+ is < 0, the arc is an exit from the code object.
"""
chunks = self._split_into_chunks()
@@ -540,65 +576,43 @@ class ByteParser(object):
# A map from byte offsets to chunks jumped into.
byte_chunks = dict([(c.byte, c) for c in chunks])
- # Build a map from byte offsets to actual lines reached.
- byte_lines = {}
- bytes_to_add = set([c.byte for c in chunks])
+ # There's always an entrance at the first chunk.
+ yield (-1, byte_chunks[0].line)
- while bytes_to_add:
- byte_to_add = bytes_to_add.pop()
- if byte_to_add in byte_lines or byte_to_add < 0:
+ # Traverse from the first chunk in each line, and yield arcs where
+ # the trace function will be invoked.
+ for chunk in chunks:
+ if not chunk.first:
continue
- # Which lines does this chunk lead to?
- bytes_considered = set()
- bytes_to_consider = [byte_to_add]
- lines = set()
-
- while bytes_to_consider:
- byte = bytes_to_consider.pop()
- bytes_considered.add(byte)
-
- # Find chunk for byte
- try:
- ch = byte_chunks[byte]
- except KeyError:
- for ch in chunks:
- if ch.byte <= byte < ch.byte+ch.length:
- break
- else:
- # No chunk for this byte!
- raise Exception("Couldn't find chunk @ %d" % byte)
- byte_chunks[byte] = ch
-
- if ch.line:
- lines.add(ch.line)
- else:
- for ex in ch.exits:
- if ex < 0:
- lines.add(ex)
- elif ex not in bytes_considered:
- bytes_to_consider.append(ex)
-
- bytes_to_add.update(ch.exits)
-
- byte_lines[byte_to_add] = lines
-
- # Figure out for each chunk where the exits go.
- arcs = set()
- for chunk in chunks:
- if chunk.line:
- for ex in chunk.exits:
+ chunks_considered = set()
+ chunks_to_consider = [chunk]
+ while chunks_to_consider:
+ # Get the chunk we're considering, and make sure we don't
+ # consider it again
+ this_chunk = chunks_to_consider.pop()
+ chunks_considered.add(this_chunk)
+
+ # For each exit, add the line number if the trace function
+ # would be triggered, or add the chunk to those being
+ # considered if not.
+ for ex in this_chunk.exits:
if ex < 0:
- exit_lines = [ex]
+ yield (chunk.line, ex)
else:
- exit_lines = byte_lines[ex]
- for exit_line in exit_lines:
- if chunk.line != exit_line:
- arcs.add((chunk.line, exit_line))
- for line in byte_lines[0]:
- arcs.add((-1, line))
-
- return arcs
+ next_chunk = byte_chunks[ex]
+ if next_chunk in chunks_considered:
+ continue
+
+ # The trace function is invoked if visiting the first
+ # bytecode in a line, or if the transition is a
+ # backward jump.
+ backward_jump = next_chunk.byte < this_chunk.byte
+ if next_chunk.first or backward_jump:
+ if next_chunk.line != chunk.line:
+ yield (chunk.line, next_chunk.line)
+ else:
+ chunks_to_consider.append(next_chunk)
def _all_chunks(self):
"""Returns a list of `Chunk` objects for this code and its children.
@@ -626,11 +640,11 @@ class ByteParser(object):
class Chunk(object):
- """A sequence of bytecodes with a single entrance.
+ """A sequence of byte codes with a single entrance.
To analyze byte code, we have to divide it into chunks, sequences of byte
- codes such that each basic block has only one entrance, the first
- instruction in the block.
+ codes such that each chunk has only one entrance, the first instruction in
+ the block.
This is almost the CS concept of `basic block`_, except that we're willing
to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -638,17 +652,26 @@ class Chunk(object):
.. _basic block: http://en.wikipedia.org/wiki/Basic_block
+ `line` is the source line number containing this chunk.
+
+ `first` is true if this is the first chunk in the source line.
+
An exit < 0 means the chunk can leave the code (return). The exit is
the negative of the starting line number of the code block.
"""
- def __init__(self, byte, line=0):
+ def __init__(self, byte, line, first):
self.byte = byte
self.line = line
+ self.first = first
self.length = 0
self.exits = set()
def __repr__(self):
- return "<%d+%d @%d %r>" % (
- self.byte, self.length, self.line, list(self.exits)
+ if self.first:
+ bang = "!"
+ else:
+ bang = ""
+ return "<%d+%d @%d%s %r>" % (
+ self.byte, self.length, self.line, bang, list(self.exits)
)