Diffstat (limited to 'coverage/parser.py')
-rw-r--r--   coverage/parser.py   233
1 file changed, 128 insertions, 105 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index f868d357..2d777a5d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,9 +1,10 @@
 """Code parsing for Coverage."""
 
-import opcode, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
 
 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
-from coverage.backward import open_source
+from coverage.backward import open_source, range    # pylint: disable=W0622
+from coverage.backward import reversed              # pylint: disable=W0622
 from coverage.bytecode import ByteCodes, CodeObjects
 from coverage.misc import nice_pair, expensive, join_regex
 from coverage.misc import CoverageException, NoSource, NotPython
@@ -32,7 +33,7 @@ class CodeParser(object):
             except IOError:
                 _, err, _ = sys.exc_info()
                 raise NoSource(
-                    "No source for code: %r: %s" % (self.filename, err)
+                    "No source for code: '%s': %s" % (self.filename, err)
                     )
 
         # Scrap the BOM if it exists.
@@ -108,7 +109,7 @@ class CodeParser(object):
 
         tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:                # pragma: no cover
+            if self.show_tokens:                # pragma: not covered
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
@@ -134,8 +135,7 @@ class CodeParser(object):
                 # (a trick from trace.py in the stdlib.) This works for
                 # 99.9999% of cases.  For the rest (!) see:
                 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
-                for i in range(slineno, elineno+1):
-                    self.docstrings.add(i)
+                self.docstrings.update(range(slineno, elineno+1))
             elif toktype == token.NEWLINE:
                 if first_line is not None and elineno != first_line:
                     # We're at the end of a line, and we've ended on a
@@ -203,7 +203,15 @@ class CodeParser(object):
         statements.
 
         """
-        self._raw_parse()
+        try:
+            self._raw_parse()
+        except (tokenize.TokenError, IndentationError):
+            _, tokerr, _ = sys.exc_info()
+            msg, lineno = tokerr.args
+            raise NotPython(
+                "Couldn't parse '%s' as Python source: '%s' at %s" %
+                    (self.filename, msg, lineno)
+                )
 
         excluded_lines = self.first_lines(self.excluded)
         ignore = excluded_lines + list(self.docstrings)
@@ -262,8 +270,8 @@ class CodeParser(object):
 ## Opcodes that guide the ByteParser.
 
 def _opcode(name):
-    """Return the opcode by name from the opcode module."""
-    return opcode.opmap[name]
+    """Return the opcode by name from the dis module."""
+    return dis.opmap[name]
 
 def _opcode_set(*names):
     """Return a set of opcodes by the names in `names`."""
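
The switch from the `opcode` module to `dis` works because `dis` exposes the same
name-to-number opcode tables. A minimal sketch of what the patched `_opcode` and
`_opcode_set` helpers rely on (illustration only, not part of the patch; the exact
numbers printed vary by interpreter version):

    import dis

    # dis.opmap maps opcode names to numbers; dis.opname maps numbers back to names.
    ret = dis.opmap["RETURN_VALUE"]
    print(ret, dis.opname[ret])

    def opcode_set(*names):
        """Rough equivalent of the module's _opcode_set helper."""
        return set(dis.opmap[name] for name in names)

    print(opcode_set("JUMP_FORWARD", "RETURN_VALUE"))
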
@@ -362,52 +370,60 @@ class ByteParser(object):
     # Getting numbers from the lnotab value changed in Py3.0.
     if sys.version_info >= (3, 0):
         def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab bytes in 3.x"""
-            return list(lnotab)
+            """Produce ints from the lnotab bytes in 3.x"""
+            # co_lnotab is a bytes object, which iterates as ints.
+            return lnotab
     else:
         def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab string in 2.x"""
-            return [ord(c) for c in lnotab]
+            """Produce ints from the lnotab string in 2.x"""
+            for c in lnotab:
+                yield ord(c)
 
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
 
         Uses co_lnotab described in Python/compile.c to map byte offsets to
-        line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
+        line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
+
+        Only byte offsets that correspond to line numbers are included in the
+        results.
 
         """
         # Adapted from dis.py in the standard library.
         byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
         line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
 
-        bytes_lines = []
         last_line_num = None
         line_num = self.code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    bytes_lines.append((byte_num, line_num))
+                    yield (byte_num, line_num)
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            bytes_lines.append((byte_num, line_num))
-        return bytes_lines
+            yield (byte_num, line_num)
 
     def _find_statements(self):
         """Find the statements in `self.code`.
 
-        Return a set of line numbers that start statements.  Recurses into all
-        code objects reachable from `self.code`.
+        Produce a sequence of line numbers that start statements.  Recurses
+        into all code objects reachable from `self.code`.
 
         """
-        stmts = set()
         for bp in self.child_parsers():
             # Get all of the lineno information from this code.
             for _, l in bp._bytes_lines():
-                stmts.add(l)
-        return stmts
+                yield l
+
+    def _block_stack_repr(self, block_stack):
+        """Get a string version of `block_stack`, for debugging."""
+        blocks = ", ".join(
+            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+        )
+        return "[" + blocks + "]"
 
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
@@ -418,10 +434,11 @@ class ByteParser(object):
         Returns a list of `Chunk` objects.
 
         """
-        # The list of chunks so far, and the one we're working on.
         chunks = []
         chunk = None
+
+        # A dict mapping byte offsets of line starts to the line numbers.
         bytes_lines_map = dict(self._bytes_lines())
 
         # The block stack: loops and try blocks get pushed here for the
@@ -436,24 +453,37 @@ class ByteParser(object):
         # We have to handle the last two bytecodes specially.
         ult = penult = None
 
+        # Get a set of all of the jump-to points.
+        jump_to = set()
+        for bc in ByteCodes(self.code.co_code):
+            if bc.jump_to >= 0:
+                jump_to.add(bc.jump_to)
+
+        chunk_lineno = 0
+
+        # Walk the byte codes building chunks.
         for bc in ByteCodes(self.code.co_code):
             # Maybe have to start a new chunk
+            start_new_chunk = False
+            first_chunk = False
             if bc.offset in bytes_lines_map:
                 # Start a new chunk for each source line number.
-                if chunk:
-                    chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
-                chunks.append(chunk)
+                start_new_chunk = True
+                chunk_lineno = bytes_lines_map[bc.offset]
+                first_chunk = True
+            elif bc.offset in jump_to:
+                # To make chunks have a single entrance, we have to make a new
+                # chunk when we get to a place some bytecode jumps to.
+                start_new_chunk = True
             elif bc.op in OPS_CHUNK_BEGIN:
                 # Jumps deserve their own unnumbered chunk.  This fixes
                 # problems with jumps to jumps getting confused.
+                start_new_chunk = True
+
+            if not chunk or start_new_chunk:
                 if chunk:
                     chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset)
-                chunks.append(chunk)
-
-            if not chunk:
-                chunk = Chunk(bc.offset)
+                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                 chunks.append(chunk)
 
             # Look at the opcode
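
For context on the data the chunk builder is keyed on (not part of the patch): the
(offset, line) pairs that the generator version of `_bytes_lines` decodes from
`co_lnotab` have the same shape as what the stdlib's `dis.findlinestarts` yields, so
a quick way to see the mapping behind `bytes_lines_map` is:

    import dis

    def sample(x):
        if x:
            return 1
        return 2

    # Each pair is (byte offset of the first bytecode of a line, line number).
    for offset, lineno in dis.findlinestarts(sample.__code__):
        print(offset, lineno)
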
@@ -482,15 +512,11 @@ class ByteParser(object):
                     chunk.exits.add(block_stack[-1][1])
                 chunk = None
             if bc.op == OP_END_FINALLY:
-                if block_stack:
-                    # A break that goes through a finally will jump to whatever
-                    # block is on top of the stack.
-                    chunk.exits.add(block_stack[-1][1])
                 # For the finally clause we need to find the closest exception
                 # block, and use its jump target as an exit.
-                for iblock in range(len(block_stack)-1, -1, -1):
-                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
-                        chunk.exits.add(block_stack[iblock][1])
+                for block in reversed(block_stack):
+                    if block[0] in OPS_EXCEPT_BLOCKS:
+                        chunk.exits.add(block[1])
                         break
             if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                 # This is an except clause.  We want to overlook the next
@@ -516,23 +542,33 @@ class ByteParser(object):
                             last_chunk = chunks[-1]
                             last_chunk.exits.remove(ex)
                             last_chunk.exits.add(penult.offset)
-                            chunk = Chunk(penult.offset)
+                            chunk = Chunk(
+                                penult.offset, last_chunk.line, False
+                            )
                             chunk.exits.add(ex)
                             chunks.append(chunk)
 
         # Give all the chunks a length.
-        chunks[-1].length = bc.next_offset - chunks[-1].byte
+        chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
        for i in range(len(chunks)-1):
             chunks[i].length = chunks[i+1].byte - chunks[i].byte
 
+        #self.validate_chunks(chunks)
         return chunks
 
+    def validate_chunks(self, chunks):
+        """Validate the rule that chunks have a single entrance."""
+        # starts is the entrances to the chunks
+        starts = set([ch.byte for ch in chunks])
+        for ch in chunks:
+            assert all((ex in starts or ex < 0) for ex in ch.exits)
+
     def _arcs(self):
         """Find the executable arcs in the code.
 
-        Returns a set of pairs, (from,to).  From and to are integer line
-        numbers.  If from is < 0, then the arc is an entrance into the code
-        object.  If to is < 0, the arc is an exit from the code object.
+        Yields pairs: (from,to).  From and to are integer line numbers.  If
+        from is < 0, then the arc is an entrance into the code object.  If to
+        is < 0, the arc is an exit from the code object.
 
         """
         chunks = self._split_into_chunks()
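
A toy illustration (using a hypothetical stand-in class, not coverage.py's `Chunk`)
of the single-entrance rule that the new `validate_chunks` asserts: every exit of
every chunk must be either the starting byte of some chunk or a negative
pseudo-offset meaning the code object is left:

    class FakeChunk(object):
        """Minimal stand-in for parser.Chunk, just enough for the invariant."""
        def __init__(self, byte, exits):
            self.byte = byte
            self.exits = set(exits)

    # Chunks starting at byte offsets 0, 6, and 13; -1 marks an exit from the code.
    chunks = [FakeChunk(0, [6]), FakeChunk(6, [13, -1]), FakeChunk(13, [-1])]

    starts = set(ch.byte for ch in chunks)
    for ch in chunks:
        assert all((ex in starts or ex < 0) for ex in ch.exits)
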
@@ -540,65 +576,43 @@ class ByteParser(object):
         # A map from byte offsets to chunks jumped into.
         byte_chunks = dict([(c.byte, c) for c in chunks])
 
-        # Build a map from byte offsets to actual lines reached.
-        byte_lines = {}
-        bytes_to_add = set([c.byte for c in chunks])
+        # There's always an entrance at the first chunk.
+        yield (-1, byte_chunks[0].line)
 
-        while bytes_to_add:
-            byte_to_add = bytes_to_add.pop()
-            if byte_to_add in byte_lines or byte_to_add < 0:
+        # Traverse from the first chunk in each line, and yield arcs where
+        # the trace function will be invoked.
+        for chunk in chunks:
+            if not chunk.first:
                 continue
 
-            # Which lines does this chunk lead to?
-            bytes_considered = set()
-            bytes_to_consider = [byte_to_add]
-            lines = set()
-
-            while bytes_to_consider:
-                byte = bytes_to_consider.pop()
-                bytes_considered.add(byte)
-
-                # Find chunk for byte
-                try:
-                    ch = byte_chunks[byte]
-                except KeyError:
-                    for ch in chunks:
-                        if ch.byte <= byte < ch.byte+ch.length:
-                            break
-                    else:
-                        # No chunk for this byte!
-                        raise Exception("Couldn't find chunk @ %d" % byte)
-                    byte_chunks[byte] = ch
-
-                if ch.line:
-                    lines.add(ch.line)
-                else:
-                    for ex in ch.exits:
-                        if ex < 0:
-                            lines.add(ex)
-                        elif ex not in bytes_considered:
-                            bytes_to_consider.append(ex)
-
-                bytes_to_add.update(ch.exits)
-
-            byte_lines[byte_to_add] = lines
-
-        # Figure out for each chunk where the exits go.
-        arcs = set()
-        for chunk in chunks:
-            if chunk.line:
-                for ex in chunk.exits:
+            chunks_considered = set()
+            chunks_to_consider = [chunk]
+            while chunks_to_consider:
+                # Get the chunk we're considering, and make sure we don't
+                # consider it again
+                this_chunk = chunks_to_consider.pop()
+                chunks_considered.add(this_chunk)
+
+                # For each exit, add the line number if the trace function
+                # would be triggered, or add the chunk to those being
+                # considered if not.
+                for ex in this_chunk.exits:
                     if ex < 0:
-                        exit_lines = [ex]
+                        yield (chunk.line, ex)
                     else:
-                        exit_lines = byte_lines[ex]
-                    for exit_line in exit_lines:
-                        if chunk.line != exit_line:
-                            arcs.add((chunk.line, exit_line))
-        for line in byte_lines[0]:
-            arcs.add((-1, line))
-
-        return arcs
+                        next_chunk = byte_chunks[ex]
+                        if next_chunk in chunks_considered:
+                            continue
+
+                        # The trace function is invoked if visiting the first
+                        # bytecode in a line, or if the transition is a
+                        # backward jump.
+                        backward_jump = next_chunk.byte < this_chunk.byte
+                        if next_chunk.first or backward_jump:
+                            if next_chunk.line != chunk.line:
+                                yield (chunk.line, next_chunk.line)
+                        else:
+                            chunks_to_consider.append(next_chunk)
 
     def _all_chunks(self):
         """Returns a list of `Chunk` objects for this code and its children.
@@ -626,11 +640,11 @@ class ByteParser(object):
 
 
 class Chunk(object):
-    """A sequence of bytecodes with a single entrance.
+    """A sequence of byte codes with a single entrance.
 
     To analyze byte code, we have to divide it into chunks, sequences of byte
-    codes such that each basic block has only one entrance, the first
-    instruction in the block.
+    codes such that each chunk has only one entrance, the first instruction in
+    the block.
 
     This is almost the CS concept of `basic block`_, except that we're willing
     to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -638,17 +652,26 @@ class Chunk(object):
 
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 
+    `line` is the source line number containing this chunk.
+
+    `first` is true if this is the first chunk in the source line.
+
     An exit < 0 means the chunk can leave the code (return).  The exit is the
     negative of the starting line number of the code block.
 
     """
-    def __init__(self, byte, line=0):
+    def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
+        self.first = first
         self.length = 0
         self.exits = set()
 
     def __repr__(self):
-        return "<%d+%d @%d %r>" % (
-            self.byte, self.length, self.line, list(self.exits)
+        if self.first:
+            bang = "!"
+        else:
+            bang = ""
+        return "<%d+%d @%d%s %r>" % (
+            self.byte, self.length, self.line, bang, list(self.exits)
             )
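
The comment about when the trace function fires ("visiting the first bytecode in a
line, or ... a backward jump") can be seen directly with `sys.settrace`: each loop
iteration's backward jump to the `for` line produces a fresh line event, which is
exactly the transition the new `_arcs` traversal records. A small standalone demo,
not coverage.py code:

    import sys

    def tracer(frame, event, arg):
        if event == "line":
            print("line event at line", frame.f_lineno)
        return tracer

    def loop():
        total = 0
        for i in range(3):      # re-reported on each backward jump
            total += i
        return total

    sys.settrace(tracer)
    loop()
    sys.settrace(None)
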
