4 files changed, 108 insertions, 79 deletions
diff --git a/lab/branches.py b/lab/branches.py
index 275eef4..d1908d0 100644
--- a/lab/branches.py
+++ b/lab/branches.py
@@ -21,7 +21,7 @@ def my_function(x):
 
     # Notice that "while 1" also has this problem.  Even though the compiler
     # knows there's no computation at the top of the loop, it's still expressed
-    # in byte code as a branch with two possibilities.
+    # in bytecode as a branch with two possibilities.
 
     i = 0
     while 1:
diff --git a/lab/disgen.py b/lab/disgen.py
index 4e4c6fa..26bc56b 100644
--- a/lab/disgen.py
+++ b/lab/disgen.py
@@ -1,4 +1,4 @@
-"""Disassembler of Python byte code into mnemonics."""
+"""Disassembler of Python bytecode into mnemonics."""
 
 # Adapted from stdlib dis.py, but returns structured information
 # instead of printing to stdout.
@@ -133,7 +133,7 @@ def byte_from_code(code, i):
     return byte
 
 def findlabels(code):
-    """Detect all offsets in a byte code which are jump targets.
+    """Detect all offsets in a bytecode which are jump targets.
 
     Return the list of offsets.
 
@@ -158,7 +158,7 @@ def findlabels(code):
     return labels
 
 def findlinestarts(code):
-    """Find the offsets in a byte code which are start of lines in the source.
+    """Find the offsets in a bytecode which are start of lines in the source.
 
     Generate pairs (offset, lineno) as described in Python/compile.c.
 
diff --git a/lab/new-data.js b/lab/new-data.js
index 973aa11..9cb8f71 100644
--- a/lab/new-data.js
+++ b/lab/new-data.js
@@ -1,13 +1,4 @@
 {
-    "run" {
-        "collector": "coverage.py 4.0",
-        "config": {
-            "branch": true,
-            "source": ".",
-        },
-        "collected": "20150711T090600",
-    },
-
     // As of now:
     "lines": {
         "a/b/c.py": [1, 2, 3, 4, 5],
@@ -16,10 +7,20 @@
     "arcs": {
         "a/b/c.py: [[1, 2], [2, 3], [4, 5]],
     },
-    "plugins: {
+    "file_tracers": {
         "a/b/c.py": "fooey.plugin",
     },
 
+    // We used to do this, but it got too bulky, removed in 4.0.1:
+    "run" {
+        "collector": "coverage.py 4.0",
+        "config": {
+            "branch": true,
+            "source": ".",
+        },
+        "collected": "20150711T090600",
+    },
+
     // Maybe in the future?
     "files": {
         "a/b/c.py": {
diff --git a/lab/parser.py b/lab/parser.py
index 97c81d8..4deb93e 100644
--- a/lab/parser.py
+++ b/lab/parser.py
@@ -5,29 +5,29 @@
 
 from __future__ import division
 
-import glob, os, sys
 import collections
-from optparse import OptionParser
+import glob
+import optparse
+import os
+import re
+import sys
+import textwrap
 
 import disgen
 
-from coverage.misc import CoverageException
-from coverage.parser import ByteParser, PythonParser
+from coverage.parser import PythonParser
 from coverage.python import get_python_source
 
 opcode_counts = collections.Counter()
 
+
 class ParserMain(object):
     """A main for code parsing experiments."""
 
     def main(self, args):
         """A main function for trying the code from the command line."""
 
-        parser = OptionParser()
-        parser.add_option(
-            "-c", action="store_true", dest="chunks",
-            help="Show basic block chunks"
-            )
+        parser = optparse.OptionParser()
         parser.add_option(
             "-d", action="store_true", dest="dis",
             help="Disassemble"
@@ -69,67 +69,74 @@ class ParserMain(object):
             for opcode, number in opcode_counts.most_common():
                 print("{0:20s} {1:6d}  {2:.1%}".format(opcode, number, number/total))
 
-
     def one_file(self, options, filename):
         """Process just one file."""
+        # `filename` can have a line-range suffix, ":START-END" (1-based,
+        # inclusive).  In that case, extract just those lines, dedent them, and
+        # parse that.  This is for trying test cases embedded in the test files.
+        match = re.search(r"^(.*):(\d+)-(\d+)$", filename)
+        if match:
+            filename, start, end = match.groups()
+            start, end = int(start), int(end)
+        else:
+            start = end = None
 
         try:
             text = get_python_source(filename)
-            bp = ByteParser(text, filename=filename)
+            if start is not None:
+                lines = text.splitlines(True)
+                text = textwrap.dedent("".join(lines[start-1:end]).replace("\\\\", "\\"))
+            pyparser = PythonParser(text, filename=filename, exclude=r"no\s*cover")
+            pyparser.parse_source()
         except Exception as err:
             print("%s" % (err,))
             return
 
         if options.dis:
             print("Main code:")
-            self.disassemble(bp, histogram=options.histogram)
+            self.disassemble(pyparser.byte_parser, histogram=options.histogram)
 
-        arcs = bp._all_arcs()
-        if options.chunks:# and not options.dis:
-            chunks = bp._all_chunks()
-            if options.recursive:
-                print("%6d: %s" % (len(chunks), filename))
-            else:
-                print("Chunks: %r" % chunks)
-                print("Arcs: %r" % sorted(arcs))
+        arcs = pyparser.arcs()
 
         if options.source or options.tokens:
-            cp = PythonParser(filename=filename, exclude=r"no\s*cover")
-            cp.show_tokens = options.tokens
-            cp._raw_parse()
+            pyparser.show_tokens = options.tokens
+            pyparser.parse_source()
 
             if options.source:
-                if options.chunks:
-                    arc_width, arc_chars = self.arc_ascii_art(arcs)
-                else:
-                    arc_width, arc_chars = 0, {}
-
-                exit_counts = cp.exit_counts()
-
-                for lineno, ltext in enumerate(cp.lines, start=1):
-                    m0 = m1 = m2 = m3 = a = ' '
-                    if lineno in cp.statement_starts:
-                        m0 = '-'
+                arc_chars = self.arc_ascii_art(arcs)
+                if arc_chars:
+                    arc_width = max(len(a) for a in arc_chars.values())
+
+                exit_counts = pyparser.exit_counts()
+
+                for lineno, ltext in enumerate(pyparser.lines, start=1):
+                    marks = [' ', ' ', ' ', ' ', ' ']
+                    a = ' '
+                    if lineno in pyparser.raw_statements:
+                        marks[0] = '-'
+                    if lineno in pyparser.statements:
+                        marks[1] = '='
                     exits = exit_counts.get(lineno, 0)
                     if exits > 1:
-                        m1 = str(exits)
-                    if lineno in cp.docstrings:
-                        m2 = '"'
-                    if lineno in cp.classdefs:
-                        m2 = 'C'
-                    if lineno in cp.excluded:
-                        m3 = 'x'
-                    a = arc_chars[lineno].ljust(arc_width)
-                    print("%4d %s%s%s%s%s %s" %
-                                (lineno, m0, m1, m2, m3, a, ltext)
-                        )
+                        marks[2] = str(exits)
+                    if lineno in pyparser.raw_docstrings:
+                        marks[3] = '"'
+                    if lineno in pyparser.raw_classdefs:
+                        marks[3] = 'C'
+                    if lineno in pyparser.raw_excluded:
+                        marks[4] = 'x'
+
+                    if arc_chars:
+                        a = arc_chars[lineno].ljust(arc_width)
+                    else:
+                        a = ""
+
+                    print("%4d %s%s %s" % (lineno, "".join(marks), a, ltext))
 
     def disassemble(self, byte_parser, histogram=False):
         """Disassemble code, for ad-hoc experimenting."""
 
         for bp in byte_parser.child_parsers():
-            chunks = bp._split_into_chunks()
-            chunkd = dict((chunk.byte, chunk) for chunk in chunks)
             if bp.text:
                 srclines = bp.text.splitlines()
             else:
@@ -149,22 +156,19 @@ class ParserMain(object):
                     elif disline.offset > 0:
                         print("")
                 line = disgen.format_dis_line(disline)
-                chunk = chunkd.get(disline.offset)
-                if chunk:
-                    chunkstr = ":: %r" % chunk
-                else:
-                    chunkstr = ""
-                print("%-70s%s" % (line, chunkstr))
+                print("%-70s" % (line,))
 
         print("")
 
     def arc_ascii_art(self, arcs):
         """Draw arcs as ascii art.
 
-        Returns a width of characters needed to draw all the arcs, and a
-        dictionary mapping line numbers to ascii strings to draw for that line.
+        Returns a dictionary mapping line numbers to ascii strings to draw for
+        that line.
 
         """
+
+        plus_ones = set()
         arc_chars = collections.defaultdict(str)
         for lfrom, lto in sorted(arcs):
             if lfrom < 0:
@@ -173,13 +177,13 @@ class ParserMain(object):
                 arc_chars[lfrom] += '^'
             else:
                 if lfrom == lto - 1:
-                    # Don't show obvious arcs.
+                    plus_ones.add(lfrom)
                     continue
                 if lfrom < lto:
                     l1, l2 = lfrom, lto
                 else:
                     l1, l2 = lto, lfrom
-                w = max(len(arc_chars[l]) for l in range(l1, l2+1))
+                w = first_all_blanks(arc_chars[l] for l in range(l1, l2+1))
                 for l in range(l1, l2+1):
                     if l == lfrom:
                         ch = '<'
@@ -187,16 +191,40 @@ class ParserMain(object):
                         ch = '>'
                     else:
                         ch = '|'
-                    arc_chars[l] = arc_chars[l].ljust(w) + ch
-                arc_width = 0
+                    arc_chars[l] = set_char(arc_chars[l], w, ch)
 
-        if arc_chars:
-            arc_width = max(len(a) for a in arc_chars.values())
-        else:
-            arc_width = 0
+        # Prefix each line with "+" if it has an arc to the next line, else " ".
+        for lineno, arcs in arc_chars.items():
+            arc_chars[lineno] = (
+                ("+" if lineno in plus_ones else " ") +
+                arcs
+            )
+
+        return arc_chars
+
+
+def set_char(s, n, c):
+    """Put c at index n of s, padding s with spaces if it is too short."""
+    padded = s.ljust(n)
+    return "".join([padded[:n], c, padded[n+1:]])
+
+
+def blanks(s):
+    """Return the set of indexes at which s holds a space character."""
+    return {pos for pos, char in enumerate(s) if char == " "}
+
+
+def first_all_blanks(ss):
+    """Find the first column that is free in every one of the strings ss.
+
+    A column is free in a string if it holds a space or lies past its end.
+    If no column is free everywhere, return the length of the longest string.
+    """
+    ss = list(ss)
+    width = max(len(s) for s in ss)
+    free = set.intersection(*(blanks(s.ljust(width)) for s in ss))
+    return min(free) if free else width
 
-        return arc_width, arc_chars
 
 if __name__ == '__main__':
     ParserMain().main(sys.argv[1:])
-