7 files changed, 269 insertions, 117 deletions
diff --git a/coverage/backward.py b/coverage/backward.py
index a0dc9027..03fa6512 100644
--- a/coverage/backward.py
+++ b/coverage/backward.py
@@ -8,7 +8,7 @@
 
 import os, re, sys
 
-# Pythons 2 and 3 differ on where to get StringIO
+# Pythons 2 and 3 differ on where to get StringIO.
 try:
     from cStringIO import StringIO
     BytesIO = StringIO
diff --git a/coverage/codeunit.py b/coverage/codeunit.py
index c58e237b..d9cd5e44 100644
--- a/coverage/codeunit.py
+++ b/coverage/codeunit.py
@@ -1,9 +1,11 @@
 """Code unit (module) handling for Coverage."""
 
-import glob, os
+import glob, os, re
 
 from coverage.backward import open_source, string_class, StringIO
-from coverage.misc import CoverageException
+from coverage.misc import CoverageException, NoSource
+from coverage.parser import CodeParser, PythonParser
+from coverage.phystokens import source_token_lines, source_encoding
 
 
 def code_unit_factory(morfs, file_locator):
@@ -29,7 +31,19 @@ def code_unit_factory(morfs, file_locator):
             globbed.append(morf)
     morfs = globbed
 
-    code_units = [CodeUnit(morf, file_locator) for morf in morfs]
+    code_units = []
+    for morf in morfs:
+        # Hacked-in Mako support. Disabled for going onto trunk.
+        if 0 and isinstance(morf, string_class) and "/mako/" in morf:
+            # Super hack! Do mako both ways!
+            if 0:
+                cu = PythonCodeUnit(morf, file_locator)
+                cu.name += '_fako'
+                code_units.append(cu)
+            klass = MakoCodeUnit
+        else:
+            klass = PythonCodeUnit
+        code_units.append(klass(morf, file_locator))
 
     return code_units
 
@@ -44,6 +58,7 @@ class CodeUnit(object):
     `relative` is a boolean.
 
     """
+
     def __init__(self, morf, file_locator):
         self.file_locator = file_locator
 
@@ -51,11 +66,7 @@ class CodeUnit(object):
             f = morf.__file__
         else:
             f = morf
-        # .pyc files should always refer to a .py instead.
-        if f.endswith(('.pyc', '.pyo')):
-            f = f[:-1]
-        elif f.endswith('$py.class'): # Jython
-            f = f[:-9] + ".py"
+        f = self._adjust_filename(f)
         self.filename = self.file_locator.canonical_filename(f)
 
         if hasattr(morf, '__name__'):
@@ -99,7 +110,7 @@ class CodeUnit(object):
         the same directory, but need to differentiate same-named files from
         different directories.
 
-        For example, the file a/b/c.py might return 'a_b_c'
+        For example, the file a/b/c.py will return 'a_b_c'
 
         """
         if self.modname:
@@ -127,7 +138,68 @@ class CodeUnit(object):
     def should_be_python(self):
         """Does it seem like this file should contain Python?
 
-        This is used to decide if a file reported as part of the exection of
+        This is used to decide if a file reported as part of the execution of
+        a program was really likely to have contained Python in the first
+        place.
+        """
+        return False
+
+
+class PythonCodeUnit(CodeUnit):
+    """Represents a Python file."""
+
+    parser_class = PythonParser
+
+    def _adjust_filename(self, fname):
+        # Compiled files should always refer to their .py source instead.
+        if fname.endswith('$py.class'): # Jython
+            return fname[:-9] + ".py"
+        if fname.endswith(('.pyc', '.pyo')):
+            return fname[:-1]
+        return fname
+
+    def find_source(self, filename):
+        """Find the source for `filename`.
+
+        Returns two values: the actual filename, and the source.
+
+        The source returned depends on which of these cases holds:
+
+            * The filename seems to be a non-source file: the source is None.
+
+            * The filename is an existing source file: the source is None.
+
+            * The filename is a source file, and is in a zip file or egg:
+              returns the source.
+
+            * The filename is a source file, but couldn't be found: raises
+              `NoSource`.
+
+        """
+        source = None
+
+        base, ext = os.path.splitext(filename)
+        TRY_EXTS = {
+            '.py':  ['.py', '.pyw'],
+            '.pyw': ['.pyw'],
+        }
+        try_exts = TRY_EXTS.get(ext)
+        if not try_exts:
+            return filename, None
+
+        for try_ext in try_exts:
+            try_filename = base + try_ext
+            if os.path.exists(try_filename):
+                return try_filename, None
+            source = self.file_locator.get_zip_data(try_filename)
+            if source:
+                return try_filename, source
+        raise NoSource("No source for code: '%s'" % filename)
+
+    def should_be_python(self):
+        """Does it seem like this file should contain Python?
+
+        This is used to decide if a file reported as part of the execution of
         a program was really likely to have contained Python in the first
         place.
 
@@ -143,3 +215,86 @@ class CodeUnit(object):
             return True
         # Everything else is probably not Python.
         return False
+
+    def source_token_lines(self, source):
+        return source_token_lines(source)
+
+    def source_encoding(self, source):
+        return source_encoding(source)
+
+
+def mako_template_name(py_filename):
+    """Return the template filename recorded in Mako module `py_filename`."""
+    with open(py_filename) as f:
+        py_source = f.read()
+
+    # Find the template filename. TODO: string escapes in the string.
+    m = re.search(r"^_template_filename = u?'([^']+)'", py_source, flags=re.MULTILINE)
+    if not m:
+        raise CoverageException("Couldn't find template filename in Mako file %r" % py_filename)
+    return m.group(1)
+
+
+class MakoParser(CodeParser):
+    """Parser for Python modules compiled from Mako templates."""
+    def __init__(self, cu, text, filename, exclude):
+        self.cu = cu
+        self.text = text
+        self.filename = filename
+        self.exclude = exclude
+
+    def parse_source(self):
+        """Returns executable_line_numbers, excluded_line_numbers"""
+        with open(self.cu.filename) as f:
+            py_source = f.read()
+
+        # Map each generated Python line to the template line named by the
+        # most recent "# SOURCE LINE n" marker; "# BOILERPLATE" ends a run.
+        self.py_to_html = {}
+        html_linenum = None
+        for linenum, line in enumerate(py_source.splitlines(), start=1):
+            m_source_line = re.search(r"^\s*# SOURCE LINE (\d+)$", line)
+            if m_source_line:
+                html_linenum = int(m_source_line.group(1))
+            elif re.search(r"^\s*# BOILERPLATE", line):
+                html_linenum = None
+            elif html_linenum:
+                self.py_to_html[linenum] = html_linenum
+
+        return set(self.py_to_html.values()), set()
+
+    def translate_lines(self, lines):
+        """Map the Python line numbers in `lines` to template line numbers."""
+        # Unmapped lines are dropped; no error if every line has a mapping.
+        return set(self.py_to_html[l] for l in lines if l in self.py_to_html)
+
+
+class MakoCodeUnit(CodeUnit):
+    """Represents a Mako template, via its compiled Python module."""
+    parser_class = MakoParser
+
+    def __init__(self, *args, **kwargs):
+        super(MakoCodeUnit, self).__init__(*args, **kwargs)
+        self.mako_filename = mako_template_name(self.filename)
+
+    def _adjust_filename(self, fname):
+        # Hook called by CodeUnit.__init__; Mako filenames are used as-is.
+        return fname
+
+    def source_file(self):
+        return open(self.mako_filename)
+
+    def find_source(self, filename):
+        """Return the template filename for `filename`, and its source."""
+        mako_filename = mako_template_name(filename)
+        with open(mako_filename) as f:
+            source = f.read()
+        return mako_filename, source
+
+    def source_token_lines(self, source):
+        """Return the 'tokenized' text for the code."""
+        for line in source.splitlines():
+            yield [('txt', line)]
+
+    def source_encoding(self, source):
+        return "utf-8"
diff --git a/coverage/control.py b/coverage/control.py
index d5e2c6f8..07551ff1 100644
--- a/coverage/control.py
+++ b/coverage/control.py
@@ -4,7 +4,7 @@ import atexit, os, random, socket, sys
 
 from coverage.annotate import AnnotateReporter
 from coverage.backward import string_class, iitems
-from coverage.codeunit import code_unit_factory, CodeUnit
+from coverage.codeunit import code_unit_factory, CodeUnit, PythonCodeUnit
 from coverage.collector import Collector
 from coverage.config import CoverageConfig
 from coverage.data import CoverageData
@@ -214,7 +214,7 @@ class coverage(object):
 
     def _canonical_dir(self, morf):
         """Return the canonical directory of the module or file `morf`."""
-        return os.path.split(CodeUnit(morf, self.file_locator).filename)[0]
+        return os.path.split(PythonCodeUnit(morf, self.file_locator).filename)[0]
 
     def _source_for_file(self, filename):
         """Return the source file for `filename`."""
@@ -692,6 +692,13 @@ class coverage(object):
             if self.config.xml_output == '-':
                 outfile = sys.stdout
             else:
+                # Ensure that the output directory is created; done here
+                # because this report pre-opens the output file.
+                # HTMLReport does this using the Report plumbing because
+                # its task is more complex, being multiple files.
+                output_dir = os.path.dirname(self.config.xml_output)
+                if output_dir and not os.path.isdir(output_dir):
+                    os.makedirs(output_dir)
                 outfile = open(self.config.xml_output, "w")
                 file_to_close = outfile
         try:
diff --git a/coverage/html.py b/coverage/html.py
index d168e351..d890436c 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -167,7 +167,7 @@ class HtmlReporter(Reporter):
         # If need be, determine the encoding of the source file. We use it
         # later to properly write the HTML.
         if sys.version_info < (3, 0):
-            encoding = source_encoding(source)
+            encoding = cu.source_encoding(source)
             # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
             if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
                 source = source[3:]
@@ -187,7 +187,7 @@ class HtmlReporter(Reporter):
 
         lines = []
 
-        for lineno, line in enumerate(source_token_lines(source), start=1):
+        for lineno, line in enumerate(cu.source_token_lines(source), start=1):
             # Figure out how to mark this line.
             line_class = []
             annotate_html = ""
diff --git a/coverage/parser.py b/coverage/parser.py
index de6590aa..f569de25 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -11,16 +11,57 @@ from coverage.misc import CoverageException, NoSource, NotPython
 
 
 class CodeParser(object):
+    """Base class for any code parser; subclasses supply `parse_source`."""
+    def _adjust_filename(self, fname):
+        return fname
+
+    def first_lines(self, lines, *ignores):
+        """Map the line numbers in `lines` to the correct first line of the
+        statement.
+
+        Skip any line mentioned in any of the sequences in `ignores`.
+
+        Returns a set of the first lines.
+
+        """
+        ignore = set()
+        for ign in ignores:
+            ignore.update(ign)
+        lset = set()
+        for l in lines:
+            if l in ignore:
+                continue
+            new_l = self.first_line(l)
+            if new_l not in ignore:
+                lset.add(new_l)
+        return lset
+
+    def first_line(self, line):
+        """Return `line`; subclasses map it to its statement's first line."""
+        return line
+
+    def translate_lines(self, lines):
+        """Return `lines` unchanged; subclasses may map them elsewhere."""
+        return lines
+
+    def exit_counts(self):
+        return {}
+
+    def arcs(self):
+        return []
+
+
+class PythonParser(CodeParser):
     """Parse code to find executable lines, excluded lines, etc."""
 
-    def __init__(self, text=None, filename=None, exclude=None):
+    def __init__(self, cu, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
         which the text will be read.  Excluded lines are those that match
         `exclude`, a regex.
 
         """
-        assert text or filename, "CodeParser needs either text or filename"
+        assert text or filename, "PythonParser needs either text or filename"
         self.filename = filename or "<code>"
         self.text = text
         if not self.text:
@@ -137,9 +178,8 @@ class CodeParser(object):
                     # We're at the end of a line, and we've ended on a
                     # different line than the first line of the statement,
                     # so record a multi-line range.
-                    rng = (first_line, elineno)
                     for l in range(first_line, elineno+1):
-                        self.multiline[l] = rng
+                        self.multiline[l] = first_line
                 first_line = None
 
             if ttext.strip() and toktype != tokenize.COMMENT:
@@ -163,33 +203,11 @@ class CodeParser(object):
 
     def first_line(self, line):
         """Return the first line number of the statement including `line`."""
-        rng = self.multiline.get(line)
-        if rng:
-            first_line = rng[0]
+        first_line = self.multiline.get(line)
+        if first_line:
+            return first_line
         else:
-            first_line = line
-        return first_line
-
-    def first_lines(self, lines, *ignores):
-        """Map the line numbers in `lines` to the correct first line of the
-        statement.
-
-        Skip any line mentioned in any of the sequences in `ignores`.
-
-        Returns a set of the first lines.
-
-        """
-        ignore = set()
-        for ign in ignores:
-            ignore.update(ign)
-        lset = set()
-        for l in lines:
-            if l in ignore:
-                continue
-            new_l = self.first_line(l)
-            if new_l not in ignore:
-                lset.add(new_l)
-        return lset
+            return line
 
     def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
diff --git a/coverage/results.py b/coverage/results.py
index 0576ae1f..79615c77 100644
--- a/coverage/results.py
+++ b/coverage/results.py
@@ -4,8 +4,7 @@ import collections
 import os
 
 from coverage.backward import iitems
-from coverage.misc import format_lines, join_regex, NoSource
-from coverage.parser import CodeParser
+from coverage.misc import format_lines, join_regex
 
 
 class Analysis(object):
@@ -16,16 +15,17 @@ class Analysis(object):
         self.code_unit = code_unit
 
         self.filename = self.code_unit.filename
-        actual_filename, source = self.find_source(self.filename)
+        actual_filename, source = self.code_unit.find_source(self.filename)
 
-        self.parser = CodeParser(
+        self.parser = code_unit.parser_class(
+            code_unit,
             text=source, filename=actual_filename,
             exclude=self.coverage._exclude_regex('exclude')
             )
         self.statements, self.excluded = self.parser.parse_source()
 
         # Identify missing statements.
-        executed = self.coverage.data.executed_lines(self.filename)
+        executed = self.parser.translate_lines(self.coverage.data.executed_lines(self.filename))
         exec1 = self.parser.first_lines(executed)
         self.missing = self.statements - exec1
 
@@ -54,44 +54,6 @@ class Analysis(object):
             n_missing_branches=n_missing_branches,
             )
 
-    def find_source(self, filename):
-        """Find the source for `filename`.
-
-        Returns two values: the actual filename, and the source.
-
-        The source returned depends on which of these cases holds:
-
-            * The filename seems to be a non-source file: returns None
-
-            * The filename is a source file, and actually exists: returns None.
-
-            * The filename is a source file, and is in a zip file or egg:
-              returns the source.
-
-            * The filename is a source file, but couldn't be found: raises
-              `NoSource`.
-
-        """
-        source = None
-
-        base, ext = os.path.splitext(filename)
-        TRY_EXTS = {
-            '.py':  ['.py', '.pyw'],
-            '.pyw': ['.pyw'],
-        }
-        try_exts = TRY_EXTS.get(ext)
-        if not try_exts:
-            return filename, None
-
-        for try_ext in try_exts:
-            try_filename = base + try_ext
-            if os.path.exists(try_filename):
-                return try_filename, None
-            source = self.coverage.file_locator.get_zip_data(try_filename)
-            if source:
-                return try_filename, source
-        raise NoSource("No source for code: '%s'" % filename)
-
     def missing_formatted(self):
         """The missing line numbers, formatted nicely.
 
diff --git a/coverage/templite.py b/coverage/templite.py
index 1829aa82..3f6ef0b3 100644
--- a/coverage/templite.py
+++ b/coverage/templite.py
@@ -8,6 +8,8 @@ import re
 class CodeBuilder(object):
     """Build source code conveniently."""
 
+    INDENT_STEP = 4      # PEP8 says so!
+
     def __init__(self, indent=0):
         self.code = []
         self.indent_amount = indent
@@ -18,9 +20,7 @@ class CodeBuilder(object):
         Don't include indentations or newlines.
 
         """
-        self.code.append(" " * self.indent_amount)
-        self.code.append(line)
-        self.code.append("\n")
+        self.code.extend([" " * self.indent_amount, line, "\n"])
 
     def add_section(self):
         """Add a section, a sub-CodeBuilder."""
@@ -30,22 +30,25 @@ class CodeBuilder(object):
 
     def indent(self):
         """Increase the current indent for following lines."""
-        self.indent_amount += 4
+        self.indent_amount += self.INDENT_STEP
 
     def dedent(self):
         """Decrease the current indent for following lines."""
-        self.indent_amount -= 4
+        self.indent_amount -= self.INDENT_STEP
 
     def __str__(self):
         return "".join(str(c) for c in self.code)
 
-    def get_function(self, fn_name):
-        """Compile the code, and return the function `fn_name`."""
+    def get_globals(self):
+        """Compile the code, and return a dict of globals it defines."""
+        # A check that the caller really finished all the blocks they started.
         assert self.indent_amount == 0
-        g = {}
-        code_text = str(self)
-        exec(code_text, g)
-        return g[fn_name]
+        # Get the Python source as a single string.
+        python_source = str(self)
+        # Execute the source, defining globals, and return them.
+        global_namespace = {}
+        exec(python_source, global_namespace)
+        return global_namespace
 
 
 class Templite(object):
@@ -83,6 +86,9 @@ class Templite(object):
         for context in contexts:
             self.context.update(context)
 
+        self.all_vars = set()
+        self.loop_vars = set()
+
         # We construct a function in source form, then compile it and hold onto
         # it, and execute it to render the template.
         code = CodeBuilder()
@@ -90,8 +96,6 @@ class Templite(object):
         code.add_line("def render(ctx, dot):")
         code.indent()
         vars_code = code.add_section()
-        self.all_vars = set()
-        self.loop_vars = set()
         code.add_line("result = []")
         code.add_line("a = result.append")
         code.add_line("e = result.extend")
@@ -107,29 +111,31 @@ class Templite(object):
             del buffered[:]
 
         # Split the text to form a list of tokens.
-        toks = re.split(r"(?s)({{.*?}}|{%.*?%}|{#.*?#})", text)
+        tokens = re.split(r"(?s)({{.*?}}|{%.*?%}|{#.*?#})", text)
 
         ops_stack = []
-        for tok in toks:
-            if tok.startswith('{{'):
+        for token in tokens:
+            if token.startswith('{{'):
                 # An expression to evaluate.
-                buffered.append("s(%s)" % self.expr_code(tok[2:-2].strip()))
-            elif tok.startswith('{#'):
+                buffered.append("s(%s)" % self.expr_code(token[2:-2].strip()))
+            elif token.startswith('{#'):
                 # Comment: ignore it and move on.
                 continue
-            elif tok.startswith('{%'):
+            elif token.startswith('{%'):
                 # Action tag: split into words and parse further.
                 flush_output()
-                words = tok[2:-2].strip().split()
+                words = token[2:-2].strip().split()
                 if words[0] == 'if':
                     # An if statement: evaluate the expression to determine if.
-                    assert len(words) == 2
+                    if len(words) != 2:
+                        self.syntax_error("Don't understand if", token)
                     ops_stack.append('if')
                     code.add_line("if %s:" % self.expr_code(words[1]))
                     code.indent()
                 elif words[0] == 'for':
                     # A loop: iterate over expression result.
-                    assert len(words) == 4 and words[2] == 'in'
+                    if len(words) != 4 or words[2] != 'in':
+                        self.syntax_error("Don't understand for", token)
                     ops_stack.append('for')
                     self.loop_vars.add(words[1])
                     code.add_line(
@@ -140,29 +146,33 @@ class Templite(object):
                     )
                     code.indent()
                 elif words[0].startswith('end'):
-                    # Endsomething.  Pop the ops stack
+                    # Endsomething.  Pop the ops stack.
                     end_what = words[0][3:]
                     if ops_stack[-1] != end_what:
-                        raise SyntaxError("Mismatched end tag: %r" % end_what)
+                        self.syntax_error("Mismatched end tag", end_what)
                     ops_stack.pop()
                     code.dedent()
                 else:
-                    raise SyntaxError("Don't understand tag: %r" % words[0])
+                    self.syntax_error("Don't understand tag", words[0])
             else:
                 # Literal content.  If it isn't empty, output it.
-                if tok:
-                    buffered.append("%r" % tok)
+                if token:
+                    buffered.append("%r" % token)
         flush_output()
 
         for var_name in self.all_vars - self.loop_vars:
             vars_code.add_line("c_%s = ctx[%r]" % (var_name, var_name))
 
         if ops_stack:
-            raise SyntaxError("Unmatched action tag: %r" % ops_stack[-1])
+            self.syntax_error("Unmatched action tag", ops_stack[-1])
 
         code.add_line("return ''.join(result)")
         code.dedent()
-        self.render_function = code.get_function('render')
+        self.render_function = code.get_globals()['render']
+
+    def syntax_error(self, msg, thing):
+        """Raise a syntax error using `msg`, and showing `thing`."""
+        raise SyntaxError("%s: %r" % (msg, thing))
 
     def expr_code(self, expr):
         """Generate a Python expression for `expr`."""