summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNed Batchelder <ned@nedbatchelder.com>2014-12-12 08:51:19 -0500
committerNed Batchelder <ned@nedbatchelder.com>2014-12-12 08:51:19 -0500
commit6055667e8090fc09e55b96ae49d5ac6a37706ede (patch)
tree12eba2ac0b9cb22476de8ec3d3eb9a6d183ad0b2
parent24e2488c36e4d19fd9eb06a5b03cd92821b4f2bd (diff)
downloadpython-coveragepy-git-6055667e8090fc09e55b96ae49d5ac6a37706ede.tar.gz
Source is always Unicode in HTML code. More refactoring to come.
-rw-r--r--coverage/backward.py19
-rw-r--r--coverage/codeunit.py44
-rw-r--r--coverage/files.py67
-rw-r--r--coverage/html.py25
-rw-r--r--coverage/misc.py2
-rw-r--r--coverage/parser.py23
-rw-r--r--coverage/phystokens.py3
-rw-r--r--coverage/templite.py6
-rw-r--r--tests/test_html.py2
-rw-r--r--tests/test_misc.py7
-rw-r--r--tests/test_phystokens.py2
-rw-r--r--tests/test_templite.py11
12 files changed, 130 insertions, 81 deletions
diff --git a/coverage/backward.py b/coverage/backward.py
index 50b21f3b..dfc169d4 100644
--- a/coverage/backward.py
+++ b/coverage/backward.py
@@ -20,6 +20,12 @@ try:
except NameError:
string_class = str
+# What's a Unicode string called?
+try:
+ unicode_class = unicode
+except NameError:
+ unicode_class = str
+
# Where do pickles come from?
try:
import cPickle as pickle
@@ -66,6 +72,11 @@ if sys.version_info >= (3, 0):
"""Convert bytes `b` to a string."""
return b.decode('utf8')
+ def unicode_literal(s):
+ """Make a plain string literal into unicode."""
+ # In Python 3, string literals already are unicode.
+ return s
+
def binary_bytes(byte_values):
"""Produce a byte string with the ints from `byte_values`."""
return bytes(byte_values)
@@ -88,6 +99,11 @@ else:
"""Convert bytes `b` to a string (no-op in 2.x)."""
return b
+ def unicode_literal(s):
+ """Make a plain string literal into unicode."""
+ # In Python 2, s is a byte string.
+ return s.decode('utf8')
+
def binary_bytes(byte_values):
"""Produce a byte string with the ints from `byte_values`."""
return "".join(chr(b) for b in byte_values)
@@ -117,13 +133,14 @@ try:
except ImportError:
importlib = None
-# we only want to use importlib if it has everything we need.
+# We only want to use importlib if it has everything we need.
try:
importlib_util_find_spec = importlib.util.find_spec
except Exception:
import imp
importlib_util_find_spec = None
+# What is the .pyc magic number for this version of Python?
try:
PYC_MAGIC_NUMBER = importlib.util.MAGIC_NUMBER
except AttributeError:
diff --git a/coverage/codeunit.py b/coverage/codeunit.py
index 207383e0..f34967e5 100644
--- a/coverage/codeunit.py
+++ b/coverage/codeunit.py
@@ -1,10 +1,12 @@
"""Code unit (module) handling for Coverage."""
import os
+import sys
-from coverage.backward import open_python_source, string_class
+from coverage.backward import string_class, unicode_class
+from coverage.files import get_python_source, get_zip_data
from coverage.misc import CoverageException, NoSource
-from coverage.parser import CodeParser, PythonParser
+from coverage.parser import PythonParser
from coverage.phystokens import source_token_lines, source_encoding
@@ -85,7 +87,7 @@ class CodeUnit(object):
self._source = None
def __repr__(self):
- return "<CodeUnit name=%r filename=%r>" % (self.name, self.filename)
+ return "<{self.__class__.__name__} name={self.name!r} filename={self.filename!r}>".format(self=self)
def _adjust_filename(self, f):
# TODO: This shouldn't be in the base class, right?
@@ -124,29 +126,12 @@ class CodeUnit(object):
return root.replace('\\', '_').replace('/', '_').replace('.', '_')
def source(self):
- if self._source is None:
- self._source = self.get_source()
- return self._source
-
- def get_source(self):
- """Return the source code, as a string."""
- if os.path.exists(self.filename):
- # A regular text file: open it.
- with open_python_source(self.filename) as f:
- return f.read()
-
- # Maybe it's in a zip file?
- source = self.file_locator.get_zip_data(self.filename)
- if source is not None:
- return source
-
- # Couldn't find source.
- raise CoverageException(
- "No source for code '%s'." % self.filename
- )
+ """Return the source for the code, a Unicode string."""
+ return unicode_class("???")
def source_token_lines(self):
"""Return the 'tokenized' text for the code."""
+ # A generic implementation, each line is one "txt" token.
for line in self.source().splitlines():
yield [('txt', line)]
@@ -174,6 +159,14 @@ class PythonCodeUnit(CodeUnit):
fname = fname[:-9] + ".py"
return fname
+ def source(self):
+ if self._source is None:
+ self._source = get_python_source(self.filename)
+ if sys.version_info < (3, 0):
+ encoding = source_encoding(self._source)
+ self._source = self._source.decode(encoding, "replace")
+ return self._source
+
def get_parser(self, exclude=None):
actual_filename, source = self._find_source(self.filename)
return PythonParser(
@@ -213,7 +206,7 @@ class PythonCodeUnit(CodeUnit):
try_filename = base + try_ext
if os.path.exists(try_filename):
return try_filename, None
- source = self.file_locator.get_zip_data(try_filename)
+ source = get_zip_data(try_filename)
if source:
return try_filename, source
raise NoSource("No source for code: '%s'" % filename)
@@ -240,6 +233,3 @@ class PythonCodeUnit(CodeUnit):
def source_token_lines(self):
return source_token_lines(self.source())
-
- def source_encoding(self):
- return source_encoding(self.source())
diff --git a/coverage/files.py b/coverage/files.py
index c2a5ec72..1400b6eb 100644
--- a/coverage/files.py
+++ b/coverage/files.py
@@ -1,10 +1,12 @@
"""File wrangling."""
-from coverage.backward import to_string
-from coverage.misc import CoverageException, join_regex
import fnmatch, os, os.path, re, sys
import ntpath, posixpath
+from coverage.backward import to_string, open_python_source
+from coverage.misc import CoverageException, join_regex
+
+
class FileLocator(object):
"""Understand how filenames work."""
@@ -47,29 +49,48 @@ class FileLocator(object):
self.canonical_filename_cache[filename] = cf
return self.canonical_filename_cache[filename]
- def get_zip_data(self, filename):
- """Get data from `filename` if it is a zip file path.
- Returns the string data read from the zip file, or None if no zip file
- could be found or `filename` isn't in it. The data returned will be
- an empty string if the file is empty.
+def get_python_source(filename):
+ """Return the source code, as a string."""
+ if os.path.exists(filename):
+ # A regular text file: open it.
+ with open_python_source(filename) as f:
+ return f.read()
- """
- import zipimport
- markers = ['.zip'+os.sep, '.egg'+os.sep]
- for marker in markers:
- if marker in filename:
- parts = filename.split(marker)
- try:
- zi = zipimport.zipimporter(parts[0]+marker[:-1])
- except zipimport.ZipImportError:
- continue
- try:
- data = zi.get_data(parts[1])
- except IOError:
- continue
- return to_string(data)
- return None
+ # Maybe it's in a zip file?
+ source = get_zip_data(filename)
+ if source is not None:
+ return source
+
+ # Couldn't find source.
+ raise CoverageException(
+ "No source for code: '%s'." % filename
+ )
+
+
+def get_zip_data(filename):
+ """Get data from `filename` if it is a zip file path.
+
+ Returns the string data read from the zip file, or None if no zip file
+ could be found or `filename` isn't in it. The data returned will be
+ an empty string if the file is empty.
+
+ """
+ import zipimport
+ markers = ['.zip'+os.sep, '.egg'+os.sep]
+ for marker in markers:
+ if marker in filename:
+ parts = filename.split(marker)
+ try:
+ zi = zipimport.zipimporter(parts[0]+marker[:-1])
+ except zipimport.ZipImportError:
+ continue
+ try:
+ data = zi.get_data(parts[1])
+ except IOError:
+ continue
+ return to_string(data)
+ return None
if sys.platform == 'win32':
diff --git a/coverage/html.py b/coverage/html.py
index a4b46a23..677e5e83 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -1,5 +1,7 @@
"""HTML reporting for Coverage."""
+from __future__ import unicode_literals
+
import json, os, re, shutil, sys
import coverage
@@ -65,10 +67,13 @@ class HtmlReporter(Reporter):
def __init__(self, cov, config):
super(HtmlReporter, self).__init__(cov, config)
self.directory = None
+ title = self.config.html_title
+ if sys.version_info < (3, 0):
+ title = title.decode("utf8")
self.template_globals = {
'escape': escape,
'pair': pair,
- 'title': self.config.html_title,
+ 'title': title,
'__url__': coverage.__url__,
'__version__': coverage.__version__,
}
@@ -154,7 +159,7 @@ class HtmlReporter(Reporter):
# Find out if the file on disk is already correct.
flat_rootname = cu.flat_rootname()
- this_hash = self.file_hash(source, cu)
+ this_hash = self.file_hash(source.encode('utf-8'), cu)
that_hash = self.status.file_hash(flat_rootname)
if this_hash == that_hash:
# Nothing has changed to require the file to be reported again.
@@ -163,15 +168,6 @@ class HtmlReporter(Reporter):
self.status.set_file_hash(flat_rootname, this_hash)
- # If need be, determine the encoding of the source file. We use it
- # later to properly write the HTML.
- if sys.version_info < (3, 0):
- encoding = cu.source_encoding()
- # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
- if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
- source = source[3:]
- encoding = "utf-8"
-
# Get the numbers for this file.
nums = analysis.numbers
@@ -239,11 +235,6 @@ class HtmlReporter(Reporter):
'cu': cu, 'nums': nums, 'lines': lines,
}))
- if sys.version_info < (3, 0):
- # In theory, all the characters in the source can be decoded, but
- # strange things happen, so use 'replace' to keep errors at bay.
- html = html.decode(encoding, 'replace')
-
html_filename = flat_rootname + ".html"
html_path = os.path.join(self.directory, html_filename)
self.write_html(html_path, html)
@@ -272,8 +263,6 @@ class HtmlReporter(Reporter):
'totals': self.totals,
})
- if sys.version_info < (3, 0):
- html = html.decode("utf-8")
self.write_html(
os.path.join(self.directory, "index.html"),
html
diff --git a/coverage/misc.py b/coverage/misc.py
index f0e043b9..924199ef 100644
--- a/coverage/misc.py
+++ b/coverage/misc.py
@@ -109,6 +109,8 @@ class Hasher(object):
self.md5.update(to_bytes(str(type(v))))
if isinstance(v, string_class):
self.md5.update(to_bytes(v))
+ elif isinstance(v, bytes):
+ self.md5.update(v)
elif v is None:
pass
elif isinstance(v, (int, float)):
diff --git a/coverage/parser.py b/coverage/parser.py
index ef2ee5b8..97cc01bb 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -2,10 +2,10 @@
import collections, dis, re, token, tokenize
-from coverage.backward import StringIO
from coverage.backward import range # pylint: disable=redefined-builtin
-from coverage.backward import bytes_to_ints, open_python_source
+from coverage.backward import bytes_to_ints
from coverage.bytecode import ByteCodes, CodeObjects
+from coverage.files import get_python_source
from coverage.misc import nice_pair, expensive, join_regex
from coverage.misc import CoverageException, NoSource, NotPython
@@ -42,8 +42,7 @@ class PythonParser(CodeParser):
self.text = text
if not self.text:
try:
- with open_python_source(self.filename) as sourcef:
- self.text = sourcef.read()
+ self.text = get_python_source(self.filename)
except IOError as err:
raise NoSource(
"No source for code: '%s': %s" % (self.filename, err)
@@ -345,8 +344,7 @@ class ByteParser(object):
else:
if not text:
assert filename, "If no code or text, need a filename"
- with open_python_source(filename) as sourcef:
- text = sourcef.read()
+ text = get_python_source(filename)
self.text = text
try:
@@ -692,11 +690,16 @@ class CachedTokenizer(object):
def generate_tokens(self, text):
"""A stand-in for `tokenize.generate_tokens`."""
- if text != self.last_text:
+ # Check the type first so we don't compare bytes to unicode and get
+ # warnings.
+ if type(text) != type(self.last_text) or text != self.last_text:
self.last_text = text
- self.last_tokens = list(
- tokenize.generate_tokens(StringIO(text).readline)
- )
+ line_iter = iter(text.splitlines(True))
+ try:
+ readline = line_iter.next
+ except AttributeError:
+ readline = line_iter.__next__
+ self.last_tokens = list(tokenize.generate_tokens(readline))
return self.last_tokens
# Create our generate_tokens cache as a callable replacement function.
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index fe77c7de..3fd1165c 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,6 +1,7 @@
"""Better tokenizing for coverage.py."""
import codecs, keyword, re, sys, token, tokenize
+
from coverage.parser import generate_tokens
@@ -75,6 +76,7 @@ def source_token_lines(source):
is indistinguishable from a final line with a newline.
"""
+
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
@@ -108,6 +110,7 @@ def source_token_lines(source):
if line:
yield line
+
def source_encoding(source):
"""Determine the encoding for `source` (a string), according to PEP 263.
diff --git a/coverage/templite.py b/coverage/templite.py
index 5877c058..c2e8981f 100644
--- a/coverage/templite.py
+++ b/coverage/templite.py
@@ -3,6 +3,7 @@
# Coincidentally named the same as http://code.activestate.com/recipes/496702/
import re
+import sys
class TempliteSyntaxError(ValueError):
@@ -116,7 +117,10 @@ class Templite(object):
code.add_line("result = []")
code.add_line("append_result = result.append")
code.add_line("extend_result = result.extend")
- code.add_line("to_str = str")
+ if sys.version_info < (3, 0):
+ code.add_line("to_str = unicode")
+ else:
+ code.add_line("to_str = str")
buffered = []
def flush_output():
diff --git a/tests/test_html.py b/tests/test_html.py
index 9448447b..cf143cc1 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -383,6 +383,6 @@ class HtmlStaticFileTest(CoverageTest):
self.make_file("main.py", "print(17)")
cov = coverage.coverage()
self.start_import_stop(cov, "main")
- msg = "Couldn't find static file '.*'"
+ msg = "Couldn't find static file u?'.*'"
with self.assertRaisesRegex(CoverageException, msg):
cov.html_report()
diff --git a/tests/test_misc.py b/tests/test_misc.py
index 7b7d51d0..d9b0c4e6 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -22,6 +22,13 @@ class HasherTest(CoverageTest):
self.assertNotEqual(h1.hexdigest(), h2.hexdigest())
self.assertEqual(h1.hexdigest(), h3.hexdigest())
+ def test_bytes_hashing(self):
+ h1 = Hasher()
+ h1.update(b"Hello, world!")
+ h2 = Hasher()
+ h2.update(b"Goodbye!")
+ self.assertNotEqual(h1.hexdigest(), h2.hexdigest())
+
def test_dict_hashing(self):
h1 = Hasher()
h1.update({'a': 17, 'b': 23})
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index ccd5682a..10e0225f 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -1,5 +1,7 @@
"""Tests for Coverage.py's improved tokenizer."""
+#from __future__ import unicode_literals
+
import os, re, sys
from tests.coveragetest import CoverageTest
from coverage.phystokens import source_token_lines, source_encoding
diff --git a/tests/test_templite.py b/tests/test_templite.py
index b3e21e70..56033ec1 100644
--- a/tests/test_templite.py
+++ b/tests/test_templite.py
@@ -1,7 +1,11 @@
+# -*- coding: utf8 -*-
"""Tests for coverage.templite."""
import re
+
+from coverage.backward import unicode_literal
from coverage.templite import Templite, TempliteSyntaxError
+
from tests.coveragetest import CoverageTest
# pylint: disable=unused-variable
@@ -232,6 +236,13 @@ class TempliteTest(CoverageTest):
"@a0b0c0a1b1c1a2b2c2!"
)
+ def test_non_ascii(self):
+ self.try_render(
+ unicode_literal("{{where}} ollǝɥ"),
+ { 'where': unicode_literal('ǝɹǝɥʇ') },
+ unicode_literal("ǝɹǝɥʇ ollǝɥ")
+ )
+
def test_exception_during_evaluation(self):
# TypeError: Couldn't evaluate {{ foo.bar.baz }}:
# 'NoneType' object is unsubscriptable