summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ned Batchelder <ned@nedbatchelder.com> 2015-10-25 17:40:09 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2015-10-25 17:40:09 -0400
commit: b810cbc0d06df0a04e3380166215b8ad2f40524c (patch)
tree: b0ea1499815b84301fa7991e37de128265526e81
parent: 15100248c12b85a00278371fea60f07718a9d499 (diff)
download: python-coveragepy-git-b810cbc0d06df0a04e3380166215b8ad2f40524c.tar.gz
Properly handle filenames with non-ASCII characters. #432
-rw-r--r-- CHANGES.rst 4
-rw-r--r-- coverage/files.py 35
-rw-r--r-- coverage/parser.py 4
-rw-r--r-- coverage/phystokens.py 3
-rw-r--r-- coverage/python.py 7
-rw-r--r-- coverage/summary.py 54
-rw-r--r-- coverage/xmlreport.py 6
-rw-r--r-- tests/helpers.py 7
-rw-r--r-- tests/test_files.py 2
-rw-r--r-- tests/test_process.py 76
-rw-r--r-- tests/test_summary.py 28
11 files changed, 193 insertions, 33 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 035c3b72..a61ef2d4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -11,7 +11,11 @@ Version 4.0.2
- More work on supporting unusually encoded source. Fixed `issue 431`_.
+- Files or directories with non-ASCII characters are now handled properly,
+ fixing `issue 432`_.
+
.. _issue 431: https://bitbucket.org/ned/coveragepy/issues/431/couldnt-parse-python-file-with-cp1252
+.. _issue 432: https://bitbucket.org/ned/coveragepy/issues/432/path-with-unicode-characters-various
Version 4.0.1 --- 13 October 2015
diff --git a/coverage/files.py b/coverage/files.py
index 0b5651cb..855d8157 100644
--- a/coverage/files.py
+++ b/coverage/files.py
@@ -13,12 +13,9 @@ import sys
from coverage import env
from coverage.backward import unicode_class
-from coverage.misc import CoverageException, join_regex, isolate_module
+from coverage.misc import contract, CoverageException, join_regex, isolate_module
-RELATIVE_DIR = None
-CANONICAL_FILENAME_CACHE = {}
-
os = isolate_module(os)
@@ -33,10 +30,13 @@ def set_relative_directory():
# avoid duplicating work.
CANONICAL_FILENAME_CACHE = {}
+
def relative_directory():
"""Return the directory that `relative_filename` is relative to."""
return RELATIVE_DIR
+
+@contract(returns='unicode')
def relative_filename(filename):
"""Return the relative form of `filename`.
@@ -47,8 +47,10 @@ def relative_filename(filename):
fnorm = os.path.normcase(filename)
if fnorm.startswith(RELATIVE_DIR):
filename = filename[len(RELATIVE_DIR):]
- return filename
+ return unicode_filename(filename)
+
+@contract(returns='unicode')
def canonical_filename(filename):
"""Return a canonical file name for `filename`.
@@ -65,6 +67,8 @@ def canonical_filename(filename):
filename = f
break
cf = abs_file(filename)
+ if env.PY2 and isinstance(cf, str):
+ cf = cf.decode(sys.getfilesystemencoding())
CANONICAL_FILENAME_CACHE[filename] = cf
return CANONICAL_FILENAME_CACHE[filename]
@@ -126,14 +130,35 @@ else:
return filename
+if env.PY2:
+ @contract(returns='unicode')
+ def unicode_filename(filename):
+ """Return a Unicode version of `filename`."""
+ if isinstance(filename, str):
+ filename = filename.decode(sys.getfilesystemencoding())
+ return filename
+else:
+ @contract(filename='unicode', returns='unicode')
+ def unicode_filename(filename):
+ """Return a Unicode version of `filename`."""
+ return filename
+
+
+@contract(returns='unicode')
def abs_file(filename):
"""Return the absolute normalized form of `filename`."""
path = os.path.expandvars(os.path.expanduser(filename))
path = os.path.abspath(os.path.realpath(path))
path = actual_path(path)
+ path = unicode_filename(path)
return path
+RELATIVE_DIR = None
+CANONICAL_FILENAME_CACHE = None
+set_relative_directory()
+
+
def isabs_anywhere(filename):
"""Is `filename` an absolute path on any OS?"""
return ntpath.isabs(filename) or posixpath.isabs(filename)
diff --git a/coverage/parser.py b/coverage/parser.py
index 882c972b..a5e96237 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -211,7 +211,7 @@ class PythonParser(object):
else:
lineno = err.args[1][0] # TokenError
raise NotPython(
- "Couldn't parse '%s' as Python source: '%s' at line %d" % (
+ u"Couldn't parse '%s' as Python source: '%s' at line %d" % (
self.filename, err.args[0], lineno
)
)
@@ -338,7 +338,7 @@ class ByteParser(object):
self.code = compile_unicode(text, filename, "exec")
except SyntaxError as synerr:
raise NotPython(
- "Couldn't parse '%s' as Python source: '%s' at line %d" % (
+ u"Couldn't parse '%s' as Python source: '%s' at line %d" % (
filename, synerr.msg, synerr.lineno
)
)
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 203d41f2..f5bd0bc9 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -6,6 +6,7 @@
import codecs
import keyword
import re
+import sys
import token
import tokenize
@@ -281,6 +282,8 @@ def compile_unicode(source, filename, mode):
"""
source = neuter_encoding_declaration(source)
+ if env.PY2 and isinstance(filename, unicode):
+ filename = filename.encode(sys.getfilesystemencoding(), "replace")
code = compile(source, filename, mode)
return code
diff --git a/coverage/python.py b/coverage/python.py
index 71b50f0c..fe32150a 100644
--- a/coverage/python.py
+++ b/coverage/python.py
@@ -4,6 +4,7 @@
"""Python source expertise for coverage.py"""
import os.path
+import sys
import zipimport
from coverage import env, files
@@ -95,6 +96,9 @@ class PythonFileReporter(FileReporter):
else:
filename = morf
+ if env.PY2 and isinstance(filename, str):
+ filename = filename.decode(sys.getfilesystemencoding())
+
# .pyc files should always refer to a .py instead.
if filename.endswith(('.pyc', '.pyo')):
filename = filename[:-1]
@@ -106,6 +110,8 @@ class PythonFileReporter(FileReporter):
if hasattr(morf, '__name__'):
name = morf.__name__
name = name.replace(".", os.sep) + ".py"
+ if isinstance(name, bytes):
+ name = name.decode(sys.getfilesystemencoding())
else:
name = files.relative_filename(filename)
self.relname = name
@@ -115,6 +121,7 @@ class PythonFileReporter(FileReporter):
self._statements = None
self._excluded = None
+ @contract(returns='unicode')
def relative_filename(self):
return self.relname
diff --git a/coverage/summary.py b/coverage/summary.py
index 4dcaa735..f797e306 100644
--- a/coverage/summary.py
+++ b/coverage/summary.py
@@ -5,6 +5,7 @@
import sys
+from coverage import env
from coverage.report import Reporter
from coverage.results import Numbers
from coverage.misc import NotPython, CoverageException
@@ -20,38 +21,45 @@ class SummaryReporter(Reporter):
def report(self, morfs, outfile=None):
"""Writes a report summarizing coverage statistics per module.
- `outfile` is a file object to write the summary to.
+ `outfile` is a file object to write the summary to. It must be opened
+ for native strings (bytes on Python 2, Unicode on Python 3).
"""
self.find_file_reporters(morfs)
# Prepare the formatting strings
max_name = max([len(fr.relative_filename()) for fr in self.file_reporters] + [5])
- fmt_name = "%%- %ds " % max_name
- fmt_err = "%s %s: %s\n"
- fmt_skip_covered = "\n%s file%s skipped due to complete coverage.\n"
+ fmt_name = u"%%- %ds " % max_name
+ fmt_err = u"%s %s: %s\n"
+ fmt_skip_covered = u"\n%s file%s skipped due to complete coverage.\n"
- header = (fmt_name % "Name") + " Stmts Miss"
- fmt_coverage = fmt_name + "%6d %6d"
+ header = (fmt_name % "Name") + u" Stmts Miss"
+ fmt_coverage = fmt_name + u"%6d %6d"
if self.branches:
- header += " Branch BrPart"
- fmt_coverage += " %6d %6d"
+ header += u" Branch BrPart"
+ fmt_coverage += u" %6d %6d"
width100 = Numbers.pc_str_width()
- header += "%*s" % (width100+4, "Cover")
- fmt_coverage += "%%%ds%%%%" % (width100+3,)
+ header += u"%*s" % (width100+4, "Cover")
+ fmt_coverage += u"%%%ds%%%%" % (width100+3,)
if self.config.show_missing:
- header += " Missing"
- fmt_coverage += " %s"
- rule = "-" * len(header) + "\n"
- header += "\n"
- fmt_coverage += "\n"
+ header += u" Missing"
+ fmt_coverage += u" %s"
+ rule = u"-" * len(header) + u"\n"
+ header += u"\n"
+ fmt_coverage += u"\n"
- if not outfile:
+ if outfile is None:
outfile = sys.stdout
+ if env.PY2:
+ encoding = getattr(outfile, "encoding", None) or sys.getfilesystemencoding()
+ writeout = lambda u: outfile.write(u.encode(encoding))
+ else:
+ writeout = outfile.write
+
# Write the header
- outfile.write(header)
- outfile.write(rule)
+ writeout(header)
+ writeout(rule)
total = Numbers()
skipped_count = 0
@@ -83,7 +91,7 @@ class SummaryReporter(Reporter):
missing_fmtd += ", "
missing_fmtd += branches_fmtd
args += (missing_fmtd,)
- outfile.write(fmt_coverage % args)
+ writeout(fmt_coverage % args)
except Exception:
report_it = not self.config.ignore_errors
if report_it:
@@ -93,22 +101,22 @@ class SummaryReporter(Reporter):
if typ is NotPython and not fr.should_be_python():
report_it = False
if report_it:
- outfile.write(fmt_err % (fr.relative_filename(), typ.__name__, msg))
+ writeout(fmt_err % (fr.relative_filename(), typ.__name__, msg))
if total.n_files > 1:
- outfile.write(rule)
+ writeout(rule)
args = ("TOTAL", total.n_statements, total.n_missing)
if self.branches:
args += (total.n_branches, total.n_partial_branches)
args += (total.pc_covered_str,)
if self.config.show_missing:
args += ("",)
- outfile.write(fmt_coverage % args)
+ writeout(fmt_coverage % args)
if not total.n_files and not skipped_count:
raise CoverageException("No data to report.")
if self.config.skip_covered and skipped_count:
- outfile.write(fmt_skip_covered % (skipped_count, 's' if skipped_count > 1 else ''))
+ writeout(fmt_skip_covered % (skipped_count, 's' if skipped_count > 1 else ''))
return total.n_statements and total.pc_covered
diff --git a/coverage/xmlreport.py b/coverage/xmlreport.py
index b8f8a9e4..d7c2f44a 100644
--- a/coverage/xmlreport.py
+++ b/coverage/xmlreport.py
@@ -8,6 +8,7 @@ import sys
import time
import xml.dom.minidom
+from coverage import env
from coverage import __url__, __version__, files
from coverage.misc import isolate_module
from coverage.report import Reporter
@@ -116,7 +117,10 @@ class XmlReporter(Reporter):
xcoverage.setAttribute("branch-rate", branch_rate)
# Use the DOM to write the output file.
- outfile.write(self.xml_out.toprettyxml())
+ out = self.xml_out.toprettyxml()
+ if env.PY2:
+ out = out.encode("utf8")
+ outfile.write(out)
# Return the total percentage.
denom = lnum_tot + bnum_tot
diff --git a/tests/helpers.py b/tests/helpers.py
index aa094bc1..2723ea59 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -4,6 +4,7 @@
"""Helpers for coverage.py tests."""
import subprocess
+import sys
def run_command(cmd):
@@ -12,8 +13,12 @@ def run_command(cmd):
Returns the exit status code and the combined stdout and stderr.
"""
+ # In some strange cases (PyPy3 in a virtualenv!?) the stdout encoding of
+ # the subprocess is set incorrectly to ascii. Use an environment variable
+ # to force the encoding to be the same as ours.
proc = subprocess.Popen(
- cmd, shell=True,
+ "PYTHONIOENCODING=%s %s" % (sys.__stdout__.encoding, cmd),
+ shell=True,
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
diff --git a/tests/test_files.py b/tests/test_files.py
index e3d33285..e7353235 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -27,7 +27,7 @@ class FilesTest(CoverageTest):
def test_simple(self):
self.make_file("hello.py")
files.set_relative_directory()
- self.assertEqual(files.relative_filename("hello.py"), "hello.py")
+ self.assertEqual(files.relative_filename(u"hello.py"), u"hello.py")
a = self.abs_path("hello.py")
self.assertNotEqual(a, "hello.py")
self.assertEqual(files.relative_filename(a), "hello.py")
diff --git a/tests/test_process.py b/tests/test_process.py
index 4902f7c0..8f69877f 100644
--- a/tests/test_process.py
+++ b/tests/test_process.py
@@ -884,6 +884,82 @@ class FailUnderEmptyFilesTest(CoverageTest):
self.assertEqual(st, 2)
+class UnicodeFilePathsTest(CoverageTest):
+ """Tests of using non-ascii characters in the names of files."""
+
+ def test_snowman_dot_py(self):
+ # Make a file with a non-ascii character in the filename.
+ self.make_file(u"snowman☃.py", "print('snowman')")
+ out = self.run_command(u"coverage run snowman☃.py")
+ self.assertEqual(out, "snowman\n")
+
+ # The HTML report uses ascii-encoded HTML entities.
+ out = self.run_command("coverage html")
+ self.assertEqual(out, "")
+ self.assert_exists("htmlcov/snowman☃_py.html")
+ with open("htmlcov/index.html") as indexf:
+ index = indexf.read()
+ self.assertIn('<a href="snowman&#9731;_py.html">snowman&#9731;.py</a>', index)
+
+ # The XML report is always UTF8-encoded.
+ out = self.run_command("coverage xml")
+ self.assertEqual(out, "")
+ with open("coverage.xml", "rb") as xmlf:
+ xml = xmlf.read()
+ self.assertIn(u' filename="snowman☃.py"'.encode('utf8'), xml)
+ self.assertIn(u' name="snowman☃.py"'.encode('utf8'), xml)
+
+ report_expected = (
+ u"Name Stmts Miss Cover\n"
+ u"---------------------------------\n"
+ u"snowman☃.py 1 0 100%\n"
+ )
+
+ if env.PY2:
+ report_expected = report_expected.encode("utf8")
+
+ out = self.run_command("coverage report")
+ self.assertEqual(out, report_expected)
+
+ def test_snowman_directory(self):
+ # Make a file with a non-ascii character in the directory name.
+ self.make_file(u"☃/snowman.py", "print('snowman')")
+ out = self.run_command(u"coverage run ☃/snowman.py")
+ self.assertEqual(out, "snowman\n")
+
+ # The HTML report uses ascii-encoded HTML entities.
+ out = self.run_command("coverage html")
+ self.assertEqual(out, "")
+ self.assert_exists("htmlcov/☃_snowman_py.html")
+ with open("htmlcov/index.html") as indexf:
+ index = indexf.read()
+ self.assertIn('<a href="&#9731;_snowman_py.html">&#9731;/snowman.py</a>', index)
+
+ # The XML report is always UTF8-encoded.
+ out = self.run_command("coverage xml")
+ self.assertEqual(out, "")
+ with open("coverage.xml", "rb") as xmlf:
+ xml = xmlf.read()
+ self.assertIn(u' filename="☃/snowman.py"'.encode('utf8'), xml)
+ self.assertIn(u' name="snowman.py"'.encode('utf8'), xml)
+ self.assertIn(
+ u'<package branch-rate="0" complexity="0" line-rate="1" name="☃">'.encode('utf8'),
+ xml
+ )
+
+ report_expected = (
+ u"Name Stmts Miss Cover\n"
+ u"----------------------------------\n"
+ u"☃/snowman.py 1 0 100%\n"
+ )
+
+ if env.PY2:
+ report_expected = report_expected.encode("utf8")
+
+ out = self.run_command("coverage report")
+ self.assertEqual(out, report_expected)
+
+
def possible_pth_dirs():
"""Produce a sequence of directories for trying to write .pth files."""
# First look through sys.path, and we find a .pth file, then it's a good
diff --git a/tests/test_summary.py b/tests/test_summary.py
index cf55130a..f1331fea 100644
--- a/tests/test_summary.py
+++ b/tests/test_summary.py
@@ -1,3 +1,4 @@
+# coding: utf8
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
@@ -404,6 +405,33 @@ class SummaryTest(CoverageTest):
"mycode.py NotPython: Couldn't parse 'mycode.py' as Python source: 'error' at line 1"
)
+ def test_snowmandotpy_not_python(self):
+ # We run a .py file with a non-ascii name, and when reporting, we can't
+ # parse it as Python. We should get an error message in the report.
+
+ self.make_file(u"snowman☃.py", "print('snowman')")
+ self.run_command(u"coverage run snowman☃.py")
+ self.make_file(u"snowman☃.py", "This isn't python at all!")
+ report = self.report_from_command(u"coverage report snowman☃.py")
+
+ # Name Stmts Miss Cover
+ # ----------------------------
+ # xxxx NotPython: Couldn't parse '...' as Python source: 'invalid syntax' at line 1
+ # No data to report.
+
+ last = self.squeezed_lines(report)[-2]
+ # The actual file name varies run to run.
+ last = re.sub(r"parse '.*(snowman.*?\.py)", r"parse '\1", last)
+ # The actual error message varies version to version
+ last = re.sub(r": '.*' at", ": 'error' at", last)
+ expected = (
+ u"snowman☃.py NotPython: "
+ u"Couldn't parse 'snowman☃.py' as Python source: 'error' at line 1"
+ )
+ if env.PY2:
+ expected = expected.encode("utf8")
+ self.assertEqual(last, expected)
+
def test_dotpy_not_python_ignored(self):
# We run a .py file, and when reporting, we can't parse it as Python,
# but we've said to ignore errors, so there's no error reported.