summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--coverage/execfile.py3
-rw-r--r--coverage/parser.py16
-rw-r--r--coverage/phystokens.py42
-rw-r--r--coverage/python.py16
-rw-r--r--tests/test_parser.py5
-rw-r--r--tests/test_phystokens.py41
6 files changed, 93 insertions, 30 deletions
diff --git a/coverage/execfile.py b/coverage/execfile.py
index 2d856897..942bfd57 100644
--- a/coverage/execfile.py
+++ b/coverage/execfile.py
@@ -8,6 +8,7 @@ import types
from coverage.backward import BUILTINS
from coverage.backward import PYC_MAGIC_NUMBER, imp, importlib_util_find_spec
from coverage.misc import ExceptionDuringRun, NoCode, NoSource
+from coverage.phystokens import compile_unicode
from coverage.python import get_python_source
@@ -182,7 +183,7 @@ def make_code_from_py(filename):
except (IOError, NoSource):
raise NoSource("No file to run: '%s'" % filename)
- code = compile(source, filename, "exec")
+ code = compile_unicode(source, filename, "exec")
return code
diff --git a/coverage/parser.py b/coverage/parser.py
index fc751eb2..173bdf9d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -9,9 +9,9 @@ import tokenize
from coverage.backward import range # pylint: disable=redefined-builtin
from coverage.backward import bytes_to_ints
from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import contract, nice_pair, expensive, join_regex
from coverage.misc import CoverageException, NoSource, NotPython
-from coverage.phystokens import generate_tokens
+from coverage.phystokens import compile_unicode, generate_tokens
class CodeParser(object):
@@ -34,6 +34,7 @@ class CodeParser(object):
class PythonParser(CodeParser):
"""Parse code to find executable lines, excluded lines, etc."""
+ @contract(text='unicode|None')
def __init__(self, text=None, filename=None, exclude=None):
"""
Source can be provided as `text`, the text itself, or `filename`, from
@@ -53,14 +54,6 @@ class PythonParser(CodeParser):
"No source for code: '%s': %s" % (self.filename, err)
)
- if self.text:
- assert isinstance(self.text, str)
- # Scrap the BOM if it exists.
- # (Used to do this, but no longer. Not sure what bad will happen
- # if we don't do it.)
- # if ord(self.text[0]) == 0xfeff:
- # self.text = self.text[1:]
-
self.exclude = exclude
self.show_tokens = False
@@ -342,13 +335,14 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE')
class ByteParser(object):
"""Parse byte codes to understand the structure of code."""
+ @contract(text='unicode')
def __init__(self, text, code=None, filename=None):
self.text = text
if code:
self.code = code
else:
try:
- self.code = compile(text, filename, "exec")
+ self.code = compile_unicode(text, filename, "exec")
except SyntaxError as synerr:
raise NotPython(
"Couldn't parse '%s' as Python source: '%s' at line %d" % (
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index ed6bd238..d21d401c 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -8,6 +8,7 @@ import tokenize
from coverage import env
from coverage.backward import iternext
+from coverage.misc import contract
def phys_tokens(toks):
@@ -148,6 +149,8 @@ class CachedTokenizer(object):
generate_tokens = CachedTokenizer().generate_tokens
+COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+
def _source_encoding_py2(source):
"""Determine the encoding for `source`, according to PEP 263.
@@ -165,8 +168,6 @@ def _source_encoding_py2(source):
# This is mostly code adapted from Py3.2's tokenize module.
- cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")
-
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
@@ -204,7 +205,7 @@ def _source_encoding_py2(source):
except UnicodeDecodeError:
return None
- matches = cookie_re.findall(line_string)
+ matches = COOKIE_RE.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
@@ -265,3 +266,38 @@ if env.PY3:
source_encoding = _source_encoding_py3
else:
source_encoding = _source_encoding_py2
+
+
+@contract(source='unicode')
+def compile_unicode(source, filename, mode):
+ """Just like the `compile` builtin, but works on any Unicode string.
+
+ Python 2's compile() builtin has a stupid restriction: if the source string
+    is Unicode, then it may not have an encoding declaration in it. Why not?
+ Who knows!
+
+ This function catches that exception, neuters the coding declaration, and
+ compiles it anyway.
+
+ """
+ try:
+ code = compile(source, filename, mode)
+ except SyntaxError as synerr:
+ if synerr.args[0] != "encoding declaration in Unicode string":
+ raise
+ source = neuter_encoding_declaration(source)
+ code = compile(source, filename, mode)
+
+ return code
+
+
+@contract(source='unicode', returns='unicode')
+def neuter_encoding_declaration(source):
+ """Return `source`, with any encoding declaration neutered.
+
+ This function will only ever be called on `source` that has an encoding
+ declaration, so some edge cases can be ignored.
+
+ """
+ source = COOKIE_RE.sub("# (deleted declaration)", source)
+ return source
diff --git a/coverage/python.py b/coverage/python.py
index 19212a5b..f335f165 100644
--- a/coverage/python.py
+++ b/coverage/python.py
@@ -8,12 +8,13 @@ import zipimport
from coverage import env
from coverage.backward import unicode_class
from coverage.files import FileLocator
-from coverage.misc import NoSource, join_regex
+from coverage.misc import contract, NoSource, join_regex
from coverage.parser import PythonParser
from coverage.phystokens import source_token_lines, source_encoding
from coverage.plugin import FileReporter
+@contract(returns='str')
def read_python_source(filename):
"""Read the Python source text from `filename`.
@@ -30,8 +31,9 @@ def read_python_source(filename):
return f.read()
+@contract(returns='unicode')
def get_python_source(filename):
- """Return the source code, as a str."""
+ """Return the source code, as unicode."""
base, ext = os.path.splitext(filename)
if ext == ".py" and env.WINDOWS:
exts = [".py", ".pyw"]
@@ -49,12 +51,15 @@ def get_python_source(filename):
source = get_zip_bytes(try_filename)
if source is not None:
if env.PY3:
- source = source.decode(source_encoding(source))
+ source = source.decode(source_encoding(source), "replace")
break
else:
# Couldn't find source.
raise NoSource("No source for code: '%s'." % filename)
+ if env.PY2:
+ source = source.decode(source_encoding(source), "replace")
+
# Python code should always end with a line with a newline.
if source and source[-1] != '\n':
source += '\n'
@@ -62,6 +67,7 @@ def get_python_source(filename):
return source
+@contract(returns='bytes|None')
def get_zip_bytes(filename):
"""Get data from `filename` if it is a zip file path.
@@ -161,12 +167,10 @@ class PythonFileReporter(FileReporter):
def exit_counts(self):
return self.parser.exit_counts()
+ @contract(returns='unicode')
def source(self):
if self._source is None:
self._source = get_python_source(self.filename)
- if env.PY2:
- encoding = source_encoding(self._source)
- self._source = self._source.decode(encoding, "replace")
assert isinstance(self._source, unicode_class)
return self._source
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 81916a98..9359c408 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,7 +1,10 @@
"""Tests for Coverage.py's code parsing."""
import textwrap
+
from tests.coveragetest import CoverageTest
+
+from coverage import env
from coverage.parser import PythonParser
@@ -12,6 +15,8 @@ class PythonParserTest(CoverageTest):
def parse_source(self, text):
"""Parse `text` as source, and return the `PythonParser` used."""
+ if env.PY2:
+ text = text.decode("ascii")
text = textwrap.dedent(text)
parser = PythonParser(text=text, exclude="nocover")
parser.parse_source()
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 7edd6aa4..19f813ae 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -5,6 +5,7 @@ import re
from coverage import env
from coverage.phystokens import source_token_lines, source_encoding
+from coverage.phystokens import neuter_encoding_declaration
from tests.coveragetest import CoverageTest
@@ -92,21 +93,27 @@ else:
DEF_ENCODING = "ascii"
+ENCODING_DECLARATION_SOURCES = [
+ # Various forms from http://www.python.org/dev/peps/pep-0263/
+ b"# coding=cp850\n\n",
+ b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n",
+ b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n",
+ b"# This Python file uses this encoding: cp850\n",
+ b"# This file uses a different encoding:\n# coding: cp850\n",
+]
+
class SourceEncodingTest(CoverageTest):
"""Tests of source_encoding() for detecting encodings."""
run_in_temp_dir = False
def test_detect_source_encoding(self):
- # Various forms from http://www.python.org/dev/peps/pep-0263/
- source = b"# coding=cp850\n\n"
- self.assertEqual(source_encoding(source), 'cp850')
- source = b"#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
- self.assertEqual(source_encoding(source), 'utf-8')
- source = b"#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
- self.assertEqual(source_encoding(source), 'utf8')
- source = b"# This Python file uses this encoding: utf-8\n"
- self.assertEqual(source_encoding(source), 'utf-8')
+ for source in ENCODING_DECLARATION_SOURCES:
+ self.assertEqual(
+ source_encoding(source),
+ 'cp850',
+ "Wrong encoding in %r" % source
+ )
def test_detect_source_encoding_not_in_comment(self):
if env.PYPY and env.PY3:
@@ -140,3 +147,19 @@ class SourceEncodingTest(CoverageTest):
source = b"\xEF\xBB\xBF# coding: cp850\n"
with self.assertRaises(SyntaxError):
source_encoding(source)
+
+
+class NeuterEncodingDeclarationTest(CoverageTest):
+ """Tests of phystokens.neuter_encoding_declaration()."""
+
+ run_in_temp_dir = False
+
+ def test_neuter_encoding_declaration(self):
+ for source in ENCODING_DECLARATION_SOURCES:
+ neutered = neuter_encoding_declaration(source.decode("ascii"))
+ neutered = neutered.encode("ascii")
+ self.assertEqual(
+ source_encoding(neutered),
+ DEF_ENCODING,
+ "Wrong encoding in %r" % neutered
+ )