summary | refs | log | tree | commit | diff
path: root/coverage
diff options
context:
space:
mode:
Diffstat (limited to 'coverage')
-rw-r--r-- coverage/execfile.py 3
-rw-r--r-- coverage/parser.py 16
-rw-r--r-- coverage/phystokens.py 42
-rw-r--r-- coverage/python.py 16
4 files changed, 56 insertions, 21 deletions
diff --git a/coverage/execfile.py b/coverage/execfile.py
index 2d856897..942bfd57 100644
--- a/coverage/execfile.py
+++ b/coverage/execfile.py
@@ -8,6 +8,7 @@ import types
from coverage.backward import BUILTINS
from coverage.backward import PYC_MAGIC_NUMBER, imp, importlib_util_find_spec
from coverage.misc import ExceptionDuringRun, NoCode, NoSource
+from coverage.phystokens import compile_unicode
from coverage.python import get_python_source
@@ -182,7 +183,7 @@ def make_code_from_py(filename):
except (IOError, NoSource):
raise NoSource("No file to run: '%s'" % filename)
- code = compile(source, filename, "exec")
+ code = compile_unicode(source, filename, "exec")
return code
diff --git a/coverage/parser.py b/coverage/parser.py
index fc751eb2..173bdf9d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -9,9 +9,9 @@ import tokenize
from coverage.backward import range # pylint: disable=redefined-builtin
from coverage.backward import bytes_to_ints
from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import contract, nice_pair, expensive, join_regex
from coverage.misc import CoverageException, NoSource, NotPython
-from coverage.phystokens import generate_tokens
+from coverage.phystokens import compile_unicode, generate_tokens
class CodeParser(object):
@@ -34,6 +34,7 @@ class CodeParser(object):
class PythonParser(CodeParser):
"""Parse code to find executable lines, excluded lines, etc."""
+ @contract(text='unicode|None')
def __init__(self, text=None, filename=None, exclude=None):
"""
Source can be provided as `text`, the text itself, or `filename`, from
@@ -53,14 +54,6 @@ class PythonParser(CodeParser):
"No source for code: '%s': %s" % (self.filename, err)
)
- if self.text:
- assert isinstance(self.text, str)
- # Scrap the BOM if it exists.
- # (Used to do this, but no longer. Not sure what bad will happen
- # if we don't do it.)
- # if ord(self.text[0]) == 0xfeff:
- # self.text = self.text[1:]
-
self.exclude = exclude
self.show_tokens = False
@@ -342,13 +335,14 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE')
class ByteParser(object):
"""Parse byte codes to understand the structure of code."""
+ @contract(text='unicode')
def __init__(self, text, code=None, filename=None):
self.text = text
if code:
self.code = code
else:
try:
- self.code = compile(text, filename, "exec")
+ self.code = compile_unicode(text, filename, "exec")
except SyntaxError as synerr:
raise NotPython(
"Couldn't parse '%s' as Python source: '%s' at line %d" % (
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index ed6bd238..d21d401c 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -8,6 +8,7 @@ import tokenize
from coverage import env
from coverage.backward import iternext
+from coverage.misc import contract
def phys_tokens(toks):
@@ -148,6 +149,8 @@ class CachedTokenizer(object):
generate_tokens = CachedTokenizer().generate_tokens
+COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+
def _source_encoding_py2(source):
"""Determine the encoding for `source`, according to PEP 263.
@@ -165,8 +168,6 @@ def _source_encoding_py2(source):
# This is mostly code adapted from Py3.2's tokenize module.
- cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")
-
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
@@ -204,7 +205,7 @@ def _source_encoding_py2(source):
except UnicodeDecodeError:
return None
- matches = cookie_re.findall(line_string)
+ matches = COOKIE_RE.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
@@ -265,3 +266,38 @@ if env.PY3:
source_encoding = _source_encoding_py3
else:
source_encoding = _source_encoding_py2
+
+
+@contract(source='unicode')
+def compile_unicode(source, filename, mode):
+ """Just like the `compile` builtin, but works on any Unicode string.
+
+ Python 2's compile() builtin has a stupid restriction: if the source string
+ is Unicode, then it may not have an encoding declaration in it. Why not?
+ Who knows!
+
+ This function catches that exception, neuters the coding declaration, and
+ compiles it anyway. Any other SyntaxError is re-raised unchanged.
+
+ """
+ try:
+ code = compile(source, filename, mode)
+ except SyntaxError as synerr:
+ if synerr.args[0] != "encoding declaration in Unicode string":
+ raise
+ source = neuter_encoding_declaration(source)
+ code = compile(source, filename, mode)
+
+ return code
+
+
+@contract(source='unicode', returns='unicode')
+def neuter_encoding_declaration(source):
+    """Return `source`, with any encoding declaration neutered.
+
+    Only lines 1 and 2 are rewritten, one at a time (PEP 263 looks nowhere
+    else), so later text is untouched and the line count never changes.
+    """
+    head = source.split("\n", 2)
+    head[:2] = [COOKIE_RE.sub("# (deleted declaration)", s) for s in head[:2]]
+    return "\n".join(head)
diff --git a/coverage/python.py b/coverage/python.py
index 19212a5b..f335f165 100644
--- a/coverage/python.py
+++ b/coverage/python.py
@@ -8,12 +8,13 @@ import zipimport
from coverage import env
from coverage.backward import unicode_class
from coverage.files import FileLocator
-from coverage.misc import NoSource, join_regex
+from coverage.misc import contract, NoSource, join_regex
from coverage.parser import PythonParser
from coverage.phystokens import source_token_lines, source_encoding
from coverage.plugin import FileReporter
+@contract(returns='str')
def read_python_source(filename):
"""Read the Python source text from `filename`.
@@ -30,8 +31,9 @@ def read_python_source(filename):
return f.read()
+@contract(returns='unicode')
def get_python_source(filename):
- """Return the source code, as a str."""
+ """Return the source code, as unicode."""
base, ext = os.path.splitext(filename)
if ext == ".py" and env.WINDOWS:
exts = [".py", ".pyw"]
@@ -49,12 +51,15 @@ def get_python_source(filename):
source = get_zip_bytes(try_filename)
if source is not None:
if env.PY3:
- source = source.decode(source_encoding(source))
+ source = source.decode(source_encoding(source), "replace")
break
else:
# Couldn't find source.
raise NoSource("No source for code: '%s'." % filename)
+ if env.PY2:
+ source = source.decode(source_encoding(source), "replace")
+
# Python code should always end with a line with a newline.
if source and source[-1] != '\n':
source += '\n'
@@ -62,6 +67,7 @@ def get_python_source(filename):
return source
+@contract(returns='bytes|None')
def get_zip_bytes(filename):
"""Get data from `filename` if it is a zip file path.
@@ -161,12 +167,10 @@ class PythonFileReporter(FileReporter):
def exit_counts(self):
return self.parser.exit_counts()
+ @contract(returns='unicode')
def source(self):
if self._source is None:
self._source = get_python_source(self.filename)
- if env.PY2:
- encoding = source_encoding(self._source)
- self._source = self._source.decode(encoding, "replace")
assert isinstance(self._source, unicode_class)
return self._source