diff options
Diffstat (limited to 'coverage')
-rw-r--r-- | coverage/execfile.py | 3 | ||||
-rw-r--r-- | coverage/parser.py | 16 | ||||
-rw-r--r-- | coverage/phystokens.py | 42 | ||||
-rw-r--r-- | coverage/python.py | 16 |
4 files changed, 56 insertions, 21 deletions
diff --git a/coverage/execfile.py b/coverage/execfile.py index 2d856897..942bfd57 100644 --- a/coverage/execfile.py +++ b/coverage/execfile.py @@ -8,6 +8,7 @@ import types from coverage.backward import BUILTINS from coverage.backward import PYC_MAGIC_NUMBER, imp, importlib_util_find_spec from coverage.misc import ExceptionDuringRun, NoCode, NoSource +from coverage.phystokens import compile_unicode from coverage.python import get_python_source @@ -182,7 +183,7 @@ def make_code_from_py(filename): except (IOError, NoSource): raise NoSource("No file to run: '%s'" % filename) - code = compile(source, filename, "exec") + code = compile_unicode(source, filename, "exec") return code diff --git a/coverage/parser.py b/coverage/parser.py index fc751eb2..173bdf9d 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -9,9 +9,9 @@ import tokenize from coverage.backward import range # pylint: disable=redefined-builtin from coverage.backward import bytes_to_ints from coverage.bytecode import ByteCodes, CodeObjects -from coverage.misc import nice_pair, expensive, join_regex +from coverage.misc import contract, nice_pair, expensive, join_regex from coverage.misc import CoverageException, NoSource, NotPython -from coverage.phystokens import generate_tokens +from coverage.phystokens import compile_unicode, generate_tokens class CodeParser(object): @@ -34,6 +34,7 @@ class CodeParser(object): class PythonParser(CodeParser): """Parse code to find executable lines, excluded lines, etc.""" + @contract(text='unicode|None') def __init__(self, text=None, filename=None, exclude=None): """ Source can be provided as `text`, the text itself, or `filename`, from @@ -53,14 +54,6 @@ class PythonParser(CodeParser): "No source for code: '%s': %s" % (self.filename, err) ) - if self.text: - assert isinstance(self.text, str) - # Scrap the BOM if it exists. - # (Used to do this, but no longer. Not sure what bad will happen - # if we don't do it.) 
- # if ord(self.text[0]) == 0xfeff: - # self.text = self.text[1:] - self.exclude = exclude self.show_tokens = False @@ -342,13 +335,14 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE') class ByteParser(object): """Parse byte codes to understand the structure of code.""" + @contract(text='unicode') def __init__(self, text, code=None, filename=None): self.text = text if code: self.code = code else: try: - self.code = compile(text, filename, "exec") + self.code = compile_unicode(text, filename, "exec") except SyntaxError as synerr: raise NotPython( "Couldn't parse '%s' as Python source: '%s' at line %d" % ( diff --git a/coverage/phystokens.py b/coverage/phystokens.py index ed6bd238..d21d401c 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -8,6 +8,7 @@ import tokenize from coverage import env from coverage.backward import iternext +from coverage.misc import contract def phys_tokens(toks): @@ -148,6 +149,8 @@ class CachedTokenizer(object): generate_tokens = CachedTokenizer().generate_tokens +COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) + def _source_encoding_py2(source): """Determine the encoding for `source`, according to PEP 263. @@ -165,8 +168,6 @@ def _source_encoding_py2(source): # This is mostly code adapted from Py3.2's tokenize module. - cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)") - def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" # Only care about the first 12 characters. 
@@ -204,7 +205,7 @@ def _source_encoding_py2(source): except UnicodeDecodeError: return None - matches = cookie_re.findall(line_string) + matches = COOKIE_RE.findall(line_string) if not matches: return None encoding = _get_normal_name(matches[0]) @@ -265,3 +266,38 @@ if env.PY3: source_encoding = _source_encoding_py3 else: source_encoding = _source_encoding_py2 + + +@contract(source='unicode') +def compile_unicode(source, filename, mode): + """Just like the `compile` builtin, but works on any Unicode string. + + Python 2's compile() builtin has a stupid restriction: if the source string + is Unicode, then it may not have an encoding declaration in it. Why not? + Who knows! + + This function catches that exception, neuters the coding declaration, and + compiles it anyway. + + """ + try: + code = compile(source, filename, mode) + except SyntaxError as synerr: + if synerr.args[0] != "encoding declaration in Unicode string": + raise + source = neuter_encoding_declaration(source) + code = compile(source, filename, mode) + + return code + + +@contract(source='unicode', returns='unicode') +def neuter_encoding_declaration(source): + """Return `source`, with any encoding declaration neutered. + + This function will only ever be called on `source` that has an encoding + declaration, so some edge cases can be ignored. 
+ + """ + source = COOKIE_RE.sub("# (deleted declaration)", source) + return source diff --git a/coverage/python.py b/coverage/python.py index 19212a5b..f335f165 100644 --- a/coverage/python.py +++ b/coverage/python.py @@ -8,12 +8,13 @@ import zipimport from coverage import env from coverage.backward import unicode_class from coverage.files import FileLocator -from coverage.misc import NoSource, join_regex +from coverage.misc import contract, NoSource, join_regex from coverage.parser import PythonParser from coverage.phystokens import source_token_lines, source_encoding from coverage.plugin import FileReporter +@contract(returns='str') def read_python_source(filename): """Read the Python source text from `filename`. @@ -30,8 +31,9 @@ def read_python_source(filename): return f.read() +@contract(returns='unicode') def get_python_source(filename): - """Return the source code, as a str.""" + """Return the source code, as unicode.""" base, ext = os.path.splitext(filename) if ext == ".py" and env.WINDOWS: exts = [".py", ".pyw"] @@ -49,12 +51,15 @@ def get_python_source(filename): source = get_zip_bytes(try_filename) if source is not None: if env.PY3: - source = source.decode(source_encoding(source)) + source = source.decode(source_encoding(source), "replace") break else: # Couldn't find source. raise NoSource("No source for code: '%s'." % filename) + if env.PY2: + source = source.decode(source_encoding(source), "replace") + # Python code should always end with a line with a newline. if source and source[-1] != '\n': source += '\n' @@ -62,6 +67,7 @@ def get_python_source(filename): return source +@contract(returns='bytes|None') def get_zip_bytes(filename): """Get data from `filename` if it is a zip file path. 
@@ -161,12 +167,10 @@ class PythonFileReporter(FileReporter): def exit_counts(self): return self.parser.exit_counts() + @contract(returns='unicode') def source(self): if self._source is None: self._source = get_python_source(self.filename) - if env.PY2: - encoding = source_encoding(self._source) - self._source = self._source.decode(encoding, "replace") assert isinstance(self._source, unicode_class) return self._source |