diff options
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- | coverage/phystokens.py | 30 |
1 files changed, 11 insertions, 19 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py index 7092d39e..b34b1c3b 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -6,6 +6,7 @@ import codecs import keyword import re +import sys import token import tokenize @@ -152,7 +153,7 @@ class CachedTokenizer(object): generate_tokens = CachedTokenizer().generate_tokens -COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) +COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE) @contract(source='bytes') def _source_encoding_py2(source): @@ -274,30 +275,21 @@ def compile_unicode(source, filename, mode): Python 2's compile() builtin has a stupid restriction: if the source string is Unicode, then it may not have a encoding declaration in it. Why not? - Who knows! + Who knows! It also decodes to utf8, and then tries to interpret those utf8 + bytes according to the encoding declaration. Why? Who knows! - This function catches that exception, neuters the coding declaration, and - compiles it anyway. + This function neuters the coding declaration, and compiles it. """ - try: - code = compile(source, filename, mode) - except SyntaxError as synerr: - if "coding declaration in unicode string" not in synerr.args[0].lower(): - raise - source = neuter_encoding_declaration(source) - code = compile(source, filename, mode) - + source = neuter_encoding_declaration(source) + if env.PY2 and isinstance(filename, unicode): + filename = filename.encode(sys.getfilesystemencoding(), "replace") + code = compile(source, filename, mode) return code @contract(source='unicode', returns='unicode') def neuter_encoding_declaration(source): - """Return `source`, with any encoding declaration neutered. - - This function will only ever be called on `source` that has an encoding - declaration, so some edge cases can be ignored. - - """ - source = COOKIE_RE.sub("# (deleted declaration)", source) + """Return `source`, with any encoding declaration neutered.""" + source = COOKIE_RE.sub("# (deleted declaration)", source, count=1) return source |