summary | refs | log | tree | commit | diff
path: root/coverage/phystokens.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- coverage/phystokens.py 30
1 files changed, 11 insertions, 19 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 7092d39e..b34b1c3b 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -6,6 +6,7 @@
import codecs
import keyword
import re
+import sys
import token
import tokenize
@@ -152,7 +153,7 @@ class CachedTokenizer(object):
generate_tokens = CachedTokenizer().generate_tokens
-COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)
@contract(source='bytes')
def _source_encoding_py2(source):
@@ -274,30 +275,21 @@ def compile_unicode(source, filename, mode):
Python 2's compile() builtin has a stupid restriction: if the source string
is Unicode, then it may not have a encoding declaration in it. Why not?
- Who knows!
+ Who knows! It also decodes to utf8, and then tries to interpret those utf8
+ bytes according to the encoding declaration. Why? Who knows!
- This function catches that exception, neuters the coding declaration, and
- compiles it anyway.
+ This function neuters the coding declaration, and compiles it.
"""
- try:
- code = compile(source, filename, mode)
- except SyntaxError as synerr:
- if "coding declaration in unicode string" not in synerr.args[0].lower():
- raise
- source = neuter_encoding_declaration(source)
- code = compile(source, filename, mode)
-
+ source = neuter_encoding_declaration(source)
+ if env.PY2 and isinstance(filename, unicode):
+ filename = filename.encode(sys.getfilesystemencoding(), "replace")
+ code = compile(source, filename, mode)
return code
@contract(source='unicode', returns='unicode')
def neuter_encoding_declaration(source):
- """Return `source`, with any encoding declaration neutered.
-
- This function will only ever be called on `source` that has an encoding
- declaration, so some edge cases can be ignored.
-
- """
- source = COOKIE_RE.sub("# (deleted declaration)", source)
+ """Return `source`, with any encoding declaration neutered."""
+ source = COOKIE_RE.sub("# (deleted declaration)", source, count=1)
return source