summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ned Batchelder <ned@nedbatchelder.com> 2015-10-16 21:00:59 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2015-10-16 21:00:59 -0400
commit: c731debc01e436609b73248f6dfd9581d039fc1a (patch)
tree: 709e551020ff2b6dd2a12e081fb14a1053e74204
parent: 9f269ac30b46038ee1fec1c89fb5b5fef5ce6678 (diff)
download: python-coveragepy-c731debc01e436609b73248f6dfd9581d039fc1a.tar.gz
Do a better job decoding source files. #431
-rw-r--r-- CHANGES.rst 8
-rw-r--r-- coverage/phystokens.py 16
-rw-r--r-- igor.py 2
-rw-r--r-- tests/test_phystokens.py 12
-rw-r--r-- tests/test_python.py 2
5 files changed, 27 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index bc39853..876cd82 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,14 @@ Change history for Coverage.py
==============================
+Version 4.0.2
+-------------
+
+- More work on supporting unusually encoded source. Fixed `issue 431`_.
+
+.. _issue 431: https://bitbucket.org/ned/coveragepy/issues/431/couldnt-parse-python-file-with-cp1252
+
+
Version 4.0.1 --- 13 October 2015
---------------------------------
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 7092d39..5d2ccfc 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -274,20 +274,14 @@ def compile_unicode(source, filename, mode):
Python 2's compile() builtin has a stupid restriction: if the source string
is Unicode, then it may not have a encoding declaration in it. Why not?
- Who knows!
+ Who knows! It also decodes to utf8, and then tries to interpret those utf8
+ bytes according to the encoding declaration. Why? Who knows!
- This function catches that exception, neuters the coding declaration, and
- compiles it anyway.
+ This function neuters the coding declaration, and compiles it.
"""
- try:
- code = compile(source, filename, mode)
- except SyntaxError as synerr:
- if "coding declaration in unicode string" not in synerr.args[0].lower():
- raise
- source = neuter_encoding_declaration(source)
- code = compile(source, filename, mode)
-
+ source = neuter_encoding_declaration(source)
+ code = compile(source, filename, mode)
return code
diff --git a/igor.py b/igor.py
index 4ef952b..409fdc9 100644
--- a/igor.py
+++ b/igor.py
@@ -214,11 +214,13 @@ def do_zip_mods():
assert [ord(c) for c in text] == ords
print(u"All OK with {encoding}")
""")
+ # These encodings should match the list in tests/test_python.py
details = [
(u'utf8', u'ⓗⓔⓛⓛⓞ, ⓦⓞⓡⓛⓓ'),
(u'gb2312', u'你好,世界'),
(u'hebrew', u'שלום, עולם'),
(u'shift_jis', u'こんにちは世界'),
+ (u'cp1252', u'“hi”'),
]
for encoding, text in details:
filename = 'encoded_{0}.py'.format(encoding)
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index b4a106f..7bdece7 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -8,7 +8,7 @@ import re
from coverage import env
from coverage.phystokens import source_token_lines, source_encoding
-from coverage.phystokens import neuter_encoding_declaration
+from coverage.phystokens import neuter_encoding_declaration, compile_unicode
from coverage.python import get_python_source
from tests.coveragetest import CoverageTest
@@ -165,3 +165,13 @@ class NeuterEncodingDeclarationTest(CoverageTest):
DEF_ENCODING,
"Wrong encoding in %r" % neutered
)
+
+
+class CompileUnicodeTest(CoverageTest):
+ """Tests of compiling Unicode strings."""
+
+ run_in_temp_dir = False
+
+ def test_cp1252(self):
+ uni = u"""# coding: cp1252\n# \u201C curly \u201D\n"""
+ compile_unicode(uni, "<string>", "exec")
diff --git a/tests/test_python.py b/tests/test_python.py
index e510e78..ee1e1f9 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -20,7 +20,7 @@ class GetZipBytesTest(CoverageTest):
# See igor.py, do_zipmods, for the text of these files.
zip_file = "tests/zipmods.zip"
sys.path.append(zip_file) # So we can import the files.
- for encoding in ["utf8", "gb2312", "hebrew", "shift_jis"]:
+ for encoding in ["utf8", "gb2312", "hebrew", "shift_jis", "cp1252"]:
filename = zip_file + "/encoded_" + encoding + ".py"
filename = filename.replace("/", os.sep)
zip_data = get_zip_bytes(filename)