Do a better job decoding source files. #431

author: Ned Batchelder <ned@nedbatchelder.com> 2015-10-16 21:00:59 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2015-10-16 21:00:59 -0400
commit: c731debc01e436609b73248f6dfd9581d039fc1a (patch)
tree: 709e551020ff2b6dd2a12e081fb14a1053e74204
parent: 9f269ac30b46038ee1fec1c89fb5b5fef5ce6678 (diff)
download: python-coveragepy-c731debc01e436609b73248f6dfd9581d039fc1a.tar.gz
5 files changed, 27 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index bc39853..876cd82 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,14 @@ Change history for Coverage.py
 ==============================
 
 
+Version 4.0.2
+-------------
+
+- More work on supporting unusually encoded source. Fixed `issue 431`_.
+
+.. _issue 431: https://bitbucket.org/ned/coveragepy/issues/431/couldnt-parse-python-file-with-cp1252
+
+
 Version 4.0.1 --- 13 October 2015
 ---------------------------------
 
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 7092d39..5d2ccfc 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -274,20 +274,14 @@ def compile_unicode(source, filename, mode):
 
     Python 2's compile() builtin has a stupid restriction: if the source string
     is Unicode, then it may not have a encoding declaration in it.  Why not?
-    Who knows!
+    Who knows!  It also encodes to utf8, and then tries to interpret those utf8
+    bytes according to the encoding declaration.  Why? Who knows!
 
-    This function catches that exception, neuters the coding declaration, and
-    compiles it anyway.
+    This function neuters the coding declaration, and compiles it.
 
     """
-    try:
-        code = compile(source, filename, mode)
-    except SyntaxError as synerr:
-        if "coding declaration in unicode string" not in synerr.args[0].lower():
-            raise
-        source = neuter_encoding_declaration(source)
-        code = compile(source, filename, mode)
-
+    source = neuter_encoding_declaration(source)
+    code = compile(source, filename, mode)
     return code
 
 
diff --git a/igor.py b/igor.py
index 4ef952b..409fdc9 100644
--- a/igor.py
+++ b/igor.py
@@ -214,11 +214,13 @@ def do_zip_mods():
         assert [ord(c) for c in text] == ords
         print(u"All OK with {encoding}")
         """)
+    # These encodings should match the list in tests/test_python.py
     details = [
         (u'utf8', u'ⓗⓔⓛⓛⓞ, ⓦⓞⓡⓛⓓ'),
         (u'gb2312', u'你好，世界'),
         (u'hebrew', u'שלום, עולם'),
         (u'shift_jis', u'こんにちは世界'),
+        (u'cp1252', u'“hi”'),
     ]
     for encoding, text in details:
         filename = 'encoded_{0}.py'.format(encoding)
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index b4a106f..7bdece7 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -8,7 +8,7 @@ import re
 
 from coverage import env
 from coverage.phystokens import source_token_lines, source_encoding
-from coverage.phystokens import neuter_encoding_declaration
+from coverage.phystokens import neuter_encoding_declaration, compile_unicode
 from coverage.python import get_python_source
 
 from tests.coveragetest import CoverageTest
@@ -165,3 +165,13 @@ class NeuterEncodingDeclarationTest(CoverageTest):
                 DEF_ENCODING,
                 "Wrong encoding in %r" % neutered
             )
+
+
+class CompileUnicodeTest(CoverageTest):
+    """Tests of compiling Unicode strings."""
+
+    run_in_temp_dir = False
+
+    def test_cp1252(self):
+        uni = u"""# coding: cp1252\n# \u201C curly \u201D\n"""
+        compile_unicode(uni, "<string>", "exec")
diff --git a/tests/test_python.py b/tests/test_python.py
index e510e78..ee1e1f9 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -20,7 +20,7 @@ class GetZipBytesTest(CoverageTest):
         # See igor.py, do_zipmods, for the text of these files.
         zip_file = "tests/zipmods.zip"
         sys.path.append(zip_file)       # So we can import the files.
-        for encoding in ["utf8", "gb2312", "hebrew", "shift_jis"]:
+        for encoding in ["utf8", "gb2312", "hebrew", "shift_jis", "cp1252"]:
             filename = zip_file + "/encoded_" + encoding + ".py"
             filename = filename.replace("/", os.sep)
             zip_data = get_zip_bytes(filename)
author	Ned Batchelder <ned@nedbatchelder.com>	2015-10-16 21:00:59 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2015-10-16 21:00:59 -0400
commit	c731debc01e436609b73248f6dfd9581d039fc1a (patch)
tree	709e551020ff2b6dd2a12e081fb14a1053e74204
parent	9f269ac30b46038ee1fec1c89fb5b5fef5ce6678 (diff)
download	python-coveragepy-c731debc01e436609b73248f6dfd9581d039fc1a.tar.gz