Make UTF-8 detection more robust.

If the 1st line of the Python source is blank/empty, the function assumes that the encoding is 'ascii' and doesn't check the 2nd line for a coding declaration.
author: Roger Hu <roger.hu@gmail.com> 2013-05-25 01:31:27 +0000
committer: Roger Hu <roger.hu@gmail.com> 2013-05-25 01:31:27 +0000
commit: dfe83e5fcdb5cc48880fda91d3d78353cb6ce4f7 (patch)
tree: a735e19c0d9090daaf169a02104402870005e21f /tests/test_phystokens.py
parent: f10c94d8509aa7cc178a72def0907e97f60903c8 (diff)
download: python-coveragepy-dfe83e5fcdb5cc48880fda91d3d78353cb6ce4f7.tar.gz
1 files changed, 15 insertions, 1 deletions
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 6d38a0e..5a9ddac 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -2,7 +2,7 @@
 
 import os, re
 from tests.coveragetest import CoverageTest
-from coverage.phystokens import source_token_lines
+from coverage.phystokens import source_token_lines, source_encoding
 
 
 SIMPLE = """\
@@ -77,3 +77,17 @@ class PhysTokensTest(CoverageTest):
         self.check_file_tokenization(stress)
         stress = os.path.join(HERE, "stress_phystoken_dos.tok")
         self.check_file_tokenization(stress)
+
+    def test_source_encoding_detect_utf8(self):
+        source = """\
+# coding=utf-8
+"""
+        self.assertEqual(source_encoding(source), 'utf-8')
+
+    def test_source_encoding_second_line_detect_utf8(self):
+        """ Verifies that UTF-8 encoding will still be detected in spite of the newline."""
+        source = """\
+
+# coding=utf-8
+"""
+        self.assertEqual(source_encoding(source), 'utf-8')
author	Roger Hu <roger.hu@gmail.com>	2013-05-25 01:31:27 +0000
committer	Roger Hu <roger.hu@gmail.com>	2013-05-25 01:31:27 +0000
commit	dfe83e5fcdb5cc48880fda91d3d78353cb6ce4f7 (patch)
tree	a735e19c0d9090daaf169a02104402870005e21f /tests/test_phystokens.py
parent	f10c94d8509aa7cc178a72def0907e97f60903c8 (diff)
download	python-coveragepy-dfe83e5fcdb5cc48880fda91d3d78353cb6ce4f7.tar.gz