Merge pull request #69 from myint/master

Open files with proper encoding
author: Florent Xicluna <florent.xicluna@gmail.com> 2012-06-03 02:39:35 -0700
committer: Florent Xicluna <florent.xicluna@gmail.com> 2012-06-03 02:39:35 -0700
commit: ce11d984df04fdbef7a261f31bd9a4b498859ddb (patch)
tree: 938b8fa0db8cc29b51bf31b25961bea621efda3e
parent: 010c5f9a66348696e8b38fa1eecce78b3c7ae63c (diff)
parent: 6f2f0e2953e406b69978607b9f7f258637a3dbfe (diff)
download: pep8-ce11d984df04fdbef7a261f31bd9a4b498859ddb.tar.gz
2 files changed, 27 insertions, 5 deletions
diff --git a/pep8.py b/pep8.py
index 0fc686e..8de66a2 100755
--- a/pep8.py
+++ b/pep8.py
@@ -1042,16 +1042,36 @@ def python_3000_backticks(logical_line):
 if '' == ''.encode():
     # Python 2: implicit encoding.
     def readlines(filename):
-        return open(filename).readlines()
+        try:
+            input_file = open(filename)
+            return input_file.readlines()
+        finally:
+            input_file.close()
 
     def isidentifier(s):
         return re.match('[a-zA-Z_]\w*', s)
 else:
-    # Python 3: decode to latin-1.
-    # This function is lazy, it does not read the encoding declaration.
-    # XXX: use tokenize.detect_encoding()
+    # Python 3
     def readlines(filename):
-        return open(filename, encoding='latin-1').readlines()
+        try:
+            input_file = open(filename, 'rb')
+            encoding = tokenize.detect_encoding(input_file.readline)[0]
+        finally:
+            input_file.close()
+
+        try:
+            try:
+                input_file = open(filename, encoding=encoding)
+                return input_file.readlines()
+            finally:
+                input_file.close()
+        except UnicodeDecodeError:
+            # Fall back if files are improperly declared
+            try:
+                input_file = open(filename, encoding='latin-1')
+                return input_file.readlines()
+            finally:
+                input_file.close()
 
     def isidentifier(s):
         return s.isidentifier()
diff --git a/testsuite/W29.py b/testsuite/W29.py
index 85f1137..2578f4f 100644
--- a/testsuite/W29.py
+++ b/testsuite/W29.py
@@ -1,3 +1,5 @@
+#: Okay
+# 情
 #: W291
 print 
 #: W293
author	Florent Xicluna <florent.xicluna@gmail.com>	2012-06-03 02:39:35 -0700
committer	Florent Xicluna <florent.xicluna@gmail.com>	2012-06-03 02:39:35 -0700
commit	ce11d984df04fdbef7a261f31bd9a4b498859ddb (patch)
tree	938b8fa0db8cc29b51bf31b25961bea621efda3e
parent	010c5f9a66348696e8b38fa1eecce78b3c7ae63c (diff)
parent	6f2f0e2953e406b69978607b9f7f258637a3dbfe (diff)
download	pep8-ce11d984df04fdbef7a261f31bd9a4b498859ddb.tar.gz