summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Florent Xicluna <florent.xicluna@gmail.com> 2012-06-03 02:39:35 -0700
committer Florent Xicluna <florent.xicluna@gmail.com> 2012-06-03 02:39:35 -0700
commit ce11d984df04fdbef7a261f31bd9a4b498859ddb (patch)
tree 938b8fa0db8cc29b51bf31b25961bea621efda3e
parent 010c5f9a66348696e8b38fa1eecce78b3c7ae63c (diff)
parent 6f2f0e2953e406b69978607b9f7f258637a3dbfe (diff)
download pep8-ce11d984df04fdbef7a261f31bd9a4b498859ddb.tar.gz
Merge pull request #69 from myint/master
Open files with proper encoding
-rwxr-xr-x pep8.py 30
-rw-r--r-- testsuite/W29.py 2
2 files changed, 27 insertions, 5 deletions
diff --git a/pep8.py b/pep8.py
index 0fc686e..8de66a2 100755
--- a/pep8.py
+++ b/pep8.py
@@ -1042,16 +1042,36 @@ def python_3000_backticks(logical_line):
if '' == ''.encode():
# Python 2: implicit encoding.
def readlines(filename):
- return open(filename).readlines()
+ try:
+ input_file = open(filename)
+ return input_file.readlines()
+ finally:
+ input_file.close()
def isidentifier(s):
return re.match('[a-zA-Z_]\w*', s)
else:
- # Python 3: decode to latin-1.
- # This function is lazy, it does not read the encoding declaration.
- # XXX: use tokenize.detect_encoding()
+ # Python 3
def readlines(filename):
- return open(filename, encoding='latin-1').readlines()
+ try:
+ input_file = open(filename, 'rb')
+ encoding = tokenize.detect_encoding(input_file.readline)[0]
+ finally:
+ input_file.close()
+
+ try:
+ try:
+ input_file = open(filename, encoding=encoding)
+ return input_file.readlines()
+ finally:
+ input_file.close()
+ except UnicodeDecodeError:
+ # Fall back if files are improperly declared
+ try:
+ input_file = open(filename, encoding='latin-1')
+ return input_file.readlines()
+ finally:
+ input_file.close()
def isidentifier(s):
return s.isidentifier()
diff --git a/testsuite/W29.py b/testsuite/W29.py
index 85f1137..2578f4f 100644
--- a/testsuite/W29.py
+++ b/testsuite/W29.py
@@ -1,3 +1,5 @@
+#: Okay
+# 情
#: W291
print
#: W293