summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adam Hupp <adam@hupp.org> 2014-05-30 14:18:30 -0700
committer Adam Hupp <adam@hupp.org> 2014-05-30 14:18:30 -0700
commit 012f8a90501d314a694c136b73179af12b2f41e8 (patch)
tree 87657d98e5c62398831974afd286bb356751ab0b
parent 5c5036c96472eba1e5908e8b5527295d08aff106 (diff)
download python-magic-012f8a90501d314a694c136b73179af12b2f41e8.tar.gz
Always use utf-8 regardless of filesystem encoding
-rw-r--r-- magic.py 7
1 files changed, 4 insertions, 3 deletions
diff --git a/magic.py b/magic.py
index cd5ff24..10685ac 100644
--- a/magic.py
+++ b/magic.py
@@ -193,14 +193,15 @@ def coerce_filename(filename):
return None
# ctypes will implicitly convert unicode strings to bytes with
- # .encode('ascii'). A more useful default here is
- # getfilesystemencoding(). We need to leave byte-str unchanged.
+ # .encode('ascii'). If you use the filesystem encoding
+ # then you'll get inconsistent behavior (crashes) depending on the user's
+ # LANG environment variable
is_unicode = (sys.version_info[0] <= 2 and
isinstance(filename, unicode)) or \
(sys.version_info[0] >= 3 and
isinstance(filename, str))
if is_unicode:
- return filename.encode(sys.getfilesystemencoding())
+ return filename.encode('utf-8')
else:
return filename