From 012f8a90501d314a694c136b73179af12b2f41e8 Mon Sep 17 00:00:00 2001 From: Adam Hupp Date: Fri, 30 May 2014 14:18:30 -0700 Subject: Always use utf-8 regardless of filesystem encoding --- magic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/magic.py b/magic.py index cd5ff24..10685ac 100644 --- a/magic.py +++ b/magic.py @@ -193,14 +193,15 @@ def coerce_filename(filename): return None # ctypes will implicitly convert unicode strings to bytes with - # .encode('ascii'). A more useful default here is - # getfilesystemencoding(). We need to leave byte-str unchanged. + # .encode('ascii'). If you use the filesystem encoding + # then you'll get inconsistent behavior (crashes) depending on the user's + # LANG environment variable is_unicode = (sys.version_info[0] <= 2 and isinstance(filename, unicode)) or \ (sys.version_info[0] >= 3 and isinstance(filename, str)) if is_unicode: - return filename.encode(sys.getfilesystemencoding()) + return filename.encode('utf-8') else: return filename -- cgit v1.2.1