summaryrefslogtreecommitdiff
path: root/setuptools
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools')
-rwxr-xr-xsetuptools/command/egg_info.py43
-rw-r--r--setuptools/tests/test_sdist.py1
-rw-r--r--setuptools/unicode_utils.py41
3 files changed, 74 insertions, 11 deletions
diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py
index 3a26f201..33fe147e 100755
--- a/setuptools/command/egg_info.py
+++ b/setuptools/command/egg_info.py
@@ -1,6 +1,7 @@
"""setuptools.command.egg_info
Create a distribution's .egg-info directory and contents"""
+from __future__ import with_statement
import os
import re
@@ -10,13 +11,14 @@ from setuptools import Command
import distutils.errors
from distutils import log
from setuptools.command.sdist import sdist
-from setuptools.compat import basestring, PY3
+from setuptools.compat import basestring, PY3, unicode
from setuptools import svn_utils
from distutils.util import convert_path
from distutils.filelist import FileList as _FileList
from pkg_resources import (parse_requirements, safe_name, parse_version,
safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
from setuptools.command.sdist import walk_revctrl
+import setuptools.unicode_utils as unicode_utils
class egg_info(Command):
@@ -226,15 +228,28 @@ class FileList(_FileList):
self.files = list(filter(self._safe_path, self.files))
def _safe_path(self, path):
- if not PY3:
- return os.path.exists(path)
+ enc_warn = "'%s' not %s encodable -- skipping"
+
+ #To avoid accidental trans-codings errors, first to unicode
+ u_path = unicode_utils.filesys_decode(path)
+ if u_path is None:
+ log.warn("'%s' in unexpected encoding -- skipping" % path)
+ return False
+
+ #Must ensure utf-8 encodability
+ utf8_path = unicode_utils.try_encode(u_path, "utf-8")
+ if utf8_path is None:
+ log.warn(enc_warn, path, 'utf-8')
+ return False
try:
- if os.path.exists(path) or os.path.exists(path.encode('utf-8')):
+ #accept is either way checks out
+ if os.path.exists(u_path) or os.path.exists(utf8_path):
return True
+ #this will catch any encode errors decoding u_path
except UnicodeEncodeError:
- log.warn("'%s' not %s encodable -- skipping", path,
- sys.getfilesystemencoding())
+ log.warn(enc_warn, path, sys.getfilesystemencoding())
+
class manifest_maker(sdist):
@@ -262,6 +277,10 @@ class manifest_maker(sdist):
self.filelist.remove_duplicates()
self.write_manifest()
+ def _manifest_normalize(self, path):
+ path = unicode_utils.filesys_decode(path)
+ return path.replace(os.sep, '/')
+
def write_manifest(self):
"""
Write the file list in 'self.filelist' to the manifest file
@@ -269,7 +288,8 @@ class manifest_maker(sdist):
"""
self.filelist._repair()
- files = [f.replace(os.sep, '/') for f in self.filelist.files]
+ #Now _repairs should encodability, but not unicode
+ files = [self._manifest_normalize(f) for f in self.filelist.files]
msg = "writing manifest file '%s'" % self.manifest
self.execute(write_file, (self.manifest, files), msg)
@@ -303,10 +323,13 @@ def write_file(filename, contents):
sequence of strings without line terminators) to it.
"""
contents = "\n".join(contents)
+
+ #assuming the contents has been vetted for utf-8 encoding
contents = contents.encode("utf-8")
- f = open(filename, "wb") # always write POSIX-style manifest
- f.write(contents)
- f.close()
+
+ with open(filename, "wb") as f: # always write POSIX-style manifest
+ f.write(contents)
+
def write_pkg_info(cmd, basename, filename):
log.info("writing %s", filename)
diff --git a/setuptools/tests/test_sdist.py b/setuptools/tests/test_sdist.py
index cc426f66..ada86189 100644
--- a/setuptools/tests/test_sdist.py
+++ b/setuptools/tests/test_sdist.py
@@ -414,7 +414,6 @@ class TestSdistTest(unittest.TestCase):
except UnicodeDecodeError:
self.assertFalse(filename in cmd.filelist.files)
-
class TestDummyOutput(environment.ZippedEnvironment):
def setUp(self):
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
new file mode 100644
index 00000000..d2de941a
--- /dev/null
+++ b/setuptools/unicode_utils.py
@@ -0,0 +1,41 @@
+import unicodedata
+import sys
+from setuptools.compat import unicode as decoded_string
+
+
+# HFS Plus uses decomposed UTF-8
+def decompose(path):
+ if isinstance(path, decoded_string):
+ return unicodedata.normalize('NFD', path)
+ try:
+ path = path.decode('utf-8')
+ path = unicodedata.normalize('NFD', path)
+ path = path.encode('utf-8')
+ except UnicodeError:
+ pass # Not UTF-8
+ return path
+
+
+def filesys_decode(path):
+ """
+ Ensure that the given path is decoded,
+ NONE when no expected encoding works
+ """
+
+ fs_enc = sys.getfilesystemencoding()
+ if isinstance(path, decoded_string):
+ return path
+
+ for enc in (fs_enc, "utf-8"):
+ try:
+ return path.decode(enc)
+ except UnicodeDecodeError:
+ continue
+
+
+def try_encode(string, enc):
+ "turn unicode encoding into a functional routine"
+ try:
+ return string.encode(enc)
+ except UnicodeEncodeError:
+ return None