diff options
Diffstat (limited to 'setuptools')
| -rwxr-xr-x | setuptools/command/egg_info.py | 43 | ||||
| -rw-r--r-- | setuptools/tests/test_sdist.py | 1 | ||||
| -rw-r--r-- | setuptools/unicode_utils.py | 41 |
3 files changed, 74 insertions, 11 deletions
diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py
index 3a26f201..33fe147e 100755
--- a/setuptools/command/egg_info.py
+++ b/setuptools/command/egg_info.py
@@ -1,6 +1,7 @@
 """setuptools.command.egg_info
 
 Create a distribution's .egg-info directory and contents"""
+from __future__ import with_statement
 
 import os
 import re
@@ -10,13 +11,14 @@ from setuptools import Command
 import distutils.errors
 from distutils import log
 from setuptools.command.sdist import sdist
-from setuptools.compat import basestring, PY3
+from setuptools.compat import basestring, PY3, unicode
 from setuptools import svn_utils
 from distutils.util import convert_path
 from distutils.filelist import FileList as _FileList
 from pkg_resources import (parse_requirements, safe_name, parse_version,
     safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
 from setuptools.command.sdist import walk_revctrl
+import setuptools.unicode_utils as unicode_utils
 
 
 class egg_info(Command):
@@ -226,15 +228,28 @@ class FileList(_FileList):
         self.files = list(filter(self._safe_path, self.files))
 
     def _safe_path(self, path):
-        if not PY3:
-            return os.path.exists(path)
+        enc_warn = "'%s' not %s encodable -- skipping"
+
+        #To avoid accidental trans-codings errors, first to unicode
+        u_path = unicode_utils.filesys_decode(path)
+        if u_path is None:
+            log.warn("'%s' in unexpected encoding -- skipping" % path)
+            return False
+
+        #Must ensure utf-8 encodability
+        utf8_path = unicode_utils.try_encode(u_path, "utf-8")
+        if utf8_path is None:
+            log.warn(enc_warn, path, 'utf-8')
+            return False
 
         try:
-            if os.path.exists(path) or os.path.exists(path.encode('utf-8')):
+            #accept is either way checks out
+            if os.path.exists(u_path) or os.path.exists(utf8_path):
                 return True
+        #this will catch any encode errors decoding u_path
         except UnicodeEncodeError:
-            log.warn("'%s' not %s encodable -- skipping", path,
-                sys.getfilesystemencoding())
+            log.warn(enc_warn, path, sys.getfilesystemencoding())
+
 
 class manifest_maker(sdist):
 
@@ -262,6 +277,10 @@ class manifest_maker(sdist):
         self.filelist.remove_duplicates()
         self.write_manifest()
 
+    def _manifest_normalize(self, path):
+        path = unicode_utils.filesys_decode(path)
+        return path.replace(os.sep, '/')
+
     def write_manifest(self):
         """
         Write the file list in 'self.filelist' to the manifest file
@@ -269,7 +288,8 @@ class manifest_maker(sdist):
         """
         self.filelist._repair()
 
-        files = [f.replace(os.sep, '/') for f in self.filelist.files]
+        #Now _repairs should encodability, but not unicode
+        files = [self._manifest_normalize(f) for f in self.filelist.files]
         msg = "writing manifest file '%s'" % self.manifest
         self.execute(write_file, (self.manifest, files), msg)
 
@@ -303,10 +323,13 @@ def write_file(filename, contents):
     sequence of strings without line terminators) to it.
     """
     contents = "\n".join(contents)
+
+    #assuming the contents has been vetted for utf-8 encoding
     contents = contents.encode("utf-8")
-    f = open(filename, "wb")  # always write POSIX-style manifest
-    f.write(contents)
-    f.close()
+
+    with open(filename, "wb") as f:  # always write POSIX-style manifest
+        f.write(contents)
+
 
 def write_pkg_info(cmd, basename, filename):
     log.info("writing %s", filename)
diff --git a/setuptools/tests/test_sdist.py b/setuptools/tests/test_sdist.py
index cc426f66..ada86189 100644
--- a/setuptools/tests/test_sdist.py
+++ b/setuptools/tests/test_sdist.py
@@ -414,7 +414,6 @@ class TestSdistTest(unittest.TestCase):
             except UnicodeDecodeError:
                 self.assertFalse(filename in cmd.filelist.files)
 
-
 class TestDummyOutput(environment.ZippedEnvironment):
 
     def setUp(self):
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
new file mode 100644
index 00000000..d2de941a
--- /dev/null
+++ b/setuptools/unicode_utils.py
@@ -0,0 +1,41 @@
+import unicodedata
+import sys
+from setuptools.compat import unicode as decoded_string
+
+
+# HFS Plus uses decomposed UTF-8
+def decompose(path):
+    if isinstance(path, decoded_string):
+        return unicodedata.normalize('NFD', path)
+    try:
+        path = path.decode('utf-8')
+        path = unicodedata.normalize('NFD', path)
+        path = path.encode('utf-8')
+    except UnicodeError:
+        pass  # Not UTF-8
+    return path
+
+
+def filesys_decode(path):
+    """
+    Ensure that the given path is decoded,
+    NONE when no expected encoding works
+    """
+
+    fs_enc = sys.getfilesystemencoding()
+    if isinstance(path, decoded_string):
+        return path
+
+    for enc in (fs_enc, "utf-8"):
+        try:
+            return path.decode(enc)
+        except UnicodeDecodeError:
+            continue
+
+
+def try_encode(string, enc):
+    "turn unicode encoding into a functional routine"
+    try:
+        return string.encode(enc)
+    except UnicodeEncodeError:
+        return None
|
