diff options
| author | Stefan H. Holek <stefan@epy.co.at> | 2012-10-17 10:54:39 +0200 | 
|---|---|---|
| committer | Stefan H. Holek <stefan@epy.co.at> | 2012-10-17 10:54:39 +0200 | 
| commit | 9d66fb61d9579516c5333d51eb85dc3495e6032f (patch) | |
| tree | 2aa5f1a07c2149eb791d62e41077eaa470336751 | |
| parent | b68c62e1cd28a9bedf6c6b8f65c5428361e644a9 (diff) | |
| download | python-setuptools-git-9d66fb61d9579516c5333d51eb85dc3495e6032f.tar.gz | |
Use surrogateescape error handler when reading and writing the manifest. Refs #303.
--HG--
branch : distribute
extra : rebase_source : f0231cf87e2478f988f798dfe579f28e7561aeff

| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | setuptools/command/egg_info.py | 2 |
| -rwxr-xr-x | setuptools/command/sdist.py | 2 |
| -rw-r--r-- | setuptools/tests/test_sdist.py | 256 |

3 files changed, 214 insertions, 46 deletions
| diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py index e1aaa491..9955c8ef 100755 --- a/setuptools/command/egg_info.py +++ b/setuptools/command/egg_info.py @@ -360,7 +360,7 @@ def write_file (filename, contents):      """      contents = "\n".join(contents)      if sys.version_info >= (3,): -        contents = contents.encode("utf-8") +        contents = contents.encode("utf-8", "surrogateescape")      f = open(filename, "wb")        # always write POSIX-style manifest      f.write(contents)      f.close() diff --git a/setuptools/command/sdist.py b/setuptools/command/sdist.py index d5259c2b..42558143 100755 --- a/setuptools/command/sdist.py +++ b/setuptools/command/sdist.py @@ -283,7 +283,7 @@ class sdist(_sdist):          manifest = open(self.manifest, 'rbU')          for line in manifest:              if sys.version_info >= (3,): -                line = line.decode('UTF-8') +                line = line.decode('UTF-8', 'surrogateescape')              # ignore comments and blank lines              line = line.strip()              if line.startswith('#') or not line: diff --git a/setuptools/tests/test_sdist.py b/setuptools/tests/test_sdist.py index 4478d438..65b83b6e 100644 --- a/setuptools/tests/test_sdist.py +++ b/setuptools/tests/test_sdist.py @@ -7,10 +7,13 @@ import shutil  import sys  import tempfile  import unittest +import urllib +import unicodedata  from StringIO import StringIO  from setuptools.command.sdist import sdist +from setuptools.command.egg_info import manifest_maker  from setuptools.dist import Distribution @@ -29,18 +32,58 @@ setup(**%r)  """ % SETUP_ATTRS -def compose(path): -    # HFS Plus returns decomposed UTF-8 -    if sys.platform == 'darwin': -        from unicodedata import normalize +if sys.version_info >= (3,): +    LATIN1_FILENAME = 'smörbröd.py'.encode('latin-1') +else: +    LATIN1_FILENAME = 'sm\xf6rbr\xf6d.py' + + +# Cannot use context manager because of Python 2.4 +def quiet(): +    global old_stdout, 
old_stderr +    old_stdout, old_stderr = sys.stdout, sys.stderr +    sys.stdout, sys.stderr = StringIO(), StringIO() + +def unquiet(): +    sys.stdout, sys.stderr = old_stdout, old_stderr + + +# Fake byte literals to shut up Python <= 2.5 +def b(s, encoding='utf-8'): +    if sys.version_info >= (3,): +        return s.encode(encoding) +    return s + + +# HFS Plus returns decomposed UTF-8 +def decompose(path): +    if isinstance(path, unicode): +        return unicodedata.normalize('NFD', path) +    try: +        path = path.decode('utf-8') +        path = unicodedata.normalize('NFD', path) +        path = path.encode('utf-8') +    except UnicodeError: +        pass # Not UTF-8 +    return path + + +# HFS Plus quotes unknown bytes like so: %F6 +def hfs_quote(path): +    if isinstance(path, unicode): +        raise TypeError('bytes are required') +    try: +        u = path.decode('utf-8') +    except UnicodeDecodeError: +        path = urllib.quote(path) # Not UTF-8 +    else:          if sys.version_info >= (3,): -            path = normalize('NFC', path) -        else: -            path = normalize('NFC', path.decode('utf-8')).encode('utf-8') +            path = u      return path  class TestSdistTest(unittest.TestCase): +      def setUp(self):          self.temp_dir = tempfile.mkdtemp()          f = open(os.path.join(self.temp_dir, 'setup.py'), 'w') @@ -74,81 +117,206 @@ class TestSdistTest(unittest.TestCase):          cmd.ensure_finalized()          # squelch output -        old_stdout = sys.stdout -        old_stderr = sys.stderr -        sys.stdout = StringIO() -        sys.stderr = StringIO() +        quiet()          try:              cmd.run()          finally: -            sys.stdout = old_stdout -            sys.stderr = old_stderr +            unquiet()          manifest = cmd.filelist.files -          self.assertTrue(os.path.join('sdist_test', 'a.txt') in manifest)          self.assertTrue(os.path.join('sdist_test', 'b.txt') in manifest)          
self.assertTrue(os.path.join('sdist_test', 'c.rst') not in manifest) -    def test_manifest_is_written_in_utf8(self): +    def test_manifest_is_written_with_utf8_encoding(self):          # Test for #303. +        dist = Distribution(SETUP_ATTRS) +        dist.script_name = 'setup.py' +        mm = manifest_maker(dist) +        mm.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt') +        os.mkdir('sdist_test.egg-info') -        # Add file with non-ASCII filename +        # UTF-8 filename          filename = os.path.join('sdist_test', 'smörbröd.py') -        open(filename, 'w').close() +        # Add UTF-8 filename and write manifest +        quiet() +        try: +            mm.run() +            mm.filelist.files.append(filename) +            mm.write_manifest() +        finally: +            unquiet() + +        manifest = open(mm.manifest, 'rbU') +        contents = manifest.read() +        manifest.close() + +        # The manifest should be UTF-8 encoded +        try: +            u = contents.decode('UTF-8') +        except UnicodeDecodeError, e: +            self.fail(e) + +        # The manifest should contain the UTF-8 filename +        if sys.version_info >= (3,): +            self.assertTrue(filename in u) +        else: +            self.assertTrue(filename in contents) + +    def test_manifest_is_written_with_surrogateescape_error_handler(self): +        # Test for #303.          
dist = Distribution(SETUP_ATTRS)          dist.script_name = 'setup.py' -        cmd = sdist(dist) -        cmd.ensure_finalized() +        mm = manifest_maker(dist) +        mm.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt') +        os.mkdir('sdist_test.egg-info') -        # squelch output -        old_stdout = sys.stdout -        old_stderr = sys.stderr -        sys.stdout = StringIO() -        sys.stderr = StringIO() +        # Latin-1 filename +        filename = os.path.join(b('sdist_test'), LATIN1_FILENAME) + +        # Add filename with surrogates and write manifest +        quiet()          try: -            cmd.run() +            mm.run() +            if sys.version_info >= (3,): +                u = filename.decode('utf-8', 'surrogateescape') +                mm.filelist.files.append(u) +            else: +                mm.filelist.files.append(filename) +            mm.write_manifest()          finally: -            sys.stdout = old_stdout -            sys.stderr = old_stderr +            unquiet() -        manifest = open(os.path.join('sdist_test.egg-info', 'SOURCES.txt'), 'rbU') +        manifest = open(mm.manifest, 'rbU')          contents = manifest.read()          manifest.close() -        self.assertTrue(len(contents)) -        # This must not fail: -        contents.decode('UTF-8') +        # The manifest should contain the Latin-1 filename +        self.assertTrue(filename in contents) -    def test_manifest_is_read_in_utf8(self): +    def test_manifest_is_read_with_utf8_encoding(self):          # Test for #303. 
+        dist = Distribution(SETUP_ATTRS) +        dist.script_name = 'setup.py' +        cmd = sdist(dist) +        cmd.ensure_finalized() -        # Add file with non-ASCII filename +        # UTF-8 filename          filename = os.path.join('sdist_test', 'smörbröd.py')          open(filename, 'w').close() +        quiet() +        try: +            cmd.run() +        finally: +            unquiet() + +        # The filelist should contain the UTF-8 filename +        if sys.platform == 'darwin': +            filename = decompose(filename) +        self.assertTrue(filename in cmd.filelist.files) + +    def test_manifest_is_read_with_surrogateescape_error_handler(self): +        # Test for #303. + +        # This is hard to test on HFS Plus because it quotes unknown +        # bytes (see previous test). Furthermore, egg_info.FileList +        # only appends filenames that os.path.exist. + +        # We therefore write the manifest file by hand and check whether +        # read_manifest produces a UnicodeDecodeError. +        dist = Distribution(SETUP_ATTRS) +        dist.script_name = 'setup.py' +        cmd = sdist(dist) +        cmd.ensure_finalized() + +        filename = os.path.join(b('sdist_test'), LATIN1_FILENAME) + +        quiet() +        try: +            cmd.run() +            # Add Latin-1 filename to manifest +            cmd.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt') +            manifest = open(cmd.manifest, 'ab') +            manifest.write(filename+b('\n')) +            manifest.close() +            # Re-read manifest +            try: +                cmd.read_manifest() +            except UnicodeDecodeError, e: +                self.fail(e) +        finally: +            unquiet() + +    def test_sdist_with_utf8_encoded_filename(self): +        # Test for #303. 
+        dist = Distribution(SETUP_ATTRS) +        dist.script_name = 'setup.py' +        cmd = sdist(dist) +        cmd.ensure_finalized() + +        # UTF-8 filename +        filename = os.path.join(b('sdist_test'), b('smörbröd.py')) +        open(filename, 'w').close() + +        quiet() +        try: +            cmd.run() +        finally: +            unquiet() + +        # The filelist should contain the UTF-8 filename +        # (in one representation or other) +        if sys.version_info >= (3,): +            filename = filename.decode(sys.getfilesystemencoding(), 'surrogateescape') +        if sys.platform == 'darwin': +            filename = decompose(filename) +        self.assertTrue(filename in cmd.filelist.files) + +    def test_sdist_with_latin1_encoded_filename(self): +        # Test for #303.          dist = Distribution(SETUP_ATTRS)          dist.script_name = 'setup.py'          cmd = sdist(dist)          cmd.ensure_finalized() -        # squelch output -        old_stdout = sys.stdout -        old_stderr = sys.stderr -        sys.stdout = StringIO() -        sys.stderr = StringIO() +        # Latin-1 filename +        filename = os.path.join(b('sdist_test'), LATIN1_FILENAME) +        open(filename, 'w').close() + +        quiet()          try:              cmd.run()          finally: -            sys.stdout = old_stdout -            sys.stderr = old_stderr +            unquiet() + +        # The filelist should contain the Latin-1 filename +        # (in one representation or other) +        if sys.platform == 'darwin': +            filename = hfs_quote(filename) +        elif sys.version_info >= (3,): +            filename = filename.decode(sys.getfilesystemencoding(), 'surrogateescape') +        self.assertTrue(filename in cmd.filelist.files) + +    def test_decompose(self): +        self.assertNotEqual('smörbröd.py', decompose('smörbröd.py')) -        cmd.filelist.files = [] -        cmd.manifest = os.path.join('sdist_test.egg-info', 
'SOURCES.txt') -        cmd.read_manifest() +        if sys.version_info >= (3,): +            self.assertEqual(len('smörbröd.py'), 11) +            self.assertEqual(len(decompose('smörbröd.py')), 13) +        else: +            self.assertEqual(len('smörbröd.py'), 13) +            self.assertEqual(len(decompose('smörbröd.py')), 15) + +    def test_hfs_quote(self): +        self.assertEqual(hfs_quote(LATIN1_FILENAME), 'sm%F6rbr%F6d.py') -        self.assertTrue(filename in [compose(x) for x in cmd.filelist.files]) +        # Bytes are required +        if sys.version_info >= (3,): +            self.assertRaises(TypeError, hfs_quote, 'smörbröd.py') +        else: +            self.assertRaises(TypeError, hfs_quote, 'smörbröd.py'.decode('utf-8'))  def test_suite(): | 
