Diffstat (limited to 'setuptools/package_index.py')
| -rwxr-xr-x | setuptools/package_index.py | 322 |
1 file changed, 163 insertions, 159 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 4f39c70a..4c9e40a7 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -1,18 +1,28 @@
 """PyPI and direct package downloading"""
-import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
-import itertools
+import sys
+import os
+import re
+import shutil
+import socket
 import base64
-import httplib, urllib
+
+from pkg_resources import (
+    CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
+    require, Environment, find_distributions, safe_name, safe_version,
+    to_filename, Requirement, DEVELOP_DIST,
+)
 from setuptools import ssl_support
-from pkg_resources import *
 from distutils import log
 from distutils.errors import DistutilsError
-try:
-    from hashlib import md5
-except ImportError:
-    from md5 import md5
+from setuptools.compat import (urllib2, httplib, StringIO, HTTPError,
+                               urlparse, urlunparse, unquote, splituser,
+                               url2pathname, name2codepoint,
+                               unichr, urljoin, urlsplit, urlunsplit)
+from setuptools.compat import filterfalse
 from fnmatch import translate
+from setuptools.py24compat import hashlib
 from setuptools.py24compat import wraps
+from setuptools.py26compat import strip_fragment
 from setuptools.py27compat import get_all_headers
 
 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
@@ -57,10 +67,10 @@ def parse_bdist_wininst(name):
 
 
 def egg_info_for_url(url):
-    scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
-    base = urllib2.unquote(path.split('/')[-1])
+    scheme, server, path, parameters, query, fragment = urlparse(url)
+    base = unquote(path.split('/')[-1])
     if server=='sourceforge.net' and base=='download':    # XXX Yuck
-        base = urllib2.unquote(path.split('/')[-2])
+        base = unquote(path.split('/')[-2])
     if '#' in base: base, fragment = base.split('#',1)
     return base,fragment
 
@@ -104,9 +114,10 @@ def distros_for_filename(filename, metadata=None):
     )
 
 
-def interpret_distro_name(location, basename, metadata,
-    py_version=None, precedence=SOURCE_DIST, platform=None
-):
+def interpret_distro_name(
+        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
+        platform=None
+        ):
     """Generate alternative interpretations of a source distro name
 
     Note: if `location` is a filesystem filename, you should call
@@ -146,7 +157,7 @@ def unique_everseen(iterable, key=None):
     seen = set()
     seen_add = seen.add
     if key is None:
-        for element in itertools.ifilterfalse(seen.__contains__, iterable):
+        for element in filterfalse(seen.__contains__, iterable):
             seen_add(element)
             yield element
     else:
@@ -175,28 +186,85 @@ def find_external_links(url, page):
 
     for match in REL.finditer(page):
         tag, rel = match.groups()
-        rels = map(str.strip, rel.lower().split(','))
+        rels = set(map(str.strip, rel.lower().split(',')))
         if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
-                yield urlparse.urljoin(url, htmldecode(match.group(1)))
+                yield urljoin(url, htmldecode(match.group(1)))
 
     for tag in ("<th>Home Page", "<th>Download URL"):
         pos = page.find(tag)
         if pos!=-1:
             match = HREF.search(page,pos)
             if match:
-                yield urlparse.urljoin(url, htmldecode(match.group(1)))
+                yield urljoin(url, htmldecode(match.group(1)))
 
 user_agent = "Python-urllib/%s setuptools/%s" % (
     sys.version[:3], require('setuptools')[0].version
 )
 
+class ContentChecker(object):
+    """
+    A null content checker that defines the interface for checking content
+    """
+    def feed(self, block):
+        """
+        Feed a block of data to the hash.
+        """
+        return
+
+    def is_valid(self):
+        """
+        Check the hash. Return False if validation fails.
+        """
+        return True
+
+    def report(self, reporter, template):
+        """
+        Call reporter with information about the checker (hash name)
+        substituted into the template.
+        """
+        return
+
+class HashChecker(ContentChecker):
+    pattern = re.compile(
+        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
+        r'(?P<expected>[a-f0-9]+)'
+    )
+
+    def __init__(self, hash_name, expected):
+        self.hash_name = hash_name
+        self.hash = hashlib.new(hash_name)
+        self.expected = expected
+
+    @classmethod
+    def from_url(cls, url):
+        "Construct a (possibly null) ContentChecker from a URL"
+        fragment = urlparse(url)[-1]
+        if not fragment:
+            return ContentChecker()
+        match = cls.pattern.search(fragment)
+        if not match:
+            return ContentChecker()
+        return cls(**match.groupdict())
+
+    def feed(self, block):
+        self.hash.update(block)
+
+    def is_valid(self):
+        return self.hash.hexdigest() == self.expected
+
+    def report(self, reporter, template):
+        msg = template % self.hash_name
+        return reporter(msg)
+
+
 class PackageIndex(Environment):
     """A distribution index that scans web pages for download URLs"""
 
-    def __init__(self, index_url="https://pypi.python.org/simple", hosts=('*',),
-        ca_bundle=None, verify_ssl=True, *args, **kw
-    ):
+    def __init__(
+            self, index_url="https://pypi.python.org/simple", hosts=('*',),
+            ca_bundle=None, verify_ssl=True, *args, **kw
+            ):
         Environment.__init__(self,*args,**kw)
         self.index_url = index_url + "/"[:not index_url.endswith('/')]
         self.scanned_urls = {}
@@ -224,7 +292,7 @@ class PackageIndex(Environment):
             self.debug("Found link: %s", url)
 
         if dists or not retrieve or url in self.fetched_urls:
-            map(self.add, dists)
+            list(map(self.add, dists))
             return  # don't need the actual page
 
         if not self.url_ok(url):
@@ -243,7 +311,7 @@ class PackageIndex(Environment):
         base = f.url     # handle redirects
         page = f.read()
         if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
-            if isinstance(f, urllib2.HTTPError):
+            if isinstance(f, HTTPError):
                 # Errors have no charset, assume latin1:
                 charset = 'latin-1'
             else:
@@ -251,7 +319,7 @@ class PackageIndex(Environment):
             page = page.decode(charset, "ignore")
         f.close()
         for match in HREF.finditer(page):
-            link = urlparse.urljoin(base, htmldecode(match.group(1)))
+            link = urljoin(base, htmldecode(match.group(1)))
             self.process_url(link)
         if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
             page = self.process_index(url, page)
@@ -270,13 +338,14 @@ class PackageIndex(Environment):
         dists = distros_for_filename(fn)
         if dists:
             self.debug("Found: %s", fn)
-            map(self.add, dists)
+            list(map(self.add, dists))
 
     def url_ok(self, url, fatal=False):
         s = URL_SCHEME(url)
-        if (s and s.group(1).lower()=='file') or self.allows(urlparse.urlparse(url)[1]):
+        if (s and s.group(1).lower()=='file') or self.allows(urlparse(url)[1]):
             return True
-        msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n"
+        msg = ("\nNote: Bypassing %s (disallowed host; see "
+            "http://bit.ly/1dg9ijs for details).\n")
         if fatal:
             raise DistutilsError(msg % url)
         else:
@@ -290,7 +359,8 @@ class PackageIndex(Environment):
                 self.scan_egg_link(item, entry)
 
     def scan_egg_link(self, path, entry):
-        lines = filter(None, map(str.strip, open(os.path.join(path, entry))))
+        lines = [_f for _f in map(str.strip,
+                                  open(os.path.join(path, entry))) if _f]
         if len(lines)==2:
             for dist in find_distributions(os.path.join(path, lines[0])):
                 dist.location = os.path.join(path, *lines)
@@ -302,9 +372,9 @@ class PackageIndex(Environment):
         def scan(link):
             # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
-                parts = map(
-                    urllib2.unquote, link[len(self.index_url):].split('/')
-                )
+                parts = list(map(
+                    unquote, link[len(self.index_url):].split('/')
+                ))
                 if len(parts)==2 and '#' not in parts[1]:
                     # it's a package page, sanitize and index it
                     pkg = safe_name(parts[0])
@@ -316,7 +386,7 @@ class PackageIndex(Environment):
         # process an index page into the package-page index
         for match in HREF.finditer(page):
             try:
-                scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+                scan(urljoin(url, htmldecode(match.group(1))))
             except ValueError:
                 pass
 
@@ -339,8 +409,6 @@ class PackageIndex(Environment):
         else:
             return ""   # no sense double-scanning non-package pages
 
-
-
     def need_version_info(self, url):
         self.scan_all(
             "Page at %s links to .py file(s) without version info; an index "
@@ -371,27 +439,28 @@ class PackageIndex(Environment):
             self.scan_url(url)
 
     def obtain(self, requirement, installer=None):
-        self.prescan(); self.find_packages(requirement)
+        self.prescan()
+        self.find_packages(requirement)
         for dist in self[requirement.key]:
             if dist in requirement:
                 return dist
             self.debug("%s does not match %s", requirement, dist)
         return super(PackageIndex, self).obtain(requirement,installer)
 
-
-
-
-
-    def check_md5(self, cs, info, filename, tfp):
-        if re.match('md5=[0-9a-f]{32}$', info):
-            self.debug("Validating md5 checksum for %s", filename)
-            if cs.hexdigest()!=info[4:]:
-                tfp.close()
-                os.unlink(filename)
-                raise DistutilsError(
-                    "MD5 validation failed for "+os.path.basename(filename)+
-                    "; possible download problem?"
-                )
+    def check_hash(self, checker, filename, tfp):
+        """
+        checker is a ContentChecker
+        """
+        checker.report(self.debug,
+            "Validating %%s checksum for %s" % filename)
+        if not checker.is_valid():
+            tfp.close()
+            os.unlink(filename)
+            raise DistutilsError(
+                "%s validation failed for %s; "
+                "possible download problem?" % (
+                    checker.hash.name, os.path.basename(filename))
+            )
 
     def add_find_links(self, urls):
         """Add `urls` to the list that will be prescanned for searches"""
@@ -411,7 +480,7 @@ class PackageIndex(Environment):
     def prescan(self):
         """Scan urls scheduled for prescanning (e.g. --find-links)"""
         if self.to_scan:
-            map(self.scan_url, self.to_scan)
+            list(map(self.scan_url, self.to_scan))
         self.to_scan = None # from now on, go ahead and process immediately
 
     def not_found_in_index(self, requirement):
@@ -463,11 +532,10 @@ class PackageIndex(Environment):
         )
         return getattr(self.fetch_distribution(spec, tmpdir),'location',None)
 
-
-    def fetch_distribution(self,
-        requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
-        local_index=None
-    ):
+    def fetch_distribution(
+            self, requirement, tmpdir, force_scan=False, source=False,
+            develop_ok=False, local_index=None
+            ):
         """Obtain a distribution suitable for fulfilling `requirement`
 
         `requirement` must be a ``pkg_resources.Requirement`` instance.
@@ -505,8 +573,6 @@ class PackageIndex(Environment):
                 if dist in req and (dist.precedence<=SOURCE_DIST or not source):
                     return dist
 
-
-
         if force_scan:
             self.prescan()
             self.find_packages(requirement)
@@ -533,7 +599,6 @@ class PackageIndex(Environment):
             self.info("Best match: %s", dist)
             return dist.clone(location=self.download(dist.location, tmpdir))
 
-
     def fetch(self, requirement, tmpdir, force_scan=False, source=False):
         """Obtain a file suitable for fulfilling `requirement`
 
@@ -547,10 +612,10 @@ class PackageIndex(Environment):
             return dist.location
         return None
 
-
     def gen_setup(self, filename, fragment, tmpdir):
         match = EGG_FRAGMENT.match(fragment)
-        dists = match and [d for d in
+        dists = match and [
+            d for d in
             interpret_distro_name(filename, match.group(1), None) if d.version
         ] or []
 
@@ -595,14 +660,12 @@ class PackageIndex(Environment):
         # Download the file
         fp, tfp, info = None, None, None
         try:
-            if '#' in url:
-                url, info = url.split('#', 1)
-            fp = self.open_url(url)
-            if isinstance(fp, urllib2.HTTPError):
+            checker = HashChecker.from_url(url)
+            fp = self.open_url(strip_fragment(url))
+            if isinstance(fp, HTTPError):
                 raise DistutilsError(
                     "Can't download %s: %s %s" % (url, fp.code,fp.msg)
                 )
-            cs = md5()
             headers = fp.info()
             blocknum = 0
             bs = self.dl_blocksize
@@ -616,13 +679,13 @@ class PackageIndex(Environment):
             while True:
                 block = fp.read(bs)
                 if block:
-                    cs.update(block)
+                    checker.feed(block)
                     tfp.write(block)
                     blocknum += 1
                     self.reporthook(url, filename, blocknum, bs, size)
                 else:
                     break
-            if info: self.check_md5(cs, info, filename, tfp)
+            self.check_hash(checker, filename, tfp)
             return headers
         finally:
             if fp: fp.close()
@@ -631,34 +694,40 @@ class PackageIndex(Environment):
     def reporthook(self, url, filename, blocknum, blksize, size):
         pass    # no-op
 
-
     def open_url(self, url, warning=None):
         if url.startswith('file:'):
             return local_open(url)
         try:
             return open_with_auth(url, self.opener)
-        except (ValueError, httplib.InvalidURL), v:
+        except (ValueError, httplib.InvalidURL):
+            v = sys.exc_info()[1]
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
-        except urllib2.HTTPError, v:
+        except urllib2.HTTPError:
+            v = sys.exc_info()[1]
             return v
-        except urllib2.URLError, v:
+        except urllib2.URLError:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v.reason)
             else:
                 raise DistutilsError("Download error for %s: %s" % (url, v.reason))
-        except httplib.BadStatusLine, v:
+        except httplib.BadStatusLine:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v.line)
             else:
-                raise DistutilsError('%s returned a bad status line. '
-                                     'The server might be down, %s' % \
-                                     (url, v.line))
-        except httplib.HTTPException, v:
+                raise DistutilsError(
+                    '%s returned a bad status line. The server might be '
+                    'down, %s' %
+                    (url, v.line)
+                )
+        except httplib.HTTPException:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v)
             else:
@@ -689,7 +758,7 @@ class PackageIndex(Environment):
         elif scheme.startswith('hg+'):
             return self._download_hg(url, filename)
         elif scheme=='file':
-            return urllib.url2pathname(urlparse.urlparse(url)[2])
+            return url2pathname(urlparse(url)[2])
         else:
             self.url_ok(url, True)   # raises error if not allowed
             return self._attempt_download(url, filename)
@@ -697,7 +766,6 @@ class PackageIndex(Environment):
     def scan_url(self, url):
         self.process_url(url, True)
 
-
     def _attempt_download(self, url, filename):
         headers = self._download_to(url, filename)
         if 'html' in headers.get('content-type','').lower():
@@ -720,29 +788,14 @@ class PackageIndex(Environment):
             os.unlink(filename)
             raise DistutilsError("Unexpected HTML page found at "+url)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     def _download_svn(self, url, filename):
         url = url.split('#',1)[0]   # remove any fragment for svn's sake
         creds = ''
         if url.lower().startswith('svn:') and '@' in url:
-            scheme, netloc, path, p, q, f = urlparse.urlparse(url)
+            scheme, netloc, path, p, q, f = urlparse(url)
             if not netloc and path.startswith('//') and '/' in path[2:]:
                 netloc, path = path[2:].split('/',1)
-                auth, host = urllib.splituser(netloc)
+                auth, host = splituser(netloc)
                 if auth:
                     if ':' in auth:
                         user, pw = auth.split(':',1)
@@ -750,13 +803,14 @@ class PackageIndex(Environment):
                     else:
                         creds = " --username="+auth
                     netloc = host
-                    url = urlparse.urlunparse((scheme, netloc, url, p, q, f))
+                    url = urlunparse((scheme, netloc, url, p, q, f))
         self.info("Doing subversion checkout from %s to %s", url, filename)
         os.system("svn checkout%s -q %s %s" % (creds, url, filename))
         return filename
 
-    def _vcs_split_rev_from_url(self, url, pop_prefix=False):
-        scheme, netloc, path, query, frag = urlparse.urlsplit(url)
+    @staticmethod
+    def _vcs_split_rev_from_url(url, pop_prefix=False):
+        scheme, netloc, path, query, frag = urlsplit(url)
 
         scheme = scheme.split('+', 1)[-1]
 
@@ -768,7 +822,7 @@ class PackageIndex(Environment):
             path, rev = path.rsplit('@', 1)
 
         # Also, discard fragment
-        url = urlparse.urlunsplit((scheme, netloc, path, query, ''))
+        url = urlunsplit((scheme, netloc, path, query, ''))
 
         return url, rev
 
@@ -813,18 +867,6 @@ class PackageIndex(Environment):
     def warn(self, msg, *args):
         log.warn(msg, *args)
 
-
-
-
-
-
-
-
-
-
-
-
-
 # This pattern matches a character entity reference (a decimal numeric
 # references, a hexadecimal numeric reference, or a named reference).
 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
@@ -842,7 +884,6 @@ def decode_entity(match):
     elif what.startswith('#'):
         what = int(what[1:])
     else:
-        from htmlentitydefs import name2codepoint
         what = name2codepoint.get(what, match.group(0))
     return uchr(what)
 
@@ -850,20 +891,6 @@ def htmldecode(text):
     """Decode HTML entities in the given text."""
     return entity_sub(decode_entity, text)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 def socket_timeout(timeout=15):
     def _socket_timeout(func):
         def _socket_timeout(*args, **kwargs):
@@ -883,7 +910,7 @@ def _encode_auth(auth):
     >>> _encode_auth('username%3Apassword')
     u'dXNlcm5hbWU6cGFzc3dvcmQ='
     """
-    auth_s = urllib2.unquote(auth)
+    auth_s = unquote(auth)
     # convert to bytes
     auth_bytes = auth_s.encode()
     # use the legacy interface for Python 2.3 support
@@ -896,7 +923,7 @@ def _encode_auth(auth):
 
 def open_with_auth(url, opener=urllib2.urlopen):
     """Open a urllib2 request, handling HTTP authentication"""
 
-    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
+    scheme, netloc, path, params, query, frag = urlparse(url)
 
     # Double scheme does not raise on Mac OS X as revealed by a
     # failing test. We would expect "nonnumeric port". Refs #20.
@@ -904,13 +931,13 @@ def open_with_auth(url, opener=urllib2.urlopen):
         raise httplib.InvalidURL("nonnumeric port: ''")
 
     if scheme in ('http', 'https'):
-        auth, host = urllib.splituser(netloc)
+        auth, host = splituser(netloc)
     else:
         auth = None
 
     if auth:
         auth = "Basic " + _encode_auth(auth)
-        new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
+        new_url = urlunparse((scheme,host,path,params,query,frag))
         request = urllib2.Request(new_url)
         request.add_header("Authorization", auth)
     else:
@@ -922,9 +949,9 @@ def open_with_auth(url, opener=urllib2.urlopen):
     if auth:
         # Put authentication info back into request URL if same host,
         # so that links found on the page will work
-        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
+        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
         if s2==scheme and h2==host:
-            fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))
+            fp.url = urlunparse((s2,netloc,path2,param2,query2,frag2))
 
     return fp
 
@@ -932,22 +959,13 @@ def open_with_auth(url, opener=urllib2.urlopen):
 
 open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
 
-
-
-
-
-
-
-
-
-
 def fix_sf_url(url):
     return url      # backward compatibility
 
 def local_open(url):
     """Read a local path, with special support for directories"""
-    scheme, server, path, param, query, frag = urlparse.urlparse(url)
-    filename = urllib.url2pathname(path)
+    scheme, server, path, param, query, frag = urlparse(url)
+    filename = url2pathname(path)
     if os.path.isfile(filename):
         return urllib2.urlopen(url)
     elif path.endswith('/') and os.path.isdir(filename):
@@ -968,19 +986,5 @@ def local_open(url):
     else:
         status, message, body = 404, "Path not found", "Not found"
 
-    return urllib2.HTTPError(url, status, message,
-            {'content-type':'text/html'}, cStringIO.StringIO(body))
-
-
-
-
-
-
-
-
-
-
-
-
-
-# this line is a kludge to keep the trailing blank lines for pje's editor
+    return HTTPError(url, status, message,
+            {'content-type':'text/html'}, StringIO(body))
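
Editor's note on the checksum rework: the diff's central change replaces the md5-only check_md5() with the ContentChecker/HashChecker pair. A checker is built from the URL's "#<algorithm>=<digest>" fragment, fed each downloaded block, and consulted once at the end, so any of the listed hashlib algorithms now works. A minimal standalone sketch of that flow on Python 3 follows; the URL and payload are invented for illustration and the helper name is hypothetical:

    import hashlib
    import re
    from urllib.parse import urlparse

    # Same fragment grammar as HashChecker.pattern in the diff.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def checker_from_url(url):
        """Return (hash_object, expected_digest), or None when the URL has
        no recognizable hash fragment (the role of the null ContentChecker)."""
        match = pattern.search(urlparse(url).fragment)
        if not match:
            return None
        return hashlib.new(match.group('hash_name')), match.group('expected')

    payload = b'example payload'
    url = ('https://example.com/pkg-1.0.tar.gz#sha256='
           + hashlib.sha256(payload).hexdigest())
    hash_obj, expected = checker_from_url(url)
    for block in (payload[:8], payload[8:]):   # feed() once per block
        hash_obj.update(block)
    assert hash_obj.hexdigest() == expected    # is_valid()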
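On the exception handling: every "except SomeError, v:" clause becomes a bare "except SomeError:" followed by "v = sys.exc_info()[1]". That is the one spelling that parses everywhere this tree still targeted: the comma form is Python 2-only syntax, and "except SomeError as v" requires Python 2.6+, while the py24compat imports above show Python 2.4 was still supported. A sketch of the pattern outside setuptools; parse_port is a hypothetical helper:

    import sys

    def parse_port(text):
        # Compiles on Python 2.4+ and Python 3 alike: neither the 2.x-only
        # "except ValueError, e" nor the 2.6+ "except ValueError as e" appears.
        try:
            return int(text)
        except ValueError:
            e = sys.exc_info()[1]   # portably grab the in-flight exception
            raise RuntimeError('invalid port %r: %s' % (text, e))

    print(parse_port('8080'))       # -> 8080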
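Likewise, each side-effecting map() call is wrapped in list(...). On Python 2, map() returns a list and runs immediately; on Python 3 it returns a lazy iterator, so a bare "map(self.add, dists)" would silently do nothing. Forcing it with list() preserves the old behaviour on both versions, as this toy Python 3 demonstration shows:

    added = []

    # Lazy: nothing happens until the iterator is consumed.
    lazy = map(added.append, [1, 2, 3])
    assert added == []

    # Consuming the iterator runs the side effects, as on Python 2.
    list(lazy)
    assert added == [1, 2, 3]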
