Diffstat (limited to 'setuptools/package_index.py')
| -rwxr-xr-x | setuptools/package_index.py | 322 |
1 file changed, 163 insertions, 159 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 4f39c70a..4c9e40a7 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -1,18 +1,28 @@
 """PyPI and direct package downloading"""
-import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
-import itertools
+import sys
+import os
+import re
+import shutil
+import socket
 import base64
-import httplib, urllib
+
+from pkg_resources import (
+    CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
+    require, Environment, find_distributions, safe_name, safe_version,
+    to_filename, Requirement, DEVELOP_DIST,
+)
 from setuptools import ssl_support
-from pkg_resources import *
 from distutils import log
 from distutils.errors import DistutilsError
-try:
-    from hashlib import md5
-except ImportError:
-    from md5 import md5
+from setuptools.compat import (urllib2, httplib, StringIO, HTTPError,
+                               urlparse, urlunparse, unquote, splituser,
+                               url2pathname, name2codepoint,
+                               unichr, urljoin, urlsplit, urlunsplit)
+from setuptools.compat import filterfalse
 from fnmatch import translate
+from setuptools.py24compat import hashlib
 from setuptools.py24compat import wraps
+from setuptools.py26compat import strip_fragment
 from setuptools.py27compat import get_all_headers
 
 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
@@ -57,10 +67,10 @@ def parse_bdist_wininst(name):
 
 
 def egg_info_for_url(url):
-    scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
-    base = urllib2.unquote(path.split('/')[-1])
+    scheme, server, path, parameters, query, fragment = urlparse(url)
+    base = unquote(path.split('/')[-1])
     if server=='sourceforge.net' and base=='download':    # XXX Yuck
-        base = urllib2.unquote(path.split('/')[-2])
+        base = unquote(path.split('/')[-2])
     if '#' in base: base, fragment = base.split('#',1)
     return base,fragment
 
@@ -104,9 +114,10 @@ def distros_for_filename(filename, metadata=None):
     )
 
 
-def interpret_distro_name(location, basename, metadata,
-    py_version=None, precedence=SOURCE_DIST, platform=None
-):
+def interpret_distro_name(
+        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
+        platform=None
+        ):
     """Generate alternative interpretations of a source distro name
 
     Note: if `location` is a filesystem filename, you should call
@@ -146,7 +157,7 @@ def unique_everseen(iterable, key=None):
     seen = set()
     seen_add = seen.add
     if key is None:
-        for element in itertools.ifilterfalse(seen.__contains__, iterable):
+        for element in filterfalse(seen.__contains__, iterable):
             seen_add(element)
             yield element
     else:
@@ -175,28 +186,85 @@ def find_external_links(url, page):
 
     for match in REL.finditer(page):
         tag, rel = match.groups()
-        rels = map(str.strip, rel.lower().split(','))
+        rels = set(map(str.strip, rel.lower().split(',')))
         if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
-                yield urlparse.urljoin(url, htmldecode(match.group(1)))
+                yield urljoin(url, htmldecode(match.group(1)))
 
     for tag in ("<th>Home Page", "<th>Download URL"):
         pos = page.find(tag)
         if pos!=-1:
             match = HREF.search(page,pos)
             if match:
-                yield urlparse.urljoin(url, htmldecode(match.group(1)))
+                yield urljoin(url, htmldecode(match.group(1)))
 
 user_agent = "Python-urllib/%s setuptools/%s" % (
     sys.version[:3], require('setuptools')[0].version
 )
 
+class ContentChecker(object):
+    """
+    A null content checker that defines the interface for checking content
+    """
+    def feed(self, block):
+        """
+        Feed a block of data to the hash.
+        """
+        return
+
+    def is_valid(self):
+        """
+        Check the hash. Return False if validation fails.
+        """
+        return True
+
+    def report(self, reporter, template):
+        """
+        Call reporter with information about the checker (hash name)
+        substituted into the template.
+        """
+        return
+
+class HashChecker(ContentChecker):
+    pattern = re.compile(
+        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
+        r'(?P<expected>[a-f0-9]+)'
+    )
+
+    def __init__(self, hash_name, expected):
+        self.hash_name = hash_name
+        self.hash = hashlib.new(hash_name)
+        self.expected = expected
+
+    @classmethod
+    def from_url(cls, url):
+        "Construct a (possibly null) ContentChecker from a URL"
+        fragment = urlparse(url)[-1]
+        if not fragment:
+            return ContentChecker()
+        match = cls.pattern.search(fragment)
+        if not match:
+            return ContentChecker()
+        return cls(**match.groupdict())
+
+    def feed(self, block):
+        self.hash.update(block)
+
+    def is_valid(self):
+        return self.hash.hexdigest() == self.expected
+
+    def report(self, reporter, template):
+        msg = template % self.hash_name
+        return reporter(msg)
+
+
 class PackageIndex(Environment):
     """A distribution index that scans web pages for download URLs"""
 
-    def __init__(self, index_url="https://pypi.python.org/simple", hosts=('*',),
-        ca_bundle=None, verify_ssl=True, *args, **kw
-    ):
+    def __init__(
+            self, index_url="https://pypi.python.org/simple", hosts=('*',),
+            ca_bundle=None, verify_ssl=True, *args, **kw
+            ):
         Environment.__init__(self,*args,**kw)
         self.index_url = index_url + "/"[:not index_url.endswith('/')]
         self.scanned_urls = {}
@@ -224,7 +292,7 @@ class PackageIndex(Environment):
             self.debug("Found link: %s", url)
 
         if dists or not retrieve or url in self.fetched_urls:
-            map(self.add, dists)
+            list(map(self.add, dists))
             return  # don't need the actual page
 
         if not self.url_ok(url):
@@ -243,7 +311,7 @@ class PackageIndex(Environment):
         base = f.url     # handle redirects
         page = f.read()
         if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
-            if isinstance(f, urllib2.HTTPError):
+            if isinstance(f, HTTPError):
                 # Errors have no charset, assume latin1:
                 charset = 'latin-1'
             else:
@@ -251,7 +319,7 @@ class PackageIndex(Environment):
             page = page.decode(charset, "ignore")
         f.close()
         for match in HREF.finditer(page):
-            link = urlparse.urljoin(base, htmldecode(match.group(1)))
+            link = urljoin(base, htmldecode(match.group(1)))
             self.process_url(link)
         if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
             page = self.process_index(url, page)
@@ -270,13 +338,14 @@ class PackageIndex(Environment):
         dists = distros_for_filename(fn)
         if dists:
             self.debug("Found: %s", fn)
-            map(self.add, dists)
+            list(map(self.add, dists))
 
     def url_ok(self, url, fatal=False):
         s = URL_SCHEME(url)
-        if (s and s.group(1).lower()=='file') or self.allows(urlparse.urlparse(url)[1]):
+        if (s and s.group(1).lower()=='file') or self.allows(urlparse(url)[1]):
             return True
-        msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n"
+        msg = ("\nNote: Bypassing %s (disallowed host; see "
+            "http://bit.ly/1dg9ijs for details).\n")
         if fatal:
             raise DistutilsError(msg % url)
         else:
@@ -290,7 +359,8 @@ class PackageIndex(Environment):
                 self.scan_egg_link(item, entry)
 
     def scan_egg_link(self, path, entry):
-        lines = filter(None, map(str.strip, open(os.path.join(path, entry))))
+        lines = [_f for _f in map(str.strip,
+                                  open(os.path.join(path, entry))) if _f]
         if len(lines)==2:
             for dist in find_distributions(os.path.join(path, lines[0])):
                 dist.location = os.path.join(path, *lines)
@@ -302,9 +372,9 @@ class PackageIndex(Environment):
         def scan(link):
             # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
-                parts = map(
-                    urllib2.unquote, link[len(self.index_url):].split('/')
-                )
+                parts = list(map(
+                    unquote, link[len(self.index_url):].split('/')
+                ))
                 if len(parts)==2 and '#' not in parts[1]:
                     # it's a package page, sanitize and index it
                     pkg = safe_name(parts[0])
@@ -316,7 +386,7 @@ class PackageIndex(Environment):
         # process an index page into the package-page index
         for match in HREF.finditer(page):
             try:
-                scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+                scan(urljoin(url, htmldecode(match.group(1))))
             except ValueError:
                 pass
 
@@ -339,8 +409,6 @@ class PackageIndex(Environment):
         else:
             return ""   # no sense double-scanning non-package pages
 
-
-
     def need_version_info(self, url):
         self.scan_all(
             "Page at %s links to .py file(s) without version info; an index "
@@ -371,27 +439,28 @@ class PackageIndex(Environment):
             self.scan_url(url)
 
     def obtain(self, requirement, installer=None):
-        self.prescan(); self.find_packages(requirement)
+        self.prescan()
+        self.find_packages(requirement)
         for dist in self[requirement.key]:
             if dist in requirement:
                 return dist
             self.debug("%s does not match %s", requirement, dist)
         return super(PackageIndex, self).obtain(requirement,installer)
 
-
-
-
-
-    def check_md5(self, cs, info, filename, tfp):
-        if re.match('md5=[0-9a-f]{32}$', info):
-            self.debug("Validating md5 checksum for %s", filename)
-            if cs.hexdigest()!=info[4:]:
-                tfp.close()
-                os.unlink(filename)
-                raise DistutilsError(
-                    "MD5 validation failed for "+os.path.basename(filename)+
-                    "; possible download problem?"
-                )
+    def check_hash(self, checker, filename, tfp):
+        """
+        checker is a ContentChecker
+        """
+        checker.report(self.debug,
+            "Validating %%s checksum for %s" % filename)
+        if not checker.is_valid():
+            tfp.close()
+            os.unlink(filename)
+            raise DistutilsError(
+                "%s validation failed for %s; "
+                "possible download problem?" % (
+                    checker.hash.name, os.path.basename(filename))
+            )
 
     def add_find_links(self, urls):
         """Add `urls` to the list that will be prescanned for searches"""
@@ -411,7 +480,7 @@ class PackageIndex(Environment):
     def prescan(self):
         """Scan urls scheduled for prescanning (e.g. --find-links)"""
         if self.to_scan:
-            map(self.scan_url, self.to_scan)
+            list(map(self.scan_url, self.to_scan))
         self.to_scan = None # from now on, go ahead and process immediately
 
     def not_found_in_index(self, requirement):
@@ -463,11 +532,10 @@ class PackageIndex(Environment):
         )
         return getattr(self.fetch_distribution(spec, tmpdir),'location',None)
 
-
-    def fetch_distribution(self,
-        requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
-        local_index=None
-    ):
+    def fetch_distribution(
+            self, requirement, tmpdir, force_scan=False, source=False,
+            develop_ok=False, local_index=None
+            ):
         """Obtain a distribution suitable for fulfilling `requirement`
 
         `requirement` must be a ``pkg_resources.Requirement`` instance.
@@ -505,8 +573,6 @@ class PackageIndex(Environment):
                 if dist in req and (dist.precedence<=SOURCE_DIST or not source):
                     return dist
 
-
-
         if force_scan:
             self.prescan()
             self.find_packages(requirement)
@@ -533,7 +599,6 @@ class PackageIndex(Environment):
             self.info("Best match: %s", dist)
             return dist.clone(location=self.download(dist.location, tmpdir))
 
-
     def fetch(self, requirement, tmpdir, force_scan=False, source=False):
         """Obtain a file suitable for fulfilling `requirement`
 
@@ -547,10 +612,10 @@ class PackageIndex(Environment):
             return dist.location
         return None
 
-
     def gen_setup(self, filename, fragment, tmpdir):
         match = EGG_FRAGMENT.match(fragment)
-        dists = match and [d for d in
+        dists = match and [
+            d for d in
             interpret_distro_name(filename, match.group(1), None) if d.version
         ] or []
 
@@ -595,14 +660,12 @@ class PackageIndex(Environment):
         # Download the file
         fp, tfp, info = None, None, None
         try:
-            if '#' in url:
-                url, info = url.split('#', 1)
-            fp = self.open_url(url)
-            if isinstance(fp, urllib2.HTTPError):
+            checker = HashChecker.from_url(url)
+            fp = self.open_url(strip_fragment(url))
+            if isinstance(fp, HTTPError):
                 raise DistutilsError(
                     "Can't download %s: %s %s" % (url, fp.code,fp.msg)
                 )
-            cs = md5()
             headers = fp.info()
             blocknum = 0
             bs = self.dl_blocksize
@@ -616,13 +679,13 @@ class PackageIndex(Environment):
             while True:
                 block = fp.read(bs)
                 if block:
-                    cs.update(block)
+                    checker.feed(block)
                     tfp.write(block)
                     blocknum += 1
                     self.reporthook(url, filename, blocknum, bs, size)
                 else:
                     break
-            if info: self.check_md5(cs, info, filename, tfp)
+            self.check_hash(checker, filename, tfp)
             return headers
         finally:
             if fp: fp.close()
@@ -631,34 +694,40 @@ class PackageIndex(Environment):
     def reporthook(self, url, filename, blocknum, blksize, size):
         pass    # no-op
 
-
     def open_url(self, url, warning=None):
         if url.startswith('file:'):
             return local_open(url)
         try:
             return open_with_auth(url, self.opener)
-        except (ValueError, httplib.InvalidURL), v:
+        except (ValueError, httplib.InvalidURL):
+            v = sys.exc_info()[1]
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
-        except urllib2.HTTPError, v:
+        except urllib2.HTTPError:
+            v = sys.exc_info()[1]
             return v
-        except urllib2.URLError, v:
+        except urllib2.URLError:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v.reason)
             else:
                 raise DistutilsError("Download error for %s: %s" % (url, v.reason))
-        except httplib.BadStatusLine, v:
+        except httplib.BadStatusLine:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v.line)
             else:
-                raise DistutilsError('%s returned a bad status line. '
-                                     'The server might be down, %s' % \
-                                     (url, v.line))
-        except httplib.HTTPException, v:
+                raise DistutilsError(
+                    '%s returned a bad status line. The server might be '
+                    'down, %s' %
+                    (url, v.line)
+                )
+        except httplib.HTTPException:
+            v = sys.exc_info()[1]
             if warning:
                 self.warn(warning, v)
             else:
@@ -689,7 +758,7 @@ class PackageIndex(Environment):
         elif scheme.startswith('hg+'):
             return self._download_hg(url, filename)
         elif scheme=='file':
-            return urllib.url2pathname(urlparse.urlparse(url)[2])
+            return url2pathname(urlparse(url)[2])
         else:
             self.url_ok(url, True)   # raises error if not allowed
             return self._attempt_download(url, filename)
@@ -697,7 +766,6 @@ class PackageIndex(Environment):
     def scan_url(self, url):
         self.process_url(url, True)
 
-
     def _attempt_download(self, url, filename):
         headers = self._download_to(url, filename)
         if 'html' in headers.get('content-type','').lower():
@@ -720,29 +788,14 @@ class PackageIndex(Environment):
             os.unlink(filename)
             raise DistutilsError("Unexpected HTML page found at "+url)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     def _download_svn(self, url, filename):
         url = url.split('#',1)[0]   # remove any fragment for svn's sake
         creds = ''
         if url.lower().startswith('svn:') and '@' in url:
-            scheme, netloc, path, p, q, f = urlparse.urlparse(url)
+            scheme, netloc, path, p, q, f = urlparse(url)
             if not netloc and path.startswith('//') and '/' in path[2:]:
                 netloc, path = path[2:].split('/',1)
-                auth, host = urllib.splituser(netloc)
+                auth, host = splituser(netloc)
                 if auth:
                     if ':' in auth:
                         user, pw = auth.split(':',1)
@@ -750,13 +803,14 @@ class PackageIndex(Environment):
                     else:
                         creds = " --username="+auth
                     netloc = host
-                    url = urlparse.urlunparse((scheme, netloc, url, p, q, f))
+                    url = urlunparse((scheme, netloc, url, p, q, f))
         self.info("Doing subversion checkout from %s to %s", url, filename)
         os.system("svn checkout%s -q %s %s" % (creds, url, filename))
         return filename
 
-    def _vcs_split_rev_from_url(self, url, pop_prefix=False):
-        scheme, netloc, path, query, frag = urlparse.urlsplit(url)
+    @staticmethod
+    def _vcs_split_rev_from_url(url, pop_prefix=False):
+        scheme, netloc, path, query, frag = urlsplit(url)
 
         scheme = scheme.split('+', 1)[-1]
 
@@ -768,7 +822,7 @@ class PackageIndex(Environment):
             path, rev = path.rsplit('@', 1)
 
         # Also, discard fragment
-        url = urlparse.urlunsplit((scheme, netloc, path, query, ''))
+        url = urlunsplit((scheme, netloc, path, query, ''))
 
         return url, rev
 
@@ -813,18 +867,6 @@ class PackageIndex(Environment):
     def warn(self, msg, *args):
         log.warn(msg, *args)
 
-
-
-
-
-
-
-
-
-
-
-
-
 # This pattern matches a character entity reference (a decimal numeric
 # references, a hexadecimal numeric reference, or a named reference).
 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
@@ -842,7 +884,6 @@ def decode_entity(match):
     elif what.startswith('#'):
         what = int(what[1:])
     else:
-        from htmlentitydefs import name2codepoint
         what = name2codepoint.get(what, match.group(0))
     return uchr(what)
 
@@ -850,20 +891,6 @@ def htmldecode(text):
     """Decode HTML entities in the given text."""
     return entity_sub(decode_entity, text)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 def socket_timeout(timeout=15):
     def _socket_timeout(func):
         def _socket_timeout(*args, **kwargs):
@@ -883,7 +910,7 @@ def _encode_auth(auth):
     >>> _encode_auth('username%3Apassword')
     u'dXNlcm5hbWU6cGFzc3dvcmQ='
     """
-    auth_s = urllib2.unquote(auth)
+    auth_s = unquote(auth)
     # convert to bytes
     auth_bytes = auth_s.encode()
     # use the legacy interface for Python 2.3 support
@@ -896,7 +923,7 @@ def _encode_auth(auth):
 
 def open_with_auth(url, opener=urllib2.urlopen):
     """Open a urllib2 request, handling HTTP authentication"""
 
-    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
+    scheme, netloc, path, params, query, frag = urlparse(url)
 
     # Double scheme does not raise on Mac OS X as revealed by a
     # failing test. We would expect "nonnumeric port". Refs #20.
@@ -904,13 +931,13 @@ def open_with_auth(url, opener=urllib2.urlopen):
         raise httplib.InvalidURL("nonnumeric port: ''")
 
     if scheme in ('http', 'https'):
-        auth, host = urllib.splituser(netloc)
+        auth, host = splituser(netloc)
     else:
         auth = None
 
     if auth:
         auth = "Basic " + _encode_auth(auth)
-        new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
+        new_url = urlunparse((scheme,host,path,params,query,frag))
         request = urllib2.Request(new_url)
         request.add_header("Authorization", auth)
     else:
@@ -922,9 +949,9 @@ def open_with_auth(url, opener=urllib2.urlopen):
     if auth:
         # Put authentication info back into request URL if same host,
         # so that links found on the page will work
-        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
+        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
         if s2==scheme and h2==host:
-            fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))
+            fp.url = urlunparse((s2,netloc,path2,param2,query2,frag2))
 
     return fp
 
@@ -932,22 +959,13 @@ def open_with_auth(url, opener=urllib2.urlopen):
 
 open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
 
-
-
-
-
-
-
-
-
-
 def fix_sf_url(url):
     return url      # backward compatibility
 
 def local_open(url):
     """Read a local path, with special support for directories"""
-    scheme, server, path, param, query, frag = urlparse.urlparse(url)
-    filename = urllib.url2pathname(path)
+    scheme, server, path, param, query, frag = urlparse(url)
+    filename = url2pathname(path)
     if os.path.isfile(filename):
         return urllib2.urlopen(url)
     elif path.endswith('/') and os.path.isdir(filename):
@@ -968,19 +986,5 @@ def local_open(url):
     else:
         status, message, body = 404, "Path not found", "Not found"
 
-    return urllib2.HTTPError(url, status, message,
-            {'content-type':'text/html'}, cStringIO.StringIO(body))
-
-
-
-
-
-
-
-
-
-
-
-
-
-# this line is a kludge to keep the trailing blank lines for pje's editor
+    return HTTPError(url, status, message,
+            {'content-type':'text/html'}, StringIO(body))
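
Editor's note on the checksum rework: the diff's central change replaces the md5-only check_md5() with the ContentChecker/HashChecker pair. A checker is built from the URL's "#<algorithm>=<digest>" fragment, fed each downloaded block, and consulted once at the end, so any of the listed hashlib algorithms now works. A minimal standalone sketch of that flow on Python 3 follows; the URL and payload are invented for illustration and the helper name is hypothetical:

    import hashlib
    import re
    from urllib.parse import urlparse

    # Same fragment grammar as HashChecker.pattern in the diff.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def checker_from_url(url):
        """Return (hash_object, expected_digest), or None when the URL has
        no recognizable hash fragment (the role of the null ContentChecker)."""
        match = pattern.search(urlparse(url).fragment)
        if not match:
            return None
        return hashlib.new(match.group('hash_name')), match.group('expected')

    payload = b'example payload'
    url = ('https://example.com/pkg-1.0.tar.gz#sha256='
           + hashlib.sha256(payload).hexdigest())
    hash_obj, expected = checker_from_url(url)
    for block in (payload[:8], payload[8:]):   # feed() once per block
        hash_obj.update(block)
    assert hash_obj.hexdigest() == expected    # is_valid()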
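On the exception handling: every "except SomeError, v:" clause becomes a bare "except SomeError:" followed by "v = sys.exc_info()[1]". That is the one spelling that parses everywhere this tree still targeted: the comma form is Python 2-only syntax, and "except SomeError as v" requires Python 2.6+, while the py24compat imports above show Python 2.4 was still supported. A sketch of the pattern outside setuptools; parse_port is a hypothetical helper:

    import sys

    def parse_port(text):
        # Compiles on Python 2.4+ and Python 3 alike: neither the 2.x-only
        # "except ValueError, e" nor the 2.6+ "except ValueError as e" appears.
        try:
            return int(text)
        except ValueError:
            e = sys.exc_info()[1]   # portably grab the in-flight exception
            raise RuntimeError('invalid port %r: %s' % (text, e))

    print(parse_port('8080'))       # -> 8080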
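Likewise, each side-effecting map() call is wrapped in list(...). On Python 2, map() returns a list and runs immediately; on Python 3 it returns a lazy iterator, so a bare "map(self.add, dists)" would silently do nothing. Forcing it with list() preserves the old behaviour on both versions, as this toy Python 3 demonstration shows:

    added = []

    # Lazy: nothing happens until the iterator is consumed.
    lazy = map(added.append, [1, 2, 3])
    assert added == []

    # Consuming the iterator runs the side effects, as on Python 2.
    list(lazy)
    assert added == [1, 2, 3]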
