summaryrefslogtreecommitdiff
path: root/Lib/packaging/pypi/dist.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/packaging/pypi/dist.py')
-rw-r--r--Lib/packaging/pypi/dist.py544
1 files changed, 544 insertions, 0 deletions
diff --git a/Lib/packaging/pypi/dist.py b/Lib/packaging/pypi/dist.py
new file mode 100644
index 0000000000..541465e63f
--- /dev/null
+++ b/Lib/packaging/pypi/dist.py
@@ -0,0 +1,544 @@
+"""Classes representing releases and distributions retrieved from indexes.
+
+A project (= unique name) can have several releases (= versions) and
+each release can have several distributions (= sdist and bdists).
+
+Release objects contain metadata-related information (see PEP 376);
+distribution objects contain download-related information.
+"""
+
+import re
+import hashlib
+import tempfile
+import urllib.request
+import urllib.parse
+import urllib.error
+import urllib.parse
+from shutil import unpack_archive
+
+from packaging.errors import IrrationalVersionError
+from packaging.version import (suggest_normalized_version, NormalizedVersion,
+ get_version_predicate)
+from packaging.metadata import Metadata
+from packaging.pypi.errors import (HashDoesNotMatch, UnsupportedHashName,
+ CantParseArchiveName)
+
+
+__all__ = ['ReleaseInfo', 'DistInfo', 'ReleasesList', 'get_infos_from_url']
+
# Archive suffixes that are stripped from a file name before it is split
# into a project name and a version.
EXTENSIONS = [".tar.gz", ".tar.bz2", ".tar", ".zip", ".tgz", ".egg"]
# Captures the hexadecimal digest of a trailing "#md5=<digest>" URL fragment.
MD5_HASH = re.compile(r'^.*#md5=([a-f0-9]+)$')
# Distribution kinds accepted by ReleaseInfo.add_distribution().
DIST_TYPES = ['bdist', 'sdist']
+
+
class IndexReference:
    """Mixin that remembers which index an object was obtained from."""

    def set_index(self, index=None):
        """Store *index* (``None`` when omitted) on the ``_index`` attribute."""
        self._index = index
+
+
class ReleaseInfo(IndexReference):
    """Represent a release of a project (a project at a specific version).

    A release holds the metadata related to this specific version and is
    also a container for its distributions, stored in ``self.dists`` and
    keyed by distribution type.

    See the DistInfo class for more information about distributions.
    """

    def __init__(self, name, version, metadata=None, hidden=False,
                 index=None, **kwargs):
        """
        :param name: the name of the distribution
        :param version: the version of the distribution; it goes through
                        the ``version`` property and is therefore
                        normalized (see :meth:`set_version`).
        :param metadata: the metadata fields of the release.
        :type metadata: dict
        :param hidden: stored unchanged on the ``hidden`` attribute.
        :param index: the index this release was retrieved from.
        :param kwargs: optional arguments for a new distribution. They are
                       only used when they contain a ``dist_type`` entry.
        """
        self.set_index(index)
        self.name = name
        self._version = None
        self.version = version
        if metadata:
            self.metadata = Metadata(mapping=metadata)
        else:
            self.metadata = None
        self.dists = {}
        self.hidden = hidden

        if 'dist_type' in kwargs:
            dist_type = kwargs.pop('dist_type')
            self.add_distribution(dist_type, **kwargs)

    def set_version(self, version):
        """Store *version* as a NormalizedVersion.

        When *version* is not a valid normalized version, the value
        proposed by suggest_normalized_version() is used instead.

        :raises IrrationalVersionError: if no normalized form can be
                                        suggested for *version*.
        """
        try:
            self._version = NormalizedVersion(version)
        except IrrationalVersionError:
            suggestion = suggest_normalized_version(version)
            if suggestion:
                # goes through the property again, with a cleaned-up value
                self.version = suggestion
            else:
                raise IrrationalVersionError(version)

    def get_version(self):
        """Return the version object set by :meth:`set_version`."""
        return self._version

    version = property(get_version, set_version)

    def fetch_metadata(self):
        """If the metadata is not set, use the indexes to get it"""
        if not self.metadata:
            # NOTE(review): the index is presumably responsible for filling
            # self.metadata (e.g. through set_metadata) -- confirm.
            self._index.get_metadata(self.name, str(self.version))
        return self.metadata

    @property
    def is_final(self):
        """proxy to version.is_final"""
        return self.version.is_final

    def fetch_distributions(self):
        """Return the distributions, asking the index when ``dists`` is None.

        ``dists`` is guaranteed to be a dict (possibly empty) on return.
        """
        if self.dists is None:
            self._index.get_distributions(self.name, str(self.version))
            if self.dists is None:
                self.dists = {}
        return self.dists

    def add_distribution(self, dist_type='sdist', python_version=None,
                         **params):
        """Add distribution informations to this release.
        If distribution information is already set for this distribution type,
        add the given url paths to the distribution. This can be useful while
        some of them fails to download.

        :param dist_type: the distribution type (eg. "sdist", "bdist", etc.)
        :param python_version: when given, set on the distribution object.
        :param params: the fields to be passed to the distribution object
                       (see the :class:DistInfo constructor).
        :raises ValueError: if *dist_type* is not one of DIST_TYPES.
        """
        if dist_type not in DIST_TYPES:
            raise ValueError(dist_type)
        if dist_type in self.dists:
            self.dists[dist_type].add_url(**params)
        else:
            self.dists[dist_type] = DistInfo(self, dist_type,
                                             index=self._index, **params)
        if python_version:
            self.dists[dist_type].python_version = python_version

    def get_distribution(self, dist_type=None, prefer_source=True):
        """Return a distribution.

        If dist_type is set, act as an alias of ``release[dist_type]``.

        Otherwise, if prefer_source is True and a source distribution
        exists, return it. In every other case return one of the existing
        distributions (the first one stored).

        :raises LookupError: if the release has no distribution at all
                             (a missing *dist_type* raises KeyError, which
                             is a LookupError too).
        """
        if not self.dists:
            raise LookupError
        if dist_type:
            return self[dist_type]
        if prefer_source and "sdist" in self.dists:
            return self["sdist"]
        # dict views are iterable but are not iterators: next() needs iter()
        return next(iter(self.dists.values()))

    def unpack(self, path=None, prefer_source=True):
        """Unpack the distribution to the given path.

        If not destination is given, creates a temporary location.

        Returns the location of the extracted files (root).
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.unpack(path=path)

    def download(self, temp_path=None, prefer_source=True):
        """Download the distribution selected by :meth:`get_distribution`.

        The archive is put in temp_path. If no temp_path is given, one is
        created.

        Returns the complete absolute path to the downloaded archive.
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.download(path=temp_path)

    def set_metadata(self, metadata):
        """Update the release metadata with *metadata*, creating it first
        when the release has none yet."""
        if not self.metadata:
            self.metadata = Metadata()
        self.metadata.update(metadata)

    def __getitem__(self, item):
        """distributions are available using release["sdist"]"""
        return self.dists[item]

    def _check_is_comparable(self, other):
        """Raise TypeError unless *other* is a ReleaseInfo of the same
        project name."""
        if not isinstance(other, ReleaseInfo):
            raise TypeError("cannot compare %s and %s"
                % (type(self).__name__, type(other).__name__))
        elif self.name != other.name:
            raise TypeError("cannot compare %s and %s"
                % (self.name, other.name))

    def __repr__(self):
        return "<%s %s>" % (self.name, self.version)

    # Comparisons are done on versions and are only defined between
    # releases of the same project (see _check_is_comparable).
    def __eq__(self, other):
        self._check_is_comparable(other)
        return self.version == other.version

    def __lt__(self, other):
        self._check_is_comparable(other)
        return self.version < other.version

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__eq__(other) or self.__lt__(other)

    def __ge__(self, other):
        return self.__eq__(other) or self.__gt__(other)

    # Defining __eq__ disables the inherited hash; restore it explicitly.
    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
+
+
class DistInfo(IndexReference):
    """Represents a distribution retrieved from an index (sdist, bdist, ...)
    """

    def __init__(self, release, dist_type=None, url=None, hashname=None,
                 hashval=None, is_external=True, python_version=None,
                 index=None):
        """Create a new instance of DistInfo.

        :param release: a DistInfo class is relative to a release.
        :param dist_type: the type of the dist (eg. source, bin-*, etc.)
        :param url: URL where we found this distribution
        :param hashname: the name of the hash we want to use. Refer to the
                         hashlib.new documentation for more information.
        :param hashval: the hash value.
        :param is_external: we need to know if the provided url comes from
                            an index browsing, or from an external resource.
        :param python_version: stored unchanged on the instance.
        :param index: the index this distribution was retrieved from.
        :raises UnsupportedHashName: if hashlib does not know *hashname*.
        """
        self.set_index(index)
        self.release = release
        self.dist_type = dist_type
        self.python_version = python_version
        self._unpacked_dir = None
        # set the downloaded path to None by default. The goal here
        # is to not download distributions multiple times
        self.downloaded_location = None
        # We store urls in dicts, because we need a bit more information
        # than the simple URL. It is used later to find the right url to
        # use.
        # There are two url attributes: urls is a list of dicts, one per
        # known url, and _url caches the chosen one so that the selection
        # process is not run multiple times.
        self.urls = []
        self._url = None
        self.add_url(url, hashname, hashval, is_external)

    def add_url(self, url=None, hashname=None, hashval=None, is_external=True):
        """Add a new url to the list of urls.

        A url that is already known is not added twice. In both cases the
        cached url selection is reset.

        :raises UnsupportedHashName: if hashlib does not know *hashname*.
        """
        if hashname is not None:
            try:
                hashlib.new(hashname)
            except ValueError:
                raise UnsupportedHashName(hashname)
        if url not in [u['url'] for u in self.urls]:
            self.urls.append({
                'url': url,
                'hashname': hashname,
                'hashval': hashval,
                'is_external': is_external,
            })
            # reset the url selection process
            self._url = None

    @property
    def url(self):
        """Pick up the right url from the list of urls in self.urls.

        The value is the whole url dict ('url', 'hashname', 'hashval' and
        'is_external' keys), not only the address.
        """
        # We return internal urls over externals.
        # If there is more than one internal or external, return the first
        # one.
        if self._url is None:
            if len(self.urls) > 1:
                internals_urls = [u for u in self.urls
                                  if not u['is_external']]
                if internals_urls:
                    self._url = internals_urls[0]
            if self._url is None:
                self._url = self.urls[0]
        return self._url

    @property
    def is_source(self):
        """return if the distribution is a source one or not"""
        return self.dist_type == 'sdist'

    def download(self, path=None):
        """Download the distribution to a path, and return it.

        If the path is given in path, use this, otherwise, generates a new one
        Return the download location. A distribution that was already
        downloaded is not fetched again; its location is returned as is.

        :raises HashDoesNotMatch: if the archive does not match the hash
                                  registered with the chosen url.
        """
        # if we do not have downloaded it yet, do it.
        if self.downloaded_location is None:
            if path is None:
                # only create a directory when something will be put in it
                path = tempfile.mkdtemp()
            url = self.url['url']
            archive_name = urllib.parse.urlparse(url)[2].split('/')[-1]
            filename, headers = urllib.request.urlretrieve(url,
                                       path + "/" + archive_name)
            # Verify before caching the location, so that a corrupted
            # archive is never returned by a later call.
            self._check_md5(filename)
            self.downloaded_location = filename
        return self.downloaded_location

    def unpack(self, path=None):
        """Unpack the distribution to the given path.

        If not destination is given, creates a temporary location.

        Returns the location of the extracted files (root). The archive is
        only unpacked once: later calls return the directory used the first
        time, whatever *path* is.
        """
        if not self._unpacked_dir:
            if path is None:
                path = tempfile.mkdtemp()

            filename = self.download(path)
            unpack_archive(filename, path)
            self._unpacked_dir = path

        return self._unpacked_dir

    def _check_md5(self, filename):
        """Check that the checksum of the given file matches the one
        registered with the chosen url.

        Despite its name, the hash algorithm is the url's 'hashname'.
        Nothing is checked when the hash name or the hash value is missing.

        :raises HashDoesNotMatch: if the digests differ.
        """
        hashname = self.url['hashname']
        expected_hashval = self.url['hashval']
        if None not in (expected_hashval, hashname):
            hashval = hashlib.new(hashname)
            with open(filename, 'rb') as f:
                # read by chunks: archives can be large
                for chunk in iter(lambda: f.read(65536), b''):
                    hashval.update(chunk)

            if hashval.hexdigest() != expected_hashval:
                raise HashDoesNotMatch("got %s instead of %s"
                    % (hashval.hexdigest(), expected_hashval))

    def __repr__(self):
        if self.release is None:
            return "<? ? %s>" % self.dist_type

        return "<%s %s %s>" % (
            self.release.name, self.release.version, self.dist_type or "")
+
+
class ReleasesList(IndexReference):
    """A container of ReleaseInfo objects belonging to one project.

    Provides useful methods and facilities to sort and filter releases.
    """
    def __init__(self, name, releases=None, contains_hidden=False, index=None):
        """
        :param name: the name of the project.
        :param releases: optional iterable of ReleaseInfo objects to add.
        :param contains_hidden: stored unchanged on the instance.
        :param index: the index this list was retrieved from.
        """
        self.set_index(index)
        self.releases = []
        self.name = name
        self.contains_hidden = contains_hidden
        if releases:
            self.add_releases(releases)

    def fetch_releases(self):
        """Ask the index for the releases of this project and return the
        list of releases."""
        # NOTE(review): the index is presumably responsible for filling
        # self.releases -- confirm.
        self._index.get_releases(self.name)
        return self.releases

    def filter(self, predicate):
        """Filter and return a subset of releases matching the given predicate.

        The result is a new ReleasesList; *predicate* must provide a
        ``match(version)`` method.
        """
        return ReleasesList(self.name, [release for release in self.releases
                                        if predicate.match(release.version)],
                            index=self._index)

    def get_last(self, requirements, prefer_final=None):
        """Return the "last" release, that satisfy the given predicates.

        "last" is defined by the version number of the releases, you also could
        set prefer_final parameter to True or False to change the order results

        Return None when no release satisfies *requirements*.
        """
        predicate = get_version_predicate(requirements)
        releases = self.filter(predicate)
        if len(releases) == 0:
            return None
        releases.sort_releases(prefer_final, reverse=True)
        return releases[0]

    def add_releases(self, releases):
        """Add releases in the release list.

        :param: releases is a list of ReleaseInfo objects.
        """
        for r in releases:
            self.add_release(release=r)

    def add_release(self, version=None, dist_type='sdist', release=None,
                    **dist_args):
        """Add a release to the list.

        The release can be passed in the `release` parameter, and in this case,
        it will be crawled to extract the useful informations if necessary, or
        the release informations can be directly passed in the `version` and
        `dist_type` arguments.

        Other keywords arguments can be provided, and will be forwarded to the
        distribution creation (eg. the arguments of the DistInfo constructor).

        Project names are compared case-insensitively.

        :raises ValueError: if `release` belongs to another project.
        """
        project = self.name.lower()
        if release is not None:
            if release.name.lower() != project:
                raise ValueError("%s is not the same project as %s" %
                                 (release.name, self.name))
            version = str(release.version)

            if version not in self.get_versions():
                # append only if not already exists
                self.releases.append(release)
            # register every url of every distribution on the stored release
            for dist in release.dists.values():
                for url in dist.urls:
                    self.add_release(version, dist.dist_type, **url)
        else:
            # Same case-insensitive rule as above: otherwise a release whose
            # name differs only by case would be duplicated here.
            matches = [r for r in self.releases
                       if str(r.version) == version
                       and r.name.lower() == project]
            if not matches:
                release = ReleaseInfo(self.name, version, index=self._index)
                self.releases.append(release)
            else:
                release = matches[0]

            release.add_distribution(dist_type=dist_type, **dist_args)

    def sort_releases(self, prefer_final=False, reverse=True, *args, **kwargs):
        """Sort the releases in place with the given properties.

        The `prefer_final` argument can be used to specify if final
        distributions (eg. not dev, beta or alpha) would be preferred or not.

        Results can be inverted by using `reverse`.

        Any other parameter provided will be forwarded to the sort call. You
        cannot redefine the key argument of the sort here, as it is used
        internally to sort the releases.
        """

        sort_by = []
        if prefer_final:
            # is_final comes first in the key, so it takes precedence over
            # the version number
            sort_by.append("is_final")
        sort_by.append("version")

        self.releases.sort(
            key=lambda i: tuple(getattr(i, arg) for arg in sort_by),
            reverse=reverse, *args, **kwargs)

    def get_release(self, version):
        """Return a release from its version.

        :raises KeyError: unless exactly one release has this version.
        """
        matches = [r for r in self.releases if str(r.version) == version]
        if len(matches) != 1:
            raise KeyError(version)
        return matches[0]

    def get_versions(self):
        """Return a list of releases versions contained"""
        return [str(r.version) for r in self.releases]

    def __getitem__(self, key):
        return self.releases[key]

    def __len__(self):
        return len(self.releases)

    def __repr__(self):
        string = 'Project "%s"' % self.name
        if self.get_versions():
            string += ' versions: %s' % ', '.join(self.get_versions())
        return '<%s>' % string
+
+
def get_infos_from_url(url, probable_dist_name=None, is_external=True):
    """Extract release and distribution information from an archive URL.

    Return a dict with the 'name', 'version', 'url', 'hashname', 'hashval',
    'is_external' and 'dist_type' keys when the file name ends with one of
    the known EXTENSIONS, and None otherwise. The name and version are
    computed before the extension check, so errors raised by
    split_archive_name propagate in both cases.

    :param url: complete url of the distribution
    :param probable_dist_name: A probable name of the project (accepted but
                               currently not used by this function).
    :param is_external: Tell if the url comes from an index or from
                        an external URL.
    """
    # Pull the digest out of a trailing "#md5=..." fragment, then drop the
    # fragment from the url.
    md5_hash = None
    hash_match = MD5_HASH.match(url)
    if hash_match is not None:
        md5_hash = hash_match.group(1)
        url = url.replace("#md5=%s" % md5_hash, "")

    # The archive name is the last component of the url path.
    archive_name = urllib.parse.urlparse(url).path.rsplit('/', 1)[-1]

    # Strip every known extension found at the end of the name.
    extension_matched = False
    for suffix in EXTENSIONS:
        if not archive_name.endswith(suffix):
            continue
        archive_name = archive_name[:-len(suffix)]
        extension_matched = True

    name, version = split_archive_name(archive_name)
    if not extension_matched:
        return None
    return dict(name=name,
                version=version,
                url=url,
                hashname="md5",
                hashval=md5_hash,
                is_external=is_external,
                dist_type='sdist')
+
+
def split_archive_name(archive_name, probable_name=None):
    """Split an archive name into two parts: name and version.

    Return the tuple (name, version), with the name lowercased and the
    version normalized by suggest_normalized_version.

    :param archive_name: the archive file name, without its extension.
    :param probable_name: a probable project name. When the archive name
                          starts with it (ignoring case) followed by "-",
                          the rest is taken as the version.
    :raises CantParseArchiveName: if no non-empty name and valid version
                                  can be found.
    """
    # Try to determine which part is the name and which is the version using
    # the "-" separator. Take the larger part to be the version number then
    # reduce if this does not work.
    def eager_split(text, maxsplit=2):
        # split from the right using the "-" separator: the first item is
        # the name candidate, everything else the version candidate
        splits = text.rsplit("-", maxsplit)
        name = splits[0]
        version = "-".join(splits[1:])
        if version.startswith("-"):
            version = version[1:]
        if suggest_normalized_version(version) is None and maxsplit >= 0:
            # we don't get a good version number: recurse with a smaller
            # version candidate. The last attempt uses maxsplit=-1, which
            # means "no limit" for rsplit.
            return eager_split(text, maxsplit - 1)
        return name, version

    name = version = None
    if probable_name:
        candidate = probable_name.lower()
        prefix = candidate + "-"
        # Slice the prefix off: str.lstrip() would strip a *set of
        # characters* and could eat the beginning of the version.
        if archive_name.lower().startswith(prefix):
            name = candidate
            version = archive_name[len(prefix):]

    if name is None or suggest_normalized_version(version) is None:
        # no usable hint: guess from the archive name alone
        name, version = eager_split(archive_name)

    version = suggest_normalized_version(version)
    if version is not None and name != "":
        return name.lower(), version
    raise CantParseArchiveName(archive_name)