diff options
author | Stéphane Bidoul <stephane.bidoul@gmail.com> | 2023-04-15 11:38:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-15 11:38:38 +0200 |
commit | dbf4e6842c9603792f6d3944a5c9cec17bd0a92a (patch) | |
tree | a9e4630d04fa03dea335d5a93a6041cf736c0bde /src/pip/_internal | |
parent | f7787f8798712e475ebbf71f5487f92158f043a9 (diff) | |
parent | efe2d27451d50b165df78093bf5885da713fbdf8 (diff) | |
download | pip-dbf4e6842c9603792f6d3944a5c9cec17bd0a92a.tar.gz |
Merge pull request #11897 from sbidoul/cache-hash-checking-sbi
Support wheel cache when using --require-hashes
Diffstat (limited to 'src/pip/_internal')
-rw-r--r-- | src/pip/_internal/operations/prepare.py | 59 | ||||
-rw-r--r-- | src/pip/_internal/req/req_install.py | 12 | ||||
-rw-r--r-- | src/pip/_internal/resolution/legacy/resolver.py | 2 | ||||
-rw-r--r-- | src/pip/_internal/resolution/resolvelib/candidates.py | 6 | ||||
-rw-r--r-- | src/pip/_internal/resolution/resolvelib/factory.py | 2 | ||||
-rw-r--r-- | src/pip/_internal/utils/hashes.py | 7 |
6 files changed, 74 insertions, 14 deletions
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index dda92d29b..227331523 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -179,7 +179,10 @@ def unpack_url( def _check_download_dir( - link: Link, download_dir: str, hashes: Optional[Hashes] + link: Link, + download_dir: str, + hashes: Optional[Hashes], + warn_on_hash_mismatch: bool = True, ) -> Optional[str]: """Check download_dir for previously downloaded file with correct hash If a correct file is found return its path else None @@ -195,10 +198,11 @@ def _check_download_dir( try: hashes.check_against_path(download_path) except HashMismatch: - logger.warning( - "Previously-downloaded file %s has bad hash. Re-downloading.", - download_path, - ) + if warn_on_hash_mismatch: + logger.warning( + "Previously-downloaded file %s has bad hash. Re-downloading.", + download_path, + ) os.unlink(download_path) return None return download_path @@ -263,7 +267,7 @@ class RequirementPreparer: def _log_preparing_link(self, req: InstallRequirement) -> None: """Provide context for the requirement being prepared.""" - if req.link.is_file and not req.original_link_is_in_wheel_cache: + if req.link.is_file and not req.is_wheel_from_cache: message = "Processing %s" information = str(display_path(req.link.file_path)) else: @@ -284,7 +288,7 @@ class RequirementPreparer: self._previous_requirement_header = (message, information) logger.info(message, information) - if req.original_link_is_in_wheel_cache: + if req.is_wheel_from_cache: with indent_log(): logger.info("Using cached %s", req.link.filename) @@ -485,7 +489,18 @@ class RequirementPreparer: file_path = None if self.download_dir is not None and req.link.is_wheel: hashes = self._get_linked_req_hashes(req) - file_path = _check_download_dir(req.link, self.download_dir, hashes) + file_path = _check_download_dir( + req.link, + self.download_dir, + hashes, + # When a locally built wheel has been found in cache, we don't warn + # about re-downloading when the already downloaded wheel hash does + # not match. This is because the hash must be checked against the + # original link, not the cached link. It that case the already + # downloaded file will be removed and re-fetched from cache (which + # implies a hash check against the cache entry's origin.json). + warn_on_hash_mismatch=not req.is_wheel_from_cache, + ) if file_path is not None: # The file is already available, so mark it as downloaded @@ -536,9 +551,35 @@ class RequirementPreparer: assert req.link link = req.link - self._ensure_link_req_src_dir(req, parallel_builds) hashes = self._get_linked_req_hashes(req) + if hashes and req.is_wheel_from_cache: + assert req.download_info is not None + assert link.is_wheel + assert link.is_file + # We need to verify hashes, and we have found the requirement in the cache + # of locally built wheels. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and req.download_info.info.hashes + and hashes.has_one_of(req.download_info.info.hashes) + ): + # At this point we know the requirement was built from a hashable source + # artifact, and we verified that the cache entry's hash of the original + # artifact matches one of the hashes we expect. We don't verify hashes + # against the cached wheel, because the wheel is not the original. + hashes = None + else: + logger.warning( + "The hashes of the source archive found in cache entry " + "don't match, ignoring cached built wheel " + "and re-downloading source." + ) + req.link = req.cached_wheel_source_link + link = req.link + + self._ensure_link_req_src_dir(req, parallel_builds) + if link.is_existing_dir(): local_file = None elif link.url not in self._downloaded: diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index e2353f032..a0ea58bd1 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -108,7 +108,11 @@ class InstallRequirement: # PEP 508 URL requirement link = Link(req.url) self.link = self.original_link = link - self.original_link_is_in_wheel_cache = False + + # When this InstallRequirement is a wheel obtained from the cache of locally + # built wheels, this is the source link corresponding to the cache entry, which + # was used to download and build the cached wheel. + self.cached_wheel_source_link: Optional[Link] = None # Information about the location of the artifact that was downloaded . This # property is guaranteed to be set in resolver results. @@ -437,6 +441,12 @@ class InstallRequirement: return False return self.link.is_wheel + @property + def is_wheel_from_cache(self) -> bool: + # When True, it means that this InstallRequirement is a local wheel file in the + # cache of locally built wheels. + return self.cached_wheel_source_link is not None + # Things valid for sdists @property def unpacked_source_directory(self) -> str: diff --git a/src/pip/_internal/resolution/legacy/resolver.py b/src/pip/_internal/resolution/legacy/resolver.py index 3a561e6db..b17b7e453 100644 --- a/src/pip/_internal/resolution/legacy/resolver.py +++ b/src/pip/_internal/resolution/legacy/resolver.py @@ -431,7 +431,7 @@ class Resolver(BaseResolver): if cache_entry is not None: logger.debug("Using cached wheel link: %s", cache_entry.link) if req.link is req.original_link and cache_entry.persistent: - req.original_link_is_in_wheel_cache = True + req.cached_wheel_source_link = req.link if cache_entry.origin is not None: req.download_info = cache_entry.origin else: diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index e5e9d1fd7..31020e27a 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -259,7 +259,7 @@ class LinkCandidate(_InstallRequirementBackedCandidate): version: Optional[CandidateVersion] = None, ) -> None: source_link = link - cache_entry = factory.get_wheel_cache_entry(link, name) + cache_entry = factory.get_wheel_cache_entry(source_link, name) if cache_entry is not None: logger.debug("Using cached wheel link: %s", cache_entry.link) link = cache_entry.link @@ -277,8 +277,10 @@ class LinkCandidate(_InstallRequirementBackedCandidate): ) if cache_entry is not None: + assert ireq.link.is_wheel + assert ireq.link.is_file if cache_entry.persistent and template.link is template.original_link: - ireq.original_link_is_in_wheel_cache = True + ireq.cached_wheel_source_link = source_link if cache_entry.origin is not None: ireq.download_info = cache_entry.origin else: diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index 0ad4641b1..0331297b8 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -535,7 +535,7 @@ class Factory: hash mismatches. Furthermore, cached wheels at present have nondeterministic contents due to file modification times. """ - if self._wheel_cache is None or self.preparer.require_hashes: + if self._wheel_cache is None: return None return self._wheel_cache.get_cache_entry( link=link, diff --git a/src/pip/_internal/utils/hashes.py b/src/pip/_internal/utils/hashes.py index 76727306a..843cffc6b 100644 --- a/src/pip/_internal/utils/hashes.py +++ b/src/pip/_internal/utils/hashes.py @@ -105,6 +105,13 @@ class Hashes: with open(path, "rb") as file: return self.check_against_file(file) + def has_one_of(self, hashes: Dict[str, str]) -> bool: + """Return whether any of the given hashes are allowed.""" + for hash_name, hex_digest in hashes.items(): + if self.is_hash_allowed(hash_name, hex_digest): + return True + return False + def __bool__(self) -> bool: """Return whether I know any known-good hashes.""" return bool(self._allowed) |