summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nguyễn Gia Phong <mcsinyx@disroot.org> 2020-08-09 22:44:20 +0700
committer Nguyễn Gia Phong <mcsinyx@disroot.org> 2020-08-11 22:39:42 +0700
commit 078e0effb72b1078bab3d268aa5b4e374505e18a (patch)
tree ba2ace5c9a36eaab85aa1af5c08a4b3e7ee12f82
parent e62f16e96938ee24e7a57168b829942526be56e2 (diff)
download pip-078e0effb72b1078bab3d268aa5b4e374505e18a.tar.gz
Add memoization mechanism for file download
This is intentionally independent of caching, which relies on the cache dir.
-rw-r--r-- news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial 0
-rw-r--r-- src/pip/_internal/network/download.py 9
-rw-r--r-- src/pip/_internal/operations/prepare.py 43
-rw-r--r-- src/pip/_internal/resolution/resolvelib/resolver.py 6
-rw-r--r-- tests/unit/test_operations_prepare.py 2
5 files changed, 39 insertions, 21 deletions
diff --git a/news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial b/news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial
diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py
index a4d4bd2a5..0eb4fd9ce 100644
--- a/src/pip/_internal/network/download.py
+++ b/src/pip/_internal/network/download.py
@@ -151,8 +151,9 @@ class Downloader(object):
self._session = session
self._progress_bar = progress_bar
- def __call__(self, link, location):
+ def download_one(self, link, location):
# type: (Link, str) -> Tuple[str, str]
+ """Download the file given by link into location."""
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
@@ -168,3 +169,9 @@ class Downloader(object):
for chunk in chunks:
content_file.write(chunk)
return content_file.name, resp.headers.get('Content-Type', '')
+
+ def download_many(self, links, location):
+ # type: (Iterable[Link], str) -> Iterable[Tuple[str, Tuple[str, str]]]
+ """Download the files given by links into location."""
+ for link in links:
+ yield link.url, self.download_one(link, location)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index e4de1be4a..5fdbd674b 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -45,7 +45,7 @@ from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
if MYPY_CHECK_RUNNING:
- from typing import Callable, List, Optional
+ from typing import Callable, Dict, List, Optional, Tuple
from mypy_extensions import TypedDict
from pip._vendor.pkg_resources import Distribution
@@ -130,7 +130,7 @@ def get_http_url(
content_type = mimetypes.guess_type(from_path)[0]
else:
# let's download to a tmp dir
- from_path, content_type = downloader(link, temp_dir.path)
+ from_path, content_type = downloader.download_one(link, temp_dir.path)
if hashes:
hashes.check_against_path(from_path)
@@ -352,6 +352,9 @@ class RequirementPreparer(object):
# Should wheels be downloaded lazily?
self.use_lazy_wheel = lazy_wheel
+ # Memoized downloaded files, as mapping of url: (path, mime type)
+ self._downloaded = {} # type: Dict[str, Tuple[str, str]]
+
@property
def _download_should_save(self):
# type: () -> bool
@@ -480,12 +483,15 @@ class RequirementPreparer(object):
return wheel_dist
return self._prepare_linked_requirement(req, parallel_builds)
- def prepare_linked_requirement_more(self, req, parallel_builds=False):
- # type: (InstallRequirement, bool) -> None
+ def prepare_linked_requirements_more(self, reqs, parallel_builds=False):
+ # type: (List[InstallRequirement], bool) -> None
"""Prepare a linked requirement more, if needed."""
- if not req.needs_more_preparation:
- return
- self._prepare_linked_requirement(req, parallel_builds)
+ # Let's download to a temporary directory.
+ tmpdir = TempDirectory(kind="unpack", globally_managed=True).path
+ links = (req.link for req in reqs)
+ self._downloaded.update(self.downloader.download_many(links, tmpdir))
+ for req in reqs:
+ self._prepare_linked_requirement(req, parallel_builds)
def _prepare_linked_requirement(self, req, parallel_builds):
# type: (InstallRequirement, bool) -> Distribution
@@ -499,16 +505,19 @@ class RequirementPreparer(object):
with indent_log():
self._ensure_link_req_src_dir(req, download_dir, parallel_builds)
- try:
- local_file = unpack_url(
- link, req.source_dir, self.downloader, download_dir,
- hashes=self._get_linked_req_hashes(req)
- )
- except NetworkConnectionError as exc:
- raise InstallationError(
- 'Could not install requirement {} because of HTTP '
- 'error {} for URL {}'.format(req, exc, link)
- )
+ if link.url in self._downloaded:
+ local_file = File(*self._downloaded[link.url])
+ else:
+ try:
+ local_file = unpack_url(
+ link, req.source_dir, self.downloader, download_dir,
+ hashes=self._get_linked_req_hashes(req)
+ )
+ except NetworkConnectionError as exc:
+ raise InstallationError(
+ 'Could not install requirement {} because of HTTP '
+ 'error {} for URL {}'.format(req, exc, link)
+ )
# For use in later processing, preserve the file path on the
# requirement.
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py
index fde86413d..1cabe236d 100644
--- a/src/pip/_internal/resolution/resolvelib/resolver.py
+++ b/src/pip/_internal/resolution/resolvelib/resolver.py
@@ -160,8 +160,10 @@ class Resolver(BaseResolver):
req_set.add_named_requirement(ireq)
- for actual_req in req_set.all_requirements:
- self.factory.preparer.prepare_linked_requirement_more(actual_req)
+ self.factory.preparer.prepare_linked_requirements_more([
+ req for req in req_set.all_requirements
+ if req.needs_more_preparation
+ ])
return req_set
diff --git a/tests/unit/test_operations_prepare.py b/tests/unit/test_operations_prepare.py
index d2e4d6091..e90eab8d7 100644
--- a/tests/unit/test_operations_prepare.py
+++ b/tests/unit/test_operations_prepare.py
@@ -79,7 +79,7 @@ def test_download_http_url__no_directory_traversal(mock_raise_for_status,
download_dir = tmpdir.joinpath('download')
os.mkdir(download_dir)
- file_path, content_type = downloader(link, download_dir)
+ file_path, content_type = downloader.download_one(link, download_dir)
# The file should be downloaded to download_dir.
actual = os.listdir(download_dir)
assert actual == ['out_dir_file']