From 078e0effb72b1078bab3d268aa5b4e374505e18a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Gia=20Phong?= Date: Sun, 9 Aug 2020 22:44:20 +0700 Subject: Add memoization mechanism for file download This is intentionally independent of caching, which relies on cache dir. --- news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial | 0 src/pip/_internal/network/download.py | 9 ++++- src/pip/_internal/operations/prepare.py | 43 +++++++++++++--------- .../_internal/resolution/resolvelib/resolver.py | 6 ++- tests/unit/test_operations_prepare.py | 2 +- 5 files changed, 39 insertions(+), 21 deletions(-) create mode 100644 news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial diff --git a/news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial b/news/a3a2b1b7-744e-4533-b3ff-6e7a1843d573.trivial new file mode 100644 index 000000000..e69de29bb diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index a4d4bd2a5..0eb4fd9ce 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -151,8 +151,9 @@ class Downloader(object): self._session = session self._progress_bar = progress_bar - def __call__(self, link, location): + def download_one(self, link, location): # type: (Link, str) -> Tuple[str, str] + """Download the file given by link into location.""" try: resp = _http_get_download(self._session, link) except NetworkConnectionError as e: @@ -168,3 +169,9 @@ class Downloader(object): for chunk in chunks: content_file.write(chunk) return content_file.name, resp.headers.get('Content-Type', '') + + def download_many(self, links, location): + # type: (Iterable[Link], str) -> Iterable[Tuple[str, Tuple[str, str]]] + """Download the files given by links into location.""" + for link in links: + yield link.url, self.download_one(link, location) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index e4de1be4a..5fdbd674b 100644 --- a/src/pip/_internal/operations/prepare.py 
+++ b/src/pip/_internal/operations/prepare.py @@ -45,7 +45,7 @@ from pip._internal.utils.unpacking import unpack_file from pip._internal.vcs import vcs if MYPY_CHECK_RUNNING: - from typing import Callable, List, Optional + from typing import Callable, Dict, List, Optional, Tuple from mypy_extensions import TypedDict from pip._vendor.pkg_resources import Distribution @@ -130,7 +130,7 @@ def get_http_url( content_type = mimetypes.guess_type(from_path)[0] else: # let's download to a tmp dir - from_path, content_type = downloader(link, temp_dir.path) + from_path, content_type = downloader.download_one(link, temp_dir.path) if hashes: hashes.check_against_path(from_path) @@ -352,6 +352,9 @@ class RequirementPreparer(object): # Should wheels be downloaded lazily? self.use_lazy_wheel = lazy_wheel + # Memoized downloaded files, as mapping of url: (path, mime type) + self._downloaded = {} # type: Dict[str, Tuple[str, str]] + @property def _download_should_save(self): # type: () -> bool @@ -480,12 +483,15 @@ class RequirementPreparer(object): return wheel_dist return self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirement_more(self, req, parallel_builds=False): - # type: (InstallRequirement, bool) -> None + def prepare_linked_requirements_more(self, reqs, parallel_builds=False): + # type: (List[InstallRequirement], bool) -> None """Prepare a linked requirement more, if needed.""" - if not req.needs_more_preparation: - return - self._prepare_linked_requirement(req, parallel_builds) + # Let's download to a temporary directory. 
+ tmpdir = TempDirectory(kind="unpack", globally_managed=True).path + links = (req.link for req in reqs) + self._downloaded.update(self.downloader.download_many(links, tmpdir)) + for req in reqs: + self._prepare_linked_requirement(req, parallel_builds) def _prepare_linked_requirement(self, req, parallel_builds): # type: (InstallRequirement, bool) -> Distribution @@ -499,16 +505,19 @@ class RequirementPreparer(object): with indent_log(): self._ensure_link_req_src_dir(req, download_dir, parallel_builds) - try: - local_file = unpack_url( - link, req.source_dir, self.downloader, download_dir, - hashes=self._get_linked_req_hashes(req) - ) - except NetworkConnectionError as exc: - raise InstallationError( - 'Could not install requirement {} because of HTTP ' - 'error {} for URL {}'.format(req, exc, link) - ) + if link.url in self._downloaded: + local_file = File(*self._downloaded[link.url]) + else: + try: + local_file = unpack_url( + link, req.source_dir, self.downloader, download_dir, + hashes=self._get_linked_req_hashes(req) + ) + except NetworkConnectionError as exc: + raise InstallationError( + 'Could not install requirement {} because of HTTP ' + 'error {} for URL {}'.format(req, exc, link) + ) # For use in later processing, preserve the file path on the # requirement. 
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index fde86413d..1cabe236d 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -160,8 +160,10 @@ class Resolver(BaseResolver): req_set.add_named_requirement(ireq) - for actual_req in req_set.all_requirements: - self.factory.preparer.prepare_linked_requirement_more(actual_req) + self.factory.preparer.prepare_linked_requirements_more([ + req for req in req_set.all_requirements + if req.needs_more_preparation + ]) return req_set diff --git a/tests/unit/test_operations_prepare.py b/tests/unit/test_operations_prepare.py index d2e4d6091..e90eab8d7 100644 --- a/tests/unit/test_operations_prepare.py +++ b/tests/unit/test_operations_prepare.py @@ -79,7 +79,7 @@ def test_download_http_url__no_directory_traversal(mock_raise_for_status, download_dir = tmpdir.joinpath('download') os.mkdir(download_dir) - file_path, content_type = downloader(link, download_dir) + file_path, content_type = downloader.download_one(link, download_dir) # The file should be downloaded to download_dir. actual = os.listdir(download_dir) assert actual == ['out_dir_file'] -- cgit v1.2.1