From 2c0d7a614de63eacda257fbfc8898bcdb2a79492 Mon Sep 17 00:00:00 2001 From: Jonathan Maw Date: Tue, 28 Aug 2018 16:12:45 +0100 Subject: Make DownloadableFileSource clearly define public and private fields TarSource and RemoteSource now parse url from the config themselves, because they need it, but the url is private. --- buildstream/downloadablefilesource.py | 146 +++++++++++++++++++++------------- buildstream/plugins/sources/deb.py | 2 +- buildstream/plugins/sources/remote.py | 10 ++- buildstream/plugins/sources/tar.py | 8 +- buildstream/plugins/sources/zip.py | 2 +- 5 files changed, 107 insertions(+), 61 deletions(-) diff --git a/buildstream/downloadablefilesource.py b/buildstream/downloadablefilesource.py index 11981c4c2..593db8997 100644 --- a/buildstream/downloadablefilesource.py +++ b/buildstream/downloadablefilesource.py @@ -2,6 +2,13 @@ DownloadableFileSource - Abstract class for downloading files ============================================================= A base abstract class for source implementations which download a file. + +Derived classes must write their own stage() implementation, using the +public APIs exposed in this class. + +Derived classes must also chain up to the parent method in their get_unique_key() +implementations. + """ import os @@ -19,50 +26,54 @@ class DownloadableFileSource(Source): COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref', 'etag'] + ##################################### + # Implementations of abstract methods + ##################################### + def configure(self, node): - self.original_url = self.node_get_member(node, str, 'url') - self.ref = self.node_get_member(node, str, 'ref', None) - self.url = self.translate_url(self.original_url) - self._warn_deprecated_etag(node) + self.__original_url = self.node_get_member(node, str, 'url') + self.__ref = self.node_get_member(node, str, 'ref', None) + self.__url = self.translate_url(self.__original_url) + self.__warn_deprecated_etag(node) def preflight(self): return def get_unique_key(self): - return [self.original_url, self.ref] + return [self.__original_url, self.__ref] def get_consistency(self): - if self.ref is None: + if self.__ref is None: return Consistency.INCONSISTENT - if os.path.isfile(self._get_mirror_file()): + if os.path.isfile(self.get_mirror_file()): return Consistency.CACHED else: return Consistency.RESOLVED def load_ref(self, node): - self.ref = self.node_get_member(node, str, 'ref', None) - self._warn_deprecated_etag(node) + self.__ref = self.node_get_member(node, str, 'ref', None) + self.__warn_deprecated_etag(node) def get_ref(self): - return self.ref + return self.__ref def set_ref(self, ref, node): - node['ref'] = self.ref = ref + node['ref'] = self.__ref = ref def track(self): # there is no 'track' field in the source to determine what/whether # or not to update refs, because tracking a ref is always a conscious # decision by the user. - with self.timed_activity("Tracking {}".format(self.url), + with self.timed_activity("Tracking {}".format(self.__url), silent_nested=True): - new_ref = self._ensure_mirror() + new_ref = self.ensure_mirror() - if self.ref and self.ref != new_ref: + if self.__ref and self.__ref != new_ref: detail = "When tracking, new ref differs from current ref:\n" \ - + " Tracked URL: {}\n".format(self.url) \ - + " Current ref: {}\n".format(self.ref) \ + + " Tracked URL: {}\n".format(self.__url) \ + + " Current ref: {}\n".format(self.__ref) \ + " New ref: {}\n".format(new_ref) self.warn("Potential man-in-the-middle attack!", detail=detail) @@ -74,49 +85,40 @@ class DownloadableFileSource(Source): # file to be already cached because Source.fetch() will # not be called if the source is already Consistency.CACHED. # - if os.path.isfile(self._get_mirror_file()): + if os.path.isfile(self.get_mirror_file()): return # pragma: nocover # Download the file, raise hell if the sha256sums don't match, # and mirror the file otherwise. - with self.timed_activity("Fetching {}".format(self.url), silent_nested=True): - sha256 = self._ensure_mirror() - if sha256 != self.ref: + with self.timed_activity("Fetching {}".format(self.__url), silent_nested=True): + sha256 = self.ensure_mirror() + if sha256 != self.__ref: raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!" - .format(self.url, sha256, self.ref)) + .format(self.__url, sha256, self.__ref)) + ################ + # Public methods + ################ - def _warn_deprecated_etag(self, node): - etag = self.node_get_member(node, str, 'etag', None) - if etag: - provenance = self.node_provenance(node, member_name='etag') - self.warn('{} "etag" is deprecated and ignored.'.format(provenance)) + def ensure_mirror(self): + """Downloads from the url and caches it according to its sha256sum - def _get_etag(self, ref): - etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref)) - if os.path.exists(etagfilename): - with open(etagfilename, 'r') as etagfile: - return etagfile.read() - - return None + Returns: + (str): The sha256sum of the mirrored file - def _store_etag(self, ref, etag): - etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref)) - with utils.save_file_atomic(etagfilename) as etagfile: - etagfile.write(etag) - - def _ensure_mirror(self): - # Downloads from the url and caches it according to its sha256sum. + Raises: + :class:`.SourceError` + """ try: with self.tempdir() as td: - default_name = os.path.basename(self.url) - request = urllib.request.Request(self.url) + default_name = os.path.basename(self.__url) + request = urllib.request.Request(self.__url) request.add_header('Accept', '*/*') # We do not use etag in case what we have in cache is # not matching ref in order to be able to recover from # corrupted download. - if self.ref: - etag = self._get_etag(self.ref) + if self.__ref: + etag = self.__get_etag(self.__ref) # Do not re-download the file if the ETag matches. if etag and self.get_consistency() == Consistency.CACHED: @@ -134,17 +136,17 @@ class DownloadableFileSource(Source): shutil.copyfileobj(response, dest) # Make sure url-specific mirror dir exists. - if not os.path.isdir(self._get_mirror_dir()): - os.makedirs(self._get_mirror_dir()) + if not os.path.isdir(self.__get_mirror_dir()): + os.makedirs(self.__get_mirror_dir()) # Store by sha256sum sha256 = utils.sha256sum(local_file) # Even if the file already exists, move the new file over. # In case the old file was corrupted somehow. - os.rename(local_file, self._get_mirror_file(sha256)) + os.rename(local_file, self.get_mirror_file(sha=sha256)) if etag: - self._store_etag(sha256, etag) + self.__store_etag(sha256, etag) return sha256 except urllib.error.HTTPError as e: @@ -152,19 +154,53 @@ class DownloadableFileSource(Source): # 304 Not Modified. # Because we use etag only for matching ref, currently specified ref is what # we would have downloaded. - return self.ref + return self.__ref raise SourceError("{}: Error mirroring {}: {}" - .format(self, self.url, e), temporary=True) from e + .format(self, self.__url, e), temporary=True) from e except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError, ValueError) as e: # Note that urllib.request.Request in the try block may throw a # ValueError for unknown url types, so we handle it here. raise SourceError("{}: Error mirroring {}: {}" - .format(self, self.url, e), temporary=True) from e + .format(self, self.__url, e), temporary=True) from e + + def get_mirror_file(self, *, sha=None): + """Calculates the path to where this source stores the downloaded file - def _get_mirror_dir(self): + Users are expected to read the file this points to when staging their source. + + Returns: + (str): A path to the file the source should be cached at + """ + return os.path.join(self.__get_mirror_dir(), sha or self.__ref) + + ####################### + # Local Private methods + ####################### + + def __get_mirror_dir(self): + # Get the directory this source should store things in, for a given URL return os.path.join(self.get_mirror_directory(), - utils.url_directory_name(self.original_url)) + utils.url_directory_name(self.__original_url)) + + def __warn_deprecated_etag(self, node): + # Warn the user if the 'etag' field is being used + etag = self.node_get_member(node, str, 'etag', None) + if etag: + provenance = self.node_provenance(node, member_name='etag') + self.warn('{} "etag" is deprecated and ignored.'.format(provenance)) - def _get_mirror_file(self, sha=None): - return os.path.join(self._get_mirror_dir(), sha or self.ref) + def __get_etag(self, ref): + # Retrieve the etag's data from disk + etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref)) + if os.path.exists(etagfilename): + with open(etagfilename, 'r') as etagfile: + return etagfile.read() + + return None + + def __store_etag(self, ref, etag): + # Write the etag's data to disk + etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref)) + with utils.save_file_atomic(etagfilename) as etagfile: + etagfile.write(etag) diff --git a/buildstream/plugins/sources/deb.py b/buildstream/plugins/sources/deb.py index 1b7dafd31..b62a976da 100644 --- a/buildstream/plugins/sources/deb.py +++ b/buildstream/plugins/sources/deb.py @@ -68,7 +68,7 @@ class DebSource(TarSource): @contextmanager def _get_tar(self): - with open(self._get_mirror_file(), 'rb') as deb_file: + with open(self.get_mirror_file(), 'rb') as deb_file: arpy_archive = arpy.Archive(fileobj=deb_file) arpy_archive.read_all_headers() data_tar_arpy = [v for k, v in arpy_archive.archived_files.items() if b"data.tar" in k][0] diff --git a/buildstream/plugins/sources/remote.py b/buildstream/plugins/sources/remote.py index c296d3158..fbfff9056 100644 --- a/buildstream/plugins/sources/remote.py +++ b/buildstream/plugins/sources/remote.py @@ -61,7 +61,13 @@ class RemoteSource(DownloadableFileSource): def configure(self, node): super().configure(node) - self.filename = self.node_get_member(node, str, 'filename', os.path.basename(self.url)) + self.filename = self.node_get_member(node, str, 'filename', None) + if not self.filename: + # url in DownloadableFileSource is private, so we read it again + original_url = self.node_get_member(node, str, 'url') + url = self.translate_url(original_url) + self.filename = os.path.basename(url) + self.executable = self.node_get_member(node, bool, 'executable', False) if os.sep in self.filename: @@ -78,7 +84,7 @@ class RemoteSource(DownloadableFileSource): dest = os.path.join(directory, self.filename) with self.timed_activity("Staging remote file to {}".format(dest)): - utils.safe_copy(self._get_mirror_file(), dest) + utils.safe_copy(self.get_mirror_file(), dest) # To prevent user's umask introducing variability here, explicitly set # file modes. diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py index 75219dc89..667b28379 100644 --- a/buildstream/plugins/sources/tar.py +++ b/buildstream/plugins/sources/tar.py @@ -71,6 +71,10 @@ class TarSource(DownloadableFileSource): def configure(self, node): super().configure(node) + # url in DownloadableFileSource is private, so we read it again + original_url = self.node_get_member(node, str, 'url') + self.url = self.translate_url(original_url) + self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir']) @@ -87,7 +91,7 @@ class TarSource(DownloadableFileSource): def _run_lzip(self): assert self.host_lzip with TemporaryFile() as lzip_stdout: - with open(self._get_mirror_file(), 'r') as lzip_file: + with open(self.get_mirror_file(), 'r') as lzip_file: self.call([self.host_lzip, '-d'], stdin=lzip_file, stdout=lzip_stdout) @@ -102,7 +106,7 @@ class TarSource(DownloadableFileSource): with tarfile.open(fileobj=lzip_dec, mode='r:') as tar: yield tar else: - with tarfile.open(self._get_mirror_file()) as tar: + with tarfile.open(self.get_mirror_file()) as tar: yield tar def stage(self, directory): diff --git a/buildstream/plugins/sources/zip.py b/buildstream/plugins/sources/zip.py index dafd6f500..5773e0dba 100644 --- a/buildstream/plugins/sources/zip.py +++ b/buildstream/plugins/sources/zip.py @@ -83,7 +83,7 @@ class ZipSource(DownloadableFileSource): noexec_rights = exec_rights & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) try: - with zipfile.ZipFile(self._get_mirror_file()) as archive: + with zipfile.ZipFile(self.get_mirror_file()) as archive: base_dir = None if self.base_dir: base_dir = self._find_base_dir(archive, self.base_dir) -- cgit v1.2.1