From 30769028860da65af8967250cdf7679bbcc0c7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Tue, 26 Mar 2019 09:43:16 +0100 Subject: element.py: Drop remote execution fallback for incompatible plugins Element plugins will be required to support virtual directories before the next major version. Drop support for fallback from remote to local execution as this blocks partial CAS. --- buildstream/element.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/buildstream/element.py b/buildstream/element.py index 5c28b4753..0dbb91148 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -2348,7 +2348,7 @@ class Element(Plugin): # supports it. # def __use_remote_execution(self): - return self.__remote_execution_specs and self.BST_VIRTUAL_DIRECTORY + return bool(self.__remote_execution_specs) # __sandbox(): # @@ -2376,6 +2376,11 @@ class Element(Plugin): if directory is not None and allow_remote and self.__use_remote_execution(): + if not self.BST_VIRTUAL_DIRECTORY: + raise ElementError("Element {} is configured to use remote execution but plugin does not support it." + .format(self.name), detail="Plugin '{kind}' does not support virtual directories." + .format(kind=self.get_kind())) + self.info("Using a remote sandbox for artifact {} with directory '{}'".format(self.name, directory)) sandbox = SandboxRemote(context, project, @@ -2390,12 +2395,6 @@ class Element(Plugin): yield sandbox elif directory is not None and os.path.exists(directory): - if allow_remote and self.__remote_execution_specs: - self.warn("Artifact {} is configured to use remote execution but element plugin does not support it." - .format(self.name), detail="Element plugin '{kind}' does not support virtual directories." - .format(kind=self.get_kind()), warning_token="remote-failure") - - self.info("Falling back to local sandbox for artifact {}".format(self.name)) sandbox = platform.create_sandbox(context, project, directory, -- cgit v1.2.1 From b1cbd2a704b7d66143658466f7dcef1809d10d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Tue, 26 Mar 2019 09:14:42 +0100 Subject: element.py: Add _set_artifact_files_required() method This will be used to allow build sessions with a partial local CAS when using remote execution. --- buildstream/element.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/buildstream/element.py b/buildstream/element.py index 0dbb91148..d4c4aa0b1 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -227,6 +227,7 @@ class Element(Plugin): self.__staged_sources_directory = None # Location where Element.stage_sources() was called self.__tainted = None # Whether the artifact is tainted and should not be shared self.__required = False # Whether the artifact is required in the current session + self.__artifact_files_required = False # Whether artifact files are required in the local cache self.__build_result = None # The result of assembling this Element (success, description, detail) self._build_log_path = None # The path of the build log for this Element self.__artifact = Artifact(self, context) # Artifact class for direct artifact composite interaction @@ -1556,6 +1557,29 @@ class Element(Plugin): def _is_required(self): return self.__required + # _set_artifact_files_required(): + # + # Mark artifact files for this element and its runtime dependencies as + # required in the local cache. 
+    #
+    def _set_artifact_files_required(self):
+        if self.__artifact_files_required:
+            # Already done
+            return
+
+        self.__artifact_files_required = True
+
+        # Request artifact files of runtime dependencies
+        for dep in self.dependencies(Scope.RUN, recurse=False):
+            dep._set_artifact_files_required()
+
+    # _artifact_files_required():
+    #
+    # Returns whether artifact files for this element have been marked as required.
+    #
+    def _artifact_files_required(self):
+        return self.__artifact_files_required
+
     # _schedule_assemble():
     #
     # This is called in the main process before the element is assembled
-- cgit v1.2.1

From 61d328e3c5797a2e6bd0e339ca6881097337c933 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Tue, 26 Mar 2019 09:18:00 +0100
Subject: _artifact.py: Use Element._artifact_files_required() in cached()

Ensure required artifact files are available for elements that need them
even if the context does not require artifact files for all elements.
---
 buildstream/_artifact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/buildstream/_artifact.py b/buildstream/_artifact.py
index 6d26eb392..ba9c626fc 100644
--- a/buildstream/_artifact.py
+++ b/buildstream/_artifact.py
@@ -469,7 +469,7 @@ class Artifact():
         # Determine whether directories are required
         require_directories = context.require_artifact_directories
         # Determine whether file contents are required as well
-        require_files = context.require_artifact_files
+        require_files = context.require_artifact_files or self._element._artifact_files_required()

         filesdigest = vdir._get_child_digest('files')

-- cgit v1.2.1

From 4823256b66a97b2136b83ab6aa0ae4850d99b266 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Tue, 26 Mar 2019 09:49:34 +0100
Subject: _stream.py: Support partial CAS for build sessions with remote execution

---
 buildstream/_stream.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/buildstream/_stream.py b/buildstream/_stream.py
index 73bc4badd..64a578c92 100644
--- a/buildstream/_stream.py
+++ b/buildstream/_stream.py
@@ -247,6 +247,13 @@ class Stream():
         # Assert that the elements we're not going to track are consistent
         self._pipeline.assert_consistent(elements)

+        if all(project.remote_execution_specs for project in self._context.get_projects()):
+            # Remote execution is configured for all projects.
+            # Require artifact files only for target elements and their runtime dependencies.
+            self._context.set_artifact_files_optional()
+            for element in self.targets:
+                element._set_artifact_files_required()
+
         # Now construct the queues
         #
         track_queue = None
-- cgit v1.2.1

From 6168faf1a8bc66994c8f5b67ea2871f596451416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Thu, 28 Mar 2019 11:01:18 +0100
Subject: element.py: Do not pull file contents if not required

---
 buildstream/element.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/buildstream/element.py b/buildstream/element.py
index d4c4aa0b1..b63cc3a4f 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -2983,6 +2983,11 @@ class Element(Plugin):
                 subdir = "buildtree"
                 excluded_subdirs.remove(subdir)

+        # If file contents are not required for this element, don't pull them.
+        # The directories themselves will always be pulled.
+ if not context.require_artifact_files and not self._artifact_files_required(): + excluded_subdirs.append("files") + return (subdir, excluded_subdirs) # __cache_sources(): -- cgit v1.2.1 From 417a9fb231abac5c85e57b8216fe2d6012740045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 11:57:41 +0200 Subject: cascache.py: Make _required_blobs() public --- buildstream/_cas/cascache.py | 59 ++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py index eae3ef04d..3a3662aa8 100644 --- a/buildstream/_cas/cascache.py +++ b/buildstream/_cas/cascache.py @@ -276,7 +276,7 @@ class CASCache(): self._fetch_directory(remote, tree) # Fetch files, excluded_subdirs determined in pullqueue - required_blobs = self._required_blobs(tree, excluded_subdirs=excluded_subdirs) + required_blobs = self.required_blobs_for_directory(tree, excluded_subdirs=excluded_subdirs) missing_blobs = self.local_missing_blobs(required_blobs) if missing_blobs: self.fetch_blobs(remote, missing_blobs) @@ -647,7 +647,7 @@ class CASCache(): # Returns: List of missing Digest objects # def remote_missing_blobs_for_directory(self, remote, digest): - required_blobs = self._required_blobs(digest) + required_blobs = self.required_blobs_for_directory(digest) missing_blobs = dict() # Limit size of FindMissingBlobs request @@ -685,6 +685,36 @@ class CASCache(): missing_blobs.append(digest) return missing_blobs + # required_blobs_for_directory(): + # + # Generator that returns the Digests of all blobs in the tree specified by + # the Digest of the toplevel Directory object. + # + def required_blobs_for_directory(self, directory_digest, *, excluded_subdirs=None): + if not excluded_subdirs: + excluded_subdirs = [] + + # parse directory, and recursively add blobs + d = remote_execution_pb2.Digest() + d.hash = directory_digest.hash + d.size_bytes = directory_digest.size_bytes + yield d + + directory = remote_execution_pb2.Directory() + + with open(self.objpath(directory_digest), 'rb') as f: + directory.ParseFromString(f.read()) + + for filenode in directory.files: + d = remote_execution_pb2.Digest() + d.hash = filenode.digest.hash + d.size_bytes = filenode.digest.size_bytes + yield d + + for dirnode in directory.directories: + if dirnode.name not in excluded_subdirs: + yield from self.required_blobs_for_directory(dirnode.digest) + ################################################ # Local Private Methods # ################################################ @@ -881,31 +911,6 @@ class CASCache(): for dirnode in directory.directories: self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, check_exists=check_exists) - def _required_blobs(self, directory_digest, *, excluded_subdirs=None): - if not excluded_subdirs: - excluded_subdirs = [] - - # parse directory, and recursively add blobs - d = remote_execution_pb2.Digest() - d.hash = directory_digest.hash - d.size_bytes = directory_digest.size_bytes - yield d - - directory = remote_execution_pb2.Directory() - - with open(self.objpath(directory_digest), 'rb') as f: - directory.ParseFromString(f.read()) - - for filenode in directory.files: - d = remote_execution_pb2.Digest() - d.hash = filenode.digest.hash - d.size_bytes = filenode.digest.size_bytes - yield d - - for dirnode in directory.directories: - if dirnode.name not in excluded_subdirs: - yield from self._required_blobs(dirnode.digest) - # _temporary_object(): # # Returns: -- cgit v1.2.1 
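With required_blobs_for_directory() public, callers outside of pull() can walk a
Directory tree and fetch whatever the local CAS lacks. A minimal sketch of that
composition, using only methods that appear in this series (the helper name
ensure_directory_blobs is illustrative, not part of the patches):

    def ensure_directory_blobs(cascache, remote, tree_digest):
        # Digests of every blob reachable from the toplevel Directory object
        required = cascache.required_blobs_for_directory(tree_digest)
        # Narrow that down to blobs the local CAS does not already hold
        missing = cascache.local_missing_blobs(required)
        if missing:
            cascache.fetch_blobs(remote, missing)

The _sandboxremote.py patches later in the series follow the same pattern, with
an extra filtering step after local_missing_blobs().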
From 403b41a598160b374219a4c0fbc75f08a1bbb37a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 12:05:34 +0200 Subject: _sandboxremote.py: Fetch output file blobs after pull_tree() This allows skipping file download in pull_tree()/_fetch_tree() and is preparation for conditional output file fetching. --- buildstream/sandbox/_sandboxremote.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/buildstream/sandbox/_sandboxremote.py b/buildstream/sandbox/_sandboxremote.py index ffd53f0d0..46a3db527 100644 --- a/buildstream/sandbox/_sandboxremote.py +++ b/buildstream/sandbox/_sandboxremote.py @@ -277,7 +277,7 @@ class SandboxRemote(Sandbox): cascache = context.get_cascache() casremote = CASRemote(self.storage_remote_spec) - # Now do a pull to ensure we have the necessary parts. + # Now do a pull to ensure we have the full directory structure. dir_digest = cascache.pull_tree(casremote, tree_digest) if dir_digest is None or not dir_digest.hash or not dir_digest.size_bytes: raise SandboxError("Output directory structure pulling from remote failed.") @@ -289,6 +289,14 @@ class SandboxRemote(Sandbox): new_dir = CasBasedDirectory(context.artifactcache.cas, digest=dir_digest) self._set_virtual_directory(new_dir) + # Fetch the file blobs + required_blobs = cascache.required_blobs_for_directory(dir_digest) + local_missing_blobs = cascache.local_missing_blobs(required_blobs) + remote_missing_blobs = cascache.fetch_blobs(casremote, local_missing_blobs) + if remote_missing_blobs: + raise SandboxError("{} output files are missing on the CAS server" + .format(len(remote_missing_blobs))) + def _run(self, command, flags, *, cwd, env): stdout, stderr = self._get_output() -- cgit v1.2.1 From 5780868547efc0db143cb54cbe97d646259331a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 11:40:03 +0200 Subject: cascache.py: Do not fetch files in _fetch_tree() Download them separately as they are not always required. --- buildstream/_cas/cascache.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py index 3a3662aa8..8c64e76d2 100644 --- a/buildstream/_cas/cascache.py +++ b/buildstream/_cas/cascache.py @@ -1047,11 +1047,6 @@ class CASCache(): tree.children.extend([tree.root]) for directory in tree.children: - for filenode in directory.files: - self._ensure_blob(remote, filenode.digest) - - # place directory blob only in final location when we've downloaded - # all referenced blobs to avoid dangling references in the repository dirbuffer = directory.SerializeToString() dirdigest = self.add_object(buffer=dirbuffer) assert dirdigest.size_bytes == len(dirbuffer) -- cgit v1.2.1 From 4ea1ae62a42ebbaa0b8ba9fbee2c1629b2f73b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 13:26:17 +0200 Subject: cascache.py: Add remote_missing_blobs() method Extracted from remote_missing_blobs_for_directory(). 
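The hunk below keeps the existing batching of FindMissingBlobs requests into
groups of 512 digests via the module's _grouper() helper. For reference, a
minimal stand-in with the same observable behaviour (the real helper in
cascache.py may be implemented differently):

    import itertools

    def grouper(iterable, n):
        # Yield lists of at most n items until the iterable is exhausted
        it = iter(iterable)
        while True:
            group = list(itertools.islice(it, n))
            if not group:
                return
            yield group

Capping the batch size keeps each gRPC request comfortably below typical
message-size limits on CAS servers.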
---
 buildstream/_cas/cascache.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index 8c64e76d2..3a621fbb1 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -649,9 +649,21 @@ class CASCache():
     def remote_missing_blobs_for_directory(self, remote, digest):
         required_blobs = self.required_blobs_for_directory(digest)
+        return self.remote_missing_blobs(remote, required_blobs)
+
+    # remote_missing_blobs():
+    #
+    # Determine which blobs are missing on the remote.
+    #
+    # Args:
+    #     blobs ([Digest]): The Digests of the blobs to check
+    #
+    # Returns: List of missing Digest objects
+    #
+    def remote_missing_blobs(self, remote, blobs):
         missing_blobs = dict()
         # Limit size of FindMissingBlobs request
-        for required_blobs_group in _grouper(required_blobs, 512):
+        for required_blobs_group in _grouper(blobs, 512):
             request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)

             for required_digest in required_blobs_group:
-- cgit v1.2.1

From a16a5cfc1b91eb09b22baff63571c88f03381dfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Mon, 1 Apr 2019 13:28:52 +0200
Subject: _artifactcache.py: Add find_missing_blobs() method

---
 buildstream/_artifactcache.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index fb0670e3e..7a6f2ea0c 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -436,3 +436,30 @@ class ArtifactCache(BaseCache):

         if missing_blobs:
             raise ArtifactError("Blobs not found on configured artifact servers")
+
+    # find_missing_blobs():
+    #
+    # Find missing blobs from configured push remote repositories.
+    #
+    # Args:
+    #     project (Project): The current project
+    #     missing_blobs (list): The Digests of the blobs to check
+    #
+    # Returns:
+    #     (list): The Digests of the blobs missing on at least one push remote
+    #
+    def find_missing_blobs(self, project, missing_blobs):
+        if not missing_blobs:
+            return []
+
+        push_remotes = [r for r in self._remotes[project] if r.spec.push]
+
+        remote_missing_blobs_set = set()
+
+        for remote in push_remotes:
+            remote.init()
+
+            remote_missing_blobs = self.cas.remote_missing_blobs(remote, missing_blobs)
+            remote_missing_blobs_set.update(remote_missing_blobs)
+
+        return list(remote_missing_blobs_set)
-- cgit v1.2.1

From 18f3a8ad07746cc62319b2520752a078c9cd4dfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Mon, 1 Apr 2019 12:19:32 +0200
Subject: _sandboxremote.py: Make output file fetching conditional

Output files need to be downloaded only if they are required in the
local cache or if artifact push remotes need them.
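Condensed into a standalone function, the decision implemented by the diff
below has three outcomes (parameter names are illustrative; the logic mirrors
the hunk):

    def select_blobs_to_fetch(files_required, has_push_remotes,
                              local_missing, find_missing_on_remotes):
        # Nobody needs the file contents: skip the download entirely
        if not files_required and not has_push_remotes:
            return []
        # The local cache needs them: fetch everything missing locally
        if files_required:
            return local_missing
        # Only push remotes need them: fetch just the blobs that at least
        # one push remote lacks
        return find_missing_on_remotes(local_missing)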
---
 buildstream/sandbox/_sandboxremote.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/buildstream/sandbox/_sandboxremote.py b/buildstream/sandbox/_sandboxremote.py
index 46a3db527..1219afefc 100644
--- a/buildstream/sandbox/_sandboxremote.py
+++ b/buildstream/sandbox/_sandboxremote.py
@@ -53,6 +53,8 @@ class SandboxRemote(Sandbox):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

+        self._output_files_required = kwargs.get('output_files_required', True)
+
         config = kwargs['specs']  # This should be a RemoteExecutionSpec
         if config is None:
             return
@@ -274,7 +276,9 @@ class SandboxRemote(Sandbox):
             raise SandboxError("Output directory structure had no digest attached.")

         context = self._get_context()
+        project = self._get_project()
         cascache = context.get_cascache()
+        artifactcache = context.artifactcache
         casremote = CASRemote(self.storage_remote_spec)

         # Now do a pull to ensure we have the full directory structure.
@@ -289,13 +293,25 @@ class SandboxRemote(Sandbox):
         new_dir = CasBasedDirectory(context.artifactcache.cas, digest=dir_digest)
         self._set_virtual_directory(new_dir)

-        # Fetch the file blobs
-        required_blobs = cascache.required_blobs_for_directory(dir_digest)
-        local_missing_blobs = cascache.local_missing_blobs(required_blobs)
-        remote_missing_blobs = cascache.fetch_blobs(casremote, local_missing_blobs)
-        if remote_missing_blobs:
-            raise SandboxError("{} output files are missing on the CAS server"
-                               .format(len(remote_missing_blobs)))
+        # Fetch the file blobs if needed
+        if self._output_files_required or artifactcache.has_push_remotes():
+            required_blobs = cascache.required_blobs_for_directory(dir_digest)
+            local_missing_blobs = cascache.local_missing_blobs(required_blobs)
+            if local_missing_blobs:
+                if self._output_files_required:
+                    # Fetch all blobs from Remote Execution CAS server
+                    blobs_to_fetch = local_missing_blobs
+                else:
+                    # Output files are not required in the local cache,
+                    # however, artifact push remotes will need them.
+                    # Only fetch blobs that are missing on one or more
+                    # artifact servers.
+                    blobs_to_fetch = artifactcache.find_missing_blobs(project, local_missing_blobs)
+
+                remote_missing_blobs = cascache.fetch_blobs(casremote, blobs_to_fetch)
+                if remote_missing_blobs:
+                    raise SandboxError("{} output files are missing on the CAS server"
+                                       .format(len(remote_missing_blobs)))

     def _run(self, command, flags, *, cwd, env):
         stdout, stderr = self._get_output()
-- cgit v1.2.1

From 4f04e714a66e7fc550248fe10d9acb11e376c315 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Wed, 3 Apr 2019 11:33:26 +0200
Subject: sandbox.py: Add _set_build_directory() method

This will enable SandboxRemote to download the file blobs of the build
directory only when needed.
---
 buildstream/sandbox/sandbox.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/buildstream/sandbox/sandbox.py b/buildstream/sandbox/sandbox.py
index 93db2f8ca..c96ccb57b 100644
--- a/buildstream/sandbox/sandbox.py
+++ b/buildstream/sandbox/sandbox.py
@@ -147,6 +147,8 @@ class Sandbox():
             os.makedirs(directory_, exist_ok=True)

         self._output_directory = None
+        self._build_directory = None
+        self._build_directory_always = None
         self._vdir = None
         self._usebuildtree = False
@@ -592,6 +594,20 @@ class Sandbox():
     def _disable_run(self):
         self.__allow_run = False

+    # _set_build_directory()
+    #
+    # Sets the build directory: the directory which may be preserved as
+    # the buildtree in the artifact.
+ # + # Args: + # directory (str): An absolute path within the sandbox + # always (bool): True if the build directory should always be downloaded, + # False if it should be downloaded only on failure + # + def _set_build_directory(self, directory, *, always): + self._build_directory = directory + self._build_directory_always = always + # _SandboxBatch() # -- cgit v1.2.1 From d0880afceec608a1a25ba9a73bd624707bcc5d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 12:21:59 +0200 Subject: element.py: Set SandboxRemote.output_files_required This enables conditional output file fetching with remote execution. --- buildstream/element.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/buildstream/element.py b/buildstream/element.py index b63cc3a4f..b31af1770 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -2407,6 +2407,8 @@ class Element(Plugin): self.info("Using a remote sandbox for artifact {} with directory '{}'".format(self.name, directory)) + output_files_required = context.require_artifact_files or self._artifact_files_required() + sandbox = SandboxRemote(context, project, directory, plugin=self, @@ -2415,7 +2417,8 @@ class Element(Plugin): config=config, specs=self.__remote_execution_specs, bare_directory=bare_directory, - allow_real_directory=False) + allow_real_directory=False, + output_files_required=output_files_required) yield sandbox elif directory is not None and os.path.exists(directory): -- cgit v1.2.1 From 902384aa43fd6ff3ab04503a7df935a7c561c495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Wed, 3 Apr 2019 16:52:07 +0200 Subject: element.py: Set sandbox build directory Let the sandbox know whether the buildtree will be required. This allows the remote execution sandbox to skip buildtree download when it's not needed. --- buildstream/element.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/buildstream/element.py b/buildstream/element.py index b31af1770..bc8f25cf8 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -1685,6 +1685,15 @@ class Element(Plugin): with _signals.terminator(cleanup_rootdir), \ self.__sandbox(rootdir, output_file, output_file, self.__sandbox_config) as sandbox: # noqa + # Let the sandbox know whether the buildtree will be required. + # This allows the remote execution sandbox to skip buildtree + # download when it's not needed. + buildroot = self.get_variable('build-root') + cache_buildtrees = context.cache_buildtrees + if cache_buildtrees != 'never': + always_cache_buildtrees = cache_buildtrees == 'always' + sandbox._set_build_directory(buildroot, always=always_cache_buildtrees) + if not self.BST_RUN_COMMANDS: # Element doesn't need to run any commands in the sandbox. # -- cgit v1.2.1 From 96cca33a756b5855153e84f719029e80f2fa69c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 1 Apr 2019 15:00:11 +0200 Subject: _sandboxremote.py: Fetch only needed directories --- buildstream/sandbox/_sandboxremote.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/buildstream/sandbox/_sandboxremote.py b/buildstream/sandbox/_sandboxremote.py index 1219afefc..2cb7e2538 100644 --- a/buildstream/sandbox/_sandboxremote.py +++ b/buildstream/sandbox/_sandboxremote.py @@ -29,6 +29,7 @@ import grpc from .. 
import utils from .._message import Message, MessageType from .sandbox import Sandbox, SandboxCommandError, _SandboxBatch +from ..storage.directory import VirtualDirectoryError from ..storage._casbaseddirectory import CasBasedDirectory from .. import _signals from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc @@ -253,7 +254,7 @@ class SandboxRemote(Sandbox): raise SandboxError("Failed trying to send CancelOperation request: " "{} ({})".format(e.details(), e.code().name)) - def process_job_output(self, output_directories, output_files): + def process_job_output(self, output_directories, output_files, *, failure): # Reads the remote execution server response to an execution request. # # output_directories is an array of OutputDirectory objects. @@ -295,7 +296,23 @@ class SandboxRemote(Sandbox): # Fetch the file blobs if needed if self._output_files_required or artifactcache.has_push_remotes(): - required_blobs = cascache.required_blobs_for_directory(dir_digest) + required_blobs = [] + directories = [] + + directories.append(self._output_directory) + if self._build_directory and (self._build_directory_always or failure): + directories.append(self._build_directory) + + for directory in directories: + try: + vdir = new_dir.descend(*directory.strip(os.sep).split(os.sep)) + dir_digest = vdir._get_digest() + required_blobs += cascache.required_blobs_for_directory(dir_digest) + except VirtualDirectoryError: + # If the directory does not exist, there is no need to + # download file blobs. + pass + local_missing_blobs = cascache.local_missing_blobs(required_blobs) if local_missing_blobs: if self._output_files_required: @@ -401,7 +418,8 @@ class SandboxRemote(Sandbox): action_result = self._extract_action_result(operation) # Get output of build - self.process_job_output(action_result.output_directories, action_result.output_files) + self.process_job_output(action_result.output_directories, action_result.output_files, + failure=action_result.exit_code != 0) if stdout: if action_result.stdout_raw: -- cgit v1.2.1 From ad3a73d4c80e10e44869e20fdcd389e07d59edab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Thu, 28 Mar 2019 15:34:32 +0100 Subject: tests/remoteexecution/partial.py: Add test for partial CAS --- tests/remoteexecution/partial.py | 43 ++++++++++++++++++++++ .../project/elements/no-runtime-deps.bst | 9 +++++ 2 files changed, 52 insertions(+) create mode 100644 tests/remoteexecution/partial.py create mode 100644 tests/remoteexecution/project/elements/no-runtime-deps.bst diff --git a/tests/remoteexecution/partial.py b/tests/remoteexecution/partial.py new file mode 100644 index 000000000..907fc507d --- /dev/null +++ b/tests/remoteexecution/partial.py @@ -0,0 +1,43 @@ +import os +import pytest + +from buildstream._exceptions import ErrorDomain +from buildstream.plugintestutils import cli_remote_execution as cli +from buildstream.plugintestutils.integration import assert_contains + + +pytestmark = pytest.mark.remoteexecution + + +DATA_DIR = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "project" +) + + +# Test that `bst build` does not download file blobs of a build-only dependency +# to the local cache. 
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_dependency_partial_local_cas(cli, datafiles):
+    project = str(datafiles)
+    element_name = 'no-runtime-deps.bst'
+    builddep_element_name = 'autotools/amhello.bst'
+    checkout = os.path.join(cli.directory, 'checkout')
+    builddep_checkout = os.path.join(cli.directory, 'builddep-checkout')
+
+    services = cli.ensure_services()
+    assert set(services) == set(['action-cache', 'execution', 'storage'])
+
+    result = cli.run(project=project, args=['build', element_name])
+    result.assert_success()
+
+    # Verify that the target element is available in the local cache
+    result = cli.run(project=project, args=['artifact', 'checkout', element_name,
+                                            '--directory', checkout])
+    result.assert_success()
+    assert_contains(checkout, ['/test'])
+
+    # Verify that the build-only dependency is not (completely) in the local cache
+    result = cli.run(project=project, args=['artifact', 'checkout', builddep_element_name,
+                                            '--directory', builddep_checkout])
+    result.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
diff --git a/tests/remoteexecution/project/elements/no-runtime-deps.bst b/tests/remoteexecution/project/elements/no-runtime-deps.bst
new file mode 100644
index 000000000..ca76aaf9f
--- /dev/null
+++ b/tests/remoteexecution/project/elements/no-runtime-deps.bst
@@ -0,0 +1,9 @@
+kind: manual
+description: Test element without runtime dependencies
+
+build-depends:
+- autotools/amhello.bst
+
+config:
+  install-commands:
+  - echo Test > %{install-root}/test
-- cgit v1.2.1

From 78ed45d07ca77b78d2931d16cfb784a56fe1569d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?=
Date: Thu, 11 Apr 2019 11:26:21 +0200
Subject: cascache.py: Simplify handling of Digest objects

Avoid unnecessary copies and use CopyFrom() instead of copying fields
one by one where copies are necessary.
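For readers less familiar with protocol buffers, the change below swaps
field-by-field assignment for whole-message copies. A standalone illustration
(it assumes the generated remote_execution_pb2 module from this tree; the
digest values are made up):

    from buildstream._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    src = remote_execution_pb2.Digest(hash='deadbeef', size_bytes=42)

    # Field-by-field copy, as the old code did
    d = remote_execution_pb2.Digest()
    d.hash = src.hash
    d.size_bytes = src.size_bytes

    # Message-level copy; equivalent here, and it stays correct if the
    # message type ever grows new fields
    d2 = remote_execution_pb2.Digest()
    d2.CopyFrom(src)
    assert d == d2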
--- buildstream/_cas/cascache.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py index 3a621fbb1..5f67dc0c1 100644 --- a/buildstream/_cas/cascache.py +++ b/buildstream/_cas/cascache.py @@ -268,9 +268,7 @@ class CASCache(): request.key = ref response = remote.ref_storage.GetReference(request) - tree = remote_execution_pb2.Digest() - tree.hash = response.digest.hash - tree.size_bytes = response.digest.size_bytes + tree = response.digest # Fetch Directory objects self._fetch_directory(remote, tree) @@ -368,8 +366,7 @@ class CASCache(): request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name) request.keys.append(ref) - request.digest.hash = tree.hash - request.digest.size_bytes = tree.size_bytes + request.digest.CopyFrom(tree) remote.ref_storage.UpdateReference(request) skipped_remote = False @@ -668,14 +665,12 @@ class CASCache(): for required_digest in required_blobs_group: d = request.blob_digests.add() - d.hash = required_digest.hash - d.size_bytes = required_digest.size_bytes + d.CopyFrom(required_digest) response = remote.cas.FindMissingBlobs(request) for missing_digest in response.missing_blob_digests: d = remote_execution_pb2.Digest() - d.hash = missing_digest.hash - d.size_bytes = missing_digest.size_bytes + d.CopyFrom(missing_digest) missing_blobs[d.hash] = d return missing_blobs.values() @@ -707,10 +702,8 @@ class CASCache(): excluded_subdirs = [] # parse directory, and recursively add blobs - d = remote_execution_pb2.Digest() - d.hash = directory_digest.hash - d.size_bytes = directory_digest.size_bytes - yield d + + yield directory_digest directory = remote_execution_pb2.Directory() @@ -718,10 +711,7 @@ class CASCache(): directory.ParseFromString(f.read()) for filenode in directory.files: - d = remote_execution_pb2.Digest() - d.hash = filenode.digest.hash - d.size_bytes = filenode.digest.size_bytes - yield d + yield filenode.digest for dirnode in directory.directories: if dirnode.name not in excluded_subdirs: -- cgit v1.2.1
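None of these patches change user-facing configuration: the partial CAS
behaviour engages automatically once every project in the session configures
remote execution (see the _stream.py patch above). A project.conf fragment
along these lines enables it; the exact keys and URLs are illustrative and
depend on the BuildStream version in use:

    remote-execution:
      execution-service:
        url: http://execution.example.com:50051
      storage-service:
        url: https://storage.example.com:11002
      action-cache-service:
        url: http://cache.example.com:50052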