author    Tristan van Berkom <tristan@codethink.co.uk>    2020-12-11 19:01:04 +0900
committer Tristan van Berkom <tristan@codethink.co.uk>    2020-12-13 12:50:38 +0900
commit    57293c8b47ca15937df8a5464582b6a58ef10dc7 (patch)
tree      0f563148cd0f82d743cab4782d8fea5e2be22167
parent    e9b2d7ac94caa2fea2345bba656e4b6cd17d4631 (diff)
download  buildstream-57293c8b47ca15937df8a5464582b6a58ef10dc7.tar.gz
_stream.py: Pre-emptive pulling of artifact metadata in some cases
In any case where the user has requested that artifacts be pulled, and
artifact targets have been specified, pre-emptively pull the target
artifact metadata so that the operation can complete coherently.

Specifically, this fixes:

  * bst artifact pull --deps build <artifact name>
  * bst artifact checkout --deps build --pull <artifact name>

In these cases, we cannot know the build dependencies of the artifact
if we do not at least try to obtain the toplevel target artifact(s)
first, so we institute a policy of initially downloading metadata.

This introduces a new parameter to the general purpose _load()
function, called `attempt_artifact_metadata`. When
`attempt_artifact_metadata` is specified, we attempt to download
artifact metadata for the toplevel specified targets, so that we can
derive their dependencies, which some operations require.

Currently this unfortunately downloads the entire artifact, but it
should be optimized later on to download only the required metadata.
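A condensed sketch of the new flow in _load(), for illustration only:
the function name here is hypothetical (in the commit the logic lives
inline in _load()), while PullQueue, _Scope and the helper methods are
the ones used in src/buildstream/_stream.py; error handling and
surrounding control flow are omitted.

    def _pull_artifact_metadata_first(self, artifacts):
        # Mark the toplevel artifact targets as required so that the
        # pull queue will process them.
        for element in artifacts:
            element._set_required(_Scope.NONE)

        # Run a pull-only scheduler session; currently this fetches the
        # whole artifact rather than just its metadata (see the FIXME
        # in the diff below).
        self._scheduler.clear_queues()
        self._add_queue(PullQueue(self._scheduler))
        self._enqueue_plan(artifacts)
        self._run()

        # With the metadata now locally available, reload the artifacts
        # by name so that their build dependencies can be resolved.
        artifact_targets = [e.get_artifact_name() for e in artifacts]
        _, _, artifacts = self._load_elements_from_targets(
            artifact_targets, [], rewritable=False, valid_artifact_names=True
        )
        return artifacts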
-rw-r--r--  src/buildstream/_stream.py | 90
1 file changed, 74 insertions(+), 16 deletions(-)
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 0aff5fb94..3feac677e 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -150,7 +150,7 @@ class Stream:
selection=_PipelineSelection.NONE,
except_targets=(),
use_artifact_config=False,
- load_artifacts=False
+ load_artifacts=False,
):
with PROFILER.profile(Topics.LOAD_SELECTION, "_".join(t.replace(os.sep, "-") for t in targets)):
target_objects = self._load(
@@ -192,7 +192,7 @@ class Stream:
command=None,
usebuildtree=False,
pull_=False,
- unique_id=None
+ unique_id=None,
):
# Load the Element via the unique_id if given
@@ -438,6 +438,7 @@ class Stream:
use_artifact_config=use_config,
artifact_remote_url=remote,
load_artifacts=True,
+ attempt_artifact_metadata=True,
)
if not self._artifacts.has_fetch_remotes():
@@ -523,10 +524,16 @@ class Stream:
hardlinks=False,
compression="",
pull=False,
- tar=False
+ tar=False,
):
- elements = self._load((target,), selection=selection, use_artifact_config=True, load_artifacts=True)
+ elements = self._load(
+ (target,),
+ selection=selection,
+ use_artifact_config=True,
+ load_artifacts=True,
+ attempt_artifact_metadata=True,
+ )
# self.targets contains a list of the loaded target objects
# if we specify --deps build, Stream._load() will return a list
@@ -731,7 +738,7 @@ class Stream:
except_targets=(),
tar=False,
compression=None,
- include_build_scripts=False
+ include_build_scripts=False,
):
self._check_location_writable(location, force=force, tar=tar)
@@ -1161,7 +1168,7 @@ class Stream:
except_targets: List[str],
*,
rewritable: bool = False,
- valid_artifact_names: bool = False
+ valid_artifact_names: bool = False,
) -> Tuple[List[Element], List[Element], List[Element]]:
names, refs = self._expand_and_classify_targets(targets, valid_artifact_names=valid_artifact_names)
loadable = [names, except_targets]
@@ -1187,20 +1194,32 @@ class Stream:
# Connect to the source and artifact remotes.
#
# Args:
- # artifact_url - The url of the artifact server to connect to.
- # source_url - The url of the source server to connect to.
- # use_artifact_config - Whether to use the artifact config.
- # use_source_config - Whether to use the source config.
+ # artifact_url: The url of the artifact server to connect to.
+ # source_url: The url of the source server to connect to.
+ # use_artifact_config: Whether to use the artifact config.
+ # use_source_config: Whether to use the source config.
+ # reinitialize: Whether to reinitialize from scratch
#
- def _connect_remotes(self, artifact_url: str, source_url: str, use_artifact_config: bool, use_source_config: bool):
+ def _connect_remotes(
+ self,
+ artifact_url: str,
+ source_url: str,
+ use_artifact_config: bool,
+ use_source_config: bool,
+ reinitialize: bool = False,
+ ):
# ArtifactCache.setup_remotes expects all projects to be fully loaded
for project in self._context.get_projects():
project.ensure_fully_loaded()
# Connect to remote caches, this needs to be done before resolving element state
- self._artifacts.setup_remotes(use_config=use_artifact_config, remote_url=artifact_url)
- self._elementsourcescache.setup_remotes(use_config=use_source_config, remote_url=source_url)
- self._sourcecache.setup_remotes(use_config=use_source_config, remote_url=source_url)
+ self._artifacts.setup_remotes(
+ use_config=use_artifact_config, remote_url=artifact_url, reinitialize=reinitialize
+ )
+ self._elementsourcescache.setup_remotes(
+ use_config=use_source_config, remote_url=source_url, reinitialize=reinitialize
+ )
+ self._sourcecache.setup_remotes(use_config=use_source_config, remote_url=source_url, reinitialize=reinitialize)
# _load_tracking()
#
@@ -1272,6 +1291,8 @@ class Stream:
# source_remote_url (str): A remote url for initializing source caches
# dynamic_plan (bool): Require artifacts as needed during the build
# load_artifacts (bool): Whether to load artifacts with artifact names
+ # attempt_artifact_metadata (bool): Whether to attempt to download artifact metadata in
+ # order to deduce build dependencies and reload.
#
# Returns:
# (list of Element): The primary element selection
@@ -1288,7 +1309,8 @@ class Stream:
artifact_remote_url=None,
source_remote_url=None,
dynamic_plan=False,
- load_artifacts=False
+ load_artifacts=False,
+ attempt_artifact_metadata=False,
):
elements, except_elements, artifacts = self._load_elements_from_targets(
targets, except_targets, rewritable=False, valid_artifact_names=load_artifacts
@@ -1305,11 +1327,47 @@ class Stream:
elements = [e for e in elements if e.get_kind() != "junction"]
# Hold on to the targets
- self.targets = elements + artifacts
+ self.targets = elements
# Connect to remote caches, this needs to be done before resolving element state
self._connect_remotes(artifact_remote_url, source_remote_url, use_artifact_config, use_source_config)
+ # In some cases we need to have an actualized artifact, with all of
+ # its metadata, such that we can derive attributes about the artifact
+ # like its build dependencies.
+ if artifacts and attempt_artifact_metadata:
+ #
+ # FIXME: We need a semantic here to download only the metadata
+ #
+ for element in artifacts:
+ element._set_required(_Scope.NONE)
+
+ self._scheduler.clear_queues()
+ self._add_queue(PullQueue(self._scheduler))
+ self._enqueue_plan(artifacts)
+ self._run()
+
+ #
+ # After obtaining the metadata for the toplevel specified artifact
+ # targets, we need to reload just the artifacts.
+ #
+ artifact_targets = [e.get_artifact_name() for e in artifacts]
+ _, _, artifacts = self._load_elements_from_targets(
+ artifact_targets, [], rewritable=False, valid_artifact_names=True
+ )
+
+ # FIXME:
+ #
+ # Sadly, we need to reinitialize just because we re-instantiated new projects due to
+ # downloading artifacts - this could be fixed by addressing the awkward structure
+ # of remotes in the asset caches.
+ #
+ self._connect_remotes(
+ artifact_remote_url, source_remote_url, use_artifact_config, use_source_config, reinitialize=True
+ )
+
+ self.targets += artifacts
+
# Now move on to loading primary selection.
#
self._pipeline.resolve_elements(self.targets)
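The receiving end of the new `reinitialize` flag is not part of this
diff. A plausible sketch of how the asset caches' setup_remotes() might
honor it, assuming (hypothetically) that remote setup is normally
guarded against running twice:

    def setup_remotes(self, *, use_config=False, remote_url=None, reinitialize=False):
        # Hypothetical guard, not shown in this commit: remotes are
        # normally set up once, but after an artifact pull re-instantiates
        # projects, the caller passes reinitialize=True to force the
        # setup to run again from scratch.
        if self._remotes_set_up and not reinitialize:
            return
        self._remotes_set_up = True
        # ... existing remote configuration logic ...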