diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-11-18 17:27:14 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-11-18 17:27:14 +0000 |
commit | 26670d5dd6d08e0af187f9eeb33413495fbe9fa0 (patch) | |
tree | 5d250304dd047bbc08964fd69f4708545c11ffae | |
parent | 516833fb68af32af80135639190ac7501df1c79f (diff) | |
parent | 764ab58d17ba572a099b092c88ee69e487dbe630 (diff) | |
download | buildstream-26670d5dd6d08e0af187f9eeb33413495fbe9fa0.tar.gz |
Merge branch 'tlater/annihilate_update_state' into 'master'
Remove update_state
Closes #1054
See merge request BuildStream/buildstream!1660
-rw-r--r-- | src/buildstream/_artifactelement.py | 4 | ||||
-rw-r--r-- | src/buildstream/_loader/loader.py | 17 | ||||
-rw-r--r-- | src/buildstream/_pipeline.py | 2 | ||||
-rw-r--r-- | src/buildstream/element.py | 351 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/workspace.py | 13 | ||||
-rw-r--r-- | tests/artifactcache/push.py | 3 |
6 files changed, 254 insertions, 136 deletions
diff --git a/src/buildstream/_artifactelement.py b/src/buildstream/_artifactelement.py index 1c1c5db46..4b6d6dd50 100644 --- a/src/buildstream/_artifactelement.py +++ b/src/buildstream/_artifactelement.py @@ -78,9 +78,9 @@ class ArtifactElement(Element): return cls.__instantiated_artifacts[ref] artifact_element = ArtifactElement(context, ref) - # XXX: We need to call update state as it is responsible for + # XXX: We need to call initialize_state as it is responsible for # initialising an Element/ArtifactElement's Artifact (__artifact) - artifact_element._update_state() + artifact_element._initialize_state() cls.__instantiated_artifacts[ref] = artifact_element for dep_ref in artifact_element.get_dependency_refs(Scope.BUILD): diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py index da0c0fb29..3b721d6f2 100644 --- a/src/buildstream/_loader/loader.py +++ b/src/buildstream/_loader/loader.py @@ -33,7 +33,7 @@ from . import loadelement from .loadelement import Dependency, LoadElement from .metaelement import MetaElement from .metasource import MetaSource -from ..types import CoreWarnings +from ..types import CoreWarnings, _KeyStrength from .._message import Message, MessageType @@ -633,7 +633,7 @@ class Loader: raise LoadError("Dependencies are forbidden for 'junction' elements", LoadErrorReason.INVALID_JUNCTION) element = Element._new_from_meta(meta_element) - element._update_state() + element._initialize_state() # If this junction element points to a sub-sub-project, we need to # find loader for that project. @@ -671,8 +671,19 @@ class Loader: else: # Stage sources element._set_required() + + # Note: We use _KeyStrength.WEAK here because junctions + # cannot have dependencies, therefore the keys are + # equivalent. 
+ # + # Since the element has not necessarily been given a + # strong cache key at this point (in a non-strict build + # that is set *after* we complete building/pulling, which + # we haven't yet for this element), + # element._get_cache_key() can fail if used with the + # default _KeyStrength.STRONG. basedir = os.path.join( - self.project.directory, ".bst", "staged-junctions", filename, element._get_cache_key() + self.project.directory, ".bst", "staged-junctions", filename, element._get_cache_key(_KeyStrength.WEAK) ) if not os.path.exists(basedir): os.makedirs(basedir, exist_ok=True) diff --git a/src/buildstream/_pipeline.py b/src/buildstream/_pipeline.py index 0b9ab5f24..a40d89861 100644 --- a/src/buildstream/_pipeline.py +++ b/src/buildstream/_pipeline.py @@ -147,7 +147,7 @@ class Pipeline: for element in self.dependencies(targets, Scope.ALL): # Determine initial element state. if not element._resolved_initial_state: - element._update_state() + element._initialize_state() # We may already have Elements which are cached and have their runtimes # cached, if this is the case, we should immediately notify their reverse diff --git a/src/buildstream/element.py b/src/buildstream/element.py index 5028cc5fa..74fb1a056 100644 --- a/src/buildstream/element.py +++ b/src/buildstream/element.py @@ -1218,64 +1218,78 @@ class Element(Plugin): # (bool): True if cache can be queried # def _can_query_cache(self): - # If build has already been scheduled, we know that the element is - # not cached and thus can allow cache query even if the strict cache key - # is not available yet. - # This special case is required for workspaced elements to prevent - # them from getting blocked in the pull queue. - if self.__assemble_scheduled: - return True - # cache cannot be queried until strict cache key is available return self.__strict_cache_key is not None - # _update_state() - # - # Keep track of element state. Calculate cache keys if possible and - # check whether artifacts are cached. 
- # - # This must be called whenever the state of an element may have changed. - # - def _update_state(self): - if not self._resolved_initial_state: - self._resolved_initial_state = True - context = self._get_context() - - # Compute and determine consistency of sources + # _initialize_state() + # + # Compute the element's initial state. Element state contains + # the following mutable sub-states: + # + # - Source state + # - Artifact cache key + # - Source key + # - Integral component of the cache key + # - Computed as part of the source state + # - Artifact state + # - Cache key + # - Must be known to compute this state + # - Build status + # - Artifact state + # - Must be known before we can decide whether to build + # + # Note that sub-states are dependent on each other, and changes to + # one state will effect changes in the next. + # + # Changes to these states can be caused by numerous things, + # notably jobs executed in sub-processes. Changes are performed by + # invocations of the following methods: + # + # - __update_source_state() + # - Computes the state of all sources of the element. + # - __update_cache_keys() + # - Computes the strong and weak cache keys. + # - _update_artifact_state() + # - Computes the state of the element's artifact using the + # cache key. + # - __schedule_assembly_when_necessary() + # - Schedules assembly of an element, iff its current state + # allows/necessitates it + # - __update_cache_key_non_strict() + # - Sets strict cache keys in non-strict builds + # - Some non-strict build actions can create artifacts + # compatible with strict mode (such as pulling), so + # this needs to be done + # + # When any one of these methods is called and causes a change, + # it will invoke methods that have a potential dependency on + # them, causing the state change to bubble through all potential + # side effects.
+ # + # *This* method starts the process by invoking + # `__update_source_state()`, which will cause all necessary state + # changes. Other functions should use the appropriate methods and + # only update what they expect to change - this will ensure that + # the minimum amount of work is done. + # + def _initialize_state(self): + assert not self._resolved_initial_state, "_initialize_state() should only be called once" + self._resolved_initial_state = True + + # This will update source state, and for un-initialized + # elements recursively initialize anything else (because it + # will become considered outdated after source state is + # updated). + # + # FIXME: Currently this method may cause recursion through + # `self.__update_strict_cache_key_of_rdeps()`, since this may + # invoke reverse dependencies' cache key updates + # recursively. This is necessary when we update keys after a + # pull/build, however should not occur during initialization + # (since we will eventually visit reverse dependencies during + # our initialization anyway). self.__update_source_state() - if self._get_consistency() == Consistency.INCONSISTENT: - # Tracking may still be pending - return - - self.__update_cache_keys() - self.__update_artifact_state() - - # If the element wasn't assembled and isn't scheduled to be assemble, - # or cached, or waiting to be pulled but has an artifact then schedule - # the assembly. - if ( - not self.__assemble_scheduled - and not self.__assemble_done - and self.__artifact - and self._is_required() - and not self._cached() - and not self._pull_pending() - ): - self._schedule_assemble() - - # If a build has been scheduled, we know that the element - # is not cached and can allow cache query even if the strict cache - # key is not available yet.
- if self.__can_query_cache_callback is not None: - self.__can_query_cache_callback(self) - self.__can_query_cache_callback = None - - return - - if not context.get_strict(): - self.__update_cache_key_non_strict() - # _get_display_key(): # # Returns cache keys for display purposes @@ -1353,7 +1367,9 @@ class Element(Plugin): self.__tracking_scheduled = False - self._update_state() + # Tracking may change the sources' refs, and therefore the + # source state. We need to update source state. + self.__update_source_state() # _track(): # @@ -1519,7 +1535,12 @@ class Element(Plugin): for dep in self.dependencies(Scope.RUN, recurse=False): dep._set_required() - self._update_state() + # When an element becomes required, it must be assembled for + # the current pipeline. `__schedule_assembly_when_necessary()` + # will abort if some other state prevents it from being built, + # and changes to such states will cause re-scheduling, so this + # is safe. + self.__schedule_assembly_when_necessary() # Callback to the Queue if self.__required_callback is not None: @@ -1556,27 +1577,56 @@ class Element(Plugin): def _artifact_files_required(self): return self.__artifact_files_required - # _schedule_assemble(): + # __should_schedule() + # + # Returns: + # bool - Whether the element can be scheduled for a build. + # + def __should_schedule(self): + # We're processing if we're already scheduled, we've + # finished assembling or if we're waiting to pull. + processing = self.__assemble_scheduled or self.__assemble_done or self._pull_pending() + + # We should schedule a build when + return ( + # We're not processing + not processing + and + # We're required for the current build + self._is_required() + and + # We have figured out the state of our artifact + self.__artifact + and + # And we're not cached yet + not self._cached() + ) + + # __schedule_assembly_when_necessary(): # # This is called in the main process before the element is assembled # in a subprocess. 
# - def _schedule_assemble(self): - assert not self.__assemble_scheduled + def __schedule_assembly_when_necessary(self): + # FIXME: We could reduce the number of function calls a bit by + # factoring this out of this method (and checking whether we + # should schedule at the calling end). + # + # This would make the code less pretty, but it's a possible + # optimization if we get desperate enough (and we will ;)). + if not self.__should_schedule(): + return + self.__assemble_scheduled = True # Requests artifacts of build dependencies for dep in self.dependencies(Scope.BUILD, recurse=False): dep._set_required() - self._set_required() - - # Invalidate workspace key as the build modifies the workspace directory - workspace = self._get_workspace() - if workspace: - workspace.invalidate_key() - - self._update_state() + # Once we schedule an element for assembly, we know that our + # build dependencies have strong cache keys, so we can update + # our own strong cache key. + self.__update_cache_key_non_strict() # _assemble_done(): # @@ -1597,7 +1647,11 @@ class Element(Plugin): if self.__artifact: self.__artifact.reset_cached() - self._update_state() + # When we're building in non-strict mode, we may have + # assembled everything to this point without a strong cache + # key. Once the element has been assembled, a strong cache key + # can be set, so we do so. + self.__update_cache_key_non_strict() self._update_ready_for_runtime_and_cached() if self._get_workspace() and self._cached_success(): @@ -1829,7 +1883,13 @@ class Element(Plugin): self.__strict_artifact.reset_cached() self.__artifact.reset_cached() - self._update_state() + # We may not have actually pulled an artifact - the pull may + # have failed. We might therefore need to schedule assembly. + self.__schedule_assembly_when_necessary() + # If we've finished pulling, an artifact might now exist + # locally, so we might need to update a non-strict strong + # cache key. 
+ self.__update_cache_key_non_strict() self._update_ready_for_runtime_and_cached() # _pull(): @@ -2395,24 +2455,19 @@ class Element(Plugin): if self.__tracking_scheduled: return + old_consistency = self.__consistency self.__consistency = Consistency.CACHED - workspace = self._get_workspace() - # Special case for workspaces - if workspace: - - # A workspace is considered inconsistent in the case - # that its directory went missing - # - fullpath = workspace.get_absolute_path() - if not os.path.exists(fullpath): - self.__consistency = Consistency.INCONSISTENT - else: + # Determine overall consistency of the element + for source in self.__sources: + # FIXME: It'd be nice to remove this eventually + source._update_state() + self.__consistency = min(self.__consistency, source._get_consistency()) - # Determine overall consistency of the element - for source in self.__sources: - source._update_state() - self.__consistency = min(self.__consistency, source._get_consistency()) + # If the source state changes, our cache key must also change, + # since it contains the source's key. + if old_consistency != self.__consistency: + self.__update_cache_keys() # __can_build_incrementally() # @@ -3088,17 +3143,26 @@ class Element(Plugin): # Note that it does not update *all* cache keys - In non-strict mode, the # strong cache key is updated in __update_cache_key_non_strict() # - # If the cache keys are not stable (i.e. workspace that isn't cached), - # then cache keys are erased. - # Otherwise, the weak and strict cache keys will be calculated if not - # already set. - # The weak cache key is a cache key that doesn't necessarily change when - # its dependencies change, useful for avoiding full rebuilds when one's - # dependencies guarantee stability across versions. - # The strict cache key is a cache key that changes if any build-dependency - # has changed. 
+ # If the element's consistency is Consistency.INCONSISTENT this is + # a no-op (since inconsistent elements cannot have cache keys). + # + # The weak and strict cache keys will be calculated if not already + # set. + # + # The weak cache key is a cache key that doesn't change when its + # runtime dependencies change, useful for avoiding full rebuilds + # when one's dependencies guarantee stability across + # versions. Changes in build dependencies still force a rebuild, + # since those will change the built artifact directly. + # + # The strict cache key is a cache key that changes if any + # dependency has changed. # def __update_cache_keys(self): + if self._get_consistency() == Consistency.INCONSISTENT: + # Tracking may still be pending + return + context = self._get_context() if self.__weak_cache_key is None: @@ -3149,11 +3213,21 @@ class Element(Plugin): self.__can_query_cache_callback(self) self.__can_query_cache_callback = None + # If we've newly calculated a cache key, our artifact's + # current state will also change - after all, we can now find + # a potential existing artifact. + if self.__weak_cache_key is not None or self.__strict_cache_key is not None: + self.__update_artifact_state() + # __update_artifact_state() # # Updates the data involved in knowing about the artifact corresponding # to this element. # + # If the state changes, this will subsequently call + # `self.__schedule_assembly_when_necessary()` to schedule assembly if it becomes + # possible. + # # Element.__update_cache_keys() must be called before this to have # meaningful results, because the element must know its cache key before # it can check whether an artifact exists for that cache key. 
@@ -3167,6 +3241,7 @@ class Element(Plugin): if not context.get_strict() and not self.__artifact: # We've calculated the weak_key, so instantiate artifact instance member self.__artifact = Artifact(self, context, weak_key=self.__weak_cache_key) + self.__schedule_assembly_when_necessary() if not self.__strict_cache_key: return @@ -3178,6 +3253,9 @@ class Element(Plugin): if context.get_strict(): self.__artifact = self.__strict_artifact + self.__schedule_assembly_when_necessary() + else: + self.__update_cache_key_non_strict() # __update_cache_key_non_strict() # @@ -3227,24 +3305,36 @@ class Element(Plugin): # obtained # def __update_strict_cache_key_of_rdeps(self): - if not self.__updated_strict_cache_keys_of_rdeps: - if self.__runtime_deps_without_strict_cache_key == 0 and self.__strict_cache_key is not None: - self.__updated_strict_cache_keys_of_rdeps = True + if any( + ( + # If we've previously updated these we don't need to do so + # again. + self.__updated_strict_cache_keys_of_rdeps, + # We can't do this until none of *our* rdeps are lacking a + # strict cache key. + not self.__runtime_deps_without_strict_cache_key == 0, + # If we don't have a strict cache key we can't do this either. 
+ self.__strict_cache_key is None, + ) + ): + return - # Notify reverse dependencies - for rdep in self.__reverse_runtime_deps: - rdep.__runtime_deps_without_strict_cache_key -= 1 - assert not rdep.__runtime_deps_without_strict_cache_key < 0 + self.__updated_strict_cache_keys_of_rdeps = True - if rdep.__runtime_deps_without_strict_cache_key == 0: - rdep.__update_strict_cache_key_of_rdeps() + # Notify reverse dependencies + for rdep in self.__reverse_runtime_deps: + rdep.__runtime_deps_without_strict_cache_key -= 1 + assert not rdep.__runtime_deps_without_strict_cache_key < 0 - for rdep in self.__reverse_build_deps: - rdep.__build_deps_without_strict_cache_key -= 1 - assert not rdep.__build_deps_without_strict_cache_key < 0 + if rdep.__runtime_deps_without_strict_cache_key == 0: + rdep.__update_strict_cache_key_of_rdeps() - if rdep.__build_deps_without_strict_cache_key == 0: - rdep._update_state() + for rdep in self.__reverse_build_deps: + rdep.__build_deps_without_strict_cache_key -= 1 + assert not rdep.__build_deps_without_strict_cache_key < 0 + + if rdep.__build_deps_without_strict_cache_key == 0: + rdep.__update_cache_keys() # __update_ready_for_runtime() # @@ -3261,30 +3351,41 @@ class Element(Plugin): # decrementing the appropriate counters. # def __update_ready_for_runtime(self): - if not self.__ready_for_runtime: - if self.__runtime_deps_without_cache_key == 0 and self.__cache_key is not None: - self.__ready_for_runtime = True + if any( + ( + # We're already ready for runtime; no update required + self.__ready_for_runtime, + # If not all our dependencies are ready yet, we can't be ready + # either. + not self.__runtime_deps_without_cache_key == 0, + # If our cache state has not been resolved, we can't be ready. 
+ self.__cache_key is None, + ) + ): + return - # Notify reverse dependencies - for rdep in self.__reverse_runtime_deps: - rdep.__runtime_deps_without_cache_key -= 1 - assert not rdep.__runtime_deps_without_cache_key < 0 + self.__ready_for_runtime = True - # If all of our runtimes have cache keys, we can calculate ours - if rdep.__runtime_deps_without_cache_key == 0: - rdep.__update_ready_for_runtime() + # Notify reverse dependencies + for rdep in self.__reverse_runtime_deps: + rdep.__runtime_deps_without_cache_key -= 1 + assert not rdep.__runtime_deps_without_cache_key < 0 - for rdep in self.__reverse_build_deps: - rdep.__build_deps_without_cache_key -= 1 - assert not rdep.__build_deps_without_cache_key < 0 + # If all of our runtimes have cache keys, we can calculate ours + if rdep.__runtime_deps_without_cache_key == 0: + rdep.__update_ready_for_runtime() + + for rdep in self.__reverse_build_deps: + rdep.__build_deps_without_cache_key -= 1 + assert not rdep.__build_deps_without_cache_key < 0 - if rdep.__build_deps_without_cache_key == 0: - rdep._update_state() + if rdep.__build_deps_without_cache_key == 0: + rdep.__update_cache_keys() - # If the element is cached, and has all of its runtime dependencies cached, - # now that we have the cache key, we are able to notify reverse dependencies - # that the element it ready. This is a likely trigger for workspaced elements. - self._update_ready_for_runtime_and_cached() + # If the element is cached, and has all of its runtime dependencies cached, + # now that we have the cache key, we are able to notify reverse dependencies + # that the element it ready. This is a likely trigger for workspaced elements. 
+ self._update_ready_for_runtime_and_cached() def _overlap_error_detail(f, forbidden_overlap_elements, elements): diff --git a/src/buildstream/plugins/sources/workspace.py b/src/buildstream/plugins/sources/workspace.py index a845fd440..c7d16f685 100644 --- a/src/buildstream/plugins/sources/workspace.py +++ b/src/buildstream/plugins/sources/workspace.py @@ -35,6 +35,8 @@ workspace. The node constructed would be specified as follows: path: /path/to/workspace """ +import os + from buildstream.storage.directory import Directory from buildstream import Source, SourceError, Consistency from buildstream.types import SourceRef @@ -43,7 +45,6 @@ from buildstream.node import MappingNode class WorkspaceSource(Source): # pylint: disable=attribute-defined-outside-init - BST_STAGE_VIRTUAL_DIRECTORY = True BST_KEY_REQUIRES_STAGE = True @@ -81,9 +82,13 @@ class WorkspaceSource(Source): def init_workspace(self, directory: Directory) -> None: raise AssertionError("Attempting to re-open an existing workspace") - def get_consistency(self): - # always return cached state - return Consistency.CACHED + def get_consistency(self) -> Consistency: + if not os.path.exists(self._get_local_path()): + # A workspace is considered inconsistent in the case that + # its directory went missing + return Consistency.INCONSISTENT + else: + return Consistency.CACHED def fetch(self) -> None: # pylint: disable=arguments-differ pass # pragma: nocover diff --git a/tests/artifactcache/push.py b/tests/artifactcache/push.py index 238d5f7ef..7160e05b4 100644 --- a/tests/artifactcache/push.py +++ b/tests/artifactcache/push.py @@ -37,7 +37,8 @@ def _push(cli, cache_dir, project_dir, config_file, target): # as this test does not use the cli frontend. for e in element.dependencies(Scope.ALL): # Determine initial element state. - e._update_state() + if not element._resolved_initial_state: + e._initialize_state() # Manually setup the CAS remotes artifactcache.setup_remotes(use_config=True) |