author    | Jürg Billeter <j@bitron.ch> | 2019-02-18 11:18:10 +0000
committer | Jürg Billeter <j@bitron.ch> | 2019-02-18 11:18:10 +0000
commit    | ef85e3b295bdec7ad8c0b34e16c56034073d5031
tree      | a1fcdd89fd9601947a20067740d75360c08fe48f
parent    | c79696febe054e317af89f355646bbb3ae59bc8d
parent    | 3832c0d1ff7ec7458e04899873bde574066f7f23
download  | buildstream-ef85e3b295bdec7ad8c0b34e16c56034073d5031.tar.gz
Merge branch 'jmac/cache_artifacts_with_vdir' into 'master'
Cache artifacts with virtual directories instead of filesystem.
Closes #787
See merge request BuildStream/buildstream!991
-rw-r--r-- | buildstream/_artifactcache.py              |   7
-rw-r--r-- | buildstream/element.py                     | 194
-rw-r--r-- | buildstream/storage/_casbaseddirectory.py  |  36
-rw-r--r-- | buildstream/storage/_filebaseddirectory.py |   4
-rw-r--r-- | buildstream/storage/directory.py           |   6

5 files changed, 140 insertions, 107 deletions
diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index bc0032bec..b72b20fda 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -588,13 +588,16 @@ class ArtifactCache():
     #
     # Args:
     #     element (Element): The Element commit an artifact for
-    #     content (str): The element's content directory
+    #     content (Directory): The element's content directory
     #     keys (list): The cache keys to use
     #
     def commit(self, element, content, keys):
         refs = [element.get_artifact_name(key) for key in keys]
 
-        self.cas.commit(refs, content)
+        tree = content._get_digest()
+
+        for ref in refs:
+            self.cas.set_ref(ref, tree)
 
     # diff():
     #
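With this change, commit() no longer ingests a filesystem tree into CAS: the content is already CAS-backed, so committing reduces to pointing each artifact ref at the directory's digest. A minimal sketch of that shape, using a toy in-memory ref store; Digest, ToyRefStore and toy_commit are hypothetical stand-ins for illustration, not BuildStream API:

from collections import namedtuple

# A content digest: hash plus serialized size, as in the CAS protocol.
Digest = namedtuple('Digest', ['hash', 'size_bytes'])


class ToyRefStore:
    """Maps artifact ref names to directory digests."""

    def __init__(self):
        self._refs = {}

    def set_ref(self, ref, tree):
        # Pointing a ref at a digest is O(1): no file copying happens here,
        # because the directory contents already live in CAS.
        self._refs[ref] = tree

    def resolve_ref(self, ref):
        return self._refs[ref]


def toy_commit(store, refs, tree):
    # The new commit path: one ref update per cache key, no tree import.
    for ref in refs:
        store.set_ref(ref, tree)


store = ToyRefStore()
tree = Digest(hash='deadbeef', size_bytes=142)
toy_commit(store, ['element/strong-key', 'element/weak-key'], tree)
assert store.resolve_ref('element/weak-key') is tree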
diff --git a/buildstream/element.py b/buildstream/element.py
index b3f4d5518..8fc491204 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -103,6 +103,7 @@ from .types import _KeyStrength, CoreWarnings
 from .storage.directory import Directory
 from .storage._filebaseddirectory import FileBasedDirectory
+from .storage._casbaseddirectory import CasBasedDirectory
 from .storage.directory import VirtualDirectoryError
@@ -1670,106 +1671,109 @@ class Element(Plugin):
             cleanup_rootdir()
 
     def _cache_artifact(self, rootdir, sandbox, collect):
-        if collect is not None:
-            try:
-                sandbox_vroot = sandbox.get_virtual_directory()
-                collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
-            except VirtualDirectoryError:
-                # No collect directory existed
-                collectvdir = None
+        with self.timed_activity("Caching artifact"):
+            if collect is not None:
+                try:
+                    sandbox_vroot = sandbox.get_virtual_directory()
+                    collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
+                except VirtualDirectoryError:
+                    # No collect directory existed
+                    collectvdir = None
 
-        context = self._get_context()
+            context = self._get_context()
 
-        # Create artifact directory structure
-        assembledir = os.path.join(rootdir, 'artifact')
-        filesdir = os.path.join(assembledir, 'files')
-        logsdir = os.path.join(assembledir, 'logs')
-        metadir = os.path.join(assembledir, 'meta')
-        buildtreedir = os.path.join(assembledir, 'buildtree')
-        os.mkdir(assembledir)
-        if collect is not None and collectvdir is not None:
-            os.mkdir(filesdir)
-        os.mkdir(logsdir)
-        os.mkdir(metadir)
-        os.mkdir(buildtreedir)
-
-        # Hard link files from collect dir to files directory
-        if collect is not None and collectvdir is not None:
-            collectvdir.export_files(filesdir, can_link=True)
-
-        cache_buildtrees = context.cache_buildtrees
-        build_success = self.__build_result[0]
-
-        # cache_buildtrees defaults to 'always', as such the
-        # default behaviour is to attempt to cache them. If only
-        # caching failed artifact buildtrees, then query the build
-        # result. Element types without a build-root dir will be cached
-        # with an empty buildtreedir regardless of this configuration.
-
-        if cache_buildtrees == 'always' or (cache_buildtrees == 'failure' and not build_success):
-            try:
+            assemblevdir = CasBasedDirectory(cas_cache=context.artifactcache.cas, ref=None)
+            logsvdir = assemblevdir.descend("logs", create=True)
+            metavdir = assemblevdir.descend("meta", create=True)
+            buildtreevdir = assemblevdir.descend("buildtree", create=True)
+
+            # Create artifact directory structure
+            assembledir = os.path.join(rootdir, 'artifact')
+            logsdir = os.path.join(assembledir, 'logs')
+            metadir = os.path.join(assembledir, 'meta')
+            os.mkdir(assembledir)
+            os.mkdir(logsdir)
+            os.mkdir(metadir)
+
+            if collect is not None and collectvdir is not None:
+                filesvdir = assemblevdir.descend("files", create=True)
+                filesvdir.import_files(collectvdir)
+
+            cache_buildtrees = context.cache_buildtrees
+            build_success = self.__build_result[0]
+
+            # cache_buildtrees defaults to 'always', as such the
+            # default behaviour is to attempt to cache them. If only
+            # caching failed artifact buildtrees, then query the build
+            # result. Element types without a build-root dir will be cached
+            # with an empty buildtreedir regardless of this configuration.
+
+            if cache_buildtrees == 'always' or (cache_buildtrees == 'failure' and not build_success):
                 sandbox_vroot = sandbox.get_virtual_directory()
-            sandbox_build_dir = sandbox_vroot.descend(
-                self.get_variable('build-root').lstrip(os.sep).split(os.sep))
-            # Hard link files from build-root dir to buildtreedir directory
-            sandbox_build_dir.export_files(buildtreedir)
-        except VirtualDirectoryError:
-            # Directory could not be found. Pre-virtual
-            # directory behaviour was to continue silently
-            # if the directory could not be found.
-            pass
+                try:
+                    sandbox_build_dir = sandbox_vroot.descend(
+                        self.get_variable('build-root').lstrip(os.sep).split(os.sep))
+                    buildtreevdir.import_files(sandbox_build_dir)
+                except VirtualDirectoryError:
+                    # Directory could not be found. Pre-virtual
+                    # directory behaviour was to continue silently
+                    # if the directory could not be found.
+                    pass
+
+            # Write some logs out to normal directories: logsdir and metadir
+            # Copy build log
+            log_filename = context.get_log_filename()
+            self._build_log_path = os.path.join(logsdir, 'build.log')
+            if log_filename:
+                shutil.copyfile(log_filename, self._build_log_path)
+
+            # Store public data
+            _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
+
+            # Store result
+            build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]}
+            if self.__build_result[2] is not None:
+                build_result_dict["detail"] = self.__build_result[2]
+            _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
+
+            # ensure we have cache keys
+            self._assemble_done()
+
+            # Store keys.yaml
+            _yaml.dump(_yaml.node_sanitize({
+                'strong': self._get_cache_key(),
+                'weak': self._get_cache_key(_KeyStrength.WEAK),
+            }), os.path.join(metadir, 'keys.yaml'))
+
+            # Store dependencies.yaml
+            _yaml.dump(_yaml.node_sanitize({
+                e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD)
+            }), os.path.join(metadir, 'dependencies.yaml'))
+
+            # Store workspaced.yaml
+            _yaml.dump(_yaml.node_sanitize({
+                'workspaced': bool(self._get_workspace())
+            }), os.path.join(metadir, 'workspaced.yaml'))
+
+            # Store workspaced-dependencies.yaml
+            _yaml.dump(_yaml.node_sanitize({
+                'workspaced-dependencies': [
+                    e.name for e in self.dependencies(Scope.BUILD)
+                    if e._get_workspace()
+                ]
+            }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
 
-        # Copy build log
-        log_filename = context.get_log_filename()
-        self._build_log_path = os.path.join(logsdir, 'build.log')
-        if log_filename:
-            shutil.copyfile(log_filename, self._build_log_path)
-
-        # Store public data
-        _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
-
-        # Store result
-        build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]}
-        if self.__build_result[2] is not None:
-            build_result_dict["detail"] = self.__build_result[2]
-        _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
-
-        # ensure we have cache keys
-        self._assemble_done()
-
-        # Store keys.yaml
-        _yaml.dump(_yaml.node_sanitize({
-            'strong': self._get_cache_key(),
-            'weak': self._get_cache_key(_KeyStrength.WEAK),
-        }), os.path.join(metadir, 'keys.yaml'))
-
-        # Store dependencies.yaml
-        _yaml.dump(_yaml.node_sanitize({
-            e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD)
-        }), os.path.join(metadir, 'dependencies.yaml'))
-
-        # Store workspaced.yaml
-        _yaml.dump(_yaml.node_sanitize({
-            'workspaced': bool(self._get_workspace())
-        }), os.path.join(metadir, 'workspaced.yaml'))
-
-        # Store workspaced-dependencies.yaml
-        _yaml.dump(_yaml.node_sanitize({
-            'workspaced-dependencies': [
-                e.name for e in self.dependencies(Scope.BUILD)
-                if e._get_workspace()
-            ]
-        }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
+            metavdir.import_files(metadir)
+            logsvdir.import_files(logsdir)
 
-        with self.timed_activity("Caching artifact"):
-            artifact_size = utils._get_dir_size(assembledir)
-            self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
-
-        if collect is not None and collectvdir is None:
-            raise ElementError(
-                "Directory '{}' was not found inside the sandbox, "
-                "unable to collect artifact contents"
-                .format(collect))
+            artifact_size = assemblevdir.get_size()
+            self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
+
+            if collect is not None and collectvdir is None:
+                raise ElementError(
+                    "Directory '{}' was not found inside the sandbox, "
+                    "unable to collect artifact contents"
+                    .format(collect))
 
         return artifact_size
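After this rework, _cache_artifact assembles the artifact inside a CasBasedDirectory; only logs and metadata are still staged in a normal directory before being imported into the virtual tree. A rough sketch of the descend()/import_files() pattern against a toy dictionary-backed directory; VDir is hypothetical and mirrors only the calls the method above makes on the real CasBasedDirectory:

class VDir:
    def __init__(self):
        self.entries = {}   # name -> VDir (subdirectory) or bytes (file content)

    def descend(self, name, create=False):
        # Return the named subdirectory, optionally creating it on the way.
        if name not in self.entries:
            if not create:
                raise KeyError(name)
            self.entries[name] = VDir()
        return self.entries[name]

    def import_files(self, other):
        # Merge another directory tree into this one (overwrite on conflict).
        for name, entry in other.entries.items():
            self.entries[name] = entry


assemble = VDir()
logs = assemble.descend("logs", create=True)
meta = assemble.descend("meta", create=True)
buildtree = assemble.descend("buildtree", create=True)

# Collected build output is imported, not hard-linked into a staging dir.
collected = VDir()
collected.entries["hello.txt"] = b"hello"
assemble.descend("files", create=True).import_files(collected)

assert "hello.txt" in assemble.entries["files"].entries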
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 96a7aa79d..0ff7ea80b 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -136,10 +136,10 @@ class CasBasedDirectory(Directory):
         the parent). """
 
-        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
         if caller:
             old_dir = self._find_pb2_entry(caller.filename)
             self.cas_cache.add_object(digest=old_dir.digest,
                                       buffer=caller.pb2_directory.SerializeToString())
+        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
         if self.parent:
             self.parent._recalculate_recursing_up(self)
 
@@ -277,14 +277,6 @@ class CasBasedDirectory(Directory):
                               directory_list))
         return None
 
-    def find_root(self):
-        """ Finds the root of this directory tree by following 'parent' until there is
-        no parent. """
-        if self.parent:
-            return self.parent.find_root()
-        else:
-            return self
-
     def _check_replacement(self, name, path_prefix, fileListResult):
         """ Checks whether 'name' exists, and if so, whether we can overwrite it.
         If we can, add the name to 'overwritten_files' and delete the existing entry.
@@ -451,7 +443,7 @@ class CasBasedDirectory(Directory):
             files = external_pathspec.list_relative_paths()
 
         if isinstance(external_pathspec, FileBasedDirectory):
-            source_directory = external_pathspec.get_underlying_directory()
+            source_directory = external_pathspec._get_underlying_directory()
             result = self._import_files_from_directory(source_directory, files=files)
         elif isinstance(external_pathspec, str):
             source_directory = external_pathspec
@@ -635,6 +627,18 @@ class CasBasedDirectory(Directory):
         self._recalculate_recursing_up()
         self._recalculate_recursing_down()
 
+    def get_size(self):
+        total = len(self.pb2_directory.SerializeToString())
+        for i in self.index.values():
+            if isinstance(i.buildstream_object, CasBasedDirectory):
+                total += i.buildstream_object.get_size()
+            elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
+                src_name = self.cas_cache.objpath(i.pb_object.digest)
+                filesize = os.stat(src_name).st_size
+                total += filesize
+            # Symlink nodes are encoded as part of the directory serialization.
+        return total
+
     def _get_identifier(self):
         path = ""
         if self.parent:
@@ -653,3 +657,15 @@ class CasBasedDirectory(Directory):
         throw an exception. """
         raise VirtualDirectoryError("_get_underlying_directory was called on a CAS-backed directory," +
                                     " which has no underlying directory.")
+
+    # _get_digest():
+    #
+    # Return the Digest for this directory.
+    #
+    # Returns:
+    #   (Digest): The Digest protobuf object for the Directory protobuf
+    #
+    def _get_digest(self):
+        if not self.ref:
+            self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+        return self.ref
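The new get_size() walks the tree: each directory contributes the size of its serialized Directory protobuf, each file node the size of its backing CAS object, and symlinks cost nothing extra because they are encoded in the parent's serialization; _get_digest() serializes lazily and caches the ref. A simplified model of the size recursion, where the Toy* classes are hypothetical stand-ins for the protobuf node types:

class ToyFile:
    def __init__(self, size):
        self.size = size           # size of the backing CAS object, per os.stat()


class ToySymlink:
    pass                           # counted inside the parent's serialization


class ToyDirectory:
    def __init__(self, serialized_size, children):
        # Models len(pb2_directory.SerializeToString()) for this node.
        self.serialized_size = serialized_size
        self.children = children   # name -> ToyFile / ToySymlink / ToyDirectory

    def get_size(self):
        total = self.serialized_size
        for child in self.children.values():
            if isinstance(child, ToyDirectory):
                total += child.get_size()
            elif isinstance(child, ToyFile):
                total += child.size
            # ToySymlink: nothing to add
        return total


root = ToyDirectory(64, {
    "bin": ToyDirectory(32, {"app": ToyFile(1024)}),
    "link": ToySymlink(),
})
assert root.get_size() == 64 + 32 + 1024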
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index b919413f0..0752a0e05 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -30,6 +30,7 @@ See also: :ref:`sandboxing`.
 import os
 import time
 
 from .directory import Directory, VirtualDirectoryError
+from .. import utils
 from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
 from ..utils import _set_deterministic_user, _set_deterministic_mtime
@@ -201,6 +202,9 @@ class FileBasedDirectory(Directory):
 
         return list_relative_paths(self.external_directory)
 
+    def get_size(self):
+        return utils._get_dir_size(self.external_directory)
+
     def __str__(self):
         # This returns the whole path (since we don't know where the directory started)
         # which exposes the sandbox directory; we will have to assume for the time being
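For the file-backed implementation, size is a recursive walk of the external directory. For illustration only, here is a stand-alone walk with the same contract; this is not BuildStream's utils._get_dir_size, just a plain os.scandir() sketch that sums file sizes without following symlinks:

import os

def dir_size(path):
    # Sum st_size over regular files, recursing into subdirectories.
    total = 0
    for entry in os.scandir(path):
        if entry.is_symlink():
            continue
        if entry.is_dir(follow_symlinks=False):
            total += dir_size(entry.path)
        elif entry.is_file(follow_symlinks=False):
            total += entry.stat(follow_symlinks=False).st_size
    return total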
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
index 66b93a7f1..f572257d7 100644
--- a/buildstream/storage/directory.py
+++ b/buildstream/storage/directory.py
@@ -177,3 +177,9 @@ class Directory():
 
         """
         raise NotImplementedError()
+
+    def get_size(self):
+        """ Get an approximation of the storage space in bytes used by this directory
+        and all files and subdirectories in it. Storage space varies by implementation
+        and effective space used may be lower than this number due to deduplication. """
+        raise NotImplementedError()
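With the interface method in place, callers can size an artifact through Directory alone, regardless of backing store. A hedged usage sketch; report_artifact_size is a hypothetical helper, and vdir would be a CasBasedDirectory or FileBasedDirectory in practice:

def report_artifact_size(vdir):
    # get_size() is an upper bound: CAS deduplication can make the effective
    # on-disk footprint smaller than the sum reported here.
    size = vdir.get_size()
    print("artifact occupies at most {} bytes".format(size))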