author    Jürg Billeter <j@bitron.ch>  2019-02-18 11:18:10 +0000
committer Jürg Billeter <j@bitron.ch>  2019-02-18 11:18:10 +0000
commit    ef85e3b295bdec7ad8c0b34e16c56034073d5031 (patch)
tree      a1fcdd89fd9601947a20067740d75360c08fe48f
parent    c79696febe054e317af89f355646bbb3ae59bc8d (diff)
parent    3832c0d1ff7ec7458e04899873bde574066f7f23 (diff)
download  buildstream-ef85e3b295bdec7ad8c0b34e16c56034073d5031.tar.gz

Merge branch 'jmac/cache_artifacts_with_vdir' into 'master'

Cache artifacts with virtual directories instead of filesystem.

Closes #787

See merge request BuildStream/buildstream!991
-rw-r--r--  buildstream/_artifactcache.py               |   7
-rw-r--r--  buildstream/element.py                      | 194
-rw-r--r--  buildstream/storage/_casbaseddirectory.py   |  36
-rw-r--r--  buildstream/storage/_filebaseddirectory.py  |   4
-rw-r--r--  buildstream/storage/directory.py            |   6
5 files changed, 140 insertions(+), 107 deletions(-)
diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index bc0032bec..b72b20fda 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -588,13 +588,16 @@ class ArtifactCache():
#
# Args:
# element (Element): The Element to commit an artifact for
- # content (str): The element's content directory
+ # content (Directory): The element's content directory
# keys (list): The cache keys to use
#
def commit(self, element, content, keys):
refs = [element.get_artifact_name(key) for key in keys]
- self.cas.commit(refs, content)
+ tree = content._get_digest()
+
+ for ref in refs:
+ self.cas.set_ref(ref, tree)
# diff():
#
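The net effect of this hunk: commit() no longer imports a filesystem tree into CAS; it receives an already CAS-backed Directory and simply points each cache-key ref at that tree's digest. A minimal sketch of the new flow, assuming the cas.set_ref() and content._get_digest() interfaces shown above (the standalone function framing is illustrative):

    def commit(element, content, keys, cas):
        # One artifact ref per cache key, all naming the same CAS tree
        refs = [element.get_artifact_name(key) for key in keys]
        tree = content._get_digest()  # lazily serializes the Directory protobuf
        for ref in refs:
            cas.set_ref(ref, tree)    # a ref is just a named pointer to a digest

Because the tree already lives in CAS, committing under several keys is a constant-time ref update rather than a copy of the artifact contents.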
diff --git a/buildstream/element.py b/buildstream/element.py
index b3f4d5518..8fc491204 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -103,6 +103,7 @@ from .types import _KeyStrength, CoreWarnings
from .storage.directory import Directory
from .storage._filebaseddirectory import FileBasedDirectory
+from .storage._casbaseddirectory import CasBasedDirectory
from .storage.directory import VirtualDirectoryError
@@ -1670,106 +1671,109 @@ class Element(Plugin):
cleanup_rootdir()
def _cache_artifact(self, rootdir, sandbox, collect):
- if collect is not None:
- try:
- sandbox_vroot = sandbox.get_virtual_directory()
- collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
- except VirtualDirectoryError:
- # No collect directory existed
- collectvdir = None
+ with self.timed_activity("Caching artifact"):
+ if collect is not None:
+ try:
+ sandbox_vroot = sandbox.get_virtual_directory()
+ collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
+ except VirtualDirectoryError:
+ # No collect directory existed
+ collectvdir = None
- context = self._get_context()
+ context = self._get_context()
- # Create artifact directory structure
- assembledir = os.path.join(rootdir, 'artifact')
- filesdir = os.path.join(assembledir, 'files')
- logsdir = os.path.join(assembledir, 'logs')
- metadir = os.path.join(assembledir, 'meta')
- buildtreedir = os.path.join(assembledir, 'buildtree')
- os.mkdir(assembledir)
- if collect is not None and collectvdir is not None:
- os.mkdir(filesdir)
- os.mkdir(logsdir)
- os.mkdir(metadir)
- os.mkdir(buildtreedir)
-
- # Hard link files from collect dir to files directory
- if collect is not None and collectvdir is not None:
- collectvdir.export_files(filesdir, can_link=True)
-
- cache_buildtrees = context.cache_buildtrees
- build_success = self.__build_result[0]
-
- # cache_buildtrees defaults to 'always', as such the
- # default behaviour is to attempt to cache them. If only
- # caching failed artifact buildtrees, then query the build
- # result. Element types without a build-root dir will be cached
- # with an empty buildtreedir regardless of this configuration.
-
- if cache_buildtrees == 'always' or (cache_buildtrees == 'failure' and not build_success):
- try:
+ assemblevdir = CasBasedDirectory(cas_cache=context.artifactcache.cas, ref=None)
+ logsvdir = assemblevdir.descend("logs", create=True)
+ metavdir = assemblevdir.descend("meta", create=True)
+ buildtreevdir = assemblevdir.descend("buildtree", create=True)
+
+ # Create artifact directory structure
+ assembledir = os.path.join(rootdir, 'artifact')
+ logsdir = os.path.join(assembledir, 'logs')
+ metadir = os.path.join(assembledir, 'meta')
+ os.mkdir(assembledir)
+ os.mkdir(logsdir)
+ os.mkdir(metadir)
+
+ if collect is not None and collectvdir is not None:
+ filesvdir = assemblevdir.descend("files", create=True)
+ filesvdir.import_files(collectvdir)
+
+ cache_buildtrees = context.cache_buildtrees
+ build_success = self.__build_result[0]
+
+ # cache_buildtrees defaults to 'always', as such the
+ # default behaviour is to attempt to cache them. If only
+ # caching failed artifact buildtrees, then query the build
+ # result. Element types without a build-root dir will be cached
+ # with an empty buildtreedir regardless of this configuration.
+
+ if cache_buildtrees == 'always' or (cache_buildtrees == 'failure' and not build_success):
sandbox_vroot = sandbox.get_virtual_directory()
- sandbox_build_dir = sandbox_vroot.descend(
- self.get_variable('build-root').lstrip(os.sep).split(os.sep))
- # Hard link files from build-root dir to buildtreedir directory
- sandbox_build_dir.export_files(buildtreedir)
- except VirtualDirectoryError:
- # Directory could not be found. Pre-virtual
- # directory behaviour was to continue silently
- # if the directory could not be found.
- pass
+ try:
+ sandbox_build_dir = sandbox_vroot.descend(
+ self.get_variable('build-root').lstrip(os.sep).split(os.sep))
+ buildtreevdir.import_files(sandbox_build_dir)
+ except VirtualDirectoryError:
+ # Directory could not be found. Pre-virtual
+ # directory behaviour was to continue silently
+ # if the directory could not be found.
+ pass
+
+ # Write some logs out to normal directories: logsdir and metadir
+ # Copy build log
+ log_filename = context.get_log_filename()
+ self._build_log_path = os.path.join(logsdir, 'build.log')
+ if log_filename:
+ shutil.copyfile(log_filename, self._build_log_path)
+
+ # Store public data
+ _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
+
+ # Store result
+ build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]}
+ if self.__build_result[2] is not None:
+ build_result_dict["detail"] = self.__build_result[2]
+ _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
+
+ # ensure we have cache keys
+ self._assemble_done()
+
+ # Store keys.yaml
+ _yaml.dump(_yaml.node_sanitize({
+ 'strong': self._get_cache_key(),
+ 'weak': self._get_cache_key(_KeyStrength.WEAK),
+ }), os.path.join(metadir, 'keys.yaml'))
+
+ # Store dependencies.yaml
+ _yaml.dump(_yaml.node_sanitize({
+ e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD)
+ }), os.path.join(metadir, 'dependencies.yaml'))
+
+ # Store workspaced.yaml
+ _yaml.dump(_yaml.node_sanitize({
+ 'workspaced': bool(self._get_workspace())
+ }), os.path.join(metadir, 'workspaced.yaml'))
+
+ # Store workspaced-dependencies.yaml
+ _yaml.dump(_yaml.node_sanitize({
+ 'workspaced-dependencies': [
+ e.name for e in self.dependencies(Scope.BUILD)
+ if e._get_workspace()
+ ]
+ }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
- # Copy build log
- log_filename = context.get_log_filename()
- self._build_log_path = os.path.join(logsdir, 'build.log')
- if log_filename:
- shutil.copyfile(log_filename, self._build_log_path)
-
- # Store public data
- _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
-
- # Store result
- build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]}
- if self.__build_result[2] is not None:
- build_result_dict["detail"] = self.__build_result[2]
- _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
-
- # ensure we have cache keys
- self._assemble_done()
-
- # Store keys.yaml
- _yaml.dump(_yaml.node_sanitize({
- 'strong': self._get_cache_key(),
- 'weak': self._get_cache_key(_KeyStrength.WEAK),
- }), os.path.join(metadir, 'keys.yaml'))
-
- # Store dependencies.yaml
- _yaml.dump(_yaml.node_sanitize({
- e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD)
- }), os.path.join(metadir, 'dependencies.yaml'))
-
- # Store workspaced.yaml
- _yaml.dump(_yaml.node_sanitize({
- 'workspaced': bool(self._get_workspace())
- }), os.path.join(metadir, 'workspaced.yaml'))
-
- # Store workspaced-dependencies.yaml
- _yaml.dump(_yaml.node_sanitize({
- 'workspaced-dependencies': [
- e.name for e in self.dependencies(Scope.BUILD)
- if e._get_workspace()
- ]
- }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
+ metavdir.import_files(metadir)
+ logsvdir.import_files(logsdir)
- with self.timed_activity("Caching artifact"):
- artifact_size = utils._get_dir_size(assembledir)
- self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
-
- if collect is not None and collectvdir is None:
- raise ElementError(
- "Directory '{}' was not found inside the sandbox, "
- "unable to collect artifact contents"
- .format(collect))
+ artifact_size = assemblevdir.get_size()
+ self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
+
+ if collect is not None and collectvdir is None:
+ raise ElementError(
+ "Directory '{}' was not found inside the sandbox, "
+ "unable to collect artifact contents"
+ .format(collect))
return artifact_size
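Condensed, the rewritten _cache_artifact() assembles the artifact directly in a CasBasedDirectory: subtrees are created with descend(..., create=True), file content arrives via virtual-to-virtual import_files(), and only logs and metadata are still staged in plain directories before being imported. A sketch of that pattern, using the class and method names from the hunk (the wrapper function and its arguments are illustrative):

    from buildstream.storage._casbaseddirectory import CasBasedDirectory

    def assemble_artifact(context, collectvdir, logsdir, metadir):
        # Build the artifact tree in CAS instead of on the filesystem
        assemblevdir = CasBasedDirectory(cas_cache=context.artifactcache.cas, ref=None)
        logsvdir = assemblevdir.descend("logs", create=True)   # created on demand
        metavdir = assemblevdir.descend("meta", create=True)
        assemblevdir.descend("buildtree", create=True)

        if collectvdir is not None:
            # Virtual-to-virtual import: no hard links or filesystem export
            assemblevdir.descend("files", create=True).import_files(collectvdir)

        # Logs and metadata are still written to plain directories first,
        # then pulled into the CAS tree in one import each
        metavdir.import_files(metadir)
        logsvdir.import_files(logsdir)
        return assemblevdir

The commit then hands assemblevdir straight to ArtifactCache.commit(), which is why that method's content argument changed from a path string to a Directory.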
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 96a7aa79d..0ff7ea80b 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -136,10 +136,10 @@ class CasBasedDirectory(Directory):
the parent).
"""
- self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
if caller:
old_dir = self._find_pb2_entry(caller.filename)
self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
if self.parent:
self.parent._recalculate_recursing_up(self)
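The reorder in this hunk is a correctness fix: a parent must fold the child's fresh digest into its own protobuf before serializing itself, otherwise the parent's new ref describes a stale child. An iterative sketch of that bottom-up pass, using the add_object()/_find_pb2_entry() interfaces from the hunk (the loop framing is illustrative):

    def recalculate_up(node):
        # Walk from a modified directory up to the root, re-serializing each
        # ancestor only after its entry for the child has been refreshed
        while node.parent is not None:
            parent = node.parent
            entry = parent._find_pb2_entry(node.filename)
            # 1. Store the child's new serialization, updating the digest
            #    held in the parent's protobuf (add_object fills it in place)
            parent.cas_cache.add_object(
                digest=entry.digest,
                buffer=node.pb2_directory.SerializeToString())
            # 2. Only now is the parent's own serialization up to date
            parent.ref = parent.cas_cache.add_object(
                buffer=parent.pb2_directory.SerializeToString())
            node = parent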
@@ -277,14 +277,6 @@ class CasBasedDirectory(Directory):
directory_list))
return None
- def find_root(self):
- """ Finds the root of this directory tree by following 'parent' until there is
- no parent. """
- if self.parent:
- return self.parent.find_root()
- else:
- return self
-
def _check_replacement(self, name, path_prefix, fileListResult):
""" Checks whether 'name' exists, and if so, whether we can overwrite it.
If we can, add the name to 'overwritten_files' and delete the existing entry.
@@ -451,7 +443,7 @@ class CasBasedDirectory(Directory):
files = external_pathspec.list_relative_paths()
if isinstance(external_pathspec, FileBasedDirectory):
- source_directory = external_pathspec.get_underlying_directory()
+ source_directory = external_pathspec._get_underlying_directory()
result = self._import_files_from_directory(source_directory, files=files)
elif isinstance(external_pathspec, str):
source_directory = external_pathspec
@@ -635,6 +627,18 @@ class CasBasedDirectory(Directory):
self._recalculate_recursing_up()
self._recalculate_recursing_down()
+ def get_size(self):
+ total = len(self.pb2_directory.SerializeToString())
+ for i in self.index.values():
+ if isinstance(i.buildstream_object, CasBasedDirectory):
+ total += i.buildstream_object.get_size()
+ elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
+ src_name = self.cas_cache.objpath(i.pb_object.digest)
+ filesize = os.stat(src_name).st_size
+ total += filesize
+ # Symlink nodes are encoded as part of the directory serialization.
+ return total
+
def _get_identifier(self):
path = ""
if self.parent:
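The get_size() added above counts a file's on-disk size once per tree entry that references it, so it is an upper bound: CAS stores one object per unique digest. A self-contained toy illustrating the over-count (no BuildStream types involved; all names here are illustrative):

    import hashlib

    store = {}  # digest -> content: one entry per unique blob, like CAS

    def add_object(content):
        digest = hashlib.sha256(content).hexdigest()
        store[digest] = content          # identical content lands on one key
        return digest

    # Two tree entries whose blobs have identical content
    tree = {
        "a/data.bin": add_object(b"x" * 1000),
        "b/data.bin": add_object(b"x" * 1000),
    }

    reported = sum(len(store[d]) for d in tree.values())  # per entry, like get_size()
    actual = sum(len(c) for c in store.values())          # unique objects only
    assert (reported, actual) == (2000, 1000)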
@@ -653,3 +657,15 @@ class CasBasedDirectory(Directory):
throw an exception. """
raise VirtualDirectoryError("_get_underlying_directory was called on a CAS-backed directory," +
" which has no underlying directory.")
+
+ # _get_digest():
+ #
+ # Return the Digest for this directory.
+ #
+ # Returns:
+ # (Digest): The Digest protobuf object for the Directory protobuf
+ #
+ def _get_digest(self):
+ if not self.ref:
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+ return self.ref
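_get_digest() makes serialization lazy: nothing is written to CAS until the digest is first requested, and later calls reuse the cached ref. Illustrative usage, assuming a CasBasedDirectory vdir assembled as in element.py above:

    digest = vdir._get_digest()          # first call: serialize and store the protobuf
    assert vdir._get_digest() is digest  # later calls: return the cached ref

This is what lets ArtifactCache.commit() ask for the tree digest exactly when it needs it, after all imports into the directory have finished.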
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index b919413f0..0752a0e05 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -30,6 +30,7 @@ See also: :ref:`sandboxing`.
import os
import time
from .directory import Directory, VirtualDirectoryError
+from .. import utils
from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
from ..utils import _set_deterministic_user, _set_deterministic_mtime
@@ -201,6 +202,9 @@ class FileBasedDirectory(Directory):
return list_relative_paths(self.external_directory)
+ def get_size(self):
+ return utils._get_dir_size(self.external_directory)
+
def __str__(self):
# This returns the whole path (since we don't know where the directory started)
# which exposes the sandbox directory; we will have to assume for the time being
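utils._get_dir_size itself is not part of this diff; a helper of that name typically reduces to a recursive stat walk like the following (a sketch under that assumption, not the actual BuildStream implementation):

    import os

    def dir_size(root):
        total = 0
        for dirpath, _, filenames in os.walk(root):
            for name in filenames:
                path = os.path.join(dirpath, name)
                # lstat so a symlink counts its own size, not its target's
                total += os.lstat(path).st_size
        return total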
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
index 66b93a7f1..f572257d7 100644
--- a/buildstream/storage/directory.py
+++ b/buildstream/storage/directory.py
@@ -177,3 +177,9 @@ class Directory():
"""
raise NotImplementedError()
+
+ def get_size(self):
+ """ Get an approximation of the storage space in bytes used by this directory
+ and all files and subdirectories in it. Storage space varies by implementation
+ and effective space used may be lower than this number due to deduplication. """
+ raise NotImplementedError()
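This base-class stub is what lets element.py size the assembled artifact without caring whether it is file-backed or CAS-backed. A brief sketch of that polymorphism (the reporting helper is illustrative, not BuildStream API):

    from buildstream.storage.directory import Directory

    def report_size(vdir: Directory) -> str:
        # Same call for FileBasedDirectory and CasBasedDirectory; treat the
        # result as an upper bound, since CAS deduplication can make the
        # effective storage smaller
        return "{} bytes (approximate)".format(vdir.get_size())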