summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim MacArthur <jim+gitlab@mode7.co.uk>2018-08-01 13:16:04 +0000
committerJim MacArthur <jim+gitlab@mode7.co.uk>2018-08-01 13:16:04 +0000
commiteabc38998ea35f5cd07846e40460b35453a995a8 (patch)
treefb3d5480e8566c79679d19f6874cfe01308ecd40
parentd10e4668e79155c406fbe726be9458261083f3d7 (diff)
parent02c6c84682af1d6ee4a27588105e15c599085850 (diff)
downloadbuildstream-eabc38998ea35f5cd07846e40460b35453a995a8.tar.gz
Merge branch 'jmac/virtual_directories' into 'master'
Abstract directory class and filesystem-backed implementation See merge request BuildStream/buildstream!445
-rw-r--r--buildstream/_exceptions.py1
-rw-r--r--buildstream/_stream.py52
-rw-r--r--buildstream/element.py120
-rw-r--r--buildstream/plugins/elements/compose.py45
-rw-r--r--buildstream/plugins/elements/import.py23
-rw-r--r--buildstream/plugins/elements/stack.py11
-rw-r--r--buildstream/sandbox/_mount.py3
-rw-r--r--buildstream/sandbox/_sandboxbwrap.py4
-rw-r--r--buildstream/sandbox/_sandboxchroot.py2
-rw-r--r--buildstream/sandbox/sandbox.py39
-rw-r--r--buildstream/scriptelement.py9
-rw-r--r--buildstream/storage/__init__.py22
-rw-r--r--buildstream/storage/_filebaseddirectory.py216
-rw-r--r--buildstream/storage/directory.py155
-rw-r--r--buildstream/utils.py18
15 files changed, 554 insertions, 166 deletions
diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py
index c86b6780c..e55d942fd 100644
--- a/buildstream/_exceptions.py
+++ b/buildstream/_exceptions.py
@@ -88,6 +88,7 @@ class ErrorDomain(Enum):
ELEMENT = 11
APP = 12
STREAM = 13
+ VIRTUAL_FS = 14
# BstError is an internal base exception class for BuildSream
diff --git a/buildstream/_stream.py b/buildstream/_stream.py
index dcefc64f1..bd4b2ccf0 100644
--- a/buildstream/_stream.py
+++ b/buildstream/_stream.py
@@ -407,15 +407,16 @@ class Stream():
integrate=integrate) as sandbox:
# Copy or move the sandbox to the target directory
- sandbox_root = sandbox.get_directory()
+ sandbox_vroot = sandbox.get_virtual_directory()
+
if not tar:
with target.timed_activity("Checking out files in '{}'"
.format(location)):
try:
if hardlinks:
- self._checkout_hardlinks(sandbox_root, location)
+ self._checkout_hardlinks(sandbox_vroot, location)
else:
- utils.copy_files(sandbox_root, location)
+ sandbox_vroot.export_files(location)
except OSError as e:
raise StreamError("Failed to checkout files: '{}'"
.format(e)) from e
@@ -424,14 +425,12 @@ class Stream():
with target.timed_activity("Creating tarball"):
with os.fdopen(sys.stdout.fileno(), 'wb') as fo:
with tarfile.open(fileobj=fo, mode="w|") as tf:
- Stream._add_directory_to_tarfile(
- tf, sandbox_root, '.')
+ sandbox_vroot.export_to_tar(tf, '.')
else:
with target.timed_activity("Creating tarball '{}'"
.format(location)):
with tarfile.open(location, "w:") as tf:
- Stream._add_directory_to_tarfile(
- tf, sandbox_root, '.')
+ sandbox_vroot.export_to_tar(tf, '.')
except BstError as e:
raise StreamError("Error while staging dependencies into a sandbox"
@@ -1050,46 +1049,13 @@ class Stream():
# Helper function for checkout()
#
- def _checkout_hardlinks(self, sandbox_root, directory):
+ def _checkout_hardlinks(self, sandbox_vroot, directory):
try:
- removed = utils.safe_remove(directory)
+ utils.safe_remove(directory)
except OSError as e:
raise StreamError("Failed to remove checkout directory: {}".format(e)) from e
- if removed:
- # Try a simple rename of the sandbox root; if that
- # doesnt cut it, then do the regular link files code path
- try:
- os.rename(sandbox_root, directory)
- except OSError:
- os.makedirs(directory, exist_ok=True)
- utils.link_files(sandbox_root, directory)
- else:
- utils.link_files(sandbox_root, directory)
-
- # Add a directory entry deterministically to a tar file
- #
- # This function takes extra steps to ensure the output is deterministic.
- # First, it sorts the results of os.listdir() to ensure the ordering of
- # the files in the archive is the same. Second, it sets a fixed
- # timestamp for each entry. See also https://bugs.python.org/issue24465.
- @staticmethod
- def _add_directory_to_tarfile(tf, dir_name, dir_arcname, mtime=0):
- for filename in sorted(os.listdir(dir_name)):
- name = os.path.join(dir_name, filename)
- arcname = os.path.join(dir_arcname, filename)
-
- tarinfo = tf.gettarinfo(name, arcname)
- tarinfo.mtime = mtime
-
- if tarinfo.isreg():
- with open(name, "rb") as f:
- tf.addfile(tarinfo, f)
- elif tarinfo.isdir():
- tf.addfile(tarinfo)
- Stream._add_directory_to_tarfile(tf, name, arcname, mtime)
- else:
- tf.addfile(tarinfo)
+ sandbox_vroot.export_files(directory, can_link=True, can_destroy=True)
# Write the element build script to the given directory
def _write_element_script(self, directory, element):
diff --git a/buildstream/element.py b/buildstream/element.py
index 4260d32a5..2218ef94b 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -80,7 +80,6 @@ from collections import Mapping, OrderedDict
from contextlib import contextmanager
from enum import Enum
import tempfile
-import time
import shutil
from . import _yaml
@@ -97,6 +96,9 @@ from . import _site
from ._platform import Platform
from .sandbox._config import SandboxConfig
+from .storage.directory import Directory
+from .storage._filebaseddirectory import FileBasedDirectory, VirtualDirectoryError
+
# _KeyStrength():
#
@@ -195,6 +197,13 @@ class Element(Plugin):
*Since: 1.2*
"""
+ BST_VIRTUAL_DIRECTORY = False
+ """Whether to raise exceptions if an element uses Sandbox.get_directory
+ instead of Sandbox.get_virtual_directory.
+
+ *Since: 1.4*
+ """
+
def __init__(self, context, project, artifacts, meta, plugin_conf):
self.__cache_key_dict = None # Dict for cache key calculation
@@ -627,10 +636,10 @@ class Element(Plugin):
# Hard link it into the staging area
#
- basedir = sandbox.get_directory()
- stagedir = basedir \
+ vbasedir = sandbox.get_virtual_directory()
+ vstagedir = vbasedir \
if path is None \
- else os.path.join(basedir, path.lstrip(os.sep))
+ else vbasedir.descend(path.lstrip(os.sep).split(os.sep))
files = list(self.__compute_splits(include, exclude, orphans))
@@ -642,15 +651,8 @@ class Element(Plugin):
link_files = files
copy_files = []
- link_result = utils.link_files(artifact, stagedir, files=link_files,
- report_written=True)
- copy_result = utils.copy_files(artifact, stagedir, files=copy_files,
- report_written=True)
-
- cur_time = time.time()
-
- for f in copy_result.files_written:
- os.utime(os.path.join(stagedir, f), times=(cur_time, cur_time))
+ link_result = vstagedir.import_files(artifact, files=link_files, report_written=True, can_link=True)
+ copy_result = vstagedir.import_files(artifact, files=copy_files, report_written=True, update_utimes=True)
return link_result.combine(copy_result)
@@ -1359,40 +1361,45 @@ class Element(Plugin):
sandbox._set_mount_source(directory, workspace.get_absolute_path())
# Stage all sources that need to be copied
- sandbox_root = sandbox.get_directory()
- host_directory = os.path.join(sandbox_root, directory.lstrip(os.sep))
- self._stage_sources_at(host_directory, mount_workspaces=mount_workspaces)
+ sandbox_vroot = sandbox.get_virtual_directory()
+ host_vdirectory = sandbox_vroot.descend(directory.lstrip(os.sep).split(os.sep), create=True)
+ self._stage_sources_at(host_vdirectory, mount_workspaces=mount_workspaces)
# _stage_sources_at():
#
# Stage this element's sources to a directory
#
# Args:
- # directory (str): An absolute path to stage the sources at
+ # vdirectory (:class:`.storage.Directory`): A virtual directory object to stage sources into.
# mount_workspaces (bool): mount workspaces if True, copy otherwise
#
- def _stage_sources_at(self, directory, mount_workspaces=True):
+ def _stage_sources_at(self, vdirectory, mount_workspaces=True):
with self.timed_activity("Staging sources", silent_nested=True):
- if os.path.isdir(directory) and os.listdir(directory):
- raise ElementError("Staging directory '{}' is not empty".format(directory))
-
- workspace = self._get_workspace()
- if workspace:
- # If mount_workspaces is set and we're doing incremental builds,
- # the workspace is already mounted into the sandbox.
- if not (mount_workspaces and self.__can_build_incrementally()):
- with self.timed_activity("Staging local files at {}".format(workspace.path)):
- workspace.stage(directory)
- else:
- # No workspace, stage directly
- for source in self.sources():
- source._stage(directory)
-
+ if not isinstance(vdirectory, Directory):
+ vdirectory = FileBasedDirectory(vdirectory)
+ if not vdirectory.is_empty():
+ raise ElementError("Staging directory '{}' is not empty".format(vdirectory))
+
+ with tempfile.TemporaryDirectory() as temp_staging_directory:
+
+ workspace = self._get_workspace()
+ if workspace:
+ # If mount_workspaces is set and we're doing incremental builds,
+ # the workspace is already mounted into the sandbox.
+ if not (mount_workspaces and self.__can_build_incrementally()):
+ with self.timed_activity("Staging local files at {}".format(workspace.path)):
+ workspace.stage(temp_staging_directory)
+ else:
+ # No workspace, stage directly
+ for source in self.sources():
+ source._stage(temp_staging_directory)
+
+ vdirectory.import_files(temp_staging_directory)
# Ensure deterministic mtime of sources at build time
- utils._set_deterministic_mtime(directory)
+ vdirectory.set_deterministic_mtime()
# Ensure deterministic owners of sources at build time
- utils._set_deterministic_user(directory)
+ vdirectory.set_deterministic_user()
# _set_required():
#
@@ -1508,7 +1515,7 @@ class Element(Plugin):
with _signals.terminator(cleanup_rootdir), \
self.__sandbox(rootdir, output_file, output_file, self.__sandbox_config) as sandbox: # nopep8
- sandbox_root = sandbox.get_directory()
+ sandbox_vroot = sandbox.get_virtual_directory()
# By default, the dynamic public data is the same as the static public data.
# The plugin's assemble() method may modify this, though.
@@ -1540,11 +1547,11 @@ class Element(Plugin):
#
workspace = self._get_workspace()
if workspace and self.__staged_sources_directory:
- sandbox_root = sandbox.get_directory()
- sandbox_path = os.path.join(sandbox_root,
- self.__staged_sources_directory.lstrip(os.sep))
+ sandbox_vroot = sandbox.get_virtual_directory()
+ path_components = self.__staged_sources_directory.lstrip(os.sep).split(os.sep)
+ sandbox_vpath = sandbox_vroot.descend(path_components)
try:
- utils.copy_files(workspace.path, sandbox_path)
+ sandbox_vpath.import_files(workspace.path)
except UtilError as e:
self.warn("Failed to preserve workspace state for failed build sysroot: {}"
.format(e))
@@ -1556,7 +1563,11 @@ class Element(Plugin):
raise
finally:
if collect is not None:
- collectdir = os.path.join(sandbox_root, collect.lstrip(os.sep))
+ try:
+ collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
+ except VirtualDirectoryError:
+ # No collect directory existed
+ collectvdir = None
# Create artifact directory structure
assembledir = os.path.join(rootdir, 'artifact')
@@ -1565,20 +1576,26 @@ class Element(Plugin):
metadir = os.path.join(assembledir, 'meta')
buildtreedir = os.path.join(assembledir, 'buildtree')
os.mkdir(assembledir)
- if collect is not None and os.path.exists(collectdir):
+ if collect is not None and collectvdir is not None:
os.mkdir(filesdir)
os.mkdir(logsdir)
os.mkdir(metadir)
os.mkdir(buildtreedir)
# Hard link files from collect dir to files directory
- if collect is not None and os.path.exists(collectdir):
- utils.link_files(collectdir, filesdir)
-
- sandbox_build_dir = os.path.join(sandbox_root, self.get_variable('build-root').lstrip(os.sep))
- # Hard link files from build-root dir to buildtreedir directory
- if os.path.isdir(sandbox_build_dir):
- utils.link_files(sandbox_build_dir, buildtreedir)
+ if collect is not None and collectvdir is not None:
+ collectvdir.export_files(filesdir, can_link=True)
+
+ try:
+ sandbox_build_dir = sandbox_vroot.descend(
+ self.get_variable('build-root').lstrip(os.sep).split(os.sep))
+ # Export files from build-root dir to buildtreedir directory (copied rather than hard linked, as can_link is not passed)
+ sandbox_build_dir.export_files(buildtreedir)
+ except VirtualDirectoryError:
+ # Directory could not be found. Pre-virtual
+ # directory behaviour was to continue silently
+ # if the directory could not be found.
+ pass
# Copy build log
log_filename = context.get_log_filename()
@@ -1626,7 +1643,7 @@ class Element(Plugin):
self.__artifact_size = utils._get_dir_size(assembledir)
self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
- if collect is not None and not os.path.exists(collectdir):
+ if collect is not None and collectvdir is None:
raise ElementError(
"Directory '{}' was not found inside the sandbox, "
"unable to collect artifact contents"
@@ -2126,7 +2143,8 @@ class Element(Plugin):
directory,
stdout=stdout,
stderr=stderr,
- config=config)
+ config=config,
+ allow_real_directory=not self.BST_VIRTUAL_DIRECTORY)
yield sandbox
else:
diff --git a/buildstream/plugins/elements/compose.py b/buildstream/plugins/elements/compose.py
index 44a760215..a206dd6d7 100644
--- a/buildstream/plugins/elements/compose.py
+++ b/buildstream/plugins/elements/compose.py
@@ -34,7 +34,6 @@ The default configuration and possible options are as such:
"""
import os
-from buildstream import utils
from buildstream import Element, Scope
@@ -56,6 +55,9 @@ class ComposeElement(Element):
# added, to reduce the potential for confusion
BST_FORBID_SOURCES = True
+ # This plugin has been modified to avoid the use of Sandbox.get_directory
+ BST_VIRTUAL_DIRECTORY = True
+
def configure(self, node):
self.node_validate(node, [
'integrate', 'include', 'exclude', 'include-orphans'
@@ -104,7 +106,8 @@ class ComposeElement(Element):
orphans=self.include_orphans)
manifest.update(files)
- basedir = sandbox.get_directory()
+ # Get the virtual root directory of the sandbox; file snapshots are taken below.
+ vbasedir = sandbox.get_virtual_directory()
modified_files = set()
removed_files = set()
added_files = set()
@@ -116,38 +119,24 @@ class ComposeElement(Element):
if require_split:
# Make a snapshot of all the files before integration-commands are run.
- snapshot = {
- f: getmtime(os.path.join(basedir, f))
- for f in utils.list_relative_paths(basedir)
- }
+ snapshot = set(vbasedir.list_relative_paths())
+ vbasedir.mark_unmodified()
for dep in self.dependencies(Scope.BUILD):
dep.integrate(sandbox)
if require_split:
-
# Calculate added, modified and removed files
- basedir_contents = set(utils.list_relative_paths(basedir))
+ post_integration_snapshot = vbasedir.list_relative_paths()
+ modified_files = set(vbasedir.list_modified_paths())
+ basedir_contents = set(post_integration_snapshot)
for path in manifest:
- if path in basedir_contents:
- if path in snapshot:
- preintegration_mtime = snapshot[path]
- if preintegration_mtime != getmtime(os.path.join(basedir, path)):
- modified_files.add(path)
- else:
- # If the path appears in the manifest but not the initial snapshot,
- # it may be a file staged inside a directory symlink. In this case
- # the path we got from the manifest won't show up in the snapshot
- # because utils.list_relative_paths() doesn't recurse into symlink
- # directories.
- pass
- elif path in snapshot:
+ if path in snapshot and path not in basedir_contents:
removed_files.add(path)
for path in basedir_contents:
if path not in snapshot:
added_files.add(path)
-
self.info("Integration modified {}, added {} and removed {} files"
.format(len(modified_files), len(added_files), len(removed_files)))
@@ -166,8 +155,7 @@ class ComposeElement(Element):
# instead of into a subdir. The element assemble() method should
# support this in some way.
#
- installdir = os.path.join(basedir, 'buildstream', 'install')
- os.makedirs(installdir, exist_ok=True)
+ installdir = vbasedir.descend(['buildstream', 'install'], create=True)
# We already saved the manifest for created files in the integration phase,
# now collect the rest of the manifest.
@@ -191,19 +179,12 @@ class ComposeElement(Element):
with self.timed_activity("Creating composition", detail=detail, silent_nested=True):
self.info("Composing {} files".format(len(manifest)))
- utils.link_files(basedir, installdir, files=manifest)
+ installdir.import_files(vbasedir, files=manifest, can_link=True)
# And we're done
return os.path.join(os.sep, 'buildstream', 'install')
-# Like os.path.getmtime(), but doesnt explode on symlinks
-#
-def getmtime(path):
- stat = os.lstat(path)
- return stat.st_mtime
-
-
# Plugin entry point
def setup():
return ComposeElement
diff --git a/buildstream/plugins/elements/import.py b/buildstream/plugins/elements/import.py
index 93594b623..0eca2a902 100644
--- a/buildstream/plugins/elements/import.py
+++ b/buildstream/plugins/elements/import.py
@@ -31,7 +31,6 @@ The empty configuration is as such:
"""
import os
-import shutil
from buildstream import Element, BuildElement, ElementError
@@ -39,6 +38,9 @@ from buildstream import Element, BuildElement, ElementError
class ImportElement(BuildElement):
# pylint: disable=attribute-defined-outside-init
+ # This plugin has been modified to avoid the use of Sandbox.get_directory
+ BST_VIRTUAL_DIRECTORY = True
+
def configure(self, node):
self.source = self.node_subst_member(node, 'source')
self.target = self.node_subst_member(node, 'target')
@@ -68,27 +70,22 @@ class ImportElement(BuildElement):
# Do not mount workspaces as the files are copied from outside the sandbox
self._stage_sources_in_sandbox(sandbox, 'input', mount_workspaces=False)
- rootdir = sandbox.get_directory()
- inputdir = os.path.join(rootdir, 'input')
- outputdir = os.path.join(rootdir, 'output')
+ rootdir = sandbox.get_virtual_directory()
+ inputdir = rootdir.descend(['input'])
+ outputdir = rootdir.descend(['output'], create=True)
# The directory to grab
- inputdir = os.path.join(inputdir, self.source.lstrip(os.sep))
- inputdir = inputdir.rstrip(os.sep)
+ inputdir = inputdir.descend(self.source.strip(os.sep).split(os.sep))
# The output target directory
- outputdir = os.path.join(outputdir, self.target.lstrip(os.sep))
- outputdir = outputdir.rstrip(os.sep)
-
- # Ensure target directory parent
- os.makedirs(os.path.dirname(outputdir), exist_ok=True)
+ outputdir = outputdir.descend(self.target.strip(os.sep).split(os.sep), create=True)
- if not os.path.exists(inputdir):
+ if inputdir.is_empty():
raise ElementError("{}: No files were found inside directory '{}'"
.format(self, self.source))
# Move it over
- shutil.move(inputdir, outputdir)
+ outputdir.import_files(inputdir)
# And we're done
return '/output'
diff --git a/buildstream/plugins/elements/stack.py b/buildstream/plugins/elements/stack.py
index 087d4dac0..138afedf7 100644
--- a/buildstream/plugins/elements/stack.py
+++ b/buildstream/plugins/elements/stack.py
@@ -24,13 +24,15 @@ Stack elements are simply a symbolic element used for representing
a logical group of elements.
"""
-import os
from buildstream import Element
# Element implementation for the 'stack' kind.
class StackElement(Element):
+ # This plugin has been modified to avoid the use of Sandbox.get_directory
+ BST_VIRTUAL_DIRECTORY = True
+
def configure(self, node):
pass
@@ -52,7 +54,7 @@ class StackElement(Element):
# Just create a dummy empty artifact, its existence is a statement
# that all this stack's dependencies are built.
- rootdir = sandbox.get_directory()
+ vrootdir = sandbox.get_virtual_directory()
# XXX FIXME: This is currently needed because the artifact
# cache wont let us commit an empty artifact.
@@ -61,10 +63,7 @@ class StackElement(Element):
# the actual artifact data in a subdirectory, then we
# will be able to store some additional state in the
# artifact cache, and we can also remove this hack.
- outputdir = os.path.join(rootdir, 'output', 'bst')
-
- # Ensure target directory parent
- os.makedirs(os.path.dirname(outputdir), exist_ok=True)
+ vrootdir.descend(['output', 'bst'], create=True)
# And we're done
return '/output'
diff --git a/buildstream/sandbox/_mount.py b/buildstream/sandbox/_mount.py
index 1540d9d4f..0f96a92b7 100644
--- a/buildstream/sandbox/_mount.py
+++ b/buildstream/sandbox/_mount.py
@@ -32,7 +32,8 @@ from .._fuse import SafeHardlinks
class Mount():
def __init__(self, sandbox, mount_point, safe_hardlinks):
scratch_directory = sandbox._get_scratch_directory()
- root_directory = sandbox.get_directory()
+ # Getting external_directory here is acceptable as we're part of the sandbox code.
+ root_directory = sandbox.get_virtual_directory().external_directory
self.mount_point = mount_point
self.safe_hardlinks = safe_hardlinks
diff --git a/buildstream/sandbox/_sandboxbwrap.py b/buildstream/sandbox/_sandboxbwrap.py
index 9ed677620..010e4791d 100644
--- a/buildstream/sandbox/_sandboxbwrap.py
+++ b/buildstream/sandbox/_sandboxbwrap.py
@@ -56,7 +56,9 @@ class SandboxBwrap(Sandbox):
def run(self, command, flags, *, cwd=None, env=None):
stdout, stderr = self._get_output()
- root_directory = self.get_directory()
+
+ # Allowable access to underlying storage as we're part of the sandbox
+ root_directory = self.get_virtual_directory().external_directory
# Fallback to the sandbox default settings for
# the cwd and env.
diff --git a/buildstream/sandbox/_sandboxchroot.py b/buildstream/sandbox/_sandboxchroot.py
index 8788c3031..de4eb46e2 100644
--- a/buildstream/sandbox/_sandboxchroot.py
+++ b/buildstream/sandbox/_sandboxchroot.py
@@ -90,7 +90,7 @@ class SandboxChroot(Sandbox):
# Nonetheless a better solution could perhaps be found.
rootfs = stack.enter_context(utils._tempdir(dir='/var/run/buildstream'))
- stack.enter_context(self.create_devices(self.get_directory(), flags))
+ stack.enter_context(self.create_devices(self._root, flags))
stack.enter_context(self.mount_dirs(rootfs, flags, stdout, stderr))
if flags & SandboxFlags.INTERACTIVE:
diff --git a/buildstream/sandbox/sandbox.py b/buildstream/sandbox/sandbox.py
index 7e1e32b65..9fe1194bb 100644
--- a/buildstream/sandbox/sandbox.py
+++ b/buildstream/sandbox/sandbox.py
@@ -29,7 +29,8 @@ See also: :ref:`sandboxing`.
"""
import os
-from .._exceptions import ImplError
+from .._exceptions import ImplError, BstError
+from ..storage._filebaseddirectory import FileBasedDirectory
class SandboxFlags():
@@ -90,28 +91,50 @@ class Sandbox():
self.__cwd = None
self.__env = None
self.__mount_sources = {}
+ self.__allow_real_directory = kwargs['allow_real_directory']
+
# Configuration from kwargs common to all subclasses
self.__config = kwargs['config']
self.__stdout = kwargs['stdout']
self.__stderr = kwargs['stderr']
- # Setup the directories
+ # Setup the directories. Root should be available to subclasses, hence
+ # being single-underscore. The others are private to this class.
+ self._root = os.path.join(directory, 'root')
self.__directory = directory
- self.__root = os.path.join(self.__directory, 'root')
self.__scratch = os.path.join(self.__directory, 'scratch')
- for directory_ in [self.__root, self.__scratch]:
+ for directory_ in [self._root, self.__scratch]:
os.makedirs(directory_, exist_ok=True)
def get_directory(self):
"""Fetches the sandbox root directory
The root directory is where artifacts for the base
- runtime environment should be staged.
+ runtime environment should be staged. Only works if
+ BST_VIRTUAL_DIRECTORY is not set.
Returns:
(str): The sandbox root directory
+
+ """
+ if self.__allow_real_directory:
+ return self._root
+ else:
+ raise BstError("You can't use get_directory")
+
+ def get_virtual_directory(self):
+ """Fetches the sandbox root directory
+
+ The root directory is where artifacts for the base
+ runtime environment should be staged. Unlike get_directory(),
+ this works whether or not BST_VIRTUAL_DIRECTORY is set.
+
+ Returns:
+ (:class:`.storage.Directory`): The sandbox root directory
+
"""
- return self.__root
+ # For now, just create a new Directory every time we're asked
+ return FileBasedDirectory(self._root)
def set_environment(self, environment):
"""Sets the environment variables for the sandbox
@@ -293,11 +316,11 @@ class Sandbox():
def _has_command(self, command, env=None):
if os.path.isabs(command):
return os.path.exists(os.path.join(
- self.get_directory(), command.lstrip(os.sep)))
+ self._root, command.lstrip(os.sep)))
for path in env.get('PATH').split(':'):
if os.path.exists(os.path.join(
- self.get_directory(), path.lstrip(os.sep), command)):
+ self._root, path.lstrip(os.sep), command)):
return True
return False
diff --git a/buildstream/scriptelement.py b/buildstream/scriptelement.py
index 145dc2648..212402058 100644
--- a/buildstream/scriptelement.py
+++ b/buildstream/scriptelement.py
@@ -243,9 +243,8 @@ class ScriptElement(Element):
with self.timed_activity("Staging {} at {}"
.format(element.name, item['destination']),
silent_nested=True):
- real_dstdir = os.path.join(sandbox.get_directory(),
- item['destination'].lstrip(os.sep))
- os.makedirs(os.path.dirname(real_dstdir), exist_ok=True)
+ virtual_dstdir = sandbox.get_virtual_directory()
+ virtual_dstdir.descend(item['destination'].lstrip(os.sep).split(os.sep), create=True)
element.stage_dependency_artifacts(sandbox, Scope.RUN, path=item['destination'])
for item in self.__layout:
@@ -263,8 +262,8 @@ class ScriptElement(Element):
for dep in element.dependencies(Scope.RUN):
dep.integrate(sandbox)
- os.makedirs(os.path.join(sandbox.get_directory(), self.__install_root.lstrip(os.sep)),
- exist_ok=True)
+ install_root_path_components = self.__install_root.lstrip(os.sep).split(os.sep)
+ sandbox.get_virtual_directory().descend(install_root_path_components, create=True)
def assemble(self, sandbox):
diff --git a/buildstream/storage/__init__.py b/buildstream/storage/__init__.py
new file mode 100644
index 000000000..33424ac8d
--- /dev/null
+++ b/buildstream/storage/__init__.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2018 Bloomberg Finance LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Jim MacArthur <jim.macarthur@codethink.co.uk>
+
+from ._filebaseddirectory import FileBasedDirectory
+from .directory import Directory
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
new file mode 100644
index 000000000..5b3da28f4
--- /dev/null
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2018 Bloomberg Finance LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Jim MacArthur <jim.macarthur@codethink.co.uk>
+
+"""
+FileBasedDirectory
+==================
+
+Implementation of the Directory class which backs onto a normal POSIX filing system.
+
+See also: :ref:`sandboxing`.
+"""
+
+import os
+import time
+from .._exceptions import BstError, ErrorDomain
+from .directory import Directory
+from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
+from ..utils import _set_deterministic_user, _set_deterministic_mtime
+
+
+class VirtualDirectoryError(BstError):
+ """Raised by Directory functions when system calls fail.
+ This will be handled internally by the BuildStream core,
+ if you need to handle this error, then it should be reraised,
+ or either of the :class:`.ElementError` or :class:`.SourceError`
+ exceptions should be raised from this error.
+ """
+ def __init__(self, message, reason=None):
+ super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
+
+
+# FileBasedDirectory intentionally doesn't call its superclass constructor,
+# which is meant to be unimplemented.
+# pylint: disable=super-init-not-called
+
+
+class _FileObject():
+ """A description of a file in a virtual directory. The contents of
+ this class are never used, but there needs to be something present
+ for files so is_empty() works correctly.
+
+ """
+ def __init__(self, virtual_directory: Directory, filename: str):
+ self.directory = virtual_directory
+ self.filename = filename
+
+
+class FileBasedDirectory(Directory):
+ def __init__(self, external_directory=None):
+ self.external_directory = external_directory
+ self.index = {}
+ self._directory_read = False
+
+ def _populate_index(self):
+ if self._directory_read:
+ return
+ for entry in os.listdir(self.external_directory):
+ if os.path.isdir(os.path.join(self.external_directory, entry)):
+ self.index[entry] = FileBasedDirectory(os.path.join(self.external_directory, entry))
+ else:
+ self.index[entry] = _FileObject(self, entry)
+ self._directory_read = True
+
+ def descend(self, subdirectory_spec, create=False):
+ """ See superclass Directory for arguments """
+ # It's very common to send a directory name instead of a list and this causes
+ # bizarre errors, so check for it here
+ if not isinstance(subdirectory_spec, list):
+ subdirectory_spec = [subdirectory_spec]
+
+ # Because of the way split works, it's common to get a list which begins with
+ # an empty string. Detect these and remove them.
+ while subdirectory_spec and subdirectory_spec[0] == "":
+ subdirectory_spec.pop(0)
+
+ if not subdirectory_spec:
+ return self
+
+ self._populate_index()
+ if subdirectory_spec[0] in self.index:
+ entry = self.index[subdirectory_spec[0]]
+ if isinstance(entry, FileBasedDirectory):
+ new_path = os.path.join(self.external_directory, subdirectory_spec[0])
+ return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+ else:
+ error = "Cannot descend into {}, which is a '{}' in the directory {}"
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0],
+ type(entry).__name__,
+ self.external_directory))
+ else:
+ if create:
+ new_path = os.path.join(self.external_directory, subdirectory_spec[0])
+ os.makedirs(new_path, exist_ok=True)
+ return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+ else:
+ error = "No entry called '{}' found in the directory rooted at {}"
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
+
+ def import_files(self, external_pathspec, *, files=None,
+ report_written=True, update_utimes=False,
+ can_link=False):
+ """ See superclass Directory for arguments """
+
+ if isinstance(external_pathspec, Directory):
+ source_directory = external_pathspec.external_directory
+ else:
+ source_directory = external_pathspec
+
+ if can_link and not update_utimes:
+ import_result = link_files(source_directory, self.external_directory, files=files,
+ ignore_missing=False, report_written=report_written)
+ else:
+ import_result = copy_files(source_directory, self.external_directory, files=files,
+ ignore_missing=False, report_written=report_written)
+ if update_utimes:
+ cur_time = time.time()
+
+ for f in import_result.files_written:
+ os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
+ return import_result
+
+ def set_deterministic_mtime(self):
+ _set_deterministic_mtime(self.external_directory)
+
+ def set_deterministic_user(self):
+ _set_deterministic_user(self.external_directory)
+
+ def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+ if can_destroy:
+ # Try a simple rename of the sandbox root; if that
+ # doesnt cut it, then do the regular link files code path
+ try:
+ os.rename(self.external_directory, to_directory)
+ return
+ except OSError:
+ # Proceed using normal link/copy
+ pass
+
+ os.makedirs(to_directory, exist_ok=True)
+ if can_link:
+ link_files(self.external_directory, to_directory)
+ else:
+ copy_files(self.external_directory, to_directory)
+
+ # Add a directory entry deterministically to a tar file
+ #
+ # This function takes extra steps to ensure the output is deterministic.
+ # First, it sorts the results of os.listdir() to ensure the ordering of
+ # the files in the archive is the same. Second, it sets a fixed
+ # timestamp for each entry. See also https://bugs.python.org/issue24465.
+ def export_to_tar(self, tf, dir_arcname, mtime=0):
+ # We need directories here, including non-empty ones,
+ # so list_relative_paths is not used.
+ for filename in sorted(os.listdir(self.external_directory)):
+ source_name = os.path.join(self.external_directory, filename)
+ arcname = os.path.join(dir_arcname, filename)
+ tarinfo = tf.gettarinfo(source_name, arcname)
+ tarinfo.mtime = mtime
+
+ if tarinfo.isreg():
+ with open(source_name, "rb") as f:
+ tf.addfile(tarinfo, f)
+ elif tarinfo.isdir():
+ tf.addfile(tarinfo)
+ self.descend(filename.split(os.path.sep)).export_to_tar(tf, arcname, mtime)
+ else:
+ tf.addfile(tarinfo)
+
+ def is_empty(self):
+ self._populate_index()
+ return len(self.index) == 0
+
+ def mark_unmodified(self):
+ """ Marks all files in this directory (recursively) as unmodified.
+ """
+ _set_deterministic_mtime(self.external_directory)
+
+ def list_modified_paths(self):
+ """Provide a list of relative paths which have been modified since the
+ last call to mark_unmodified.
+
+ Return value: List(str) - list of modified paths
+ """
+ return [f for f in list_relative_paths(self.external_directory)
+ if _get_link_mtime(os.path.join(self.external_directory, f)) != _magic_timestamp]
+
+ def list_relative_paths(self):
+ """Provide a list of all relative paths.
+
+ Return value: List(str) - list of all paths
+ """
+
+ return list_relative_paths(self.external_directory)
+
+ def __str__(self):
+ # This returns the whole path (since we don't know where the directory started)
+ # which exposes the sandbox directory; we will have to assume for the time being
+ # that people will not abuse __str__.
+ return self.external_directory
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
new file mode 100644
index 000000000..40a895acc
--- /dev/null
+++ b/buildstream/storage/directory.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2018 Bloomberg Finance LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Jim MacArthur <jim.macarthur@codethink.co.uk>
+
+"""
+Directory
+=========
+
+This is a virtual Directory class to isolate the rest of BuildStream
+from the backing store implementation. Sandboxes are allowed to read
+from and write to the underlying storage, but all others must use this
+Directory class to access files and directories in the sandbox.
+
+See also: :ref:`sandboxing`.
+
+"""
+
+
+class Directory():
+ def __init__(self, external_directory=None):
+ raise NotImplementedError()  # instantiating this class directly always raises
+
+ def descend(self, subdirectory_spec, create=False):
+ """Descend one or more levels of directory hierarchy and return a new
+ Directory object for that directory.
+
+ Args:
+ subdirectory_spec (list of str): A list of strings which are all directory
+ names.
+ create (bool): If this is true, the directories will be created if
+ they don't already exist.
+
+ Returns:
+ A Directory object representing the found directory.
+
+ Raises:
+ VirtualDirectoryError: if any of the components in subdirectory_spec
+ cannot be found, or are files, or symlinks to files.
+
+ """
+ raise NotImplementedError()
+
+ # Import and export of files and links
+ def import_files(self, external_pathspec, *, files=None,
+ report_written=True, update_utimes=False,
+ can_link=False):
+ """Imports some or all files from external_pathspec into this directory.
+
+ Args:
+ external_pathspec: Either a string containing a pathname, or a
+ Directory object, to use as the source.
+ files (list of str): A list of all the files relative to
+ the external_pathspec to copy. If 'None' is supplied, all
+ files are copied.
+ report_written (bool): Return the full list of files
+ written. Defaults to true. If false, only a list of
+ overwritten files is returned.
+ update_utimes (bool): Update the access and modification time
+ of each file copied to the current time.
+ can_link (bool): Whether it's OK to create a hard link to the
+ original content, meaning the stored copy will change when the
+ original files change. Setting this doesn't guarantee hard
+ links will be made. can_link will never be used if
+ update_utimes is set.
+
+ Returns:
+ (FileListResult) - A report of files imported and overwritten.
+
+ """
+
+ raise NotImplementedError()
+
+ def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+ """Copies everything from this into to_directory.
+
+ Args:
+ to_directory (string): a path outside this directory object
+ where the contents will be copied to.
+ can_link (bool): Whether we can create hard links in to_directory
+ instead of copying. Setting this does not guarantee hard links will be used.
+ can_destroy (bool): Can we destroy the data already in this
+ directory when exporting? If set, this may allow data to be
+ moved rather than copied which will be quicker.
+ """
+
+ raise NotImplementedError()
+
+ def export_to_tar(self, tarfile, destination_dir, mtime=0):
+ """ Exports this directory into the given tar file.
+
+ Args:
+ tarfile (TarFile): A Python TarFile object to export into.
+ destination_dir (str): The prefix for all filenames inside the archive.
+ mtime (int): mtimes of all files in the archive are set to this.
+ """
+ raise NotImplementedError()
+
+ # Convenience functions
+ def is_empty(self):
+ """ Return true if this directory has no files, subdirectories or links in it.
+ """
+ raise NotImplementedError()
+
+ def set_deterministic_mtime(self):
+ """ Sets a static modification time for all regular files in this directory.
+ The magic number for timestamps is 2011-11-11 11:11:11.
+ """
+ raise NotImplementedError()
+
+ def set_deterministic_user(self):
+ """ Sets all files in this directory to the current user's euid/egid.
+ """
+ raise NotImplementedError()
+
+ def mark_unmodified(self):
+ """ Marks all files in this directory (recursively) as unmodified.
+ """
+ raise NotImplementedError()
+
+ def list_modified_paths(self):
+ """Provide a list of relative paths which have been modified since the
+ last call to mark_unmodified. Includes directories only if
+ they are empty.
+
+ Returns:
+ (List(str)) - list of all modified files with relative paths.
+
+ """
+ raise NotImplementedError()
+
+ def list_relative_paths(self):
+ """Provide a list of all relative paths in this directory. Includes
+ directories only if they are empty.
+
+ Yields:
+ (List(str)) - list of all files with relative paths.
+
+ """
+ raise NotImplementedError()
diff --git a/buildstream/utils.py b/buildstream/utils.py
index 68f99b9a3..93ab6fb0e 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -41,6 +41,9 @@ import psutil
from . import _signals
from ._exceptions import BstError, ErrorDomain
+# The magic number for timestamps: 2011-11-11 11:11:11 UTC, as a Unix timestamp
+_magic_timestamp = calendar.timegm([2011, 11, 11, 11, 11, 11])
+
# The separator we use for user specified aliases
_ALIAS_SEPARATOR = ':'
@@ -909,9 +912,6 @@ def _set_deterministic_user(directory):
# directory (str): The directory to recursively set the mtime on
#
def _set_deterministic_mtime(directory):
- # The magic number for timestamps: 2011-11-11 11:11:11
- magic_timestamp = calendar.timegm([2011, 11, 11, 11, 11, 11])
-
for dirname, _, filenames in os.walk(directory.encode("utf-8"), topdown=False):
for filename in filenames:
pathname = os.path.join(dirname, filename)
@@ -930,9 +930,9 @@ def _set_deterministic_mtime(directory):
# However, nowadays it is possible at least on gnuish systems
# with the lutimes glibc function.
if not os.path.islink(pathname):
- os.utime(pathname, (magic_timestamp, magic_timestamp))
+ os.utime(pathname, (_magic_timestamp, _magic_timestamp))
- os.utime(dirname, (magic_timestamp, magic_timestamp))
+ os.utime(dirname, (_magic_timestamp, _magic_timestamp))
# _tempdir()
@@ -1159,3 +1159,11 @@ def _deduplicate(iterable, key=None):
if k not in seen:
seen_add(k)
yield element
+
+
+# Like os.path.getmtime(), but built on os.lstat(): for a symbolic link this
+# returns the mtime of the link itself, not its target, where supported.
+#
+def _get_link_mtime(path):
+ path_stat = os.lstat(path)
+ return path_stat.st_mtime