From c3f96e1bc32f1728619b144e1d0120349411df66 Mon Sep 17 00:00:00 2001 From: Raoul Hidalgo Charman Date: Wed, 1 May 2019 13:38:35 +0100 Subject: Add BST_STAGE_VIRTUAL_DIRECTORY option Add flag to indicate whether sources can stage directly to a virtual directory. Adds `__stage_previous_sources` method which stages previous sources taking into account whether they use virtual directories or not. Part of #983 --- src/buildstream/_sourcecache.py | 13 +++++--- src/buildstream/source.py | 74 ++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py index 1d3342a75..ce0694e08 100644 --- a/src/buildstream/_sourcecache.py +++ b/src/buildstream/_sourcecache.py @@ -153,11 +153,14 @@ class SourceCache(BaseCache): for previous_source in previous_sources: vdir.import_files(self.export(previous_source)) - with utils._tempdir(dir=self.context.tmpdir, prefix='staging-temp') as tmpdir: - if not vdir.is_empty(): - vdir.export_files(tmpdir) - source._stage(tmpdir) - vdir.import_files(tmpdir, can_link=True) + if not source.BST_STAGE_VIRTUAL_DIRECTORY: + with utils._tempdir(dir=self.context.tmpdir, prefix='staging-temp') as tmpdir: + if not vdir.is_empty(): + vdir.export_files(tmpdir) + source._stage(tmpdir) + vdir.import_files(tmpdir, can_link=True) + else: + source._stage(vdir) self.cas.set_ref(ref, vdir._get_digest()) diff --git a/src/buildstream/source.py b/src/buildstream/source.py index fe94a15d7..f6cefb386 100644 --- a/src/buildstream/source.py +++ b/src/buildstream/source.py @@ -172,6 +172,8 @@ from ._exceptions import BstError, ImplError, ErrorDomain from ._loader.metasource import MetaSource from ._projectrefs import ProjectRefStorage from ._cachekey import generate_key +from .storage import FileBasedDirectory +from .storage.directory import Directory, VirtualDirectoryError class SourceError(BstError): @@ -297,6 +299,15 @@ class Source(Plugin): *Since: 1.4* """ + BST_STAGE_VIRTUAL_DIRECTORY = False + """Whether we can stage this source directly to a virtual directory + + When set to true, virtual directories can be passed to the source to stage + to. + + *Since: 1.4* + """ + def __init__(self, context, project, meta, *, alias_override=None, unique_id=None): provenance = _yaml.node_get_provenance(meta.config) super().__init__("{}-{}".format(meta.element_name, meta.element_index), @@ -711,9 +722,7 @@ class Source(Plugin): if self.BST_REQUIRES_PREVIOUS_SOURCES_FETCH: self.__ensure_previous_sources(previous_sources) - with self.tempdir() as staging_directory: - for src in previous_sources: - src._stage(staging_directory) + with self.__stage_previous_sources(previous_sources) as staging_directory: self.__do_fetch(previous_sources_dir=self.__ensure_directory(staging_directory)) else: self.__do_fetch() @@ -727,12 +736,15 @@ class Source(Plugin): # 'directory' option # def _stage(self, directory): - staging_directory = self.__ensure_directory(directory) + directory = self.__ensure_directory(directory) - self.stage(staging_directory) + self.stage(directory) # Wrapper for init_workspace() def _init_workspace(self, directory): + if self.BST_STAGE_VIRTUAL_DIRECTORY: + directory = FileBasedDirectory(external_directory=directory) + directory = self.__ensure_directory(directory) self.init_workspace(directory) @@ -994,9 +1006,8 @@ class Source(Plugin): def _track(self, previous_sources): if self.BST_REQUIRES_PREVIOUS_SOURCES_TRACK: self.__ensure_previous_sources(previous_sources) - with self.tempdir() as staging_directory: - for src in previous_sources: - src._stage(staging_directory) + with self.__stage_previous_sources(previous_sources) \ + as staging_directory: new_ref = self.__do_track(previous_sources_dir=self.__ensure_directory(staging_directory)) else: new_ref = self.__do_track() @@ -1115,6 +1126,24 @@ class Source(Plugin): return clone + # Context manager that stages sources in a cas based or temporary file + # based directory + @contextmanager + def __stage_previous_sources(self, sources): + with self.tempdir() as tempdir: + directory = FileBasedDirectory(external_directory=tempdir) + + for src in sources: + if src.BST_STAGE_VIRTUAL_DIRECTORY: + src._stage(directory) + else: + src._stage(tempdir) + + if self.BST_STAGE_VIRTUAL_DIRECTORY: + yield directory + else: + yield tempdir + # Tries to call fetch for every mirror, stopping once it succeeds def __do_fetch(self, **kwargs): project = self._get_project() @@ -1220,15 +1249,28 @@ class Source(Plugin): # Ensures a fully constructed path and returns it def __ensure_directory(self, directory): - if self.__directory is not None: - directory = os.path.join(directory, self.__directory.lstrip(os.sep)) + if not isinstance(directory, Directory): + if self.__directory is not None: + directory = os.path.join(directory, self.__directory.lstrip(os.sep)) + + try: + os.makedirs(directory, exist_ok=True) + except OSError as e: + raise SourceError("Failed to create staging directory: {}" + .format(e), + reason="ensure-stage-dir-fail") from e + + else: + if self.__directory is not None: + try: + directory = directory.descend( + *self.__directory.lstrip(os.sep).split(os.sep), + create=True) + except VirtualDirectoryError as e: + raise SourceError("Failed to descend into staging directory: {}" + .format(e), + reason="ensure-stage-dir-fail") from e - try: - os.makedirs(directory, exist_ok=True) - except OSError as e: - raise SourceError("Failed to create staging directory: {}" - .format(e), - reason="ensure-stage-dir-fail") from e return directory @classmethod -- cgit v1.2.1 From 3b44a7785e22ee82dee28533b1e825c26491b2b3 Mon Sep 17 00:00:00 2001 From: Raoul Hidalgo Charman Date: Wed, 1 May 2019 15:41:38 +0100 Subject: Directory: add `import_single_file` method This a new method which deals with importing a single file. Implemented for both FileBasedDirectory and CasBasedDirectory. Part of #983 --- src/buildstream/storage/_casbaseddirectory.py | 12 ++++++++++++ src/buildstream/storage/_filebaseddirectory.py | 10 ++++++++++ src/buildstream/storage/directory.py | 4 ++++ 3 files changed, 26 insertions(+) diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py index 2aff29b98..12ef482dc 100644 --- a/src/buildstream/storage/_casbaseddirectory.py +++ b/src/buildstream/storage/_casbaseddirectory.py @@ -386,6 +386,18 @@ class CasBasedDirectory(Directory): return result + def import_single_file(self, srcpath): + result = FileListResult() + if self._check_replacement(os.path.basename(srcpath), + os.path.dirname(srcpath), + result): + self._add_file(os.path.dirname(srcpath), + os.path.basename(srcpath), + modified=os.path.basename(srcpath) + in result.overwritten) + result.files_written.append(srcpath) + return result + def set_deterministic_mtime(self): """ Sets a static modification time for all regular files in this directory. Since we don't store any modification time, we don't need to do anything. diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py index 9a746f731..90911da86 100644 --- a/src/buildstream/storage/_filebaseddirectory.py +++ b/src/buildstream/storage/_filebaseddirectory.py @@ -28,6 +28,7 @@ See also: :ref:`sandboxing`. """ import os +import shutil import stat import time @@ -111,6 +112,15 @@ class FileBasedDirectory(Directory): os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time)) return import_result + def import_single_file(self, srcpath): + dstpath = os.path.join(self.external_directory, os.path.basename(srcpath)) + result = FileListResult() + if os.path.exists(dstpath): + result.ignored.append(dstpath) + else: + shutil.copyfile(srcpath, dstpath, follow_symlinks=False) + return result + def _mark_changed(self): pass diff --git a/src/buildstream/storage/directory.py b/src/buildstream/storage/directory.py index bad818fef..c40e1bdb9 100644 --- a/src/buildstream/storage/directory.py +++ b/src/buildstream/storage/directory.py @@ -103,6 +103,10 @@ class Directory(): raise NotImplementedError() + def import_single_file(self, external_pathspec): + """Imports a single file from an external path""" + raise NotImplementedError() + def export_files(self, to_directory, *, can_link=False, can_destroy=False): """Copies everything from this into to_directory. -- cgit v1.2.1 From 1cc15b44376a4022ab464f26e92f209bad9fa254 Mon Sep 17 00:00:00 2001 From: Raoul Hidalgo Charman Date: Fri, 10 May 2019 17:37:05 +0100 Subject: _casbaseddirectory: Add support for can_link Without this, there's not much benefit to using the virtual directories as we still copy files back into the CAS. Part of #983 --- src/buildstream/storage/_casbaseddirectory.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py index 12ef482dc..94e6e09c7 100644 --- a/src/buildstream/storage/_casbaseddirectory.py +++ b/src/buildstream/storage/_casbaseddirectory.py @@ -161,11 +161,11 @@ class CasBasedDirectory(Directory): return newdir - def _add_file(self, basename, filename, modified=False): + def _add_file(self, basename, filename, modified=False, can_link=False): entry = IndexEntry(filename, _FileType.REGULAR_FILE, modified=modified or filename in self.index) path = os.path.join(basename, filename) - entry.digest = self.cas_cache.add_object(path=path) + entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link) entry.is_executable = os.access(path, os.X_OK) self.index[filename] = entry @@ -253,7 +253,9 @@ class CasBasedDirectory(Directory): fileListResult.overwritten.append(relative_pathname) return True - def _import_files_from_directory(self, source_directory, filter_callback, *, path_prefix="", result): + def _import_files_from_directory(self, source_directory, filter_callback, + *, path_prefix="", result, + can_link=False): """ Import files from a traditional directory. """ for direntry in os.scandir(source_directory): @@ -273,8 +275,10 @@ class CasBasedDirectory(Directory): raise VirtualDirectoryError('Destination is a {}, not a directory: /{}' .format(filetype, relative_pathname)) - dest_subdir._import_files_from_directory(src_subdir, filter_callback, - path_prefix=relative_pathname, result=result) + dest_subdir._import_files_from_directory( + src_subdir, filter_callback, + path_prefix=relative_pathname, result=result, + can_link=can_link) if filter_callback and not filter_callback(relative_pathname): if is_dir and create_subdir and dest_subdir.is_empty(): @@ -286,7 +290,9 @@ class CasBasedDirectory(Directory): if direntry.is_file(follow_symlinks=False): if self._check_replacement(direntry.name, relative_pathname, result): - self._add_file(source_directory, direntry.name, modified=relative_pathname in result.overwritten) + self._add_file(source_directory, direntry.name, + modified=relative_pathname in result.overwritten, + can_link=can_link) result.files_written.append(relative_pathname) elif direntry.is_symlink(): if self._check_replacement(direntry.name, relative_pathname, result): @@ -372,15 +378,17 @@ class CasBasedDirectory(Directory): if isinstance(external_pathspec, FileBasedDirectory): source_directory = external_pathspec._get_underlying_directory() - self._import_files_from_directory(source_directory, filter_callback, result=result) + self._import_files_from_directory(source_directory, filter_callback, + result=result, can_link=can_link) elif isinstance(external_pathspec, str): source_directory = external_pathspec - self._import_files_from_directory(source_directory, filter_callback, result=result) + self._import_files_from_directory(source_directory, filter_callback, + result=result, can_link=can_link) else: assert isinstance(external_pathspec, CasBasedDirectory) self._partial_import_cas_into_cas(external_pathspec, filter_callback, result=result) - # TODO: No notice is taken of report_written, update_mtime or can_link. + # TODO: No notice is taken of report_written or update_mtime. # Current behaviour is to fully populate the report, which is inefficient, # but still correct. -- cgit v1.2.1 From e7dee9ed0049188f9541e36316c22d0e10a55818 Mon Sep 17 00:00:00 2001 From: Raoul Hidalgo Charman Date: Wed, 1 May 2019 15:43:17 +0100 Subject: local.py: Support staging directly into CAS Part of #983 --- src/buildstream/plugins/sources/local.py | 48 ++++++++++---------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/src/buildstream/plugins/sources/local.py b/src/buildstream/plugins/sources/local.py index 50df85427..fba8af604 100644 --- a/src/buildstream/plugins/sources/local.py +++ b/src/buildstream/plugins/sources/local.py @@ -37,14 +37,16 @@ details on common configuration options for sources. """ import os -import stat -from buildstream import Source, Consistency +from buildstream.storage.directory import Directory +from buildstream import Source, SourceError, Consistency from buildstream import utils class LocalSource(Source): # pylint: disable=attribute-defined-outside-init + BST_STAGE_VIRTUAL_DIRECTORY = True + def __init__(self, context, project, meta): super().__init__(context, project, meta) @@ -91,38 +93,18 @@ class LocalSource(Source): pass # pragma: nocover def stage(self, directory): - - # Dont use hardlinks to stage sources, they are not write protected - # in the sandbox. - with self.timed_activity("Staging local files at {}".format(self.path)): - - if os.path.isdir(self.fullpath): - files = list(utils.list_relative_paths(self.fullpath)) - utils.copy_files(self.fullpath, directory) + # directory should always be a Directory object + assert isinstance(directory, Directory) + with self.timed_activity("Staging local files into CAS"): + if os.path.isdir(self.fullpath) and not os.path.islink(self.fullpath): + result = directory.import_files(self.fullpath) else: - destfile = os.path.join(directory, os.path.basename(self.path)) - files = [os.path.basename(self.path)] - utils.safe_copy(self.fullpath, destfile) - - for f in files: - # Non empty directories are not listed by list_relative_paths - dirs = f.split(os.sep) - for i in range(1, len(dirs)): - d = os.path.join(directory, *(dirs[:i])) - assert os.path.isdir(d) and not os.path.islink(d) - os.chmod(d, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) - - path = os.path.join(directory, f) - if os.path.islink(path): - pass - elif os.path.isdir(path): - os.chmod(path, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) - else: - st = os.stat(path) - if st.st_mode & stat.S_IXUSR: - os.chmod(path, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) - else: - os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) + result = directory.import_single_file(self.fullpath) + + if result.overwritten or result.ignored: + raise SourceError( + "Failed to stage source: files clash with existing directory", + reason='ensure-stage-dir-fail') def _get_local_path(self): return self.fullpath -- cgit v1.2.1 From 3d2a51b39cdf6e0ae415136d3c36ce09c68ec8ae Mon Sep 17 00:00:00 2001 From: Raoul Hidalgo Charman Date: Mon, 20 May 2019 12:10:22 +0100 Subject: utils.py: remove misleading documentation copy_files and link_files suggested that a single file path is a valid argument when it isn't. --- src/buildstream/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/buildstream/utils.py b/src/buildstream/utils.py index ade593750..f509ce998 100644 --- a/src/buildstream/utils.py +++ b/src/buildstream/utils.py @@ -356,7 +356,7 @@ def copy_files(src, dest, *, filter_callback=None, ignore_missing=False, report_ """Copy files from source to destination. Args: - src (str): The source file or directory + src (str): The source directory dest (str): The destination directory filter_callback (callable): Optional filter callback. Called with the relative path as argument for every file in the source directory. The file is @@ -395,7 +395,7 @@ def link_files(src, dest, *, filter_callback=None, ignore_missing=False, report_ """Hardlink files from source to destination. Args: - src (str): The source file or directory + src (str): The source directory dest (str): The destination directory filter_callback (callable): Optional filter callback. Called with the relative path as argument for every file in the source directory. The file is -- cgit v1.2.1