summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Abderrahim Kitouni <akitouni@gnome.org> 2020-08-24 06:38:33 +0100
committer Abderrahim Kitouni <akitouni@gnome.org> 2020-08-26 13:53:46 +0100
commit d1852c8457d99a87d922bacd5bd3cf99db6c11cd (patch)
tree d75b7a7fcf4b1568c8bac86dab9d229391d0cfa5
parent 0db43cf2f968ae0b02aaec9188c2c4bc839eefab (diff)
download buildstream-d1852c8457d99a87d922bacd5bd3cf99db6c11cd.tar.gz
_casbaseddirectory.py: Create Directory proto objects in canonical form
Create Directory proto objects as part of lazy digest calculation instead of keeping them up-to-date. This is required to create Directory proto objects in canonical form (sorted directory entries). Based on commit de6a7591365afab686259bcef14bf3244a918c08.
-rw-r--r-- buildstream/storage/_casbaseddirectory.py 187
-rw-r--r-- buildstream/storage/_filebaseddirectory.py 7
2 files changed, 99 insertions, 95 deletions
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index 73c102e18..9a4ee8119 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -36,20 +36,22 @@ from ..utils import FileListResult, list_relative_paths
class IndexEntry():
- """ Used in our index of names to objects to store the 'modified' flag
- for directory entries. Because we need both the remote_execution_pb2 object
- and our own Directory object for directory entries, we store both. For files
- and symlinks, only pb_object is used. """
- def __init__(self, pb_object, entrytype, buildstream_object=None, modified=False):
- self.pb_object = pb_object # Short for 'protocol buffer object')
+ """ Directory entry used in CasBasedDirectory.index """
+ def __init__(self, name, entrytype, *, digest=None, target=None, is_executable=False,
+ buildstream_object=None, modified=False):
+ self.name = name
self.type = entrytype
+ self.digest = digest
+ self.target = target
+ self.is_executable = is_executable
self.buildstream_object = buildstream_object
self.modified = modified
def get_directory(self, parent):
if not self.buildstream_object:
- self.buildstream_object = CasBasedDirectory(parent.cas_cache, digest=self.pb_object.digest,
- parent=parent, filename=self.pb_object.name)
+ self.buildstream_object = CasBasedDirectory(parent.cas_cache, digest=self.digest,
+ parent=parent, filename=self.name)
+ self.digest = None
return self.buildstream_object
@@ -129,27 +131,26 @@ class _Resolver():
# First check for nonexistent things or 'normal' objects and return them
if name not in directory.index:
- return None
+ return None, None
index_entry = directory.index[name]
if index_entry.type == _FileType.DIRECTORY:
- return index_entry.get_directory(directory)
+ return index_entry.type, index_entry.get_directory(directory)
elif index_entry.type == _FileType.REGULAR_FILE:
- return index_entry.pb_object
+ return index_entry.type, None
# Now we must be dealing with a symlink.
assert index_entry.type == _FileType.SYMLINK
- symlink_object = index_entry.pb_object
- if symlink_object in self.seen_objects:
+ if index_entry in self.seen_objects:
# Infinite symlink loop detected
message = ("Infinite symlink loop found during resolution. " +
"First repeated element is {}".format(name))
raise InfiniteSymlinkException(message=message)
- self.seen_objects.append(symlink_object)
+ self.seen_objects.append(index_entry)
- components = symlink_object.target.split(CasBasedDirectory._pb2_path_sep)
- absolute = symlink_object.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
+ components = index_entry.target.split(CasBasedDirectory._pb2_path_sep)
+ absolute = index_entry.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
if absolute:
if self.absolute_symlinks_resolve:
@@ -162,20 +163,22 @@ class _Resolver():
raise AbsoluteSymlinkException(message=message)
resolution = directory
- while components and isinstance(resolution, CasBasedDirectory):
+ resolution_type = _FileType.DIRECTORY
+ while components and resolution_type == _FileType.DIRECTORY:
c = components.pop(0)
directory = resolution
try:
- resolution = self._resolve_path_component(c, directory, components)
+ resolution_type, resolution = self._resolve_path_component(c, directory, components)
except UnexpectedFileException as original:
errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
"cannot proceed. The remaining path components are {}.")
raise UnexpectedFileException(errormsg.format(c, components)) from original
- return resolution
+ return resolution_type, resolution
def _resolve_path_component(self, c, directory, components_remaining):
+ resolution_type = _FileType.DIRECTORY
if c == ".":
resolution = directory
elif c == "..":
@@ -188,7 +191,7 @@ class _Resolver():
resolution = directory
elif c in directory.index:
try:
- resolution = self._resolve_through_files(c, directory, components_remaining)
+ resolution_type, resolution = self._resolve_through_files(c, directory, components_remaining)
except UnexpectedFileException as original:
errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
"cannot proceed. The remaining path components are {}.")
@@ -199,7 +202,8 @@ class _Resolver():
resolution = directory.descend(c, create=True)
else:
resolution = None
- return resolution
+ resolution_type = None
+ return resolution_type, resolution
def _resolve_through_files(self, c, directory, require_traversable):
"""A wrapper to resolve() which deals with files being found
@@ -212,15 +216,16 @@ class _Resolver():
force_create is off, throws ResolutionException.
"""
- resolved_thing = self.resolve(c, directory)
+ resolved_type, resolved_thing = self.resolve(c, directory)
- if isinstance(resolved_thing, remote_execution_pb2.FileNode):
+ if resolved_type == _FileType.REGULAR_FILE:
if require_traversable:
# We have components still to resolve, but one of the path components
# is a file.
if self.force_create:
directory.delete_entry(c)
resolved_thing = directory.descend(c, create=True)
+ resolved_type = _FileType.DIRECTORY
else:
# This is a signal that we hit a file, but don't
# have the data to give a proper message, so the
@@ -228,7 +233,7 @@ class _Resolver():
# description.
raise UnexpectedFileException()
- return resolved_thing
+ return resolved_type, resolved_thing
# CasBasedDirectory intentionally doesn't call its superclass constuctor,
@@ -256,28 +261,28 @@ class CasBasedDirectory(Directory):
def __init__(self, cas_cache, *, digest=None, parent=None, common_name="untitled", filename=None):
self.filename = filename
self.common_name = common_name
- self.pb2_directory = remote_execution_pb2.Directory()
self.cas_cache = cas_cache
- if digest:
- with open(self.cas_cache.objpath(digest), 'rb') as f:
- self.pb2_directory.ParseFromString(f.read())
-
self.__digest = digest
self.index = {}
self.parent = parent
- self._directory_read = False
- self._populate_index()
-
- def _populate_index(self):
- if self._directory_read:
- return
- for entry in self.pb2_directory.directories:
- self.index[entry.name] = IndexEntry(entry, _FileType.DIRECTORY)
- for entry in self.pb2_directory.files:
- self.index[entry.name] = IndexEntry(entry, _FileType.REGULAR_FILE)
- for entry in self.pb2_directory.symlinks:
- self.index[entry.name] = IndexEntry(entry, _FileType.SYMLINK)
- self._directory_read = True
+ if digest:
+ self._populate_index(digest)
+
+ def _populate_index(self, digest):
+ pb2_directory = remote_execution_pb2.Directory()
+ with open(self.cas_cache.objpath(digest), 'rb') as f:
+ pb2_directory.ParseFromString(f.read())
+
+ for entry in pb2_directory.directories:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.DIRECTORY,
+ digest=entry.digest)
+ for entry in pb2_directory.files:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.REGULAR_FILE,
+ digest=entry.digest,
+ is_executable=entry.is_executable)
+ for entry in pb2_directory.symlinks:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.SYMLINK,
+ target=entry.target)
def _find_self_in_parent(self):
assert self.parent is not None
@@ -291,23 +296,19 @@ class CasBasedDirectory(Directory):
assert name not in self.index
newdir = CasBasedDirectory(self.cas_cache, parent=self, filename=name)
- dirnode = self.pb2_directory.directories.add()
- dirnode.name = name
- self.index[name] = IndexEntry(dirnode, _FileType.DIRECTORY, buildstream_object=newdir)
+ self.index[name] = IndexEntry(name, _FileType.DIRECTORY, buildstream_object=newdir)
self.__invalidate_digest()
return newdir
def _add_file(self, basename, filename, modified=False):
- filenode = self.pb2_directory.files.add()
- filenode.name = filename
- self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
- is_executable = os.access(os.path.join(basename, filename), os.X_OK)
- filenode.is_executable = is_executable
- self.index[filename] = IndexEntry(filenode, _FileType.REGULAR_FILE,
- modified=modified or filename in self.index)
+ entry = IndexEntry(filename, _FileType.REGULAR_FILE,
+ modified=modified or filename in self.index)
+ entry.digest = self.cas_cache.add_object(path=os.path.join(basename, filename))
+ entry.is_executable = os.access(os.path.join(basename, filename), os.X_OK)
+ self.index[filename] = entry
self.__invalidate_digest()
@@ -315,23 +316,11 @@ class CasBasedDirectory(Directory):
self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
def _add_new_link_direct(self, name, target):
- entry = self.index.get(name)
- if entry:
- symlinknode = entry.pb_object
- else:
- symlinknode = self.pb2_directory.symlinks.add()
- symlinknode.name = name
- # A symlink node has no digest.
- symlinknode.target = target
- self.index[name] = IndexEntry(symlinknode, _FileType.SYMLINK, modified=(entry is not None))
+ self.index[name] = IndexEntry(name, _FileType.SYMLINK, target=target, modified=name in self.index)
self.__invalidate_digest()
def delete_entry(self, name):
- for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
- for thing in collection:
- if thing.name == name:
- collection.remove(thing)
if name in self.index:
del self.index[name]
@@ -376,13 +365,11 @@ class CasBasedDirectory(Directory):
return subdir.descend(subdirectory_spec[1:], create)
else:
# May be a symlink
- target = self._resolve(subdirectory_spec[0], force_create=create)
- if isinstance(target, CasBasedDirectory):
+ type, target = self._resolve(subdirectory_spec[0], force_create=create)
+ if type == _FileType.DIRECTORY:
return target
error = "Cannot descend into {}, which is a '{}' in the directory {}"
- raise VirtualDirectoryError(error.format(subdirectory_spec[0],
- type(self.index[subdirectory_spec[0]].pb_object).__name__,
- self))
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0], type, self))
else:
if create:
newdir = self._add_directory(subdirectory_spec[0])
@@ -444,15 +431,15 @@ class CasBasedDirectory(Directory):
def _ensure_followable(name, path_prefix):
""" Makes sure 'name' is a directory or symlink to a directory which can be descended into. """
- if isinstance(self.index[name].buildstream_object, Directory):
+ if self.index[name].type == _FileType.DIRECTORY:
return self.descend(name)
try:
- target = self._resolve(name, force_create=True)
+ type, target = self._resolve(name, force_create=True)
except InfiniteSymlinkException:
return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
- if isinstance(target, CasBasedDirectory):
+ if type == _FileType.DIRECTORY:
return target
- elif isinstance(target, remote_execution_pb2.FileNode):
+ elif type == _FileType.REGULAR_FILE:
return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
return target
@@ -533,8 +520,8 @@ class CasBasedDirectory(Directory):
subcomponents = CasBasedDirectory._files_in_subdir(files, dirname)
# We will fail at this point if there is a file or symlink to file called 'dirname'.
if dirname in self.index:
- resolved_component = self._resolve(dirname, force_create=True)
- if isinstance(resolved_component, remote_execution_pb2.FileNode):
+ resolved_type, resolved_component = self._resolve(dirname, force_create=True)
+ if resolved_type == _FileType.REGULAR_FILE:
dest_subdir = self._replace_anything_with_dir(dirname, path_prefix, result.overwritten)
else:
dest_subdir = resolved_component
@@ -558,15 +545,15 @@ class CasBasedDirectory(Directory):
importable = self._check_replacement(f, path_prefix, result)
if importable:
entry = source_directory.index[f]
- item = entry.pb_object
if entry.type == _FileType.REGULAR_FILE:
- filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
- is_executable=item.is_executable)
- self.index[f] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True)
+ self.index[f] = IndexEntry(entry.name, _FileType.REGULAR_FILE,
+ digest=entry.digest,
+ is_executable=entry.is_executable,
+ modified=True)
self.__invalidate_digest()
else:
assert entry.type == _FileType.SYMLINK
- self._add_new_link_direct(name=f, target=item.target)
+ self._add_new_link_direct(name=f, target=entry.target)
result.files_written.append(os.path.join(path_prefix, f))
else:
result.ignored.append(os.path.join(path_prefix, f))
@@ -724,8 +711,8 @@ class CasBasedDirectory(Directory):
# broken symlinks count as files. os.walk doesn't follow
# symlinks, so we don't recurse.
for (k, v) in sorted(symlink_list):
- target = self._resolve(k, absolute_symlinks_resolve=True)
- if isinstance(target, CasBasedDirectory):
+ type, target = self._resolve(k, absolute_symlinks_resolve=True)
+ if type == _FileType.DIRECTORY:
yield os.path.join(relpath, k)
else:
file_list.append((k, v))
@@ -741,13 +728,14 @@ class CasBasedDirectory(Directory):
yield from subdir.list_relative_paths(relpath=os.path.join(relpath, k))
def get_size(self):
- total = len(self.pb2_directory.SerializeToString())
+ digest = self._get_digest()
+ total = digest.size_bytes
for i in self.index.values():
if i.type == _FileType.DIRECTORY:
subdir = i.get_directory(self)
total += subdir.get_size()
elif i.type == _FileType.REGULAR_FILE:
- src_name = self.cas_cache.objpath(i.pb_object.digest)
+ src_name = self.cas_cache.objpath(i.digest)
filesize = os.stat(src_name).st_size
total += filesize
# Symlink nodes are encoded as part of the directory serialization.
@@ -781,23 +769,40 @@ class CasBasedDirectory(Directory):
#
def _get_digest(self):
if not self.__digest:
- # Update digests for subdirectories in DirectoryNodes
- for name, entry in self.index.items():
+ # Create updated Directory proto
+ pb2_directory = remote_execution_pb2.Directory()
+
+ for name, entry in sorted(self.index.items()):
if entry.type == _FileType.DIRECTORY:
+ dirnode = pb2_directory.directories.add()
+ dirnode.name = name
+
+ # Update digests for subdirectories in DirectoryNodes.
# No need to call entry.get_directory().
# If it hasn't been instantiated, digest must be up-to-date.
subdir = entry.buildstream_object
if subdir:
- entry.pb_object.digest.CopyFrom(subdir._get_digest())
-
- self.__digest = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+ dirnode.digest.CopyFrom(subdir._get_digest())
+ else:
+ dirnode.digest.CopyFrom(entry.digest)
+ elif entry.type == _FileType.REGULAR_FILE:
+ filenode = pb2_directory.files.add()
+ filenode.name = name
+ filenode.digest.CopyFrom(entry.digest)
+ filenode.is_executable = entry.is_executable
+ elif entry.type == _FileType.SYMLINK:
+ symlinknode = pb2_directory.symlinks.add()
+ symlinknode.name = name
+ symlinknode.target = entry.target
+
+ self.__digest = self.cas_cache.add_object(buffer=pb2_directory.SerializeToString())
return self.__digest
def _objpath(self, path):
subdir = self.descend(path[:-1])
entry = subdir.index[path[-1]]
- return self.cas_cache.objpath(entry.pb_object.digest)
+ return self.cas_cache.objpath(entry.digest)
def _exists(self, path):
try:
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index 2b1d4545a..8c6db8f1d 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -248,7 +248,6 @@ class FileBasedDirectory(Directory):
_ensure_real_directory(self.external_directory, os.path.dirname(destpath))
entry = source_directory._lightweight_resolve_to_index(path)
- item = entry.pb_object
if entry.type == _FileType.DIRECTORY:
# Ensure directory exists in destination
@@ -265,7 +264,7 @@ class FileBasedDirectory(Directory):
result.ignored.append(path)
continue
- target = item.target
+ target = entry.target
target = _relative_symlink_target(self.external_directory, destpath, target)
os.symlink(target, destpath)
@@ -275,10 +274,10 @@ class FileBasedDirectory(Directory):
result.ignored.append(path)
continue
- src_path = source_directory.cas_cache.objpath(item.digest)
+ src_path = source_directory.cas_cache.objpath(entry.digest)
actionfunc(src_path, destpath, result=result)
- if item.is_executable:
+ if entry.is_executable:
os.chmod(destpath, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)