summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jürg Billeter <j@bitron.ch> 2019-02-27 12:52:34 +0100
committer: Jürg Billeter <j@bitron.ch> 2019-02-28 14:05:47 +0100
commit: d5be8c3e8013c7af01cdeab66bd7981db0f641b6 (patch)
tree: c4a5a117b463abe687cd4c6cf47f88e60e352327
parent: b4144edc65af59d8f4942b8cf5dd137ef708df58 (diff)
download: buildstream-d5be8c3e8013c7af01cdeab66bd7981db0f641b6.tar.gz
_casbaseddirectory.py: Create Directory proto objects in canonical form
Create Directory proto objects as part of lazy digest calculation instead of keeping them up-to-date. This is required to create Directory proto objects in canonical form (sorted directory entries).
-rw-r--r-- buildstream/storage/_casbaseddirectory.py 136
-rw-r--r-- buildstream/storage/_filebaseddirectory.py 7
2 files changed, 72 insertions, 71 deletions
diff --git a/buildstream/storage/_casbaseddirectory.py b/buildstream/storage/_casbaseddirectory.py
index ad79fc13b..37bf5693c 100644
--- a/buildstream/storage/_casbaseddirectory.py
+++ b/buildstream/storage/_casbaseddirectory.py
@@ -36,20 +36,22 @@ from ..utils import FileListResult, _magic_timestamp
class IndexEntry():
- """ Used in our index of names to objects to store the 'modified' flag
- for directory entries. Because we need both the remote_execution_pb2 object
- and our own Directory object for directory entries, we store both. For files
- and symlinks, only pb_object is used. """
- def __init__(self, pb_object, entrytype, buildstream_object=None, modified=False):
- self.pb_object = pb_object # Short for 'protocol buffer object')
+ """ Directory entry used in CasBasedDirectory.index """
+ def __init__(self, name, entrytype, *, digest=None, target=None, is_executable=False,
+ buildstream_object=None, modified=False):
+ self.name = name
self.type = entrytype
+ self.digest = digest
+ self.target = target
+ self.is_executable = is_executable
self.buildstream_object = buildstream_object
self.modified = modified
def get_directory(self, parent):
if not self.buildstream_object:
- self.buildstream_object = CasBasedDirectory(parent.cas_cache, digest=self.pb_object.digest,
- parent=parent, filename=self.pb_object.name)
+ self.buildstream_object = CasBasedDirectory(parent.cas_cache, digest=self.digest,
+ parent=parent, filename=self.name)
+ self.digest = None
return self.buildstream_object
@@ -108,28 +110,28 @@ class CasBasedDirectory(Directory):
def __init__(self, cas_cache, *, digest=None, parent=None, common_name="untitled", filename=None):
self.filename = filename
self.common_name = common_name
- self.pb2_directory = remote_execution_pb2.Directory()
self.cas_cache = cas_cache
- if digest:
- with open(self.cas_cache.objpath(digest), 'rb') as f:
- self.pb2_directory.ParseFromString(f.read())
-
self.__digest = digest
self.index = {}
self.parent = parent
- self._directory_read = False
- self._populate_index()
-
- def _populate_index(self):
- if self._directory_read:
- return
- for entry in self.pb2_directory.directories:
- self.index[entry.name] = IndexEntry(entry, _FileType.DIRECTORY)
- for entry in self.pb2_directory.files:
- self.index[entry.name] = IndexEntry(entry, _FileType.REGULAR_FILE)
- for entry in self.pb2_directory.symlinks:
- self.index[entry.name] = IndexEntry(entry, _FileType.SYMLINK)
- self._directory_read = True
+ if digest:
+ self._populate_index(digest)
+
+ def _populate_index(self, digest):
+ pb2_directory = remote_execution_pb2.Directory()
+ with open(self.cas_cache.objpath(digest), 'rb') as f:
+ pb2_directory.ParseFromString(f.read())
+
+ for entry in pb2_directory.directories:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.DIRECTORY,
+ digest=entry.digest)
+ for entry in pb2_directory.files:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.REGULAR_FILE,
+ digest=entry.digest,
+ is_executable=entry.is_executable)
+ for entry in pb2_directory.symlinks:
+ self.index[entry.name] = IndexEntry(entry.name, _FileType.SYMLINK,
+ target=entry.target)
def _find_self_in_parent(self):
assert self.parent is not None
@@ -143,23 +145,19 @@ class CasBasedDirectory(Directory):
assert name not in self.index
newdir = CasBasedDirectory(self.cas_cache, parent=self, filename=name)
- dirnode = self.pb2_directory.directories.add()
- dirnode.name = name
- self.index[name] = IndexEntry(dirnode, _FileType.DIRECTORY, buildstream_object=newdir)
+ self.index[name] = IndexEntry(name, _FileType.DIRECTORY, buildstream_object=newdir)
self.__invalidate_digest()
return newdir
def _add_file(self, basename, filename, modified=False):
- filenode = self.pb2_directory.files.add()
- filenode.name = filename
- self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
- is_executable = os.access(os.path.join(basename, filename), os.X_OK)
- filenode.is_executable = is_executable
- self.index[filename] = IndexEntry(filenode, _FileType.REGULAR_FILE,
- modified=modified or filename in self.index)
+ entry = IndexEntry(filename, _FileType.REGULAR_FILE,
+ modified=modified or filename in self.index)
+ entry.digest = self.cas_cache.add_object(path=os.path.join(basename, filename))
+ entry.is_executable = os.access(os.path.join(basename, filename), os.X_OK)
+ self.index[filename] = entry
self.__invalidate_digest()
@@ -167,23 +165,11 @@ class CasBasedDirectory(Directory):
self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
def _add_new_link_direct(self, name, target):
- entry = self.index.get(name)
- if entry:
- symlinknode = entry.pb_object
- else:
- symlinknode = self.pb2_directory.symlinks.add()
- symlinknode.name = name
- # A symlink node has no digest.
- symlinknode.target = target
- self.index[name] = IndexEntry(symlinknode, _FileType.SYMLINK, modified=(entry is not None))
+ self.index[name] = IndexEntry(name, _FileType.SYMLINK, target=target, modified=name in self.index)
self.__invalidate_digest()
def delete_entry(self, name):
- for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
- for thing in collection:
- if thing.name == name:
- collection.remove(thing)
if name in self.index:
del self.index[name]
@@ -229,17 +215,15 @@ class CasBasedDirectory(Directory):
else:
error = "Cannot descend into {}, which is a '{}' in the directory {}"
raise VirtualDirectoryError(error.format(subdirectory_spec[0],
- type(self.index[subdirectory_spec[0]].pb_object).__name__,
+ self.index[subdirectory_spec[0]].type,
self))
else:
if create:
newdir = self._add_directory(subdirectory_spec[0])
return newdir.descend(subdirectory_spec[1:], create)
else:
- error = "No entry called '{}' found in {}. There are directories called {}."
- directory_list = ",".join([entry.name for entry in self.pb2_directory.directories])
- raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self),
- directory_list))
+ error = "'{}' not found in {}"
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self)))
return None
def _check_replacement(self, name, path_prefix, fileListResult):
@@ -342,15 +326,15 @@ class CasBasedDirectory(Directory):
if not is_dir:
if self._check_replacement(name, path_prefix, result):
- item = entry.pb_object
if entry.type == _FileType.REGULAR_FILE:
- filenode = self.pb2_directory.files.add(digest=item.digest, name=name,
- is_executable=item.is_executable)
- self.index[name] = IndexEntry(filenode, _FileType.REGULAR_FILE, modified=True)
+ self.index[name] = IndexEntry(name, _FileType.REGULAR_FILE,
+ digest=entry.digest,
+ is_executable=entry.is_executable,
+ modified=True)
self.__invalidate_digest()
else:
assert entry.type == _FileType.SYMLINK
- self._add_new_link_direct(name=name, target=item.target)
+ self._add_new_link_direct(name=name, target=entry.target)
result.files_written.append(relative_pathname)
def import_files(self, external_pathspec, *,
@@ -500,13 +484,14 @@ class CasBasedDirectory(Directory):
yield from subdir.list_relative_paths(relpath=os.path.join(relpath, k))
def get_size(self):
- total = len(self.pb2_directory.SerializeToString())
+ digest = self._get_digest()
+ total = digest.size_bytes
for i in self.index.values():
if i.type == _FileType.DIRECTORY:
subdir = i.get_directory(self)
total += subdir.get_size()
elif i.type == _FileType.REGULAR_FILE:
- src_name = self.cas_cache.objpath(i.pb_object.digest)
+ src_name = self.cas_cache.objpath(i.digest)
filesize = os.stat(src_name).st_size
total += filesize
# Symlink nodes are encoded as part of the directory serialization.
@@ -540,23 +525,40 @@ class CasBasedDirectory(Directory):
#
def _get_digest(self):
if not self.__digest:
- # Update digests for subdirectories in DirectoryNodes
- for name, entry in self.index.items():
+ # Create updated Directory proto
+ pb2_directory = remote_execution_pb2.Directory()
+
+ for name, entry in sorted(self.index.items()):
if entry.type == _FileType.DIRECTORY:
+ dirnode = pb2_directory.directories.add()
+ dirnode.name = name
+
+ # Update digests for subdirectories in DirectoryNodes.
# No need to call entry.get_directory().
# If it hasn't been instantiated, digest must be up-to-date.
subdir = entry.buildstream_object
if subdir:
- entry.pb_object.digest.CopyFrom(subdir._get_digest())
-
- self.__digest = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
+ dirnode.digest.CopyFrom(subdir._get_digest())
+ else:
+ dirnode.digest.CopyFrom(entry.digest)
+ elif entry.type == _FileType.REGULAR_FILE:
+ filenode = pb2_directory.files.add()
+ filenode.name = name
+ filenode.digest.CopyFrom(entry.digest)
+ filenode.is_executable = entry.is_executable
+ elif entry.type == _FileType.SYMLINK:
+ symlinknode = pb2_directory.symlinks.add()
+ symlinknode.name = name
+ symlinknode.target = entry.target
+
+ self.__digest = self.cas_cache.add_object(buffer=pb2_directory.SerializeToString())
return self.__digest
def _objpath(self, path):
subdir = self.descend(path[:-1])
entry = subdir.index[path[-1]]
- return self.cas_cache.objpath(entry.pb_object.digest)
+ return self.cas_cache.objpath(entry.digest)
def _exists(self, path):
try:
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
index 61827f19c..4b0fd917b 100644
--- a/buildstream/storage/_filebaseddirectory.py
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -264,14 +264,13 @@ class FileBasedDirectory(Directory):
result.ignored.append(relative_pathname)
continue
- item = entry.pb_object
if entry.type == _FileType.REGULAR_FILE:
- src_path = source_directory.cas_cache.objpath(item.digest)
+ src_path = source_directory.cas_cache.objpath(entry.digest)
actionfunc(src_path, dest_path, result=result)
- if item.is_executable:
+ if entry.is_executable:
os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
else:
assert entry.type == _FileType.SYMLINK
- os.symlink(item.target, dest_path)
+ os.symlink(entry.target, dest_path)
result.files_written.append(relative_pathname)