summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jürg Billeter <j@bitron.ch> 2020-10-19 18:21:22 +0200
committer Jürg Billeter <j@bitron.ch> 2020-10-19 18:21:22 +0200
commit 05a5d63357837076198200190e6498af3b987bf2 (patch)
tree aac36643703a5b4ce2321655956f3c4dab363454
parent 1be07a2a862dea10e26507497533cf3a8bb7028c (diff)
download buildstream-05a5d63357837076198200190e6498af3b987bf2.tar.gz
wip: cascache.py: add notes
-rw-r--r-- src/buildstream/_cas/cascache.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 27d68905b..ff982e4b8 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -152,6 +152,9 @@ class CASCache:
def contains_files(self, digests):
cas = self.get_cas()
+ # TODO Limit size of FindMissingBlobs request?
+ # possibly share code with remote_missing_blobs()
+
request = remote_execution_pb2.FindMissingBlobsRequest()
request.blob_digests.extend(digests)
@@ -172,6 +175,15 @@ class CASCache:
def contains_directory(self, digest, *, with_files):
local_cas = self.get_local_cas()
+ # TODO: If a remote cache is enabled, always use FetchTree without file
+ # blobs, determine the required blobs in BuildStream code, and then
+ # issue a FindMissingBlobs request. That request must include the
+ # directory blobs (as FetchTree doesn't guarantee their existence in the
+ # remote CAS) and, if with_files is True, the file blobs as well.
+ # If no remote cache is configured, maybe keep using FetchTree for
+ # better performance. Longer term, possibly extend the local CAS
+ # protocol to have something like a FindTree method?
+
request = local_cas_pb2.FetchTreeRequest()
request.root_digest.CopyFrom(digest)
request.fetch_file_blobs = with_files
@@ -198,6 +210,9 @@ class CASCache:
def checkout(self, dest, tree, *, can_link=False):
os.makedirs(dest, exist_ok=True)
+ # With remote caching we can't assume blobs are in the local cache,
+ # so issue a FetchTree request in that case.
+
directory = remote_execution_pb2.Directory()
with open(self.objpath(tree), "rb") as f:
@@ -432,6 +447,8 @@ class CASCache:
# Returns: Missing Digest objects
#
def local_missing_blobs(self, digests):
+ # TODO replace with FindMissingBlobs call,
+ # possibly share code with contains_files
missing_blobs = []
for digest in digests:
objpath = self.objpath(digest)
@@ -454,6 +471,10 @@ class CASCache:
directory = remote_execution_pb2.Directory()
+ # TODO: May need to fetch the directory digest.
+ # Should probably use FetchTree as an optimization
+ # (but need to avoid unnecessary calls due to recursion, as FetchTree
+ # is already recursive on its own).
with open(self.objpath(directory_digest), "rb") as f:
directory.ParseFromString(f.read())
@@ -469,6 +490,8 @@ class CASCache:
################################################
def _reachable_refs_dir(self, reachable, tree, update_mtime=False, check_exists=False):
+ # this method is only used by a test,
+ # maybe move it to that test
if tree.hash in reachable:
return
try:
@@ -529,6 +552,10 @@ class CASCache:
# already in local repository
return objpath
+ # TODO also check remote cache if configured
+ # (before checking `remote`)
+ # or where/when do we even use this method? investigate
+
batch = _CASBatchRead(remote)
batch.add(digest)
batch.send()
@@ -580,6 +607,10 @@ class CASCache:
def _fetch_directory(self, remote, dir_digest):
# TODO Use GetTree() if the server supports it
+ # TODO investigate for remote caching
+ # what's the difference to FetchTree?
+ # with remote cache, should this implicitly upload the fetched blobs?
+
fetch_queue = [dir_digest]
fetch_next_queue = []
batch = _CASBatchRead(remote)
@@ -605,6 +636,8 @@ class CASCache:
self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
def _fetch_tree(self, remote, digest):
+ # TODO investigate for remote caching
+
objpath = self._ensure_blob(remote, digest)
tree = remote_execution_pb2.Tree()
@@ -634,6 +667,8 @@ class CASCache:
# Returns: The Digests of the blobs that were not available on the remote CAS
#
def fetch_blobs(self, remote, digests, *, allow_partial=False):
+ # TODO investigate for remote caching
+
missing_blobs = [] if allow_partial else None
remote.init()
@@ -657,6 +692,9 @@ class CASCache:
# digests (list): The Digests of Blobs to upload
#
def send_blobs(self, remote, digests):
+ # TODO investigate for remote caching
+ # do we first need to download blobs from remote cache?
+
batch = _CASBatchUpdate(remote)
for digest in digests:
@@ -665,6 +703,8 @@ class CASCache:
batch.send()
def _send_directory(self, remote, digest):
+ # TODO investigate for remote caching
+
required_blobs = self.required_blobs_for_directory(digest)
# Upload any blobs missing on the server.