diff options
author | Jürg Billeter <j@bitron.ch> | 2020-10-19 18:21:22 +0200 |
---|---|---|
committer | Jürg Billeter <j@bitron.ch> | 2020-10-19 18:21:22 +0200 |
commit | 05a5d63357837076198200190e6498af3b987bf2 (patch) | |
tree | aac36643703a5b4ce2321655956f3c4dab363454 | |
parent | 1be07a2a862dea10e26507497533cf3a8bb7028c (diff) | |
download | buildstream-05a5d63357837076198200190e6498af3b987bf2.tar.gz |
wip: cascache.py: add notes
-rw-r--r-- | src/buildstream/_cas/cascache.py | 40 |
1 file changed, 40 insertions, 0 deletions
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py index 27d68905b..ff982e4b8 100644 --- a/src/buildstream/_cas/cascache.py +++ b/src/buildstream/_cas/cascache.py @@ -152,6 +152,9 @@ class CASCache: def contains_files(self, digests): cas = self.get_cas() + # TODO Limit size of FindMissingBlobs request? + # possibly share code with remote_missing_blobs() + request = remote_execution_pb2.FindMissingBlobsRequest() request.blob_digests.extend(digests) @@ -172,6 +175,15 @@ class CASCache: def contains_directory(self, digest, *, with_files): local_cas = self.get_local_cas() + # TODO if remote cache is enabled + # use FetchTree always without file blobs, + # determine required blobs in BuildStream code + # and then issue a FindMissingBlobs request. + # that request must include directory blobs (as FetchTree doesn't guarantee their existence in the remote CAS) + # also include file blobs in that list if with_files is True, of course. + # if no remote cache is configured, maybe keep using FetchTree for better performance. + # longer term possibly extend localcas protocol to have something like a FindTree method? 
+ request = local_cas_pb2.FetchTreeRequest() request.root_digest.CopyFrom(digest) request.fetch_file_blobs = with_files @@ -198,6 +210,9 @@ class CASCache: def checkout(self, dest, tree, *, can_link=False): os.makedirs(dest, exist_ok=True) + # with remote caching we can't assume blobs are in the local cache, + # so issue FetchTree request in that case + directory = remote_execution_pb2.Directory() with open(self.objpath(tree), "rb") as f: @@ -432,6 +447,8 @@ class CASCache: # Returns: Missing Digest objects # def local_missing_blobs(self, digests): + # TODO replace with FindMissingBlobs call, + # possibly share code with contains_files missing_blobs = [] for digest in digests: objpath = self.objpath(digest) @@ -454,6 +471,10 @@ class CASCache: directory = remote_execution_pb2.Directory() + # TODO may need to fetch directory digest + # should probably use FetchTree as optimization + # (but need to avoid unnecessary calls due to recursion as FetchTree + # is already recursive on its own) with open(self.objpath(directory_digest), "rb") as f: directory.ParseFromString(f.read()) @@ -469,6 +490,8 @@ class CASCache: ################################################ def _reachable_refs_dir(self, reachable, tree, update_mtime=False, check_exists=False): + # this method is only used by a test, + # maybe move it to that test if tree.hash in reachable: return try: @@ -529,6 +552,10 @@ class CASCache: # already in local repository return objpath + # TODO also check remote cache if configured + # (before checking `remote`) + # or where/when do we even use this method? investigate + batch = _CASBatchRead(remote) batch.add(digest) batch.send() @@ -580,6 +607,10 @@ class CASCache: def _fetch_directory(self, remote, dir_digest): # TODO Use GetTree() if the server supports it + # TODO investigate for remote caching + # what's the difference to FetchTree? + # with remote cache, should this implicitly upload the fetched blobs? 
+ fetch_queue = [dir_digest] fetch_next_queue = [] batch = _CASBatchRead(remote) @@ -605,6 +636,8 @@ class CASCache: self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue) def _fetch_tree(self, remote, digest): + # TODO investigate for remote caching + objpath = self._ensure_blob(remote, digest) tree = remote_execution_pb2.Tree() @@ -634,6 +667,8 @@ class CASCache: # Returns: The Digests of the blobs that were not available on the remote CAS # def fetch_blobs(self, remote, digests, *, allow_partial=False): + # TODO investigate for remote caching + missing_blobs = [] if allow_partial else None remote.init() @@ -657,6 +692,9 @@ class CASCache: # digests (list): The Digests of Blobs to upload # def send_blobs(self, remote, digests): + # TODO investigate for remote caching + # do we first need to download blobs from remote cache? + batch = _CASBatchUpdate(remote) for digest in digests: @@ -665,6 +703,8 @@ class CASCache: batch.send() def _send_directory(self, remote, digest): + # TODO investigate for remote caching + required_blobs = self.required_blobs_for_directory(digest) # Upload any blobs missing on the server. |