diff options
author | Tristan Van Berkom <tristan.van.berkom@gmail.com> | 2019-01-18 20:57:42 +0000 |
---|---|---|
committer | Tristan Van Berkom <tristan.van.berkom@gmail.com> | 2019-01-18 20:57:42 +0000 |
commit | 73c7252d339d8761d9f97fe5ac1355f6a2c1057f (patch) | |
tree | 6030968e7a99c52e7fa0e8ee9bb4e4e026d3404c | |
parent | 9911023f2e5432b5f7a390087879588d0c2e057c (diff) | |
parent | 8ce483d431f1be51e1c2051d15c2c47453bab0eb (diff) | |
download | buildstream-73c7252d339d8761d9f97fe5ac1355f6a2c1057f.tar.gz |
Merge branch 'tristan/cas-cleanup-improve' into 'master'
CASCache cleanup improvements
See merge request BuildStream/buildstream!1087
-rw-r--r-- | buildstream/_cas/cascache.py | 67 | ||||
-rw-r--r-- | buildstream/utils.py | 30 | ||||
-rw-r--r-- | tests/artifactcache/expiry.py | 14 |
3 files changed, 100 insertions, 11 deletions
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py index adbd34c9e..5a6251815 100644 --- a/buildstream/_cas/cascache.py +++ b/buildstream/_cas/cascache.py @@ -21,7 +21,7 @@ import hashlib import itertools import os import stat -import tempfile +import errno import uuid import contextlib @@ -129,7 +129,7 @@ class CASCache(): else: return dest - with tempfile.TemporaryDirectory(prefix='tmp', dir=self.tmpdir) as tmpdir: + with utils._tempdir(prefix='tmp', dir=self.tmpdir) as tmpdir: checkoutdir = os.path.join(tmpdir, ref) self._checkout(checkoutdir, tree) @@ -374,7 +374,7 @@ class CASCache(): for chunk in iter(lambda: tmp.read(4096), b""): h.update(chunk) else: - tmp = stack.enter_context(tempfile.NamedTemporaryFile(dir=self.tmpdir)) + tmp = stack.enter_context(utils._tempnamedfile(dir=self.tmpdir)) # Set mode bits to 0644 os.chmod(tmp.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) @@ -545,11 +545,7 @@ class CASCache(): def remove(self, ref, *, defer_prune=False): # Remove cache ref - refpath = self._refpath(ref) - if not os.path.exists(refpath): - raise CASCacheError("Could not find ref '{}'".format(ref)) - - os.unlink(refpath) + self._remove_ref(ref) if not defer_prune: pruned = self.prune() @@ -626,6 +622,55 @@ class CASCache(): def _refpath(self, ref): return os.path.join(self.casdir, 'refs', 'heads', ref) + # _remove_ref() + # + # Removes a ref. + # + # This also takes care of pruning away directories which can + # be removed after having removed the given ref. + # + # Args: + # ref (str): The ref to remove + # + # Raises: + # (CASCacheError): If the ref didnt exist, or a system error + # occurred while removing it + # + def _remove_ref(self, ref): + + # Remove the ref itself + refpath = self._refpath(ref) + try: + os.unlink(refpath) + except FileNotFoundError as e: + raise CASCacheError("Could not find ref '{}'".format(ref)) from e + + # Now remove any leading directories + basedir = os.path.join(self.casdir, 'refs', 'heads') + components = list(os.path.split(ref)) + while components: + components.pop() + refdir = os.path.join(basedir, *components) + + # Break out once we reach the base + if refdir == basedir: + break + + try: + os.rmdir(refdir) + except FileNotFoundError: + # The parent directory did not exist, but it's + # parent directory might still be ready to prune + pass + except OSError as e: + if e.errno == errno.ENOTEMPTY: + # The parent directory was not empty, so we + # cannot prune directories beyond this point + break + + # Something went wrong here + raise CASCacheError("System error while removing ref '{}': {}".format(ref, e)) from e + # _commit_directory(): # # Adds local directory to content addressable store. @@ -797,7 +842,7 @@ class CASCache(): # already in local repository return objpath - with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f: + with utils._tempnamedfile(dir=self.tmpdir) as f: remote._fetch_blob(digest, f) added_digest = self.add_object(path=f.name, link_directly=True) @@ -807,7 +852,7 @@ class CASCache(): def _batch_download_complete(self, batch): for digest, data in batch.send(): - with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f: + with utils._tempnamedfile(dir=self.tmpdir) as f: f.write(data) f.flush() @@ -904,7 +949,7 @@ class CASCache(): def _fetch_tree(self, remote, digest): # download but do not store the Tree object - with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out: + with utils._tempnamedfile(dir=self.tmpdir) as out: remote._fetch_blob(digest, out) tree = remote_execution_pb2.Tree() diff --git a/buildstream/utils.py b/buildstream/utils.py index 45b3c375f..e3ff88034 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -1032,6 +1032,36 @@ def _tempdir(suffix="", prefix="tmp", dir=None): # pylint: disable=redefined-bu cleanup_tempdir() +# _tempnamedfile() +# +# A context manager for doing work on an open temporary file +# which is guaranteed to be named and have an entry in the filesystem. +# +# Args: +# dir (str): A path to a parent directory for the temporary file +# suffix (str): A suffix for the temproary file name +# prefix (str): A prefix for the temporary file name +# +# Yields: +# (str): The temporary file handle +# +# Do not use tempfile.NamedTemporaryFile() directly, as this will +# leak files on the filesystem when BuildStream exits a process +# on SIGTERM. +# +@contextmanager +def _tempnamedfile(suffix="", prefix="tmp", dir=None): # pylint: disable=redefined-builtin + temp = None + + def close_tempfile(): + if temp is not None: + temp.close() + + with _signals.terminator(close_tempfile), \ + tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, dir=dir) as temp: + yield temp + + # _kill_process_tree() # # Brutally murder a process and all of its children diff --git a/tests/artifactcache/expiry.py b/tests/artifactcache/expiry.py index e73928363..d40f432c9 100644 --- a/tests/artifactcache/expiry.py +++ b/tests/artifactcache/expiry.py @@ -382,6 +382,7 @@ def test_extract_expiry(cli, datafiles, tmpdir): res = cli.run(project=project, args=['checkout', 'target.bst', os.path.join(str(tmpdir), 'checkout')]) res.assert_success() + # Get a snapshot of the extracts in advance extractdir = os.path.join(project, 'cache', 'artifacts', 'extract', 'test', 'target') extracts = os.listdir(extractdir) assert(len(extracts) == 1) @@ -395,3 +396,16 @@ def test_extract_expiry(cli, datafiles, tmpdir): # Now the extract should be removed. assert not os.path.exists(extract) + + # As an added bonus, let's ensure that no directories have been left behind + # + # Now we should have a directory for the cached target2.bst, which + # replaced target.bst in the cache, we should not have a directory + # for the target.bst + refsdir = os.path.join(project, 'cache', 'artifacts', 'cas', 'refs', 'heads') + refsdirtest = os.path.join(refsdir, 'test') + refsdirtarget = os.path.join(refsdirtest, 'target') + refsdirtarget2 = os.path.join(refsdirtest, 'target2') + + assert os.path.isdir(refsdirtarget2) + assert not os.path.exists(refsdirtarget) |