summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Van Berkom <tristan.van.berkom@gmail.com>2019-01-18 20:57:42 +0000
committerTristan Van Berkom <tristan.van.berkom@gmail.com>2019-01-18 20:57:42 +0000
commit73c7252d339d8761d9f97fe5ac1355f6a2c1057f (patch)
tree6030968e7a99c52e7fa0e8ee9bb4e4e026d3404c
parent9911023f2e5432b5f7a390087879588d0c2e057c (diff)
parent8ce483d431f1be51e1c2051d15c2c47453bab0eb (diff)
downloadbuildstream-73c7252d339d8761d9f97fe5ac1355f6a2c1057f.tar.gz
Merge branch 'tristan/cas-cleanup-improve' into 'master'
CASCache cleanup improvements See merge request BuildStream/buildstream!1087
-rw-r--r--buildstream/_cas/cascache.py67
-rw-r--r--buildstream/utils.py30
-rw-r--r--tests/artifactcache/expiry.py14
3 files changed, 100 insertions, 11 deletions
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index adbd34c9e..5a6251815 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -21,7 +21,7 @@ import hashlib
import itertools
import os
import stat
-import tempfile
+import errno
import uuid
import contextlib
@@ -129,7 +129,7 @@ class CASCache():
else:
return dest
- with tempfile.TemporaryDirectory(prefix='tmp', dir=self.tmpdir) as tmpdir:
+ with utils._tempdir(prefix='tmp', dir=self.tmpdir) as tmpdir:
checkoutdir = os.path.join(tmpdir, ref)
self._checkout(checkoutdir, tree)
@@ -374,7 +374,7 @@ class CASCache():
for chunk in iter(lambda: tmp.read(4096), b""):
h.update(chunk)
else:
- tmp = stack.enter_context(tempfile.NamedTemporaryFile(dir=self.tmpdir))
+ tmp = stack.enter_context(utils._tempnamedfile(dir=self.tmpdir))
# Set mode bits to 0644
os.chmod(tmp.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
@@ -545,11 +545,7 @@ class CASCache():
def remove(self, ref, *, defer_prune=False):
# Remove cache ref
- refpath = self._refpath(ref)
- if not os.path.exists(refpath):
- raise CASCacheError("Could not find ref '{}'".format(ref))
-
- os.unlink(refpath)
+ self._remove_ref(ref)
if not defer_prune:
pruned = self.prune()
@@ -626,6 +622,55 @@ class CASCache():
def _refpath(self, ref):
return os.path.join(self.casdir, 'refs', 'heads', ref)
+ # _remove_ref()
+ #
+ # Removes a ref.
+ #
+ # This also takes care of pruning away directories which can
+ # be removed after having removed the given ref.
+ #
+ # Args:
+ # ref (str): The ref to remove
+ #
+ # Raises:
+ # (CASCacheError): If the ref didnt exist, or a system error
+ # occurred while removing it
+ #
+ def _remove_ref(self, ref):
+
+ # Remove the ref itself
+ refpath = self._refpath(ref)
+ try:
+ os.unlink(refpath)
+ except FileNotFoundError as e:
+ raise CASCacheError("Could not find ref '{}'".format(ref)) from e
+
+ # Now remove any leading directories
+ basedir = os.path.join(self.casdir, 'refs', 'heads')
+ components = list(os.path.split(ref))
+ while components:
+ components.pop()
+ refdir = os.path.join(basedir, *components)
+
+ # Break out once we reach the base
+ if refdir == basedir:
+ break
+
+ try:
+ os.rmdir(refdir)
+ except FileNotFoundError:
+ # The parent directory did not exist, but it's
+ # parent directory might still be ready to prune
+ pass
+ except OSError as e:
+ if e.errno == errno.ENOTEMPTY:
+ # The parent directory was not empty, so we
+ # cannot prune directories beyond this point
+ break
+
+ # Something went wrong here
+ raise CASCacheError("System error while removing ref '{}': {}".format(ref, e)) from e
+
# _commit_directory():
#
# Adds local directory to content addressable store.
@@ -797,7 +842,7 @@ class CASCache():
# already in local repository
return objpath
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
+ with utils._tempnamedfile(dir=self.tmpdir) as f:
remote._fetch_blob(digest, f)
added_digest = self.add_object(path=f.name, link_directly=True)
@@ -807,7 +852,7 @@ class CASCache():
def _batch_download_complete(self, batch):
for digest, data in batch.send():
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
+ with utils._tempnamedfile(dir=self.tmpdir) as f:
f.write(data)
f.flush()
@@ -904,7 +949,7 @@ class CASCache():
def _fetch_tree(self, remote, digest):
# download but do not store the Tree object
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
+ with utils._tempnamedfile(dir=self.tmpdir) as out:
remote._fetch_blob(digest, out)
tree = remote_execution_pb2.Tree()
diff --git a/buildstream/utils.py b/buildstream/utils.py
index 45b3c375f..e3ff88034 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -1032,6 +1032,36 @@ def _tempdir(suffix="", prefix="tmp", dir=None): # pylint: disable=redefined-bu
cleanup_tempdir()
+# _tempnamedfile()
+#
+# A context manager for doing work on an open temporary file
+# which is guaranteed to be named and have an entry in the filesystem.
+#
+# Args:
+# dir (str): A path to a parent directory for the temporary file
+# suffix (str): A suffix for the temproary file name
+# prefix (str): A prefix for the temporary file name
+#
+# Yields:
+# (str): The temporary file handle
+#
+# Do not use tempfile.NamedTemporaryFile() directly, as this will
+# leak files on the filesystem when BuildStream exits a process
+# on SIGTERM.
+#
+@contextmanager
+def _tempnamedfile(suffix="", prefix="tmp", dir=None): # pylint: disable=redefined-builtin
+ temp = None
+
+ def close_tempfile():
+ if temp is not None:
+ temp.close()
+
+ with _signals.terminator(close_tempfile), \
+ tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, dir=dir) as temp:
+ yield temp
+
+
# _kill_process_tree()
#
# Brutally murder a process and all of its children
diff --git a/tests/artifactcache/expiry.py b/tests/artifactcache/expiry.py
index e73928363..d40f432c9 100644
--- a/tests/artifactcache/expiry.py
+++ b/tests/artifactcache/expiry.py
@@ -382,6 +382,7 @@ def test_extract_expiry(cli, datafiles, tmpdir):
res = cli.run(project=project, args=['checkout', 'target.bst', os.path.join(str(tmpdir), 'checkout')])
res.assert_success()
+ # Get a snapshot of the extracts in advance
extractdir = os.path.join(project, 'cache', 'artifacts', 'extract', 'test', 'target')
extracts = os.listdir(extractdir)
assert(len(extracts) == 1)
@@ -395,3 +396,16 @@ def test_extract_expiry(cli, datafiles, tmpdir):
# Now the extract should be removed.
assert not os.path.exists(extract)
+
+ # As an added bonus, let's ensure that no directories have been left behind
+ #
+ # Now we should have a directory for the cached target2.bst, which
+ # replaced target.bst in the cache, we should not have a directory
+ # for the target.bst
+ refsdir = os.path.join(project, 'cache', 'artifacts', 'cas', 'refs', 'heads')
+ refsdirtest = os.path.join(refsdir, 'test')
+ refsdirtarget = os.path.join(refsdirtest, 'target')
+ refsdirtarget2 = os.path.join(refsdirtest, 'target2')
+
+ assert os.path.isdir(refsdirtarget2)
+ assert not os.path.exists(refsdirtarget)