author    Jonathan Maw <jonathan.maw@codethink.co.uk>  2018-08-20 16:27:18 +0000
committer Jonathan Maw <jonathan.maw@codethink.co.uk>  2018-08-20 16:27:18 +0000
commit    92e34ccd2bfdb09f3c6d16e974d1c5fb78ac0516 (patch)
tree      e093c1ea9fa30eca781a14fa20348ac38e2bacb6
parent    372abed5f1b2202435d553b28fa807a913f8b5b9 (diff)
parent    a67906e695c027bdfa60a77a243bf3d6a664eec1 (diff)
download  buildstream-92e34ccd2bfdb09f3c6d16e974d1c5fb78ac0516.tar.gz
Merge branch 'jonathan/cache-cache-size' into 'master'

Jonathan/cache cache size

See merge request BuildStream/buildstream!679
-rw-r--r--  buildstream/_artifactcache/__init__.py       |   2
-rw-r--r--  buildstream/_artifactcache/artifactcache.py  | 120
-rw-r--r--  buildstream/_artifactcache/cascache.py       |   4
-rw-r--r--  buildstream/_context.py                      |  74
-rw-r--r--  buildstream/_frontend/app.py                 |   6
-rw-r--r--  buildstream/_scheduler/queues/buildqueue.py  |   2
-rw-r--r--  buildstream/_scheduler/scheduler.py          |   4
-rw-r--r--  tests/artifactcache/cache_size.py            |  62
-rw-r--r--  tests/testutils/artifactshare.py             |   3

9 files changed, 195 insertions(+), 82 deletions(-)
diff --git a/buildstream/_artifactcache/__init__.py b/buildstream/_artifactcache/__init__.py
index 07ed52b4b..fad483a57 100644
--- a/buildstream/_artifactcache/__init__.py
+++ b/buildstream/_artifactcache/__init__.py
@@ -17,4 +17,4 @@
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
-from .artifactcache import ArtifactCache, ArtifactCacheSpec
+from .artifactcache import ArtifactCache, ArtifactCacheSpec, CACHE_SIZE_FILE
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index d98c291f9..f28fe394f 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -28,6 +28,9 @@ from .. import utils
from .. import _yaml
+CACHE_SIZE_FILE = "cache_size"
+
+
# An ArtifactCacheSpec holds the user configuration for a single remote
# artifact cache.
#
@@ -82,7 +85,6 @@ class ArtifactCache():
self.extractdir = os.path.join(context.artifactdir, 'extract')
self.tmpdir = os.path.join(context.artifactdir, 'tmp')
- self.max_size = context.cache_quota
self.estimated_size = None
self.global_remote_specs = []
@@ -90,6 +92,8 @@ class ArtifactCache():
self._local = False
self.cache_size = None
+ self.cache_quota = None
+ self.cache_lower_threshold = None
os.makedirs(self.extractdir, exist_ok=True)
os.makedirs(self.tmpdir, exist_ok=True)
@@ -227,7 +231,7 @@ class ArtifactCache():
def clean(self):
artifacts = self.list_artifacts()
- while self.calculate_cache_size() >= self.context.cache_quota - self.context.cache_lower_threshold:
+ while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold:
try:
to_remove = artifacts.pop(0)
except IndexError:
@@ -241,7 +245,7 @@ class ArtifactCache():
"Please increase the cache-quota in {}."
.format(self.context.config_origin or default_conf))
- if self.calculate_cache_size() > self.context.cache_quota:
+ if self.calculate_cache_size() > self.cache_quota:
raise ArtifactError("Cache too full. Aborting.",
detail=detail,
reason="cache-too-full")
@@ -282,7 +286,11 @@ class ArtifactCache():
# If we don't currently have an estimate, figure out the real
# cache size.
if self.estimated_size is None:
- self.estimated_size = self.calculate_cache_size()
+ stored_size = self._read_cache_size()
+ if stored_size is not None:
+ self.estimated_size = stored_size
+ else:
+ self.estimated_size = self.calculate_cache_size()
return self.estimated_size
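
The lookup order above is worth spelling out: the in-memory estimate wins, then the size persisted by a previous invocation, and only as a last resort does BuildStream walk the whole artifact directory. A minimal standalone sketch of that fallback chain (hypothetical names; scan_directory stands in for the real calculate_cache_size()):

    import os

    def approximate_size(estimated_size, size_file_path, scan_directory):
        # 1. Reuse the in-memory estimate if we already have one.
        if estimated_size is not None:
            return estimated_size
        # 2. Fall back to the size a previous invocation persisted.
        if os.path.exists(size_file_path):
            with open(size_file_path, "r") as f:
                return int(f.read())
        # 3. Last resort: a full (expensive) directory scan.
        return scan_directory()
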
@@ -541,6 +549,7 @@ class ArtifactCache():
self.estimated_size = self.calculate_cache_size()
self.estimated_size += artifact_size
+ self._write_cache_size(self.estimated_size)
# _set_cache_size()
#
@@ -551,6 +560,109 @@ class ArtifactCache():
def _set_cache_size(self, cache_size):
self.estimated_size = cache_size
+ # _set_cache_size() is called in cleanup, where it may set the cache size to None
+ if self.estimated_size is not None:
+ self._write_cache_size(self.estimated_size)
+
+ # _write_cache_size()
+ #
+ # Writes the given artifact cache size to the cache's size file
+ #
+ def _write_cache_size(self, size):
+ assert isinstance(size, int)
+ size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
+ with open(size_file_path, "w") as f:
+ f.write(str(size))
+
+ # _read_cache_size()
+ #
+ # Reads and returns the size of the artifact cache that's stored in the
+ # cache's size file
+ #
+ def _read_cache_size(self):
+ size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
+
+ if not os.path.exists(size_file_path):
+ return None
+
+ with open(size_file_path, "r") as f:
+ size = f.read()
+
+ try:
+ num_size = int(size)
+ except ValueError as e:
+ raise ArtifactError("Size '{}' parsed from '{}' was not an integer".format(
+ size, size_file_path)) from e
+
+ return num_size
+
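
A quick round-trip of the two helpers above, assuming a writable directory (hypothetical paths; the file format is just a bare decimal byte count):

    import os
    import tempfile

    artifactdir = tempfile.mkdtemp()
    size_file = os.path.join(artifactdir, "cache_size")   # CACHE_SIZE_FILE

    with open(size_file, "w") as f:
        f.write(str(1024))               # what _write_cache_size() stores

    with open(size_file, "r") as f:
        assert int(f.read()) == 1024     # what _read_cache_size() returns
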
+ # _calculate_cache_quota()
+ #
+ # Calculates and sets the cache quota and lower threshold based on the
+ # quota set in Context.
+ # It checks both that the quota is a valid expression and that there
+ # is enough disk space to satisfy it.
+ #
+ def _calculate_cache_quota(self):
+ # Headroom intended to give BuildStream a bit of leeway.
+ # This acts as the minimum size of cache_quota and is also
+ # subtracted from the user-requested cache_quota.
+ #
+ if 'BST_TEST_SUITE' in os.environ:
+ headroom = 0
+ else:
+ headroom = 2e9
+
+ artifactdir_volume = self.context.artifactdir
+ while not os.path.exists(artifactdir_volume):
+ artifactdir_volume = os.path.dirname(artifactdir_volume)
+
+ try:
+ cache_quota = utils._parse_size(self.context.config_cache_quota, artifactdir_volume)
+ except utils.UtilError as e:
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
+ "\nValid values are, for example: 800M 10G 1T 50%\n"
+ .format(str(e))) from e
+
+ stat = os.statvfs(artifactdir_volume)
+ available_space = (stat.f_bsize * stat.f_bavail)
+
+ cache_size = self.get_approximate_cache_size()
+
+ # Ensure system has enough storage for the cache_quota
+ #
+ # If cache_quota is None, set it to the maximum it could possibly be.
+ #
+ # Also check that cache_quota is at least as large as our headroom.
+ #
+ if cache_quota is None: # Infinity, set to max system storage
+ cache_quota = cache_size + available_space
+ if cache_quota < headroom: # Check minimum
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
+ "BuildStream requires a minimum cache quota of 2G.")
+ elif cache_quota > cache_size + available_space: # Check maximum
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ ("Your system does not have enough available " +
+ "space to support the cache quota specified.\n" +
+ "You currently have:\n" +
+ "- {used} of cache in use at {local_cache_path}\n" +
+ "- {available} of available system storage").format(
+ used=utils._pretty_size(cache_size),
+ local_cache_path=self.context.artifactdir,
+ available=utils._pretty_size(available_space)))
+
+ # Leave a slight headroom (2e9, i.e. 2GB) below cache_quota
+ # to try and avoid exceptions.
+ #
+ # Of course, we might still end up running out during a build
+ # if we end up writing more than 2G, but hey, this stuff is
+ # already really fuzzy.
+ #
+ self.cache_quota = cache_quota - headroom
+ self.cache_lower_threshold = self.cache_quota / 2
+
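
A worked example of the quota arithmetic, assuming a configured quota of 10G on a volume with ample free space: subtracting the 2e9 headroom leaves an effective quota of 8e9 bytes, and the lower threshold lands at 4e9, so a cleanup triggered at 8e9 evicts artifacts until the cache drops below 4e9. In numbers:

    headroom = 2e9                             # zero when BST_TEST_SUITE is set
    configured_quota = 10e9                    # e.g. "quota: 10G" in user config

    cache_quota = configured_quota - headroom  # effective quota: 8e9
    cache_lower_threshold = cache_quota / 2    # 4e9

    # clean() evicts while the cache size stays at or above
    # cache_quota - cache_lower_threshold:
    assert cache_quota - cache_lower_threshold == 4e9
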
# _configured_remote_artifact_cache_specs():
#
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 074899d81..d616e293a 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -61,6 +61,8 @@ class CASCache(ArtifactCache):
os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
+ self._calculate_cache_quota()
+
self._enable_push = enable_push
# Per-project list of _CASRemote instances.
@@ -330,7 +332,7 @@ class CASCache(ArtifactCache):
request.write_offset = offset
# max. 64 kB chunks
request.data = f.read(chunk_size)
- request.resource_name = resource_name
+ request.resource_name = resource_name # pylint: disable=cell-var-from-loop
request.finish_write = remaining <= 0
yield request
offset += chunk_size
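
The pylint suppression here concerns Python's late-binding closures: a function or generator defined in a loop captures the variable itself, not its value at that iteration. In this generator resource_name is loop-invariant, so the warning is a false positive, but a self-contained illustration of the pitfall pylint guards against:

    # Each lambda captures the variable i, not its value at definition time.
    callbacks = [lambda: i for i in range(3)]
    print([cb() for cb in callbacks])    # [2, 2, 2], not [0, 1, 2]

    # Binding the value as a default argument snapshots it instead.
    callbacks = [lambda i=i: i for i in range(3)]
    print([cb() for cb in callbacks])    # [0, 1, 2]
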
diff --git a/buildstream/_context.py b/buildstream/_context.py
index 8ebb61d23..a94d374cf 100644
--- a/buildstream/_context.py
+++ b/buildstream/_context.py
@@ -64,12 +64,6 @@ class Context():
# The locations from which to push and pull prebuilt artifacts
self.artifact_cache_specs = []
- # The artifact cache quota
- self.cache_quota = None
-
- # The lower threshold to which we aim to reduce the cache size
- self.cache_lower_threshold = None
-
# The directory to store build logs
self.logdir = None
@@ -124,6 +118,8 @@ class Context():
self._workspaces = None
self._log_handle = None
self._log_filename = None
+ self.config_cache_quota = 'infinity'
+ self.artifactdir_volume = None
# load()
#
@@ -183,71 +179,7 @@ class Context():
cache = _yaml.node_get(defaults, Mapping, 'cache')
_yaml.node_validate(cache, ['quota'])
- artifactdir_volume = self.artifactdir
- while not os.path.exists(artifactdir_volume):
- artifactdir_volume = os.path.dirname(artifactdir_volume)
-
- # We read and parse the cache quota as specified by the user
- cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
- try:
- cache_quota = utils._parse_size(cache_quota, artifactdir_volume)
- except utils.UtilError as e:
- raise LoadError(LoadErrorReason.INVALID_DATA,
- "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
- "\nValid values are, for example: 800M 10G 1T 50%\n"
- .format(str(e))) from e
-
- # Headroom intended to give BuildStream a bit of leeway.
- # This acts as the minimum size of cache_quota and also
- # is taken from the user requested cache_quota.
- #
- if 'BST_TEST_SUITE' in os.environ:
- headroom = 0
- else:
- headroom = 2e9
-
- stat = os.statvfs(artifactdir_volume)
- available_space = (stat.f_bsize * stat.f_bavail)
-
- # Again, the artifact directory may not yet have been created yet
- #
- if not os.path.exists(self.artifactdir):
- cache_size = 0
- else:
- cache_size = utils._get_dir_size(self.artifactdir)
-
- # Ensure system has enough storage for the cache_quota
- #
- # If cache_quota is none, set it to the maximum it could possibly be.
- #
- # Also check that cache_quota is atleast as large as our headroom.
- #
- if cache_quota is None: # Infinity, set to max system storage
- cache_quota = cache_size + available_space
- if cache_quota < headroom: # Check minimum
- raise LoadError(LoadErrorReason.INVALID_DATA,
- "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
- "BuildStream requires a minimum cache quota of 2G.")
- elif cache_quota > cache_size + available_space: # Check maximum
- raise LoadError(LoadErrorReason.INVALID_DATA,
- ("Your system does not have enough available " +
- "space to support the cache quota specified.\n" +
- "You currently have:\n" +
- "- {used} of cache in use at {local_cache_path}\n" +
- "- {available} of available system storage").format(
- used=utils._pretty_size(cache_size),
- local_cache_path=self.artifactdir,
- available=utils._pretty_size(available_space)))
-
- # Place a slight headroom (2e9 (2GB) on the cache_quota) into
- # cache_quota to try and avoid exceptions.
- #
- # Of course, we might still end up running out during a build
- # if we end up writing more than 2G, but hey, this stuff is
- # already really fuzzy.
- #
- self.cache_quota = cache_quota - headroom
- self.cache_lower_threshold = self.cache_quota / 2
+ self.config_cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
# Load artifact share configuration
self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
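
After this change Context.load() only records the raw quota string; parsing and disk-space validation move to ArtifactCache._calculate_cache_quota(), which runs once the platform's artifact cache exists. The user-facing configuration is unchanged. A sketch of a configuration carrying a quota, built with the same _yaml helper the tests use (the file path is hypothetical):

    from buildstream import _yaml

    user_conf = {
        "cache": {
            "quota": "10G",   # also valid: "800M", "1T", "50%", "infinity"
        }
    }
    _yaml.dump(user_conf, "buildstream.conf")
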
diff --git a/buildstream/_frontend/app.py b/buildstream/_frontend/app.py
index 1550fbcb3..1e357f123 100644
--- a/buildstream/_frontend/app.py
+++ b/buildstream/_frontend/app.py
@@ -198,8 +198,10 @@ class App():
option_value = self._main_options.get(cli_option)
if option_value is not None:
setattr(self.context, context_attr, option_value)
-
- Platform.create_instance(self.context)
+ try:
+ Platform.create_instance(self.context)
+ except BstError as e:
+ self._error_exit(e, "Error instantiating platform")
# Create the logger right before setting the message handler
self.logger = LogLine(self.context,
diff --git a/buildstream/_scheduler/queues/buildqueue.py b/buildstream/_scheduler/queues/buildqueue.py
index 5967fbf76..2009fce97 100644
--- a/buildstream/_scheduler/queues/buildqueue.py
+++ b/buildstream/_scheduler/queues/buildqueue.py
@@ -97,7 +97,7 @@ class BuildQueue(Queue):
cache = element._get_artifact_cache()
cache._add_artifact_size(artifact_size)
- if cache.get_approximate_cache_size() > self._scheduler.context.cache_quota:
+ if cache.get_approximate_cache_size() > cache.cache_quota:
self._scheduler._check_cache_size_real()
def done(self, job, element, result, success):
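
The intent of this hunk: after a build job finishes, the queue bumps the cheap running estimate by the new artifact's size, and only when that estimate crosses the cache's own quota does the scheduler trigger an exact recount. A rough sketch of the two-tier check (hypothetical function name; the attributes mirror the diff):

    def on_artifact_built(cache, scheduler, artifact_size):
        # Cheap path: adjust the in-memory running estimate.
        cache._add_artifact_size(artifact_size)
        # Expensive path only when we might actually be over quota.
        if cache.get_approximate_cache_size() > cache.cache_quota:
            scheduler._check_cache_size_real()
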
diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py
index 3d1d79b61..38d38be48 100644
--- a/buildstream/_scheduler/scheduler.py
+++ b/buildstream/_scheduler/scheduler.py
@@ -29,6 +29,7 @@ from contextlib import contextmanager
# Local imports
from .resources import Resources, ResourceType
from .jobs import CacheSizeJob, CleanupJob
+from .._platform import Platform
# A decent return code for Scheduler.run()
@@ -316,7 +317,8 @@ class Scheduler():
self._sched()
def _run_cleanup(self, cache_size):
- if cache_size and cache_size < self.context.cache_quota:
+ platform = Platform.get_platform()
+ if cache_size and cache_size < platform.artifactcache.cache_quota:
return
job = CleanupJob(self, 'cleanup', 'cleanup',
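
_run_cleanup() now reads the quota from the platform's artifact cache instead of the Context. Combined with clean(), this forms a simple hysteresis band: cleanup is skipped while the size sits below the quota, and once it runs it evicts down past the lower threshold (half the quota) so it does not immediately fire again. A toy model with hypothetical numbers:

    quota = 8e9
    lower_threshold = quota / 2              # 4e9

    def run_cleanup(cache_size):
        # Mirrors Scheduler._run_cleanup(): nothing to do under quota.
        if cache_size and cache_size < quota:
            return cache_size
        # Mirrors ArtifactCache.clean(): evict down past the threshold.
        while cache_size >= quota - lower_threshold:
            cache_size -= 1e9                # pretend each artifact is ~1GB
        return cache_size

    assert run_cleanup(3e9) == 3e9           # below quota: skipped
    assert run_cleanup(8e9) == 3e9           # at quota: evicts below 4e9
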
diff --git a/tests/artifactcache/cache_size.py b/tests/artifactcache/cache_size.py
new file mode 100644
index 000000000..0d12cda8c
--- /dev/null
+++ b/tests/artifactcache/cache_size.py
@@ -0,0 +1,62 @@
+import os
+import pytest
+
+from buildstream import _yaml
+from buildstream._artifactcache import CACHE_SIZE_FILE
+
+from tests.testutils import cli, create_element_size
+
+# XXX: Currently lacking:
+# * A way to check whether it's faster to read cache size on
+# successive invocations.
+# * A way to check whether the cache size file has been read.
+
+
+def create_project(project_dir):
+ project_file = os.path.join(project_dir, "project.conf")
+ project_conf = {
+ "name": "test"
+ }
+ _yaml.dump(project_conf, project_file)
+ element_name = "test.bst"
+ create_element_size(element_name, project_dir, ".", [], 1024)
+
+
+def test_cache_size_roundtrip(cli, tmpdir):
+ # Builds (to put files in the cache), then invokes buildstream again
+ # to check nothing breaks
+
+ # Create project
+ project_dir = str(tmpdir)
+ create_project(project_dir)
+
+ # Build, to populate the cache
+ res = cli.run(project=project_dir, args=["build", "test.bst"])
+ res.assert_success()
+
+ # Show, to check that nothing breaks while reading cache size
+ res = cli.run(project=project_dir, args=["show", "test.bst"])
+ res.assert_success()
+
+
+def test_cache_size_write(cli, tmpdir):
+ # Builds (to put files in the cache), then checks a number is
+ # written to the cache size file.
+
+ project_dir = str(tmpdir)
+ create_project(project_dir)
+
+ # Artifact cache must be in a known place
+ artifactdir = os.path.join(project_dir, "artifacts")
+ cli.configure({"artifactdir": artifactdir})
+
+ # Build, to populate the cache
+ res = cli.run(project=project_dir, args=["build", "test.bst"])
+ res.assert_success()
+
+ # Inspect the artifact cache
+ sizefile = os.path.join(artifactdir, CACHE_SIZE_FILE)
+ assert os.path.isfile(sizefile)
+ with open(sizefile, "r") as f:
+ size_data = f.read()
+ size = int(size_data)
diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py
index 76b729e33..05e87a499 100644
--- a/tests/testutils/artifactshare.py
+++ b/tests/testutils/artifactshare.py
@@ -140,6 +140,7 @@ class ArtifactShare():
return statvfs_result(f_blocks=self.total_space,
f_bfree=self.free_space - repo_size,
+ f_bavail=self.free_space - repo_size,
f_bsize=1)
@@ -156,4 +157,4 @@ def create_artifact_share(directory, *, total_space=None, free_space=None):
share.close()
-statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize')
+statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize f_bavail')
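
The stub previously omitted f_bavail, which the relocated quota code now multiplies by f_bsize to compute available space; with f_bsize=1 every field is a plain byte count. A minimal check of how the quota code consumes the stub (hypothetical values):

    from collections import namedtuple

    statvfs_result = namedtuple('statvfs_result',
                                'f_blocks f_bfree f_bsize f_bavail')

    stat = statvfs_result(f_blocks=10e9, f_bfree=6e9, f_bsize=1, f_bavail=6e9)

    # _calculate_cache_quota() derives available space exactly like this:
    available_space = stat.f_bsize * stat.f_bavail
    assert available_space == 6e9
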