diff options
author | Tobias Henkel <tobias.henkel@bmw.de> | 2018-11-07 22:01:14 +0100 |
---|---|---|
committer | Tobias Henkel <tobias.henkel@bmw.de> | 2019-05-29 04:10:08 +0000 |
commit | e90fe41bfeb7431c3fc352e275cdb864a73fcda6 (patch) | |
tree | d65e615a391602fb575e5987283fff830a0f37d7 /zuul | |
parent | af910d9d8e432100a74e97a91670fc0ecc7e2b4f (diff) | |
download | zuul-e90fe41bfeb7431c3fc352e275cdb864a73fcda6.tar.gz |
Report tenant- and project-specific resource usage stats
We currently lack a means of accounting for the resources used by
tenants or projects. Together with an addition to nodepool that adds
resource metadata to nodes, we can emit statsd statistics per tenant
and per project.
The following statistics are emitted:
* zuul.nodepool.resources.tenant.{tenant}.{resource}.current
  Gauge with the resources currently in use by the tenant.
* zuul.nodepool.resources.project.{project}.{resource}.current
  Gauge with the resources currently in use by the project.
* zuul.nodepool.resources.tenant.{tenant}.{resource}.counter
  Counter with the summed usage by tenant, e.g. CPU seconds.
* zuul.nodepool.resources.project.{project}.{resource}.counter
  Counter with the summed usage by project, e.g. CPU seconds.
Depends-On: https://review.openstack.org/616262
Change-Id: I68ea68128287bf52d107959e1c343dfce98f1fc8
Diffstat (limited to 'zuul')
-rw-r--r-- | zuul/manager/__init__.py | 3 | ||||
-rw-r--r-- | zuul/model.py | 1 | ||||
-rw-r--r-- | zuul/nodepool.py | 91 |
3 files changed, 92 insertions, 3 deletions
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index 0f2d7da06..3cd863ec0 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -403,7 +403,8 @@ class PipelineManager(object): self.log.debug("Found job %s for change %s" % (job, item.change)) try: nodeset = item.current_build_set.getJobNodeSet(job.name) - self.sched.nodepool.useNodeSet(nodeset) + self.sched.nodepool.useNodeSet( + nodeset, build_set=item.current_build_set) self.sched.executor.execute( job, item, self.pipeline, build_set.dependent_changes, diff --git a/zuul/model.py b/zuul/model.py index 432104b39..4e55aaf46 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -547,6 +547,7 @@ class Node(ConfigObject): self.region = None self.username = None self.hold_expiration = None + self.resources = None @property def state(self): diff --git a/zuul/nodepool.py b/zuul/nodepool.py index 005482f01..ba22c8ddc 100644 --- a/zuul/nodepool.py +++ b/zuul/nodepool.py @@ -12,17 +12,30 @@ import logging +from collections import defaultdict from zuul import model from zuul.lib.logutil import get_annotated_logger from zuul.zk import LockException +def add_resources(target, source): + for key, value in source.items(): + target[key] += value + + +def subtract_resources(target, source): + for key, value in source.items(): + target[key] -= value + + class Nodepool(object): log = logging.getLogger('zuul.nodepool') def __init__(self, scheduler): self.requests = {} self.sched = scheduler + self.current_resources_by_tenant = {} + self.current_resources_by_project = {} def emitStats(self, request): # Implements the following : @@ -60,6 +73,37 @@ class Nodepool(object): pipe.gauge('zuul.nodepool.current_requests', len(self.requests)) pipe.send() + def emitStatsResources(self): + if not self.sched.statsd: + return + statsd = self.sched.statsd + + for tenant, resources in self.current_resources_by_tenant.items(): + for resource, value in resources.items(): + key = 'zuul.nodepool.resources.tenant.' 
\ + '{tenant}.{resource}' + statsd.gauge(key, value, tenant=tenant, resource=resource) + for project, resources in self.current_resources_by_project.items(): + for resource, value in resources.items(): + key = 'zuul.nodepool.resources.project.' \ + '{project}.{resource}' + statsd.gauge(key, value, project=project, resource=resource) + + def emitStatsResourceCounters(self, tenant, project, resources, duration): + if not self.sched.statsd: + return + statsd = self.sched.statsd + + for resource, value in resources.items(): + key = 'zuul.nodepool.resources.tenant.{tenant}.{resource}' + statsd.incr(key, value * duration, + tenant=tenant, resource=resource) + for resource, value in resources.items(): + key = 'zuul.nodepool.resources.project.' \ + '{project}.{resource}' + statsd.incr(key, value * duration, + project=project, resource=resource) + def requestNodes(self, build_set, job, relative_priority, event=None): log = get_annotated_logger(self.log, event) # Create a copy of the nodeset to represent the actual nodes @@ -157,22 +201,47 @@ class Nodepool(object): self.log.debug("Removing autohold for %s", autohold_key) del self.sched.autohold_requests[autohold_key] - def useNodeSet(self, nodeset): + def useNodeSet(self, nodeset, build_set=None): self.log.info("Setting nodeset %s in use" % (nodeset,)) + resources = defaultdict(int) for node in nodeset.getNodes(): if node.lock is None: raise Exception("Node %s is not locked" % (node,)) node.state = model.STATE_IN_USE self.sched.zk.storeNode(node) + if node.resources: + add_resources(resources, node.resources) + if build_set and resources: + # we have a buildset and thus also tenant and project so we + # can emit project specific resource usage stats + tenant_name = build_set.item.layout.tenant.name + project_name = build_set.item.change.project.canonical_name + + self.current_resources_by_tenant.setdefault( + tenant_name, defaultdict(int)) + self.current_resources_by_project.setdefault( + project_name, defaultdict(int)) + + 
add_resources(self.current_resources_by_tenant[tenant_name], + resources) + add_resources(self.current_resources_by_project[project_name], + resources) + self.emitStatsResources() def returnNodeSet(self, nodeset, build=None): self.log.info("Returning nodeset %s" % (nodeset,)) + resources = defaultdict(int) + duration = None + project = None + tenant = None + if build: + project = build.build_set.item.change.project + tenant = build.build_set.item.pipeline.tenant.name if (build and build.start_time and build.end_time and build.build_set and build.build_set.item and build.build_set.item.change and build.build_set.item.change.project): duration = build.end_time - build.start_time - project = build.build_set.item.change.project self.log.info("Nodeset %s with %s nodes was in use " "for %s seconds for build %s for project %s", nodeset, len(nodeset.nodes), duration, build, @@ -183,6 +252,8 @@ class Nodepool(object): else: try: if node.state == model.STATE_IN_USE: + if node.resources: + add_resources(resources, node.resources) node.state = model.STATE_USED self.sched.zk.storeNode(node) except Exception: @@ -190,6 +261,22 @@ class Nodepool(object): "while unlocking:" % (node,)) self._unlockNodes(nodeset.getNodes()) + # When returning a nodeset we need to update the gauges if we have a + # build. Further we calculate resource*duration and increment their + # tenant or project specific counters. With that we have both the + # current value and also counters to be able to perform accounting. + if tenant and project and resources: + project_name = project.canonical_name + subtract_resources( + self.current_resources_by_tenant[tenant], resources) + subtract_resources( + self.current_resources_by_project[project_name], resources) + self.emitStatsResources() + + if duration: + self.emitStatsResourceCounters( + tenant, project_name, resources, duration) + def unlockNodeSet(self, nodeset): self._unlockNodes(nodeset.getNodes()) |