summary | refs | log | tree | commit | diff
path: root/zuul
diff options
context:
space:
mode:
author: Tobias Henkel <tobias.henkel@bmw.de> 2018-11-07 22:01:14 +0100
committer: Tobias Henkel <tobias.henkel@bmw.de> 2019-05-29 04:10:08 +0000
commit: e90fe41bfeb7431c3fc352e275cdb864a73fcda6 (patch)
tree: d65e615a391602fb575e5987283fff830a0f37d7 /zuul
parent: af910d9d8e432100a74e97a91670fc0ecc7e2b4f (diff)
download: zuul-e90fe41bfeb7431c3fc352e275cdb864a73fcda6.tar.gz
Report tenant and project specific resource usage stats
We currently lack a means to support resource accounting of tenants or
projects. Together with an addition to nodepool that adds resource
metadata to nodes, we can emit statsd statistics per tenant and per
project.

The following statistics are emitted:

* zuul.nodepool.resources.tenant.{tenant}.{resource}.current
  Gauge with the currently used resources by tenant

* zuul.nodepool.resources.project.{project}.{resource}.current
  Gauge with the currently used resources by project

* zuul.nodepool.resources.tenant.{tenant}.{resource}.counter
  Counter with the summed usage by tenant, e.g. cpu seconds

* zuul.nodepool.resources.project.{project}.{resource}.counter
  Counter with the summed usage by project, e.g. cpu seconds

Depends-On: https://review.openstack.org/616262
Change-Id: I68ea68128287bf52d107959e1c343dfce98f1fc8
Diffstat (limited to 'zuul')
-rw-r--r--  zuul/manager/__init__.py   3
-rw-r--r--  zuul/model.py              1
-rw-r--r--  zuul/nodepool.py          91
3 files changed, 92 insertions, 3 deletions
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 0f2d7da06..3cd863ec0 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -403,7 +403,8 @@ class PipelineManager(object):
self.log.debug("Found job %s for change %s" % (job, item.change))
try:
nodeset = item.current_build_set.getJobNodeSet(job.name)
- self.sched.nodepool.useNodeSet(nodeset)
+ self.sched.nodepool.useNodeSet(
+ nodeset, build_set=item.current_build_set)
self.sched.executor.execute(
job, item, self.pipeline,
build_set.dependent_changes,
diff --git a/zuul/model.py b/zuul/model.py
index 432104b39..4e55aaf46 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -547,6 +547,7 @@ class Node(ConfigObject):
self.region = None
self.username = None
self.hold_expiration = None
+ self.resources = None
@property
def state(self):
diff --git a/zuul/nodepool.py b/zuul/nodepool.py
index 005482f01..ba22c8ddc 100644
--- a/zuul/nodepool.py
+++ b/zuul/nodepool.py
@@ -12,17 +12,30 @@
import logging
+from collections import defaultdict
from zuul import model
from zuul.lib.logutil import get_annotated_logger
from zuul.zk import LockException
+def add_resources(target, source):
+ for key, value in source.items():
+ target[key] += value
+
+
+def subtract_resources(target, source):
+ for key, value in source.items():
+ target[key] -= value
+
+
class Nodepool(object):
log = logging.getLogger('zuul.nodepool')
def __init__(self, scheduler):
self.requests = {}
self.sched = scheduler
+ self.current_resources_by_tenant = {}
+ self.current_resources_by_project = {}
def emitStats(self, request):
# Implements the following :
@@ -60,6 +73,37 @@ class Nodepool(object):
pipe.gauge('zuul.nodepool.current_requests', len(self.requests))
pipe.send()
+ def emitStatsResources(self):
+ if not self.sched.statsd:
+ return
+ statsd = self.sched.statsd
+
+ for tenant, resources in self.current_resources_by_tenant.items():
+ for resource, value in resources.items():
+ key = 'zuul.nodepool.resources.tenant.' \
+ '{tenant}.{resource}'
+ statsd.gauge(key, value, tenant=tenant, resource=resource)
+ for project, resources in self.current_resources_by_project.items():
+ for resource, value in resources.items():
+ key = 'zuul.nodepool.resources.project.' \
+ '{project}.{resource}'
+ statsd.gauge(key, value, project=project, resource=resource)
+
+ def emitStatsResourceCounters(self, tenant, project, resources, duration):
+ if not self.sched.statsd:
+ return
+ statsd = self.sched.statsd
+
+ for resource, value in resources.items():
+ key = 'zuul.nodepool.resources.tenant.{tenant}.{resource}'
+ statsd.incr(key, value * duration,
+ tenant=tenant, resource=resource)
+ for resource, value in resources.items():
+ key = 'zuul.nodepool.resources.project.' \
+ '{project}.{resource}'
+ statsd.incr(key, value * duration,
+ project=project, resource=resource)
+
def requestNodes(self, build_set, job, relative_priority, event=None):
log = get_annotated_logger(self.log, event)
# Create a copy of the nodeset to represent the actual nodes
@@ -157,22 +201,47 @@ class Nodepool(object):
self.log.debug("Removing autohold for %s", autohold_key)
del self.sched.autohold_requests[autohold_key]
- def useNodeSet(self, nodeset):
+ def useNodeSet(self, nodeset, build_set=None):
self.log.info("Setting nodeset %s in use" % (nodeset,))
+ resources = defaultdict(int)
for node in nodeset.getNodes():
if node.lock is None:
raise Exception("Node %s is not locked" % (node,))
node.state = model.STATE_IN_USE
self.sched.zk.storeNode(node)
+ if node.resources:
+ add_resources(resources, node.resources)
+ if build_set and resources:
+ # we have a buildset and thus also tenant and project so we
+ # can emit project specific resource usage stats
+ tenant_name = build_set.item.layout.tenant.name
+ project_name = build_set.item.change.project.canonical_name
+
+ self.current_resources_by_tenant.setdefault(
+ tenant_name, defaultdict(int))
+ self.current_resources_by_project.setdefault(
+ project_name, defaultdict(int))
+
+ add_resources(self.current_resources_by_tenant[tenant_name],
+ resources)
+ add_resources(self.current_resources_by_project[project_name],
+ resources)
+ self.emitStatsResources()
def returnNodeSet(self, nodeset, build=None):
self.log.info("Returning nodeset %s" % (nodeset,))
+ resources = defaultdict(int)
+ duration = None
+ project = None
+ tenant = None
+ if build:
+ project = build.build_set.item.change.project
+ tenant = build.build_set.item.pipeline.tenant.name
if (build and build.start_time and build.end_time and
build.build_set and build.build_set.item and
build.build_set.item.change and
build.build_set.item.change.project):
duration = build.end_time - build.start_time
- project = build.build_set.item.change.project
self.log.info("Nodeset %s with %s nodes was in use "
"for %s seconds for build %s for project %s",
nodeset, len(nodeset.nodes), duration, build,
@@ -183,6 +252,8 @@ class Nodepool(object):
else:
try:
if node.state == model.STATE_IN_USE:
+ if node.resources:
+ add_resources(resources, node.resources)
node.state = model.STATE_USED
self.sched.zk.storeNode(node)
except Exception:
@@ -190,6 +261,22 @@ class Nodepool(object):
"while unlocking:" % (node,))
self._unlockNodes(nodeset.getNodes())
+ # When returning a nodeset we need to update the gauges if we have a
+ # build. Further we calculate resource*duration and increment their
+ # tenant or project specific counters. With that we have both the
+ # current value and also counters to be able to perform accounting.
+ if tenant and project and resources:
+ project_name = project.canonical_name
+ subtract_resources(
+ self.current_resources_by_tenant[tenant], resources)
+ subtract_resources(
+ self.current_resources_by_project[project_name], resources)
+ self.emitStatsResources()
+
+ if duration:
+ self.emitStatsResourceCounters(
+ tenant, project_name, resources, duration)
+
def unlockNodeSet(self, nodeset):
self._unlockNodes(nodeset.getNodes())