diff options
author | Simon Westphahl <simon.westphahl@bmw.de> | 2023-01-05 13:08:26 +0100 |
---|---|---|
committer | Simon Westphahl <simon.westphahl@bmw.de> | 2023-02-06 08:05:41 +0100 |
commit | 95ecb41c51c4a766d254ec05f32ec9c1f0472d4f (patch) | |
tree | 0d1562f65ca2ffd4f1756879628dacd4ddbc3228 /zuul | |
parent | 1ae2d929a369f700a23799f695831686b8b8e150 (diff) | |
download | zuul-95ecb41c51c4a766d254ec05f32ec9c1f0472d4f.tar.gz |
Add scheduler run handler metric
In order to better understand the scheduler run handler performance this
change adds a new `zuul.scheduler.run_handler` metric to measure the
duration of one run handler loop.
Change-Id: I77e862cf99d6a8445e71d7daab410d5853487dc3
Diffstat (limited to 'zuul')
-rw-r--r-- | zuul/scheduler.py | 108 |
1 files changed, 56 insertions, 52 deletions
diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 7f61f3fe4..f22d20a05 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -2126,70 +2126,74 @@ class Scheduler(threading.Thread): return self.log.debug("Run handler awake") self.run_handler_lock.acquire() - try: - if not self._stopped: - self.process_reconfigure_queue() + with self.statsd_timer("zuul.scheduler.run_handler"): + try: + self._run() + except Exception: + self.log.exception("Exception in run handler:") + # There may still be more events to process + self.wake_event.set() + finally: + self.run_handler_lock.release() - if self.unparsed_abide.ltime < self.system_config_cache.ltime: - self.updateSystemConfig() + def _run(self): + if not self._stopped: + self.process_reconfigure_queue() - for tenant_name in self.unparsed_abide.tenants: - if self._stopped: - break + if self.unparsed_abide.ltime < self.system_config_cache.ltime: + self.updateSystemConfig() - tenant = self.abide.tenants.get(tenant_name) - if not tenant: - continue + for tenant_name in self.unparsed_abide.tenants: + if self._stopped: + break - # This will also forward events for the pipelines - # (e.g. enqueue or dequeue events) to the matching - # pipeline event queues that are processed afterwards. - self.process_tenant_management_queue(tenant) + tenant = self.abide.tenants.get(tenant_name) + if not tenant: + continue - if self._stopped: - break + # This will also forward events for the pipelines + # (e.g. enqueue or dequeue events) to the matching + # pipeline event queues that are processed afterwards. + self.process_tenant_management_queue(tenant) - try: - with tenant_read_lock( - self.zk_client, tenant_name, blocking=False - ) as tlock: - if not self.isTenantLayoutUpToDate(tenant_name): - continue + if self._stopped: + break - # Get tenant again, as it might have been updated - # by a tenant reconfig or layout change. - tenant = self.abide.tenants[tenant_name] + try: + with tenant_read_lock( + self.zk_client, tenant_name, blocking=False + ) as tlock: + if not self.isTenantLayoutUpToDate(tenant_name): + continue - if not self._stopped: - # This will forward trigger events to pipeline - # event queues that are processed below. - self.process_tenant_trigger_queue(tenant) + # Get tenant again, as it might have been updated + # by a tenant reconfig or layout change. + tenant = self.abide.tenants[tenant_name] - self.process_pipelines(tenant, tlock) - except LockException: - self.log.debug("Skipping locked tenant %s", - tenant.name) - remote_state = self.tenant_layout_state.get( - tenant_name) - local_state = self.local_layout_state.get( - tenant_name) - if (remote_state is None or - local_state is None or - remote_state > local_state): - # Let's keep looping until we've updated to the - # latest tenant layout. - self.wake_event.set() - except Exception: - self.log.exception("Exception processing tenant %s:", - tenant_name) - # There may still be more events to process - self.wake_event.set() + if not self._stopped: + # This will forward trigger events to pipeline + # event queues that are processed below. + self.process_tenant_trigger_queue(tenant) + + self.process_pipelines(tenant, tlock) + except LockException: + self.log.debug("Skipping locked tenant %s", + tenant.name) + remote_state = self.tenant_layout_state.get( + tenant_name) + local_state = self.local_layout_state.get( + tenant_name) + if (remote_state is None or + local_state is None or + remote_state > local_state): + # Let's keep looping until we've updated to the + # latest tenant layout. + self.wake_event.set() except Exception: - self.log.exception("Exception in run handler:") + self.log.exception("Exception processing tenant %s:", + tenant_name) # There may still be more events to process self.wake_event.set() - finally: - self.run_handler_lock.release() def primeSystemConfig(self): with self.layout_lock: |