summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Cacqueray <tdecacqu@redhat.com>2018-09-02 00:53:37 +0000
committerTristan Cacqueray <tdecacqu@redhat.com>2021-04-26 14:47:36 +0000
commit0dbd8c07847635830c6d5bec38780d37fb14c979 (patch)
tree7ebbd78441e3caafc26ca9dd43d22e33a9323c67
parent3ca33f0686437fc949ee2cedbf2fe13c9dc4ab3d (diff)
downloadzuul-0dbd8c07847635830c6d5bec38780d37fb14c979.tar.gz
prometheus: add options to start the server and process collector
This change adds a new prometheus_port option to start a metric server to be scraped by a prometheus service. By default, the server exposes process information. Change-Id: Ie329df6adc69768dfdb158d00283161f8b70f07a
-rw-r--r--doc/source/discussion/components.rst11
-rw-r--r--doc/source/reference/monitoring.rst43
-rw-r--r--etc/zuul.conf-sample5
-rw-r--r--releasenotes/notes/prometheus-monitoring-ffa6de30a483e4b5.yaml5
-rw-r--r--requirements.txt1
-rw-r--r--tests/base.py27
-rw-r--r--tests/unit/test_prometheus.py42
-rwxr-xr-xzuul/cmd/__init__.py10
-rwxr-xr-xzuul/cmd/executor.py1
-rwxr-xr-xzuul/cmd/merger.py1
-rwxr-xr-xzuul/cmd/scheduler.py1
-rwxr-xr-xzuul/cmd/web.py1
12 files changed, 148 insertions, 0 deletions
diff --git a/doc/source/discussion/components.rst b/doc/source/discussion/components.rst
index d72696854..7b106a84b 100644
--- a/doc/source/discussion/components.rst
+++ b/doc/source/discussion/components.rst
@@ -392,6 +392,17 @@ The following sections of ``zuul.conf`` are used by the scheduler:
If a value higher than ``max_hold_expiration`` is supplied during
hold request creation, it will be lowered to this value.
+ .. attr:: prometheus_port
+
+ Set a TCP port to start the prometheus metrics client.
+
+ .. attr:: prometheus_addr
+ :default: 0.0.0.0
+
+ The IPv4 address to listen on for prometheus metrics polling.
+ To use IPv6, python>3.8 is required `issue24209 <https://bugs.python.org/issue24209>`_.
+
+
Operation
~~~~~~~~~
diff --git a/doc/source/reference/monitoring.rst b/doc/source/reference/monitoring.rst
index 2a91252f0..8680abba8 100644
--- a/doc/source/reference/monitoring.rst
+++ b/doc/source/reference/monitoring.rst
@@ -503,3 +503,46 @@ following statsd events:
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` +1
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` 40 seconds
* ``zuul.tenant.mytenant.pipeline.gate.all_jobs`` +1
+
+
+Prometheus monitoring
+---------------------
+
+Zuul comes with support to start a prometheus_ metric server to be added as
+a prometheus target.
+
+.. _prometheus: https://prometheus.io/docs/introduction/overview/
+
+
+Configuration
+~~~~~~~~~~~~~
+
+Prometheus support uses the ``prometheus_client`` python module.
+Note that support is optional and Zuul will start without
+the prometheus python module present.
+
+To enable the service, set the ``prometheus_port`` in a service section of
+``zuul.conf``. For example setting :attr:`scheduler.prometheus_port` to 9091
+starts an HTTP server to expose metrics to a prometheus service at:
+http://scheduler:9091/metrics
+
+
+Metrics
+~~~~~~~
+
+These metrics are exposed by default:
+
+.. stat:: process_virtual_memory_bytes
+ :type: gauge
+
+.. stat:: process_resident_memory_bytes
+ :type: gauge
+
+.. stat:: process_open_fds
+ :type: gauge
+
+.. stat:: process_start_time_seconds
+ :type: gauge
+
+.. stat:: process_cpu_seconds_total
+ :type: counter
diff --git a/etc/zuul.conf-sample b/etc/zuul.conf-sample
index 4a83e04ed..040eef826 100644
--- a/etc/zuul.conf-sample
+++ b/etc/zuul.conf-sample
@@ -26,22 +26,27 @@ tenant_config=/etc/zuul/main.yaml
log_config=/etc/zuul/logging.conf
pidfile=/var/run/zuul/zuul.pid
state_dir=/var/lib/zuul
+prometheus_port=9091
+;prometheus_addr=0.0.0.0
[merger]
git_dir=/var/lib/zuul/git
;git_user_email=zuul@example.com
;git_user_name=zuul
+prometheus_port=9092
[executor]
default_username=zuul
trusted_ro_paths=/opt/zuul-scripts:/var/cache
trusted_rw_paths=/opt/zuul-logs
+prometheus_port=9093
[web]
listen_address=127.0.0.1
port=9000
static_cache_expiry=0
status_url=https://zuul.example.com/status
+prometheus_port=9094
[webclient]
url=https://zuul.example.com
diff --git a/releasenotes/notes/prometheus-monitoring-ffa6de30a483e4b5.yaml b/releasenotes/notes/prometheus-monitoring-ffa6de30a483e4b5.yaml
new file mode 100644
index 000000000..58f09d5be
--- /dev/null
+++ b/releasenotes/notes/prometheus-monitoring-ffa6de30a483e4b5.yaml
@@ -0,0 +1,5 @@
+---
+features:
+ - |
+ A new prometheus_port option for the services can be used to start the
+ prometheus python client and expose metrics.
diff --git a/requirements.txt b/requirements.txt
index 2f7ac5d4d..3ca11a6ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ GitPython>=2.1.8
python-daemon>=2.0.4
extras
statsd>=3.0
+prometheus-client
voluptuous>=0.10.2
gear>=0.13.0,<1.0.0,!=0.15.0
apscheduler>=3.0
diff --git a/tests/base.py b/tests/base.py
index 09597f065..f8fc14f59 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -65,6 +65,7 @@ from git.exc import NoSuchPathError
from git.util import IterableList
import yaml
import paramiko
+import prometheus_client.exposition
from zuul.driver.sql.sqlconnection import DatabaseSession
from zuul.model import Change
@@ -868,6 +869,25 @@ class FakeGerritChange(object):
self.reported += 1
+class PrometheusServer(object):
+    """Test fixture serving ``prometheus_client.REGISTRY`` over HTTP.
+
+    ``start`` binds an OS-assigned port (stored in ``self.port``) and
+    serves requests from a daemon thread; ``stop`` tears both down.
+    """
+
+    def start(self):
+        """Create the WSGI metrics server and serve it in the background."""
+        app = prometheus_client.make_wsgi_app(prometheus_client.REGISTRY)
+        self.httpd = prometheus_client.exposition.make_server(
+            "0.0.0.0",
+            0,
+            app,
+            prometheus_client.exposition.ThreadingWSGIServer,
+            handler_class=prometheus_client.exposition._SilentHandler)
+        # Port 0 was requested, so read back the port actually bound.
+        self.port = self.httpd.socket.getsockname()[1]
+        self.thread = threading.Thread(target=self.httpd.serve_forever)
+        self.thread.daemon = True
+        self.thread.start()
+
+    def stop(self):
+        """Stop serving, wait for the thread and release the socket."""
+        self.httpd.shutdown()
+        self.thread.join()
+        # shutdown() only ends serve_forever(); the listening socket
+        # stays open until server_close() is called.
+        self.httpd.server_close()
+
+
class GerritWebServer(object):
def __init__(self, fake_gerrit):
@@ -4156,6 +4176,10 @@ class ZuulTestCase(BaseTestCase):
server that all of the Zuul components in this test use to
communicate with each other.
+ :ivar PrometheusServer prometheus_server: An instance of
+ :py:class:`~tests.base.PrometheusServer` which is the Prometheus
+ metrics endpoint.
+
:ivar RecordingExecutorServer executor_server: An instance of
:py:class:`~tests.base.RecordingExecutorServer` which is the
Ansible execute server used to run jobs for this test.
@@ -4279,6 +4303,8 @@ class ZuulTestCase(BaseTestCase):
self.statsd.start()
self.gearman_server = FakeGearmanServer(self.use_ssl)
+ self.prometheus_server = PrometheusServer()
+ self.prometheus_server.start()
self.config.set('gearman', 'port', str(self.gearman_server.port))
self.log.info("Gearman server on port %s" %
@@ -4673,6 +4699,7 @@ class ZuulTestCase(BaseTestCase):
self.statsd.join()
self.rpcclient.shutdown()
self.gearman_server.shutdown()
+ self.prometheus_server.stop()
self.fake_nodepool.stop()
self.zk_client.disconnect()
self.printHistory()
diff --git a/tests/unit/test_prometheus.py b/tests/unit/test_prometheus.py
new file mode 100644
index 000000000..ac185361d
--- /dev/null
+++ b/tests/unit/test_prometheus.py
@@ -0,0 +1,42 @@
+# Copyright 2019 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import requests
+
+from tests.base import ZuulTestCase
+
+
+class BaseTestPrometheus(ZuulTestCase):
+    """Base class for tests that read the test Prometheus endpoint."""
+
+    tenant_config_file = 'config/single-tenant/main.yaml'
+
+    def get_metrics(self):
+        """Return the served metrics as a ``{name: value}`` dict.
+
+        Blank lines, ``#`` comment lines and lines that do not split into
+        exactly two whitespace-separated fields are skipped.  Values are
+        kept as the strings found in the response.
+
+        :raises requests.RequestException: if the request fails, times
+            out or returns an HTTP error status.
+        """
+        # Bound the wait so a wedged server fails the test instead of
+        # hanging it; requests applies no timeout by default.
+        r = requests.get(
+            "http://localhost:%d" % self.prometheus_server.port,
+            timeout=10)
+        # Fail loudly on an error page rather than parsing it as metrics.
+        r.raise_for_status()
+        metrics = {}
+        for line in r.text.split('\n'):
+            if not line or line.startswith("#"):
+                continue
+            fields = line.split()
+            if len(fields) != 2:
+                continue
+            key, value = fields
+            metrics[key] = value
+        return metrics
+
+
+class TestPrometheus(BaseTestPrometheus):
+    """Checks that process metrics are present on the test endpoint."""
+
+    def test_prometheus_process_metrics(self):
+        """Resident-memory and open-fd metrics must both be reported."""
+        exposed = self.get_metrics()
+        for name in ("process_resident_memory_bytes", "process_open_fds"):
+            self.assertIn(name, exposed)
diff --git a/zuul/cmd/__init__.py b/zuul/cmd/__init__.py
index e4f6803d9..46d5902ac 100755
--- a/zuul/cmd/__init__.py
+++ b/zuul/cmd/__init__.py
@@ -28,6 +28,7 @@ import sys
import traceback
import threading
+prometheus_client = extras.try_import('prometheus_client')
yappi = extras.try_import('yappi')
objgraph = extras.try_import('objgraph')
@@ -199,6 +200,15 @@ class ZuulDaemonApp(ZuulApp, metaclass=abc.ABCMeta):
"Configured logging: {version}".format(
version=zuul_version_info.release_string()))
+    def setup_prometheus(self, section):
+        """Start the prometheus metrics server if ``section`` asks for it.
+
+        Nothing happens unless ``prometheus_port`` is set in ``section``.
+        The listen address is read from ``prometheus_addr``, with
+        ``0.0.0.0`` passed as the default.
+
+        :param section: name of the config section to read.
+        :raises RuntimeError: if a port is configured but the
+            ``prometheus_client`` module could not be imported.
+        """
+        if not self.config.has_option(section, 'prometheus_port'):
+            return
+        if not prometheus_client:
+            raise RuntimeError("prometheus_client library is missing.")
+        listen_port = int(self.config.get(section, 'prometheus_port'))
+        listen_addr = get_default(
+            self.config, section, 'prometheus_addr', '0.0.0.0')
+        prometheus_client.start_http_server(listen_port, listen_addr)
+
def main(self):
self.parseArguments()
self.readConfig()
diff --git a/zuul/cmd/executor.py b/zuul/cmd/executor.py
index 96d484afb..15ff0272b 100755
--- a/zuul/cmd/executor.py
+++ b/zuul/cmd/executor.py
@@ -87,6 +87,7 @@ class Executor(zuul.cmd.ZuulDaemonApp):
os.mkdir(self.job_dir)
self.setup_logging('executor', 'log_config')
+ self.setup_prometheus('executor')
self.log = logging.getLogger("zuul.Executor")
self.finger_port = int(
diff --git a/zuul/cmd/merger.py b/zuul/cmd/merger.py
index e5f412826..5e318fb92 100755
--- a/zuul/cmd/merger.py
+++ b/zuul/cmd/merger.py
@@ -50,6 +50,7 @@ class Merger(zuul.cmd.ZuulDaemonApp):
self.configure_connections(source_only=True)
self.setup_logging('merger', 'log_config')
+ self.setup_prometheus('merger')
self.merger = MergeServer(self.config, self.connections)
self.merger.start()
diff --git a/zuul/cmd/scheduler.py b/zuul/cmd/scheduler.py
index 04ce746b7..1a5c59438 100755
--- a/zuul/cmd/scheduler.py
+++ b/zuul/cmd/scheduler.py
@@ -132,6 +132,7 @@ class Scheduler(zuul.cmd.ZuulDaemonApp):
self.start_gear_server()
self.setup_logging('scheduler', 'log_config')
+ self.setup_prometheus('scheduler')
self.log = logging.getLogger("zuul.Scheduler")
self.configure_connections(require_sql=True)
diff --git a/zuul/cmd/web.py b/zuul/cmd/web.py
index db63b77a9..8a74b7889 100755
--- a/zuul/cmd/web.py
+++ b/zuul/cmd/web.py
@@ -84,6 +84,7 @@ class WebServer(zuul.cmd.ZuulDaemonApp):
sys.exit(0)
self.setup_logging('web', 'log_config')
+ self.setup_prometheus('web')
self.log = logging.getLogger("zuul.WebServer")
try: