summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Provaznik <jprovaznik@gitlab.com>2019-05-29 12:27:44 +0000
committerKamil TrzciƄski <ayufan@ayufan.eu>2019-05-29 12:27:44 +0000
commita5adc6a024012f727ac32c440bb42f4634ae0605 (patch)
treec02e44b595f6bdc67bc647234dfc4a19972aba6b
parentd017d2d93d6afbd597cbf6683fcd7f90e88a05de (diff)
downloadgitlab-ce-a5adc6a024012f727ac32c440bb42f4634ae0605.tar.gz
Add Puma sampler
This sampler gathers Puma-specific metrics which can be used by Prometheus then.
-rw-r--r--config/gitlab.yml.example2
-rw-r--r--config/initializers/1_settings.rb1
-rw-r--r--config/initializers/7_prometheus_metrics.rb8
-rw-r--r--doc/administration/monitoring/prometheus/gitlab_metrics.md18
-rw-r--r--lib/gitlab/cluster/lifecycle_events.rb8
-rw-r--r--lib/gitlab/metrics/samplers/puma_sampler.rb92
-rw-r--r--spec/lib/gitlab/metrics/samplers/puma_sampler_spec.rb96
7 files changed, 224 insertions, 1 deletions
diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example
index 23377b43f78..c83f569d885 100644
--- a/config/gitlab.yml.example
+++ b/config/gitlab.yml.example
@@ -752,6 +752,8 @@ production: &base
monitoring:
# Time between sampling of unicorn socket metrics, in seconds
# unicorn_sampler_interval: 10
+ # Time between sampling of Puma metrics, in seconds
+ # puma_sampler_interval: 5
# IP whitelist to access monitoring endpoints
ip_whitelist:
- 127.0.0.0/8
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index d56bd7654af..0c8d94ccaed 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -491,6 +491,7 @@ Settings.webpack.dev_server['port'] ||= 3808
Settings['monitoring'] ||= Settingslogic.new({})
Settings.monitoring['ip_whitelist'] ||= ['127.0.0.1/8']
Settings.monitoring['unicorn_sampler_interval'] ||= 10
+Settings.monitoring['puma_sampler_interval'] ||= 5
Settings.monitoring['ruby_sampler_interval'] ||= 60
Settings.monitoring['sidekiq_exporter'] ||= Settingslogic.new({})
Settings.monitoring.sidekiq_exporter['enabled'] ||= false
diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb
index 8052880cc3d..68f8487d377 100644
--- a/config/initializers/7_prometheus_metrics.rb
+++ b/config/initializers/7_prometheus_metrics.rb
@@ -29,12 +29,18 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
Gitlab::Cluster::LifecycleEvents.on_worker_start do
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
- unless Sidekiq.server?
+ if defined?(::Unicorn)
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
end
+
+ if defined?(::Puma)
+ Gitlab::Cluster::LifecycleEvents.on_master_start do
+ Gitlab::Metrics::Samplers::PumaSampler.initialize_instance(Settings.monitoring.puma_sampler_interval).start
+ end
+ end
end
Gitlab::Cluster::LifecycleEvents.on_master_restart do
diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md
index c243dd9edbb..9c75403dd4c 100644
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -103,6 +103,24 @@ Some basic Ruby runtime metrics are available:
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
+## Puma Metrics **[EXPERIMENTAL]**
+
+When Puma is used instead of Unicorn, following metrics are available:
+
+| Metric | Type | Since | Description |
+|:-------------------------------------------- |:------- |:----- |:----------- |
+| puma_workers | Gauge | 12.0 | Total number of workers |
+| puma_running_workers | Gauge | 12.0 | Number of booted workers |
+| puma_stale_workers | Gauge | 12.0 | Number of old workers |
+| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) |
+| puma_running | Gauge | 12.0 | Number of running threads |
+| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread |
+| puma_active_connections | Gauge | 12.0 | Number of threads processing a request |
+| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now |
+| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads |
+| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request |
+
+
## Metrics shared directory
GitLab's Prometheus client requires a directory to store metrics data shared between multi-process services.
diff --git a/lib/gitlab/cluster/lifecycle_events.rb b/lib/gitlab/cluster/lifecycle_events.rb
index b05dca409d1..e0f9eb59924 100644
--- a/lib/gitlab/cluster/lifecycle_events.rb
+++ b/lib/gitlab/cluster/lifecycle_events.rb
@@ -44,6 +44,14 @@ module Gitlab
(@master_restart_hooks ||= []) << block
end
+ def on_master_start(&block)
+ if in_clustered_environment?
+ on_before_fork(&block)
+ else
+ on_worker_start(&block)
+ end
+ end
+
#
# Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.)
#
diff --git a/lib/gitlab/metrics/samplers/puma_sampler.rb b/lib/gitlab/metrics/samplers/puma_sampler.rb
new file mode 100644
index 00000000000..87669b253bc
--- /dev/null
+++ b/lib/gitlab/metrics/samplers/puma_sampler.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+require 'puma/state_file'
+
+module Gitlab
+ module Metrics
+ module Samplers
+ class PumaSampler < BaseSampler
+ def metrics
+ @metrics ||= init_metrics
+ end
+
+ def init_metrics
+ {
+ puma_workers: ::Gitlab::Metrics.gauge(:puma_workers, 'Total number of workers'),
+ puma_running_workers: ::Gitlab::Metrics.gauge(:puma_running_workers, 'Number of active workers'),
+ puma_stale_workers: ::Gitlab::Metrics.gauge(:puma_stale_workers, 'Number of stale workers'),
+ puma_phase: ::Gitlab::Metrics.gauge(:puma_phase, 'Phase number (increased during phased restarts)'),
+ puma_running: ::Gitlab::Metrics.gauge(:puma_running, 'Number of running threads'),
+ puma_queued_connections: ::Gitlab::Metrics.gauge(:puma_queued_connections, 'Number of connections in that worker\'s "todo" set waiting for a worker thread'),
+ puma_active_connections: ::Gitlab::Metrics.gauge(:puma_active_connections, 'Number of threads processing a request'),
+ puma_pool_capacity: ::Gitlab::Metrics.gauge(:puma_pool_capacity, 'Number of requests the worker is capable of taking right now'),
+ puma_max_threads: ::Gitlab::Metrics.gauge(:puma_max_threads, 'Maximum number of worker threads'),
+ puma_idle_threads: ::Gitlab::Metrics.gauge(:puma_idle_threads, 'Number of spawned threads which are not processing a request')
+ }
+ end
+
+ def sample
+ json_stats = puma_stats
+ return unless json_stats
+
+ stats = JSON.parse(json_stats)
+
+ if cluster?(stats)
+ sample_cluster(stats)
+ else
+ sample_single_worker(stats)
+ end
+ end
+
+ private
+
+ def puma_stats
+ Puma.stats
+ rescue NoMethodError
+ Rails.logger.info "PumaSampler: stats are not available yet, waiting for Puma to boot"
+ nil
+ end
+
+ def sample_cluster(stats)
+ set_master_metrics(stats)
+
+ stats['worker_status'].each do |worker|
+ labels = { worker: "worker_#{worker['index']}" }
+
+ metrics[:puma_phase].set(labels, worker['phase'])
+ set_worker_metrics(worker['last_status'], labels)
+ end
+ end
+
+ def sample_single_worker(stats)
+ metrics[:puma_workers].set({}, 1)
+ metrics[:puma_running_workers].set({}, 1)
+
+ set_worker_metrics(stats)
+ end
+
+ def cluster?(stats)
+ stats['worker_status'].present?
+ end
+
+ def set_master_metrics(stats)
+ labels = { worker: "master" }
+
+ metrics[:puma_workers].set(labels, stats['workers'])
+ metrics[:puma_running_workers].set(labels, stats['booted_workers'])
+ metrics[:puma_stale_workers].set(labels, stats['old_workers'])
+ metrics[:puma_phase].set(labels, stats['phase'])
+ end
+
+ def set_worker_metrics(stats, labels = {})
+ metrics[:puma_running].set(labels, stats['running'])
+ metrics[:puma_queued_connections].set(labels, stats['backlog'])
+ metrics[:puma_active_connections].set(labels, stats['max_threads'] - stats['pool_capacity'])
+ metrics[:puma_pool_capacity].set(labels, stats['pool_capacity'])
+ metrics[:puma_max_threads].set(labels, stats['max_threads'])
+ metrics[:puma_idle_threads].set(labels, stats['running'] + stats['pool_capacity'] - stats['max_threads'])
+ end
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/metrics/samplers/puma_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/puma_sampler_spec.rb
new file mode 100644
index 00000000000..c471c30a194
--- /dev/null
+++ b/spec/lib/gitlab/metrics/samplers/puma_sampler_spec.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::Metrics::Samplers::PumaSampler do
+ subject { described_class.new(5) }
+ let(:null_metric) { double('null_metric', set: nil, observe: nil) }
+
+ before do
+ allow(Gitlab::Metrics::NullMetric).to receive(:instance).and_return(null_metric)
+ end
+
+ describe '#sample' do
+ before do
+ expect(subject).to receive(:puma_stats).and_return(puma_stats)
+ end
+
+ context 'in cluster mode' do
+ let(:puma_stats) do
+ <<~EOS
+ {
+ "workers": 2,
+ "phase": 2,
+ "booted_workers": 2,
+ "old_workers": 0,
+ "worker_status": [{
+ "pid": 32534,
+ "index": 0,
+ "phase": 1,
+ "booted": true,
+ "last_checkin": "2019-05-15T07:57:55Z",
+ "last_status": {
+ "backlog":0,
+ "running":1,
+ "pool_capacity":4,
+ "max_threads": 4
+ }
+ }]
+ }
+ EOS
+ end
+
+ it 'samples master statistics' do
+ labels = { worker: 'master' }
+
+ expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 2)
+ expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 2)
+ expect(subject.metrics[:puma_stale_workers]).to receive(:set).with(labels, 0)
+ expect(subject.metrics[:puma_phase]).to receive(:set).once.with(labels, 2)
+ expect(subject.metrics[:puma_phase]).to receive(:set).once.with({ worker: 'worker_0' }, 1)
+
+ subject.sample
+ end
+
+ it 'samples worker statistics' do
+ labels = { worker: 'worker_0' }
+
+ expect_worker_stats(labels)
+
+ subject.sample
+ end
+ end
+
+ context 'in single mode' do
+ let(:puma_stats) do
+ <<~EOS
+ {
+ "backlog":0,
+ "running":1,
+ "pool_capacity":4,
+ "max_threads": 4
+ }
+ EOS
+ end
+
+ it 'samples worker statistics' do
+ labels = {}
+
+ expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 1)
+ expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 1)
+ expect_worker_stats(labels)
+
+ subject.sample
+ end
+ end
+ end
+
+ def expect_worker_stats(labels)
+ expect(subject.metrics[:puma_queued_connections]).to receive(:set).with(labels, 0)
+ expect(subject.metrics[:puma_active_connections]).to receive(:set).with(labels, 0)
+ expect(subject.metrics[:puma_running]).to receive(:set).with(labels, 1)
+ expect(subject.metrics[:puma_pool_capacity]).to receive(:set).with(labels, 4)
+ expect(subject.metrics[:puma_max_threads]).to receive(:set).with(labels, 4)
+ expect(subject.metrics[:puma_idle_threads]).to receive(:set).with(labels, 1)
+ end
+end