diff options
-rw-r--r-- | config/gitlab.yml.example | 2 | ||||
-rw-r--r-- | config/initializers/1_settings.rb | 1 | ||||
-rw-r--r-- | config/initializers/7_prometheus_metrics.rb | 8 | ||||
-rw-r--r-- | doc/administration/monitoring/prometheus/gitlab_metrics.md | 18 | ||||
-rw-r--r-- | lib/gitlab/cluster/lifecycle_events.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/metrics/samplers/puma_sampler.rb | 92 | ||||
-rw-r--r-- | spec/lib/gitlab/metrics/samplers/puma_sampler_spec.rb | 96 |
7 files changed, 224 insertions, 1 deletions
diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 23377b43f78..c83f569d885 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -752,6 +752,8 @@ production: &base monitoring: # Time between sampling of unicorn socket metrics, in seconds # unicorn_sampler_interval: 10 + # Time between sampling of Puma metrics, in seconds + # puma_sampler_interval: 5 # IP whitelist to access monitoring endpoints ip_whitelist: - 127.0.0.0/8 diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index d56bd7654af..0c8d94ccaed 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -491,6 +491,7 @@ Settings.webpack.dev_server['port'] ||= 3808 Settings['monitoring'] ||= Settingslogic.new({}) Settings.monitoring['ip_whitelist'] ||= ['127.0.0.1/8'] Settings.monitoring['unicorn_sampler_interval'] ||= 10 +Settings.monitoring['puma_sampler_interval'] ||= 5 Settings.monitoring['ruby_sampler_interval'] ||= 60 Settings.monitoring['sidekiq_exporter'] ||= Settingslogic.new({}) Settings.monitoring.sidekiq_exporter['enabled'] ||= false diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb index 8052880cc3d..68f8487d377 100644 --- a/config/initializers/7_prometheus_metrics.rb +++ b/config/initializers/7_prometheus_metrics.rb @@ -29,12 +29,18 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? Gitlab::Cluster::LifecycleEvents.on_worker_start do defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change - unless Sidekiq.server? 
+ if defined?(::Unicorn) Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start end Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start end + + if defined?(::Puma) + Gitlab::Cluster::LifecycleEvents.on_master_start do + Gitlab::Metrics::Samplers::PumaSampler.initialize_instance(Settings.monitoring.puma_sampler_interval).start + end + end end Gitlab::Cluster::LifecycleEvents.on_master_restart do diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index c243dd9edbb..9c75403dd4c 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -103,6 +103,24 @@ Some basic Ruby runtime metrics are available: [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat +## Puma Metrics **[EXPERIMENTAL]** + +When Puma is used instead of Unicorn, following metrics are available: + +| Metric | Type | Since | Description | +|:-------------------------------------------- |:------- |:----- |:----------- | +| puma_workers | Gauge | 12.0 | Total number of workers | +| puma_running_workers | Gauge | 12.0 | Number of booted workers | +| puma_stale_workers | Gauge | 12.0 | Number of old workers | +| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) | +| puma_running | Gauge | 12.0 | Number of running threads | +| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread | +| puma_active_connections | Gauge | 12.0 | Number of threads processing a request | +| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now | +| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads | +| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request | + 
# frozen_string_literal: true

require 'puma/state_file'

module Gitlab
  module Metrics
    module Samplers
      # Sampler that reads Puma's statistics and publishes them as gauges.
      #
      # The JSON returned by +Puma.stats+ is parsed on every #sample call.
      # When the payload contains a 'worker_status' list it is handled as a
      # clustered Puma: master-level gauges are written with the label
      # <tt>worker: "master"</tt> and per-worker gauges with
      # <tt>worker: "worker_<index>"</tt>. Otherwise the payload is handled as
      # a single (non-clustered) server and gauges are written without labels.
      class PumaSampler < BaseSampler
        # Gauges keyed by metric name; built on first access and memoized.
        #
        # @return [Hash] metric name (Symbol) => gauge
        def metrics
          @metrics ||= init_metrics
        end

        # Registers every gauge this sampler writes to.
        #
        # @return [Hash] metric name (Symbol) => gauge
        def init_metrics
          {
            puma_workers: ::Gitlab::Metrics.gauge(:puma_workers, 'Total number of workers'),
            puma_running_workers: ::Gitlab::Metrics.gauge(:puma_running_workers, 'Number of active workers'),
            puma_stale_workers: ::Gitlab::Metrics.gauge(:puma_stale_workers, 'Number of stale workers'),
            puma_phase: ::Gitlab::Metrics.gauge(:puma_phase, 'Phase number (increased during phased restarts)'),
            puma_running: ::Gitlab::Metrics.gauge(:puma_running, 'Number of running threads'),
            puma_queued_connections: ::Gitlab::Metrics.gauge(:puma_queued_connections, 'Number of connections in that worker\'s "todo" set waiting for a worker thread'),
            puma_active_connections: ::Gitlab::Metrics.gauge(:puma_active_connections, 'Number of threads processing a request'),
            puma_pool_capacity: ::Gitlab::Metrics.gauge(:puma_pool_capacity, 'Number of requests the worker is capable of taking right now'),
            puma_max_threads: ::Gitlab::Metrics.gauge(:puma_max_threads, 'Maximum number of worker threads'),
            puma_idle_threads: ::Gitlab::Metrics.gauge(:puma_idle_threads, 'Number of spawned threads which are not processing a request')
          }
        end

        # Takes one sample. Does nothing when Puma's stats are not available.
        def sample
          json_stats = puma_stats
          return unless json_stats

          stats = JSON.parse(json_stats)

          if cluster?(stats)
            sample_cluster(stats)
          else
            sample_single_worker(stats)
          end
        end

        private

        # Returns the raw JSON stats string, or nil when Puma.stats raises
        # NoMethodError, which is logged and treated as "not booted yet".
        def puma_stats
          Puma.stats
        rescue NoMethodError
          Rails.logger.info "PumaSampler: stats are not available yet, waiting for Puma to boot"
          nil
        end

        # Writes master gauges, then the phase and thread-pool gauges of every
        # worker listed in 'worker_status'.
        def sample_cluster(stats)
          set_master_metrics(stats)

          stats['worker_status'].each do |worker|
            labels = { worker: "worker_#{worker['index']}" }

            metrics[:puma_phase].set(labels, worker['phase'])

            # A blank 'last_status' has none of the thread-pool counters, and
            # the arithmetic in set_worker_metrics would raise NoMethodError
            # on nil. Skip those gauges until the worker reports real values.
            # NOTE(review): Puma appears to report an empty last_status for a
            # worker that has not checked in yet - confirm for the Puma
            # version in use.
            last_status = worker['last_status']
            set_worker_metrics(last_status, labels) if last_status.present?
          end
        end

        # Single mode: there is exactly one worker and it is running, so both
        # worker-count gauges are pinned to 1.
        def sample_single_worker(stats)
          metrics[:puma_workers].set({}, 1)
          metrics[:puma_running_workers].set({}, 1)

          set_worker_metrics(stats)
        end

        # A payload is clustered when it carries a 'worker_status' entry at
        # all. An empty list still counts as clustered: otherwise the master
        # payload would be read as thread-pool stats, which it does not
        # contain.
        def cluster?(stats)
          !stats['worker_status'].nil?
        end

        # Master-level gauges, all labelled worker: "master".
        def set_master_metrics(stats)
          labels = { worker: "master" }

          metrics[:puma_workers].set(labels, stats['workers'])
          metrics[:puma_running_workers].set(labels, stats['booted_workers'])
          metrics[:puma_stale_workers].set(labels, stats['old_workers'])
          metrics[:puma_phase].set(labels, stats['phase'])
        end

        # Thread-pool gauges for one worker (or for the single server).
        #
        # Derived values:
        #   active connections = max_threads - pool_capacity
        #   idle threads       = running - active connections
        #
        # @param stats [Hash] must contain 'running', 'backlog',
        #   'pool_capacity' and 'max_threads'
        # @param labels [Hash] labels applied to every gauge written here
        def set_worker_metrics(stats, labels = {})
          metrics[:puma_running].set(labels, stats['running'])
          metrics[:puma_queued_connections].set(labels, stats['backlog'])
          metrics[:puma_active_connections].set(labels, stats['max_threads'] - stats['pool_capacity'])
          metrics[:puma_pool_capacity].set(labels, stats['pool_capacity'])
          metrics[:puma_max_threads].set(labels, stats['max_threads'])
          metrics[:puma_idle_threads].set(labels, stats['running'] + stats['pool_capacity'] - stats['max_threads'])
        end
      end
    end
  end
end
+ expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 2) + expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 2) + expect(subject.metrics[:puma_stale_workers]).to receive(:set).with(labels, 0) + expect(subject.metrics[:puma_phase]).to receive(:set).once.with(labels, 2) + expect(subject.metrics[:puma_phase]).to receive(:set).once.with({ worker: 'worker_0' }, 1) + + subject.sample + end + + it 'samples worker statistics' do + labels = { worker: 'worker_0' } + + expect_worker_stats(labels) + + subject.sample + end + end + + context 'in single mode' do + let(:puma_stats) do + <<~EOS + { + "backlog":0, + "running":1, + "pool_capacity":4, + "max_threads": 4 + } + EOS + end + + it 'samples worker statistics' do + labels = {} + + expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 1) + expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 1) + expect_worker_stats(labels) + + subject.sample + end + end + end + + def expect_worker_stats(labels) + expect(subject.metrics[:puma_queued_connections]).to receive(:set).with(labels, 0) + expect(subject.metrics[:puma_active_connections]).to receive(:set).with(labels, 0) + expect(subject.metrics[:puma_running]).to receive(:set).with(labels, 1) + expect(subject.metrics[:puma_pool_capacity]).to receive(:set).with(labels, 4) + expect(subject.metrics[:puma_max_threads]).to receive(:set).with(labels, 4) + expect(subject.metrics[:puma_idle_threads]).to receive(:set).with(labels, 1) + end +end |