From 497acb167078d62c0cec7bc5ff9be1be6cd2fe4a Mon Sep 17 00:00:00 2001 From: Jan Provaznik Date: Mon, 10 Jun 2019 16:09:40 +0000 Subject: Add metric for measuring PumaWorkerKiller activity PumaWorkerKiller is used for periodically checking and killing workers (the biggest one) if overall memory reaches the specified limit. This metric allows us to watch the number of killed workers. --- config/puma.example.development.rb | 1 - .../monitoring/prometheus/gitlab_metrics.md | 1 + .../cluster/puma_worker_killer_initializer.rb | 3 +++ lib/gitlab/cluster/puma_worker_killer_observer.rb | 24 +++++++++++++++++++ .../cluster/puma_worker_killer_observer_spec.rb | 27 ++++++++++++++++++++++ spec/rack_servers/puma_spec.rb | 2 +- 6 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 lib/gitlab/cluster/puma_worker_killer_observer.rb create mode 100644 spec/lib/gitlab/cluster/puma_worker_killer_observer_spec.rb diff --git a/config/puma.example.development.rb b/config/puma.example.development.rb index 490c940077a..9df24bf74e3 100644 --- a/config/puma.example.development.rb +++ b/config/puma.example.development.rb @@ -42,7 +42,6 @@ bind 'unix:///home/git/gitlab.socket' workers 2 require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events" -require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer" on_restart do # Signal application hooks that we're about to restart diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 3dcd1593099..84b71ae6f1c 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -125,6 +125,7 @@ When Puma is used instead of Unicorn, following metrics are available: | puma_max_threads | Gauge | 12.0 | Maximum number of worker threads | | puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request | | rack_state_total | 
Gauge | 12.0 | Number of requests in a given rack state | +| puma_killer_terminations_total | Counter | 12.0 | Number of workers terminated by PumaWorkerKiller | ## Metrics shared directory diff --git a/lib/gitlab/cluster/puma_worker_killer_initializer.rb b/lib/gitlab/cluster/puma_worker_killer_initializer.rb index 4ed9a9a02ab..4affc52b7b0 100644 --- a/lib/gitlab/cluster/puma_worker_killer_initializer.rb +++ b/lib/gitlab/cluster/puma_worker_killer_initializer.rb @@ -27,6 +27,9 @@ module Gitlab # is restarted already, thus periodically restarting workers shouldn't be # needed. config.rolling_restart_frequency = false + + observer = Gitlab::Cluster::PumaWorkerKillerObserver.new + config.pre_term = observer.callback end PumaWorkerKiller.start diff --git a/lib/gitlab/cluster/puma_worker_killer_observer.rb b/lib/gitlab/cluster/puma_worker_killer_observer.rb new file mode 100644 index 00000000000..3b4ebc3fbae --- /dev/null +++ b/lib/gitlab/cluster/puma_worker_killer_observer.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module Cluster + class PumaWorkerKillerObserver + def initialize + @counter = Gitlab::Metrics.counter(:puma_killer_terminations_total, 'Number of workers terminated by PumaWorkerKiller') + end + + # returns the callable (a Method object) to be used as the observer callback + def callback + method(:log_termination) + end + + private + + def log_termination(worker) + labels = { worker: "worker_#{worker.index}" } + + @counter.increment(labels) + end + end + end +end diff --git a/spec/lib/gitlab/cluster/puma_worker_killer_observer_spec.rb b/spec/lib/gitlab/cluster/puma_worker_killer_observer_spec.rb new file mode 100644 index 00000000000..180520b27e7 --- /dev/null +++ b/spec/lib/gitlab/cluster/puma_worker_killer_observer_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Cluster::PumaWorkerKillerObserver do + let(:counter) { Gitlab::Metrics::NullMetric.instance } + + before do + allow(Gitlab::Metrics).to 
receive(:counter) + .with(any_args) + .and_return(counter) + end + + describe '#callback' do + subject { described_class.new } + + it 'increments timeout counter' do + worker = double(index: 0) + + expect(counter) + .to receive(:increment) + .with({ worker: 'worker_0' }) + + subject.callback.call(worker) + end + end +end diff --git a/spec/rack_servers/puma_spec.rb b/spec/rack_servers/puma_spec.rb index 8290473821c..a4b37905af3 100644 --- a/spec/rack_servers/puma_spec.rb +++ b/spec/rack_servers/puma_spec.rb @@ -20,7 +20,7 @@ describe 'Puma' do File.write(config_path, config_lines) cmd = %W[puma -e test -C #{config_path} #{File.join(__dir__, 'configs/config.ru')}] - @puma_master_pid = spawn(*cmd) + @puma_master_pid = spawn({ 'DISABLE_PUMA_WORKER_KILLER' => '1' }, *cmd) wait_puma_boot!(@puma_master_pid, File.join(project_root, 'tmp/tests/puma-worker-ready')) WebMock.allow_net_connect! end -- cgit v1.2.1