diff options
Diffstat (limited to 'spec/lib/gitlab/memory/watchdog_spec.rb')
-rw-r--r-- | spec/lib/gitlab/memory/watchdog_spec.rb | 233 |
1 files changed, 112 insertions, 121 deletions
diff --git a/spec/lib/gitlab/memory/watchdog_spec.rb b/spec/lib/gitlab/memory/watchdog_spec.rb index 5d9599d6eab..668ea36d420 100644 --- a/spec/lib/gitlab/memory/watchdog_spec.rb +++ b/spec/lib/gitlab/memory/watchdog_spec.rb @@ -2,15 +2,13 @@ require 'spec_helper' -RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do +RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures, feature_category: :application_performance do context 'watchdog' do let(:configuration) { instance_double(described_class::Configuration) } let(:handler) { instance_double(described_class::NullHandler) } - let(:logger) { instance_double(::Logger) } + let(:reporter) { instance_double(described_class::EventReporter) } let(:sleep_time_seconds) { 60 } let(:threshold_violated) { false } - let(:violations_counter) { instance_double(::Prometheus::Client::Counter) } - let(:violations_handled_counter) { instance_double(::Prometheus::Client::Counter) } let(:watchdog_iterations) { 1 } let(:name) { :monitor_name } let(:payload) { { message: 'dummy_text' } } @@ -37,18 +35,6 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do end end - def stub_prometheus_metrics - allow(Gitlab::Metrics).to receive(:counter) - .with(:gitlab_memwd_violations_total, anything, anything) - .and_return(violations_counter) - allow(Gitlab::Metrics).to receive(:counter) - .with(:gitlab_memwd_violations_handled_total, anything, anything) - .and_return(violations_handled_counter) - - allow(violations_counter).to receive(:increment) - allow(violations_handled_counter).to receive(:increment) - end - describe '#initialize' do it 'initialize new configuration' do expect(described_class::Configuration).to receive(:new) @@ -59,33 +45,25 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do describe '#call' do before do - stub_prometheus_metrics - allow(Gitlab::Metrics::System).to receive(:memory_usage_rss).at_least(:once).and_return( - total: 1024 - ) - allow(::Prometheus::PidProvider).to receive(:worker_id).and_return('worker_1') - watchdog.configure do |config| config.handler = handler - config.logger = logger + config.event_reporter = reporter config.sleep_time_seconds = sleep_time_seconds - config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes end allow(handler).to receive(:call).and_return(true) - allow(logger).to receive(:info) - allow(logger).to receive(:warn) + allow(reporter).to receive(:started) + allow(reporter).to receive(:stopped) + allow(reporter).to receive(:threshold_violated) + allow(reporter).to receive(:strikes_exceeded) end - it 'logs start message once' do - expect(logger).to receive(:info).once + it 'reports started event once' do + expect(reporter).to receive(:started).once .with( - pid: Process.pid, - worker_id: 'worker_1', memwd_handler_class: handler.class.name, - memwd_sleep_time_s: sleep_time_seconds, - memwd_rss_bytes: 1024, - message: 'started') + memwd_sleep_time_s: sleep_time_seconds + ) watchdog.call end @@ -96,55 +74,50 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do watchdog.call end - context 'when gitlab_memory_watchdog ops toggle is off' do - before do - stub_feature_flags(gitlab_memory_watchdog: false) - end - - it 'does not trigger any monitor' do - expect(configuration).not_to receive(:monitors) - end - end - - context 'when process does not exceed threshold' do - it 'does not increment violations counters' do - expect(violations_counter).not_to receive(:increment) - expect(violations_handled_counter).not_to receive(:increment) - - watchdog.call - end - - it 'does not log violation' do - expect(logger).not_to receive(:warn) - - watchdog.call - end - - it 'does not execute handler' do - expect(handler).not_to receive(:call) + context 'when no monitors are configured' do + it 'reports stopped event once with correct reason' do + expect(reporter).to receive(:stopped).once + .with( + memwd_handler_class: handler.class.name, + memwd_sleep_time_s: sleep_time_seconds, + memwd_reason: 'monitors are not configured' + ) watchdog.call end end - context 'when process exceeds threshold' do - let(:threshold_violated) { true } + context 'when monitors are configured' do + before do + watchdog.configure do |config| + config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes + end + end - it 'increments violations counter' do - expect(violations_counter).to receive(:increment).with(reason: name) + it 'reports stopped event once' do + expect(reporter).to receive(:stopped).once + .with( + memwd_handler_class: handler.class.name, + memwd_sleep_time_s: sleep_time_seconds + ) watchdog.call end - context 'when process does not exceed the allowed number of strikes' do - it 'does not increment handled violations counter' do - expect(violations_handled_counter).not_to receive(:increment) + context 'when gitlab_memory_watchdog ops toggle is off' do + before do + stub_feature_flags(gitlab_memory_watchdog: false) + end - watchdog.call + it 'does not trigger any monitor' do + expect(configuration).not_to receive(:monitors) end + end - it 'does not log violation' do - expect(logger).not_to receive(:warn) + context 'when process does not exceed threshold' do + it 'does not report violations event' do + expect(reporter).not_to receive(:threshold_violated) + expect(reporter).not_to receive(:strikes_exceeded) watchdog.call end @@ -156,81 +129,94 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do end end - context 'when monitor exceeds the allowed number of strikes' do - let(:max_strikes) { 0 } + context 'when process exceeds threshold' do + let(:threshold_violated) { true } - it 'increments handled violations counter' do - expect(violations_handled_counter).to receive(:increment).with(reason: name) + it 'reports threshold violated event' do + expect(reporter).to receive(:threshold_violated).with(name) watchdog.call end - it 'logs violation' do - expect(logger).to receive(:warn) - .with( - pid: Process.pid, - worker_id: 'worker_1', - memwd_handler_class: handler.class.name, - memwd_sleep_time_s: sleep_time_seconds, - memwd_rss_bytes: 1024, - memwd_cur_strikes: 1, - memwd_max_strikes: max_strikes, - message: 'dummy_text') + context 'when process does not exceed the allowed number of strikes' do + it 'does not report strikes exceeded event' do + expect(reporter).not_to receive(:strikes_exceeded) - watchdog.call - end + watchdog.call + end - it 'executes handler' do - expect(handler).to receive(:call) + it 'does not execute handler' do + expect(handler).not_to receive(:call) - watchdog.call + watchdog.call + end end - context 'when enforce_memory_watchdog ops toggle is off' do - before do - stub_feature_flags(enforce_memory_watchdog: false) + context 'when monitor exceeds the allowed number of strikes' do + let(:max_strikes) { 0 } + + it 'reports strikes exceeded event' do + expect(reporter).to receive(:strikes_exceeded) + .with( + name, + memwd_handler_class: handler.class.name, + memwd_sleep_time_s: sleep_time_seconds, + memwd_cur_strikes: 1, + memwd_max_strikes: max_strikes, + message: "dummy_text" + ) + + watchdog.call end - it 'always uses the NullHandler' do - expect(handler).not_to receive(:call) - expect(described_class::NullHandler.instance).to receive(:call).and_return(true) + it 'executes handler and stops the watchdog' do + expect(handler).to receive(:call).and_return(true) + expect(reporter).to receive(:stopped).once + .with( + memwd_handler_class: handler.class.name, + memwd_sleep_time_s: sleep_time_seconds, + memwd_reason: 'successfully handled' + ) watchdog.call end - end - context 'when multiple monitors exceeds allowed number of strikes' do - before do - watchdog.configure do |config| - config.handler = handler - config.logger = logger - config.sleep_time_seconds = sleep_time_seconds - config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes - config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes + it 'schedules a heap dump' do + expect(Gitlab::Memory::Reports::HeapDump).to receive(:enqueue!) + + watchdog.call + end + + context 'when enforce_memory_watchdog ops toggle is off' do + before do + stub_feature_flags(enforce_memory_watchdog: false) + end + + it 'always uses the NullHandler' do + expect(handler).not_to receive(:call) + expect(described_class::NullHandler.instance).to receive(:call).and_return(true) + + watchdog.call end end - it 'only calls the handler once' do - expect(handler).to receive(:call).once.and_return(true) + context 'when multiple monitors exceeds allowed number of strikes' do + before do + watchdog.configure do |config| + config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes + config.monitors.push monitor_class, threshold_violated, payload, max_strikes: max_strikes + end + end + + it 'only calls the handler once' do + expect(handler).to receive(:call).once.and_return(true) - watchdog.call + watchdog.call + end end end end end - - it 'logs stop message once' do - expect(logger).to receive(:info).once - .with( - pid: Process.pid, - worker_id: 'worker_1', - memwd_handler_class: handler.class.name, - memwd_sleep_time_s: sleep_time_seconds, - memwd_rss_bytes: 1024, - message: 'stopped') - - watchdog.call - end end describe '#configure' do @@ -255,6 +241,10 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do subject(:handler) { described_class::TermProcessHandler.new(42) } describe '#call' do + before do + allow(Process).to receive(:kill) + end + it 'sends SIGTERM to the current process' do expect(Process).to receive(:kill).with(:TERM, 42) @@ -274,11 +264,12 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do before do stub_const('::Puma::Cluster::WorkerHandle', puma_worker_handle_class) + allow(puma_worker_handle_class).to receive(:new).and_return(puma_worker_handle) + allow(puma_worker_handle).to receive(:term) end describe '#call' do it 'invokes orderly termination via Puma API' do - expect(puma_worker_handle_class).to receive(:new).and_return(puma_worker_handle) expect(puma_worker_handle).to receive(:term) expect(handler.call).to be(true) |