summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@selenight.nl>2017-10-31 10:33:30 +0100
committerDouwe Maan <douwe@selenight.nl>2017-11-02 15:33:19 +0100
commitccb5bad6b8ff73df978b03a1ede8051ea78b2ee9 (patch)
tree16d4f49e46952ead9652f0eee441102464dfca56
parentb8f5239f2b7592fadfe6f4c6fa30b565c5a09d9f (diff)
downloadgitlab-ce-dm-sidekiq-sigstp.tar.gz
Send SIGSTP before SIGTERM to actually give Sidekiq jobs 30s to finish when the memory killer kicks indm-sidekiq-sigstp
-rw-r--r--doc/administration/operations/sidekiq_memory_killer.md4
-rw-r--r--lib/gitlab/sidekiq_middleware/memory_killer.rb41
-rw-r--r--spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb63
3 files changed, 91 insertions, 17 deletions
diff --git a/doc/administration/operations/sidekiq_memory_killer.md b/doc/administration/operations/sidekiq_memory_killer.md
index b5e78348989..cbffd883774 100644
--- a/doc/administration/operations/sidekiq_memory_killer.md
+++ b/doc/administration/operations/sidekiq_memory_killer.md
@@ -28,7 +28,7 @@ The MemoryKiller is controlled using environment variables.
delayed shutdown is triggered. The default value for Omnibus packages is set
[in the omnibus-gitlab
repository](https://gitlab.com/gitlab-org/omnibus-gitlab/blob/master/files/gitlab-cookbooks/gitlab/attributes/default.rb).
-- `SIDEKIQ_MEMORY_KILLER_GRACE_TIME`: defaults 900 seconds (15 minutes). When
+- `SIDEKIQ_MEMORY_KILLER_GRACE_TIME`: defaults to 900 seconds (15 minutes). When
a shutdown is triggered, the Sidekiq process will keep working normally for
another 15 minutes.
- `SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT`: defaults to 30 seconds. When the grace
@@ -36,5 +36,3 @@ The MemoryKiller is controlled using environment variables.
Existing jobs get 30 seconds to finish. After that, the MemoryKiller tells
Sidekiq to shut down, and an external supervision mechanism (e.g. Runit) must
restart Sidekiq.
-- `SIDEKIQ_MEMORY_KILLER_SHUTDOWN_SIGNAL`: defaults to `SIGKILL`. The name of
- the final signal sent to the Sidekiq process when we want it to shut down.
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb
index d7d24eeb37b..2bfb7caefd9 100644
--- a/lib/gitlab/sidekiq_middleware/memory_killer.rb
+++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb
@@ -7,7 +7,6 @@ module Gitlab
GRACE_TIME = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i
# Wait 30 seconds for running jobs to finish during graceful shutdown
SHUTDOWN_WAIT = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i
- SHUTDOWN_SIGNAL = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_SIGNAL'] || 'SIGKILL').to_s
# Create a mutex used to ensure there will be only one thread waiting to
# shut Sidekiq down
@@ -15,6 +14,7 @@ module Gitlab
def call(worker, job, queue)
yield
+
current_rss = get_rss
return unless MAX_RSS > 0 && current_rss > MAX_RSS
@@ -23,32 +23,45 @@ module Gitlab
# Return if another thread is already waiting to shut Sidekiq down
return unless MUTEX.try_lock
- Sidekiq.logger.warn "current RSS #{current_rss} exceeds maximum RSS "\
- "#{MAX_RSS}"
- Sidekiq.logger.warn "this thread will shut down PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']} "\
- "in #{GRACE_TIME} seconds"
- sleep(GRACE_TIME)
+ Sidekiq.logger.warn "Sidekiq worker PID-#{pid} current RSS #{current_rss}"\
+ " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}"
+ Sidekiq.logger.warn "Sidekiq worker PID-#{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later"
- Sidekiq.logger.warn "sending SIGTERM to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- Process.kill('SIGTERM', Process.pid)
+ # Wait `GRACE_TIME` to give the memory intensive job time to finish.
+ # Then, tell Sidekiq to stop fetching new jobs.
+ wait_and_signal(GRACE_TIME, 'SIGSTP', 'stop fetching new jobs')
- Sidekiq.logger.warn "waiting #{SHUTDOWN_WAIT} seconds before sending "\
- "#{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- sleep(SHUTDOWN_WAIT)
+ # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
+ # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
+ # moments to finish, killing and requeuing them if they didn't, and
+ # then terminating itself.
+ wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
- Sidekiq.logger.warn "sending #{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- Process.kill(SHUTDOWN_SIGNAL, Process.pid)
+ # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
+ wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
end
end
private
def get_rss
- output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{Process.pid}))
+ output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}))
return 0 unless status.zero?
output.to_i
end
+
+ def wait_and_signal(time, signal, explanation)
+ Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+ sleep(time)
+
+ Sidekiq.logger.warn "sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+ Process.kill(signal, pid)
+ end
+
+ def pid
+ Process.pid
+ end
end
end
end
diff --git a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
new file mode 100644
index 00000000000..8fdbbacd04d
--- /dev/null
+++ b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
@@ -0,0 +1,63 @@
+require 'spec_helper'
+
+describe Gitlab::SidekiqMiddleware::MemoryKiller do
+ subject { described_class.new }
+ let(:pid) { 999 }
+
+ let(:worker) { double(:worker, class: 'TestWorker') }
+ let(:job) { { 'jid' => 123 } }
+ let(:queue) { 'test_queue' }
+
+ def run
+ thread = subject.call(worker, job, queue) { nil }
+ thread&.join
+ end
+
+ before do
+ allow(subject).to receive(:get_rss).and_return(10.kilobytes)
+ allow(subject).to receive(:pid).and_return(pid)
+ end
+
+ context 'when MAX_RSS is set to 0' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 0)
+ end
+
+ it 'does nothing' do
+ expect(subject).not_to receive(:sleep)
+
+ run
+ end
+ end
+
+ context 'when MAX_RSS is exceeded' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 5.kilobytes)
+ end
+
+ it 'sends the STP, TERM and KILL signals at expected times' do
+ expect(subject).to receive(:sleep).with(15 * 60).ordered
+ expect(Process).to receive(:kill).with('SIGSTP', pid).ordered
+
+ expect(subject).to receive(:sleep).with(30).ordered
+ expect(Process).to receive(:kill).with('SIGTERM', pid).ordered
+
+ expect(subject).to receive(:sleep).with(10).ordered
+ expect(Process).to receive(:kill).with('SIGKILL', pid).ordered
+
+ run
+ end
+ end
+
+ context 'when MAX_RSS is not exceeded' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 15.kilobytes)
+ end
+
+ it 'does nothing' do
+ expect(subject).not_to receive(:sleep)
+
+ run
+ end
+ end
+end