summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2017-11-02 15:33:53 +0000
committerSean McGivern <sean@mcgivern.me.uk>2017-11-02 15:33:53 +0000
commit62ab17798d10fe2f66498fdffda427ffde291b73 (patch)
treeac02ffe64cff6ef5bfbe5528edaeb7a78271852b
parent56dccc2e1089e2866d0442cac379b3f93c98a55f (diff)
parentccb5bad6b8ff73df978b03a1ede8051ea78b2ee9 (diff)
downloadgitlab-ce-62ab17798d10fe2f66498fdffda427ffde291b73.tar.gz
Merge branch 'dm-sidekiq-sigstp' into 'master'
Send SIGSTP before SIGTERM to actually give Sidekiq jobs 30s to finish when the memory killer kicks in See merge request gitlab-org/gitlab-ce!15102
-rw-r--r--doc/administration/operations/sidekiq_memory_killer.md4
-rw-r--r--lib/gitlab/sidekiq_middleware/memory_killer.rb41
-rw-r--r--spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb63
3 files changed, 91 insertions, 17 deletions
diff --git a/doc/administration/operations/sidekiq_memory_killer.md b/doc/administration/operations/sidekiq_memory_killer.md
index b5e78348989..cbffd883774 100644
--- a/doc/administration/operations/sidekiq_memory_killer.md
+++ b/doc/administration/operations/sidekiq_memory_killer.md
@@ -28,7 +28,7 @@ The MemoryKiller is controlled using environment variables.
delayed shutdown is triggered. The default value for Omnibus packages is set
[in the omnibus-gitlab
repository](https://gitlab.com/gitlab-org/omnibus-gitlab/blob/master/files/gitlab-cookbooks/gitlab/attributes/default.rb).
-- `SIDEKIQ_MEMORY_KILLER_GRACE_TIME`: defaults 900 seconds (15 minutes). When
+- `SIDEKIQ_MEMORY_KILLER_GRACE_TIME`: defaults to 900 seconds (15 minutes). When
a shutdown is triggered, the Sidekiq process will keep working normally for
another 15 minutes.
- `SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT`: defaults to 30 seconds. When the grace
@@ -36,5 +36,3 @@ The MemoryKiller is controlled using environment variables.
Existing jobs get 30 seconds to finish. After that, the MemoryKiller tells
Sidekiq to shut down, and an external supervision mechanism (e.g. Runit) must
restart Sidekiq.
-- `SIDEKIQ_MEMORY_KILLER_SHUTDOWN_SIGNAL`: defaults to `SIGKILL`. The name of
- the final signal sent to the Sidekiq process when we want it to shut down.
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb
index d7d24eeb37b..2bfb7caefd9 100644
--- a/lib/gitlab/sidekiq_middleware/memory_killer.rb
+++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb
@@ -7,7 +7,6 @@ module Gitlab
GRACE_TIME = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i
# Wait 30 seconds for running jobs to finish during graceful shutdown
SHUTDOWN_WAIT = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i
- SHUTDOWN_SIGNAL = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_SIGNAL'] || 'SIGKILL').to_s
# Create a mutex used to ensure there will be only one thread waiting to
# shut Sidekiq down
@@ -15,6 +14,7 @@ module Gitlab
def call(worker, job, queue)
yield
+
current_rss = get_rss
return unless MAX_RSS > 0 && current_rss > MAX_RSS
@@ -23,32 +23,45 @@ module Gitlab
# Return if another thread is already waiting to shut Sidekiq down
return unless MUTEX.try_lock
- Sidekiq.logger.warn "current RSS #{current_rss} exceeds maximum RSS "\
- "#{MAX_RSS}"
- Sidekiq.logger.warn "this thread will shut down PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']} "\
- "in #{GRACE_TIME} seconds"
- sleep(GRACE_TIME)
+ Sidekiq.logger.warn "Sidekiq worker PID-#{pid} current RSS #{current_rss}"\
+ " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}"
+ Sidekiq.logger.warn "Sidekiq worker PID-#{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later"
- Sidekiq.logger.warn "sending SIGTERM to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- Process.kill('SIGTERM', Process.pid)
+ # Wait `GRACE_TIME` to give the memory intensive job time to finish.
+ # Then, tell Sidekiq to stop fetching new jobs.
+ wait_and_signal(GRACE_TIME, 'SIGSTP', 'stop fetching new jobs')
- Sidekiq.logger.warn "waiting #{SHUTDOWN_WAIT} seconds before sending "\
- "#{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- sleep(SHUTDOWN_WAIT)
+ # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
+ # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
+ # moments to finish, killing and requeuing them if they didn't, and
+ # then terminating itself.
+ wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
- Sidekiq.logger.warn "sending #{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}"
- Process.kill(SHUTDOWN_SIGNAL, Process.pid)
+ # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
+ wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
end
end
private
def get_rss
- output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{Process.pid}))
+ output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}))
return 0 unless status.zero?
output.to_i
end
+
+ def wait_and_signal(time, signal, explanation)
+ Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+ sleep(time)
+
+ Sidekiq.logger.warn "sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+ Process.kill(signal, pid)
+ end
+
+ def pid
+ Process.pid
+ end
end
end
end
diff --git a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
new file mode 100644
index 00000000000..8fdbbacd04d
--- /dev/null
+++ b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
@@ -0,0 +1,63 @@
+require 'spec_helper'
+
+describe Gitlab::SidekiqMiddleware::MemoryKiller do
+ subject { described_class.new }
+ let(:pid) { 999 }
+
+ let(:worker) { double(:worker, class: 'TestWorker') }
+ let(:job) { { 'jid' => 123 } }
+ let(:queue) { 'test_queue' }
+
+ def run
+ thread = subject.call(worker, job, queue) { nil }
+ thread&.join
+ end
+
+ before do
+ allow(subject).to receive(:get_rss).and_return(10.kilobytes)
+ allow(subject).to receive(:pid).and_return(pid)
+ end
+
+ context 'when MAX_RSS is set to 0' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 0)
+ end
+
+ it 'does nothing' do
+ expect(subject).not_to receive(:sleep)
+
+ run
+ end
+ end
+
+ context 'when MAX_RSS is exceeded' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 5.kilobytes)
+ end
+
+ it 'sends the STP, TERM and KILL signals at expected times' do
+ expect(subject).to receive(:sleep).with(15 * 60).ordered
+ expect(Process).to receive(:kill).with('SIGSTP', pid).ordered
+
+ expect(subject).to receive(:sleep).with(30).ordered
+ expect(Process).to receive(:kill).with('SIGTERM', pid).ordered
+
+ expect(subject).to receive(:sleep).with(10).ordered
+ expect(Process).to receive(:kill).with('SIGKILL', pid).ordered
+
+ run
+ end
+ end
+
+ context 'when MAX_RSS is not exceeded' do
+ before do
+ stub_const("#{described_class}::MAX_RSS", 15.kilobytes)
+ end
+
+ it 'does nothing' do
+ expect(subject).not_to receive(:sleep)
+
+ run
+ end
+ end
+end