summaryrefslogtreecommitdiff
path: root/lib/gitlab/sidekiq_daemon/memory_killer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/sidekiq_daemon/memory_killer.rb')
-rw-r--r--lib/gitlab/sidekiq_daemon/memory_killer.rb221
1 files changed, 0 insertions, 221 deletions
diff --git a/lib/gitlab/sidekiq_daemon/memory_killer.rb b/lib/gitlab/sidekiq_daemon/memory_killer.rb
deleted file mode 100644
index 25b91318dfe..00000000000
--- a/lib/gitlab/sidekiq_daemon/memory_killer.rb
+++ /dev/null
@@ -1,221 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module SidekiqDaemon
- class MemoryKiller < Daemon
- include ::Gitlab::Utils::StrongMemoize
-
- # Today 64-bit CPU support max 256T memory. It is big enough.
- MAX_MEMORY_KB = 256 * 1024 * 1024 * 1024
- # RSS below `soft_limit_rss` is considered safe
- SOFT_LIMIT_RSS_KB = ENV.fetch('SIDEKIQ_MEMORY_KILLER_MAX_RSS', 2000000).to_i
- # RSS above `hard_limit_rss` will be stopped
- HARD_LIMIT_RSS_KB = ENV.fetch('SIDEKIQ_MEMORY_KILLER_HARD_LIMIT_RSS', MAX_MEMORY_KB).to_i
- # RSS in range (soft_limit_rss, hard_limit_rss) is allowed for GRACE_BALLOON_SECONDS
- GRACE_BALLOON_SECONDS = ENV.fetch('SIDEKIQ_MEMORY_KILLER_GRACE_TIME', 15 * 60).to_i
- # Check RSS every CHECK_INTERVAL_SECONDS, minimum 2 seconds
- CHECK_INTERVAL_SECONDS = [ENV.fetch('SIDEKIQ_MEMORY_KILLER_CHECK_INTERVAL', 3).to_i, 2].max
- # Give Sidekiq up to 30 seconds to allow existing jobs to finish after exceeding the limit
- SHUTDOWN_TIMEOUT_SECONDS = ENV.fetch('SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT', 30).to_i
- # Developer/admin should always set `memory_killer_max_memory_growth_kb` explicitly
- # In case not set, default to 300M. This is for extra-safe.
- DEFAULT_MAX_MEMORY_GROWTH_KB = 300_000
-
- def initialize
- super
-
- @enabled = true
- end
-
- private
-
- def start_working
- Sidekiq.logger.info(
- class: self.class.to_s,
- action: 'start',
- pid: pid,
- message: 'Starting Gitlab::SidekiqDaemon::MemoryKiller Daemon'
- )
-
- while enabled?
- begin
- restart_sidekiq unless rss_within_range?
- sleep(CHECK_INTERVAL_SECONDS)
- rescue => e
- log_exception(e, __method__)
- rescue Exception => e # rubocop:disable Lint/RescueException
- log_exception(e, __method__ )
- raise e
- end
- end
- ensure
- Sidekiq.logger.warn(
- class: self.class.to_s,
- action: 'stop',
- pid: pid,
- message: 'Stopping Gitlab::SidekiqDaemon::MemoryKiller Daemon'
- )
- end
-
- def log_exception(exception, method)
- Sidekiq.logger.warn(
- class: self.class.to_s,
- pid: pid,
- message: "Exception from #{method}: #{exception.message}"
- )
- end
-
- def stop_working
- @enabled = false
- end
-
- def enabled?
- @enabled
- end
-
- def restart_sidekiq
- # Tell Sidekiq to stop fetching new jobs
- # We first SIGNAL and then wait given time
- # We also monitor a number of running jobs and allow to restart early
- signal_and_wait(SHUTDOWN_TIMEOUT_SECONDS, 'SIGTSTP', 'stop fetching new jobs')
- return unless enabled?
-
- # Tell sidekiq to restart itself
- # Keep extra safe to wait `Sidekiq.options[:timeout] + 2` seconds before SIGKILL
- signal_and_wait(Sidekiq.options[:timeout] + 2, 'SIGTERM', 'gracefully shut down')
- return unless enabled?
-
- # Ideally we should never reach this condition
- # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't
- # Kill the whole pgroup, so we can be sure no children are left behind
- signal_pgroup('SIGKILL', 'die')
- end
-
- def rss_within_range?
- current_rss = nil
- deadline = Gitlab::Metrics::System.monotonic_time + GRACE_BALLOON_SECONDS.seconds
- loop do
- return true unless enabled?
-
- current_rss = get_rss
-
- # RSS go above hard limit should trigger forcible shutdown right away
- break if current_rss > hard_limit_rss
-
- # RSS go below the soft limit
- return true if current_rss < soft_limit_rss
-
- # RSS did not go below the soft limit within deadline, restart
- break if Gitlab::Metrics::System.monotonic_time > deadline
-
- sleep(CHECK_INTERVAL_SECONDS)
- end
-
- log_rss_out_of_range(current_rss, hard_limit_rss, soft_limit_rss)
-
- false
- end
-
- def log_rss_out_of_range(current_rss, hard_limit_rss, soft_limit_rss)
- Sidekiq.logger.warn(
- class: self.class.to_s,
- pid: pid,
- message: 'Sidekiq worker RSS out of range',
- current_rss: current_rss,
- hard_limit_rss: hard_limit_rss,
- soft_limit_rss: soft_limit_rss,
- reason: out_of_range_description(current_rss, hard_limit_rss, soft_limit_rss)
- )
- end
-
- def out_of_range_description(rss, hard_limit, soft_limit)
- if rss > hard_limit
- "current_rss(#{rss}) > hard_limit_rss(#{hard_limit})"
- else
- "current_rss(#{rss}) > soft_limit_rss(#{soft_limit}) longer than GRACE_BALLOON_SECONDS(#{GRACE_BALLOON_SECONDS})"
- end
- end
-
- def get_rss
- output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}), Rails.root.to_s)
- return 0 unless status&.zero?
-
- output.to_i
- end
-
- def soft_limit_rss
- SOFT_LIMIT_RSS_KB + rss_increase_by_jobs
- end
-
- def hard_limit_rss
- HARD_LIMIT_RSS_KB
- end
-
- def signal_and_wait(time, signal, explanation)
- Sidekiq.logger.warn(
- class: self.class.to_s,
- pid: pid,
- signal: signal,
- explanation: explanation,
- wait_time: time,
- message: "Sending signal and waiting"
- )
- Process.kill(signal, pid)
-
- deadline = Gitlab::Metrics::System.monotonic_time + time
-
- # we try to finish as early as all jobs finished
- # so we retest that in loop
- sleep(CHECK_INTERVAL_SECONDS) while enabled? && any_jobs? && Gitlab::Metrics::System.monotonic_time < deadline
- end
-
- def signal_pgroup(signal, explanation)
- if Process.getpgrp == pid
- pid_or_pgrp_str = 'PGRP'
- pid_to_signal = 0
- else
- pid_or_pgrp_str = 'PID'
- pid_to_signal = pid
- end
-
- Sidekiq.logger.warn(
- class: self.class.to_s,
- signal: signal,
- pid: pid,
- message: "sending Sidekiq worker #{pid_or_pgrp_str}-#{pid} #{signal} (#{explanation})"
- )
- Process.kill(signal, pid_to_signal)
- end
-
- def rss_increase_by_jobs
- Gitlab::SidekiqDaemon::Monitor.instance.jobs.sum do |job| # rubocop:disable CodeReuse/ActiveRecord
- rss_increase_by_job(job)
- end
- end
-
- def rss_increase_by_job(job)
- memory_growth_kb = get_job_options(job, 'memory_killer_memory_growth_kb', 0).to_i
- max_memory_growth_kb = get_job_options(job, 'memory_killer_max_memory_growth_kb', DEFAULT_MAX_MEMORY_GROWTH_KB).to_i
-
- return 0 if memory_growth_kb.zero?
-
- time_elapsed = [Gitlab::Metrics::System.monotonic_time - job[:started_at], 0].max
- [memory_growth_kb * time_elapsed, max_memory_growth_kb].min
- end
-
- def get_job_options(job, key, default)
- job[:worker_class].sidekiq_options.fetch(key, default)
- rescue
- default
- end
-
- def pid
- Process.pid
- end
-
- def any_jobs?
- Gitlab::SidekiqDaemon::Monitor.instance.jobs.any?
- end
- end
- end
-end