summary refs log tree commit diff
path: root/lib/gitlab/memory/watchdog
diff options
context:
space:
mode:
author GitLab Bot <gitlab-bot@gitlab.com> 2023-02-20 13:49:51 +0000
committer GitLab Bot <gitlab-bot@gitlab.com> 2023-02-20 13:49:51 +0000
commit 71786ddc8e28fbd3cb3fcc4b3ff15e5962a1c82e (patch)
tree 6a2d93ef3fb2d353bb7739e4b57e6541f51cdd71 /lib/gitlab/memory/watchdog
parent a7253423e3403b8c08f8a161e5937e1488f5f407 (diff)
download gitlab-ce-a36f25615e8226344d87b692ccf3e543d5d81712.tar.gz
Add latest changes from gitlab-org/gitlab@15-9-stable-ee v15.9.0-rc42
Diffstat (limited to 'lib/gitlab/memory/watchdog')
-rw-r--r-- lib/gitlab/memory/watchdog/configuration.rb 2
-rw-r--r-- lib/gitlab/memory/watchdog/configurator.rb 12
-rw-r--r-- lib/gitlab/memory/watchdog/handlers/null_handler.rb 24
-rw-r--r-- lib/gitlab/memory/watchdog/handlers/puma_handler.rb 23
-rw-r--r-- lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb 63
5 files changed, 120 insertions, 4 deletions
diff --git a/lib/gitlab/memory/watchdog/configuration.rb b/lib/gitlab/memory/watchdog/configuration.rb
index 5c459220be8..6ab199bf816 100644
--- a/lib/gitlab/memory/watchdog/configuration.rb
+++ b/lib/gitlab/memory/watchdog/configuration.rb
@@ -48,7 +48,7 @@ module Gitlab
end
def handler
- @handler ||= NullHandler.instance
+ @handler ||= Handlers::NullHandler.instance
end
def event_reporter
diff --git a/lib/gitlab/memory/watchdog/configurator.rb b/lib/gitlab/memory/watchdog/configurator.rb
index 04c04cbde02..4a6640ba901 100644
--- a/lib/gitlab/memory/watchdog/configurator.rb
+++ b/lib/gitlab/memory/watchdog/configurator.rb
@@ -12,12 +12,12 @@ module Gitlab
DEFAULT_MAX_HEAP_FRAG = 0.5
DEFAULT_MAX_MEM_GROWTH = 3.0
# grace_time / sleep_interval = max_strikes allowed for Sidekiq process to violate defined limits.
- DEFAULT_SIDEKIQ_GRACE_TIME_S = 300
+ DEFAULT_SIDEKIQ_GRACE_TIME_S = 900
class << self
def configure_for_puma
->(config) do
- config.handler = Gitlab::Memory::Watchdog::PumaHandler.new
+ config.handler = Gitlab::Memory::Watchdog::Handlers::PumaHandler.new
config.sleep_time_seconds = ENV.fetch('GITLAB_MEMWD_SLEEP_TIME_SEC', DEFAULT_SLEEP_INTERVAL_S).to_i
config.monitors(&configure_monitors_for_puma)
end
@@ -25,7 +25,13 @@ module Gitlab
def configure_for_sidekiq
->(config) do
- config.handler = Gitlab::Memory::Watchdog::TermProcessHandler.new
+ # Give Sidekiq time (30 seconds by default) to let existing jobs finish after exceeding the limit
+ shutdown_timeout_seconds = ENV.fetch('SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT', 30).to_i
+
+ config.handler = Gitlab::Memory::Watchdog::Handlers::SidekiqHandler.new(
+ shutdown_timeout_seconds,
+ sidekiq_sleep_time
+ )
config.sleep_time_seconds = sidekiq_sleep_time
config.monitors(&configure_monitors_for_sidekiq)
config.event_reporter = SidekiqEventReporter.new
diff --git a/lib/gitlab/memory/watchdog/handlers/null_handler.rb b/lib/gitlab/memory/watchdog/handlers/null_handler.rb
new file mode 100644
index 00000000000..127001003ce
--- /dev/null
+++ b/lib/gitlab/memory/watchdog/handlers/null_handler.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ class Watchdog
+ module Handlers
+ # This handler does nothing. It returns `false` to indicate to the
+ # caller that the situation has not been dealt with so it will
+ # receive calls repeatedly if fragmentation remains high.
+ #
+ # This is useful for "dress rehearsals" in production since it allows
+ # us to observe how frequently the handler is invoked before taking action.
+ class NullHandler
+ include Singleton
+
+ def call
+ # NOP
+ false
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/memory/watchdog/handlers/puma_handler.rb b/lib/gitlab/memory/watchdog/handlers/puma_handler.rb
new file mode 100644
index 00000000000..fffd91733c8
--- /dev/null
+++ b/lib/gitlab/memory/watchdog/handlers/puma_handler.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ class Watchdog
+ module Handlers
+ # This handler invokes Puma's graceful termination handler, which takes
+ # into account a configurable grace period during which a process may
+ # remain unresponsive to a SIGTERM.
+ class PumaHandler
+ def initialize(puma_options = ::Puma.cli_config.options)
+ @worker = ::Puma::Cluster::WorkerHandle.new(0, $$, 0, puma_options)
+ end
+
+ def call
+ @worker.term
+ true
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb b/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb
new file mode 100644
index 00000000000..47ed608c576
--- /dev/null
+++ b/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ class Watchdog
+ module Handlers
+ class SidekiqHandler
+ def initialize(shutdown_timeout_seconds, sleep_time_seconds)
+ @shutdown_timeout_seconds = shutdown_timeout_seconds
+ @sleep_time_seconds = sleep_time_seconds
+ @alive = true
+ end
+
+ def call
+ # Tell Sidekiq to stop fetching new jobs.
+ # We send the signal first and then wait for up to the given time.
+ send_signal(:TSTP, $$, 'stop fetching new jobs', @shutdown_timeout_seconds)
+ return true unless @alive
+
+ # Tell Sidekiq to shut down gracefully.
+ # To be extra safe, wait `Sidekiq[:timeout] + 2` seconds before sending SIGKILL.
+ send_signal(:TERM, $$, 'gracefully shut down', Sidekiq[:timeout] + 2)
+ return true unless @alive
+
+ # Ideally we should never reach this point.
+ # Sidekiq did not shut down gracefully within the wait time, so kill it.
+ # If the process is the group leader, kill the whole pgroup so that no children are left behind.
+ send_signal(:KILL, Process.getpgrp == $$ ? 0 : $$, 'hard shut down')
+
+ true
+ end
+
+ def stop
+ @alive = false
+ end
+
+ private
+
+ def send_signal(signal, pid, explanation, wait_time = nil)
+ Sidekiq.logger.warn(
+ pid: pid,
+ worker_id: ::Prometheus::PidProvider.worker_id,
+ memwd_handler_class: self.class.to_s,
+ memwd_signal: signal,
+ memwd_explanation: explanation,
+ memwd_wait_time: wait_time,
+ message: "Sending signal and waiting"
+ )
+
+ ProcessManagement.signal(pid, signal)
+
+ return unless wait_time
+
+ deadline = Gitlab::Metrics::System.monotonic_time + wait_time
+
+ # Sleep in intervals until the deadline is reached or the handler is stopped
+ sleep(@sleep_time_seconds) while @alive && Gitlab::Metrics::System.monotonic_time < deadline
+ end
+ end
+ end
+ end
+ end
+end