diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-02-20 13:49:51 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-02-20 13:49:51 +0000 |
commit | 71786ddc8e28fbd3cb3fcc4b3ff15e5962a1c82e (patch) | |
tree | 6a2d93ef3fb2d353bb7739e4b57e6541f51cdd71 /lib/gitlab/memory/watchdog | |
parent | a7253423e3403b8c08f8a161e5937e1488f5f407 (diff) | |
download | gitlab-ce-a36f25615e8226344d87b692ccf3e543d5d81712.tar.gz |
Add latest changes from gitlab-org/gitlab@15-9-stable-ee (tag: v15.9.0-rc42)
Diffstat (limited to 'lib/gitlab/memory/watchdog')
-rw-r--r-- | lib/gitlab/memory/watchdog/configuration.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/memory/watchdog/configurator.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/memory/watchdog/handlers/null_handler.rb | 24 | ||||
-rw-r--r-- | lib/gitlab/memory/watchdog/handlers/puma_handler.rb | 23 | ||||
-rw-r--r-- | lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb | 63 |
5 files changed, 120 insertions, 4 deletions
diff --git a/lib/gitlab/memory/watchdog/configuration.rb b/lib/gitlab/memory/watchdog/configuration.rb index 5c459220be8..6ab199bf816 100644 --- a/lib/gitlab/memory/watchdog/configuration.rb +++ b/lib/gitlab/memory/watchdog/configuration.rb @@ -48,7 +48,7 @@ module Gitlab end def handler - @handler ||= NullHandler.instance + @handler ||= Handlers::NullHandler.instance end def event_reporter diff --git a/lib/gitlab/memory/watchdog/configurator.rb b/lib/gitlab/memory/watchdog/configurator.rb index 04c04cbde02..4a6640ba901 100644 --- a/lib/gitlab/memory/watchdog/configurator.rb +++ b/lib/gitlab/memory/watchdog/configurator.rb @@ -12,12 +12,12 @@ module Gitlab DEFAULT_MAX_HEAP_FRAG = 0.5 DEFAULT_MAX_MEM_GROWTH = 3.0 # grace_time / sleep_interval = max_strikes allowed for Sidekiq process to violate defined limits. - DEFAULT_SIDEKIQ_GRACE_TIME_S = 300 + DEFAULT_SIDEKIQ_GRACE_TIME_S = 900 class << self def configure_for_puma ->(config) do - config.handler = Gitlab::Memory::Watchdog::PumaHandler.new + config.handler = Gitlab::Memory::Watchdog::Handlers::PumaHandler.new config.sleep_time_seconds = ENV.fetch('GITLAB_MEMWD_SLEEP_TIME_SEC', DEFAULT_SLEEP_INTERVAL_S).to_i config.monitors(&configure_monitors_for_puma) end @@ -25,7 +25,13 @@ module Gitlab def configure_for_sidekiq ->(config) do - config.handler = Gitlab::Memory::Watchdog::TermProcessHandler.new + # Give Sidekiq up to 30 seconds to allow existing jobs to finish after exceeding the limit + shutdown_timeout_seconds = ENV.fetch('SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT', 30).to_i + + config.handler = Gitlab::Memory::Watchdog::Handlers::SidekiqHandler.new( + shutdown_timeout_seconds, + sidekiq_sleep_time + ) config.sleep_time_seconds = sidekiq_sleep_time config.monitors(&configure_monitors_for_sidekiq) config.event_reporter = SidekiqEventReporter.new diff --git a/lib/gitlab/memory/watchdog/handlers/null_handler.rb b/lib/gitlab/memory/watchdog/handlers/null_handler.rb new file mode 100644 index 00000000000..127001003ce --- 
/dev/null +++ b/lib/gitlab/memory/watchdog/handlers/null_handler.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module Memory + class Watchdog + module Handlers + # This handler does nothing. It returns `false` to indicate to the + # caller that the situation has not been dealt with so it will + # receive calls repeatedly if fragmentation remains high. + # + # This is useful for "dress rehearsals" in production since it allows + # us to observe how frequently the handler is invoked before taking action. + class NullHandler + include Singleton + + def call + # NOP + false + end + end + end + end + end +end diff --git a/lib/gitlab/memory/watchdog/handlers/puma_handler.rb b/lib/gitlab/memory/watchdog/handlers/puma_handler.rb new file mode 100644 index 00000000000..fffd91733c8 --- /dev/null +++ b/lib/gitlab/memory/watchdog/handlers/puma_handler.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module Memory + class Watchdog + module Handlers + # This handler invokes Puma's graceful termination handler, which takes + # into account a configurable grace period during which a process may + # remain unresponsive to a SIGTERM. 
+ class PumaHandler + def initialize(puma_options = ::Puma.cli_config.options) + @worker = ::Puma::Cluster::WorkerHandle.new(0, $$, 0, puma_options) + end + + def call + @worker.term + true + end + end + end + end + end +end diff --git a/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb b/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb new file mode 100644 index 00000000000..47ed608c576 --- /dev/null +++ b/lib/gitlab/memory/watchdog/handlers/sidekiq_handler.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Gitlab + module Memory + class Watchdog + module Handlers + class SidekiqHandler + def initialize(shutdown_timeout_seconds, sleep_time_seconds) + @shutdown_timeout_seconds = shutdown_timeout_seconds + @sleep_time_seconds = sleep_time_seconds + @alive = true + end + + def call + # Tell Sidekiq to stop fetching new jobs + # We first SIGNAL and then wait given time + send_signal(:TSTP, $$, 'stop fetching new jobs', @shutdown_timeout_seconds) + return true unless @alive + + # Tell sidekiq to restart itself + # Keep extra safe to wait `Sidekiq[:timeout] + 2` seconds before SIGKILL + send_signal(:TERM, $$, 'gracefully shut down', Sidekiq[:timeout] + 2) + return true unless @alive + + # Ideally we should never reach this condition + # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't + # If process is group leader, kill the whole pgroup, so we can be sure no children are left behind + send_signal(:KILL, Process.getpgrp == $$ ? 
0 : $$, 'hard shut down') + + true + end + + def stop + @alive = false + end + + private + + def send_signal(signal, pid, explanation, wait_time = nil) + Sidekiq.logger.warn( + pid: pid, + worker_id: ::Prometheus::PidProvider.worker_id, + memwd_handler_class: self.class.to_s, + memwd_signal: signal, + memwd_explanation: explanation, + memwd_wait_time: wait_time, + message: "Sending signal and waiting" + ) + + ProcessManagement.signal(pid, signal) + + return unless wait_time + + deadline = Gitlab::Metrics::System.monotonic_time + wait_time + + # Sleep until timeout reached + sleep(@sleep_time_seconds) while @alive && Gitlab::Metrics::System.monotonic_time < deadline + end + end + end + end + end +end |