summary | refs | log | tree | commit | diff
path: root/sidekiq_cluster
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com> 2021-12-20 13:37:47 +0000
committer: GitLab Bot <gitlab-bot@gitlab.com> 2021-12-20 13:37:47 +0000
commit: aee0a117a889461ce8ced6fcf73207fe017f1d99 (patch)
tree: 891d9ef189227a8445d83f35c1b0fc99573f4380 /sidekiq_cluster
parent: 8d46af3258650d305f53b819eabf7ab18d22f59e (diff)
download: gitlab-ce-aee0a117a889461ce8ced6fcf73207fe017f1d99.tar.gz
Add latest changes from gitlab-org/gitlab@14-6-stable-ee (v14.6.0-rc42)
Diffstat (limited to 'sidekiq_cluster')
-rw-r--r-- sidekiq_cluster/cli.rb 92
-rw-r--r-- sidekiq_cluster/dependencies.rb 6
-rw-r--r-- sidekiq_cluster/sidekiq_cluster.rb 104
3 files changed, 98 insertions, 104 deletions
diff --git a/sidekiq_cluster/cli.rb b/sidekiq_cluster/cli.rb
index 55b4521d37d..57649ec74c8 100644
--- a/sidekiq_cluster/cli.rb
+++ b/sidekiq_cluster/cli.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require_relative '../config/bundler_setup'
+
require 'optparse'
require 'logger'
require 'time'
@@ -11,11 +13,21 @@ require_relative '../lib/gitlab/utils'
require_relative '../lib/gitlab/sidekiq_config/cli_methods'
require_relative '../lib/gitlab/sidekiq_config/worker_matcher'
require_relative '../lib/gitlab/sidekiq_logging/json_formatter'
+require_relative '../lib/gitlab/process_management'
+require_relative '../metrics_server/metrics_server'
require_relative 'sidekiq_cluster'
module Gitlab
module SidekiqCluster
class CLI
+ THREAD_NAME = 'supervisor'
+
+ # The signals that should terminate both the master and workers.
+ TERMINATE_SIGNALS = %i(INT TERM).freeze
+
+ # The signals that should simply be forwarded to the workers.
+ FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
+
CommandError = Class.new(StandardError)
def initialize(log_output = $stderr)
@@ -23,6 +35,7 @@ module Gitlab
@max_concurrency = 50
@min_concurrency = 0
@environment = ENV['RAILS_ENV'] || 'development'
+ @metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq")
@pid = nil
@interval = 5
@alive = true
@@ -35,6 +48,8 @@ module Gitlab
end
def run(argv = ARGV)
+ Thread.current.name = THREAD_NAME
+
if argv.empty?
raise CommandError,
'You must specify at least one queue to start a worker for'
@@ -88,6 +103,8 @@ module Gitlab
@logger.info("Starting cluster with #{queue_groups.length} processes")
end
+ start_metrics_server(wipe_metrics_dir: true)
+
@processes = SidekiqCluster.start(
queue_groups,
env: @environment,
@@ -106,7 +123,7 @@ module Gitlab
end
def write_pid
- SidekiqCluster.write_pid(@pid) if @pid
+ ProcessManagement.write_pid(@pid) if @pid
end
def soft_timeout_seconds
@@ -123,11 +140,11 @@ module Gitlab
end
def continue_waiting?(deadline)
- SidekiqCluster.any_alive?(@processes) && monotonic_time < deadline
+ ProcessManagement.any_alive?(@processes) && monotonic_time < deadline
end
def hard_stop_stuck_pids
- SidekiqCluster.signal_processes(SidekiqCluster.pids_alive(@processes), "-KILL")
+ ProcessManagement.signal_processes(ProcessManagement.pids_alive(@processes), "-KILL")
end
def wait_for_termination
@@ -138,14 +155,14 @@ module Gitlab
end
def trap_signals
- SidekiqCluster.trap_terminate do |signal|
+ ProcessManagement.trap_signals(TERMINATE_SIGNALS) do |signal|
@alive = false
- SidekiqCluster.signal_processes(@processes, signal)
+ ProcessManagement.signal_processes(@processes, signal)
wait_for_termination
end
- SidekiqCluster.trap_forward do |signal|
- SidekiqCluster.signal_processes(@processes, signal)
+ ProcessManagement.trap_signals(FORWARD_SIGNALS) do |signal|
+ ProcessManagement.signal_processes(@processes, signal)
end
end
@@ -153,17 +170,74 @@ module Gitlab
while @alive
sleep(@interval)
- unless SidekiqCluster.all_alive?(@processes)
+ if metrics_server_enabled? && ProcessManagement.process_died?(@metrics_server_pid)
+ @logger.warn('Metrics server went away')
+ start_metrics_server(wipe_metrics_dir: false)
+ end
+
+ unless ProcessManagement.all_alive?(@processes)
# If a child process died we'll just terminate the whole cluster. It's up to
# runit and such to then restart the cluster.
@logger.info('A worker terminated, shutting down the cluster')
- SidekiqCluster.signal_processes(@processes, :TERM)
+ stop_metrics_server
+ ProcessManagement.signal_processes(@processes, :TERM)
break
end
end
end
+ def start_metrics_server(wipe_metrics_dir: false)
+ return unless metrics_server_enabled?
+
+ @logger.info("Starting metrics server on port #{sidekiq_exporter_port}")
+ @metrics_server_pid = MetricsServer.spawn(
+ 'sidekiq',
+ metrics_dir: @metrics_dir,
+ wipe_metrics_dir: wipe_metrics_dir,
+ trapped_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS
+ )
+ end
+
+ def sidekiq_exporter_enabled?
+ ::Settings.monitoring.sidekiq_exporter.enabled
+ rescue Settingslogic::MissingSetting
+ nil
+ end
+
+ def exporter_has_a_unique_port?
+ # In https://gitlab.com/gitlab-org/gitlab/-/issues/345802 we added settings for sidekiq_health_checks.
+ # These settings default to the same values as sidekiq_exporter for backwards compatibility.
+ # If a different port for sidekiq_health_checks has been set up, we know that the
+ # user wants to serve health checks and metrics from different servers.
+ return false if sidekiq_health_check_port.nil? || sidekiq_exporter_port.nil?
+
+ sidekiq_exporter_port != sidekiq_health_check_port
+ end
+
+ def sidekiq_exporter_port
+ ::Settings.monitoring.sidekiq_exporter.port
+ rescue Settingslogic::MissingSetting
+ nil
+ end
+
+ def sidekiq_health_check_port
+ ::Settings.monitoring.sidekiq_health_checks.port
+ rescue Settingslogic::MissingSetting
+ nil
+ end
+
+ def metrics_server_enabled?
+ !@dryrun && sidekiq_exporter_enabled? && exporter_has_a_unique_port?
+ end
+
+ def stop_metrics_server
+ return unless @metrics_server_pid
+
+ @logger.info("Stopping metrics server (PID #{@metrics_server_pid})")
+ ProcessManagement.signal(@metrics_server_pid, :TERM)
+ end
+
def option_parser
OptionParser.new do |opt|
opt.banner = "#{File.basename(__FILE__)} [QUEUE,QUEUE] [QUEUE] ... [OPTIONS]"
diff --git a/sidekiq_cluster/dependencies.rb b/sidekiq_cluster/dependencies.rb
deleted file mode 100644
index 91e91475f15..00000000000
--- a/sidekiq_cluster/dependencies.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-# rubocop:disable Naming/FileName
-# frozen_string_literal: true
-
-require 'shellwords'
-
-# rubocop:enable Naming/FileName
diff --git a/sidekiq_cluster/sidekiq_cluster.rb b/sidekiq_cluster/sidekiq_cluster.rb
index 49478ba740d..c5139ab8874 100644
--- a/sidekiq_cluster/sidekiq_cluster.rb
+++ b/sidekiq_cluster/sidekiq_cluster.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative 'dependencies'
+require_relative '../lib/gitlab/process_management'
module Gitlab
module SidekiqCluster
@@ -17,49 +17,6 @@ module Gitlab
# After surpassing the soft timeout.
DEFAULT_HARD_TIMEOUT_SECONDS = 5
- # The signals that should terminate both the master and workers.
- TERMINATE_SIGNALS = %i(INT TERM).freeze
-
- # The signals that should simply be forwarded to the workers.
- FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
-
- # Traps the given signals and yields the block whenever these signals are
- # received.
- #
- # The block is passed the name of the signal.
- #
- # Example:
- #
- # trap_signals(%i(HUP TERM)) do |signal|
- # ...
- # end
- def self.trap_signals(signals)
- signals.each do |signal|
- trap(signal) do
- yield signal
- end
- end
- end
-
- def self.trap_terminate(&block)
- trap_signals(TERMINATE_SIGNALS, &block)
- end
-
- def self.trap_forward(&block)
- trap_signals(FORWARD_SIGNALS, &block)
- end
-
- def self.signal(pid, signal)
- Process.kill(signal, pid)
- true
- rescue Errno::ESRCH
- false
- end
-
- def self.signal_processes(pids, signal)
- pids.each { |pid| signal(pid, signal) }
- end
-
# Starts Sidekiq workers for the pairs of processes.
#
# Example:
@@ -109,16 +66,21 @@ module Gitlab
return
end
- pid = Process.spawn(
- { 'ENABLE_SIDEKIQ_CLUSTER' => '1',
- 'SIDEKIQ_WORKER_ID' => worker_id.to_s },
- *cmd,
- pgroup: true,
- err: $stderr,
- out: $stdout
- )
+ # We need to remove Bundler specific env vars, since otherwise the
+ # child process will think we are passing an alternative Gemfile
+ # and will clear and reset LOAD_PATH.
+ pid = Bundler.with_original_env do
+ Process.spawn(
+ { 'ENABLE_SIDEKIQ_CLUSTER' => '1',
+ 'SIDEKIQ_WORKER_ID' => worker_id.to_s },
+ *cmd,
+ pgroup: true,
+ err: $stderr,
+ out: $stdout
+ )
+ end
- wait_async(pid)
+ ProcessManagement.wait_async(pid)
pid
end
@@ -144,41 +106,5 @@ module Gitlab
concurrency_from_queues.clamp(min, max)
end
-
- # Waits for the given process to complete using a separate thread.
- def self.wait_async(pid)
- Thread.new do
- Process.wait(pid) rescue Errno::ECHILD
- end
- end
-
- # Returns true if all the processes are alive.
- def self.all_alive?(pids)
- pids.each do |pid|
- return false unless process_alive?(pid)
- end
-
- true
- end
-
- def self.any_alive?(pids)
- pids_alive(pids).any?
- end
-
- def self.pids_alive(pids)
- pids.select { |pid| process_alive?(pid) }
- end
-
- def self.process_alive?(pid)
- # Signal 0 tests whether the process exists and we have access to send signals
- # but is otherwise a noop (doesn't actually send a signal to the process)
- signal(pid, 0)
- end
-
- def self.write_pid(path)
- File.open(path, 'w') do |handle|
- handle.write(Process.pid.to_s)
- end
- end
end
end