diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-12-20 13:37:47 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-12-20 13:37:47 +0000 |
commit | aee0a117a889461ce8ced6fcf73207fe017f1d99 (patch) | |
tree | 891d9ef189227a8445d83f35c1b0fc99573f4380 /sidekiq_cluster | |
parent | 8d46af3258650d305f53b819eabf7ab18d22f59e (diff) | |
download | gitlab-ce-aee0a117a889461ce8ced6fcf73207fe017f1d99.tar.gz |
Add latest changes from gitlab-org/gitlab@14-6-stable-ee (tag: v14.6.0-rc42)
Diffstat (limited to 'sidekiq_cluster')
-rw-r--r-- | sidekiq_cluster/cli.rb | 92 | ||||
-rw-r--r-- | sidekiq_cluster/dependencies.rb | 6 | ||||
-rw-r--r-- | sidekiq_cluster/sidekiq_cluster.rb | 104 |
3 files changed, 98 insertions, 104 deletions
diff --git a/sidekiq_cluster/cli.rb b/sidekiq_cluster/cli.rb index 55b4521d37d..57649ec74c8 100644 --- a/sidekiq_cluster/cli.rb +++ b/sidekiq_cluster/cli.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative '../config/bundler_setup' + require 'optparse' require 'logger' require 'time' @@ -11,11 +13,21 @@ require_relative '../lib/gitlab/utils' require_relative '../lib/gitlab/sidekiq_config/cli_methods' require_relative '../lib/gitlab/sidekiq_config/worker_matcher' require_relative '../lib/gitlab/sidekiq_logging/json_formatter' +require_relative '../lib/gitlab/process_management' +require_relative '../metrics_server/metrics_server' require_relative 'sidekiq_cluster' module Gitlab module SidekiqCluster class CLI + THREAD_NAME = 'supervisor' + + # The signals that should terminate both the master and workers. + TERMINATE_SIGNALS = %i(INT TERM).freeze + + # The signals that should simply be forwarded to the workers. + FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze + CommandError = Class.new(StandardError) def initialize(log_output = $stderr) @@ -23,6 +35,7 @@ module Gitlab @max_concurrency = 50 @min_concurrency = 0 @environment = ENV['RAILS_ENV'] || 'development' + @metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq") @pid = nil @interval = 5 @alive = true @@ -35,6 +48,8 @@ module Gitlab end def run(argv = ARGV) + Thread.current.name = THREAD_NAME + if argv.empty? 
raise CommandError, 'You must specify at least one queue to start a worker for' @@ -88,6 +103,8 @@ module Gitlab @logger.info("Starting cluster with #{queue_groups.length} processes") end + start_metrics_server(wipe_metrics_dir: true) + @processes = SidekiqCluster.start( queue_groups, env: @environment, @@ -106,7 +123,7 @@ module Gitlab end def write_pid - SidekiqCluster.write_pid(@pid) if @pid + ProcessManagement.write_pid(@pid) if @pid end def soft_timeout_seconds @@ -123,11 +140,11 @@ module Gitlab end def continue_waiting?(deadline) - SidekiqCluster.any_alive?(@processes) && monotonic_time < deadline + ProcessManagement.any_alive?(@processes) && monotonic_time < deadline end def hard_stop_stuck_pids - SidekiqCluster.signal_processes(SidekiqCluster.pids_alive(@processes), "-KILL") + ProcessManagement.signal_processes(ProcessManagement.pids_alive(@processes), "-KILL") end def wait_for_termination @@ -138,14 +155,14 @@ module Gitlab end def trap_signals - SidekiqCluster.trap_terminate do |signal| + ProcessManagement.trap_signals(TERMINATE_SIGNALS) do |signal| @alive = false - SidekiqCluster.signal_processes(@processes, signal) + ProcessManagement.signal_processes(@processes, signal) wait_for_termination end - SidekiqCluster.trap_forward do |signal| - SidekiqCluster.signal_processes(@processes, signal) + ProcessManagement.trap_signals(FORWARD_SIGNALS) do |signal| + ProcessManagement.signal_processes(@processes, signal) end end @@ -153,17 +170,74 @@ module Gitlab while @alive sleep(@interval) - unless SidekiqCluster.all_alive?(@processes) + if metrics_server_enabled? && ProcessManagement.process_died?(@metrics_server_pid) + @logger.warn('Metrics server went away') + start_metrics_server(wipe_metrics_dir: false) + end + + unless ProcessManagement.all_alive?(@processes) # If a child process died we'll just terminate the whole cluster. It's up to # runit and such to then restart the cluster. 
@logger.info('A worker terminated, shutting down the cluster') - SidekiqCluster.signal_processes(@processes, :TERM) + stop_metrics_server + ProcessManagement.signal_processes(@processes, :TERM) break end end end + def start_metrics_server(wipe_metrics_dir: false) + return unless metrics_server_enabled? + + @logger.info("Starting metrics server on port #{sidekiq_exporter_port}") + @metrics_server_pid = MetricsServer.spawn( + 'sidekiq', + metrics_dir: @metrics_dir, + wipe_metrics_dir: wipe_metrics_dir, + trapped_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS + ) + end + + def sidekiq_exporter_enabled? + ::Settings.monitoring.sidekiq_exporter.enabled + rescue Settingslogic::MissingSetting + nil + end + + def exporter_has_a_unique_port? + # In https://gitlab.com/gitlab-org/gitlab/-/issues/345802 we added settings for sidekiq_health_checks. + # These settings default to the same values as sidekiq_exporter for backwards compatibility. + # If a different port for sidekiq_health_checks has been set up, we know that the + # user wants to serve health checks and metrics from different servers. + return false if sidekiq_health_check_port.nil? || sidekiq_exporter_port.nil? + + sidekiq_exporter_port != sidekiq_health_check_port + end + + def sidekiq_exporter_port + ::Settings.monitoring.sidekiq_exporter.port + rescue Settingslogic::MissingSetting + nil + end + + def sidekiq_health_check_port + ::Settings.monitoring.sidekiq_health_checks.port + rescue Settingslogic::MissingSetting + nil + end + + def metrics_server_enabled? + !@dryrun && sidekiq_exporter_enabled? && exporter_has_a_unique_port? + end + + def stop_metrics_server + return unless @metrics_server_pid + + @logger.info("Stopping metrics server (PID #{@metrics_server_pid})") + ProcessManagement.signal(@metrics_server_pid, :TERM) + end + def option_parser OptionParser.new do |opt| opt.banner = "#{File.basename(__FILE__)} [QUEUE,QUEUE] [QUEUE] ... 
[OPTIONS]" diff --git a/sidekiq_cluster/dependencies.rb b/sidekiq_cluster/dependencies.rb deleted file mode 100644 index 91e91475f15..00000000000 --- a/sidekiq_cluster/dependencies.rb +++ /dev/null @@ -1,6 +0,0 @@ -# rubocop:disable Naming/FileName -# frozen_string_literal: true - -require 'shellwords' - -# rubocop:enable Naming/FileName diff --git a/sidekiq_cluster/sidekiq_cluster.rb b/sidekiq_cluster/sidekiq_cluster.rb index 49478ba740d..c5139ab8874 100644 --- a/sidekiq_cluster/sidekiq_cluster.rb +++ b/sidekiq_cluster/sidekiq_cluster.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative 'dependencies' +require_relative '../lib/gitlab/process_management' module Gitlab module SidekiqCluster @@ -17,49 +17,6 @@ module Gitlab # After surpassing the soft timeout. DEFAULT_HARD_TIMEOUT_SECONDS = 5 - # The signals that should terminate both the master and workers. - TERMINATE_SIGNALS = %i(INT TERM).freeze - - # The signals that should simply be forwarded to the workers. - FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze - - # Traps the given signals and yields the block whenever these signals are - # received. - # - # The block is passed the name of the signal. - # - # Example: - # - # trap_signals(%i(HUP TERM)) do |signal| - # ... - # end - def self.trap_signals(signals) - signals.each do |signal| - trap(signal) do - yield signal - end - end - end - - def self.trap_terminate(&block) - trap_signals(TERMINATE_SIGNALS, &block) - end - - def self.trap_forward(&block) - trap_signals(FORWARD_SIGNALS, &block) - end - - def self.signal(pid, signal) - Process.kill(signal, pid) - true - rescue Errno::ESRCH - false - end - - def self.signal_processes(pids, signal) - pids.each { |pid| signal(pid, signal) } - end - # Starts Sidekiq workers for the pairs of processes. 
# # Example: @@ -109,16 +66,21 @@ module Gitlab return end - pid = Process.spawn( - { 'ENABLE_SIDEKIQ_CLUSTER' => '1', - 'SIDEKIQ_WORKER_ID' => worker_id.to_s }, - *cmd, - pgroup: true, - err: $stderr, - out: $stdout - ) + # We need to remove Bundler specific env vars, since otherwise the + # child process will think we are passing an alternative Gemfile + # and will clear and reset LOAD_PATH. + pid = Bundler.with_original_env do + Process.spawn( + { 'ENABLE_SIDEKIQ_CLUSTER' => '1', + 'SIDEKIQ_WORKER_ID' => worker_id.to_s }, + *cmd, + pgroup: true, + err: $stderr, + out: $stdout + ) + end - wait_async(pid) + ProcessManagement.wait_async(pid) pid end @@ -144,41 +106,5 @@ module Gitlab concurrency_from_queues.clamp(min, max) end - - # Waits for the given process to complete using a separate thread. - def self.wait_async(pid) - Thread.new do - Process.wait(pid) rescue Errno::ECHILD - end - end - - # Returns true if all the processes are alive. - def self.all_alive?(pids) - pids.each do |pid| - return false unless process_alive?(pid) - end - - true - end - - def self.any_alive?(pids) - pids_alive(pids).any? - end - - def self.pids_alive(pids) - pids.select { |pid| process_alive?(pid) } - end - - def self.process_alive?(pid) - # Signal 0 tests whether the process exists and we have access to send signals - # but is otherwise a noop (doesn't actually send a signal to the process) - signal(pid, 0) - end - - def self.write_pid(path) - File.open(path, 'w') do |handle| - handle.write(Process.pid.to_s) - end - end end end |