From 1065f8ce7a261dff5a3077be46405343141733df Mon Sep 17 00:00:00 2001 From: Andrew Newdigate Date: Sat, 20 Oct 2018 19:00:19 +0100 Subject: Add experimental support for Puma This allows us (and others) to test drive Puma without it affecting all users. Puma can be enabled by setting the environment variable "EXPERIMENTAL_PUMA" to a non empty value. --- .gitignore | 1 + Gemfile | 5 + Gemfile.lock | 6 + Gemfile.rails5.lock | 6 + bin/web | 5 + bin/web_puma | 63 ++++ changelogs/unreleased/an-multithreading.yml | 5 + config/initializers/7_prometheus_metrics.rb | 22 +- config/initializers/8_metrics.rb | 4 +- config/initializers/active_record_lifecycle.rb | 23 ++ config/initializers/macos.rb | 13 + config/initializers/rbtrace.rb | 9 + config/initializers/sidekiq.rb | 2 - config/puma.example.development.rb | 77 ++++ config/unicorn.rb.example | 37 +- config/unicorn.rb.example.development | 67 +++- doc/update/11.4-to-11.5.md | 404 +++++++++++++++++++++ lib/gitlab/cluster/lifecycle_events.rb | 99 +++++ .../cluster/puma_worker_killer_initializer.rb | 32 ++ spec/rack_servers/configs/config.ru | 12 + spec/rack_servers/configs/puma.rb | 32 ++ spec/rack_servers/puma_spec.rb | 84 +++++ spec/rack_servers/unicorn_spec.rb | 105 ++++++ spec/unicorn/unicorn_spec.rb | 114 ------ 24 files changed, 1059 insertions(+), 168 deletions(-) create mode 100755 bin/web_puma create mode 100644 changelogs/unreleased/an-multithreading.yml create mode 100644 config/initializers/active_record_lifecycle.rb create mode 100644 config/initializers/macos.rb create mode 100644 config/initializers/rbtrace.rb create mode 100644 config/puma.example.development.rb create mode 100644 doc/update/11.4-to-11.5.md create mode 100644 lib/gitlab/cluster/lifecycle_events.rb create mode 100644 lib/gitlab/cluster/puma_worker_killer_initializer.rb create mode 100644 spec/rack_servers/configs/config.ru create mode 100644 spec/rack_servers/configs/puma.rb create mode 100644 spec/rack_servers/puma_spec.rb create mode 100644 spec/rack_servers/unicorn_spec.rb delete mode 100644 spec/unicorn/unicorn_spec.rb diff --git a/.gitignore b/.gitignore index 82b3d08f7a8..aecaae95b8c 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ eslint-report.html /config/redis.queues.yml /config/redis.shared_state.yml /config/unicorn.rb +/config/puma.rb /config/secrets.yml /config/sidekiq.yml /config/registry.key diff --git a/Gemfile b/Gemfile index c442ed9065e..d8c996cf6c4 100644 --- a/Gemfile +++ b/Gemfile @@ -153,6 +153,11 @@ group :unicorn do gem 'unicorn-worker-killer', '~> 0.4.4' end +group :puma do + gem 'puma', '~> 3.12', require: false + gem 'puma_worker_killer', require: false +end + # State machine gem 'state_machines-activerecord', '~> 0.5.1' diff --git a/Gemfile.lock b/Gemfile.lock index bf16bef4f32..da74cae3f5f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -598,6 +598,10 @@ GEM pry-rails (0.3.6) pry (>= 0.10.4) public_suffix (3.0.3) + puma (3.12.0) + puma_worker_killer (0.1.0) + get_process_mem (~> 0.2) + puma (>= 2.7, < 4) pyu-ruby-sasl (0.0.3.3) rack (1.6.10) rack-accept (0.4.5) @@ -1076,6 +1080,8 @@ DEPENDENCIES prometheus-client-mmap (~> 0.9.4) pry-byebug (~> 3.4.1) pry-rails (~> 0.3.4) + puma (~> 3.12) + puma_worker_killer rack-attack (~> 4.4.1) rack-cors (~> 1.0.0) rack-oauth2 (~> 1.2.1) diff --git a/Gemfile.rails5.lock b/Gemfile.rails5.lock index 81547303ed2..a1018753510 100644 --- a/Gemfile.rails5.lock +++ b/Gemfile.rails5.lock @@ -602,6 +602,10 @@ GEM pry-rails (0.3.6) pry (>= 0.10.4) public_suffix (3.0.3) + puma (3.12.0) + puma_worker_killer (0.1.0) + get_process_mem (~> 0.2) + puma (>= 2.7, < 4) pyu-ruby-sasl (0.0.3.3) rack (2.0.5) rack-accept (0.4.5) @@ -1085,6 +1089,8 @@ DEPENDENCIES prometheus-client-mmap (~> 0.9.4) pry-byebug (~> 3.4.1) pry-rails (~> 0.3.4) + puma (~> 3.12) + puma_worker_killer rack-attack (~> 4.4.1) rack-cors (~> 1.0.0) rack-oauth2 (~> 1.2.1) diff --git a/bin/web b/bin/web index ecd0bbd10b0..06ff7c39296 100755 --- a/bin/web +++ b/bin/web @@ -3,6 +3,11 @@ cd $(dirname $0)/.. app_root=$(pwd) +# Switch to experimental PUMA configuration +if [ -n "${EXPERIMENTAL_PUMA}" ]; then + exec bin/web_puma "$@" +fi + unicorn_pidfile="$app_root/tmp/pids/unicorn.pid" unicorn_config="$app_root/config/unicorn.rb" unicorn_cmd="bundle exec unicorn_rails -c $unicorn_config -E $RAILS_ENV" diff --git a/bin/web_puma b/bin/web_puma new file mode 100755 index 00000000000..178fe84800d --- /dev/null +++ b/bin/web_puma @@ -0,0 +1,63 @@ +#!/bin/sh + +set -e + +cd $(dirname $0)/.. +app_root=$(pwd) + +puma_pidfile="$app_root/tmp/pids/puma.pid" +puma_config="$app_root/config/puma.rb" + +spawn_puma() +{ + exec bundle exec puma --config "${puma_config}" "$@" +} + +get_puma_pid() +{ + pid=$(cat "${puma_pidfile}") + if [ -z "$pid" ] ; then + echo "Could not find a PID in $puma_pidfile" + exit 1 + fi + echo "${pid}" +} + +start() +{ + spawn_puma -d +} + +start_foreground() +{ + spawn_puma +} + +stop() +{ + get_puma_pid + kill -QUIT "$(get_puma_pid)" +} + +reload() +{ + kill -USR2 "$(get_puma_pid)" +} + +case "$1" in + start) + start + ;; + start_foreground) + start_foreground + ;; + stop) + stop + ;; + reload) + reload + ;; + *) + echo "Usage: RAILS_ENV=your_env $0 {start|stop|reload}" + ;; +esac diff --git a/changelogs/unreleased/an-multithreading.yml b/changelogs/unreleased/an-multithreading.yml new file mode 100644 index 00000000000..fca847e6ea4 --- /dev/null +++ b/changelogs/unreleased/an-multithreading.yml @@ -0,0 +1,5 @@ +--- +title: Experimental support for running Puma multithreaded web-server +merge_request: 22372 +author: +type: performance diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb index 146c4b1e024..8052880cc3d 100644 --- a/config/initializers/7_prometheus_metrics.rb +++ b/config/initializers/7_prometheus_metrics.rb @@ -26,9 +26,25 @@ Sidekiq.configure_server do |config| end if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? - unless Sidekiq.server? - Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start + Gitlab::Cluster::LifecycleEvents.on_worker_start do + defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change + + unless Sidekiq.server? + Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start + end + + Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start end +end - Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start +Gitlab::Cluster::LifecycleEvents.on_master_restart do + # The following is necessary to ensure stale Prometheus metrics don't + # accumulate over time. It needs to be done in this hook as opposed to + # inside an init script to ensure metrics files aren't deleted after new + # unicorn workers start after a SIGUSR2 is received. + prometheus_multiproc_dir = ENV['prometheus_multiproc_dir'] + if prometheus_multiproc_dir + old_metrics = Dir[File.join(prometheus_multiproc_dir, '*.db')] + FileUtils.rm_rf(old_metrics) + end end diff --git a/config/initializers/8_metrics.rb b/config/initializers/8_metrics.rb index eccf82ab8dc..c8d261d415e 100644 --- a/config/initializers/8_metrics.rb +++ b/config/initializers/8_metrics.rb @@ -158,7 +158,9 @@ if Gitlab::Metrics.enabled? && !Rails.env.test? GC::Profiler.enable - Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start + Gitlab::Cluster::LifecycleEvents.on_worker_start do + Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start + end module TrackNewRedisConnections def connect(*args) diff --git a/config/initializers/active_record_lifecycle.rb b/config/initializers/active_record_lifecycle.rb new file mode 100644 index 00000000000..7fa37121efc --- /dev/null +++ b/config/initializers/active_record_lifecycle.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +# Don't handle sidekiq configuration as it +# has its own special active record configuration here +if defined?(ActiveRecord::Base) && !Sidekiq.server? + Gitlab::Cluster::LifecycleEvents.on_worker_start do + ActiveSupport.on_load(:active_record) do + ActiveRecord::Base.establish_connection + + Rails.logger.debug("ActiveRecord connection established") + end + end +end + +if defined?(ActiveRecord::Base) + Gitlab::Cluster::LifecycleEvents.on_before_fork do + # the following is highly recommended for Rails + "preload_app true" + # as there's no need for the master process to hold a connection + ActiveRecord::Base.connection.disconnect! + + Rails.logger.debug("ActiveRecord connection disconnected") + end +end diff --git a/config/initializers/macos.rb b/config/initializers/macos.rb new file mode 100644 index 00000000000..f410af6ed47 --- /dev/null +++ b/config/initializers/macos.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +if /darwin/ =~ RUBY_PLATFORM + Gitlab::Cluster::LifecycleEvents.on_before_fork do + require 'fiddle' + + # Dynamically load Foundation.framework, ~implicitly~ initialising + # the Objective-C runtime before any forking happens in Unicorn + # + # From https://bugs.ruby-lang.org/issues/14009 + Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation' + end +end diff --git a/config/initializers/rbtrace.rb b/config/initializers/rbtrace.rb new file mode 100644 index 00000000000..6a1b71bf4bd --- /dev/null +++ b/config/initializers/rbtrace.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +if ENV['ENABLE_RBTRACE'] + Gitlab::Cluster::LifecycleEvents.on_worker_start do + # Unicorn clears out signals before it forks, so rbtrace won't work + # unless it is enabled after the fork. + require 'rbtrace' + end +end diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index bc6b7aed6aa..565efc858d1 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -14,8 +14,6 @@ Sidekiq.default_worker_options = { retry: 3 } enable_json_logs = Gitlab.config.sidekiq.log_format == 'json' Sidekiq.configure_server do |config| - require 'rbtrace' if ENV['ENABLE_RBTRACE'] - config.redis = queues_config_hash config.server_middleware do |chain| diff --git a/config/puma.example.development.rb b/config/puma.example.development.rb new file mode 100644 index 00000000000..490c940077a --- /dev/null +++ b/config/puma.example.development.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +# ----------------------------------------------------------------------- +# This file is used by the GDK to generate a default config/puma.rb file +# Note that `/home/git` will be substituted for the actual GDK root +# directory when this file is generated +# ----------------------------------------------------------------------- + +# Load "path" as a rackup file. +# +# The default is "config.ru". +# +rackup 'config.ru' +pidfile '/home/git/gitlab/tmp/pids/puma.pid' +state_path '/home/git/gitlab/tmp/pids/puma.state' + +stdout_redirect '/home/git/gitlab/log/puma.stdout.log', + '/home/git/gitlab/log/puma.stderr.log', + true + +# Configure "min" to be the minimum number of threads to use to answer +# requests and "max" the maximum. +# +# The default is "0, 16". +# +threads 1, 4 + +# By default, workers accept all requests and queue them to pass to handlers. +# When false, workers accept the number of simultaneous requests configured. +# +# Queueing requests generally improves performance, but can cause deadlocks if +# the app is waiting on a request to itself. See https://github.com/puma/puma/issues/612 +# +# When set to false this may require a reverse proxy to handle slow clients and +# queue requests before they reach puma. This is due to disabling HTTP keepalive +queue_requests false + +# Bind the server to "url". "tcp://", "unix://" and "ssl://" are the only +# accepted protocols. +bind 'unix:///home/git/gitlab.socket' + +workers 2 + +require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events" +require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer" + +on_restart do + # Signal application hooks that we're about to restart + Gitlab::Cluster::LifecycleEvents.do_master_restart +end + +before_fork do + # Signal to the puma killer + Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options unless ENV['DISABLE_PUMA_WORKER_KILLER'] + + # Signal application hooks that we're about to fork + Gitlab::Cluster::LifecycleEvents.do_before_fork +end + +Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options +on_worker_boot do + # Signal application hooks of worker start + Gitlab::Cluster::LifecycleEvents.do_worker_start +end + +# Preload the application before starting the workers; this conflicts with +# phased restart feature. (off by default) + +preload_app! + +tag 'gitlab-puma-worker' + +# Verifies that all workers have checked in to the master process within +# the given timeout. If not the worker process will be restarted. Default +# value is 60 seconds. +# +worker_timeout 60 diff --git a/config/unicorn.rb.example b/config/unicorn.rb.example index e06cce3e97a..4637eb8bc6e 100644 --- a/config/unicorn.rb.example +++ b/config/unicorn.rb.example @@ -81,22 +81,16 @@ preload_app true # fast LAN. check_client_connection false +require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events" + before_exec do |server| - # The following is necessary to ensure stale Prometheus metrics don't - # accumulate over time. It needs to be done in this hook as opposed to - # inside an init script to ensure metrics files aren't deleted after new - # unicorn workers start after a SIGUSR2 is received. - if ENV['prometheus_multiproc_dir'] - old_metrics = Dir[File.join(ENV['prometheus_multiproc_dir'], '*.db')] - FileUtils.rm_rf(old_metrics) - end + # Signal application hooks that we're about to restart + Gitlab::Cluster::LifecycleEvents.do_master_restart end before_fork do |server, worker| - # the following is highly recommended for Rails + "preload_app true" - # as there's no need for the master process to hold a connection - defined?(ActiveRecord::Base) && - ActiveRecord::Base.connection.disconnect! + # Signal application hooks that we're about to fork + Gitlab::Cluster::LifecycleEvents.do_before_fork # The following is only recommended for memory/DB-constrained # installations. It is not needed if your system can house @@ -124,25 +118,10 @@ before_fork do |server, worker| end after_fork do |server, worker| - # Unicorn clears out signals before it forks, so rbtrace won't work - # unless it is enabled after the fork. - require 'rbtrace' if ENV['ENABLE_RBTRACE'] + # Signal application hooks of worker start + Gitlab::Cluster::LifecycleEvents.do_worker_start # per-process listener ports for debugging/admin/migrations # addr = "127.0.0.1:#{9293 + worker.nr}" # server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true) - - # the following is *required* for Rails + "preload_app true", - defined?(ActiveRecord::Base) && - ActiveRecord::Base.establish_connection - - # reset prometheus client, this will cause any opened metrics files to be closed - defined?(::Prometheus::Client.reinitialize_on_pid_change) && - Prometheus::Client.reinitialize_on_pid_change - - # if preload_app is true, then you may also want to check and - # restart any other shared sockets/descriptors such as Memcached, - # and Redis. TokyoCabinet file handles are safe to reuse - # between any number of forked children (assuming your kernel - # correctly implements pread()/pwrite() system calls) end diff --git a/config/unicorn.rb.example.development b/config/unicorn.rb.example.development index f31df66015a..f7541bb9d55 100644 --- a/config/unicorn.rb.example.development +++ b/config/unicorn.rb.example.development @@ -1,32 +1,61 @@ +# frozen_string_literal: true + +# ------------------------------------------------------------------------- +# This file is used by the GDK to generate a default config/unicorn.rb file +# Note that `/home/git` will be substituted for the actual GDK root +# directory when this file is generated +# ------------------------------------------------------------------------- + worker_processes 2 timeout 60 +listen '/home/git/gitlab.socket' + preload_app true check_client_connection false +require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events" + +before_exec do |server| + # Signal application hooks that we're about to restart + Gitlab::Cluster::LifecycleEvents.do_master_restart +end + before_fork do |server, worker| - # the following is highly recommended for Rails + "preload_app true" - # as there's no need for the master process to hold a connection - defined?(ActiveRecord::Base) && - ActiveRecord::Base.connection.disconnect! - - if /darwin/ =~ RUBY_PLATFORM - require 'fiddle' - - # Dynamically load Foundation.framework, ~implicitly~ initialising - # the Objective-C runtime before any forking happens in Unicorn - # - # From https://bugs.ruby-lang.org/issues/14009 - Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation' + # Signal application hooks that we're about to fork + Gitlab::Cluster::LifecycleEvents.do_before_fork + + # The following is only recommended for memory/DB-constrained + # installations. It is not needed if your system can house + # twice as many worker_processes as you have configured. + # + # This allows a new master process to incrementally + # phase out the old master process with SIGTTOU to avoid a + # thundering herd (especially in the "preload_app false" case) + # when doing a transparent upgrade. The last worker spawned + # will then kill off the old master process with a SIGQUIT. + old_pid = "#{server.config[:pid]}.oldbin" + if old_pid != server.pid + begin + sig = (worker.nr + 1) >= server.worker_processes ? :QUIT : :TTOU + Process.kill(sig, File.read(old_pid).to_i) + rescue Errno::ENOENT, Errno::ESRCH + end end + # + # Throttle the master from forking too quickly by sleeping. Due + # to the implementation of standard Unix signal handlers, this + # helps (but does not completely) prevent identical, repeated signals + # from being lost when the receiving process is busy. + # sleep 1 end after_fork do |server, worker| - # Unicorn clears out signals before it forks, so rbtrace won't work - # unless it is enabled after the fork. - require 'rbtrace' if ENV['ENABLE_RBTRACE'] + # Signal application hooks of worker start + Gitlab::Cluster::LifecycleEvents.do_worker_start - # the following is *required* for Rails + "preload_app true", - defined?(ActiveRecord::Base) && - ActiveRecord::Base.establish_connection + # per-process listener ports for debugging/admin/migrations + # addr = "127.0.0.1:#{9293 + worker.nr}" + # server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true) end + diff --git a/doc/update/11.4-to-11.5.md b/doc/update/11.4-to-11.5.md new file mode 100644 index 00000000000..e64ab2acae2 --- /dev/null +++ b/doc/update/11.4-to-11.5.md @@ -0,0 +1,404 @@ +--- +comments: false +--- + +# From 11.4 to 11.5 + +Make sure you view this update guide from the branch (version) of GitLab you would +like to install (e.g., `11-5-stable`. You can select the branch in the version +dropdown at the top left corner of GitLab (below the menu bar). + +If the highest number stable branch is unclear please check the +[GitLab Blog](https://about.gitlab.com/blog/archives.html) for installation +guide links by version. + +### 1. Stop server + +```bash +sudo service gitlab stop +``` + +### 2. Backup + +NOTE: If you installed GitLab from source, make sure `rsync` is installed. + +```bash +cd /home/git/gitlab + +sudo -u git -H bundle exec rake gitlab:backup:create RAILS_ENV=production +``` + +### 3. Update Ruby + +NOTE: GitLab 11.0 and higher only support Ruby 2.4.x and dropped support for Ruby 2.3.x. Be +sure to upgrade your interpreter if necessary. + +You can check which version you are running with `ruby -v`. + +Download Ruby and compile it: + +```bash +mkdir /tmp/ruby && cd /tmp/ruby +curl --remote-name --progress https://cache.ruby-lang.org/pub/ruby/2.4/ruby-2.4.5.tar.gz +echo '4d650f302f1ec00256450b112bb023644b6ab6dd ruby-2.4.5.tar.gz' | shasum -c - && tar xzf ruby-2.4.5.tar.gz +cd ruby-2.4.5 + +./configure --disable-install-rdoc +make +sudo make install +``` + +Install Bundler: + +```bash +sudo gem install bundler --no-ri --no-rdoc +``` + +### 4. Update Node + +GitLab utilizes [webpack](http://webpack.js.org) to compile frontend assets. +This requires a minimum version of node v6.0.0. + +You can check which version you are running with `node -v`. If you are running +a version older than `v6.0.0` you will need to update to a newer version. You +can find instructions to install from community maintained packages or compile +from source at the nodejs.org website. + + + +GitLab also requires the use of yarn `>= v1.2.0` to manage JavaScript +dependencies. + +```bash +curl --silent --show-error https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add - +echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list +sudo apt-get update +sudo apt-get install yarn +``` + +More information can be found on the [yarn website](https://yarnpkg.com/en/docs/install). + +### 5. Update Go + +NOTE: GitLab 11.4 and higher only supports Go 1.10.x and newer, and dropped support for Go +1.9.x. Be sure to upgrade your installation if necessary. + +You can check which version you are running with `go version`. + +Download and install Go: + +```bash +# Remove former Go installation folder +sudo rm -rf /usr/local/go + +curl --remote-name --progress https://dl.google.com/go/go1.10.3.linux-amd64.tar.gz +echo 'fa1b0e45d3b647c252f51f5e1204aba049cde4af177ef9f2181f43004f901035 go1.10.3.linux-amd64.tar.gz' | shasum -a256 -c - && \ + sudo tar -C /usr/local -xzf go1.10.3.linux-amd64.tar.gz +sudo ln -sf /usr/local/go/bin/{go,godoc,gofmt} /usr/local/bin/ +rm go1.10.3.linux-amd64.tar.gz +``` + +### 6. Get latest code + +```bash +cd /home/git/gitlab + +sudo -u git -H git fetch --all --prune +sudo -u git -H git checkout -- db/schema.rb # local changes will be restored automatically +sudo -u git -H git checkout -- locale +``` + +For GitLab Community Edition: + +```bash +cd /home/git/gitlab + +sudo -u git -H git checkout 11-5-stable +``` + +OR + +For GitLab Enterprise Edition: + +```bash +cd /home/git/gitlab + +sudo -u git -H git checkout 11-5-stable-ee +``` + +### 7. Update gitlab-shell + +```bash +cd /home/git/gitlab-shell + +sudo -u git -H git fetch --all --tags --prune +sudo -u git -H git checkout v$( 0 + end + end + end + end +end diff --git a/lib/gitlab/cluster/puma_worker_killer_initializer.rb b/lib/gitlab/cluster/puma_worker_killer_initializer.rb new file mode 100644 index 00000000000..331c39f7d6b --- /dev/null +++ b/lib/gitlab/cluster/puma_worker_killer_initializer.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module Cluster + class PumaWorkerKillerInitializer + def self.start(puma_options, puma_per_worker_max_memory_mb: 650) + require 'puma_worker_killer' + + PumaWorkerKiller.config do |config| + # Note! ram is expressed in megabytes (whereas GITLAB_UNICORN_MEMORY_MAX is in bytes) + # Importantly RAM is for _all_workers (ie, the cluster), + # not each worker as is the case with GITLAB_UNICORN_MEMORY_MAX + worker_count = puma_options[:workers] || 1 + config.ram = worker_count * puma_per_worker_max_memory_mb + + config.frequency = 20 # seconds + + # We just want to limit to a fixed maximum, unrelated to the total amount + # of available RAM. + config.percent_usage = 0.98 + + # Ideally we'll never hit the maximum amount of memory. If so the worker + # is restarted already, thus periodically restarting workers shouldn't be + # needed. + config.rolling_restart_frequency = false + end + + PumaWorkerKiller.start + end + end + end +end diff --git a/spec/rack_servers/configs/config.ru b/spec/rack_servers/configs/config.ru new file mode 100644 index 00000000000..63daeb9eec5 --- /dev/null +++ b/spec/rack_servers/configs/config.ru @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +app = proc do |env| + if env['REQUEST_METHOD'] == 'GET' + [200, {}, ["#{Process.pid}"]] + else + Process.kill(env['QUERY_STRING'], Process.pid) + [200, {}, ['Bye!']] + end +end + +run app diff --git a/spec/rack_servers/configs/puma.rb b/spec/rack_servers/configs/puma.rb new file mode 100644 index 00000000000..d6b6d83d648 --- /dev/null +++ b/spec/rack_servers/configs/puma.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +# Note: this file is used for testing puma in `spec/rack_servers/puma_spec.rb` only +# Note: as per the convention in `config/puma.example.development.rb`, +# this file will replace `/home/git` with the actual working directory + +directory '/home/git' +threads 1, 10 +queue_requests false +pidfile '/home/git/gitlab/tmp/pids/puma.pid' +bind 'unix:///home/git/gitlab/tmp/tests/puma.socket' +workers 1 +preload_app! +worker_timeout 60 + +require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events" +require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer" + +before_fork do + Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options + Gitlab::Cluster::LifecycleEvents.do_before_fork +end + +Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options +on_worker_boot do + Gitlab::Cluster::LifecycleEvents.do_worker_start + File.write('/home/git/gitlab/tmp/tests/puma-worker-ready', Process.pid) +end + +on_restart do + Gitlab::Cluster::LifecycleEvents.do_master_restart +end diff --git a/spec/rack_servers/puma_spec.rb b/spec/rack_servers/puma_spec.rb new file mode 100644 index 00000000000..431fab87857 --- /dev/null +++ b/spec/rack_servers/puma_spec.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require 'fileutils' + +require 'excon' + +require 'spec_helper' + +describe 'Puma' do + before(:all) do + project_root = File.expand_path('../..', __dir__) + + config_lines = File.read('spec/rack_servers/configs/puma.rb') + .gsub('/home/git/gitlab', project_root) + .gsub('/home/git', project_root) + + config_path = File.join(project_root, "tmp/tests/puma.rb") + @socket_path = File.join(project_root, 'tmp/tests/puma.socket') + + File.write(config_path, config_lines) + + cmd = %W[puma -e test -C #{config_path} #{File.join(__dir__, 'configs/config.ru')}] + @puma_master_pid = spawn(*cmd) + wait_puma_boot!(@puma_master_pid, File.join(project_root, 'tmp/tests/puma-worker-ready')) + WebMock.allow_net_connect! + end + + %w[SIGQUIT SIGTERM SIGKILL].each do |signal| + it "has a worker that self-terminates on signal #{signal}" do + response = Excon.get('unix://', socket: @socket_path) + expect(response.status).to eq(200) + + worker_pid = response.body.to_i + expect(worker_pid).to be > 0 + + begin + Excon.post("unix://?#{signal}", socket: @socket_path) + rescue Excon::Error::Socket + # The connection may be closed abruptly + end + + expect(pid_gone?(worker_pid)).to eq(true) + end + end + + after(:all) do + begin + WebMock.disable_net_connect!(allow_localhost: true) + Process.kill('TERM', @puma_master_pid) + rescue Errno::ESRCH + end + end + + def wait_puma_boot!(master_pid, ready_file) + # We have seen the boot timeout after 2 minutes in CI so let's set it to 5 minutes. + timeout = 5 * 60 + timeout.times do + return if File.exist?(ready_file) + + pid = Process.waitpid(master_pid, Process::WNOHANG) + raise "puma failed to boot: #{$?}" unless pid.nil? + + sleep 1 + end + + raise "puma boot timed out after #{timeout} seconds" + end + + def pid_gone?(pid) + # Worker termination should take less than a second. That makes 10 + # seconds a generous timeout. + 10.times do + begin + Process.kill(0, pid) + rescue Errno::ESRCH + return true + end + + sleep 1 + end + + false + end +end diff --git a/spec/rack_servers/unicorn_spec.rb b/spec/rack_servers/unicorn_spec.rb new file mode 100644 index 00000000000..6a02ebcd048 --- /dev/null +++ b/spec/rack_servers/unicorn_spec.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +require 'fileutils' + +require 'excon' + +require 'spec_helper' + +describe 'Unicorn' do + before(:all) do + project_root = File.expand_path('../..', __dir__) + + config_lines = File.read('config/unicorn.rb.example') + .gsub('/home/git/gitlab', project_root) + .gsub('/home/git', project_root) + .split("\n") + + # Remove these because they make setup harder. + config_lines = config_lines.reject do |line| + %w[ + worker_processes + listen + pid + stderr_path + stdout_path + ].any? { |prefix| line.start_with?(prefix) } + end + + config_lines << "working_directory '#{Rails.root}'" + + # We want to have exactly 1 worker process because that makes it + # predictable which process will handle our requests. + config_lines << 'worker_processes 1' + + @socket_path = File.join(project_root, 'tmp/tests/unicorn.socket') + config_lines << "listen '#{@socket_path}'" + + ready_file = File.join(project_root, 'tmp/tests/unicorn-worker-ready') + FileUtils.rm_f(ready_file) + after_fork_index = config_lines.index { |l| l.start_with?('after_fork') } + config_lines.insert(after_fork_index + 1, "File.write('#{ready_file}', Process.pid)") + + config_path = File.join(project_root, 'tmp/tests/unicorn.rb') + File.write(config_path, config_lines.join("\n") + "\n") + + cmd = %W[unicorn -E test -c #{config_path} spec/rack_servers/configs/config.ru] + @unicorn_master_pid = spawn(*cmd) + wait_unicorn_boot!(@unicorn_master_pid, ready_file) + WebMock.allow_net_connect! + end + + %w[SIGQUIT SIGTERM SIGKILL].each do |signal| + it "has a worker that self-terminates on signal #{signal}" do + response = Excon.get('unix://', socket: @socket_path) + expect(response.status).to eq(200) + + worker_pid = response.body.to_i + expect(worker_pid).to be > 0 + + begin + Excon.post("unix://?#{signal}", socket: @socket_path) + rescue Excon::Error::Socket + # The connection may be closed abruptly + end + + expect(pid_gone?(worker_pid)).to eq(true) + end + end + + after(:all) do + WebMock.disable_net_connect!(allow_localhost: true) + Process.kill('TERM', @unicorn_master_pid) + end + + def wait_unicorn_boot!(master_pid, ready_file) + # We have seen the boot timeout after 2 minutes in CI so let's set it to 5 minutes. + timeout = 5 * 60 + timeout.times do + return if File.exist?(ready_file) + + pid = Process.waitpid(master_pid, Process::WNOHANG) + raise "unicorn failed to boot: #{$?}" unless pid.nil? + + sleep 1 + end + + raise "unicorn boot timed out after #{timeout} seconds" + end + + def pid_gone?(pid) + # Worker termination should take less than a second. That makes 10 + # seconds a generous timeout. + 10.times do + begin + Process.kill(0, pid) + rescue Errno::ESRCH + return true + end + + sleep 1 + end + + false + end +end diff --git a/spec/unicorn/unicorn_spec.rb b/spec/unicorn/unicorn_spec.rb deleted file mode 100644 index a4cf479a339..00000000000 --- a/spec/unicorn/unicorn_spec.rb +++ /dev/null @@ -1,114 +0,0 @@ -require 'fileutils' - -require 'excon' - -require 'spec_helper' - -describe 'Unicorn' do - before(:all) do - config_lines = File.read('config/unicorn.rb.example').split("\n") - - # Remove these because they make setup harder. - config_lines = config_lines.reject do |line| - %w[ - working_directory - worker_processes - listen - pid - stderr_path - stdout_path - ].any? { |prefix| line.start_with?(prefix) } - end - - config_lines << "working_directory '#{Rails.root}'" - - # We want to have exactly 1 worker process because that makes it - # predictable which process will handle our requests. - config_lines << 'worker_processes 1' - - @socket_path = File.join(Dir.pwd, 'tmp/tests/unicorn.socket') - config_lines << "listen '#{@socket_path}'" - - ready_file = 'tmp/tests/unicorn-worker-ready' - FileUtils.rm_f(ready_file) - after_fork_index = config_lines.index { |l| l.start_with?('after_fork') } - config_lines.insert(after_fork_index + 1, "File.write('#{ready_file}', Process.pid)") - - config_path = 'tmp/tests/unicorn.rb' - File.write(config_path, config_lines.join("\n") + "\n") - - rackup_path = 'tmp/tests/config.ru' - File.write(rackup_path, <<~EOS) - app = - proc do |env| - if env['REQUEST_METHOD'] == 'GET' - [200, {}, [Process.pid]] - else - Process.kill(env['QUERY_STRING'], Process.pid) - [200, {}, ['Bye!']] - end - end - - run app - EOS - - cmd = %W[unicorn -E test -c #{config_path} #{rackup_path}] - @unicorn_master_pid = spawn(*cmd) - wait_unicorn_boot!(@unicorn_master_pid, ready_file) - WebMock.allow_net_connect! - end - - %w[SIGQUIT SIGTERM SIGKILL].each do |signal| - it "has a worker that self-terminates on signal #{signal}" do - response = Excon.get('unix://', socket: @socket_path) - expect(response.status).to eq(200) - - worker_pid = response.body.to_i - expect(worker_pid).to be > 0 - - begin - Excon.post("unix://?#{signal}", socket: @socket_path) - rescue Excon::Error::Socket - # The connection may be closed abruptly - end - - expect(pid_gone?(worker_pid)).to eq(true) - end - end - - after(:all) do - WebMock.disable_net_connect!(allow_localhost: true) - Process.kill('TERM', @unicorn_master_pid) - end - - def wait_unicorn_boot!(master_pid, ready_file) - # We have seen the boot timeout after 2 minutes in CI so let's set it to 5 minutes. - timeout = 5 * 60 - timeout.times do - return if File.exist?(ready_file) - - pid = Process.waitpid(master_pid, Process::WNOHANG) - raise "unicorn failed to boot: #{$?}" unless pid.nil? - - sleep 1 - end - - raise "unicorn boot timed out after #{timeout} seconds" - end - - def pid_gone?(pid) - # Worker termination should take less than a second. That makes 10 - # seconds a generous timeout. - 10.times do - begin - Process.kill(0, pid) - rescue Errno::ESRCH - return true - end - - sleep 1 - end - - false - end -end -- cgit v1.2.1