diff options
author | Jan Provaznik <jprovaznik@gitlab.com> | 2019-05-16 21:55:10 +0200 |
---|---|---|
committer | Jan Provaznik <jprovaznik@gitlab.com> | 2019-05-29 14:34:15 +0200 |
commit | 7b8bd6fd9cd9b813ae3575b08c869be767a10104 (patch) | |
tree | 151bda2a3034e44868695af9997c15e3855bd582 | |
parent | 65d65fed5c6fdc1e536a0e22c0664913e0a9b9dd (diff) | |
download | gitlab-ce-7b8bd6fd9cd9b813ae3575b08c869be767a10104.tar.gz |
Added rack-timeout for Puma
It assures that requests are aborted after 60 seconds, otherwise
an exception is raised. This exception is logged by Sentry, also
there is a Prometheus counter for measuring number of requests in each
state.
-rw-r--r-- | Gemfile | 1 | ||||
-rw-r--r-- | Gemfile.lock | 2 | ||||
-rw-r--r-- | config/initializers/rack_timeout.rb | 23 | ||||
-rw-r--r-- | doc/administration/monitoring/prometheus/gitlab_metrics.md | 22 | ||||
-rw-r--r-- | lib/gitlab/rack_timeout_observer.rb | 48 | ||||
-rw-r--r-- | spec/lib/gitlab/rack_timeout_observer_spec.rb | 56 |
6 files changed, 141 insertions, 11 deletions
@@ -154,6 +154,7 @@ end group :puma do gem 'puma', '~> 3.12', require: false gem 'puma_worker_killer', require: false + gem 'rack-timeout', require: false end # State machine diff --git a/Gemfile.lock b/Gemfile.lock index 9e922d8a3bb..3f85fa958bd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -680,6 +680,7 @@ GEM rack rack-test (1.1.0) rack (>= 1.0, < 3) + rack-timeout (0.5.1) rails (5.1.7) actioncable (= 5.1.7) actionmailer (= 5.1.7) @@ -1174,6 +1175,7 @@ DEPENDENCIES rack-cors (~> 1.0.0) rack-oauth2 (~> 1.9.3) rack-proxy (~> 0.6.0) + rack-timeout rails (= 5.1.7) rails-controller-testing rails-i18n (~> 5.1) diff --git a/config/initializers/rack_timeout.rb b/config/initializers/rack_timeout.rb new file mode 100644 index 00000000000..5c4f2dd708c --- /dev/null +++ b/config/initializers/rack_timeout.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +# Unicorn terminates any request which runs longer than 60 seconds. +# Puma doesn't have any timeout mechanism for terminating long-running +# requests, to make sure that server is not paralyzed by long-running +# or stuck queries, we add a request timeout which terminates the +# request after 60 seconds. This may be dangerous in some situations +# (https://github.com/heroku/rack-timeout/blob/master/doc/exceptions.md) +# and it's used only as the last resort. In such case this termination is +# logged and we should fix the potential timeout issue in the code itself. + +if defined?(::Puma) && !Rails.env.test? + require 'rack/timeout/base' + + Gitlab::Application.configure do |config| + config.middleware.insert_before(Rack::Runtime, Rack::Timeout, + service_timeout: 60, + wait_timeout: 90) + end + + observer = Gitlab::RackTimeoutObserver.new + Rack::Timeout.register_state_change_observer(:gitlab_rack_timeout, &observer.callback) +end diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 9c75403dd4c..4529bd3bee3 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -109,17 +109,17 @@ When Puma is used instead of Unicorn, following metrics are available: | Metric | Type | Since | Description | |:-------------------------------------------- |:------- |:----- |:----------- | -| puma_workers | Gauge | 12.0 | Total number of workers | -| puma_running_workers | Gauge | 12.0 | Number of booted workers | -| puma_stale_workers | Gauge | 12.0 | Number of old workers | -| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) | -| puma_running | Gauge | 12.0 | Number of running threads | -| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread | -| puma_active_connections | Gauge | 12.0 | Number of threads processing a request | -| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now | -| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads | -| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request | - +| puma_workers | Gauge | 12.0 | Total number of workers | +| puma_running_workers | Gauge | 12.0 | Number of booted workers | +| puma_stale_workers | Gauge | 12.0 | Number of old workers | +| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) | +| puma_running | Gauge | 12.0 | Number of running threads | +| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread | +| puma_active_connections | Gauge | 12.0 | Number of threads processing a request | +| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now | +| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads | +| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request | +| rack_state_total | Gauge | 12.0 | Number of requests in a given rack state | ## Metrics shared directory diff --git a/lib/gitlab/rack_timeout_observer.rb b/lib/gitlab/rack_timeout_observer.rb new file mode 100644 index 00000000000..1798e4bdb96 --- /dev/null +++ b/lib/gitlab/rack_timeout_observer.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + class RackTimeoutObserver + attr_reader :counter + + def initialize + @counter = Gitlab::Metrics.counter(:rack_state_total, 'Number of requests in a given rack state') + end + + # returns the Proc to be used as the observer callback block + def callback + method(:log_timeout_exception) + end + + private + + def log_timeout_exception(env) + info = env[::Rack::Timeout::ENV_INFO_KEY] + return unless info + + counter.increment(labels(info, env)) + end + + def labels(info, env) + params = controller_params(env) || grape_params(env) || {} + + { + controller: params['controller'], + action: params['action'], + route: params['route'], + state: info.state + } + end + + def controller_params(env) + env['action_dispatch.request.parameters'] + end + + def grape_params(env) + endpoint = env[Grape::Env::API_ENDPOINT] + route = endpoint&.route&.pattern&.origin + return unless route + + { 'route' => route } + end + end +end diff --git a/spec/lib/gitlab/rack_timeout_observer_spec.rb b/spec/lib/gitlab/rack_timeout_observer_spec.rb new file mode 100644 index 00000000000..5191b4f7db7 --- /dev/null +++ b/spec/lib/gitlab/rack_timeout_observer_spec.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::RackTimeoutObserver do + before do + allow(Gitlab::Metrics).to receive(:counter) + .with(any_args) + .and_return(Gitlab::Metrics::NullMetric.instance) + end + + describe '#callback' do + context 'when request times out' do + let(:env) do + { + ::Rack::Timeout::ENV_INFO_KEY => double(state: :timed_out), + 'action_dispatch.request.parameters' => { + 'controller' => 'foo', + 'action' => 'bar' + } + } + end + + subject { described_class.new } + + it 'increments timeout counter' do + expect(subject.counter) + .to receive(:increment) + .with({ controller: 'foo', action: 'bar', route: nil, state: :timed_out }) + + subject.callback.call(env) + end + end + + context 'when request expires' do + let(:endpoint) { double } + let(:env) do + { + ::Rack::Timeout::ENV_INFO_KEY => double(state: :expired), + Grape::Env::API_ENDPOINT => endpoint + } + end + + subject { described_class.new } + + it 'increments timeout counter' do + allow(endpoint).to receive_message_chain('route.pattern.origin') { 'foobar' } + expect(subject.counter) + .to receive(:increment) + .with({ controller: nil, action: nil, route: 'foobar', state: :expired }) + + subject.callback.call(env) + end + end + end +end |