summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMayra Cabrera <mcabrera@gitlab.com>2019-05-29 18:53:45 +0000
committerMayra Cabrera <mcabrera@gitlab.com>2019-05-29 18:53:45 +0000
commit6aa0df5a0f9a71de3e64f8170ebdc1c041a238ff (patch)
tree3f9d88e2b1282418b1e011465d9c4ab6bc50cca4
parent70d1537dda66da8b319ceefde36195047b26a8fd (diff)
parent6d6bae66dffefeb3f34f2646b0e801b3c1002170 (diff)
downloadgitlab-ce-6aa0df5a0f9a71de3e64f8170ebdc1c041a238ff.tar.gz
Merge branch 'rack-timeout' into 'master'
Added rack-timeout Closes #53416 See merge request gitlab-org/gitlab-ce!28405
-rw-r--r--Gemfile1
-rw-r--r--Gemfile.lock2
-rw-r--r--config/initializers/rack_timeout.rb23
-rw-r--r--doc/administration/monitoring/prometheus/gitlab_metrics.md22
-rw-r--r--lib/gitlab/rack_timeout_observer.rb46
-rw-r--r--spec/lib/gitlab/rack_timeout_observer_spec.rb58
6 files changed, 141 insertions, 11 deletions
diff --git a/Gemfile b/Gemfile
index f5f963bb2ff..66348414506 100644
--- a/Gemfile
+++ b/Gemfile
@@ -154,6 +154,7 @@ end
group :puma do
gem 'puma', '~> 3.12', require: false
gem 'puma_worker_killer', require: false
+ gem 'rack-timeout', require: false
end
# State machine
diff --git a/Gemfile.lock b/Gemfile.lock
index 9e922d8a3bb..3f85fa958bd 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -680,6 +680,7 @@ GEM
rack
rack-test (1.1.0)
rack (>= 1.0, < 3)
+ rack-timeout (0.5.1)
rails (5.1.7)
actioncable (= 5.1.7)
actionmailer (= 5.1.7)
@@ -1174,6 +1175,7 @@ DEPENDENCIES
rack-cors (~> 1.0.0)
rack-oauth2 (~> 1.9.3)
rack-proxy (~> 0.6.0)
+ rack-timeout
rails (= 5.1.7)
rails-controller-testing
rails-i18n (~> 5.1)
diff --git a/config/initializers/rack_timeout.rb b/config/initializers/rack_timeout.rb
new file mode 100644
index 00000000000..5c4f2dd708c
--- /dev/null
+++ b/config/initializers/rack_timeout.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+# Unicorn terminates any request which runs longer than 60 seconds.
+# Puma doesn't have any timeout mechanism for terminating long-running
+# requests, to make sure that server is not paralyzed by long-running
+# or stuck queries, we add a request timeout which terminates the
+# request after 60 seconds. This may be dangerous in some situations
+# (https://github.com/heroku/rack-timeout/blob/master/doc/exceptions.md)
+# and it's used only as the last resort. In such case this termination is
+# logged and we should fix the potential timeout issue in the code itself.
+
+if defined?(::Puma) && !Rails.env.test?
+ require 'rack/timeout/base'
+
+ Gitlab::Application.configure do |config|
+ config.middleware.insert_before(Rack::Runtime, Rack::Timeout,
+ service_timeout: 60,
+ wait_timeout: 90)
+ end
+
+ observer = Gitlab::RackTimeoutObserver.new
+ Rack::Timeout.register_state_change_observer(:gitlab_rack_timeout, &observer.callback)
+end
diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md
index 9c75403dd4c..4529bd3bee3 100644
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -109,17 +109,17 @@ When Puma is used instead of Unicorn, following metrics are available:
| Metric | Type | Since | Description |
|:-------------------------------------------- |:------- |:----- |:----------- |
-| puma_workers | Gauge | 12.0 | Total number of workers |
-| puma_running_workers | Gauge | 12.0 | Number of booted workers |
-| puma_stale_workers | Gauge | 12.0 | Number of old workers |
-| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) |
-| puma_running | Gauge | 12.0 | Number of running threads |
-| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread |
-| puma_active_connections | Gauge | 12.0 | Number of threads processing a request |
-| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now |
-| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads |
-| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request |
-
+| puma_workers | Gauge | 12.0 | Total number of workers |
+| puma_running_workers | Gauge | 12.0 | Number of booted workers |
+| puma_stale_workers | Gauge | 12.0 | Number of old workers |
+| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) |
+| puma_running | Gauge | 12.0 | Number of running threads |
+| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread |
+| puma_active_connections | Gauge | 12.0 | Number of threads processing a request |
+| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now |
+| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads |
+| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request |
+| rack_state_total | Gauge | 12.0 | Number of requests in a given rack state |
## Metrics shared directory
diff --git a/lib/gitlab/rack_timeout_observer.rb b/lib/gitlab/rack_timeout_observer.rb
new file mode 100644
index 00000000000..80d3f7dea60
--- /dev/null
+++ b/lib/gitlab/rack_timeout_observer.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Gitlab
+ class RackTimeoutObserver
+ def initialize
+ @counter = Gitlab::Metrics.counter(:rack_state_total, 'Number of requests in a given rack state')
+ end
+
+ # returns the Proc to be used as the observer callback block
+ def callback
+ method(:log_timeout_exception)
+ end
+
+ private
+
+ def log_timeout_exception(env)
+ info = env[::Rack::Timeout::ENV_INFO_KEY]
+ return unless info
+
+ @counter.increment(labels(info, env))
+ end
+
+ def labels(info, env)
+ params = controller_params(env) || grape_params(env) || {}
+
+ {
+ controller: params['controller'],
+ action: params['action'],
+ route: params['route'],
+ state: info.state
+ }
+ end
+
+ def controller_params(env)
+ env['action_dispatch.request.parameters']
+ end
+
+ def grape_params(env)
+ endpoint = env[Grape::Env::API_ENDPOINT]
+ route = endpoint&.route&.pattern&.origin
+ return unless route
+
+ { 'route' => route }
+ end
+ end
+end
diff --git a/spec/lib/gitlab/rack_timeout_observer_spec.rb b/spec/lib/gitlab/rack_timeout_observer_spec.rb
new file mode 100644
index 00000000000..3dc1a8b68fb
--- /dev/null
+++ b/spec/lib/gitlab/rack_timeout_observer_spec.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::RackTimeoutObserver do
+ let(:counter) { Gitlab::Metrics::NullMetric.instance }
+
+ before do
+ allow(Gitlab::Metrics).to receive(:counter)
+ .with(any_args)
+ .and_return(counter)
+ end
+
+ describe '#callback' do
+ context 'when request times out' do
+ let(:env) do
+ {
+ ::Rack::Timeout::ENV_INFO_KEY => double(state: :timed_out),
+ 'action_dispatch.request.parameters' => {
+ 'controller' => 'foo',
+ 'action' => 'bar'
+ }
+ }
+ end
+
+ subject { described_class.new }
+
+ it 'increments timeout counter' do
+ expect(counter)
+ .to receive(:increment)
+ .with({ controller: 'foo', action: 'bar', route: nil, state: :timed_out })
+
+ subject.callback.call(env)
+ end
+ end
+
+ context 'when request expires' do
+ let(:endpoint) { double }
+ let(:env) do
+ {
+ ::Rack::Timeout::ENV_INFO_KEY => double(state: :expired),
+ Grape::Env::API_ENDPOINT => endpoint
+ }
+ end
+
+ subject { described_class.new }
+
+ it 'increments timeout counter' do
+ allow(endpoint).to receive_message_chain('route.pattern.origin') { 'foobar' }
+ expect(counter)
+ .to receive(:increment)
+ .with({ controller: nil, action: nil, route: 'foobar', state: :expired })
+
+ subject.callback.call(env)
+ end
+ end
+ end
+end