summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--app/controllers/admin/health_check_controller.rb2
-rw-r--r--app/controllers/health_controller.rb11
-rw-r--r--app/helpers/application_settings_helper.rb19
-rw-r--r--app/helpers/storage_health_helper.rb6
-rw-r--r--app/models/application_setting.rb12
-rw-r--r--app/views/admin/application_settings/_form.html.haml18
-rwxr-xr-xbin/storage_check11
-rw-r--r--changelogs/unreleased/bvl-circuitbreaker-process.yml5
-rw-r--r--config/routes.rb1
-rw-r--r--db/migrate/20171123094802_add_circuitbreaker_check_interval_to_application_settings.rb20
-rw-r--r--db/post_migrate/20171123101020_update_circuitbreaker_defaults.rb34
-rw-r--r--db/post_migrate/20171123101046_remove_old_circuitbreaker_config.rb26
-rw-r--r--db/schema.rb7
-rw-r--r--doc/api/settings.md3
-rw-r--r--lib/api/circuit_breakers.rb2
-rw-r--r--lib/gitlab/git/storage/checker.rb98
-rw-r--r--lib/gitlab/git/storage/circuit_breaker.rb106
-rw-r--r--lib/gitlab/git/storage/circuit_breaker_settings.rb12
-rw-r--r--lib/gitlab/git/storage/failure_info.rb39
-rw-r--r--lib/gitlab/git/storage/null_circuit_breaker.rb22
-rw-r--r--lib/gitlab/storage_check.rb11
-rw-r--r--lib/gitlab/storage_check/cli.rb69
-rw-r--r--lib/gitlab/storage_check/gitlab_caller.rb39
-rw-r--r--lib/gitlab/storage_check/option_parser.rb39
-rw-r--r--lib/gitlab/storage_check/response.rb77
-rw-r--r--spec/bin/storage_check_spec.rb13
-rw-r--r--spec/controllers/admin/health_check_controller_spec.rb4
-rw-r--r--spec/controllers/health_controller_spec.rb42
-rw-r--r--spec/features/admin/admin_health_check_spec.rb12
-rw-r--r--spec/lib/gitlab/git/storage/checker_spec.rb132
-rw-r--r--spec/lib/gitlab/git/storage/circuit_breaker_spec.rb163
-rw-r--r--spec/lib/gitlab/git/storage/failure_info_spec.rb70
-rw-r--r--spec/lib/gitlab/git/storage/health_spec.rb2
-rw-r--r--spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb4
-rw-r--r--spec/lib/gitlab/storage_check/cli_spec.rb19
-rw-r--r--spec/lib/gitlab/storage_check/gitlab_caller_spec.rb46
-rw-r--r--spec/lib/gitlab/storage_check/option_parser_spec.rb31
-rw-r--r--spec/lib/gitlab/storage_check/response_spec.rb54
-rw-r--r--spec/models/application_setting_spec.rb15
-rw-r--r--spec/models/repository_spec.rb8
-rw-r--r--spec/requests/api/circuit_breakers_spec.rb2
-rw-r--r--spec/requests/api/settings_spec.rb4
-rw-r--r--spec/spec_helper.rb12
-rw-r--r--spec/support/stored_repositories.rb21
-rw-r--r--spec/support/stub_configuration.rb2
45 files changed, 983 insertions, 362 deletions
diff --git a/app/controllers/admin/health_check_controller.rb b/app/controllers/admin/health_check_controller.rb
index 65a17828feb..61247b280b3 100644
--- a/app/controllers/admin/health_check_controller.rb
+++ b/app/controllers/admin/health_check_controller.rb
@@ -5,7 +5,7 @@ class Admin::HealthCheckController < Admin::ApplicationController
end
def reset_storage_health
- Gitlab::Git::Storage::CircuitBreaker.reset_all!
+ Gitlab::Git::Storage::FailureInfo.reset_all!
redirect_to admin_health_check_path,
notice: _('Git storage health information has been reset')
end
diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb
index 98c2aaa3526..a931b456a93 100644
--- a/app/controllers/health_controller.rb
+++ b/app/controllers/health_controller.rb
@@ -1,5 +1,5 @@
class HealthController < ActionController::Base
- protect_from_forgery with: :exception
+ protect_from_forgery with: :exception, except: :storage_check
include RequiresWhitelistedMonitoringClient
CHECKS = [
@@ -23,6 +23,15 @@ class HealthController < ActionController::Base
render_check_results(results)
end
+ def storage_check
+ results = Gitlab::Git::Storage::Checker.check_all
+
+ render json: {
+ check_interval: Gitlab::CurrentSettings.current_application_settings.circuitbreaker_check_interval,
+ results: results
+ }
+ end
+
private
def render_check_results(results)
diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb
index dccde46fa33..b12ea760668 100644
--- a/app/helpers/application_settings_helper.rb
+++ b/app/helpers/application_settings_helper.rb
@@ -124,17 +124,6 @@ module ApplicationSettingsHelper
_('The number of attempts GitLab will make to access a storage.')
end
- def circuitbreaker_backoff_threshold_help_text
- _("The number of failures after which GitLab will start temporarily "\
- "disabling access to a storage shard on a host")
- end
-
- def circuitbreaker_failure_wait_time_help_text
- _("When access to a storage fails. GitLab will prevent access to the "\
- "storage for the time specified here. This allows the filesystem to "\
- "recover. Repositories on failing shards are temporarly unavailable")
- end
-
def circuitbreaker_failure_reset_time_help_text
_("The time in seconds GitLab will keep failure information. When no "\
"failures occur during this time, information about the mount is reset.")
@@ -145,6 +134,11 @@ module ApplicationSettingsHelper
"timeout error will be raised.")
end
+ def circuitbreaker_check_interval_help_text
+ _("The time in seconds between storage checks. When a previous check did "\
+ "complete yet, GitLab will skip a check.")
+ end
+
def visible_attributes
[
:admin_notification_email,
@@ -154,10 +148,9 @@ module ApplicationSettingsHelper
:akismet_enabled,
:auto_devops_enabled,
:circuitbreaker_access_retries,
- :circuitbreaker_backoff_threshold,
+ :circuitbreaker_check_interval,
:circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_reset_time,
- :circuitbreaker_failure_wait_time,
:circuitbreaker_storage_timeout,
:clientside_sentry_dsn,
:clientside_sentry_enabled,
diff --git a/app/helpers/storage_health_helper.rb b/app/helpers/storage_health_helper.rb
index 4d2180f7eee..b76c1228220 100644
--- a/app/helpers/storage_health_helper.rb
+++ b/app/helpers/storage_health_helper.rb
@@ -18,16 +18,12 @@ module StorageHealthHelper
current_failures = circuit_breaker.failure_count
translation_params = { number_of_failures: current_failures,
- maximum_failures: maximum_failures,
- number_of_seconds: circuit_breaker.failure_wait_time }
+ maximum_failures: maximum_failures }
if circuit_breaker.circuit_broken?
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
"retry automatically. Reset storage information when the problem is "\
"resolved.") % translation_params
- elsif circuit_breaker.backing_off?
- _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
- "block access for %{number_of_seconds} seconds.") % translation_params
else
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"allow access on the next attempt.") % translation_params
diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb
index 3117c98c846..253e213af81 100644
--- a/app/models/application_setting.rb
+++ b/app/models/application_setting.rb
@@ -153,11 +153,10 @@ class ApplicationSetting < ActiveRecord::Base
presence: true,
numericality: { greater_than_or_equal_to: 0 }
- validates :circuitbreaker_backoff_threshold,
- :circuitbreaker_failure_count_threshold,
- :circuitbreaker_failure_wait_time,
+ validates :circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout,
+ :circuitbreaker_check_interval,
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 0 }
@@ -165,13 +164,6 @@ class ApplicationSetting < ActiveRecord::Base
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 1 }
- validates_each :circuitbreaker_backoff_threshold do |record, attr, value|
- if value.to_i >= record.circuitbreaker_failure_count_threshold
- record.errors.add(attr, _("The circuitbreaker backoff threshold should be "\
- "lower than the failure count threshold"))
- end
- end
-
validates :gitaly_timeout_default,
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 0 }
diff --git a/app/views/admin/application_settings/_form.html.haml b/app/views/admin/application_settings/_form.html.haml
index a9d0503bc73..3e2dbb07a6c 100644
--- a/app/views/admin/application_settings/_form.html.haml
+++ b/app/views/admin/application_settings/_form.html.haml
@@ -546,6 +546,12 @@
%fieldset
%legend Git Storage Circuitbreaker settings
.form-group
+ = f.label :circuitbreaker_check_interval, _('Check interval'), class: 'control-label col-sm-2'
+ .col-sm-10
+ = f.number_field :circuitbreaker_check_interval, class: 'form-control'
+ .help-block
+ = circuitbreaker_check_interval_help_text
+ .form-group
= f.label :circuitbreaker_access_retries, _('Number of access attempts'), class: 'control-label col-sm-2'
.col-sm-10
= f.number_field :circuitbreaker_access_retries, class: 'form-control'
@@ -558,18 +564,6 @@
.help-block
= circuitbreaker_storage_timeout_help_text
.form-group
- = f.label :circuitbreaker_backoff_threshold, _('Number of failures before backing off'), class: 'control-label col-sm-2'
- .col-sm-10
- = f.number_field :circuitbreaker_backoff_threshold, class: 'form-control'
- .help-block
- = circuitbreaker_backoff_threshold_help_text
- .form-group
- = f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2'
- .col-sm-10
- = f.number_field :circuitbreaker_failure_wait_time, class: 'form-control'
- .help-block
- = circuitbreaker_failure_wait_time_help_text
- .form-group
= f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
.col-sm-10
= f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control'
diff --git a/bin/storage_check b/bin/storage_check
new file mode 100755
index 00000000000..5a818732bd1
--- /dev/null
+++ b/bin/storage_check
@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'net/http'
+require 'json'
+require 'socket'
+require 'logger'
+
+require_relative '../lib/gitlab/storage_check'
+
+Gitlab::StorageCheck::CLI.start!(ARGV)
diff --git a/changelogs/unreleased/bvl-circuitbreaker-process.yml b/changelogs/unreleased/bvl-circuitbreaker-process.yml
new file mode 100644
index 00000000000..595dd13f724
--- /dev/null
+++ b/changelogs/unreleased/bvl-circuitbreaker-process.yml
@@ -0,0 +1,5 @@
+---
+title: Monitor NFS shards for circuitbreaker in a separate process
+merge_request: 15426
+author:
+type: changed
diff --git a/config/routes.rb b/config/routes.rb
index 4f27fea0e92..016140e0ede 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -42,6 +42,7 @@ Rails.application.routes.draw do
scope path: '-' do
get 'liveness' => 'health#liveness'
get 'readiness' => 'health#readiness'
+ post 'storage_check' => 'health#storage_check'
resources :metrics, only: [:index]
mount Peek::Railtie => '/peek'
diff --git a/db/migrate/20171123094802_add_circuitbreaker_check_interval_to_application_settings.rb b/db/migrate/20171123094802_add_circuitbreaker_check_interval_to_application_settings.rb
new file mode 100644
index 00000000000..213d46018fc
--- /dev/null
+++ b/db/migrate/20171123094802_add_circuitbreaker_check_interval_to_application_settings.rb
@@ -0,0 +1,20 @@
+class AddCircuitbreakerCheckIntervalToApplicationSettings < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ # Set this constant to true if this migration requires downtime.
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ def up
+ add_column_with_default :application_settings,
+ :circuitbreaker_check_interval,
+ :integer,
+ default: 1
+ end
+
+ def down
+ remove_column :application_settings,
+ :circuitbreaker_check_interval
+ end
+end
diff --git a/db/post_migrate/20171123101020_update_circuitbreaker_defaults.rb b/db/post_migrate/20171123101020_update_circuitbreaker_defaults.rb
new file mode 100644
index 00000000000..8e1c9e6d6bb
--- /dev/null
+++ b/db/post_migrate/20171123101020_update_circuitbreaker_defaults.rb
@@ -0,0 +1,34 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class UpdateCircuitbreakerDefaults < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ class ApplicationSetting < ActiveRecord::Base; end
+
+ def up
+ change_column_default :application_settings,
+ :circuitbreaker_failure_count_threshold,
+ 3
+ change_column_default :application_settings,
+ :circuitbreaker_storage_timeout,
+ 15
+
+ ApplicationSetting.update_all(circuitbreaker_failure_count_threshold: 3,
+ circuitbreaker_storage_timeout: 15)
+ end
+
+ def down
+ change_column_default :application_settings,
+ :circuitbreaker_failure_count_threshold,
+ 160
+ change_column_default :application_settings,
+ :circuitbreaker_storage_timeout,
+ 30
+
+ ApplicationSetting.update_all(circuitbreaker_failure_count_threshold: 160,
+ circuitbreaker_storage_timeout: 30)
+ end
+end
diff --git a/db/post_migrate/20171123101046_remove_old_circuitbreaker_config.rb b/db/post_migrate/20171123101046_remove_old_circuitbreaker_config.rb
new file mode 100644
index 00000000000..e646d4d3224
--- /dev/null
+++ b/db/post_migrate/20171123101046_remove_old_circuitbreaker_config.rb
@@ -0,0 +1,26 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class RemoveOldCircuitbreakerConfig < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ def up
+ remove_column :application_settings,
+ :circuitbreaker_backoff_threshold
+ remove_column :application_settings,
+ :circuitbreaker_failure_wait_time
+ end
+
+ def down
+ add_column :application_settings,
+ :circuitbreaker_backoff_threshold,
+ :integer,
+ default: 80
+ add_column :application_settings,
+ :circuitbreaker_failure_wait_time,
+ :integer,
+ default: 30
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 6ea3ab54742..4c697a4a384 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -135,12 +135,10 @@ ActiveRecord::Schema.define(version: 20171205190711) do
t.boolean "hashed_storage_enabled", default: false, null: false
t.boolean "project_export_enabled", default: true, null: false
t.boolean "auto_devops_enabled", default: false, null: false
- t.integer "circuitbreaker_failure_count_threshold", default: 160
- t.integer "circuitbreaker_failure_wait_time", default: 30
+ t.integer "circuitbreaker_failure_count_threshold", default: 3
t.integer "circuitbreaker_failure_reset_time", default: 1800
- t.integer "circuitbreaker_storage_timeout", default: 30
+ t.integer "circuitbreaker_storage_timeout", default: 15
t.integer "circuitbreaker_access_retries", default: 3
- t.integer "circuitbreaker_backoff_threshold", default: 80
t.boolean "throttle_unauthenticated_enabled", default: false, null: false
t.integer "throttle_unauthenticated_requests_per_period", default: 3600, null: false
t.integer "throttle_unauthenticated_period_in_seconds", default: 3600, null: false
@@ -150,6 +148,7 @@ ActiveRecord::Schema.define(version: 20171205190711) do
t.boolean "throttle_authenticated_web_enabled", default: false, null: false
t.integer "throttle_authenticated_web_requests_per_period", default: 7200, null: false
t.integer "throttle_authenticated_web_period_in_seconds", default: 3600, null: false
+ t.integer "circuitbreaker_check_interval", default: 1, null: false
t.boolean "password_authentication_enabled_for_web"
t.boolean "password_authentication_enabled_for_git", default: true
t.integer "gitaly_timeout_default", default: 55, null: false
diff --git a/doc/api/settings.md b/doc/api/settings.md
index 22fb2baa8ec..0e4758cda2d 100644
--- a/doc/api/settings.md
+++ b/doc/api/settings.md
@@ -70,10 +70,9 @@ PUT /application/settings
| `akismet_api_key` | string | no | API key for akismet spam protection |
| `akismet_enabled` | boolean | no | Enable or disable akismet spam protection |
| `circuitbreaker_access_retries | integer | no | The number of attempts GitLab will make to access a storage. |
-| `circuitbreaker_backoff_threshold | integer | no | The number of failures after which GitLab will start temporarily disabling access to a storage shard on a host. |
+| `circuitbreaker_check_interval` | integer | no | Number of seconds in between storage checks. |
| `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. |
| `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. |
-| `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. |
| `circuitbreaker_storage_timeout` | integer | no | Seconds to wait for a storage access attempt |
| `clientside_sentry_dsn` | string | no | Required if `clientside_sentry_dsn` is enabled |
| `clientside_sentry_enabled` | boolean | no | Enable Sentry error reporting for the client side |
diff --git a/lib/api/circuit_breakers.rb b/lib/api/circuit_breakers.rb
index 118883f5ea5..598c76f6168 100644
--- a/lib/api/circuit_breakers.rb
+++ b/lib/api/circuit_breakers.rb
@@ -41,7 +41,7 @@ module API
detail 'This feature was introduced in GitLab 9.5'
end
delete do
- Gitlab::Git::Storage::CircuitBreaker.reset_all!
+ Gitlab::Git::Storage::FailureInfo.reset_all!
end
end
end
diff --git a/lib/gitlab/git/storage/checker.rb b/lib/gitlab/git/storage/checker.rb
new file mode 100644
index 00000000000..de63cb4b40c
--- /dev/null
+++ b/lib/gitlab/git/storage/checker.rb
@@ -0,0 +1,98 @@
+module Gitlab
+ module Git
+ module Storage
+ class Checker
+ include CircuitBreakerSettings
+
+ attr_reader :storage_path, :storage, :hostname, :logger
+
+ def self.check_all(logger = Rails.logger)
+ threads = Gitlab.config.repositories.storages.keys.map do |storage_name|
+ Thread.new do
+ Thread.current[:result] = new(storage_name, logger).check_with_lease
+ end
+ end
+
+ threads.map do |thread|
+ thread.join
+ thread[:result]
+ end
+ end
+
+ def initialize(storage, logger = Rails.logger)
+ @storage = storage
+ config = Gitlab.config.repositories.storages[@storage]
+ @storage_path = config['path']
+ @logger = logger
+
+ @hostname = Gitlab::Environment.hostname
+ end
+
+ def check_with_lease
+ lease_key = "storage_check:#{cache_key}"
+ lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout)
+ result = { storage: storage, success: nil }
+
+ if uuid = lease.try_obtain
+ result[:success] = check
+
+ Gitlab::ExclusiveLease.cancel(lease_key, uuid)
+ else
+ logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running")
+ end
+
+ result
+ end
+
+ def check
+ if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries)
+ track_storage_accessible
+ true
+ else
+ track_storage_inaccessible
+ logger.error("#{hostname}: #{storage}: Not accessible.")
+ false
+ end
+ end
+
+ private
+
+ def track_storage_inaccessible
+ first_failure = current_failure_info.first_failure || Time.now
+ last_failure = Time.now
+
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, first_failure.to_i)
+ redis.hset(cache_key, :last_failure, last_failure.to_i)
+ redis.hincrby(cache_key, :failure_count, 1)
+ redis.expire(cache_key, failure_reset_time)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def track_storage_accessible
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, nil)
+ redis.hset(cache_key, :last_failure, nil)
+ redis.hset(cache_key, :failure_count, 0)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def maintain_known_keys(redis)
+ expire_time = Time.now.to_i + failure_reset_time
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
+ redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
+ end
+
+ def current_failure_info
+ FailureInfo.load(cache_key)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb
index 4328c0ea29b..898bb1b65be 100644
--- a/lib/gitlab/git/storage/circuit_breaker.rb
+++ b/lib/gitlab/git/storage/circuit_breaker.rb
@@ -4,22 +4,11 @@ module Gitlab
class CircuitBreaker
include CircuitBreakerSettings
- FailureInfo = Struct.new(:last_failure, :failure_count)
-
attr_reader :storage,
- :hostname,
- :storage_path
-
- delegate :last_failure, :failure_count, to: :failure_info
-
- def self.reset_all!
- Gitlab::Git::Storage.redis.with do |redis|
- all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
- redis.del(*all_storage_keys) unless all_storage_keys.empty?
- end
+ :hostname
- RequestStore.delete(:circuitbreaker_cache)
- end
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
def self.for_storage(storage)
cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do
@@ -46,9 +35,6 @@ module Gitlab
def initialize(storage, hostname)
@storage = storage
@hostname = hostname
-
- config = Gitlab.config.repositories.storages[@storage]
- @storage_path = config['path']
end
def perform
@@ -65,15 +51,6 @@ module Gitlab
failure_count > failure_count_threshold
end
- def backing_off?
- return false if no_failures?
-
- recent_failure = last_failure > failure_wait_time.seconds.ago
- too_many_failures = failure_count > backoff_threshold
-
- recent_failure && too_many_failures
- end
-
private
# The circuitbreaker can be enabled for the entire fleet using a Feature
@@ -86,88 +63,13 @@ module Gitlab
end
def failure_info
- @failure_info ||= get_failure_info
- end
-
- # Memoizing the `storage_available` call means we only do it once per
- # request when the storage is available.
- #
- # When the storage appears not available, and the memoized value is `false`
- # we might want to try again.
- def storage_available?
- return @storage_available if @storage_available
-
- if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
- .storage_available?(storage_path, storage_timeout, access_retries)
- track_storage_accessible
- else
- track_storage_inaccessible
- end
-
- @storage_available
+ @failure_info ||= FailureInfo.load(cache_key)
end
def check_storage_accessible!
if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
-
- if backing_off?
- raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
- end
-
- unless storage_available?
- raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
- end
- end
-
- def no_failures?
- last_failure.blank? && failure_count == 0
- end
-
- def track_storage_inaccessible
- @failure_info = FailureInfo.new(Time.now, failure_count + 1)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, last_failure.to_i)
- redis.hincrby(cache_key, :failure_count, 1)
- redis.expire(cache_key, failure_reset_time)
- maintain_known_keys(redis)
- end
- end
- end
-
- def track_storage_accessible
- @failure_info = FailureInfo.new(nil, 0)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, nil)
- redis.hset(cache_key, :failure_count, 0)
- maintain_known_keys(redis)
- end
- end
- end
-
- def maintain_known_keys(redis)
- expire_time = Time.now.to_i + failure_reset_time
- redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
- redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
- end
-
- def get_failure_info
- last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
- redis.hmget(cache_key, :last_failure, :failure_count)
- end
-
- last_failure = Time.at(last_failure.to_i) if last_failure.present?
-
- FailureInfo.new(last_failure, failure_count.to_i)
- end
-
- def cache_key
- @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end
end
diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb
index 257fe8cd8f0..c9e225f187d 100644
--- a/lib/gitlab/git/storage/circuit_breaker_settings.rb
+++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb
@@ -6,10 +6,6 @@ module Gitlab
application_settings.circuitbreaker_failure_count_threshold
end
- def failure_wait_time
- application_settings.circuitbreaker_failure_wait_time
- end
-
def failure_reset_time
application_settings.circuitbreaker_failure_reset_time
end
@@ -22,8 +18,12 @@ module Gitlab
application_settings.circuitbreaker_access_retries
end
- def backoff_threshold
- application_settings.circuitbreaker_backoff_threshold
+ def check_interval
+ application_settings.circuitbreaker_check_interval
+ end
+
+ def cache_key
+ @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private
diff --git a/lib/gitlab/git/storage/failure_info.rb b/lib/gitlab/git/storage/failure_info.rb
new file mode 100644
index 00000000000..387279c110d
--- /dev/null
+++ b/lib/gitlab/git/storage/failure_info.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module Git
+ module Storage
+ class FailureInfo
+ attr_accessor :first_failure, :last_failure, :failure_count
+
+ def self.reset_all!
+ Gitlab::Git::Storage.redis.with do |redis|
+ all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
+ redis.del(*all_storage_keys) unless all_storage_keys.empty?
+ end
+
+ RequestStore.delete(:circuitbreaker_cache)
+ end
+
+ def self.load(cache_key)
+ first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmget(cache_key, :first_failure, :last_failure, :failure_count)
+ end
+
+ last_failure = Time.at(last_failure.to_i) if last_failure.present?
+ first_failure = Time.at(first_failure.to_i) if first_failure.present?
+
+ new(first_failure, last_failure, failure_count.to_i)
+ end
+
+ def initialize(first_failure, last_failure, failure_count)
+ @first_failure = first_failure
+ @last_failure = last_failure
+ @failure_count = failure_count
+ end
+
+ def no_failures?
+ first_failure.blank? && last_failure.blank? && failure_count == 0
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb
index a12d52d295f..261c936c689 100644
--- a/lib/gitlab/git/storage/null_circuit_breaker.rb
+++ b/lib/gitlab/git/storage/null_circuit_breaker.rb
@@ -11,6 +11,9 @@ module Gitlab
# These will always have nil values
attr_reader :storage_path
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
+
def initialize(storage, hostname, error: nil)
@storage = storage
@hostname = hostname
@@ -29,16 +32,17 @@ module Gitlab
false
end
- def last_failure
- circuit_broken? ? Time.now : nil
- end
-
- def failure_count
- circuit_broken? ? failure_count_threshold : 0
- end
-
def failure_info
- Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count)
+ @failure_info ||=
+ if circuit_broken?
+ Gitlab::Git::Storage::FailureInfo.new(Time.now,
+ Time.now,
+ failure_count_threshold)
+ else
+ Gitlab::Git::Storage::FailureInfo.new(nil,
+ nil,
+ 0)
+ end
end
end
end
diff --git a/lib/gitlab/storage_check.rb b/lib/gitlab/storage_check.rb
new file mode 100644
index 00000000000..fe81513c9ec
--- /dev/null
+++ b/lib/gitlab/storage_check.rb
@@ -0,0 +1,11 @@
+require_relative 'storage_check/cli'
+require_relative 'storage_check/gitlab_caller'
+require_relative 'storage_check/option_parser'
+require_relative 'storage_check/response'
+
+module Gitlab
+ module StorageCheck
+ ENDPOINT = '/-/storage_check'.freeze
+ Options = Struct.new(:target, :token, :interval, :dryrun)
+ end
+end
diff --git a/lib/gitlab/storage_check/cli.rb b/lib/gitlab/storage_check/cli.rb
new file mode 100644
index 00000000000..04bf1bf1d26
--- /dev/null
+++ b/lib/gitlab/storage_check/cli.rb
@@ -0,0 +1,69 @@
+module Gitlab
+ module StorageCheck
+ class CLI
+ def self.start!(args)
+ runner = new(Gitlab::StorageCheck::OptionParser.parse!(args))
+ runner.start_loop
+ end
+
+ attr_reader :logger, :options
+
+ def initialize(options)
+ @options = options
+ @logger = Logger.new(STDOUT)
+ end
+
+ def start_loop
+ logger.info "Checking #{options.target} every #{options.interval} seconds"
+
+ if options.dryrun
+ logger.info "Dryrun, exiting..."
+ return
+ end
+
+ begin
+ loop do
+ response = GitlabCaller.new(options).call!
+ log_response(response)
+ update_settings(response)
+
+ sleep options.interval
+ end
+ rescue Interrupt
+ logger.info "Ending storage-check"
+ end
+ end
+
+ def update_settings(response)
+ previous_interval = options.interval
+
+ if response.valid?
+ options.interval = response.check_interval || previous_interval
+ end
+
+ if previous_interval != options.interval
+ logger.info "Interval changed: #{options.interval} seconds"
+ end
+ end
+
+ def log_response(response)
+ unless response.valid?
+ return logger.error("Invalid response checking nfs storage: #{response.http_response.inspect}")
+ end
+
+ if response.responsive_shards.any?
+ logger.debug("Responsive shards: #{response.responsive_shards.join(', ')}")
+ end
+
+ warnings = []
+ if response.skipped_shards.any?
+ warnings << "Skipped shards: #{response.skipped_shards.join(', ')}"
+ end
+ if response.failing_shards.any?
+ warnings << "Failing shards: #{response.failing_shards.join(', ')}"
+ end
+ logger.warn(warnings.join(' - ')) if warnings.any?
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/gitlab_caller.rb b/lib/gitlab/storage_check/gitlab_caller.rb
new file mode 100644
index 00000000000..44952b68844
--- /dev/null
+++ b/lib/gitlab/storage_check/gitlab_caller.rb
@@ -0,0 +1,39 @@
+require 'excon'
+
+module Gitlab
+ module StorageCheck
+ class GitlabCaller
+ def initialize(options)
+ @options = options
+ end
+
+ def call!
+ Gitlab::StorageCheck::Response.new(get_response)
+ rescue Errno::ECONNREFUSED, Excon::Error
+ # Server not ready, treated as invalid response.
+ Gitlab::StorageCheck::Response.new(nil)
+ end
+
+ def get_response
+ scheme, *other_parts = URI.split(@options.target)
+ socket_path = if scheme == 'unix'
+ other_parts.compact.join
+ end
+
+ connection = Excon.new(@options.target, socket: socket_path)
+ connection.post(path: Gitlab::StorageCheck::ENDPOINT,
+ headers: headers)
+ end
+
+ def headers
+ @headers ||= begin
+ headers = {}
+ headers['Content-Type'] = headers['Accept'] = 'application/json'
+ headers['TOKEN'] = @options.token if @options.token
+
+ headers
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/option_parser.rb b/lib/gitlab/storage_check/option_parser.rb
new file mode 100644
index 00000000000..66ed7906f97
--- /dev/null
+++ b/lib/gitlab/storage_check/option_parser.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module StorageCheck
+ class OptionParser
+ def self.parse!(args)
+ # Start out with some defaults
+ options = Gitlab::StorageCheck::Options.new(nil, nil, 1, false)
+
+ parser = ::OptionParser.new do |opts|
+ opts.banner = "Usage: bin/storage_check [options]"
+
+ opts.on('-t=string', '--target string', 'URL or socket to trigger storage check') do |value|
+ options.target = value
+ end
+
+ opts.on('-T=string', '--token string', 'Health token to use') { |value| options.token = value }
+
+ opts.on('-i=n', '--interval n', ::OptionParser::DecimalInteger, 'Seconds between checks') do |value|
+ options.interval = value
+ end
+
+ opts.on('-d', '--dryrun', "Output what will be performed, but don't start the process") do |value|
+ options.dryrun = value
+ end
+ end
+ parser.parse!(args)
+
+ unless options.target
+ raise ::OptionParser::InvalidArgument.new('Provide a URI to provide checks')
+ end
+
+ if URI.parse(options.target).scheme.nil?
+ raise ::OptionParser::InvalidArgument.new('Add the scheme to the target, `unix://`, `https://` or `http://` are supported')
+ end
+
+ options
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/response.rb b/lib/gitlab/storage_check/response.rb
new file mode 100644
index 00000000000..326ab236e3e
--- /dev/null
+++ b/lib/gitlab/storage_check/response.rb
@@ -0,0 +1,77 @@
+require 'json'
+
+module Gitlab
+ module StorageCheck
+ class Response
+ attr_reader :http_response
+
+ def initialize(http_response)
+ @http_response = http_response
+ end
+
+ def valid?
+ @http_response && (200...299).cover?(@http_response.status) &&
+ @http_response.headers['Content-Type'].include?('application/json') &&
+ parsed_response
+ end
+
+ def check_interval
+ return nil unless parsed_response
+
+ parsed_response['check_interval']
+ end
+
+ def responsive_shards
+ divided_results[:responsive_shards]
+ end
+
+ def skipped_shards
+ divided_results[:skipped_shards]
+ end
+
+ def failing_shards
+ divided_results[:failing_shards]
+ end
+
+ private
+
+ def results
+ return [] unless parsed_response
+
+ parsed_response['results']
+ end
+
+ def divided_results
+ return @divided_results if @divided_results
+
+ @divided_results = {}
+ @divided_results[:responsive_shards] = []
+ @divided_results[:skipped_shards] = []
+ @divided_results[:failing_shards] = []
+
+ results.each do |info|
+ name = info['storage']
+
+ case info['success']
+ when true
+ @divided_results[:responsive_shards] << name
+ when false
+ @divided_results[:failing_shards] << name
+ else
+ @divided_results[:skipped_shards] << name
+ end
+ end
+
+ @divided_results
+ end
+
+ def parsed_response
+ return @parsed_response if defined?(@parsed_response)
+
+ @parsed_response = JSON.parse(@http_response.body)
+ rescue JSON::JSONError
+ @parsed_response = nil
+ end
+ end
+ end
+end
diff --git a/spec/bin/storage_check_spec.rb b/spec/bin/storage_check_spec.rb
new file mode 100644
index 00000000000..02f6fcb6e3a
--- /dev/null
+++ b/spec/bin/storage_check_spec.rb
@@ -0,0 +1,13 @@
+require 'spec_helper'
+
+describe 'bin/storage_check' do
+ it 'is executable' do
+ command = %w[bin/storage_check -t unix://the/path/to/a/unix-socket.sock -i 10 -d]
+ expected_output = 'Checking unix://the/path/to/a/unix-socket.sock every 10 seconds'
+
+ output, status = Gitlab::Popen.popen(command, Rails.root.to_s)
+
+ expect(status).to eq(0)
+ expect(output).to include(expected_output)
+ end
+end
diff --git a/spec/controllers/admin/health_check_controller_spec.rb b/spec/controllers/admin/health_check_controller_spec.rb
index 0b8e0c8a065..d15ee0021d9 100644
--- a/spec/controllers/admin/health_check_controller_spec.rb
+++ b/spec/controllers/admin/health_check_controller_spec.rb
@@ -1,6 +1,6 @@
require 'spec_helper'
-describe Admin::HealthCheckController, broken_storage: true do
+describe Admin::HealthCheckController do
let(:admin) { create(:admin) }
before do
@@ -17,7 +17,7 @@ describe Admin::HealthCheckController, broken_storage: true do
describe 'POST reset_storage_health' do
it 'resets all storage health information' do
- expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)
+ expect(Gitlab::Git::Storage::FailureInfo).to receive(:reset_all!)
post :reset_storage_health
end
diff --git a/spec/controllers/health_controller_spec.rb b/spec/controllers/health_controller_spec.rb
index 9e9cf4f2c1f..95946def5f9 100644
--- a/spec/controllers/health_controller_spec.rb
+++ b/spec/controllers/health_controller_spec.rb
@@ -14,6 +14,48 @@ describe HealthController do
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false')
end
+ describe '#storage_check' do
+ before do
+ allow(Gitlab::RequestContext).to receive(:client_ip).and_return(whitelisted_ip)
+ end
+
+ subject { post :storage_check }
+
+ it 'checks all the configured storages' do
+ expect(Gitlab::Git::Storage::Checker).to receive(:check_all).and_call_original
+
+ subject
+ end
+
+ it 'returns the check interval' do
+ stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true')
+ stub_application_setting(circuitbreaker_check_interval: 10)
+
+ subject
+
+ expect(json_response['check_interval']).to eq(10)
+ end
+
+ context 'with failing storages', :broken_storage do
+ before do
+ stub_storage_settings(
+ broken: { path: 'tmp/tests/non-existent-repositories' }
+ )
+ end
+
+ it 'includes the failure information' do
+ subject
+
+ expected_results = [
+ { 'storage' => 'broken', 'success' => false },
+ { 'storage' => 'default', 'success' => true }
+ ]
+
+ expect(json_response['results']).to eq(expected_results)
+ end
+ end
+ end
+
describe '#readiness' do
shared_context 'endpoint responding with readiness data' do
let(:request_params) { {} }
diff --git a/spec/features/admin/admin_health_check_spec.rb b/spec/features/admin/admin_health_check_spec.rb
index 4430fc15501..ac3392b49f9 100644
--- a/spec/features/admin/admin_health_check_spec.rb
+++ b/spec/features/admin/admin_health_check_spec.rb
@@ -1,6 +1,6 @@
require 'spec_helper'
-feature "Admin Health Check", :feature, :broken_storage do
+feature "Admin Health Check", :feature do
include StubENV
before do
@@ -36,6 +36,7 @@ feature "Admin Health Check", :feature, :broken_storage do
context 'when services are up' do
before do
+ stub_storage_settings({}) # Hide the broken storage
visit admin_health_check_path
end
@@ -56,10 +57,8 @@ feature "Admin Health Check", :feature, :broken_storage do
end
end
- context 'with repository storage failures' do
+ context 'with repository storage failures', :broken_storage do
before do
- # Track a failure
- Gitlab::Git::Storage::CircuitBreaker.for_storage('broken').perform { nil } rescue nil
visit admin_health_check_path
end
@@ -67,9 +66,10 @@ feature "Admin Health Check", :feature, :broken_storage do
hostname = Gitlab::Environment.hostname
maximum_failures = Gitlab::CurrentSettings.current_application_settings
.circuitbreaker_failure_count_threshold
+ number_of_failures = maximum_failures + 1
- expect(page).to have_content('broken: failed storage access attempt on host:')
- expect(page).to have_content("#{hostname}: 1 of #{maximum_failures} failures.")
+ expect(page).to have_content("broken: #{number_of_failures} failed storage access attempts:")
+ expect(page).to have_content("#{hostname}: #{number_of_failures} of #{maximum_failures} failures.")
end
it 'allows resetting storage failures' do
diff --git a/spec/lib/gitlab/git/storage/checker_spec.rb b/spec/lib/gitlab/git/storage/checker_spec.rb
new file mode 100644
index 00000000000..d74c3bcb04c
--- /dev/null
+++ b/spec/lib/gitlab/git/storage/checker_spec.rb
@@ -0,0 +1,132 @@
+require 'spec_helper'
+
+describe Gitlab::Git::Storage::Checker, :clean_gitlab_redis_shared_state do
+ let(:storage_name) { 'default' }
+ let(:hostname) { Gitlab::Environment.hostname }
+ let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
+
+ subject(:checker) { described_class.new(storage_name) }
+
+ def value_from_redis(name)
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmget(cache_key, name)
+ end.first
+ end
+
+ def set_in_redis(name, value)
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmset(cache_key, name, value)
+ end.first
+ end
+
+ describe '.check_all' do
+ it 'calls a check for each storage' do
+ fake_checker_default = double
+ fake_checker_broken = double
+ fake_logger = fake_logger
+
+ expect(described_class).to receive(:new).with('default', fake_logger) { fake_checker_default }
+ expect(described_class).to receive(:new).with('broken', fake_logger) { fake_checker_broken }
+ expect(fake_checker_default).to receive(:check_with_lease)
+ expect(fake_checker_broken).to receive(:check_with_lease)
+
+ described_class.check_all(fake_logger)
+ end
+
+ context 'with broken storage', :broken_storage do
+ it 'returns the results' do
+ expected_result = [
+ { storage: 'default', success: true },
+ { storage: 'broken', success: false }
+ ]
+
+ expect(described_class.check_all).to eq(expected_result)
+ end
+ end
+ end
+
+ describe '#initialize' do
+ it 'assigns the settings' do
+ expect(checker.hostname).to eq(hostname)
+ expect(checker.storage).to eq('default')
+ expect(checker.storage_path).to eq(TestEnv.repos_path)
+ end
+ end
+
+ describe '#check_with_lease' do
+ it 'only allows one check at a time' do
+ expect(checker).to receive(:check).once { sleep 1 }
+
+ thread = Thread.new { checker.check_with_lease }
+ checker.check_with_lease
+ thread.join
+ end
+
+ it 'returns a result hash' do
+ expect(checker.check_with_lease).to eq(storage: 'default', success: true)
+ end
+ end
+
+ describe '#check' do
+ it 'tracks that the storage was accessible' do
+ set_in_redis(:failure_count, 10)
+ set_in_redis(:last_failure, Time.now.to_f)
+
+ checker.check
+
+ expect(value_from_redis(:failure_count).to_i).to eq(0)
+ expect(value_from_redis(:last_failure)).to be_empty
+ expect(value_from_redis(:first_failure)).to be_empty
+ end
+
+ it 'calls the check with the correct arguments' do
+ stub_application_setting(circuitbreaker_storage_timeout: 30,
+ circuitbreaker_access_retries: 3)
+
+ expect(Gitlab::Git::Storage::ForkedStorageCheck)
+ .to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
+ .and_call_original
+
+ checker.check
+ end
+
+ it 'returns `true`' do
+ expect(checker.check).to eq(true)
+ end
+
+ it 'maintains known storage keys' do
+ Timecop.freeze do
+ # Insert an old key to expire
+ old_entry = Time.now.to_i - 3.days.to_i
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, old_entry, 'to_be_removed')
+ end
+
+ checker.check
+
+ known_keys = Gitlab::Git::Storage.redis.with do |redis|
+ redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
+ end
+
+ expect(known_keys).to contain_exactly(cache_key)
+ end
+ end
+
+ context 'the storage is not available', :broken_storage do
+ let(:storage_name) { 'broken' }
+
+ it 'tracks that the storage was inaccessible' do
+ Timecop.freeze do
+ expect { checker.check }.to change { value_from_redis(:failure_count).to_i }.by(1)
+
+ expect(value_from_redis(:last_failure)).not_to be_empty
+ expect(value_from_redis(:first_failure)).not_to be_empty
+ end
+ end
+
+ it 'returns `false`' do
+ expect(checker.check).to eq(false)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
index f34c9f09057..210b90bfba9 100644
--- a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
+++ b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
@@ -1,11 +1,18 @@
require 'spec_helper'
-describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: true, broken_storage: true do
+describe Gitlab::Git::Storage::CircuitBreaker, :broken_storage do
let(:storage_name) { 'default' }
let(:circuit_breaker) { described_class.new(storage_name, hostname) }
let(:hostname) { Gitlab::Environment.hostname }
let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
+ def set_in_redis(name, value)
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
+ redis.hmset(cache_key, name, value)
+ end.first
+ end
+
before do
# Override test-settings for the circuitbreaker with something more realistic
# for these specs.
@@ -19,36 +26,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
)
end
- def value_from_redis(name)
- Gitlab::Git::Storage.redis.with do |redis|
- redis.hmget(cache_key, name)
- end.first
- end
-
- def set_in_redis(name, value)
- Gitlab::Git::Storage.redis.with do |redis|
- redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
- redis.hmset(cache_key, name, value)
- end.first
- end
-
- describe '.reset_all!' do
- it 'clears all entries form redis' do
- set_in_redis(:failure_count, 10)
-
- described_class.reset_all!
-
- key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }
-
- expect(key_exists).to be_falsey
- end
-
- it 'does not break when there are no keys in redis' do
- expect { described_class.reset_all! }.not_to raise_error
- end
- end
-
- describe '.for_storage' do
+ describe '.for_storage', :request_store do
it 'only builds a single circuitbreaker per storage' do
expect(described_class).to receive(:new).once.and_call_original
@@ -71,7 +49,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
it 'assigns the settings' do
expect(circuit_breaker.hostname).to eq(hostname)
expect(circuit_breaker.storage).to eq('default')
- expect(circuit_breaker.storage_path).to eq(TestEnv.repos_path)
end
end
@@ -91,9 +68,9 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
- describe '#failure_wait_time' do
+ describe '#check_interval' do
it 'reads the value from settings' do
- expect(circuit_breaker.failure_wait_time).to eq(1)
+ expect(circuit_breaker.check_interval).to eq(1)
end
end
@@ -114,12 +91,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(circuit_breaker.access_retries).to eq(4)
end
end
-
- describe '#backoff_threshold' do
- it 'reads the value from settings' do
- expect(circuit_breaker.backoff_threshold).to eq(5)
- end
- end
end
describe '#perform' do
@@ -134,19 +105,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
- it 'raises the correct exception when backing off' do
- Timecop.freeze do
- set_in_redis(:last_failure, 1.second.ago.to_f)
- set_in_redis(:failure_count, 90)
-
- expect { |b| circuit_breaker.perform(&b) }
- .to raise_error do |exception|
- expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing)
- expect(exception.retry_after).to eq(30)
- end
- end
- end
-
it 'yields the block' do
expect { |b| circuit_breaker.perform(&b) }
.to yield_control
@@ -170,54 +128,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
.to raise_error(Rugged::OSError)
end
- it 'tracks that the storage was accessible' do
- set_in_redis(:failure_count, 10)
- set_in_redis(:last_failure, Time.now.to_f)
-
- circuit_breaker.perform { '' }
-
- expect(value_from_redis(:failure_count).to_i).to eq(0)
- expect(value_from_redis(:last_failure)).to be_empty
- expect(circuit_breaker.failure_count).to eq(0)
- expect(circuit_breaker.last_failure).to be_nil
- end
-
- it 'maintains known storage keys' do
- Timecop.freeze do
- # Insert an old key to expire
- old_entry = Time.now.to_i - 3.days.to_i
- Gitlab::Git::Storage.redis.with do |redis|
- redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, old_entry, 'to_be_removed')
- end
-
- circuit_breaker.perform { '' }
-
- known_keys = Gitlab::Git::Storage.redis.with do |redis|
- redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
- end
-
- expect(known_keys).to contain_exactly(cache_key)
- end
- end
-
- it 'only performs the accessibility check once' do
- expect(Gitlab::Git::Storage::ForkedStorageCheck)
- .to receive(:storage_available?).once.and_call_original
-
- 2.times { circuit_breaker.perform { '' } }
- end
-
- it 'calls the check with the correct arguments' do
- stub_application_setting(circuitbreaker_storage_timeout: 30,
- circuitbreaker_access_retries: 3)
-
- expect(Gitlab::Git::Storage::ForkedStorageCheck)
- .to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
- .and_call_original
-
- circuit_breaker.perform { '' }
- end
-
context 'with the feature disabled' do
before do
stub_feature_flags(git_storage_circuit_breaker: false)
@@ -240,31 +150,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(result).to eq('hello')
end
end
-
- context 'the storage is not available' do
- let(:storage_name) { 'broken' }
-
- it 'raises the correct exception' do
- expect(circuit_breaker).to receive(:track_storage_inaccessible)
-
- expect { circuit_breaker.perform { '' } }
- .to raise_error do |exception|
- expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
- expect(exception.retry_after).to eq(30)
- end
- end
-
- it 'tracks that the storage was inaccessible' do
- Timecop.freeze do
- expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible)
-
- expect(value_from_redis(:failure_count).to_i).to eq(1)
- expect(value_from_redis(:last_failure)).not_to be_empty
- expect(circuit_breaker.failure_count).to eq(1)
- expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now)
- end
- end
- end
end
describe '#circuit_broken?' do
@@ -283,32 +168,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
- describe '#backing_off?' do
- it 'is true when there was a recent failure' do
- Timecop.freeze do
- set_in_redis(:last_failure, 1.second.ago.to_f)
- set_in_redis(:failure_count, 90)
-
- expect(circuit_breaker.backing_off?).to be_truthy
- end
- end
-
- context 'the `failure_wait_time` is set to 0' do
- before do
- stub_application_setting(circuitbreaker_failure_wait_time: 0)
- end
-
- it 'is working even when there are failures' do
- Timecop.freeze do
- set_in_redis(:last_failure, 0.seconds.ago.to_f)
- set_in_redis(:failure_count, 90)
-
- expect(circuit_breaker.backing_off?).to be_falsey
- end
- end
- end
- end
-
describe '#last_failure' do
it 'returns the last failure time' do
time = Time.parse("2017-05-26 17:52:30")
diff --git a/spec/lib/gitlab/git/storage/failure_info_spec.rb b/spec/lib/gitlab/git/storage/failure_info_spec.rb
new file mode 100644
index 00000000000..bae88fdda86
--- /dev/null
+++ b/spec/lib/gitlab/git/storage/failure_info_spec.rb
@@ -0,0 +1,70 @@
+require 'spec_helper'
+
+describe Gitlab::Git::Storage::FailureInfo, :broken_storage do
+ let(:storage_name) { 'default' }
+ let(:hostname) { Gitlab::Environment.hostname }
+ let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
+
+ def value_from_redis(name)
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmget(cache_key, name)
+ end.first
+ end
+
+ def set_in_redis(name, value)
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
+ redis.hmset(cache_key, name, value)
+ end.first
+ end
+
+ describe '.reset_all!' do
+ it 'clears all entries form redis' do
+ set_in_redis(:failure_count, 10)
+
+ described_class.reset_all!
+
+ key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }
+
+ expect(key_exists).to be_falsey
+ end
+
+ it 'does not break when there are no keys in redis' do
+ expect { described_class.reset_all! }.not_to raise_error
+ end
+ end
+
+ describe '.load' do
+ it 'loads failure information for a storage on a host' do
+ first_failure = Time.parse("2017-11-14 17:52:30")
+ last_failure = Time.parse("2017-11-14 18:54:37")
+ failure_count = 11
+
+ set_in_redis(:first_failure, first_failure.to_i)
+ set_in_redis(:last_failure, last_failure.to_i)
+ set_in_redis(:failure_count, failure_count.to_i)
+
+ info = described_class.load(cache_key)
+
+ expect(info.first_failure).to eq(first_failure)
+ expect(info.last_failure).to eq(last_failure)
+ expect(info.failure_count).to eq(failure_count)
+ end
+ end
+
+ describe '#no_failures?' do
+ it 'is true when there are no failures' do
+ info = described_class.new(nil, nil, 0)
+
+ expect(info.no_failures?).to be_truthy
+ end
+
+ it 'is false when there are failures' do
+ info = described_class.new(Time.parse("2017-11-14 17:52:30"),
+ Time.parse("2017-11-14 18:54:37"),
+ 20)
+
+ expect(info.no_failures?).to be_falsy
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/storage/health_spec.rb b/spec/lib/gitlab/git/storage/health_spec.rb
index d7a52a04fbb..bb670fc5d94 100644
--- a/spec/lib/gitlab/git/storage/health_spec.rb
+++ b/spec/lib/gitlab/git/storage/health_spec.rb
@@ -1,6 +1,6 @@
require 'spec_helper'
-describe Gitlab::Git::Storage::Health, clean_gitlab_redis_shared_state: true, broken_storage: true do
+describe Gitlab::Git::Storage::Health, broken_storage: true do
let(:host1_key) { 'storage_accessible:broken:web01' }
let(:host2_key) { 'storage_accessible:default:kiq01' }
diff --git a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
index 5db37f55e03..93ad20011de 100644
--- a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
+++ b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
@@ -27,7 +27,7 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
end
describe '#failure_info' do
- it { Timecop.freeze { expect(breaker.failure_info).to eq(Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(Time.now, breaker.failure_count_threshold)) } }
+ it { expect(breaker.failure_info.no_failures?).to be_falsy }
end
end
@@ -49,7 +49,7 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
end
describe '#failure_info' do
- it { expect(breaker.failure_info).to eq(Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(nil, 0)) }
+ it { expect(breaker.failure_info.no_failures?).to be_truthy }
end
end
diff --git a/spec/lib/gitlab/storage_check/cli_spec.rb b/spec/lib/gitlab/storage_check/cli_spec.rb
new file mode 100644
index 00000000000..6db0925899c
--- /dev/null
+++ b/spec/lib/gitlab/storage_check/cli_spec.rb
@@ -0,0 +1,19 @@
+require 'spec_helper'
+
+describe Gitlab::StorageCheck::CLI do
+ let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', nil, 1, false) }
+ subject(:runner) { described_class.new(options) }
+
+ describe '#update_settings' do
+ it 'updates the interval when changed in a valid response and logs the change' do
+ fake_response = double
+ expect(fake_response).to receive(:valid?).and_return(true)
+ expect(fake_response).to receive(:check_interval).and_return(42)
+ expect(runner.logger).to receive(:info)
+
+ runner.update_settings(fake_response)
+
+ expect(options.interval).to eq(42)
+ end
+ end
+end
diff --git a/spec/lib/gitlab/storage_check/gitlab_caller_spec.rb b/spec/lib/gitlab/storage_check/gitlab_caller_spec.rb
new file mode 100644
index 00000000000..d869022fd31
--- /dev/null
+++ b/spec/lib/gitlab/storage_check/gitlab_caller_spec.rb
@@ -0,0 +1,46 @@
+require 'spec_helper'
+
+describe Gitlab::StorageCheck::GitlabCaller do
+ let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', nil, nil, false) }
+ subject(:gitlab_caller) { described_class.new(options) }
+
+ describe '#call!' do
+ context 'when a socket is given' do
+ it 'calls a socket' do
+ fake_connection = double
+ expect(fake_connection).to receive(:post)
+ expect(Excon).to receive(:new).with('unix://tmp/socket.sock', socket: "tmp/socket.sock") { fake_connection }
+
+ gitlab_caller.call!
+ end
+ end
+
+ context 'when a host is given' do
+ let(:options) { Gitlab::StorageCheck::Options.new('http://localhost:8080', nil, nil, false) }
+
+ it 'it calls a http response' do
+ fake_connection = double
+ expect(Excon).to receive(:new).with('http://localhost:8080', socket: nil) { fake_connection }
+ expect(fake_connection).to receive(:post)
+
+ gitlab_caller.call!
+ end
+ end
+ end
+
+ describe '#headers' do
+ it 'Adds the JSON header' do
+ headers = gitlab_caller.headers
+
+ expect(headers['Content-Type']).to eq('application/json')
+ end
+
+ context 'when a token was provided' do
+ let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', 'atoken', nil, false) }
+
+ it 'adds it to the headers' do
+ expect(gitlab_caller.headers['TOKEN']).to eq('atoken')
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/storage_check/option_parser_spec.rb b/spec/lib/gitlab/storage_check/option_parser_spec.rb
new file mode 100644
index 00000000000..cad4dfbefcf
--- /dev/null
+++ b/spec/lib/gitlab/storage_check/option_parser_spec.rb
@@ -0,0 +1,31 @@
+require 'spec_helper'
+
+describe Gitlab::StorageCheck::OptionParser do
+ describe '.parse!' do
+ it 'assigns all options' do
+ args = %w(--target unix://tmp/hello/world.sock --token thetoken --interval 42)
+
+ options = described_class.parse!(args)
+
+ expect(options.token).to eq('thetoken')
+ expect(options.interval).to eq(42)
+ expect(options.target).to eq('unix://tmp/hello/world.sock')
+ end
+
+ it 'requires the interval to be a number' do
+ args = %w(--target unix://tmp/hello/world.sock --interval fortytwo)
+
+ expect { described_class.parse!(args) }.to raise_error(OptionParser::InvalidArgument)
+ end
+
+ it 'raises an error if the scheme is not included' do
+ args = %w(--target tmp/hello/world.sock)
+
+ expect { described_class.parse!(args) }.to raise_error(OptionParser::InvalidArgument)
+ end
+
+ it 'raises an error if both socket and host are missing' do
+ expect { described_class.parse!([]) }.to raise_error(OptionParser::InvalidArgument)
+ end
+ end
+end
diff --git a/spec/lib/gitlab/storage_check/response_spec.rb b/spec/lib/gitlab/storage_check/response_spec.rb
new file mode 100644
index 00000000000..0ff2963e443
--- /dev/null
+++ b/spec/lib/gitlab/storage_check/response_spec.rb
@@ -0,0 +1,54 @@
+require 'spec_helper'
+
+describe Gitlab::StorageCheck::Response do
+ let(:fake_json) do
+ {
+ check_interval: 42,
+ results: [
+ { storage: 'working', success: true },
+ { storage: 'skipped', success: nil },
+ { storage: 'failing', success: false }
+ ]
+ }.to_json
+ end
+
+ let(:fake_http_response) do
+ fake_response = instance_double("Excon::Response - Status check")
+ allow(fake_response).to receive(:status).and_return(200)
+ allow(fake_response).to receive(:body).and_return(fake_json)
+ allow(fake_response).to receive(:headers).and_return('Content-Type' => 'application/json')
+
+ fake_response
+ end
+ let(:response) { described_class.new(fake_http_response) }
+
+ describe '#valid?' do
+ it 'is valid for a success response with parseable JSON' do
+ expect(response).to be_valid
+ end
+ end
+
+ describe '#check_interval' do
+ it 'returns the result from the JSON' do
+ expect(response.check_interval).to eq(42)
+ end
+ end
+
+ describe '#responsive_shards' do
+ it 'contains the names of working shards' do
+ expect(response.responsive_shards).to contain_exactly('working')
+ end
+ end
+
+ describe '#skipped_shards' do
+ it 'contains the names of skipped shards' do
+ expect(response.skipped_shards).to contain_exactly('skipped')
+ end
+ end
+
+ describe '#failing_shards' do
+ it 'contains the name of failing shards' do
+ expect(response.failing_shards).to contain_exactly('failing')
+ end
+ end
+end
diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb
index 0b7e16cc33c..ef480e7a80a 100644
--- a/spec/models/application_setting_spec.rb
+++ b/spec/models/application_setting_spec.rb
@@ -115,9 +115,8 @@ describe ApplicationSetting do
end
context 'circuitbreaker settings' do
- [:circuitbreaker_backoff_threshold,
- :circuitbreaker_failure_count_threshold,
- :circuitbreaker_failure_wait_time,
+ [:circuitbreaker_failure_count_threshold,
+ :circuitbreaker_check_interval,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout].each do |field|
it "Validates #{field} as number" do
@@ -126,16 +125,6 @@ describe ApplicationSetting do
.is_greater_than_or_equal_to(0)
end
end
-
- it 'requires the `backoff_threshold` to be lower than the `failure_count_threshold`' do
- setting.circuitbreaker_failure_count_threshold = 10
- setting.circuitbreaker_backoff_threshold = 15
- failure_message = "The circuitbreaker backoff threshold should be lower "\
- "than the failure count threshold"
-
- expect(setting).not_to be_valid
- expect(setting.errors[:circuitbreaker_backoff_threshold]).to include(failure_message)
- end
end
context 'repository storages' do
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index 82ed1ecee33..358bc3dfb94 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -29,7 +29,9 @@ describe Repository do
def expect_to_raise_storage_error
expect { yield }.to raise_error do |exception|
storage_exceptions = [Gitlab::Git::Storage::Inaccessible, Gitlab::Git::CommandError, GRPC::Unavailable]
- expect(exception.class).to be_in(storage_exceptions)
+ known_exception = storage_exceptions.select { |e| exception.is_a?(e) }
+
+ expect(known_exception).not_to be_nil
end
end
@@ -634,9 +636,7 @@ describe Repository do
end
describe '#fetch_ref' do
- # Setting the var here, sidesteps the stub that makes gitaly raise an error
- # before the actual test call
- set(:broken_repository) { create(:project, :broken_storage).repository }
+ let(:broken_repository) { create(:project, :broken_storage).repository }
describe 'when storage is broken', :broken_storage do
it 'should raise a storage error' do
diff --git a/spec/requests/api/circuit_breakers_spec.rb b/spec/requests/api/circuit_breakers_spec.rb
index 3b858c40fd6..fe76f057115 100644
--- a/spec/requests/api/circuit_breakers_spec.rb
+++ b/spec/requests/api/circuit_breakers_spec.rb
@@ -47,7 +47,7 @@ describe API::CircuitBreakers do
describe 'DELETE circuit_breakers/repository_storage' do
it 'clears all circuit_breakers' do
- expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)
+ expect(Gitlab::Git::Storage::FailureInfo).to receive(:reset_all!)
delete api('/circuit_breakers/repository_storage', admin)
diff --git a/spec/requests/api/settings_spec.rb b/spec/requests/api/settings_spec.rb
index 63175c40a18..015d4b9a491 100644
--- a/spec/requests/api/settings_spec.rb
+++ b/spec/requests/api/settings_spec.rb
@@ -54,7 +54,7 @@ describe API::Settings, 'Settings' do
dsa_key_restriction: 2048,
ecdsa_key_restriction: 384,
ed25519_key_restriction: 256,
- circuitbreaker_failure_wait_time: 2
+ circuitbreaker_check_interval: 2
expect(response).to have_gitlab_http_status(200)
expect(json_response['default_projects_limit']).to eq(3)
@@ -75,7 +75,7 @@ describe API::Settings, 'Settings' do
expect(json_response['dsa_key_restriction']).to eq(2048)
expect(json_response['ecdsa_key_restriction']).to eq(384)
expect(json_response['ed25519_key_restriction']).to eq(256)
- expect(json_response['circuitbreaker_failure_wait_time']).to eq(2)
+ expect(json_response['circuitbreaker_check_interval']).to eq(2)
end
end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 242a2230b67..f94fb8733d5 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -121,18 +121,6 @@ RSpec.configure do |config|
reset_delivered_emails!
end
- # Stub the `ForkedStorageCheck.storage_available?` method unless
- # `:broken_storage` metadata is defined
- #
- # This check can be slow and is unnecessary in a test environment where we
- # know the storage is available, because we create it at runtime
- config.before(:example) do |example|
- unless example.metadata[:broken_storage]
- allow(Gitlab::Git::Storage::ForkedStorageCheck)
- .to receive(:storage_available?).and_return(true)
- end
- end
-
config.around(:each, :use_clean_rails_memory_store_caching) do |example|
caching_store = Rails.cache
Rails.cache = ActiveSupport::Cache::MemoryStore.new
diff --git a/spec/support/stored_repositories.rb b/spec/support/stored_repositories.rb
index f3deae0f455..f9121cce985 100644
--- a/spec/support/stored_repositories.rb
+++ b/spec/support/stored_repositories.rb
@@ -12,6 +12,25 @@ RSpec.configure do |config|
raise GRPC::Unavailable.new('Gitaly broken in this spec')
end
- Gitlab::Git::Storage::CircuitBreaker.reset_all!
+ # Track the maximum number of failures
+ first_failure = Time.parse("2017-11-14 17:52:30")
+ last_failure = Time.parse("2017-11-14 18:54:37")
+ failure_count = Gitlab::CurrentSettings
+ .current_application_settings
+ .circuitbreaker_failure_count_threshold + 1
+ cache_key = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}broken:#{Gitlab::Environment.hostname}"
+
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
+ redis.hset(cache_key, :first_failure, first_failure.to_i)
+ redis.hset(cache_key, :last_failure, last_failure.to_i)
+ redis.hset(cache_key, :failure_count, failure_count.to_i)
+ end
+ end
+ end
+
+ config.after(:each, :broken_storage) do
+ Gitlab::Git::Storage.redis.with(&:flushall)
end
end
diff --git a/spec/support/stub_configuration.rb b/spec/support/stub_configuration.rb
index 4ead78529c3..b36cf3c544c 100644
--- a/spec/support/stub_configuration.rb
+++ b/spec/support/stub_configuration.rb
@@ -43,6 +43,8 @@ module StubConfiguration
end
def stub_storage_settings(messages)
+ messages.deep_stringify_keys!
+
# Default storage is always required
messages['default'] ||= Gitlab.config.repositories.storages.default
messages.each do |storage_name, storage_settings|