summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-01-21 15:34:09 -0800
committerStan Hu <stanhu@gmail.com>2018-01-24 15:47:27 -0800
commit867126130946a9d3c3da7ff8e64b59b14da13599 (patch)
treeb67bf98dce78a0b953e63e84ff95a24c60b8af15
parent44728e0527bc7c5cf982be2fbbd26e24a79e5d8f (diff)
downloadgitlab-ce-sh-add-gitaly-health-check.tar.gz
Add a gRPC health check to ensure Gitaly is upsh-add-gitaly-health-check
This will enable Geo to skip shards that not operational. Relates to gitlab-org/gitlab-ee#4329
-rw-r--r--app/controllers/health_controller.rb3
-rw-r--r--changelogs/unreleased/sh-add-gitaly-health-check.yml5
-rw-r--r--lib/gitlab/gitaly_client.rb21
-rw-r--r--lib/gitlab/gitaly_client/health_check_service.rb19
-rw-r--r--lib/gitlab/health_checks/gitaly_check.rb53
-rw-r--r--spec/lib/gitlab/gitaly_client/health_check_service_spec.rb41
-rw-r--r--spec/lib/gitlab/gitaly_client_spec.rb25
-rw-r--r--spec/lib/gitlab/health_checks/gitaly_check_spec.rb57
8 files changed, 220 insertions, 4 deletions
diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb
index a931b456a93..16abf7bab7e 100644
--- a/app/controllers/health_controller.rb
+++ b/app/controllers/health_controller.rb
@@ -8,7 +8,8 @@ class HealthController < ActionController::Base
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
- Gitlab::HealthChecks::FsShardsCheck
+ Gitlab::HealthChecks::FsShardsCheck,
+ Gitlab::HealthChecks::GitalyCheck
].freeze
def readiness
diff --git a/changelogs/unreleased/sh-add-gitaly-health-check.yml b/changelogs/unreleased/sh-add-gitaly-health-check.yml
new file mode 100644
index 00000000000..32c4c5362b4
--- /dev/null
+++ b/changelogs/unreleased/sh-add-gitaly-health-check.yml
@@ -0,0 +1,5 @@
+---
+title: Add a gRPC health check to ensure Gitaly is up
+merge_request:
+author:
+type: added
diff --git a/lib/gitlab/gitaly_client.rb b/lib/gitlab/gitaly_client.rb
index 4507ea923b4..6bd256f57c7 100644
--- a/lib/gitlab/gitaly_client.rb
+++ b/lib/gitlab/gitaly_client.rb
@@ -1,6 +1,8 @@
require 'base64'
require 'gitaly'
+require 'grpc/health/v1/health_pb'
+require 'grpc/health/v1/health_services_pb'
module Gitlab
module GitalyClient
@@ -69,14 +71,27 @@ module Gitlab
@stubs ||= {}
@stubs[storage] ||= {}
@stubs[storage][name] ||= begin
- klass = Gitaly.const_get(name.to_s.camelcase.to_sym).const_get(:Stub)
- addr = address(storage)
- addr = addr.sub(%r{^tcp://}, '') if URI(addr).scheme == 'tcp'
+ klass = stub_class(name)
+ addr = stub_address(storage)
klass.new(addr, :this_channel_is_insecure)
end
end
end
+ def self.stub_class(name)
+ if name == :health_check
+ Grpc::Health::V1::Health::Stub
+ else
+ Gitaly.const_get(name.to_s.camelcase.to_sym).const_get(:Stub)
+ end
+ end
+
+ def self.stub_address(storage)
+ addr = address(storage)
+ addr = addr.sub(%r{^tcp://}, '') if URI(addr).scheme == 'tcp'
+ addr
+ end
+
def self.clear_stubs!
MUTEX.synchronize do
@stubs = nil
diff --git a/lib/gitlab/gitaly_client/health_check_service.rb b/lib/gitlab/gitaly_client/health_check_service.rb
new file mode 100644
index 00000000000..6c1213f5e20
--- /dev/null
+++ b/lib/gitlab/gitaly_client/health_check_service.rb
@@ -0,0 +1,19 @@
+module Gitlab
+ module GitalyClient
+ class HealthCheckService
+ def initialize(storage)
+ @storage = storage
+ end
+
+ # Sends a gRPC health ping to the Gitaly server for the storage shard.
+ def check
+ request = Grpc::Health::V1::HealthCheckRequest.new
+ response = GitalyClient.call(@storage, :health_check, :check, request, timeout: GitalyClient.fast_timeout)
+
+ { success: response&.status == :SERVING }
+ rescue GRPC::BadStatus => e
+ { success: false, message: e.to_s }
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/health_checks/gitaly_check.rb b/lib/gitlab/health_checks/gitaly_check.rb
new file mode 100644
index 00000000000..11416c002e3
--- /dev/null
+++ b/lib/gitlab/health_checks/gitaly_check.rb
@@ -0,0 +1,53 @@
+module Gitlab
+ module HealthChecks
+ class GitalyCheck
+ extend BaseAbstractCheck
+
+ METRIC_PREFIX = 'gitaly_health_check'.freeze
+
+ class << self
+ def readiness
+ repository_storages.map do |storage_name|
+ check(storage_name)
+ end
+ end
+
+ def metrics
+ repository_storages.flat_map do |storage_name|
+ result, elapsed = with_timing { check(storage_name) }
+ labels = { shard: storage_name }
+
+ [
+ metric("#{metric_prefix}_success", successful?(result) ? 1 : 0, **labels),
+ metric("#{metric_prefix}_latency_seconds", elapsed, **labels)
+ ].flatten
+ end
+ end
+
+ def check(storage_name)
+ serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name)
+ result = serv.check
+ HealthChecks::Result.new(result[:success], result[:message], shard: storage_name)
+ end
+
+ private
+
+ def metric_prefix
+ METRIC_PREFIX
+ end
+
+ def successful?(result)
+ result[:success]
+ end
+
+ def repository_storages
+ storages.keys
+ end
+
+ def storages
+ Gitlab.config.repositories.storages
+ end
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb b/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb
new file mode 100644
index 00000000000..2c7e5eb5787
--- /dev/null
+++ b/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb
@@ -0,0 +1,41 @@
+require 'spec_helper'
+
+describe Gitlab::GitalyClient::HealthCheckService do
+ let(:project) { create(:project) }
+ let(:storage_name) { project.repository_storage }
+
+ subject { described_class.new(storage_name) }
+
+ describe '#check' do
+ it 'successfully sends a health check request' do
+ expect(Gitlab::GitalyClient).to receive(:call).with(
+ storage_name,
+ :health_check,
+ :check,
+ instance_of(Grpc::Health::V1::HealthCheckRequest),
+ timeout: Gitlab::GitalyClient.fast_timeout).and_call_original
+
+ expect(subject.check).to eq({ success: true })
+ end
+
+ it 'receives an unsuccessful health check request' do
+ expect_any_instance_of(Grpc::Health::V1::Health::Stub)
+ .to receive(:check)
+ .and_return(double(status: false))
+
+ expect(subject.check).to eq({ success: false })
+ end
+
+ it 'gracefully handles gRPC error' do
+ expect(Gitlab::GitalyClient).to receive(:call).with(
+ storage_name,
+ :health_check,
+ :check,
+ instance_of(Grpc::Health::V1::HealthCheckRequest),
+ timeout: Gitlab::GitalyClient.fast_timeout)
+ .and_raise(GRPC::Unavailable.new('Connection refused'))
+
+ expect(subject.check).to eq({ success: false, message: '14:Connection refused' })
+ end
+ end
+end
diff --git a/spec/lib/gitlab/gitaly_client_spec.rb b/spec/lib/gitlab/gitaly_client_spec.rb
index 309b7338ef0..81bcd8c28ed 100644
--- a/spec/lib/gitlab/gitaly_client_spec.rb
+++ b/spec/lib/gitlab/gitaly_client_spec.rb
@@ -3,6 +3,31 @@ require 'spec_helper'
# We stub Gitaly in `spec/support/gitaly.rb` for other tests. We don't want
# those stubs while testing the GitalyClient itself.
describe Gitlab::GitalyClient, skip_gitaly_mock: true do
+ describe '.stub_class' do
+ it 'returns the gRPC health check stub' do
+ expect(described_class.stub_class(:health_check)).to eq(::Grpc::Health::V1::Health::Stub)
+ end
+
+ it 'returns a Gitaly stub' do
+ expect(described_class.stub_class(:ref_service)).to eq(::Gitaly::RefService::Stub)
+ end
+ end
+
+ describe '.stub_address' do
+ it 'returns the same result after being called multiple times' do
+ address = 'localhost:9876'
+ prefixed_address = "tcp://#{address}"
+
+ allow(Gitlab.config.repositories).to receive(:storages).and_return({
+ 'default' => { 'gitaly_address' => prefixed_address }
+ })
+
+ 2.times do
+ expect(described_class.stub_address('default')).to eq('localhost:9876')
+ end
+ end
+ end
+
describe '.stub' do
# Notice that this is referring to gRPC "stubs", not rspec stubs
before do
diff --git a/spec/lib/gitlab/health_checks/gitaly_check_spec.rb b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb
new file mode 100644
index 00000000000..724beefff69
--- /dev/null
+++ b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb
@@ -0,0 +1,57 @@
+require 'spec_helper'
+
+describe Gitlab::HealthChecks::GitalyCheck do
+ let(:result_class) { Gitlab::HealthChecks::Result }
+ let(:repository_storages) { ['default'] }
+
+ before do
+ allow(described_class).to receive(:repository_storages) { repository_storages }
+ end
+
+ describe '#readiness' do
+ subject { described_class.readiness }
+
+ before do
+ expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(gitaly_check)
+ end
+
+ context 'Gitaly server is up' do
+ let(:gitaly_check) { double(check: { success: true }) }
+
+ it { is_expected.to eq([result_class.new(true, nil, shard: 'default')]) }
+ end
+
+ context 'Gitaly server is down' do
+ let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) }
+
+ it { is_expected.to eq([result_class.new(false, 'Connection refused', shard: 'default')]) }
+ end
+ end
+
+ describe '#metrics' do
+ subject { described_class.metrics }
+
+ before do
+ expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(gitaly_check)
+ end
+
+ context 'Gitaly server is up' do
+ let(:gitaly_check) { double(check: { success: true }) }
+
+ it 'provides metrics' do
+ expect(subject).to all(have_attributes(labels: { shard: 'default' }))
+ expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_success', value: 1))
+ expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_latency_seconds', value: be >= 0))
+ end
+ end
+
+ context 'Gitaly server is down' do
+ let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) }
+
+ it 'provides metrics' do
+ expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_success', value: 0))
+ expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_latency_seconds', value: be >= 0))
+ end
+ end
+ end
+end