summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--app/workers/all_queues.yml3
-rw-r--r--app/workers/concerns/each_shard_worker.rb31
-rw-r--r--app/workers/repository_check/batch_worker.rb17
-rw-r--r--app/workers/repository_check/dispatch_worker.rb15
-rw-r--r--changelogs/unreleased/tc-repo-check-per-shard.yml5
-rw-r--r--config/initializers/1_settings.rb2
-rw-r--r--lib/gitlab/shard_health_cache.rb41
-rw-r--r--spec/lib/gitlab/shard_health_cache_spec.rb52
-rw-r--r--spec/workers/repository_check/batch_worker_spec.rb29
-rw-r--r--spec/workers/repository_check/dispatch_worker_spec.rb36
10 files changed, 220 insertions, 11 deletions
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index 026f756582d..d06f51b1828 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -11,7 +11,7 @@
- cronjob:remove_old_web_hook_logs
- cronjob:remove_unreferenced_lfs_objects
- cronjob:repository_archive_cache
-- cronjob:repository_check_batch
+- cronjob:repository_check_dispatch
- cronjob:requests_profiles
- cronjob:schedule_update_user_activity
- cronjob:stuck_ci_jobs
@@ -71,6 +71,7 @@
- pipeline_processing:update_head_pipeline_for_merge_request
- repository_check:repository_check_clear
+- repository_check:repository_check_batch
- repository_check:repository_check_single_repository
- default
diff --git a/app/workers/concerns/each_shard_worker.rb b/app/workers/concerns/each_shard_worker.rb
new file mode 100644
index 00000000000..d0a728fb495
--- /dev/null
+++ b/app/workers/concerns/each_shard_worker.rb
@@ -0,0 +1,31 @@
+module EachShardWorker
+ extend ActiveSupport::Concern
+ include ::Gitlab::Utils::StrongMemoize
+
+ def each_eligible_shard
+ Gitlab::ShardHealthCache.update(eligible_shard_names)
+
+ eligible_shard_names.each do |shard_name|
+ yield shard_name
+ end
+ end
+
+ # override when you want to filter out some shards
+ def eligible_shard_names
+ healthy_shard_names
+ end
+
+ def healthy_shard_names
+ strong_memoize(:healthy_shard_names) do
+ healthy_ready_shards.map { |result| result.labels[:shard] }
+ end
+ end
+
+ def healthy_ready_shards
+ ready_shards.select(&:success)
+ end
+
+ def ready_shards
+ Gitlab::HealthChecks::GitalyCheck.readiness
+ end
+end
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb
index 898bca976ec..051382a08a9 100644
--- a/app/workers/repository_check/batch_worker.rb
+++ b/app/workers/repository_check/batch_worker.rb
@@ -3,13 +3,18 @@
module RepositoryCheck
class BatchWorker
include ApplicationWorker
- include CronjobQueue
+ include RepositoryCheckQueue
RUN_TIME = 3600
BATCH_SIZE = 10_000
- def perform
+ attr_reader :shard_name
+
+ def perform(shard_name)
+ @shard_name = shard_name
+
return unless Gitlab::CurrentSettings.repository_checks_enabled
+ return unless Gitlab::ShardHealthCache.healthy_shard?(shard_name)
start = Time.now
@@ -39,18 +44,22 @@ module RepositoryCheck
end
def never_checked_project_ids(batch_size)
- Project.where(last_repository_check_at: nil)
+ projects_on_shard.where(last_repository_check_at: nil)
.where('created_at < ?', 24.hours.ago)
.limit(batch_size).pluck(:id)
end
def old_checked_project_ids(batch_size)
- Project.where.not(last_repository_check_at: nil)
+ projects_on_shard.where.not(last_repository_check_at: nil)
.where('last_repository_check_at < ?', 1.month.ago)
.reorder(last_repository_check_at: :asc)
.limit(batch_size).pluck(:id)
end
+ def projects_on_shard
+ Project.where(repository_storage: shard_name)
+ end
+
def try_obtain_lease(id)
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
# super slow we definitely do not want to run it twice in parallel.
diff --git a/app/workers/repository_check/dispatch_worker.rb b/app/workers/repository_check/dispatch_worker.rb
new file mode 100644
index 00000000000..891a273afd7
--- /dev/null
+++ b/app/workers/repository_check/dispatch_worker.rb
@@ -0,0 +1,15 @@
+module RepositoryCheck
+ class DispatchWorker
+ include ApplicationWorker
+ include CronjobQueue
+ include ::EachShardWorker
+
+ def perform
+ return unless Gitlab::CurrentSettings.repository_checks_enabled
+
+ each_eligible_shard do |shard_name|
+ RepositoryCheck::BatchWorker.perform_async(shard_name)
+ end
+ end
+ end
+end
diff --git a/changelogs/unreleased/tc-repo-check-per-shard.yml b/changelogs/unreleased/tc-repo-check-per-shard.yml
new file mode 100644
index 00000000000..227b6b0b93b
--- /dev/null
+++ b/changelogs/unreleased/tc-repo-check-per-shard.yml
@@ -0,0 +1,5 @@
+---
+title: Run repository checks in parallel for each shard
+merge_request: 20179
+author:
+type: added
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index 837e577bc91..550647ae1c6 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -279,7 +279,7 @@ Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *'
Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker'
Settings.cron_jobs['repository_check_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['repository_check_worker']['cron'] ||= '20 * * * *'
-Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::BatchWorker'
+Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::DispatchWorker'
Settings.cron_jobs['admin_email_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['admin_email_worker']['cron'] ||= '0 0 * * 0'
Settings.cron_jobs['admin_email_worker']['job_class'] = 'AdminEmailWorker'
diff --git a/lib/gitlab/shard_health_cache.rb b/lib/gitlab/shard_health_cache.rb
new file mode 100644
index 00000000000..3f03f46d4b1
--- /dev/null
+++ b/lib/gitlab/shard_health_cache.rb
@@ -0,0 +1,41 @@
+module Gitlab
+ class ShardHealthCache
+ HEALTHY_SHARDS_KEY = 'gitlab-healthy-shards'.freeze
+ HEALTHY_SHARDS_TIMEOUT = 300
+
+ # Clears the Redis set storing the list of healthy shards
+ def self.clear
+ Gitlab::Redis::Cache.with { |redis| redis.del(HEALTHY_SHARDS_KEY) }
+ end
+
+ # Updates the list of healthy shards using a Redis set
+ #
+ # shards - An array of shard names to store
+ def self.update(shards)
+ Gitlab::Redis::Cache.with do |redis|
+ redis.multi do |m|
+ m.del(HEALTHY_SHARDS_KEY)
+ shards.each { |shard_name| m.sadd(HEALTHY_SHARDS_KEY, shard_name) }
+ m.expire(HEALTHY_SHARDS_KEY, HEALTHY_SHARDS_TIMEOUT)
+ end
+ end
+ end
+
+ # Returns an array of strings of healthy shards
+ def self.cached_healthy_shards
+ Gitlab::Redis::Cache.with { |redis| redis.smembers(HEALTHY_SHARDS_KEY) }
+ end
+
+ # Checks whether the given shard name is in the list of healthy shards.
+ #
+ # shard_name - The string to check
+ def self.healthy_shard?(shard_name)
+ Gitlab::Redis::Cache.with { |redis| redis.sismember(HEALTHY_SHARDS_KEY, shard_name) }
+ end
+
+ # Returns the number of healthy shards in the Redis set
+ def self.healthy_shard_count
+ Gitlab::Redis::Cache.with { |redis| redis.scard(HEALTHY_SHARDS_KEY) }
+ end
+ end
+end
diff --git a/spec/lib/gitlab/shard_health_cache_spec.rb b/spec/lib/gitlab/shard_health_cache_spec.rb
new file mode 100644
index 00000000000..e1a69261939
--- /dev/null
+++ b/spec/lib/gitlab/shard_health_cache_spec.rb
@@ -0,0 +1,52 @@
+require 'spec_helper'
+
+describe Gitlab::ShardHealthCache, :clean_gitlab_redis_cache do
+ let(:shards) { %w(foo bar) }
+
+ before do
+ described_class.update(shards)
+ end
+
+ describe '.clear' do
+ it 'leaves no shards around' do
+ described_class.clear
+
+ expect(described_class.healthy_shard_count).to eq(0)
+ end
+ end
+
+ describe '.update' do
+ it 'returns the healthy shards' do
+ expect(described_class.cached_healthy_shards).to match_array(shards)
+ end
+
+ it 'replaces the existing set' do
+ new_set = %w(test me more)
+ described_class.update(new_set)
+
+ expect(described_class.cached_healthy_shards).to match_array(new_set)
+ end
+ end
+
+ describe '.healthy_shard_count' do
+ it 'returns the healthy shard count' do
+ expect(described_class.healthy_shard_count).to eq(2)
+ end
+
+ it 'returns 0 if no shards are available' do
+ described_class.update([])
+
+ expect(described_class.healthy_shard_count).to eq(0)
+ end
+ end
+
+ describe '.healthy_shard?' do
+ it 'returns true for a healthy shard' do
+ expect(described_class.healthy_shard?('foo')).to be_truthy
+ end
+
+ it 'returns false for an unknown shard' do
+ expect(described_class.healthy_shard?('unknown')).to be_falsey
+ end
+ end
+end
diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb
index 6cd27d2fafb..6bc551be9ad 100644
--- a/spec/workers/repository_check/batch_worker_spec.rb
+++ b/spec/workers/repository_check/batch_worker_spec.rb
@@ -1,14 +1,19 @@
require 'spec_helper'
describe RepositoryCheck::BatchWorker do
+ let(:shard_name) { 'default' }
subject { described_class.new }
+ before do
+ Gitlab::ShardHealthCache.update([shard_name])
+ end
+
it 'prefers projects that have never been checked' do
projects = create_list(:project, 3, created_at: 1.week.ago)
projects[0].update_column(:last_repository_check_at, 4.months.ago)
projects[2].update_column(:last_repository_check_at, 3.months.ago)
- expect(subject.perform).to eq(projects.values_at(1, 0, 2).map(&:id))
+ expect(subject.perform(shard_name)).to eq(projects.values_at(1, 0, 2).map(&:id))
end
it 'sorts projects by last_repository_check_at' do
@@ -17,7 +22,7 @@ describe RepositoryCheck::BatchWorker do
projects[1].update_column(:last_repository_check_at, 4.months.ago)
projects[2].update_column(:last_repository_check_at, 3.months.ago)
- expect(subject.perform).to eq(projects.values_at(1, 2, 0).map(&:id))
+ expect(subject.perform(shard_name)).to eq(projects.values_at(1, 2, 0).map(&:id))
end
it 'excludes projects that were checked recently' do
@@ -26,7 +31,14 @@ describe RepositoryCheck::BatchWorker do
projects[1].update_column(:last_repository_check_at, 2.months.ago)
projects[2].update_column(:last_repository_check_at, 3.days.ago)
- expect(subject.perform).to eq([projects[1].id])
+ expect(subject.perform(shard_name)).to eq([projects[1].id])
+ end
+
+ it 'excludes projects on another shard' do
+ projects = create_list(:project, 2, created_at: 1.week.ago)
+ projects[0].update_column(:repository_storage, 'other')
+
+ expect(subject.perform(shard_name)).to eq([projects[1].id])
end
it 'does nothing when repository checks are disabled' do
@@ -34,13 +46,20 @@ describe RepositoryCheck::BatchWorker do
stub_application_setting(repository_checks_enabled: false)
- expect(subject.perform).to eq(nil)
+ expect(subject.perform(shard_name)).to eq(nil)
+ end
+
+ it 'does nothing when shard is unhealthy' do
+ shard_name = 'broken'
+ create(:project, created_at: 1.week.ago, repository_storage: shard_name)
+
+ expect(subject.perform(shard_name)).to eq(nil)
end
it 'skips projects created less than 24 hours ago' do
project = create(:project)
project.update_column(:created_at, 23.hours.ago)
- expect(subject.perform).to eq([])
+ expect(subject.perform(shard_name)).to eq([])
end
end
diff --git a/spec/workers/repository_check/dispatch_worker_spec.rb b/spec/workers/repository_check/dispatch_worker_spec.rb
new file mode 100644
index 00000000000..20a4f1f5344
--- /dev/null
+++ b/spec/workers/repository_check/dispatch_worker_spec.rb
@@ -0,0 +1,36 @@
+require 'spec_helper'
+
+describe RepositoryCheck::DispatchWorker do
+ subject { described_class.new }
+
+ it 'does nothing when repository checks are disabled' do
+ stub_application_setting(repository_checks_enabled: false)
+
+ expect(RepositoryCheck::BatchWorker).not_to receive(:perform_async)
+
+ subject.perform
+ end
+
+ it 'dispatches work to RepositoryCheck::BatchWorker' do
+ expect(RepositoryCheck::BatchWorker).to receive(:perform_async).at_least(:once)
+
+ subject.perform
+ end
+
+ context 'with unhealthy shard' do
+ let(:default_shard_name) { 'default' }
+ let(:unhealthy_shard_name) { 'unhealthy' }
+ let(:default_shard) { Gitlab::HealthChecks::Result.new(true, nil, shard: default_shard_name) }
+ let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new(false, '14:Connect Failed', shard: unhealthy_shard_name) }
+
+ before do
+ allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, unhealthy_shard])
+ end
+
+ it 'only triggers RepositoryCheck::BatchWorker for healthy shards' do
+ expect(RepositoryCheck::BatchWorker).to receive(:perform_async).with('default')
+
+ subject.perform
+ end
+ end
+end