diff options
-rw-r--r-- | app/workers/all_queues.yml | 3 | ||||
-rw-r--r-- | app/workers/concerns/each_shard_worker.rb | 31 | ||||
-rw-r--r-- | app/workers/repository_check/batch_worker.rb | 17 | ||||
-rw-r--r-- | app/workers/repository_check/dispatch_worker.rb | 15 | ||||
-rw-r--r-- | changelogs/unreleased/tc-repo-check-per-shard.yml | 5 | ||||
-rw-r--r-- | config/initializers/1_settings.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/shard_health_cache.rb | 41 | ||||
-rw-r--r-- | spec/lib/gitlab/shard_health_cache_spec.rb | 52 | ||||
-rw-r--r-- | spec/workers/repository_check/batch_worker_spec.rb | 29 | ||||
-rw-r--r-- | spec/workers/repository_check/dispatch_worker_spec.rb | 36 |
10 files changed, 220 insertions, 11 deletions
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 026f756582d..d06f51b1828 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -11,7 +11,7 @@ - cronjob:remove_old_web_hook_logs - cronjob:remove_unreferenced_lfs_objects - cronjob:repository_archive_cache -- cronjob:repository_check_batch +- cronjob:repository_check_dispatch - cronjob:requests_profiles - cronjob:schedule_update_user_activity - cronjob:stuck_ci_jobs @@ -71,6 +71,7 @@ - pipeline_processing:update_head_pipeline_for_merge_request - repository_check:repository_check_clear +- repository_check:repository_check_batch - repository_check:repository_check_single_repository - default diff --git a/app/workers/concerns/each_shard_worker.rb b/app/workers/concerns/each_shard_worker.rb new file mode 100644 index 00000000000..d0a728fb495 --- /dev/null +++ b/app/workers/concerns/each_shard_worker.rb @@ -0,0 +1,31 @@ +module EachShardWorker + extend ActiveSupport::Concern + include ::Gitlab::Utils::StrongMemoize + + def each_eligible_shard + Gitlab::ShardHealthCache.update(eligible_shard_names) + + eligible_shard_names.each do |shard_name| + yield shard_name + end + end + + # override when you want to filter out some shards + def eligible_shard_names + healthy_shard_names + end + + def healthy_shard_names + strong_memoize(:healthy_shard_names) do + healthy_ready_shards.map { |result| result.labels[:shard] } + end + end + + def healthy_ready_shards + ready_shards.select(&:success) + end + + def ready_shards + Gitlab::HealthChecks::GitalyCheck.readiness + end +end diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb index 898bca976ec..051382a08a9 100644 --- a/app/workers/repository_check/batch_worker.rb +++ b/app/workers/repository_check/batch_worker.rb @@ -3,13 +3,18 @@ module RepositoryCheck class BatchWorker include ApplicationWorker - include CronjobQueue + include RepositoryCheckQueue RUN_TIME = 3600 BATCH_SIZE = 10_000 - def perform + attr_reader :shard_name + + def perform(shard_name) + @shard_name = shard_name + return unless Gitlab::CurrentSettings.repository_checks_enabled + return unless Gitlab::ShardHealthCache.healthy_shard?(shard_name) start = Time.now @@ -39,18 +44,22 @@ module RepositoryCheck end def never_checked_project_ids(batch_size) - Project.where(last_repository_check_at: nil) + projects_on_shard.where(last_repository_check_at: nil) .where('created_at < ?', 24.hours.ago) .limit(batch_size).pluck(:id) end def old_checked_project_ids(batch_size) - Project.where.not(last_repository_check_at: nil) + projects_on_shard.where.not(last_repository_check_at: nil) .where('last_repository_check_at < ?', 1.month.ago) .reorder(last_repository_check_at: :asc) .limit(batch_size).pluck(:id) end + def projects_on_shard + Project.where(repository_storage: shard_name) + end + def try_obtain_lease(id) # Use a 24-hour timeout because on servers/projects where 'git fsck' is # super slow we definitely do not want to run it twice in parallel. diff --git a/app/workers/repository_check/dispatch_worker.rb b/app/workers/repository_check/dispatch_worker.rb new file mode 100644 index 00000000000..891a273afd7 --- /dev/null +++ b/app/workers/repository_check/dispatch_worker.rb @@ -0,0 +1,15 @@ +module RepositoryCheck + class DispatchWorker + include ApplicationWorker + include CronjobQueue + include ::EachShardWorker + + def perform + return unless Gitlab::CurrentSettings.repository_checks_enabled + + each_eligible_shard do |shard_name| + RepositoryCheck::BatchWorker.perform_async(shard_name) + end + end + end +end diff --git a/changelogs/unreleased/tc-repo-check-per-shard.yml b/changelogs/unreleased/tc-repo-check-per-shard.yml new file mode 100644 index 00000000000..227b6b0b93b --- /dev/null +++ b/changelogs/unreleased/tc-repo-check-per-shard.yml @@ -0,0 +1,5 @@ +--- +title: Run repository checks in parallel for each shard +merge_request: 20179 +author: +type: added diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 837e577bc91..550647ae1c6 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -279,7 +279,7 @@ Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *' Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker' Settings.cron_jobs['repository_check_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['repository_check_worker']['cron'] ||= '20 * * * *' -Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::BatchWorker' +Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::DispatchWorker' Settings.cron_jobs['admin_email_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['admin_email_worker']['cron'] ||= '0 0 * * 0' Settings.cron_jobs['admin_email_worker']['job_class'] = 'AdminEmailWorker' diff --git a/lib/gitlab/shard_health_cache.rb b/lib/gitlab/shard_health_cache.rb new file mode 100644 index 00000000000..3f03f46d4b1 --- /dev/null +++ b/lib/gitlab/shard_health_cache.rb @@ -0,0 +1,41 @@ +module Gitlab + class ShardHealthCache + HEALTHY_SHARDS_KEY = 'gitlab-healthy-shards'.freeze + HEALTHY_SHARDS_TIMEOUT = 300 + + # Clears the Redis set storing the list of healthy shards + def self.clear + Gitlab::Redis::Cache.with { |redis| redis.del(HEALTHY_SHARDS_KEY) } + end + + # Updates the list of healthy shards using a Redis set + # + # shards - An array of shard names to store + def self.update(shards) + Gitlab::Redis::Cache.with do |redis| + redis.multi do |m| + m.del(HEALTHY_SHARDS_KEY) + shards.each { |shard_name| m.sadd(HEALTHY_SHARDS_KEY, shard_name) } + m.expire(HEALTHY_SHARDS_KEY, HEALTHY_SHARDS_TIMEOUT) + end + end + end + + # Returns an array of strings of healthy shards + def self.cached_healthy_shards + Gitlab::Redis::Cache.with { |redis| redis.smembers(HEALTHY_SHARDS_KEY) } + end + + # Checks whether the given shard name is in the list of healthy shards. + # + # shard_name - The string to check + def self.healthy_shard?(shard_name) + Gitlab::Redis::Cache.with { |redis| redis.sismember(HEALTHY_SHARDS_KEY, shard_name) } + end + + # Returns the number of healthy shards in the Redis set + def self.healthy_shard_count + Gitlab::Redis::Cache.with { |redis| redis.scard(HEALTHY_SHARDS_KEY) } + end + end +end diff --git a/spec/lib/gitlab/shard_health_cache_spec.rb b/spec/lib/gitlab/shard_health_cache_spec.rb new file mode 100644 index 00000000000..e1a69261939 --- /dev/null +++ b/spec/lib/gitlab/shard_health_cache_spec.rb @@ -0,0 +1,52 @@ +require 'spec_helper' + +describe Gitlab::ShardHealthCache, :clean_gitlab_redis_cache do + let(:shards) { %w(foo bar) } + + before do + described_class.update(shards) + end + + describe '.clear' do + it 'leaves no shards around' do + described_class.clear + + expect(described_class.healthy_shard_count).to eq(0) + end + end + + describe '.update' do + it 'returns the healthy shards' do + expect(described_class.cached_healthy_shards).to match_array(shards) + end + + it 'replaces the existing set' do + new_set = %w(test me more) + described_class.update(new_set) + + expect(described_class.cached_healthy_shards).to match_array(new_set) + end + end + + describe '.healthy_shard_count' do + it 'returns the healthy shard count' do + expect(described_class.healthy_shard_count).to eq(2) + end + + it 'returns 0 if no shards are available' do + described_class.update([]) + + expect(described_class.healthy_shard_count).to eq(0) + end + end + + describe '.healthy_shard?' do + it 'returns true for a healthy shard' do + expect(described_class.healthy_shard?('foo')).to be_truthy + end + + it 'returns false for an unknown shard' do + expect(described_class.healthy_shard?('unknown')).to be_falsey + end + end +end diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb index 6cd27d2fafb..6bc551be9ad 100644 --- a/spec/workers/repository_check/batch_worker_spec.rb +++ b/spec/workers/repository_check/batch_worker_spec.rb @@ -1,14 +1,19 @@ require 'spec_helper' describe RepositoryCheck::BatchWorker do + let(:shard_name) { 'default' } subject { described_class.new } + before do + Gitlab::ShardHealthCache.update([shard_name]) + end + it 'prefers projects that have never been checked' do projects = create_list(:project, 3, created_at: 1.week.ago) projects[0].update_column(:last_repository_check_at, 4.months.ago) projects[2].update_column(:last_repository_check_at, 3.months.ago) - expect(subject.perform).to eq(projects.values_at(1, 0, 2).map(&:id)) + expect(subject.perform(shard_name)).to eq(projects.values_at(1, 0, 2).map(&:id)) end it 'sorts projects by last_repository_check_at' do @@ -17,7 +22,7 @@ describe RepositoryCheck::BatchWorker do projects[1].update_column(:last_repository_check_at, 4.months.ago) projects[2].update_column(:last_repository_check_at, 3.months.ago) - expect(subject.perform).to eq(projects.values_at(1, 2, 0).map(&:id)) + expect(subject.perform(shard_name)).to eq(projects.values_at(1, 2, 0).map(&:id)) end it 'excludes projects that were checked recently' do @@ -26,7 +31,14 @@ describe RepositoryCheck::BatchWorker do projects[1].update_column(:last_repository_check_at, 2.months.ago) projects[2].update_column(:last_repository_check_at, 3.days.ago) - expect(subject.perform).to eq([projects[1].id]) + expect(subject.perform(shard_name)).to eq([projects[1].id]) + end + + it 'excludes projects on another shard' do + projects = create_list(:project, 2, created_at: 1.week.ago) + projects[0].update_column(:repository_storage, 'other') + + expect(subject.perform(shard_name)).to eq([projects[1].id]) end it 'does nothing when repository checks are disabled' do @@ -34,13 +46,20 @@ describe RepositoryCheck::BatchWorker do stub_application_setting(repository_checks_enabled: false) - expect(subject.perform).to eq(nil) + expect(subject.perform(shard_name)).to eq(nil) + end + + it 'does nothing when shard is unhealthy' do + shard_name = 'broken' + create(:project, created_at: 1.week.ago, repository_storage: shard_name) + + expect(subject.perform(shard_name)).to eq(nil) end it 'skips projects created less than 24 hours ago' do project = create(:project) project.update_column(:created_at, 23.hours.ago) - expect(subject.perform).to eq([]) + expect(subject.perform(shard_name)).to eq([]) end end diff --git a/spec/workers/repository_check/dispatch_worker_spec.rb b/spec/workers/repository_check/dispatch_worker_spec.rb new file mode 100644 index 00000000000..20a4f1f5344 --- /dev/null +++ b/spec/workers/repository_check/dispatch_worker_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +describe RepositoryCheck::DispatchWorker do + subject { described_class.new } + + it 'does nothing when repository checks are disabled' do + stub_application_setting(repository_checks_enabled: false) + + expect(RepositoryCheck::BatchWorker).not_to receive(:perform_async) + + subject.perform + end + + it 'dispatches work to RepositoryCheck::BatchWorker' do + expect(RepositoryCheck::BatchWorker).to receive(:perform_async).at_least(:once) + + subject.perform + end + + context 'with unhealthy shard' do + let(:default_shard_name) { 'default' } + let(:unhealthy_shard_name) { 'unhealthy' } + let(:default_shard) { Gitlab::HealthChecks::Result.new(true, nil, shard: default_shard_name) } + let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new(false, '14:Connect Failed', shard: unhealthy_shard_name) } + + before do + allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, unhealthy_shard]) + end + + it 'only triggers RepositoryCheck::BatchWorker for healthy shards' do + expect(RepositoryCheck::BatchWorker).to receive(:perform_async).with('default') + + subject.perform + end + end +end |