summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-07-06 09:05:58 -0700
committerStan Hu <stanhu@gmail.com>2018-07-06 10:11:59 -0700
commitb33661d6ec8498ae1dadfb3b2be0e4a80e61f108 (patch)
treed6b80c77965177619a6f4d149a10b613815fbfe6
parenta291bcdf0d3ed892dcb805e11a43afcadbc20e8b (diff)
downloadgitlab-ce-b33661d6ec8498ae1dadfb3b2be0e4a80e61f108.tar.gz
Add ExclusiveLease guards for RepositoryCheck::{DispatchWorker,BatchWorker}
We saw in production that DispatchWorker was running about twice an hour, which would schedule twice as many jobs as it should. For some reason, BatchWorker was running 1000 times per hour, possibly due to Sidekiq RSS kills that caused these jobs to restart. Adding an ExclusiveLease prevents these jobs from running more than they should. Relates to https://gitlab.com/gitlab-com/infrastructure/issues/4526
-rw-r--r--app/workers/repository_check/batch_worker.rb20
-rw-r--r--app/workers/repository_check/dispatch_worker.rb13
-rw-r--r--spec/workers/repository_check/batch_worker_spec.rb8
-rw-r--r--spec/workers/repository_check/dispatch_worker_spec.rb8
4 files changed, 45 insertions, 4 deletions
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb
index 051382a08a9..07559ea479b 100644
--- a/app/workers/repository_check/batch_worker.rb
+++ b/app/workers/repository_check/batch_worker.rb
@@ -4,9 +4,11 @@ module RepositoryCheck
class BatchWorker
include ApplicationWorker
include RepositoryCheckQueue
+ include ExclusiveLeaseGuard
RUN_TIME = 3600
BATCH_SIZE = 10_000
+ LEASE_TIMEOUT = 1.hour
attr_reader :shard_name
@@ -16,6 +18,20 @@ module RepositoryCheck
return unless Gitlab::CurrentSettings.repository_checks_enabled
return unless Gitlab::ShardHealthCache.healthy_shard?(shard_name)
+ try_obtain_lease do
+ perform_repository_checks
+ end
+ end
+
+ def lease_timeout
+ LEASE_TIMEOUT
+ end
+
+ def lease_key
+ "repository_check_batch_worker:#{shard_name}"
+ end
+
+ def perform_repository_checks
start = Time.now
# This loop will break after a little more than one hour ('a little
@@ -26,7 +42,7 @@ module RepositoryCheck
project_ids.each do |project_id|
break if Time.now - start >= RUN_TIME
- next unless try_obtain_lease(project_id)
+ next unless try_obtain_lease_for_project(project_id)
SingleRepositoryWorker.new.perform(project_id)
end
@@ -60,7 +76,7 @@ module RepositoryCheck
Project.where(repository_storage: shard_name)
end
- def try_obtain_lease(id)
+ def try_obtain_lease_for_project(id)
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
# super slow we definitely do not want to run it twice in parallel.
Gitlab::ExclusiveLease.new(
diff --git a/app/workers/repository_check/dispatch_worker.rb b/app/workers/repository_check/dispatch_worker.rb
index 891a273afd7..96634f09a15 100644
--- a/app/workers/repository_check/dispatch_worker.rb
+++ b/app/workers/repository_check/dispatch_worker.rb
@@ -3,13 +3,22 @@ module RepositoryCheck
include ApplicationWorker
include CronjobQueue
include ::EachShardWorker
+ include ExclusiveLeaseGuard
+
+ LEASE_TIMEOUT = 1.hour
def perform
return unless Gitlab::CurrentSettings.repository_checks_enabled
- each_eligible_shard do |shard_name|
- RepositoryCheck::BatchWorker.perform_async(shard_name)
+ try_obtain_lease do
+ each_eligible_shard do |shard_name|
+ RepositoryCheck::BatchWorker.perform_async(shard_name)
+ end
end
end
+
+ def lease_timeout
+ LEASE_TIMEOUT
+ end
end
end
diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb
index 6bc551be9ad..ede271b2cdd 100644
--- a/spec/workers/repository_check/batch_worker_spec.rb
+++ b/spec/workers/repository_check/batch_worker_spec.rb
@@ -62,4 +62,12 @@ describe RepositoryCheck::BatchWorker do
expect(subject.perform(shard_name)).to eq([])
end
+
+ it 'does not run if the exclusive lease is taken' do
+ allow(subject).to receive(:try_obtain_lease).and_return(false)
+
+ expect(subject).not_to receive(:perform_repository_checks)
+
+ subject.perform(shard_name)
+ end
end
diff --git a/spec/workers/repository_check/dispatch_worker_spec.rb b/spec/workers/repository_check/dispatch_worker_spec.rb
index 20a4f1f5344..7877429aa8f 100644
--- a/spec/workers/repository_check/dispatch_worker_spec.rb
+++ b/spec/workers/repository_check/dispatch_worker_spec.rb
@@ -11,6 +11,14 @@ describe RepositoryCheck::DispatchWorker do
subject.perform
end
+ it 'does nothing if the exclusive lease is taken' do
+ allow(subject).to receive(:try_obtain_lease).and_return(false)
+
+ expect(RepositoryCheck::BatchWorker).not_to receive(:perform_async)
+
+ subject.perform
+ end
+
it 'dispatches work to RepositoryCheck::BatchWorker' do
expect(RepositoryCheck::BatchWorker).to receive(:perform_async).at_least(:once)