Diffstat (limited to 'app/workers/background_migration_worker.rb')
 app/workers/background_migration_worker.rb | 57 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/app/workers/background_migration_worker.rb b/app/workers/background_migration_worker.rb
index 74a12dbff77..70c4ad53726 100644
--- a/app/workers/background_migration_worker.rb
+++ b/app/workers/background_migration_worker.rb
@@ -24,10 +24,14 @@ class BackgroundMigrationWorker # rubocop:disable Scalability/IdempotentWorker
   # class_name - The class name of the background migration to run.
   # arguments - The arguments to pass to the migration class.
   # lease_attempts - The number of times we will try to obtain an exclusive
-  #                  lease on the class before running anyway. Pass 0 to always run.
+  #                  lease on the class before giving up. See MR for more discussion.
+  #                  https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45298#note_434304956
   def perform(class_name, arguments = [], lease_attempts = 5)
     with_context(caller_id: class_name.to_s) do
-      should_perform, ttl = perform_and_ttl(class_name)
+      attempts_left = lease_attempts - 1
+      should_perform, ttl = perform_and_ttl(class_name, attempts_left)
+
+      break if should_perform.nil?
 
       if should_perform
         Gitlab::BackgroundMigration.perform(class_name, arguments)
@@ -37,32 +41,41 @@ class BackgroundMigrationWorker # rubocop:disable Scalability/IdempotentWorker
         # we'll reschedule the job in such a way that it is picked up again around
         # the time the lease expires.
         self.class
-          .perform_in(ttl || self.class.minimum_interval, class_name, arguments)
+          .perform_in(ttl || self.class.minimum_interval, class_name, arguments, attempts_left)
       end
     end
   end
 
-  def perform_and_ttl(class_name)
-    if always_perform?
-      # In test environments `perform_in` will run right away. This can then
-      # lead to stack level errors in the above `#perform`. To work around this
-      # we'll just perform the migration right away in the test environment.
-      [true, nil]
-    else
-      lease = lease_for(class_name)
-      perform = !!lease.try_obtain
-
-      # If we managed to acquire the lease but the DB is not healthy, then we
-      # want to simply reschedule our job and try again _after_ the lease
-      # expires.
-      if perform && !healthy_database?
-        database_unhealthy_counter.increment
-
-        perform = false
-      end
+  def perform_and_ttl(class_name, attempts_left)
+    # In test environments `perform_in` will run right away. This can then
+    # lead to stack level errors in the above `#perform`. To work around this
+    # we'll just perform the migration right away in the test environment.
+    return [true, nil] if always_perform?
+
+    lease = lease_for(class_name)
+    lease_obtained = !!lease.try_obtain
+    healthy_db = healthy_database?
+    perform = lease_obtained && healthy_db
+
+    database_unhealthy_counter.increment if lease_obtained && !healthy_db
 
-      [perform, lease.ttl]
+    # When the DB is unhealthy or the lease can't be obtained after several tries,
+    # then give up on the job and log a warning. Otherwise we could end up in
+    # an infinite rescheduling loop. Jobs can be tracked in the database with the
+    # use of Gitlab::Database::BackgroundMigrationJob
+    if !perform && attempts_left < 0
+      msg = if !lease_obtained
+              'Job could not get an exclusive lease after several tries. Giving up.'
+            else
+              'Database was unhealthy after several tries. Giving up.'
+            end
+
+      Sidekiq.logger.warn(class: class_name, message: msg, job_id: jid)
+
+      return [nil, nil]
     end
+
+    [perform, lease.ttl]
   end
 
   def lease_for(class_name)
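
The patch threads a retry budget through every reschedule: perform decrements lease_attempts into attempts_left, hands it to perform_and_ttl, and forwards it through perform_in, so a job that can never obtain the lease (or never sees a healthy database) runs out of budget and stops instead of rescheduling forever. Below is a minimal plain-Ruby sketch of that control flow, not GitLab code: the lease, the worker, and the in-process rescheduling are stand-ins, and ExampleMigration is a placeholder name.

class FakeLease
  # Stand-in for the exclusive lease: always held by someone else.
  def try_obtain
    false
  end

  def ttl
    1
  end
end

class FakeWorker
  # Mirrors BackgroundMigrationWorker#perform: decrement the budget, stop
  # when perform_and_ttl signals nil, otherwise run or reschedule.
  def perform(class_name, arguments = [], lease_attempts = 5)
    attempts_left = lease_attempts - 1
    # The real worker feeds the ttl into perform_in; it is unused here.
    should_perform, _ttl = perform_and_ttl(class_name, attempts_left)

    return if should_perform.nil? # gave up; no further reschedule

    if should_perform
      puts "running #{class_name}"
    else
      # Recursion stands in for perform_in rescheduling a new job.
      perform(class_name, arguments, attempts_left)
    end
  end

  def perform_and_ttl(class_name, attempts_left)
    lease_obtained = FakeLease.new.try_obtain

    if !lease_obtained && attempts_left < 0
      puts "giving up on #{class_name} (lease never obtained)"
      return [nil, nil]
    end

    [lease_obtained, FakeLease.new.ttl]
  end
end

FakeWorker.new.perform('ExampleMigration')
# Reschedules with attempts_left 4, 3, 2, 1, 0, then gives up on the next run.

The detail worth noticing is the three-valued return: false still means "retry later", while nil means "give up now", which is why perform checks should_perform.nil? before the truthiness test.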
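
Two backward-compatibility notes on the new job signature. The calls below are illustrative, assuming the standard Sidekiq scheduling API the diff itself uses (perform_in), with ExampleMigration again a placeholder:

# A freshly enqueued job gets the default budget of five lease attempts:
BackgroundMigrationWorker.perform_async('ExampleMigration', [42])

# Self-reschedules pass the decremented budget as the third job argument:
BackgroundMigrationWorker.perform_in(2.minutes, 'ExampleMigration', [42], 4)

Jobs serialized into the queue before this change carry only two arguments, so Ruby's default argument value gives them the full budget of five attempts when they are next picked up.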