Diffstat (limited to 'app/workers/background_migration/single_database_worker.rb')
-rw-r--r--  app/workers/background_migration/single_database_worker.rb  148
1 file changed, 148 insertions(+), 0 deletions(-)
diff --git a/app/workers/background_migration/single_database_worker.rb b/app/workers/background_migration/single_database_worker.rb
new file mode 100644
index 00000000000..b6661d4fd14
--- /dev/null
+++ b/app/workers/background_migration/single_database_worker.rb
@@ -0,0 +1,148 @@
+# frozen_string_literal: true
+
+module BackgroundMigration
+  module SingleDatabaseWorker
+    extend ActiveSupport::Concern
+
+    include ApplicationWorker
+
+    MAX_LEASE_ATTEMPTS = 5
+
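+    # Worker settings: always read from the primary database, retry up to
+    # 3 times, and run at throttled urgency so these jobs yield to
+    # higher-priority work. The first two job arguments (the migration class
+    # name and its arguments) are permitted in structured logs.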
+    included do
+      data_consistency :always
+
+      sidekiq_options retry: 3
+
+      feature_category :database
+      urgency :throttled
+      loggable_arguments 0, 1
+    end
+
+    class_methods do
+      # The minimum amount of time between processing two jobs of the same
+      # migration class.
+      #
+      # This interval is set to 2 minutes so autovacuuming and other
+      # maintenance related tasks have plenty of time to clean up after a
+      # migration has been performed.
+      def minimum_interval
+        2.minutes.to_i
+      end
+
+      def tracking_database
+        raise NotImplementedError, "#{self.name} does not implement #{__method__}"
+      end
+
+      def unhealthy_metric_name
+        raise NotImplementedError, "#{self.name} does not implement #{__method__}"
+      end
+    end
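+
+    # A concrete worker includes this concern and implements the two class
+    # methods above. A minimal sketch (the worker class and metric name are
+    # illustrative, not defined in this file):
+    #
+    #   class BackgroundMigration::CiDatabaseWorker # hypothetical worker
+    #     include BackgroundMigration::SingleDatabaseWorker
+    #
+    #     def self.tracking_database
+    #       :ci
+    #     end
+    #
+    #     def self.unhealthy_metric_name
+    #       :background_migration_database_health_reschedules
+    #     end
+    #   end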
+
+    # Performs the background migration.
+    #
+    # See Gitlab::BackgroundMigration.perform for more information.
+    #
+    # class_name - The class name of the background migration to run.
+    # arguments - The arguments to pass to the migration class.
+    # lease_attempts - The number of times we will try to obtain an exclusive
+    #   lease on the class before giving up. See the MR for more discussion:
+    #   https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45298#note_434304956
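+    #
+    # A scheduling sketch (the worker and migration names are illustrative):
+    #
+    #   BackgroundMigration::CiDatabaseWorker
+    #     .perform_in(2.minutes, 'CopyColumnUsingBackgroundMigrationJob', [1, 1000])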
+    def perform(class_name, arguments = [], lease_attempts = MAX_LEASE_ATTEMPTS)
+      job_coordinator.with_shared_connection do
+        perform_with_connection(class_name, arguments, lease_attempts)
+      end
+    end
+
+    private
+
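+    # Jobs for each tracking database go through a per-database coordinator,
+    # which also provides the shared connection the migration runs on.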
+    def job_coordinator
+      @job_coordinator ||= Gitlab::BackgroundMigration.coordinator_for_database(self.class.tracking_database)
+    end
+
+    def perform_with_connection(class_name, arguments, lease_attempts)
+      with_context(caller_id: class_name.to_s) do
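+        # lease_attempts starts at MAX_LEASE_ATTEMPTS when the job is first
+        # scheduled and is decremented on each reschedule, so any other value
+        # means this job has already been retried.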
+        retried = lease_attempts != MAX_LEASE_ATTEMPTS
+        attempts_left = lease_attempts - 1
+        should_perform, ttl = perform_and_ttl(class_name, attempts_left, retried)
+
+        break if should_perform.nil?
+
+        if should_perform
+          job_coordinator.perform(class_name, arguments)
+        else
+          # If the lease could not be obtained, either another process is
+          # running a migration of this class or we ran one recently. In that
+          # case we reschedule the job so that it is picked up again around
+          # the time the lease expires.
+          self.class
+            .perform_in(ttl || self.class.minimum_interval, class_name, arguments, attempts_left)
+        end
+      end
+    end
+
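+    # Returns a two-element array: whether the migration should run now, and
+    # the TTL remaining on the lease (used to delay the reschedule). [true, nil]
+    # skips all checks (test environment); [nil, nil] tells the caller to drop
+    # the job after its attempts are exhausted.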
+    def perform_and_ttl(class_name, attempts_left, retried)
+      # In test environments `perform_in` will run right away. This can then
+      # lead to stack level errors in the above `#perform`. To work around this
+      # we'll just perform the migration right away in the test environment.
+      return [true, nil] if always_perform?
+
+      lease = lease_for(class_name, retried)
+      lease_obtained = !!lease.try_obtain
+      healthy_db = healthy_database?
+      perform = lease_obtained && healthy_db
+
+      database_unhealthy_counter.increment if lease_obtained && !healthy_db
+
+      # If the database is unhealthy or the lease can't be obtained after
+      # several tries, give up on the job and log a warning; otherwise we
+      # could end up in an infinite rescheduling loop. Jobs can be tracked
+      # in the database using Gitlab::Database::BackgroundMigrationJob.
+      if !perform && attempts_left < 0
+        msg = if !lease_obtained
+                'Job could not get an exclusive lease after several tries. Giving up.'
+              else
+                'Database was unhealthy after several tries. Giving up.'
+              end
+
+        Sidekiq.logger.warn(class: class_name, message: msg, job_id: jid)
+
+        return [nil, nil]
+      end
+
+      [perform, lease.ttl]
+    end
+
+    def lease_for(class_name, retried)
+      Gitlab::ExclusiveLease
+        .new(lease_key_for(class_name, retried), timeout: self.class.minimum_interval)
+    end
+
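+    # For an illustrative worker and migration class, the generated lease keys
+    # would look like:
+    #
+    #   "BackgroundMigration::CiDatabaseWorker:CopyColumn"          # scheduled
+    #   "BackgroundMigration::CiDatabaseWorker:CopyColumn:retried"  # rescheduled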
+    def lease_key_for(class_name, retried)
+      key = "#{self.class.name}:#{class_name}"
+      # We use a different exclusive lock key for retried jobs to allow them
+      # to run concurrently with the scheduled jobs.
+      # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/68763 for more information.
+      key += ":retried" if retried
+      key
+    end
+
+    def always_perform?
+      Rails.env.test?
+    end
+
+    # Returns true if the database is healthy enough to allow the migration
+    # to be performed.
+    def healthy_database?
+      !Postgresql::ReplicationSlot.lag_too_great?
+    end
+
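+    # The counter name comes from the including worker's .unhealthy_metric_name,
+    # so each database-specific worker reports its own metric.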
+    def database_unhealthy_counter
+      Gitlab::Metrics.counter(
+        self.class.unhealthy_metric_name,
+        'The number of times a background migration is rescheduled because the database is unhealthy.'
+      )
+    end
+  end
+end