author     Shinya Maeda <shinya@gitlab.com>    2018-06-27 16:05:16 +0900
committer  Shinya Maeda <shinya@gitlab.com>    2018-06-27 16:05:16 +0900
commit     bdcbe0b19d8dfdbd50ff78a237173ee6e14397e5 (patch)
tree       2b340ad1899872343d483ec81994ffcebe307149
parent     292cf668905a55e7b305c67b314cb039d2681a54 (diff)
download   gitlab-ce-background-migration-delta-splitter.tar.gz
Introduce delta_max for background migrations (background-migration-delta-splitter)
-rw-r--r--  lib/gitlab/database/migration_helpers.rb  16
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 4fe5b4cc835..dd0118788bd 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -5,6 +5,7 @@ module Gitlab
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
+ BACKGROUND_MIGRATION_DELTA_MAX = 500_000
# Adds `created_at` and `updated_at` columns with timezone information.
#
@@ -974,7 +975,7 @@ into similar problems in the future (e.g. when new tables are created).
# # do something
# end
# end
- def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
+ def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, delta_max: BACKGROUND_MIGRATION_DELTA_MAX)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
# To not overload the worker too much we enforce a minimum interval both
@@ -986,10 +987,15 @@ into similar problems in the future (e.g. when new tables are created).
model_class.each_batch(of: batch_size) do |relation, index|
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
- # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
- # the same time, which is not helpful in most cases where we wish to
- # spread the work over time.
- BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
+ while start_id < end_id
+ tmp_end_id = [start_id + delta_max, end_id].min
+
+ # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
+ # the same time, which is not helpful in most cases where we wish to
+ # spread the work over time.
+ BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, tmp_end_id])
+ start_id = tmp_end_id
+ end
end
end
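
For illustration only (not part of the commit): a minimal Ruby sketch of the range splitting that the new delta_max keyword performs. The split_range helper and the example IDs below are hypothetical; they simply mirror the while loop added to queue_background_migration_jobs_by_range_at_intervals.

# Mirrors the added loop: each each_batch range is cut into sub-ranges
# no wider than delta_max, and each sub-range would be scheduled as its
# own background migration job.
def split_range(start_id, end_id, delta_max)
  ranges = []

  while start_id < end_id
    tmp_end_id = [start_id + delta_max, end_id].min
    ranges << [start_id, tmp_end_id]
    start_id = tmp_end_id
  end

  ranges
end

split_range(1, 1_200_000, 500_000)
# => [[1, 500001], [500001, 1000001], [1000001, 1200000]]

With the change applied, a migration could opt into a smaller delta by passing the new keyword; the model and job class names here are hypothetical:

queue_background_migration_jobs_by_range_at_intervals(
  SomeModel,
  'ExampleBackgroundMigration',
  2.minutes,
  batch_size: BACKGROUND_MIGRATION_BATCH_SIZE,
  delta_max: 250_000
)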