diff options
author | Shinya Maeda <shinya@gitlab.com> | 2018-06-27 16:05:16 +0900 |
---|---|---|
committer | Shinya Maeda <shinya@gitlab.com> | 2018-06-27 16:05:16 +0900 |
commit | bdcbe0b19d8dfdbd50ff78a237173ee6e14397e5 (patch) | |
tree | 2b340ad1899872343d483ec81994ffcebe307149 | |
parent | 292cf668905a55e7b305c67b314cb039d2681a54 (diff) | |
download | gitlab-ce-background-migration-delta-splitter.tar.gz |
Introduce delta_max for background migrations (branch: background-migration-delta-splitter)
-rw-r--r-- | lib/gitlab/database/migration_helpers.rb | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 4fe5b4cc835..dd0118788bd 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -5,6 +5,7 @@ module Gitlab BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time + BACKGROUND_MIGRATION_DELTA_MAX = 500_000 # Adds `created_at` and `updated_at` columns with timezone information. # @@ -974,7 +975,7 @@ into similar problems in the future (e.g. when new tables are created). # # do something # end # end - def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, delta_max: BACKGROUND_MIGRATION_DELTA_MAX) raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') # To not overload the worker too much we enforce a minimum interval both @@ -986,10 +987,15 @@ into similar problems in the future (e.g. when new tables are created). model_class.each_batch(of: batch_size) do |relation, index| start_id, end_id = relation.pluck('MIN(id), MAX(id)').first - # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for - # the same time, which is not helpful in most cases where we wish to - # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) + while start_id < end_id + tmp_end_id = [start_id + delta_max, end_id].min + + # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for + # the same time, which is not helpful in most cases where we wish to + # spread the work over time. 
+ BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, tmp_end_id]) + start_id = tmp_end_id + end end end |