From 0fa07d24c277e30e438086b69c159bc59f25e87e Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 7 Jun 2018 19:01:35 +0900 Subject: Use id list to select only batch number of rows --- ...09_migrate_legacy_artifacts_to_job_artifacts.rb | 8 ++-- .../migrate_legacy_artifacts.rb | 20 +++++----- lib/gitlab/database/migration_helpers.rb | 44 ++++++++++++++++++++++ .../migrate_legacy_artifacts_spec.rb | 24 ++++++------ ...grate_legacy_artifacts_to_job_artifacts_spec.rb | 2 +- 5 files changed, 71 insertions(+), 27 deletions(-) diff --git a/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb b/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb index 01f67e48de1..0eeaf0d4a0a 100644 --- a/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb +++ b/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb @@ -35,10 +35,10 @@ class MigrateLegacyArtifactsToJobArtifacts < ActiveRecord::Migration MigrateLegacyArtifactsToJobArtifacts::Build .with_legacy_artifacts.without_new_artifacts.tap do |relation| - queue_background_migration_jobs_by_range_at_intervals(relation, - MIGRATION, - 5.minutes, - batch_size: BATCH_SIZE) + queue_background_migration_jobs_by_list_at_intervals(relation, + MIGRATION, + 5.minutes, + batch_size: BATCH_SIZE) end remove_concurrent_index_by_name(:ci_builds, TMP_INDEX) diff --git a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb index 3fbe3512be4..c2cd60bf730 100644 --- a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb +++ b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb @@ -10,17 +10,17 @@ module Gitlab METADATA_FILE_TYPE = 2 # equal to Ci::JobArtifact.file_types['metadata'] LEGACY_PATH_FILE_LOCATION = 1 # equal to Ci::JobArtifact.file_location['legacy_path'] - def perform(start_id, stop_id) + def perform(id_list) ActiveRecord::Base.transaction do - insert_archives(start_id, stop_id) - insert_metadatas(start_id, stop_id) - delete_legacy_artifacts(start_id, stop_id) + insert_archives(id_list) + insert_metadatas(id_list) + delete_legacy_artifacts(id_list) end end private - def insert_archives(start_id, stop_id) + def insert_archives(id_list) ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc INSERT INTO ci_job_artifacts ( project_id, @@ -44,7 +44,7 @@ module Gitlab COALESCE(artifacts_file_store, #{FILE_LOCAL_STORE}), #{ARCHIVE_FILE_TYPE} FROM ci_builds - WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + WHERE id IN (#{id_list.join(',')}) AND artifacts_file <> '' AND NOT EXISTS ( SELECT 1 FROM ci_job_artifacts @@ -53,7 +53,7 @@ module Gitlab EOF end - def insert_metadatas(start_id, stop_id) + def insert_metadatas(id_list) ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc INSERT INTO ci_job_artifacts ( project_id, @@ -77,7 +77,7 @@ module Gitlab COALESCE(artifacts_metadata_store, #{FILE_LOCAL_STORE}), #{METADATA_FILE_TYPE} FROM ci_builds - WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + WHERE id IN (#{id_list.join(',')}) AND artifacts_file <> '' AND artifacts_metadata <> '' AND NOT EXISTS ( SELECT 1 FROM ci_job_artifacts @@ -86,7 +86,7 @@ module Gitlab EOF end - def delete_legacy_artifacts(start_id, stop_id) + def delete_legacy_artifacts(id_list) ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc UPDATE ci_builds SET artifacts_file = NULL, @@ -94,7 +94,7 @@ module Gitlab artifacts_size = NULL, artifacts_metadata = NULL, artifacts_metadata_store = NULL - WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + WHERE id IN (#{id_list.join(',')}) AND (artifacts_file <> '' OR artifacts_metadata <> '') EOF end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index c21bae5e16b..11a9b97b5e6 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -902,6 +902,50 @@ into similar problems in the future (e.g. when new tables are created). end end + # Queues background migration jobs for an entire table, batched by ID list. + # Each job is scheduled with a `delay_interval` in between. + # If you use a small interval, then some jobs may run at the same time. + # + # model_class - The table or relation being iterated over + # job_class_name - The background migration job class as a string + # delay_interval - The duration between each job's scheduled time (must respond to `to_f`) + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute) + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(id_list) + # # do something + # end + # end + def queue_background_migration_jobs_by_list_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + # To not overload the worker too much we enforce a minimum interval both + # when scheduling and performing jobs. + if delay_interval < BackgroundMigrationWorker::MIN_INTERVAL + delay_interval = BackgroundMigrationWorker::MIN_INTERVAL + end + + model_class.each_batch(of: batch_size) do |relation, index| + id_list = relation.pluck('id') + + # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for + # the same time, which is not helpful in most cases where we wish to + # spread the work over time. + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, id_list) + end + end + # Fetches indexes on a column by name for postgres. # # This will include indexes using an expression on the column, for example: diff --git a/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb index 730968ec887..1d9f63fc9ec 100644 --- a/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb +++ b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb @@ -54,11 +54,11 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema expect(job_artifacts.count).to be_zero end - context 'when the record exists inside of the range of a background migration' do - let(:range) { [1, 1] } + context 'when the record exists inside of the id_list of a background migration' do + let(:id_list) { [1] } it 'migrates' do - described_class.new.perform(*range) + described_class.new.perform(id_list) expect(job_artifacts.order(:id).pluck('project_id, job_id, file_type, file_store, size, expire_at, file, file_sha256, file_location')) .to eq([[project_id, job_id, file_type_archive, file_store, artifacts_archive_attributes[:artifacts_size], artifacts_expire_at, 'archive.zip', nil, file_location_legacy_path], @@ -74,7 +74,7 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema it 'fills file_store by 1 (ObjectStorage::Store::LOCAL)' do expect(jobs.pluck('artifacts_file_store, artifacts_metadata_store')).to eq([[nil, nil]]) - described_class.new.perform(*range) + described_class.new.perform(id_list) expect(job_artifacts.pluck('file_store')).to eq([1, 1]) end @@ -87,7 +87,7 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema end it 'migrates metadata too' do - described_class.new.perform(*range) + described_class.new.perform(id_list) expect(job_artifacts.where(job_id: job_id, file_type: 2).pluck('file')).to eq(['metadata.gz']) end @@ -100,17 +100,17 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema end it 'does not migrate' do - expect { described_class.new.perform(*range) }.not_to change { job_artifacts.count } + expect { described_class.new.perform(id_list) }.not_to change { job_artifacts.count } end end end end - context 'when the record exists outside of the range of a background migration' do - let(:range) { [2, 2] } + context 'when the record exists outside of the id_list of a background migration' do + let(:id_list) { [2] } it 'does not migrate' do - described_class.new.perform(*range) + described_class.new.perform(id_list) expect(job_artifacts.count).to be_zero end @@ -127,11 +127,11 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema expect(jobs.pluck('artifacts_metadata, artifacts_metadata_store')).to eq([[nil, nil]]) end - context 'when the record exists inside of the range of a background migration' do - let(:range) { [1, 1] } + context 'when the record exists inside of the id_list of a background migration' do + let(:id_list) { [1] } it 'does not migrate' do - described_class.new.perform(*range) + described_class.new.perform(id_list) expect(job_artifacts.count).to be_zero end diff --git a/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb index 381369599af..ab232cd3af8 100644 --- a/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb +++ b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb @@ -32,7 +32,7 @@ describe MigrateLegacyArtifactsToJobArtifacts, :migration, :sidekiq do Timecop.freeze do migrate! - expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 6) + expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 3, 5, 6) expect(BackgroundMigrationWorker.jobs.size).to eq 1 end end -- cgit v1.2.1