summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShinya Maeda <shinya@gitlab.com>2018-06-07 19:01:35 +0900
committerShinya Maeda <shinya@gitlab.com>2018-06-07 19:01:35 +0900
commit0fa07d24c277e30e438086b69c159bc59f25e87e (patch)
treeeffd99bce310c2d11c7cf110c963bf3d1b437b75
parent378011229864c1e056cf995444f947f6b352172c (diff)
downloadgitlab-ce-add-background-migration-for-legacy-traces-with-id-list.tar.gz
Use id list to select only batch number of rowsadd-background-migration-for-legacy-traces-with-id-list
-rw-r--r--db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb8
-rw-r--r--lib/gitlab/background_migration/migrate_legacy_artifacts.rb20
-rw-r--r--lib/gitlab/database/migration_helpers.rb44
-rw-r--r--spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb24
-rw-r--r--spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb2
5 files changed, 71 insertions, 27 deletions
diff --git a/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb b/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb
index 01f67e48de1..0eeaf0d4a0a 100644
--- a/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb
+++ b/db/post_migrate/20180430161409_migrate_legacy_artifacts_to_job_artifacts.rb
@@ -35,10 +35,10 @@ class MigrateLegacyArtifactsToJobArtifacts < ActiveRecord::Migration
MigrateLegacyArtifactsToJobArtifacts::Build
.with_legacy_artifacts.without_new_artifacts.tap do |relation|
- queue_background_migration_jobs_by_range_at_intervals(relation,
- MIGRATION,
- 5.minutes,
- batch_size: BATCH_SIZE)
+ queue_background_migration_jobs_by_list_at_intervals(relation,
+ MIGRATION,
+ 5.minutes,
+ batch_size: BATCH_SIZE)
end
remove_concurrent_index_by_name(:ci_builds, TMP_INDEX)
diff --git a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb
index 3fbe3512be4..c2cd60bf730 100644
--- a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb
+++ b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb
@@ -10,17 +10,17 @@ module Gitlab
METADATA_FILE_TYPE = 2 # equal to Ci::JobArtifact.file_types['metadata']
LEGACY_PATH_FILE_LOCATION = 1 # equal to Ci::JobArtifact.file_location['legacy_path']
- def perform(start_id, stop_id)
+ def perform(id_list)
ActiveRecord::Base.transaction do
- insert_archives(start_id, stop_id)
- insert_metadatas(start_id, stop_id)
- delete_legacy_artifacts(start_id, stop_id)
+ insert_archives(id_list)
+ insert_metadatas(id_list)
+ delete_legacy_artifacts(id_list)
end
end
private
- def insert_archives(start_id, stop_id)
+ def insert_archives(id_list)
ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc
INSERT INTO ci_job_artifacts (
project_id,
@@ -44,7 +44,7 @@ module Gitlab
COALESCE(artifacts_file_store, #{FILE_LOCAL_STORE}),
#{ARCHIVE_FILE_TYPE}
FROM ci_builds
- WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
+ WHERE id IN (#{id_list.join(',')})
AND artifacts_file <> ''
AND NOT EXISTS (
SELECT 1 FROM ci_job_artifacts
@@ -53,7 +53,7 @@ module Gitlab
EOF
end
- def insert_metadatas(start_id, stop_id)
+ def insert_metadatas(id_list)
ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc
INSERT INTO ci_job_artifacts (
project_id,
@@ -77,7 +77,7 @@ module Gitlab
COALESCE(artifacts_metadata_store, #{FILE_LOCAL_STORE}),
#{METADATA_FILE_TYPE}
FROM ci_builds
- WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
+ WHERE id IN (#{id_list.join(',')})
AND artifacts_file <> '' AND artifacts_metadata <> ''
AND NOT EXISTS (
SELECT 1 FROM ci_job_artifacts
@@ -86,7 +86,7 @@ module Gitlab
EOF
end
- def delete_legacy_artifacts(start_id, stop_id)
+ def delete_legacy_artifacts(id_list)
ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc
UPDATE ci_builds
SET artifacts_file = NULL,
@@ -94,7 +94,7 @@ module Gitlab
artifacts_size = NULL,
artifacts_metadata = NULL,
artifacts_metadata_store = NULL
- WHERE id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
+ WHERE id IN (#{id_list.join(',')})
AND (artifacts_file <> '' OR artifacts_metadata <> '')
EOF
end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index c21bae5e16b..11a9b97b5e6 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -902,6 +902,50 @@ into similar problems in the future (e.g. when new tables are created).
end
end
+ # Queues background migration jobs for an entire table, batched by ID list.
+ # Each job is scheduled with a `delay_interval` in between.
+ # If you use a small interval, then some jobs may run at the same time.
+ #
+ # model_class - The table or relation being iterated over
+ # job_class_name - The background migration job class as a string
+ # delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
+ # batch_size - The maximum number of rows per job
+ #
+ # Example:
+ #
+ # class Route < ActiveRecord::Base
+ # include EachBatch
+ # self.table_name = 'routes'
+ # end
+ #
+ # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
+ #
+ # Where the model_class includes EachBatch, and the background migration exists:
+ #
+ # class Gitlab::BackgroundMigration::ProcessRoutes
+ # def perform(id_list)
+ # # do something
+ # end
+ # end
+ def queue_background_migration_jobs_by_list_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
+ raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
+
+ # To not overload the worker too much we enforce a minimum interval both
+ # when scheduling and performing jobs.
+ if delay_interval < BackgroundMigrationWorker::MIN_INTERVAL
+ delay_interval = BackgroundMigrationWorker::MIN_INTERVAL
+ end
+
+ model_class.each_batch(of: batch_size) do |relation, index|
+ id_list = relation.pluck('id')
+
+ # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
+ # the same time, which is not helpful in most cases where we wish to
+ # spread the work over time.
+ BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, id_list)
+ end
+ end
+
# Fetches indexes on a column by name for postgres.
#
# This will include indexes using an expression on the column, for example:
diff --git a/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb
index 730968ec887..1d9f63fc9ec 100644
--- a/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb
+++ b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb
@@ -54,11 +54,11 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema
expect(job_artifacts.count).to be_zero
end
- context 'when the record exists inside of the range of a background migration' do
- let(:range) { [1, 1] }
+ context 'when the record exists inside of the id_list of a background migration' do
+ let(:id_list) { [1] }
it 'migrates' do
- described_class.new.perform(*range)
+ described_class.new.perform(id_list)
expect(job_artifacts.order(:id).pluck('project_id, job_id, file_type, file_store, size, expire_at, file, file_sha256, file_location'))
.to eq([[project_id, job_id, file_type_archive, file_store, artifacts_archive_attributes[:artifacts_size], artifacts_expire_at, 'archive.zip', nil, file_location_legacy_path],
@@ -74,7 +74,7 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema
it 'fills file_store by 1 (ObjectStorage::Store::LOCAL)' do
expect(jobs.pluck('artifacts_file_store, artifacts_metadata_store')).to eq([[nil, nil]])
- described_class.new.perform(*range)
+ described_class.new.perform(id_list)
expect(job_artifacts.pluck('file_store')).to eq([1, 1])
end
@@ -87,7 +87,7 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema
end
it 'migrates metadata too' do
- described_class.new.perform(*range)
+ described_class.new.perform(id_list)
expect(job_artifacts.where(job_id: job_id, file_type: 2).pluck('file')).to eq(['metadata.gz'])
end
@@ -100,17 +100,17 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema
end
it 'does not migrate' do
- expect { described_class.new.perform(*range) }.not_to change { job_artifacts.count }
+ expect { described_class.new.perform(id_list) }.not_to change { job_artifacts.count }
end
end
end
end
- context 'when the record exists outside of the range of a background migration' do
- let(:range) { [2, 2] }
+ context 'when the record exists outside of the id_list of a background migration' do
+ let(:id_list) { [2] }
it 'does not migrate' do
- described_class.new.perform(*range)
+ described_class.new.perform(id_list)
expect(job_artifacts.count).to be_zero
end
@@ -127,11 +127,11 @@ describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema
expect(jobs.pluck('artifacts_metadata, artifacts_metadata_store')).to eq([[nil, nil]])
end
- context 'when the record exists inside of the range of a background migration' do
- let(:range) { [1, 1] }
+ context 'when the record exists inside of the id_list of a background migration' do
+ let(:id_list) { [1] }
it 'does not migrate' do
- described_class.new.perform(*range)
+ described_class.new.perform(id_list)
expect(job_artifacts.count).to be_zero
end
diff --git a/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb
index 381369599af..ab232cd3af8 100644
--- a/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb
+++ b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb
@@ -32,7 +32,7 @@ describe MigrateLegacyArtifactsToJobArtifacts, :migration, :sidekiq do
Timecop.freeze do
migrate!
- expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 6)
+ expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 3, 5, 6)
expect(BackgroundMigrationWorker.jobs.size).to eq 1
end
end