diff options
author | Stan Hu <stanhu@gmail.com> | 2017-11-16 16:14:24 -0800 |
---|---|---|
committer | Sean McGivern <sean@gitlab.com> | 2017-11-17 10:23:48 +0000 |
commit | 5cecff893db3188ff5ec779e07e9c598d3ed2e21 (patch) | |
tree | 33b5957717da5d0add5356c4db8e65650f9fb1ae | |
parent | d41e66cb632cf4a51428c87a07cbdd182e3e0697 (diff) | |
download | gitlab-ce-5cecff893db3188ff5ec779e07e9c598d3ed2e21.tar.gz |
Convert migration to populate latest merge request ID into a background migration
This is to smear updates over a few hours to avoid causing excessive
replication lag as seen in https://gitlab.com/gitlab-com/infrastructure/issues/3235.
-rw-r--r-- | db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb | 27 | ||||
-rw-r--r-- | db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb | 29 | ||||
-rw-r--r-- | lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb | 30 | ||||
-rw-r--r-- | spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb (renamed from spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb) | 13 | ||||
-rw-r--r-- | spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb | 64 |
5 files changed, 130 insertions, 33 deletions
diff --git a/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb b/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb deleted file mode 100644 index a7ebbbf34c0..00000000000 --- a/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb +++ /dev/null @@ -1,27 +0,0 @@ -class PopulateMergeRequestsLatestMergeRequestDiffId < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - DOWNTIME = false - BATCH_SIZE = 1_000 - - class MergeRequest < ActiveRecord::Base - self.table_name = 'merge_requests' - - include ::EachBatch - end - - disable_ddl_transaction! - - def up - update = ' - latest_merge_request_diff_id = ( - SELECT MAX(id) - FROM merge_request_diffs - WHERE merge_requests.id = merge_request_diffs.merge_request_id - )'.squish - - MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation| - relation.update_all(update) - end - end -end diff --git a/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb b/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb new file mode 100644 index 00000000000..7a63382cc6d --- /dev/null +++ b/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb @@ -0,0 +1,29 @@ +class ScheduleMergeRequestLatestMergeRequestDiffIdMigrations < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 50_000 + MIGRATION = 'PopulateMergeRequestsLatestMergeRequestDiffId' + + disable_ddl_transaction! + + class MergeRequest < ActiveRecord::Base + self.table_name = 'merge_requests' + + include ::EachBatch + end + + # On GitLab.com, we saw that we generated about 500,000 dead tuples over 5 minutes. + # To keep replication lag from ballooning, we'll aim for 50,000 updates over 5 minutes. + # + # Assuming that there are 5 million rows affected (which is more than on + # GitLab.com), and that each batch of 50,000 rows takes up to 5 minutes, then + # we can migrate all the rows in 8.5 hours. + def up + MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation, index| + range = relation.pluck('MIN(id)', 'MAX(id)').first + + BackgroundMigrationWorker.perform_in(index * 5.minutes, MIGRATION, range) + end + end +end diff --git a/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb new file mode 100644 index 00000000000..7e109e96e73 --- /dev/null +++ b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb @@ -0,0 +1,30 @@ +module Gitlab + module BackgroundMigration + class PopulateMergeRequestsLatestMergeRequestDiffId + BATCH_SIZE = 1_000 + + class MergeRequest < ActiveRecord::Base + self.table_name = 'merge_requests' + + include ::EachBatch + end + + def perform(start_id, stop_id) + update = ' + latest_merge_request_diff_id = ( + SELECT MAX(id) + FROM merge_request_diffs + WHERE merge_requests.id = merge_request_diffs.merge_request_id + )'.squish + + MergeRequest + .where(id: start_id..stop_id) + .where(latest_merge_request_diff_id: nil) + .each_batch(of: BATCH_SIZE) do |relation| + + relation.update_all(update) + end + end + end + end +end diff --git a/spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb b/spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb index 4ea7f441f7c..0cb753c5853 100644 --- a/spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb +++ b/spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb @@ -1,7 +1,6 @@ require 'spec_helper' -require Rails.root.join('db', 'post_migrate', '20171026082505_populate_merge_requests_latest_merge_request_diff_id') -describe PopulateMergeRequestsLatestMergeRequestDiffId, :migration do +describe Gitlab::BackgroundMigration::PopulateMergeRequestsLatestMergeRequestDiffId, :migration, schema: 20171026082505 do let(:projects_table) { table(:projects) } let(:merge_requests_table) { table(:merge_requests) } let(:merge_request_diffs_table) { table(:merge_request_diffs) } @@ -27,30 +26,32 @@ describe PopulateMergeRequestsLatestMergeRequestDiffId, :migration do merge_request_diffs_table.where(merge_request_id: merge_request.id) end - describe '#up' do + describe '#perform' do it 'ignores MRs without diffs' do merge_request_without_diff = create_mr!('without_diff') + mr_id = merge_request_without_diff.id expect(merge_request_without_diff.latest_merge_request_diff_id).to be_nil - expect { migrate! } + expect { subject.perform(mr_id, mr_id) } .not_to change { merge_request_without_diff.reload.latest_merge_request_diff_id } end it 'ignores MRs that have a diff ID already set' do merge_request_with_multiple_diffs = create_mr!('with_multiple_diffs', diffs: 3) diff_id = diffs_for(merge_request_with_multiple_diffs).minimum(:id) + mr_id = merge_request_with_multiple_diffs.id merge_request_with_multiple_diffs.update!(latest_merge_request_diff_id: diff_id) - expect { migrate! } + expect { subject.perform(mr_id, mr_id) } .not_to change { merge_request_with_multiple_diffs.reload.latest_merge_request_diff_id } end it 'migrates multiple MR diffs to the correct values' do merge_requests = Array.new(3).map.with_index { |_, i| create_mr!(i, diffs: 3) } - migrate! + subject.perform(merge_requests.first.id, merge_requests.last.id) merge_requests.each do |merge_request| expect(merge_request.reload.latest_merge_request_diff_id) diff --git a/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb b/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb new file mode 100644 index 00000000000..158d0bc02ed --- /dev/null +++ b/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb @@ -0,0 +1,64 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations') + +describe ScheduleMergeRequestLatestMergeRequestDiffIdMigrations, :migration, :sidekiq do + let(:projects_table) { table(:projects) } + let(:merge_requests_table) { table(:merge_requests) } + let(:merge_request_diffs_table) { table(:merge_request_diffs) } + + let(:project) { projects_table.create!(name: 'gitlab', path: 'gitlab-org/gitlab-ce') } + + let!(:merge_request_1) { create_mr!('mr_1', diffs: 1) } + let!(:merge_request_2) { create_mr!('mr_2', diffs: 2) } + let!(:merge_request_migrated) { create_mr!('merge_request_migrated', diffs: 3) } + let!(:merge_request_4) { create_mr!('mr_4', diffs: 3) } + + def create_mr!(name, diffs: 0) + merge_request = + merge_requests_table.create!(target_project_id: project.id, + target_branch: 'master', + source_project_id: project.id, + source_branch: name, + title: name) + + diffs.times do + merge_request_diffs_table.create!(merge_request_id: merge_request.id) + end + + merge_request + end + + def diffs_for(merge_request) + merge_request_diffs_table.where(merge_request_id: merge_request.id) + end + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 1) + + diff_id = diffs_for(merge_request_migrated).minimum(:id) + merge_request_migrated.update!(latest_merge_request_diff_id: diff_id) + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(described_class::MIGRATION).to be_scheduled_migration(5.minutes, merge_request_1.id, merge_request_1.id) + expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes, merge_request_2.id, merge_request_2.id) + expect(described_class::MIGRATION).to be_scheduled_migration(15.minutes, merge_request_4.id, merge_request_4.id) + expect(BackgroundMigrationWorker.jobs.size).to eq 3 + end + end + end + + it 'schedules background migrations' do + Sidekiq::Testing.inline! do + expect(merge_requests_table.where(latest_merge_request_diff_id: nil).count).to eq 3 + + migrate! + + expect(merge_requests_table.where(latest_merge_request_diff_id: nil).count).to eq 0 + end + end +end |