summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stan Hu <stanhu@gmail.com> 2017-11-16 16:14:24 -0800
committer: Stan Hu <stanhu@gmail.com> 2017-11-16 16:33:28 -0800
commit: 7cd156f79df5c88d20274dd44da640850ac41a91 (patch)
tree: 29c12b1d86379462bf827058f56ba9393aae95ae
parent: d41e66cb632cf4a51428c87a07cbdd182e3e0697 (diff)
download: gitlab-ce-sh-background-migration-move-mr-diff-id.tar.gz
Convert migration to populate latest merge request ID into a background migration (branch: sh-background-migration-move-mr-diff-id)
This is to smear updates over a few hours to avoid causing excessive replication lag as seen in https://gitlab.com/gitlab-com/infrastructure/issues/3235.
-rw-r--r-- db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb 27
-rw-r--r-- db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb 29
-rw-r--r-- lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb 29
-rw-r--r-- spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb (renamed from spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb) 13
-rw-r--r-- spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb 64
5 files changed, 129 insertions, 33 deletions
diff --git a/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb b/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb
deleted file mode 100644
index a7ebbbf34c0..00000000000
--- a/db/post_migrate/20171026082505_populate_merge_requests_latest_merge_request_diff_id.rb
+++ /dev/null
@@ -1,27 +0,0 @@
-class PopulateMergeRequestsLatestMergeRequestDiffId < ActiveRecord::Migration
- include Gitlab::Database::MigrationHelpers
-
- DOWNTIME = false
- BATCH_SIZE = 1_000
-
- class MergeRequest < ActiveRecord::Base
- self.table_name = 'merge_requests'
-
- include ::EachBatch
- end
-
- disable_ddl_transaction!
-
- def up
- update = '
- latest_merge_request_diff_id = (
- SELECT MAX(id)
- FROM merge_request_diffs
- WHERE merge_requests.id = merge_request_diffs.merge_request_id
- )'.squish
-
- MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation|
- relation.update_all(update)
- end
- end
-end
diff --git a/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb b/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb
new file mode 100644
index 00000000000..7a63382cc6d
--- /dev/null
+++ b/db/post_migrate/20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations.rb
@@ -0,0 +1,29 @@
+class ScheduleMergeRequestLatestMergeRequestDiffIdMigrations < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 50_000
+ MIGRATION = 'PopulateMergeRequestsLatestMergeRequestDiffId'
+
+ disable_ddl_transaction!
+
+ class MergeRequest < ActiveRecord::Base
+ self.table_name = 'merge_requests'
+
+ include ::EachBatch
+ end
+
+ # On GitLab.com, we saw that we generated about 500,000 dead tuples over 5 minutes.
+ # To keep replication lag from ballooning, we'll aim for 50,000 updates over 5 minutes.
+ #
+ # Assuming that there are 5 million rows affected (which is more than on
+ # GitLab.com), and that each batch of 50,000 rows takes up to 5 minutes, then
+ # we can migrate all the rows in 8.5 hours.
+ def up
+ MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation, index|
+ range = relation.pluck('MIN(id)', 'MAX(id)').first
+
+ BackgroundMigrationWorker.perform_in(index * 5.minutes, MIGRATION, range)
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb
new file mode 100644
index 00000000000..98d63a47fa3
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb
@@ -0,0 +1,29 @@
+module Gitlab
+ module BackgroundMigration
+ class PopulateMergeRequestsLatestMergeRequestDiffId
+ BATCH_SIZE = 1_000
+
+ class MergeRequest < ActiveRecord::Base
+ self.table_name = 'merge_requests'
+
+ include ::EachBatch
+ end
+
+ def perform(start_id, stop_id)
+ update = '
+ latest_merge_request_diff_id = (
+ SELECT MAX(id)
+ FROM merge_request_diffs
+ WHERE merge_requests.id = merge_request_diffs.merge_request_id
+ )'.squish
+
+ MergeRequest
+ .where(id: start_id..stop_id)
+ .where(latest_merge_request_diff_id: nil)
+ .each_batch(of: BATCH_SIZE) do |relation|
+ relation.update_all(update)
+ end
+ end
+ end
+ end
+end
diff --git a/spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb b/spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb
index 4ea7f441f7c..0cb753c5853 100644
--- a/spec/migrations/populate_merge_requests_latest_merge_request_diff_id_spec.rb
+++ b/spec/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id_spec.rb
@@ -1,7 +1,6 @@
require 'spec_helper'
-require Rails.root.join('db', 'post_migrate', '20171026082505_populate_merge_requests_latest_merge_request_diff_id')
-describe PopulateMergeRequestsLatestMergeRequestDiffId, :migration do
+describe Gitlab::BackgroundMigration::PopulateMergeRequestsLatestMergeRequestDiffId, :migration, schema: 20171026082505 do
let(:projects_table) { table(:projects) }
let(:merge_requests_table) { table(:merge_requests) }
let(:merge_request_diffs_table) { table(:merge_request_diffs) }
@@ -27,30 +26,32 @@ describe PopulateMergeRequestsLatestMergeRequestDiffId, :migration do
merge_request_diffs_table.where(merge_request_id: merge_request.id)
end
- describe '#up' do
+ describe '#perform' do
it 'ignores MRs without diffs' do
merge_request_without_diff = create_mr!('without_diff')
+ mr_id = merge_request_without_diff.id
expect(merge_request_without_diff.latest_merge_request_diff_id).to be_nil
- expect { migrate! }
+ expect { subject.perform(mr_id, mr_id) }
.not_to change { merge_request_without_diff.reload.latest_merge_request_diff_id }
end
it 'ignores MRs that have a diff ID already set' do
merge_request_with_multiple_diffs = create_mr!('with_multiple_diffs', diffs: 3)
diff_id = diffs_for(merge_request_with_multiple_diffs).minimum(:id)
+ mr_id = merge_request_with_multiple_diffs.id
merge_request_with_multiple_diffs.update!(latest_merge_request_diff_id: diff_id)
- expect { migrate! }
+ expect { subject.perform(mr_id, mr_id) }
.not_to change { merge_request_with_multiple_diffs.reload.latest_merge_request_diff_id }
end
it 'migrates multiple MR diffs to the correct values' do
merge_requests = Array.new(3).map.with_index { |_, i| create_mr!(i, diffs: 3) }
- migrate!
+ subject.perform(merge_requests.first.id, merge_requests.last.id)
merge_requests.each do |merge_request|
expect(merge_request.reload.latest_merge_request_diff_id)
diff --git a/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb b/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb
new file mode 100644
index 00000000000..158d0bc02ed
--- /dev/null
+++ b/spec/migrations/schedule_merge_request_latest_merge_request_diff_id_migrations_spec.rb
@@ -0,0 +1,64 @@
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20171026082505_schedule_merge_request_latest_merge_request_diff_id_migrations')
+
+describe ScheduleMergeRequestLatestMergeRequestDiffIdMigrations, :migration, :sidekiq do
+ let(:projects_table) { table(:projects) }
+ let(:merge_requests_table) { table(:merge_requests) }
+ let(:merge_request_diffs_table) { table(:merge_request_diffs) }
+
+ let(:project) { projects_table.create!(name: 'gitlab', path: 'gitlab-org/gitlab-ce') }
+
+ let!(:merge_request_1) { create_mr!('mr_1', diffs: 1) }
+ let!(:merge_request_2) { create_mr!('mr_2', diffs: 2) }
+ let!(:merge_request_migrated) { create_mr!('merge_request_migrated', diffs: 3) }
+ let!(:merge_request_4) { create_mr!('mr_4', diffs: 3) }
+
+ def create_mr!(name, diffs: 0)
+ merge_request =
+ merge_requests_table.create!(target_project_id: project.id,
+ target_branch: 'master',
+ source_project_id: project.id,
+ source_branch: name,
+ title: name)
+
+ diffs.times do
+ merge_request_diffs_table.create!(merge_request_id: merge_request.id)
+ end
+
+ merge_request
+ end
+
+ def diffs_for(merge_request)
+ merge_request_diffs_table.where(merge_request_id: merge_request.id)
+ end
+
+ before do
+ stub_const("#{described_class.name}::BATCH_SIZE", 1)
+
+ diff_id = diffs_for(merge_request_migrated).minimum(:id)
+ merge_request_migrated.update!(latest_merge_request_diff_id: diff_id)
+ end
+
+ it 'correctly schedules background migrations' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ expect(described_class::MIGRATION).to be_scheduled_migration(5.minutes, merge_request_1.id, merge_request_1.id)
+ expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes, merge_request_2.id, merge_request_2.id)
+ expect(described_class::MIGRATION).to be_scheduled_migration(15.minutes, merge_request_4.id, merge_request_4.id)
+ expect(BackgroundMigrationWorker.jobs.size).to eq 3
+ end
+ end
+ end
+
+ it 'schedules background migrations' do
+ Sidekiq::Testing.inline! do
+ expect(merge_requests_table.where(latest_merge_request_diff_id: nil).count).to eq 3
+
+ migrate!
+
+ expect(merge_requests_table.where(latest_merge_request_diff_id: nil).count).to eq 0
+ end
+ end
+end