diff options
author | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-03 17:05:19 -0300 |
---|---|---|
committer | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-10 09:43:58 -0300 |
commit | e66535e8407ccb8dd229fefdce817902a364f58a (patch) | |
tree | c90cbfc48845479074fd508d3a19b88cc594fda9 | |
parent | 80a7be87f82b36c23e273b6a84b5a6bdbffaa947 (diff) | |
download | gitlab-ce-e66535e8407ccb8dd229fefdce817902a364f58a.tar.gz |
Create a diff deletion worker scheduler to avoid long-running post-migration
4 files changed, 94 insertions, 43 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb index bd614aee75c..92950c1eec1 100644 --- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -11,8 +11,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration DOWNTIME = false BATCH_SIZE = 1000 - MIGRATION = 'DeleteDiffFiles' - DELAY_INTERVAL = 10.minutes + SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze disable_ddl_transaction! @@ -38,23 +37,10 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration # Planning time: 0.752 ms # Execution time: 12.430 ms # - diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index| - # We slice the batches in groups of 5 and schedule each group of 5 at - # once. This should make writings on Redis go 5x faster. - job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids| - ids.map { |id| [MIGRATION, [id]] } - end + diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, scheduler_index| + ids = relation.pluck(:id).map { |id| [id] } - job_batches.each_with_index do |jobs, inner_index| - # This will give some space between batches of workers. - interval = DELAY_INTERVAL * outer_index + inner_index.minutes - - # A single `merge_request_diff` can be associated with way too many - # `merge_request_diff_files`. It's better to avoid scheduling big - # batches and go with 5 at a time. - # - BackgroundMigrationWorker.bulk_perform_in(interval, jobs) - end + BackgroundMigrationWorker.perform_async(SCHEDULER, [ids, scheduler_index]) end # We remove temporary index, because it is not required during standard diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb new file mode 100644 index 00000000000..d944ed90fce --- /dev/null +++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true +# rubocop:disable Metrics/AbcSize +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class ScheduleDiffFilesDeletion + BATCH_SIZE = 5 + MIGRATION = 'DeleteDiffFiles' + DELAY_INTERVAL = 10.minutes + + def perform(diff_ids, scheduler_index) + relation = MergeRequestDiff.where(id: diff_ids) + + job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids| + ids.map { |id| [MIGRATION, [id]] } + end + + job_batches.each_with_index do |jobs, inner_index| + # This will give some space between batches of workers. + interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes + + # A single `merge_request_diff` can be associated with way too many + # `merge_request_diff_files`. It's better to avoid scheduling big + # batches and go with 5 at a time. + # + BackgroundMigrationWorker.bulk_perform_in(interval, jobs) + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb new file mode 100644 index 00000000000..a699571ca90 --- /dev/null +++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb @@ -0,0 +1,43 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do + describe '#perform' do + let(:merge_request_diffs) { table(:merge_request_diffs) } + let(:merge_requests) { table(:merge_requests) } + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 3) + + namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') + projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') + + merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') + + merge_request_diffs.create!(id: 1, merge_request_id: 1) + merge_request_diffs.create!(id: 2, merge_request_id: 1) + merge_request_diffs.create!(id: 3, merge_request_id: 1) + merge_request_diffs.create!(id: 4, merge_request_id: 1) + merge_request_diffs.create!(id: 5, merge_request_id: 1) + end + + it 'correctly schedules diff file deletion workers' do + Sidekiq::Testing.fake! do + Timecop.freeze do + described_class.new.perform([1, 2, 3, 4, 5], 1) + + [1, 2, 3].each do |id| + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id) + end + + [4, 5].each do |id| + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id) + end + + expect(BackgroundMigrationWorker.jobs.size).to eq(5) + end + end + end + end +end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb index 52d6e9b2c64..43dbd7e8f53 100644 --- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do let(:projects) { table(:projects) } before do - stub_const("#{described_class.name}::BATCH_SIZE", 7) + stub_const("#{described_class.name}::BATCH_SIZE", 4) namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') @@ -22,35 +22,25 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty') merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 8, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 9, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 10, merge_request_id: 1, state: 'collected') merge_requests.update(1, latest_merge_request_diff_id: 6) end - it 'correctly schedules diff file deletion workers' do + it 'correctly schedules diff file deletion workers schedulers' do Sidekiq::Testing.fake! do - Timecop.freeze do - migrate! - - # 1st batch schedule - [1, 3, 4, 6, 7].each do |id| - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id) - end - [8, 9].each do |id| - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id) - end - - # 2nd batch schedule - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(20.minutes, 10) - expect(BackgroundMigrationWorker.jobs.size).to eq(8) - end - end - end + # First scheduling batch + expect(BackgroundMigrationWorker).to receive(:perform_async) + .with(described_class::SCHEDULER, [[[1], [3], [4], [6]], 1]) + .and_call_original + + # Second scheduling batch + expect(BackgroundMigrationWorker).to receive(:perform_async) + .with(described_class::SCHEDULER, [[[7]], 2]) + .and_call_original - it 'migrates the data' do - expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count } - .from(1).to(4) + migrate! + + expect(BackgroundMigrationWorker.jobs.size).to eq(2) + end end end |