summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-03 17:05:19 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 09:43:58 -0300
commite66535e8407ccb8dd229fefdce817902a364f58a (patch)
treec90cbfc48845479074fd508d3a19b88cc594fda9
parent80a7be87f82b36c23e273b6a84b5a6bdbffaa947 (diff)
downloadgitlab-ce-e66535e8407ccb8dd229fefdce817902a364f58a.tar.gz
Create a diff deletion worker scheduler to avoid long-running post-migration
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb22
-rw-r--r--lib/gitlab/background_migration/schedule_diff_files_deletion.rb32
-rw-r--r--spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb43
-rw-r--r--spec/migrations/enqueue_delete_diff_files_workers_spec.rb40
4 files changed, 94 insertions, 43 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
index bd614aee75c..92950c1eec1 100644
--- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -11,8 +11,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
DOWNTIME = false
BATCH_SIZE = 1000
- MIGRATION = 'DeleteDiffFiles'
- DELAY_INTERVAL = 10.minutes
+ SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
@@ -38,23 +37,10 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
# Planning time: 0.752 ms
# Execution time: 12.430 ms
#
- diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
- # We slice the batches in groups of 5 and schedule each group of 5 at
- # once. This should make writings on Redis go 5x faster.
- job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids|
- ids.map { |id| [MIGRATION, [id]] }
- end
+ diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, scheduler_index|
+ ids = relation.pluck(:id).map { |id| [id] }
- job_batches.each_with_index do |jobs, inner_index|
- # This will give some space between batches of workers.
- interval = DELAY_INTERVAL * outer_index + inner_index.minutes
-
- # A single `merge_request_diff` can be associated with way too many
- # `merge_request_diff_files`. It's better to avoid scheduling big
- # batches and go with 5 at a time.
- #
- BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
- end
+ BackgroundMigrationWorker.perform_async(SCHEDULER, [ids, scheduler_index])
end
# We remove temporary index, because it is not required during standard
diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
new file mode 100644
index 00000000000..d944ed90fce
--- /dev/null
+++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+# rubocop:disable Metrics/AbcSize
+# rubocop:disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class ScheduleDiffFilesDeletion
+ BATCH_SIZE = 5
+ MIGRATION = 'DeleteDiffFiles'
+ DELAY_INTERVAL = 10.minutes
+
+ def perform(diff_ids, scheduler_index)
+ relation = MergeRequestDiff.where(id: diff_ids)
+
+ job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids|
+ ids.map { |id| [MIGRATION, [id]] }
+ end
+
+ job_batches.each_with_index do |jobs, inner_index|
+ # This will give some space between batches of workers.
+ interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes
+
+ # A single `merge_request_diff` can be associated with way too many
+ # `merge_request_diff_files`. It's better to avoid scheduling big
+ # batches and go with 5 at a time.
+ #
+ BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
+ end
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
new file mode 100644
index 00000000000..a699571ca90
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
@@ -0,0 +1,43 @@
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
+ describe '#perform' do
+ let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:merge_requests) { table(:merge_requests) }
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+
+ before do
+ stub_const("#{described_class.name}::BATCH_SIZE", 3)
+
+ namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
+ projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
+
+ merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
+
+ merge_request_diffs.create!(id: 1, merge_request_id: 1)
+ merge_request_diffs.create!(id: 2, merge_request_id: 1)
+ merge_request_diffs.create!(id: 3, merge_request_id: 1)
+ merge_request_diffs.create!(id: 4, merge_request_id: 1)
+ merge_request_diffs.create!(id: 5, merge_request_id: 1)
+ end
+
+ it 'correctly schedules diff file deletion workers' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ described_class.new.perform([1, 2, 3, 4, 5], 1)
+
+ [1, 2, 3].each do |id|
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
+ end
+
+ [4, 5].each do |id|
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
+ end
+
+ expect(BackgroundMigrationWorker.jobs.size).to eq(5)
+ end
+ end
+ end
+ end
+end
diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
index 52d6e9b2c64..43dbd7e8f53 100644
--- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
+++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
@@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
let(:projects) { table(:projects) }
before do
- stub_const("#{described_class.name}::BATCH_SIZE", 7)
+ stub_const("#{described_class.name}::BATCH_SIZE", 4)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
@@ -22,35 +22,25 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 8, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 9, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 10, merge_request_id: 1, state: 'collected')
merge_requests.update(1, latest_merge_request_diff_id: 6)
end
- it 'correctly schedules diff file deletion workers' do
+ it 'correctly schedules diff file deletion workers schedulers' do
Sidekiq::Testing.fake! do
- Timecop.freeze do
- migrate!
-
- # 1st batch schedule
- [1, 3, 4, 6, 7].each do |id|
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
- end
- [8, 9].each do |id|
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
- end
-
- # 2nd batch schedule
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(20.minutes, 10)
- expect(BackgroundMigrationWorker.jobs.size).to eq(8)
- end
- end
- end
+ # First scheduling batch
+ expect(BackgroundMigrationWorker).to receive(:perform_async)
+ .with(described_class::SCHEDULER, [[[1], [3], [4], [6]], 1])
+ .and_call_original
+
+ # Second scheduling batch
+ expect(BackgroundMigrationWorker).to receive(:perform_async)
+ .with(described_class::SCHEDULER, [[[7]], 2])
+ .and_call_original
- it 'migrates the data' do
- expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count }
- .from(1).to(4)
+ migrate!
+
+ expect(BackgroundMigrationWorker.jobs.size).to eq(2)
+ end
end
end