From 80a7be87f82b36c23e273b6a84b5a6bdbffaa947 Mon Sep 17 00:00:00 2001 From: Oswaldo Ferreira Date: Tue, 3 Jul 2018 12:16:06 -0300 Subject: Schedule batches in bulks of 5 diffs Issuing 6M writings in a N+1 manner in Redis takes time, 3 hours to be precise. This commit makes it schedule 5 jobs at a time, what should make it schedule every job in approximately 40 minutes --- ...0619121030_enqueue_delete_diff_files_workers.rb | 19 +++++++++-------- .../enqueue_delete_diff_files_workers_spec.rb | 24 ++++++++++++++-------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb index 5fb3d545624..bd614aee75c 100644 --- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration DOWNTIME = false BATCH_SIZE = 1000 MIGRATION = 'DeleteDiffFiles' - DELAY_INTERVAL = 8.minutes + DELAY_INTERVAL = 10.minutes TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze disable_ddl_transaction! @@ -39,20 +39,21 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration # Execution time: 12.430 ms # diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index| - ids = relation.pluck(:id) + # We slice the batches in groups of 5 and schedule each group of 5 at + # once. This should make writings on Redis go 5x faster. + job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids| + ids.map { |id| [MIGRATION, [id]] } + end - ids.each_with_index do |diff_id, inner_index| + job_batches.each_with_index do |jobs, inner_index| # This will give some space between batches of workers. interval = DELAY_INTERVAL * outer_index + inner_index.minutes # A single `merge_request_diff` can be associated with way too many - # `merge_request_diff_files`. It's better to avoid batching these and - # schedule one at a time. - # - # Considering roughly 6M jobs, this should take ~30 days to process all - # of them. + # `merge_request_diff_files`. It's better to avoid scheduling big + # batches and go with 5 at a time. # - BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id]) + BackgroundMigrationWorker.bulk_perform_in(interval, jobs) end end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb index 686027822b8..52d6e9b2c64 100644 --- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do let(:projects) { table(:projects) } before do - stub_const("#{described_class.name}::BATCH_SIZE", 2) + stub_const("#{described_class.name}::BATCH_SIZE", 7) namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') @@ -21,6 +21,10 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected') merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty') merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 8, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 9, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 10, merge_request_id: 1, state: 'collected') merge_requests.update(1, latest_merge_request_diff_id: 6) end @@ -30,13 +34,17 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do Timecop.freeze do migrate! - # 1st batch - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(8.minutes, 1) - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(9.minutes, 3) - # 2nd batch - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(16.minutes, 4) - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(17.minutes, 6) - expect(BackgroundMigrationWorker.jobs.size).to eq(4) + # 1st batch schedule + [1, 3, 4, 6, 7].each do |id| + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id) + end + [8, 9].each do |id| + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id) + end + + # 2nd batch schedule + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(20.minutes, 10) + expect(BackgroundMigrationWorker.jobs.size).to eq(8) end end end -- cgit v1.2.1