diff options
author | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-04 16:06:30 -0300 |
---|---|---|
committer | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-10 09:43:58 -0300 |
commit | 4455904bc154f1a36cedeea574bb0f454f92a9e9 (patch) | |
tree | 7484b5e155ffb386cf87684f76d3a8bfe98c4c8d | |
parent | e66535e8407ccb8dd229fefdce817902a364f58a (diff) | |
download | gitlab-ce-4455904bc154f1a36cedeea574bb0f454f92a9e9.tar.gz |
Add 1000 files per minute deletion ratio on scheduler
6 files changed, 112 insertions, 100 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb index 92950c1eec1..df30991737c 100644 --- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -1,52 +1,20 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers - class MergeRequestDiff < ActiveRecord::Base - self.table_name = 'merge_request_diffs' - - belongs_to :merge_request - - include EachBatch - end - DOWNTIME = false - BATCH_SIZE = 1000 SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze disable_ddl_transaction! def up - # We add temporary index, to make iteration over batches more performant. - # Conditional here is to avoid the need of doing that in a separate - # migration file to make this operation idempotent. - # unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX) add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX) end - - diffs_with_files = MergeRequestDiff.where.not(state: ['without_files', 'empty']) - - # explain (analyze, buffers) example for the iteration: - # - # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1) - # Index Cond: ((id >= 764586) AND (id < 835298)) - # Heap Fetches: 8 - # Buffers: shared hit=18188 - # Planning time: 0.752 ms - # Execution time: 12.430 ms - # - diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, scheduler_index| - ids = relation.pluck(:id).map { |id| [id] } - - BackgroundMigrationWorker.perform_async(SCHEDULER, [ids, scheduler_index]) - end - - # We remove temporary index, because it is not required during standard - # operations and runtime. - # - remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX) + # We keep the index since this will be used async. + # Ideally we should remove it in an upcoming release. + BackgroundMigrationWorker.perform_async(SCHEDULER) end def down diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 8fb2c334048..e2c90fce6b1 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,6 +4,20 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + belongs_to :merge_request + + include EachBatch + end + + class MergeRequestDiffFile < ActiveRecord::Base + self.table_name = 'merge_request_diff_files' + + include EachBatch + end + def perform(merge_request_diff_id) merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id) diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb index d944ed90fce..6442468836b 100644 --- a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb +++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb @@ -5,27 +5,56 @@ module Gitlab module BackgroundMigration class ScheduleDiffFilesDeletion - BATCH_SIZE = 5 + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + has_many :merge_request_diff_files + + include EachBatch + end + + ITERATION_BATCH = 1000 + DELETION_BATCH = 1000 # per minute MIGRATION = 'DeleteDiffFiles' - DELAY_INTERVAL = 10.minutes - def perform(diff_ids, scheduler_index) - relation = MergeRequestDiff.where(id: diff_ids) + # Considering query times and Redis writings, this should take around 2 + # hours to complete. + def perform + diffs_with_files = MergeRequestDiff.where.not(state: %w(without_files empty)) - job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids| - ids.map { |id| [MIGRATION, [id]] } - end + # This will be increased for each scheduled job + process_job_in = 1.second - job_batches.each_with_index do |jobs, inner_index| - # This will give some space between batches of workers. - interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes + # explain (analyze, buffers) example for the iteration: + # + # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1) + # Index Cond: ((id >= 764586) AND (id < 835298)) + # Heap Fetches: 8 + # Buffers: shared hit=18188 + # Planning time: 0.752 ms + # Execution time: 12.430 ms + # + diffs_with_files.reorder(nil).each_batch(of: ITERATION_BATCH) do |relation, scheduler_index| + relation.each do |diff| + BackgroundMigrationWorker.perform_in(process_job_in, MIGRATION, [diff.id]) - # A single `merge_request_diff` can be associated with way too many - # `merge_request_diff_files`. It's better to avoid scheduling big - # batches and go with 5 at a time. - # - BackgroundMigrationWorker.bulk_perform_in(interval, jobs) + diff_files_count = diff.merge_request_diff_files.reorder(nil).count + + # We should limit on 1000 diff files deletion per minute to avoid + # replication lag issues. + # + interval = (diff_files_count.to_f / DELETION_BATCH).minutes + process_job_in += interval + end end + + log_days_to_process_all_jobs(process_job_in) + end + + def log_days_to_process_all_jobs(seconds_to_process) + days_to_process_all_jobs = (seconds_to_process / 60 / 60 / 24).to_i + Rails.logger.info("Gitlab::BackgroundMigration::DeleteDiffFiles will take " \ + "#{days_to_process_all_jobs} days to be processed") end end end diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb index a251ab323d8..d8497315e2d 100644 --- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -22,7 +22,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 end it 'rollsback if something goes wrong' do - expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) + expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) .and_raise expect { described_class.new.perform(merge_request_diff.id) } diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb index a699571ca90..04f20b75531 100644 --- a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb +++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb @@ -3,41 +3,71 @@ require 'spec_helper' describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do describe '#perform' do let(:merge_request_diffs) { table(:merge_request_diffs) } + let(:diff_files) { table(:merge_request_diff_files) } let(:merge_requests) { table(:merge_requests) } let(:namespaces) { table(:namespaces) } let(:projects) { table(:projects) } + def diff_file_params(extra_params = {}) + extra_params.merge(new_file: false, + renamed_file: false, + too_large: false, + deleted_file: false, + a_mode: 'foo', + b_mode: 'bar', + new_path: 'xpto', + old_path: 'kux', + diff: 'content') + end + + def create_diffs(id:, files_number:, state: 'collected') + merge_request_diffs.create!(id: id, merge_request_id: 1, state: state) + files_number.times.to_a.each do |index| + params = diff_file_params(merge_request_diff_id: id, relative_order: index) + + diff_files.create!(params) + end + end + before do - stub_const("#{described_class.name}::BATCH_SIZE", 3) + stub_const("#{described_class.name}::DELETION_BATCH", 10) namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') - - merge_request_diffs.create!(id: 1, merge_request_id: 1) - merge_request_diffs.create!(id: 2, merge_request_id: 1) - merge_request_diffs.create!(id: 3, merge_request_id: 1) - merge_request_diffs.create!(id: 4, merge_request_id: 1) - merge_request_diffs.create!(id: 5, merge_request_id: 1) end it 'correctly schedules diff file deletion workers' do Sidekiq::Testing.fake! do Timecop.freeze do - described_class.new.perform([1, 2, 3, 4, 5], 1) + create_diffs(id: 1, files_number: 25) + create_diffs(id: 2, files_number: 11) + create_diffs(id: 3, files_number: 4, state: 'without_files') + create_diffs(id: 4, files_number: 5, state: 'empty') + create_diffs(id: 5, files_number: 9) + + worker = described_class.new - [1, 2, 3].each do |id| - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id) - end + expect(worker).to receive(:log_days_to_process_all_jobs).with(1.second + 2.5.minutes + 1.1.minutes + 0.9.minutes) - [4, 5].each do |id| - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id) - end + worker.perform - expect(BackgroundMigrationWorker.jobs.size).to eq(5) + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second, 1) + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes, 2) + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes + 1.1.minutes, 5) + expect(BackgroundMigrationWorker.jobs.size).to eq(3) end end end end + + describe '#days_to_process_all_jobs' do + it 'logs how many days it will take to run all jobs' do + expect(Rails).to receive_message_chain(:logger, :info) + .with("Gitlab::BackgroundMigration::DeleteDiffFiles will take 3 days to be processed") + + described_class.new.log_days_to_process_all_jobs(3.days.seconds) + end + end end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb index 43dbd7e8f53..3d19c6fcc10 100644 --- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -2,45 +2,16 @@ require 'spec_helper' require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb') describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do - let(:merge_request_diffs) { table(:merge_request_diffs) } - let(:merge_requests) { table(:merge_requests) } - let(:namespaces) { table(:namespaces) } - let(:projects) { table(:projects) } - - before do - stub_const("#{described_class.name}::BATCH_SIZE", 4) - - namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') - projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') - - merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') - - merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'without_files') - merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty') - merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') - merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected') - - merge_requests.update(1, latest_merge_request_diff_id: 6) - end - - it 'correctly schedules diff file deletion workers schedulers' do + it 'correctly schedules diff file deletion scheduler' do Sidekiq::Testing.fake! do - # First scheduling batch - expect(BackgroundMigrationWorker).to receive(:perform_async) - .with(described_class::SCHEDULER, [[[1], [3], [4], [6]], 1]) - .and_call_original - - # Second scheduling batch - expect(BackgroundMigrationWorker).to receive(:perform_async) - .with(described_class::SCHEDULER, [[[7]], 2]) + expect(BackgroundMigrationWorker) + .to receive(:perform_async) + .with(described_class::SCHEDULER) .and_call_original migrate! - expect(BackgroundMigrationWorker.jobs.size).to eq(2) + expect(BackgroundMigrationWorker.jobs.size).to eq(1) end end end |