From 19966e7095fd6357183afb8e009c7f8e78a05591 Mon Sep 17 00:00:00 2001 From: Oswaldo Ferreira Date: Thu, 5 Jul 2018 17:15:57 -0300 Subject: Wait postgresql vacuum of deadtuples on merge_request_diff_files deletion --- ...0619121030_enqueue_delete_diff_files_workers.rb | 15 +--- .../background_migration/delete_diff_files.rb | 83 +++++++++++++++------- .../schedule_diff_files_deletion.rb | 61 ---------------- .../background_migration/delete_diff_files_spec.rb | 31 ++++++-- .../schedule_diff_files_deletion_spec.rb | 73 ------------------- .../enqueue_delete_diff_files_workers_spec.rb | 4 +- 6 files changed, 87 insertions(+), 180 deletions(-) delete mode 100644 lib/gitlab/background_migration/schedule_diff_files_deletion.rb delete mode 100644 spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb index df30991737c..9e96aaf50a0 100644 --- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -2,24 +2,15 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers DOWNTIME = false - SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze - TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze + MIGRATION = 'DeleteDiffFiles'.freeze disable_ddl_transaction! def up - unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX) - add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX) - end - - # We keep the index since this will be used async. - # Ideally we should remove it in an upcoming release. - BackgroundMigrationWorker.perform_async(SCHEDULER) + BackgroundMigrationWorker.perform_async(MIGRATION) end def down - if index_exists_by_name?(:merge_request_diffs, TMP_INDEX) - remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX) - end + # no-op end end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index e2c90fce6b1..36161285aac 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -8,6 +8,7 @@ module Gitlab self.table_name = 'merge_request_diffs' belongs_to :merge_request + has_many :merge_request_diff_files include EachBatch end @@ -18,41 +19,73 @@ module Gitlab include EachBatch end - def perform(merge_request_diff_id) - merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id) + BATCH = 5_000 + DEAD_TUPLES_THRESHOLD = 50_000 + VACUUM_WAIT_TIME = 5.minutes - return unless merge_request_diff - return unless should_delete_diff_files?(merge_request_diff) + def perform + diffs_with_files = MergeRequestDiff + .joins(:merge_request) + .where("merge_requests.state = 'merged'") + .where('merge_requests.latest_merge_request_diff_id IS NOT NULL') + .where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id') + .where("merge_request_diffs.state NOT IN ('without_files', 'empty')") - MergeRequestDiff.transaction do - merge_request_diff.update_column(:state, 'without_files') - - # explain (analyze, buffers) when deleting 453 diff files: - # - # Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1) - # Buffers: shared hit=2043 read=259 dirtied=254 - # -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu - # al time=0.466..26.317 rows=453 loops=1) - # Index Cond: (merge_request_diff_id = 463448) - # Buffers: shared hit=17 read=84 - # Planning time: 0.107 ms - # Execution time: 43.287 ms - # - MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all + diffs_with_files.each_batch(of: BATCH) do |batch, index| + wait_deadtuple_vacuum(index) + prune_diff_files(batch, index) + end + end + + def wait_deadtuple_vacuum(index) + db_klass = Gitlab::Database + + if defined?(db_klass) && db_klass.respond_to?(:postgresql?) && db_klass.postgresql? + while diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD + log_info("Dead tuple threshold hit on merge_request_diff_files (#{index}th batch): " \ + "#{diff_files_dead_tuples_count}, waiting 5 minutes") + sleep VACUUM_WAIT_TIME + end end end private - def should_delete_diff_files?(merge_request_diff) - return false if merge_request_diff.state == 'without_files' + def diff_files_dead_tuples_count + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = 'merge_request_diff_files'")[0] + + if dead_tuple.present? + dead_tuple['n_dead_tup'].to_i + else + 0 + end + end - merge_request = merge_request_diff.merge_request + def prune_diff_files(batch, index) + diff_ids = batch.pluck(:id) - return false unless merge_request.state == 'merged' - return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id + removed = 0 + updated = 0 + + MergeRequestDiff.transaction do + updated = MergeRequestDiff.where(id: diff_ids) + .update_all(state: 'without_files') + removed = MergeRequestDiffFile.where(merge_request_diff_id: diff_ids) + .delete_all + end + + log_info("#{index}th batch - Removed #{removed} merge_request_diff_files rows, "\ + "updated #{updated} merge_request_diffs rows") + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end - true + def log_info(message) + Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end end end diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb deleted file mode 100644 index 6442468836b..00000000000 --- a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Metrics/AbcSize -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class ScheduleDiffFilesDeletion - class MergeRequestDiff < ActiveRecord::Base - self.table_name = 'merge_request_diffs' - - has_many :merge_request_diff_files - - include EachBatch - end - - ITERATION_BATCH = 1000 - DELETION_BATCH = 1000 # per minute - MIGRATION = 'DeleteDiffFiles' - - # Considering query times and Redis writings, this should take around 2 - # hours to complete. - def perform - diffs_with_files = MergeRequestDiff.where.not(state: %w(without_files empty)) - - # This will be increased for each scheduled job - process_job_in = 1.second - - # explain (analyze, buffers) example for the iteration: - # - # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1) - # Index Cond: ((id >= 764586) AND (id < 835298)) - # Heap Fetches: 8 - # Buffers: shared hit=18188 - # Planning time: 0.752 ms - # Execution time: 12.430 ms - # - diffs_with_files.reorder(nil).each_batch(of: ITERATION_BATCH) do |relation, scheduler_index| - relation.each do |diff| - BackgroundMigrationWorker.perform_in(process_job_in, MIGRATION, [diff.id]) - - diff_files_count = diff.merge_request_diff_files.reorder(nil).count - - # We should limit on 1000 diff files deletion per minute to avoid - # replication lag issues. - # - interval = (diff_files_count.to_f / DELETION_BATCH).minutes - process_job_in += interval - end - end - - log_days_to_process_all_jobs(process_job_in) - end - - def log_days_to_process_all_jobs(seconds_to_process) - days_to_process_all_jobs = (seconds_to_process / 60 / 60 / 24).to_i - Rails.logger.info("Gitlab::BackgroundMigration::DeleteDiffFiles will take " \ - "#{days_to_process_all_jobs} days to be processed") - end - end - end -end diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb index d8497315e2d..f128c9ecf1b 100644 --- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -4,19 +4,19 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 describe '#perform' do context 'when diff files can be deleted' do let(:merge_request) { create(:merge_request, :merged) } - let(:merge_request_diff) do + let!(:merge_request_diff) do merge_request.create_merge_request_diff merge_request.merge_request_diffs.first end it 'deletes all merge request diff files' do - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .to change { merge_request_diff.merge_request_diff_files.count } .from(20).to(0) end it 'updates state to without_files' do - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .to change { merge_request_diff.reload.state } .from('collected').to('without_files') end @@ -25,7 +25,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) .and_raise - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .to raise_error merge_request_diff.reload @@ -40,7 +40,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 merge_request.create_merge_request_diff merge_request_diff = merge_request.merge_request_diffs.first - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .not_to change { merge_request_diff.merge_request_diff_files.count } .from(20) end @@ -52,7 +52,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 merge_request_diff.clean! - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .not_to change { merge_request_diff.merge_request_diff_files.count } .from(20) end @@ -61,9 +61,26 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 merge_request = create(:merge_request, :merged) merge_request_diff = merge_request.merge_request_diff - expect { described_class.new.perform(merge_request_diff.id) } + expect { described_class.new.perform } .not_to change { merge_request_diff.merge_request_diff_files.count } .from(20) end end + + describe '#wait_deadtuple_vacuum' do + it 'sleeps process for VACUUM_WAIT_TIME when hitting DEAD_TUPLES_THRESHOLD', :postgresql do + worker = described_class.new + threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }] + normal_query_result = [{ "n_dead_tup" => '3' }] + + allow(worker) + .to receive(:execute_statement) + .with(/SELECT n_dead_tup */) + .and_return(threshold_query_result, normal_query_result) + + expect(worker).to receive(:sleep).with(described_class::VACUUM_WAIT_TIME).once + + worker.wait_deadtuple_vacuum(1) + end + end end diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb deleted file mode 100644 index 04f20b75531..00000000000 --- a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb +++ /dev/null @@ -1,73 +0,0 @@ -require 'spec_helper' - -describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do - describe '#perform' do - let(:merge_request_diffs) { table(:merge_request_diffs) } - let(:diff_files) { table(:merge_request_diff_files) } - let(:merge_requests) { table(:merge_requests) } - let(:namespaces) { table(:namespaces) } - let(:projects) { table(:projects) } - - def diff_file_params(extra_params = {}) - extra_params.merge(new_file: false, - renamed_file: false, - too_large: false, - deleted_file: false, - a_mode: 'foo', - b_mode: 'bar', - new_path: 'xpto', - old_path: 'kux', - diff: 'content') - end - - def create_diffs(id:, files_number:, state: 'collected') - merge_request_diffs.create!(id: id, merge_request_id: 1, state: state) - files_number.times.to_a.each do |index| - params = diff_file_params(merge_request_diff_id: id, relative_order: index) - - diff_files.create!(params) - end - end - - before do - stub_const("#{described_class.name}::DELETION_BATCH", 10) - - namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') - projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') - - merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') - end - - it 'correctly schedules diff file deletion workers' do - Sidekiq::Testing.fake! do - Timecop.freeze do - create_diffs(id: 1, files_number: 25) - create_diffs(id: 2, files_number: 11) - create_diffs(id: 3, files_number: 4, state: 'without_files') - create_diffs(id: 4, files_number: 5, state: 'empty') - create_diffs(id: 5, files_number: 9) - - worker = described_class.new - - expect(worker).to receive(:log_days_to_process_all_jobs).with(1.second + 2.5.minutes + 1.1.minutes + 0.9.minutes) - - worker.perform - - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second, 1) - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes, 2) - expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes + 1.1.minutes, 5) - expect(BackgroundMigrationWorker.jobs.size).to eq(3) - end - end - end - end - - describe '#days_to_process_all_jobs' do - it 'logs how many days it will take to run all jobs' do - expect(Rails).to receive_message_chain(:logger, :info) - .with("Gitlab::BackgroundMigration::DeleteDiffFiles will take 3 days to be processed") - - described_class.new.log_days_to_process_all_jobs(3.days.seconds) - end - end -end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb index 3d19c6fcc10..803830d81cf 100644 --- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -2,11 +2,11 @@ require 'spec_helper' require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb') describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do - it 'correctly schedules diff file deletion scheduler' do + it 'correctly schedules diff files deletion' do Sidekiq::Testing.fake! do expect(BackgroundMigrationWorker) .to receive(:perform_async) - .with(described_class::SCHEDULER) + .with(described_class::MIGRATION) .and_call_original migrate! -- cgit v1.2.1