summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-05 17:15:57 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 09:43:58 -0300
commit19966e7095fd6357183afb8e009c7f8e78a05591 (patch)
treeacbdf99563af096d49e52fe6e5a0cdd326ef0c3e
parent4455904bc154f1a36cedeea574bb0f454f92a9e9 (diff)
downloadgitlab-ce-19966e7095fd6357183afb8e009c7f8e78a05591.tar.gz
Wait postgresql vacuum of deadtuples on merge_request_diff_files deletion
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb15
-rw-r--r--lib/gitlab/background_migration/delete_diff_files.rb83
-rw-r--r--lib/gitlab/background_migration/schedule_diff_files_deletion.rb61
-rw-r--r--spec/lib/gitlab/background_migration/delete_diff_files_spec.rb31
-rw-r--r--spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb73
-rw-r--r--spec/migrations/enqueue_delete_diff_files_workers_spec.rb4
6 files changed, 87 insertions, 180 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
index df30991737c..9e96aaf50a0 100644
--- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -2,24 +2,15 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
- SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
- TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
+ MIGRATION = 'DeleteDiffFiles'.freeze
disable_ddl_transaction!
def up
- unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
- add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
- end
-
- # We keep the index since this will be used async.
- # Ideally we should remove it in an upcoming release.
- BackgroundMigrationWorker.perform_async(SCHEDULER)
+ BackgroundMigrationWorker.perform_async(MIGRATION)
end
def down
- if index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
- remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
- end
+ # no-op
end
end
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb
index e2c90fce6b1..36161285aac 100644
--- a/lib/gitlab/background_migration/delete_diff_files.rb
+++ b/lib/gitlab/background_migration/delete_diff_files.rb
@@ -8,6 +8,7 @@ module Gitlab
self.table_name = 'merge_request_diffs'
belongs_to :merge_request
+ has_many :merge_request_diff_files
include EachBatch
end
@@ -18,41 +19,73 @@ module Gitlab
include EachBatch
end
- def perform(merge_request_diff_id)
- merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id)
+ BATCH = 5_000
+ DEAD_TUPLES_THRESHOLD = 50_000
+ VACUUM_WAIT_TIME = 5.minutes
- return unless merge_request_diff
- return unless should_delete_diff_files?(merge_request_diff)
+ def perform
+ diffs_with_files = MergeRequestDiff
+ .joins(:merge_request)
+ .where("merge_requests.state = 'merged'")
+ .where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
+ .where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id')
+ .where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
- MergeRequestDiff.transaction do
- merge_request_diff.update_column(:state, 'without_files')
-
- # explain (analyze, buffers) when deleting 453 diff files:
- #
- # Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1)
- # Buffers: shared hit=2043 read=259 dirtied=254
- # -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu
- # al time=0.466..26.317 rows=453 loops=1)
- # Index Cond: (merge_request_diff_id = 463448)
- # Buffers: shared hit=17 read=84
- # Planning time: 0.107 ms
- # Execution time: 43.287 ms
- #
- MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all
+ diffs_with_files.each_batch(of: BATCH) do |batch, index|
+ wait_deadtuple_vacuum(index)
+ prune_diff_files(batch, index)
+ end
+ end
+
+ def wait_deadtuple_vacuum(index)
+ db_klass = Gitlab::Database
+
+ if defined?(db_klass) && db_klass.respond_to?(:postgresql?) && db_klass.postgresql?
+ while diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
+ log_info("Dead tuple threshold hit on merge_request_diff_files (#{index}th batch): " \
+ "#{diff_files_dead_tuples_count}, waiting 5 minutes")
+ sleep VACUUM_WAIT_TIME
+ end
end
end
private
- def should_delete_diff_files?(merge_request_diff)
- return false if merge_request_diff.state == 'without_files'
+ def diff_files_dead_tuples_count
+ dead_tuple =
+ execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\
+ "WHERE relname = 'merge_request_diff_files'")[0]
+
+ if dead_tuple.present?
+ dead_tuple['n_dead_tup'].to_i
+ else
+ 0
+ end
+ end
- merge_request = merge_request_diff.merge_request
+ def prune_diff_files(batch, index)
+ diff_ids = batch.pluck(:id)
- return false unless merge_request.state == 'merged'
- return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id
+ removed = 0
+ updated = 0
+
+ MergeRequestDiff.transaction do
+ updated = MergeRequestDiff.where(id: diff_ids)
+ .update_all(state: 'without_files')
+ removed = MergeRequestDiffFile.where(merge_request_diff_id: diff_ids)
+ .delete_all
+ end
+
+ log_info("#{index}th batch - Removed #{removed} merge_request_diff_files rows, "\
+ "updated #{updated} merge_request_diffs rows")
+ end
+
+ def execute_statement(sql)
+ ActiveRecord::Base.connection.execute(sql)
+ end
- true
+ def log_info(message)
+ Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}")
end
end
end
diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
deleted file mode 100644
index 6442468836b..00000000000
--- a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
+++ /dev/null
@@ -1,61 +0,0 @@
-# frozen_string_literal: true
-# rubocop:disable Metrics/AbcSize
-# rubocop:disable Style/Documentation
-
-module Gitlab
- module BackgroundMigration
- class ScheduleDiffFilesDeletion
- class MergeRequestDiff < ActiveRecord::Base
- self.table_name = 'merge_request_diffs'
-
- has_many :merge_request_diff_files
-
- include EachBatch
- end
-
- ITERATION_BATCH = 1000
- DELETION_BATCH = 1000 # per minute
- MIGRATION = 'DeleteDiffFiles'
-
- # Considering query times and Redis writings, this should take around 2
- # hours to complete.
- def perform
- diffs_with_files = MergeRequestDiff.where.not(state: %w(without_files empty))
-
- # This will be increased for each scheduled job
- process_job_in = 1.second
-
- # explain (analyze, buffers) example for the iteration:
- #
- # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
- # Index Cond: ((id >= 764586) AND (id < 835298))
- # Heap Fetches: 8
- # Buffers: shared hit=18188
- # Planning time: 0.752 ms
- # Execution time: 12.430 ms
- #
- diffs_with_files.reorder(nil).each_batch(of: ITERATION_BATCH) do |relation, scheduler_index|
- relation.each do |diff|
- BackgroundMigrationWorker.perform_in(process_job_in, MIGRATION, [diff.id])
-
- diff_files_count = diff.merge_request_diff_files.reorder(nil).count
-
- # We should limit on 1000 diff files deletion per minute to avoid
- # replication lag issues.
- #
- interval = (diff_files_count.to_f / DELETION_BATCH).minutes
- process_job_in += interval
- end
- end
-
- log_days_to_process_all_jobs(process_job_in)
- end
-
- def log_days_to_process_all_jobs(seconds_to_process)
- days_to_process_all_jobs = (seconds_to_process / 60 / 60 / 24).to_i
- Rails.logger.info("Gitlab::BackgroundMigration::DeleteDiffFiles will take " \
- "#{days_to_process_all_jobs} days to be processed")
- end
- end
- end
-end
diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
index d8497315e2d..f128c9ecf1b 100644
--- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
+++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
@@ -4,19 +4,19 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
describe '#perform' do
context 'when diff files can be deleted' do
let(:merge_request) { create(:merge_request, :merged) }
- let(:merge_request_diff) do
+ let!(:merge_request_diff) do
merge_request.create_merge_request_diff
merge_request.merge_request_diffs.first
end
it 'deletes all merge request diff files' do
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.to change { merge_request_diff.merge_request_diff_files.count }
.from(20).to(0)
end
it 'updates state to without_files' do
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.to change { merge_request_diff.reload.state }
.from('collected').to('without_files')
end
@@ -25,7 +25,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.to raise_error
merge_request_diff.reload
@@ -40,7 +40,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request.create_merge_request_diff
merge_request_diff = merge_request.merge_request_diffs.first
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
@@ -52,7 +52,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request_diff.clean!
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
@@ -61,9 +61,26 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request = create(:merge_request, :merged)
merge_request_diff = merge_request.merge_request_diff
- expect { described_class.new.perform(merge_request_diff.id) }
+ expect { described_class.new.perform }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
end
+
+ describe '#wait_deadtuple_vacuum' do
+ it 'sleeps process for VACUUM_WAIT_TIME when hitting DEAD_TUPLES_THRESHOLD', :postgresql do
+ worker = described_class.new
+ threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }]
+ normal_query_result = [{ "n_dead_tup" => '3' }]
+
+ allow(worker)
+ .to receive(:execute_statement)
+ .with(/SELECT n_dead_tup */)
+ .and_return(threshold_query_result, normal_query_result)
+
+ expect(worker).to receive(:sleep).with(described_class::VACUUM_WAIT_TIME).once
+
+ worker.wait_deadtuple_vacuum(1)
+ end
+ end
end
diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
deleted file mode 100644
index 04f20b75531..00000000000
--- a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
+++ /dev/null
@@ -1,73 +0,0 @@
-require 'spec_helper'
-
-describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
- describe '#perform' do
- let(:merge_request_diffs) { table(:merge_request_diffs) }
- let(:diff_files) { table(:merge_request_diff_files) }
- let(:merge_requests) { table(:merge_requests) }
- let(:namespaces) { table(:namespaces) }
- let(:projects) { table(:projects) }
-
- def diff_file_params(extra_params = {})
- extra_params.merge(new_file: false,
- renamed_file: false,
- too_large: false,
- deleted_file: false,
- a_mode: 'foo',
- b_mode: 'bar',
- new_path: 'xpto',
- old_path: 'kux',
- diff: 'content')
- end
-
- def create_diffs(id:, files_number:, state: 'collected')
- merge_request_diffs.create!(id: id, merge_request_id: 1, state: state)
- files_number.times.to_a.each do |index|
- params = diff_file_params(merge_request_diff_id: id, relative_order: index)
-
- diff_files.create!(params)
- end
- end
-
- before do
- stub_const("#{described_class.name}::DELETION_BATCH", 10)
-
- namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
- projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
-
- merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
- end
-
- it 'correctly schedules diff file deletion workers' do
- Sidekiq::Testing.fake! do
- Timecop.freeze do
- create_diffs(id: 1, files_number: 25)
- create_diffs(id: 2, files_number: 11)
- create_diffs(id: 3, files_number: 4, state: 'without_files')
- create_diffs(id: 4, files_number: 5, state: 'empty')
- create_diffs(id: 5, files_number: 9)
-
- worker = described_class.new
-
- expect(worker).to receive(:log_days_to_process_all_jobs).with(1.second + 2.5.minutes + 1.1.minutes + 0.9.minutes)
-
- worker.perform
-
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second, 1)
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes, 2)
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes + 1.1.minutes, 5)
- expect(BackgroundMigrationWorker.jobs.size).to eq(3)
- end
- end
- end
- end
-
- describe '#days_to_process_all_jobs' do
- it 'logs how many days it will take to run all jobs' do
- expect(Rails).to receive_message_chain(:logger, :info)
- .with("Gitlab::BackgroundMigration::DeleteDiffFiles will take 3 days to be processed")
-
- described_class.new.log_days_to_process_all_jobs(3.days.seconds)
- end
- end
-end
diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
index 3d19c6fcc10..803830d81cf 100644
--- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
+++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
@@ -2,11 +2,11 @@ require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
- it 'correctly schedules diff file deletion scheduler' do
+ it 'correctly schedules diff files deletion' do
Sidekiq::Testing.fake! do
expect(BackgroundMigrationWorker)
.to receive(:perform_async)
- .with(described_class::SCHEDULER)
+ .with(described_class::MIGRATION)
.and_call_original
migrate!