summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-09 18:34:30 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 10:25:48 -0300
commit659aeba43c498b7a45fb1495371ded7436b238c9 (patch)
tree7f040aa5ec7703330627334b6532af714becc530
parentc15f836c3a38e7c512a1851bc09440da90af8ac6 (diff)
downloadgitlab-ce-659aeba43c498b7a45fb1495371ded7436b238c9.tar.gz
Use schedulers and delete diff files upon deadtuples check
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb4
-rw-r--r--lib/gitlab/background_migration/delete_diff_files.rb48
-rw-r--r--lib/gitlab/background_migration/schedule_diff_files_deletion.rb44
-rw-r--r--spec/lib/gitlab/background_migration/delete_diff_files_spec.rb52
-rw-r--r--spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb43
-rw-r--r--spec/migrations/enqueue_delete_diff_files_workers_spec.rb4
6 files changed, 119 insertions, 76 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
index 100bd2a8f4e..c4d2f5f61a0 100644
--- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -2,7 +2,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
- MIGRATION = 'DeleteDiffFiles'.freeze
+ SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
end
- BackgroundMigrationWorker.perform_async(MIGRATION)
+ BackgroundMigrationWorker.perform_async(SCHEDULER)
# We don't remove the index since it's going to be used on DeleteDiffFiles
# worker. We should remove it in an upcoming release.
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb
index 9824d0a0c37..664ead1af44 100644
--- a/lib/gitlab/background_migration/delete_diff_files.rb
+++ b/lib/gitlab/background_migration/delete_diff_files.rb
@@ -15,49 +15,37 @@ module Gitlab
self.table_name = 'merge_request_diff_files'
end
- DIFF_ROWS_LIMIT = 5_000
DEAD_TUPLES_THRESHOLD = 50_000
VACUUM_WAIT_TIME = 5.minutes
- def perform
- rescheduling do
- prune_diff_files(diffs_collection.limit(DIFF_ROWS_LIMIT))
- end
- end
-
- def should_wait_deadtuple_vacuum?
- return false unless Gitlab::Database.postgresql?
-
- diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
- end
-
- private
+ def perform(ids)
+ @ids = ids
- def rescheduling(&block)
# We should reschedule until deadtuples get in a desirable
- # state (e.g. < 50_000). That may take move than one reschedule.
+ # state (e.g. < 50_000). That may take more than one reschedule.
#
if should_wait_deadtuple_vacuum?
reschedule
return
end
- block.call
+ prune_diff_files
+ end
+
+ def should_wait_deadtuple_vacuum?
+ return false unless Gitlab::Database.postgresql?
- reschedule if diffs_collection.limit(1).count > 0
+ diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
end
+ private
+
def reschedule
- BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize)
+ BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids])
end
def diffs_collection
- MergeRequestDiff
- .joins(:merge_request)
- .where("merge_requests.state = 'merged'")
- .where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
- .where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id')
- .where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
+ MergeRequestDiff.where(id: @ids)
end
def diff_files_dead_tuples_count
@@ -68,17 +56,13 @@ module Gitlab
dead_tuple&.fetch('n_dead_tup', 0).to_i
end
- def prune_diff_files(batch)
- diff_ids = batch.pluck(:id)
-
+ def prune_diff_files
removed = 0
updated = 0
MergeRequestDiff.transaction do
- updated = MergeRequestDiff.where(id: diff_ids)
- .update_all(state: 'without_files')
- removed = MergeRequestDiffFile.where(merge_request_diff_id: diff_ids)
- .delete_all
+ updated = diffs_collection.update_all(state: 'without_files')
+ removed = MergeRequestDiffFile.where(merge_request_diff_id: @ids).delete_all
end
log_info("Removed #{removed} merge_request_diff_files rows, "\
diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
new file mode 100644
index 00000000000..609cf19187c
--- /dev/null
+++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+# rubocop:disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class ScheduleDiffFilesDeletion
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ belongs_to :merge_request
+
+ include EachBatch
+ end
+
+ DIFF_BATCH_SIZE = 5_000
+ INTERVAL = 5.minutes
+ MIGRATION = 'DeleteDiffFiles'
+
+ def perform
+ diffs = MergeRequestDiff
+ .from("(#{diffs_collection.to_sql}) merge_request_diffs")
+ .where('merge_request_diffs.id != merge_request_diffs.latest_merge_request_diff_id')
+ .select(:id)
+
+ diffs.each_batch(of: DIFF_BATCH_SIZE) do |relation, index|
+ ids = relation.pluck(:id)
+
+ BackgroundMigrationWorker.perform_in(index * INTERVAL, MIGRATION, [ids])
+ end
+ end
+
+ private
+
+ def diffs_collection
+ MergeRequestDiff
+ .joins(:merge_request)
+ .where("merge_requests.state = 'merged'")
+ .where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
+ .where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
+ .select('merge_requests.latest_merge_request_diff_id, merge_request_diffs.id')
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
index 247e0786dd6..64c994a268f 100644
--- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
+++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
@@ -9,14 +9,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request.merge_request_diffs.first
end
+ let(:perform) do
+ described_class.new.perform(MergeRequestDiff.pluck(:id))
+ end
+
it 'deletes all merge request diff files' do
- expect { described_class.new.perform }
+ expect { perform }
.to change { merge_request_diff.merge_request_diff_files.count }
.from(20).to(0)
end
it 'updates state to without_files' do
- expect { described_class.new.perform }
+ expect { perform }
.to change { merge_request_diff.reload.state }
.from('collected').to('without_files')
end
@@ -25,7 +29,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise
- expect { described_class.new.perform }
+ expect { perform }
.to raise_error
merge_request_diff.reload
@@ -35,50 +39,18 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
end
end
- it 'deletes no merge request diff files when MR is not merged' do
- merge_request = create(:merge_request, :opened)
- merge_request.create_merge_request_diff
- merge_request_diff = merge_request.merge_request_diffs.first
-
- expect { described_class.new.perform }
- .not_to change { merge_request_diff.merge_request_diff_files.count }
- .from(20)
- end
-
- it 'deletes no merge request diff files when diff is marked as "without_files"' do
- merge_request = create(:merge_request, :merged)
- merge_request.create_merge_request_diff
- merge_request_diff = merge_request.merge_request_diffs.first
-
- merge_request_diff.clean!
-
- expect { described_class.new.perform }
- .not_to change { merge_request_diff.merge_request_diff_files.count }
- .from(20)
- end
-
- it 'deletes no merge request diff files when diff is the latest' do
+ it 'reschedules itself when should_wait_deadtuple_vacuum' do
merge_request = create(:merge_request, :merged)
- merge_request_diff = merge_request.merge_request_diff
-
- expect { described_class.new.perform }
- .not_to change { merge_request_diff.merge_request_diff_files.count }
- .from(20)
- end
+ first_diff = merge_request.merge_request_diff
+ second_diff = merge_request.create_merge_request_diff
- it 'reschedules itself when should_wait_deadtuple_vacuum' do
Sidekiq::Testing.fake! do
worker = described_class.new
-
allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true }
- expect(BackgroundMigrationWorker)
- .to receive(:perform_in)
- .with(described_class::VACUUM_WAIT_TIME, 'DeleteDiffFiles')
- .and_call_original
-
- worker.perform
+ worker.perform([first_diff.id, second_diff.id])
+ expect(described_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, [first_diff.id, second_diff.id])
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end
end
diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
new file mode 100644
index 00000000000..fb5093b0bd1
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
@@ -0,0 +1,43 @@
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
+ describe '#perform' do
+ let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:merge_requests) { table(:merge_requests) }
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+
+ before do
+ stub_const("#{described_class.name}::DIFF_BATCH_SIZE", 3)
+
+ namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
+ projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
+
+ merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
+
+ merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'empty')
+ merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'without_files')
+ merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
+
+ merge_requests.update(1, latest_merge_request_diff_id: 7)
+ end
+
+ it 'correctly schedules diff file deletion workers' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ described_class.new.perform
+
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5])
+
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6])
+
+ expect(BackgroundMigrationWorker.jobs.size).to eq(2)
+ end
+ end
+ end
+ end
+end
diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
index 803830d81cf..6bae870920c 100644
--- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
+++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
@@ -2,11 +2,11 @@ require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
- it 'correctly schedules diff files deletion' do
+ it 'correctly schedules diff files deletion schedulers' do
Sidekiq::Testing.fake! do
expect(BackgroundMigrationWorker)
.to receive(:perform_async)
- .with(described_class::MIGRATION)
+ .with(described_class::SCHEDULER)
.and_call_original
migrate!