summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-06-25 17:49:53 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-06-26 20:40:04 -0300
commit1e8720a4ab3e7ff808f0e98177d6b1ec4c689c19 (patch)
tree7794271f22548f67f623ccf0efa69cf30dc25787
parent292cf668905a55e7b305c67b314cb039d2681a54 (diff)
downloadgitlab-ce-osw-delete-non-latest-mr-diff-files-migration.tar.gz
Schedule workers to delete non-latest diffs in post-migrationosw-delete-non-latest-mr-diff-files-migration
-rw-r--r--changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml5
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb70
-rw-r--r--lib/gitlab/background_migration/delete_diff_files.rb46
-rw-r--r--spec/lib/gitlab/background_migration/delete_diff_files_spec.rb69
-rw-r--r--spec/migrations/enqueue_delete_diff_files_workers_spec.rb48
5 files changed, 238 insertions, 0 deletions
diff --git a/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml b/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml
new file mode 100644
index 00000000000..e4cbae1a109
--- /dev/null
+++ b/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml
@@ -0,0 +1,5 @@
+---
+title: Schedule workers to delete non-latest diffs in post-migration
+merge_request:
+author:
+type: other
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
new file mode 100644
index 00000000000..5fb3d545624
--- /dev/null
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -0,0 +1,70 @@
+class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ belongs_to :merge_request
+
+ include EachBatch
+ end
+
+ DOWNTIME = false
+ BATCH_SIZE = 1000
+ MIGRATION = 'DeleteDiffFiles'
+ DELAY_INTERVAL = 8.minutes
+ TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
+
+ disable_ddl_transaction!
+
+ def up
+ # We add temporary index, to make iteration over batches more performant.
+ # Conditional here is to avoid the need of doing that in a separate
+ # migration file to make this operation idempotent.
+ #
+ unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
+ add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
+ end
+
+
+ diffs_with_files = MergeRequestDiff.where.not(state: ['without_files', 'empty'])
+
+ # explain (analyze, buffers) example for the iteration:
+ #
+ # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
+ # Index Cond: ((id >= 764586) AND (id < 835298))
+ # Heap Fetches: 8
+ # Buffers: shared hit=18188
+ # Planning time: 0.752 ms
+ # Execution time: 12.430 ms
+ #
+ diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
+ ids = relation.pluck(:id)
+
+ ids.each_with_index do |diff_id, inner_index|
+ # This will give some space between batches of workers.
+ interval = DELAY_INTERVAL * outer_index + inner_index.minutes
+
+ # A single `merge_request_diff` can be associated with way too many
+ # `merge_request_diff_files`. It's better to avoid batching these and
+ # schedule one at a time.
+ #
+ # Considering roughly 6M jobs, this should take ~30 days to process all
+ # of them.
+ #
+ BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id])
+ end
+ end
+
+ # We remove temporary index, because it is not required during standard
+ # operations and runtime.
+ #
+ remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
+ end
+
+ def down
+ if index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
+ remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb
new file mode 100644
index 00000000000..6d34dd00909
--- /dev/null
+++ b/lib/gitlab/background_migration/delete_diff_files.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+# rubocop:disable Metrics/AbcSize
+# rubocop:disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class DeleteDiffFiles
+ def perform(merge_request_diff_id)
+ merge_request_diff = MergeRequestDiff.find(merge_request_diff_id)
+
+ return unless should_delete_diff_files?(merge_request_diff)
+
+ MergeRequestDiff.transaction do
+ merge_request_diff.update_column(:state, 'without_files')
+
+ # explain (analyze, buffers) when deleting 453 diff files:
+ #
+ # Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1)
+ # Buffers: shared hit=2043 read=259 dirtied=254
+ # -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu
+ # al time=0.466..26.317 rows=453 loops=1)
+ # Index Cond: (merge_request_diff_id = 463448)
+ # Buffers: shared hit=17 read=84
+ # Planning time: 0.107 ms
+ # Execution time: 43.287 ms
+ #
+ MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all
+ end
+ end
+
+ private
+
+ def should_delete_diff_files?(merge_request_diff)
+ return false if merge_request_diff.without_files?
+
+ merge_request = merge_request_diff.merge_request
+
+ return false unless merge_request.merged?
+ return false unless merge_request.latest_merge_request_diff_id
+ return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id
+
+ true
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
new file mode 100644
index 00000000000..a251ab323d8
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
@@ -0,0 +1,69 @@
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180619121030 do
+ describe '#perform' do
+ context 'when diff files can be deleted' do
+ let(:merge_request) { create(:merge_request, :merged) }
+ let(:merge_request_diff) do
+ merge_request.create_merge_request_diff
+ merge_request.merge_request_diffs.first
+ end
+
+ it 'deletes all merge request diff files' do
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .to change { merge_request_diff.merge_request_diff_files.count }
+ .from(20).to(0)
+ end
+
+ it 'updates state to without_files' do
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .to change { merge_request_diff.reload.state }
+ .from('collected').to('without_files')
+ end
+
+ it 'rollsback if something goes wrong' do
+ expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
+ .and_raise
+
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .to raise_error
+
+ merge_request_diff.reload
+
+ expect(merge_request_diff.state).to eq('collected')
+ expect(merge_request_diff.merge_request_diff_files.count).to eq(20)
+ end
+ end
+
+ it 'deletes no merge request diff files when MR is not merged' do
+ merge_request = create(:merge_request, :opened)
+ merge_request.create_merge_request_diff
+ merge_request_diff = merge_request.merge_request_diffs.first
+
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .not_to change { merge_request_diff.merge_request_diff_files.count }
+ .from(20)
+ end
+
+ it 'deletes no merge request diff files when diff is marked as "without_files"' do
+ merge_request = create(:merge_request, :merged)
+ merge_request.create_merge_request_diff
+ merge_request_diff = merge_request.merge_request_diffs.first
+
+ merge_request_diff.clean!
+
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .not_to change { merge_request_diff.merge_request_diff_files.count }
+ .from(20)
+ end
+
+ it 'deletes no merge request diff files when diff is the latest' do
+ merge_request = create(:merge_request, :merged)
+ merge_request_diff = merge_request.merge_request_diff
+
+ expect { described_class.new.perform(merge_request_diff.id) }
+ .not_to change { merge_request_diff.merge_request_diff_files.count }
+ .from(20)
+ end
+ end
+end
diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
new file mode 100644
index 00000000000..686027822b8
--- /dev/null
+++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
@@ -0,0 +1,48 @@
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
+
+describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
+ let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:merge_requests) { table(:merge_requests) }
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+
+ before do
+ stub_const("#{described_class.name}::BATCH_SIZE", 2)
+
+ namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
+ projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
+
+ merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
+
+ merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'without_files')
+ merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
+ merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
+ merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
+
+ merge_requests.update(1, latest_merge_request_diff_id: 6)
+ end
+
+ it 'correctly schedules diff file deletion workers' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ # 1st batch
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(8.minutes, 1)
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(9.minutes, 3)
+ # 2nd batch
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(16.minutes, 4)
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(17.minutes, 6)
+ expect(BackgroundMigrationWorker.jobs.size).to eq(4)
+ end
+ end
+ end
+
+ it 'migrates the data' do
+ expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count }
+ .from(1).to(4)
+ end
+end