summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-04 16:06:30 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 09:43:58 -0300
commit4455904bc154f1a36cedeea574bb0f454f92a9e9 (patch)
tree7484b5e155ffb386cf87684f76d3a8bfe98c4c8d
parente66535e8407ccb8dd229fefdce817902a364f58a (diff)
downloadgitlab-ce-4455904bc154f1a36cedeea574bb0f454f92a9e9.tar.gz
Add 1000 files per minute deletion ratio on scheduler
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb38
-rw-r--r--lib/gitlab/background_migration/delete_diff_files.rb14
-rw-r--r--lib/gitlab/background_migration/schedule_diff_files_deletion.rb59
-rw-r--r--spec/lib/gitlab/background_migration/delete_diff_files_spec.rb2
-rw-r--r--spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb60
-rw-r--r--spec/migrations/enqueue_delete_diff_files_workers_spec.rb39
6 files changed, 112 insertions, 100 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
index 92950c1eec1..df30991737c 100644
--- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -1,52 +1,20 @@
class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
- class MergeRequestDiff < ActiveRecord::Base
- self.table_name = 'merge_request_diffs'
-
- belongs_to :merge_request
-
- include EachBatch
- end
-
DOWNTIME = false
- BATCH_SIZE = 1000
SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
def up
- # We add temporary index, to make iteration over batches more performant.
- # Conditional here is to avoid the need of doing that in a separate
- # migration file to make this operation idempotent.
- #
unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
end
-
- diffs_with_files = MergeRequestDiff.where.not(state: ['without_files', 'empty'])
-
- # explain (analyze, buffers) example for the iteration:
- #
- # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
- # Index Cond: ((id >= 764586) AND (id < 835298))
- # Heap Fetches: 8
- # Buffers: shared hit=18188
- # Planning time: 0.752 ms
- # Execution time: 12.430 ms
- #
- diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, scheduler_index|
- ids = relation.pluck(:id).map { |id| [id] }
-
- BackgroundMigrationWorker.perform_async(SCHEDULER, [ids, scheduler_index])
- end
-
- # We remove temporary index, because it is not required during standard
- # operations and runtime.
- #
- remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
+ # We keep the index since this will be used async.
+ # Ideally we should remove it in an upcoming release.
+ BackgroundMigrationWorker.perform_async(SCHEDULER)
end
def down
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb
index 8fb2c334048..e2c90fce6b1 100644
--- a/lib/gitlab/background_migration/delete_diff_files.rb
+++ b/lib/gitlab/background_migration/delete_diff_files.rb
@@ -4,6 +4,20 @@
module Gitlab
module BackgroundMigration
class DeleteDiffFiles
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ belongs_to :merge_request
+
+ include EachBatch
+ end
+
+ class MergeRequestDiffFile < ActiveRecord::Base
+ self.table_name = 'merge_request_diff_files'
+
+ include EachBatch
+ end
+
def perform(merge_request_diff_id)
merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id)
diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
index d944ed90fce..6442468836b 100644
--- a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
+++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb
@@ -5,27 +5,56 @@
module Gitlab
module BackgroundMigration
class ScheduleDiffFilesDeletion
- BATCH_SIZE = 5
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ has_many :merge_request_diff_files
+
+ include EachBatch
+ end
+
+ ITERATION_BATCH = 1000
+ DELETION_BATCH = 1000 # per minute
MIGRATION = 'DeleteDiffFiles'
- DELAY_INTERVAL = 10.minutes
- def perform(diff_ids, scheduler_index)
- relation = MergeRequestDiff.where(id: diff_ids)
+ # Considering query times and Redis writings, this should take around 2
+ # hours to complete.
+ def perform
+ diffs_with_files = MergeRequestDiff.where.not(state: %w(without_files empty))
- job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids|
- ids.map { |id| [MIGRATION, [id]] }
- end
+ # This will be increased for each scheduled job
+ process_job_in = 1.second
- job_batches.each_with_index do |jobs, inner_index|
- # This will give some space between batches of workers.
- interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes
+ # explain (analyze, buffers) example for the iteration:
+ #
+ # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
+ # Index Cond: ((id >= 764586) AND (id < 835298))
+ # Heap Fetches: 8
+ # Buffers: shared hit=18188
+ # Planning time: 0.752 ms
+ # Execution time: 12.430 ms
+ #
+ diffs_with_files.reorder(nil).each_batch(of: ITERATION_BATCH) do |relation, scheduler_index|
+ relation.each do |diff|
+ BackgroundMigrationWorker.perform_in(process_job_in, MIGRATION, [diff.id])
- # A single `merge_request_diff` can be associated with way too many
- # `merge_request_diff_files`. It's better to avoid scheduling big
- # batches and go with 5 at a time.
- #
- BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
+ diff_files_count = diff.merge_request_diff_files.reorder(nil).count
+
+ # We should limit on 1000 diff files deletion per minute to avoid
+ # replication lag issues.
+ #
+ interval = (diff_files_count.to_f / DELETION_BATCH).minutes
+ process_job_in += interval
+ end
end
+
+ log_days_to_process_all_jobs(process_job_in)
+ end
+
+ def log_days_to_process_all_jobs(seconds_to_process)
+ days_to_process_all_jobs = (seconds_to_process / 60 / 60 / 24).to_i
+ Rails.logger.info("Gitlab::BackgroundMigration::DeleteDiffFiles will take " \
+ "#{days_to_process_all_jobs} days to be processed")
end
end
end
diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
index a251ab323d8..d8497315e2d 100644
--- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
+++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
@@ -22,7 +22,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
end
it 'rollsback if something goes wrong' do
- expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
+ expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise
expect { described_class.new.perform(merge_request_diff.id) }
diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
index a699571ca90..04f20b75531 100644
--- a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
+++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
@@ -3,41 +3,71 @@ require 'spec_helper'
describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
describe '#perform' do
let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:diff_files) { table(:merge_request_diff_files) }
let(:merge_requests) { table(:merge_requests) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
+ def diff_file_params(extra_params = {})
+ extra_params.merge(new_file: false,
+ renamed_file: false,
+ too_large: false,
+ deleted_file: false,
+ a_mode: 'foo',
+ b_mode: 'bar',
+ new_path: 'xpto',
+ old_path: 'kux',
+ diff: 'content')
+ end
+
+ def create_diffs(id:, files_number:, state: 'collected')
+ merge_request_diffs.create!(id: id, merge_request_id: 1, state: state)
+ files_number.times.to_a.each do |index|
+ params = diff_file_params(merge_request_diff_id: id, relative_order: index)
+
+ diff_files.create!(params)
+ end
+ end
+
before do
- stub_const("#{described_class.name}::BATCH_SIZE", 3)
+ stub_const("#{described_class.name}::DELETION_BATCH", 10)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
-
- merge_request_diffs.create!(id: 1, merge_request_id: 1)
- merge_request_diffs.create!(id: 2, merge_request_id: 1)
- merge_request_diffs.create!(id: 3, merge_request_id: 1)
- merge_request_diffs.create!(id: 4, merge_request_id: 1)
- merge_request_diffs.create!(id: 5, merge_request_id: 1)
end
it 'correctly schedules diff file deletion workers' do
Sidekiq::Testing.fake! do
Timecop.freeze do
- described_class.new.perform([1, 2, 3, 4, 5], 1)
+ create_diffs(id: 1, files_number: 25)
+ create_diffs(id: 2, files_number: 11)
+ create_diffs(id: 3, files_number: 4, state: 'without_files')
+ create_diffs(id: 4, files_number: 5, state: 'empty')
+ create_diffs(id: 5, files_number: 9)
+
+ worker = described_class.new
- [1, 2, 3].each do |id|
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
- end
+ expect(worker).to receive(:log_days_to_process_all_jobs).with(1.second + 2.5.minutes + 1.1.minutes + 0.9.minutes)
- [4, 5].each do |id|
- expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
- end
+ worker.perform
- expect(BackgroundMigrationWorker.jobs.size).to eq(5)
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second, 1)
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes, 2)
+ expect(described_class::MIGRATION).to be_scheduled_delayed_migration(1.second + 2.5.minutes + 1.1.minutes, 5)
+ expect(BackgroundMigrationWorker.jobs.size).to eq(3)
end
end
end
end
+
+ describe '#days_to_process_all_jobs' do
+ it 'logs how many days it will take to run all jobs' do
+ expect(Rails).to receive_message_chain(:logger, :info)
+ .with("Gitlab::BackgroundMigration::DeleteDiffFiles will take 3 days to be processed")
+
+ described_class.new.log_days_to_process_all_jobs(3.days.seconds)
+ end
+ end
end
diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
index 43dbd7e8f53..3d19c6fcc10 100644
--- a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
+++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb
@@ -2,45 +2,16 @@ require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
- let(:merge_request_diffs) { table(:merge_request_diffs) }
- let(:merge_requests) { table(:merge_requests) }
- let(:namespaces) { table(:namespaces) }
- let(:projects) { table(:projects) }
-
- before do
- stub_const("#{described_class.name}::BATCH_SIZE", 4)
-
- namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
- projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
-
- merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
-
- merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'without_files')
- merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
- merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
- merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
-
- merge_requests.update(1, latest_merge_request_diff_id: 6)
- end
-
- it 'correctly schedules diff file deletion workers schedulers' do
+ it 'correctly schedules diff file deletion scheduler' do
Sidekiq::Testing.fake! do
- # First scheduling batch
- expect(BackgroundMigrationWorker).to receive(:perform_async)
- .with(described_class::SCHEDULER, [[[1], [3], [4], [6]], 1])
- .and_call_original
-
- # Second scheduling batch
- expect(BackgroundMigrationWorker).to receive(:perform_async)
- .with(described_class::SCHEDULER, [[[7]], 2])
+ expect(BackgroundMigrationWorker)
+ .to receive(:perform_async)
+ .with(described_class::SCHEDULER)
.and_call_original
migrate!
- expect(BackgroundMigrationWorker.jobs.size).to eq(2)
+ expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end
end
end