summaryrefslogtreecommitdiff
path: root/db
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-03 12:16:06 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 09:43:57 -0300
commit80a7be87f82b36c23e273b6a84b5a6bdbffaa947 (patch)
treec85fa428e079f3a19da342c7dedda662164f0bc0 /db
parent3acf7ba9caf19b1a02b34071454a631c959b5c58 (diff)
downloadgitlab-ce-80a7be87f82b36c23e273b6a84b5a6bdbffaa947.tar.gz
Schedule batches in bulks of 5 diffs
Issuing 6M writes to Redis in an N+1 manner takes time, 3 hours to be precise. This commit makes it schedule 5 jobs at a time, which should make it schedule every job in approximately 40 minutes.
Diffstat (limited to 'db')
-rw-r--r--db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb19
1 files changed, 10 insertions, 9 deletions
diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
index 5fb3d545624..bd614aee75c 100644
--- a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
+++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
@@ -12,7 +12,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
DOWNTIME = false
BATCH_SIZE = 1000
MIGRATION = 'DeleteDiffFiles'
- DELAY_INTERVAL = 8.minutes
+ DELAY_INTERVAL = 10.minutes
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
@@ -39,20 +39,21 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
# Execution time: 12.430 ms
#
diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
- ids = relation.pluck(:id)
+ # We slice the batches in groups of 5 and schedule each group of 5 at
+ # once. This should make writes to Redis go 5x faster.
+ job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids|
+ ids.map { |id| [MIGRATION, [id]] }
+ end
- ids.each_with_index do |diff_id, inner_index|
+ job_batches.each_with_index do |jobs, inner_index|
# This will give some space between batches of workers.
interval = DELAY_INTERVAL * outer_index + inner_index.minutes
# A single `merge_request_diff` can be associated with way too many
- # `merge_request_diff_files`. It's better to avoid batching these and
- # schedule one at a time.
- #
- # Considering roughly 6M jobs, this should take ~30 days to process all
- # of them.
+ # `merge_request_diff_files`. It's better to avoid scheduling big
+ # batches and go with 5 at a time.
#
- BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id])
+ BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
end
end