diff options
author | Michael Kozono <mkozono@gmail.com> | 2017-09-08 13:10:53 -0700 |
---|---|---|
committer | Michael Kozono <mkozono@gmail.com> | 2017-09-14 14:17:23 -0700 |
commit | ee4f73916f586112e6479d80b3769e174414eb7e (patch) | |
tree | 260857524c956e31bbb6b6df2a7e7bdbddb46e45 | |
parent | dbf924c57dc026747dae00141c1da67fbf856c80 (diff) | |
download | gitlab-ce-ee4f73916f586112e6479d80b3769e174414eb7e.tar.gz |
Extract helper for queuing background jobs
4 files changed, 113 insertions, 34 deletions
diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb index 73bb4603eb6..021b52b9ed1 100644 --- a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -5,8 +5,6 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers DOWNTIME = false - BATCH_SIZE = 1000 # Number of rows to process per job - JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze disable_ddl_transaction! @@ -18,11 +16,9 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration end def up - jobs = [] - say opening_message - queue_background_migration_jobs(Route, MIGRATION) + queue_background_migration_jobs_by_range(Route, MIGRATION) end def down @@ -36,31 +32,4 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357 MSG end - - def queue_background_migration_jobs(model_class, job_class_name, batch_size = BATCH_SIZE) - jobs = [] - - model_class.each_batch(of: batch_size) do |relation| - start_id, end_id = relation.pluck('MIN(id), MAX(id)').first - - # Note: This conditional will only be true if JOB_BUFFER_SIZE * batch_size < (total number of rows) - if jobs.length >= JOB_BUFFER_SIZE - # We push multiple jobs at a time to reduce the time spent in - # Sidekiq/Redis operations. We're using this buffer based approach so we - # don't need to run additional queries for every range. - bulk_queue_jobs(jobs) - jobs.clear - end - - jobs << [job_class_name, [start_id, end_id]] - end - - bulk_queue_jobs(jobs) unless jobs.empty? - end - - def bulk_queue_jobs(jobs) - say "Queuing #{jobs.size} BackgroundMigrationWorker jobs..." - - BackgroundMigrationWorker.perform_bulk(jobs) - end end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index fb14798efe6..18aefc9558b 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1,6 +1,9 @@ module Gitlab module Database module MigrationHelpers + BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job + BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time + # Adds `created_at` and `updated_at` columns with timezone information. # # This method is an improved version of Rails' built-in method `add_timestamps`. @@ -653,6 +656,51 @@ into similar problems in the future (e.g. when new tables are created). EOF end end + + # Queues background migration jobs for an entire table, batched by ID range. + # + # model_class - The table being iterated over + # job_class_name - The background migration job class as a string + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(start_id, end_id) + # # do something + # end + # end + def queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size = BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + jobs = [] + + model_class.each_batch(of: batch_size) do |relation| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE + # Note: This code path generally only helps with many millions of rows + # We push multiple jobs at a time to reduce the time spent in + # Sidekiq/Redis operations. We're using this buffer based approach so we + # don't need to run additional queries for every range. + BackgroundMigrationWorker.perform_bulk(jobs) + jobs.clear + end + + jobs << [job_class_name, [start_id, end_id]] + end + + BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty? + end end end end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 1bcdc369c44..fab77889059 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -914,4 +914,66 @@ describe Gitlab::Database::MigrationHelpers do .to raise_error(RuntimeError, /Your database user is not allowed/) end end + + describe '#queue_background_migration_jobs_by_range', :sidekiq do + context 'when the model has an ID column' do + let!(:id1) { create(:user).id } + let!(:id2) { create(:user).id } + let!(:id3) { create(:user).id } + + before do + User.class_eval do + include EachBatch + end + end + + context 'with enough rows to bulk queue jobs more than once' do + before do + stub_const('Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE', 1) + end + + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in groups of buffer size 1' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]]) + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]]) + + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + end + end + + context 'with not enough rows to bulk queue jobs more than once' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in bulk all at once (big buffer size)' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]], + ['FooJob', [id3, id3]]]) + + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + end + end + end + + context "when the model doesn't have an ID column" do + it 'raises error (for now)' do + expect do + model.queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') + end.to raise_error(StandardError, /does not have an ID/) + end + end + end end diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb index 5d277028b96..b79d04c1a21 100644 --- a/spec/migrations/delete_conflicting_redirect_routes_spec.rb +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -6,8 +6,8 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do let!(:routes) { table(:routes) } before do - stub_const("#{described_class.name}::BATCH_SIZE", 2) - stub_const("#{described_class.name}::JOB_BUFFER_SIZE", 2) + stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_BATCH_SIZE", 2) + stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2) routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') |