diff options
author | Felipe Artur <felipefac@gmail.com> | 2019-03-01 16:24:47 -0300 |
---|---|---|
committer | Felipe Artur <felipefac@gmail.com> | 2019-03-01 16:24:47 -0300 |
commit | 294c5c41beaac1fbc60c67df2c8745f7583544a1 (patch) | |
tree | 2559085daf704a40e749288fa9b4bfeed313f725 | |
parent | 7bd066a1fa51018211e26ca0c5624aecbc364a66 (diff) | |
download | gitlab-ce-294c5c41beaac1fbc60c67df2c8745f7583544a1.tar.gz |
Remove auto vacuum logic, decrease batch size and interval
5 files changed, 32 insertions, 141 deletions
diff --git a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb index 2167f19e022..6edb0bf2d5e 100644 --- a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb +++ b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb @@ -11,10 +11,12 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] # issues count: 13587305 # merge requests count: 18925274 # - # Using 25000 as batch size should take around 26 hours - # to migrate both issues and merge requests - BATCH_SIZE = 25000 - DELAY_INTERVAL = 5.minutes.to_i + # Using 5000 as batch size and 115 seconds interval will give: + # 2718 jobs for issues - taking ~86 hours + # 3786 jobs for merge requests - taking ~120 hours + # + BATCH_SIZE = 5000 + DELAY_INTERVAL = 120.seconds.to_i ISSUES_MIGRATION = 'SyncIssuesStateId'.freeze MERGE_REQUESTS_MIGRATION = 'SyncMergeRequestsStateId'.freeze @@ -34,14 +36,14 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] def up queue_background_migration_jobs_by_range_at_intervals( - Issue.where(state_id: nil), + Issue.all, ISSUES_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE ) queue_background_migration_jobs_by_range_at_intervals( - MergeRequest.where(state_id: nil), + MergeRequest.all, MERGE_REQUESTS_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE diff --git a/lib/gitlab/background_migration/reschedulable.rb b/lib/gitlab/background_migration/reschedulable.rb deleted file mode 100644 index 3d6781c07c0..00000000000 --- a/lib/gitlab/background_migration/reschedulable.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # == Reschedulable helper - # - # Allows background migrations to be rescheduled if a condition is met, - # the condition should be overridden in classes in #should_reschedule? method. - # - # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count - # on DB is not acceptable. - # - module Reschedulable - extend ActiveSupport::Concern - - # Use this method to perform the background migration and it will be rescheduled - # if #should_reschedule? returns true. - def reschedule_if_needed(*args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - # Override in subclass if a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if a different wait time - def wait_time - @wait_time ||= 5.minutes - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - end - end -end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 053d154cef8..33b997c8533 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,29 +4,19 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE issues - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index a94529aaf5c..923ceaeec54 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,31 +4,21 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE merge_requests - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end diff --git a/spec/migrations/schedule_sync_issuables_state_id_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb index 6c4c31ae554..6b1ea804342 100644 --- a/spec/migrations/schedule_sync_issuables_state_id_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -26,33 +26,14 @@ describe ScheduleSyncIssuablesStateId, :migration do Timecop.freeze do migrate! - expect(migration).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) - expect(migration).to be_scheduled_delayed_migration(10.minutes, resource_3.id, resource_4.id) + expect(migration).to be_scheduled_delayed_migration(120.seconds, resource_1.id, resource_2.id) + expect(migration).to be_scheduled_delayed_migration(240.seconds, resource_3.id, resource_4.id) expect(BackgroundMigrationWorker.jobs.size).to eq(2) end end end end - shared_examples 'rescheduling migrations' do - before do - Sidekiq::Worker.clear_all - end - - it 'reschedules migrations when should wait for dead tuple vacuum' do - worker = worker_class.new - - Sidekiq::Testing.fake! do - allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true } - - worker.perform(resource_1.id, resource_2.id) - - expect(worker_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) - expect(BackgroundMigrationWorker.jobs.size).to eq(1) - end - end - end - describe '#up' do context 'issues' do it 'migrates state column to integer' do @@ -76,12 +57,6 @@ describe ScheduleSyncIssuablesStateId, :migration do let!(:resource_3) { issues.create!(description: 'third', state: 'closed') } let!(:resource_4) { issues.create!(description: 'fourth', state: 'closed') } end - - it_behaves_like 'rescheduling migrations' do - let(:worker_class) { Gitlab::BackgroundMigration::SyncIssuesStateId } - let!(:resource_1) { issues.create!(description: 'first', state: 'opened') } - let!(:resource_2) { issues.create!(description: 'second', state: 'closed') } - end end context 'merge requests' do @@ -108,12 +83,6 @@ describe ScheduleSyncIssuablesStateId, :migration do let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') } let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') } end - - it_behaves_like 'rescheduling migrations' do - let(:worker_class) { Gitlab::BackgroundMigration::SyncMergeRequestsStateId } - let(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } - let(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') } - end end end end |