From e9b84f50e961ee7c3abfb8192de8f4fc778df041 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 11 Feb 2019 15:48:37 -0200 Subject: Migrate issuable states to integer patch 1 Patch 1 that migrates issues/merge requests states from integer to string. On this commit we are only adding the state_id column and syncing it with a backgroud migration. On Patch 2 the code to use the new integer column will be deployed and the old column will be removed. --- .../sync_issuables_state_id.rb | 29 ++++++++++++++++++++++ lib/gitlab/database/migration_helpers.rb | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 lib/gitlab/background_migration/sync_issuables_state_id.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_issuables_state_id.rb new file mode 100644 index 00000000000..1ac86b8acf2 --- /dev/null +++ b/lib/gitlab/background_migration/sync_issuables_state_id.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class SyncIssuablesStateId + def perform(start_id, end_id, model_class) + populate_new_state_id(start_id, end_id, model_class) + end + + def populate_new_state_id(start_id, end_id, model_class) + Rails.logger.info("#{model_class.model_name.human} - Populating state_id: #{start_id} - #{end_id}") + + ActiveRecord::Base.connection.execute <<~SQL + UPDATE #{model_class.table_name} + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 3abd0600e9d..20cbb9e096b 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1033,7 +1033,7 @@ into similar problems in the future (e.g. when new tables are created). # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, model_class]) end end -- cgit v1.2.1 From 362d56e65a0e23fcf4fd5bd4535d258c3659ffd5 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 12 Feb 2019 14:40:37 -0200 Subject: Schedule background migrations and specs --- lib/gitlab/background_migration/sync_issuables_state_id.rb | 10 +++++----- lib/gitlab/database/migration_helpers.rb | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_issuables_state_id.rb index 1ac86b8acf2..95734a7310e 100644 --- a/lib/gitlab/background_migration/sync_issuables_state_id.rb +++ b/lib/gitlab/background_migration/sync_issuables_state_id.rb @@ -4,15 +4,15 @@ module Gitlab module BackgroundMigration class SyncIssuablesStateId - def perform(start_id, end_id, model_class) - populate_new_state_id(start_id, end_id, model_class) + def perform(start_id, end_id, table_name) + populate_new_state_id(start_id, end_id, table_name) end - def populate_new_state_id(start_id, end_id, model_class) - Rails.logger.info("#{model_class.model_name.human} - Populating state_id: #{start_id} - #{end_id}") + def populate_new_state_id(start_id, end_id, table_name) + Rails.logger.info("#{table_name} - Populating state_id: #{start_id} - #{end_id}") ActiveRecord::Base.connection.execute <<~SQL - UPDATE #{model_class.table_name} + UPDATE #{table_name} SET state_id = CASE state WHEN 'opened' THEN 1 diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 20cbb9e096b..46b36d07c20 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1029,11 +1029,12 @@ into similar problems in the future (e.g. when new tables are created). model_class.each_batch(of: batch_size) do |relation, index| start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + table_name = relation.base_class.table_name # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, model_class]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, table_name]) end end -- cgit v1.2.1 From 37741c59a4daf1b0d6d9f7a6a51337e9d8effb66 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 14 Feb 2019 11:48:20 -0200 Subject: Split background migration for issues and merge requests --- .../sync_issuables_state_id.rb | 29 ---------------------- .../background_migration/sync_issues_state_id.rb | 23 +++++++++++++++++ .../sync_merge_requests_state_id.rb | 25 +++++++++++++++++++ lib/gitlab/database/migration_helpers.rb | 3 +-- 4 files changed, 49 insertions(+), 31 deletions(-) delete mode 100644 lib/gitlab/background_migration/sync_issuables_state_id.rb create mode 100644 lib/gitlab/background_migration/sync_issues_state_id.rb create mode 100644 lib/gitlab/background_migration/sync_merge_requests_state_id.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_issuables_state_id.rb deleted file mode 100644 index 95734a7310e..00000000000 --- a/lib/gitlab/background_migration/sync_issuables_state_id.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class SyncIssuablesStateId - def perform(start_id, end_id, table_name) - populate_new_state_id(start_id, end_id, table_name) - end - - def populate_new_state_id(start_id, end_id, table_name) - Rails.logger.info("#{table_name} - Populating state_id: #{start_id} - #{end_id}") - - ActiveRecord::Base.connection.execute <<~SQL - UPDATE #{table_name} - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - end -end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb new file mode 100644 index 00000000000..33b997c8533 --- /dev/null +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class SyncIssuesStateId + def perform(start_id, end_id) + Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") + + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + end +end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb new file mode 100644 index 00000000000..923ceaeec54 --- /dev/null +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class SyncMergeRequestsStateId + def perform(start_id, end_id) + Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") + + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 46b36d07c20..3abd0600e9d 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1029,12 +1029,11 @@ into similar problems in the future (e.g. when new tables are created). model_class.each_batch(of: batch_size) do |relation, index| start_id, end_id = relation.pluck('MIN(id), MAX(id)').first - table_name = relation.base_class.table_name # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, table_name]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) end end -- cgit v1.2.1 From b1346db3a0b49b295a9702921285fdb30029563c Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Fri, 15 Feb 2019 17:11:41 -0200 Subject: Add Reschedulable module --- lib/gitlab/background_migration.rb | 2 + .../background_migration/concerns/reschedulable.rb | 50 ++++++++++++++++++++++ .../background_migration/delete_diff_files.rb | 37 +++------------- 3 files changed, 58 insertions(+), 31 deletions(-) create mode 100644 lib/gitlab/background_migration/concerns/reschedulable.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index 5251e0fadf9..b308e94bfa0 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +Dir[Rails.root.join("lib/gitlab/background_migration/concerns/*.rb")].each { |f| require f } + module Gitlab module BackgroundMigration def self.queue diff --git a/lib/gitlab/background_migration/concerns/reschedulable.rb b/lib/gitlab/background_migration/concerns/reschedulable.rb new file mode 100644 index 00000000000..fbf3d799743 --- /dev/null +++ b/lib/gitlab/background_migration/concerns/reschedulable.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module Reschedulable + extend ActiveSupport::Concern + + def reschedule_if_needed(args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + # Override in subclass if you need a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if you need a different vacuum wait time + def vacuum_wait_time + @vacuum_wait_time ||= 5.minutes + end + end + end +end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 664ead1af44..0a7cbd5c30f 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,6 +4,8 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles + include Reschedulable + class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' @@ -15,47 +17,24 @@ module Gitlab self.table_name = 'merge_request_diff_files' end - DEAD_TUPLES_THRESHOLD = 50_000 - VACUUM_WAIT_TIME = 5.minutes - def perform(ids) @ids = ids - # We should reschedule until deadtuples get in a desirable - # state (e.g. < 50_000). That may take more than one reschedule. - # - if should_wait_deadtuple_vacuum? - reschedule - return + reschedule_if_needed([ids]) do + prune_diff_files end - - prune_diff_files - end - - def should_wait_deadtuple_vacuum? - return false unless Gitlab::Database.postgresql? - - diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD end private - def reschedule - BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids]) + def should_reschedule? + wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end def diffs_collection MergeRequestDiff.where(id: @ids) end - def diff_files_dead_tuples_count - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = 'merge_request_diff_files'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - def prune_diff_files removed = 0 updated = 0 @@ -69,10 +48,6 @@ module Gitlab "updated #{updated} merge_request_diffs rows") end - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - def log_info(message) Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end -- cgit v1.2.1 From a9886c7dd434d6936c455c9877f6c695cafebe0a Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 11:51:44 -0300 Subject: Fix rubocop --- lib/gitlab/background_migration.rb | 2 +- .../background_migration/concerns/reschedulable.rb | 50 ------------------ .../background_migration/delete_diff_files.rb | 2 +- .../background_migration/helpers/reschedulable.rb | 59 ++++++++++++++++++++++ 4 files changed, 61 insertions(+), 52 deletions(-) delete mode 100644 lib/gitlab/background_migration/concerns/reschedulable.rb create mode 100644 lib/gitlab/background_migration/helpers/reschedulable.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index b308e94bfa0..948488c844c 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -Dir[Rails.root.join("lib/gitlab/background_migration/concerns/*.rb")].each { |f| require f } +Dir[Rails.root.join("lib/gitlab/background_migration/helpers/*.rb")].each { |f| require f } module Gitlab module BackgroundMigration diff --git a/lib/gitlab/background_migration/concerns/reschedulable.rb b/lib/gitlab/background_migration/concerns/reschedulable.rb deleted file mode 100644 index fbf3d799743..00000000000 --- a/lib/gitlab/background_migration/concerns/reschedulable.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - module Reschedulable - extend ActiveSupport::Concern - - def reschedule_if_needed(args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - # Override in subclass if you need a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if you need a different vacuum wait time - def vacuum_wait_time - @vacuum_wait_time ||= 5.minutes - end - end - end -end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 0a7cbd5c30f..11851c23ee3 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles - include Reschedulable + include Helpers::Reschedulable class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb new file mode 100644 index 00000000000..7810f2627c8 --- /dev/null +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module Helpers + # == Reschedulable helper + # + # Allows background migrations to be reschedule if a condition is not met. + # + # Check DeleteDiffFiles migration which reschedules itself if dead tuple count + # on DB is not acceptable. + # + module Reschedulable + extend ActiveSupport::Concern + + def reschedule_if_needed(args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + # Override in subclass if you need a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if you need a different vacuum wait time + def vacuum_wait_time + @vacuum_wait_time ||= 5.minutes + end + end + end + end +end -- cgit v1.2.1 From d88b44caad35136bc42f245162c99e8bae0a5912 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 12:00:14 -0300 Subject: Fix typo --- lib/gitlab/background_migration/helpers/reschedulable.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 7810f2627c8..3b653059858 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -5,9 +5,9 @@ module Gitlab module Helpers # == Reschedulable helper # - # Allows background migrations to be reschedule if a condition is not met. + # Allows background migrations to be reschedule itself if a condition is not met. # - # Check DeleteDiffFiles migration which reschedules itself if dead tuple count + # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count # on DB is not acceptable. # module Reschedulable -- cgit v1.2.1 From cc7a44c8e130a8f3f753ba0ed32e45b0a6b0e6f7 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 17:43:16 -0300 Subject: Make migrations reschedulable --- .../background_migration/delete_diff_files.rb | 2 +- .../background_migration/helpers/reschedulable.rb | 8 ++--- .../background_migration/sync_issues_state_id.rb | 30 ++++++++++++------- .../sync_merge_requests_state_id.rb | 34 ++++++++++++++-------- 4 files changed, 47 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 11851c23ee3..4cb068eb71a 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -27,7 +27,7 @@ module Gitlab private - def should_reschedule? + def need_reschedule? wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 3b653059858..6087181609e 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -14,7 +14,7 @@ module Gitlab extend ActiveSupport::Concern def reschedule_if_needed(args, &block) - if should_reschedule? + if need_reschedule? BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) else yield @@ -22,9 +22,9 @@ module Gitlab end # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end + # def need_reschedule? + # raise NotImplementedError, "#{self.class} does not implement #{__method__}" + # end def wait_for_deadtuple_vacuum?(table_name) return false unless Gitlab::Database.postgresql? diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33b997c8533..f95c433c64c 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,19 +4,29 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId + include Helpers::Reschedulable + def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL - UPDATE issues - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL + reschedule_if_needed([start_id, end_id]) do + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + + private + + def need_reschedule? + wait_for_deadtuple_vacuum?('issues') end end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 923ceaeec54..5d92553f577 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,21 +4,31 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId + include Helpers::Reschedulable + def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL - UPDATE merge_requests - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL + reschedule_if_needed([start_id, end_id]) do + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + + private + + def need_reschedule? + wait_for_deadtuple_vacuum?('issues') end end end -- cgit v1.2.1 From 52155d8cf8374e9184c2ae834cab761b7520db93 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 19 Feb 2019 11:33:30 -0300 Subject: Add more specs and code improvements --- .../background_migration/delete_diff_files.rb | 4 +- .../background_migration/helpers/reschedulable.rb | 45 ++++++++++++---------- .../background_migration/sync_issues_state_id.rb | 6 +-- .../sync_merge_requests_state_id.rb | 6 +-- 4 files changed, 32 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 4cb068eb71a..0066faa23dd 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -20,14 +20,14 @@ module Gitlab def perform(ids) @ids = ids - reschedule_if_needed([ids]) do + reschedule_if_needed(ids) do prune_diff_files end end private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 6087181609e..60ab62cd973 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -5,7 +5,8 @@ module Gitlab module Helpers # == Reschedulable helper # - # Allows background migrations to be reschedule itself if a condition is not met. + # Allows background migrations to be rescheduled if a condition is met, + # the condition should be overridden in classes in #should_reschedule? method. # # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count # on DB is not acceptable. @@ -13,18 +14,34 @@ module Gitlab module Reschedulable extend ActiveSupport::Concern - def reschedule_if_needed(args, &block) - if need_reschedule? - BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + # Use this method to perform the background migration and it will be rescheduled + # if #should_reschedule? returns true. + def reschedule_if_needed(*args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) else yield end end # Override this on base class if you need a different reschedule condition - # def need_reschedule? - # raise NotImplementedError, "#{self.class} does not implement #{__method__}" - # end + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + # Override in subclass if a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if a different wait time + def wait_time + @wait_time ||= 5.minutes + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end def wait_for_deadtuple_vacuum?(table_name) return false unless Gitlab::Database.postgresql? @@ -39,20 +56,6 @@ module Gitlab dead_tuple&.fetch('n_dead_tup', 0).to_i end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - # Override in subclass if you need a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if you need a different vacuum wait time - def vacuum_wait_time - @vacuum_wait_time ||= 5.minutes - end end end end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index f95c433c64c..50841c3fe4d 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -9,8 +9,8 @@ module Gitlab def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed([start_id, end_id]) do - ActiveRecord::Base.connection.execute <<~SQL + reschedule_if_needed(start_id, end_id) do + execute_statement <<~SQL UPDATE issues SET state_id = CASE state @@ -25,7 +25,7 @@ module Gitlab private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?('issues') end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 5d92553f577..4cf8e993c52 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -9,8 +9,8 @@ module Gitlab def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed([start_id, end_id]) do - ActiveRecord::Base.connection.execute <<~SQL + reschedule_if_needed(start_id, end_id) do + execute_statement <<~SQL UPDATE merge_requests SET state_id = CASE state @@ -27,7 +27,7 @@ module Gitlab private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?('issues') end end -- cgit v1.2.1 From 7bd066a1fa51018211e26ca0c5624aecbc364a66 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 19 Feb 2019 14:00:53 -0300 Subject: Address review comments --- lib/gitlab/background_migration.rb | 2 - .../background_migration/delete_diff_files.rb | 37 ++++++++++--- .../background_migration/helpers/reschedulable.rb | 62 ---------------------- lib/gitlab/background_migration/reschedulable.rb | 60 +++++++++++++++++++++ .../background_migration/sync_issues_state_id.rb | 2 +- .../sync_merge_requests_state_id.rb | 2 +- 6 files changed, 93 insertions(+), 72 deletions(-) delete mode 100644 lib/gitlab/background_migration/helpers/reschedulable.rb create mode 100644 lib/gitlab/background_migration/reschedulable.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index 948488c844c..5251e0fadf9 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -Dir[Rails.root.join("lib/gitlab/background_migration/helpers/*.rb")].each { |f| require f } - module Gitlab module BackgroundMigration def self.queue diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 0066faa23dd..664ead1af44 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,8 +4,6 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles - include Helpers::Reschedulable - class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' @@ -17,24 +15,47 @@ module Gitlab self.table_name = 'merge_request_diff_files' end + DEAD_TUPLES_THRESHOLD = 50_000 + VACUUM_WAIT_TIME = 5.minutes + def perform(ids) @ids = ids - reschedule_if_needed(ids) do - prune_diff_files + # We should reschedule until deadtuples get in a desirable + # state (e.g. < 50_000). That may take more than one reschedule. + # + if should_wait_deadtuple_vacuum? + reschedule + return end + + prune_diff_files + end + + def should_wait_deadtuple_vacuum? + return false unless Gitlab::Database.postgresql? + + diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD end private - def should_reschedule? - wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) + def reschedule + BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids]) end def diffs_collection MergeRequestDiff.where(id: @ids) end + def diff_files_dead_tuples_count + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = 'merge_request_diff_files'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + def prune_diff_files removed = 0 updated = 0 @@ -48,6 +69,10 @@ module Gitlab "updated #{updated} merge_request_diffs rows") end + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + def log_info(message) Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb deleted file mode 100644 index 60ab62cd973..00000000000 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ /dev/null @@ -1,62 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - module Helpers - # == Reschedulable helper - # - # Allows background migrations to be rescheduled if a condition is met, - # the condition should be overridden in classes in #should_reschedule? method. - # - # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count - # on DB is not acceptable. - # - module Reschedulable - extend ActiveSupport::Concern - - # Use this method to perform the background migration and it will be rescheduled - # if #should_reschedule? returns true. - def reschedule_if_needed(*args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - # Override in subclass if a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if a different wait time - def wait_time - @wait_time ||= 5.minutes - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - end - end - end -end diff --git a/lib/gitlab/background_migration/reschedulable.rb b/lib/gitlab/background_migration/reschedulable.rb new file mode 100644 index 00000000000..3d6781c07c0 --- /dev/null +++ b/lib/gitlab/background_migration/reschedulable.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # == Reschedulable helper + # + # Allows background migrations to be rescheduled if a condition is met, + # the condition should be overridden in classes in #should_reschedule? method. + # + # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count + # on DB is not acceptable. + # + module Reschedulable + extend ActiveSupport::Concern + + # Use this method to perform the background migration and it will be rescheduled + # if #should_reschedule? returns true. + def reschedule_if_needed(*args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + # Override in subclass if a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if a different wait time + def wait_time + @wait_time ||= 5.minutes + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + end + end +end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 50841c3fe4d..053d154cef8 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId - include Helpers::Reschedulable + include Reschedulable def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 4cf8e993c52..a94529aaf5c 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId - include Helpers::Reschedulable + include Reschedulable def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") -- cgit v1.2.1 From 294c5c41beaac1fbc60c67df2c8745f7583544a1 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Fri, 1 Mar 2019 16:24:47 -0300 Subject: Remove auto vacuum logic, decrease batch size and interval --- lib/gitlab/background_migration/reschedulable.rb | 60 ---------------------- .../background_migration/sync_issues_state_id.rb | 30 ++++------- .../sync_merge_requests_state_id.rb | 34 +++++------- 3 files changed, 22 insertions(+), 102 deletions(-) delete mode 100644 lib/gitlab/background_migration/reschedulable.rb (limited to 'lib') diff --git a/lib/gitlab/background_migration/reschedulable.rb b/lib/gitlab/background_migration/reschedulable.rb deleted file mode 100644 index 3d6781c07c0..00000000000 --- a/lib/gitlab/background_migration/reschedulable.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # == Reschedulable helper - # - # Allows background migrations to be rescheduled if a condition is met, - # the condition should be overridden in classes in #should_reschedule? method. - # - # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count - # on DB is not acceptable. - # - module Reschedulable - extend ActiveSupport::Concern - - # Use this method to perform the background migration and it will be rescheduled - # if #should_reschedule? returns true. - def reschedule_if_needed(*args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - # Override in subclass if a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if a different wait time - def wait_time - @wait_time ||= 5.minutes - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - end - end -end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 053d154cef8..33b997c8533 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,29 +4,19 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE issues - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index a94529aaf5c..923ceaeec54 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,31 +4,21 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE merge_requests - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end -- cgit v1.2.1 From b2fb3a9c62862476a7591ecb5ab4a6a3146e0c49 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 28 Mar 2019 11:31:14 -0300 Subject: Address review comments --- lib/gitlab/background_migration/sync_issues_state_id.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33b997c8533..33002f58be6 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -5,7 +5,7 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId def perform(start_id, end_id) - Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") + Gitlab::AppLogger.info("Issues - Populating state_id: #{start_id} - #{end_id}") ActiveRecord::Base.connection.execute <<~SQL UPDATE issues -- cgit v1.2.1 From f2b7da4bf507691cffd419d3dd759fcf6311cdd6 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 1 Apr 2019 15:04:36 -0300 Subject: Remove additional logging --- lib/gitlab/background_migration/sync_issues_state_id.rb | 2 -- lib/gitlab/background_migration/sync_merge_requests_state_id.rb | 2 -- 2 files changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33002f58be6..2a0751928b8 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -5,8 +5,6 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId def perform(start_id, end_id) - Gitlab::AppLogger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL UPDATE issues SET state_id = diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 923ceaeec54..6707e178d8b 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -5,8 +5,6 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId def perform(start_id, end_id) - Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL UPDATE merge_requests SET state_id = -- cgit v1.2.1