diff options
Diffstat (limited to 'lib/gitlab/background_migration')
28 files changed, 617 insertions, 598 deletions
diff --git a/lib/gitlab/background_migration/add_primary_email_to_emails_if_user_confirmed.rb b/lib/gitlab/background_migration/add_primary_email_to_emails_if_user_confirmed.rb new file mode 100644 index 00000000000..b39c0953fb1 --- /dev/null +++ b/lib/gitlab/background_migration/add_primary_email_to_emails_if_user_confirmed.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Add user primary email to emails table if confirmed + class AddPrimaryEmailToEmailsIfUserConfirmed + INNER_BATCH_SIZE = 1_000 + + # Stubbed class to access the User table + class User < ActiveRecord::Base + include ::EachBatch + + self.table_name = 'users' + self.inheritance_column = :_type_disabled + + scope :confirmed, -> { where.not(confirmed_at: nil) } + + has_many :emails + end + + # Stubbed class to access the Emails table + class Email < ActiveRecord::Base + self.table_name = 'emails' + self.inheritance_column = :_type_disabled + + belongs_to :user + end + + def perform(start_id, end_id) + User.confirmed.where(id: start_id..end_id).select(:id, :email, :confirmed_at).each_batch(of: INNER_BATCH_SIZE) do |users| + current_time = Time.now.utc + + attributes = users.map do |user| + { + user_id: user.id, + email: user.email, + confirmed_at: user.confirmed_at, + created_at: current_time, + updated_at: current_time + } + end + + Email.insert_all(attributes) + end + mark_job_as_succeeded(start_id, end_id) + end + + private + + def mark_job_as_succeeded(*arguments) + Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + 'AddPrimaryEmailToEmailsIfUserConfirmed', + arguments + ) + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_design_internal_ids.rb b/lib/gitlab/background_migration/backfill_design_internal_ids.rb deleted file mode 100644 index 236c6b6eb9a..00000000000 --- a/lib/gitlab/background_migration/backfill_design_internal_ids.rb +++ /dev/null @@ -1,130 +0,0 @@ -# frozen_string_literal: true - -module 
Gitlab - module BackgroundMigration - # Backfill design.iid for a range of projects - class BackfillDesignInternalIds - # See app/models/internal_id - # This is a direct copy of the application code with the following changes: - # - usage enum is hard-coded to the value for design_management_designs - # - init is not passed around, but ignored - class InternalId < ActiveRecord::Base - def self.track_greatest(subject, scope, new_value) - InternalIdGenerator.new(subject, scope).track_greatest(new_value) - end - - # Increments #last_value with new_value if it is greater than the current, - # and saves the record - # - # The operation locks the record and gathers a `ROW SHARE` lock (in PostgreSQL). - # As such, the increment is atomic and safe to be called concurrently. - def track_greatest_and_save!(new_value) - update_and_save { self.last_value = [last_value || 0, new_value].max } - end - - private - - def update_and_save(&block) - lock! - yield - # update_and_save_counter.increment(usage: usage, changed: last_value_changed?) - save! - last_value - end - end - - # See app/models/internal_id - class InternalIdGenerator - attr_reader :subject, :scope, :scope_attrs - - def initialize(subject, scope) - @subject = subject - @scope = scope - - raise ArgumentError, 'Scope is not well-defined, need at least one column for scope (given: 0)' if scope.empty? 
- end - - # Create a record in internal_ids if one does not yet exist - # and set its new_value if it is higher than the current last_value - # - # Note this will acquire a ROW SHARE lock on the InternalId record - def track_greatest(new_value) - subject.transaction do - record.track_greatest_and_save!(new_value) - end - end - - def record - @record ||= (lookup || create_record) - end - - def lookup - InternalId.find_by(**scope, usage: usage_value) - end - - def usage_value - 10 # see Enums::InternalId - this is the value for design_management_designs - end - - # Create InternalId record for (scope, usage) combination, if it doesn't exist - # - # We blindly insert without synchronization. If another process - # was faster in doing this, we'll realize once we hit the unique key constraint - # violation. We can safely roll-back the nested transaction and perform - # a lookup instead to retrieve the record. - def create_record - subject.transaction(requires_new: true) do # rubocop:disable Performance/ActiveRecordSubtransactions - InternalId.create!( - **scope, - usage: usage_value, - last_value: 0 - ) - end - rescue ActiveRecord::RecordNotUnique - lookup - end - end - - attr_reader :design_class - - def initialize(design_class) - @design_class = design_class - end - - def perform(relation) - start_id, end_id = relation.pluck("min(project_id), max(project_id)").flatten - table = 'design_management_designs' - - ActiveRecord::Base.connection.execute <<~SQL - WITH - starting_iids(project_id, iid) as #{Gitlab::Database::AsWithMaterialized.materialized_if_supported}( - SELECT project_id, MAX(COALESCE(iid, 0)) - FROM #{table} - WHERE project_id BETWEEN #{start_id} AND #{end_id} - GROUP BY project_id - ), - with_calculated_iid(id, iid) as #{Gitlab::Database::AsWithMaterialized.materialized_if_supported}( - SELECT design.id, - init.iid + ROW_NUMBER() OVER (PARTITION BY design.project_id ORDER BY design.id ASC) - FROM #{table} as design, starting_iids as init - WHERE 
design.project_id BETWEEN #{start_id} AND #{end_id} - AND design.iid IS NULL - AND init.project_id = design.project_id - ) - - UPDATE #{table} - SET iid = with_calculated_iid.iid - FROM with_calculated_iid - WHERE #{table}.id = with_calculated_iid.id - SQL - - # track the new greatest IID value - relation.each do |design| - current_max = design_class.where(project_id: design.project_id).maximum(:iid) - scope = { project_id: design.project_id } - InternalId.track_greatest(design, scope, current_max) - end - end - end - end -end diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb index a9eaeb0562d..05e2ed72fb3 100644 --- a/lib/gitlab/background_migration/backfill_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -189,7 +189,7 @@ module Gitlab end def perform(start_id, stop_id) - Gitlab::Database.main.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) # rubocop:disable Gitlab/BulkInsert end private diff --git a/lib/gitlab/background_migration/backfill_user_namespace.rb b/lib/gitlab/background_migration/backfill_user_namespace.rb new file mode 100644 index 00000000000..f55eaa3b14e --- /dev/null +++ b/lib/gitlab/background_migration/backfill_user_namespace.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Backfills the `namespaces.type` column, replacing any + # instances of `NULL` with `User` + class BackfillUserNamespace + include Gitlab::Database::DynamicModelHelpers + + def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, pause_ms) + parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id) + parent_batch_relation.each_batch(column: batch_column, of: 
sub_batch_size, order_hint: :type) do |sub_batch| + batch_metrics.time_operation(:update_all) do + sub_batch.update_all(type: 'User') + end + pause_ms = 0 if pause_ms < 0 + sleep(pause_ms * 0.001) + end + end + + def batch_metrics + @batch_metrics ||= Gitlab::Database::BackgroundMigration::BatchMetrics.new + end + + private + + def connection + ActiveRecord::Base.connection + end + + def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) + define_batchable_model(source_table) + .where(source_key_column => start_id..stop_id) + .where(type: nil) + end + end + end +end diff --git a/lib/gitlab/background_migration/copy_merge_request_target_project_to_merge_request_metrics.rb b/lib/gitlab/background_migration/copy_merge_request_target_project_to_merge_request_metrics.rb deleted file mode 100644 index 691bdb457d7..00000000000 --- a/lib/gitlab/background_migration/copy_merge_request_target_project_to_merge_request_metrics.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class CopyMergeRequestTargetProjectToMergeRequestMetrics - extend ::Gitlab::Utils::Override - - def perform(start_id, stop_id) - ActiveRecord::Base.connection.execute <<~SQL - WITH merge_requests_batch AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} ( - SELECT id, target_project_id - FROM merge_requests WHERE id BETWEEN #{Integer(start_id)} AND #{Integer(stop_id)} - ) - UPDATE - merge_request_metrics - SET - target_project_id = merge_requests_batch.target_project_id - FROM merge_requests_batch - WHERE merge_request_metrics.merge_request_id=merge_requests_batch.id - SQL - end - end - end -end diff --git a/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb b/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb new file mode 100644 index 00000000000..ea3e56cb14a --- /dev/null +++ 
b/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Background migration for fixing merge_request_diff_commit rows that don't + # have committer/author details due to + # https://gitlab.com/gitlab-org/gitlab/-/issues/344080. + # + # This migration acts on a single project and corrects its data. Because + # this process needs Git/Gitaly access, and duplicating all that code is far + # too much, this migration relies on global models such as Project, + # MergeRequest, etc. + # rubocop: disable Metrics/ClassLength + class FixMergeRequestDiffCommitUsers + BATCH_SIZE = 100 + + def initialize + @commits = {} + @users = {} + end + + def perform(project_id) + if (project = ::Project.find_by_id(project_id)) + process(project) + end + + ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + 'FixMergeRequestDiffCommitUsers', + [project_id] + ) + + schedule_next_job + end + + def process(project) + # Loading everything using one big query may result in timeouts (e.g. + # for projects the size of gitlab-org/gitlab). So instead we query + # data on a per merge request basis. 
+ project.merge_requests.each_batch(column: :iid) do |mrs| + mrs.ids.each do |mr_id| + each_row_to_check(mr_id) do |commit| + update_commit(project, commit) + end + end + end + end + + def each_row_to_check(merge_request_id, &block) + columns = %w[merge_request_diff_id relative_order].map do |col| + Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: col, + order_expression: MergeRequestDiffCommit.arel_table[col.to_sym].asc, + nullable: :not_nullable, + distinct: false + ) + end + + order = Pagination::Keyset::Order.build(columns) + scope = MergeRequestDiffCommit + .joins(:merge_request_diff) + .where(merge_request_diffs: { merge_request_id: merge_request_id }) + .where('commit_author_id IS NULL OR committer_id IS NULL') + .order(order) + + Pagination::Keyset::Iterator + .new(scope: scope, use_union_optimization: true) + .each_batch(of: BATCH_SIZE) do |rows| + rows + .select([ + :merge_request_diff_id, + :relative_order, + :sha, + :committer_id, + :commit_author_id + ]) + .each(&block) + end + end + + # rubocop: disable Metrics/AbcSize + def update_commit(project, row) + commit = find_commit(project, row.sha) + updates = [] + + unless row.commit_author_id + author_id = find_or_create_user(commit, :author_name, :author_email) + + updates << [arel_table[:commit_author_id], author_id] if author_id + end + + unless row.committer_id + committer_id = + find_or_create_user(commit, :committer_name, :committer_email) + + updates << [arel_table[:committer_id], committer_id] if committer_id + end + + return if updates.empty? 
+ + update = Arel::UpdateManager + .new + .table(MergeRequestDiffCommit.arel_table) + .where(matches_row(row)) + .set(updates) + .to_sql + + MergeRequestDiffCommit.connection.execute(update) + end + # rubocop: enable Metrics/AbcSize + + def schedule_next_job + job = Database::BackgroundMigrationJob + .for_migration_class('FixMergeRequestDiffCommitUsers') + .pending + .first + + return unless job + + BackgroundMigrationWorker.perform_in( + 2.minutes, + 'FixMergeRequestDiffCommitUsers', + job.arguments + ) + end + + def find_commit(project, sha) + @commits[sha] ||= (project.commit(sha)&.to_hash || {}) + end + + def find_or_create_user(commit, name_field, email_field) + name = commit[name_field] + email = commit[email_field] + + return unless name && email + + @users[[name, email]] ||= + MergeRequest::DiffCommitUser.find_or_create(name, email).id + end + + def matches_row(row) + primary_key = Arel::Nodes::Grouping + .new([arel_table[:merge_request_diff_id], arel_table[:relative_order]]) + + primary_val = Arel::Nodes::Grouping + .new([row.merge_request_diff_id, row.relative_order]) + + primary_key.eq(primary_val) + end + + def arel_table + MergeRequestDiffCommit.arel_table + end + end + # rubocop: enable Metrics/ClassLength + end +end diff --git a/lib/gitlab/background_migration/fix_orphan_promoted_issues.rb b/lib/gitlab/background_migration/fix_orphan_promoted_issues.rb deleted file mode 100644 index c50bf430d92..00000000000 --- a/lib/gitlab/background_migration/fix_orphan_promoted_issues.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # No OP for CE - class FixOrphanPromotedIssues - def perform(note_id) - end - end - end -end - -Gitlab::BackgroundMigration::FixOrphanPromotedIssues.prepend_mod_with('Gitlab::BackgroundMigration::FixOrphanPromotedIssues') diff --git a/lib/gitlab/background_migration/fix_ruby_object_in_audit_events.rb b/lib/gitlab/background_migration/fix_ruby_object_in_audit_events.rb 
deleted file mode 100644 index 47a68c61fcc..00000000000 --- a/lib/gitlab/background_migration/fix_ruby_object_in_audit_events.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # Remove serialized Ruby object in audit_events - class FixRubyObjectInAuditEvents - def perform(start_id, stop_id) - end - end - end -end - -Gitlab::BackgroundMigration::FixRubyObjectInAuditEvents.prepend_mod_with('Gitlab::BackgroundMigration::FixRubyObjectInAuditEvents') diff --git a/lib/gitlab/background_migration/job_coordinator.rb b/lib/gitlab/background_migration/job_coordinator.rb new file mode 100644 index 00000000000..1c8819eaa62 --- /dev/null +++ b/lib/gitlab/background_migration/job_coordinator.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Class responsible for executing background migrations based on the given database. + # + # Chooses the correct worker class when selecting jobs from the queue based on the + # convention of how the queues and worker classes are setup for each database. + # + # Also provides a database connection to the correct tracking database. 
+ class JobCoordinator + VALID_DATABASES = %i[main].freeze + WORKER_CLASS_NAME = 'BackgroundMigrationWorker' + + def self.for_database(database) + database = database.to_sym + + unless VALID_DATABASES.include?(database) + raise ArgumentError, "database must be one of [#{VALID_DATABASES.join(', ')}], got '#{database}'" + end + + namespace = database.to_s.capitalize unless database == :main + namespaced_worker_class = [namespace, WORKER_CLASS_NAME].compact.join('::') + + new(database, "::#{namespaced_worker_class}".constantize) + end + + attr_reader :database, :worker_class + + def queue + @queue ||= worker_class.sidekiq_options['queue'] + end + + def with_shared_connection(&block) + Gitlab::Database::SharedModel.using_connection(connection, &block) + end + + def steal(steal_class, retry_dead_jobs: false) + with_shared_connection do + queues = [ + Sidekiq::ScheduledSet.new, + Sidekiq::Queue.new(self.queue) + ] + + if retry_dead_jobs + queues << Sidekiq::RetrySet.new + queues << Sidekiq::DeadSet.new + end + + queues.each do |queue| + queue.each do |job| + migration_class, migration_args = job.args + + next unless job.klass == worker_class.name + next unless migration_class == steal_class + next if block_given? 
&& !(yield job) + + begin + perform(migration_class, migration_args) if job.delete + rescue Exception # rubocop:disable Lint/RescueException + worker_class # enqueue this migration again + .perform_async(migration_class, migration_args) + + raise + end + end + end + end + end + + def perform(class_name, arguments) + with_shared_connection do + migration_class_for(class_name).new.perform(*arguments) + end + end + + def remaining + enqueued = Sidekiq::Queue.new(self.queue) + scheduled = Sidekiq::ScheduledSet.new + + [enqueued, scheduled].sum do |set| + set.count do |job| + job.klass == worker_class.name + end + end + end + + def exists?(migration_class, additional_queues = []) + enqueued = Sidekiq::Queue.new(self.queue) + scheduled = Sidekiq::ScheduledSet.new + + enqueued_job?([enqueued, scheduled], migration_class) + end + + def dead_jobs?(migration_class) + dead_set = Sidekiq::DeadSet.new + + enqueued_job?([dead_set], migration_class) + end + + def retrying_jobs?(migration_class) + retry_set = Sidekiq::RetrySet.new + + enqueued_job?([retry_set], migration_class) + end + + def migration_class_for(class_name) + Gitlab::BackgroundMigration.const_get(class_name, false) + end + + def enqueued_job?(queues, migration_class) + queues.any? do |queue| + queue.any? 
do |job| + job.klass == worker_class.name && job.args.first == migration_class + end + end + end + + private + + def initialize(database, worker_class) + @database = database + @worker_class = worker_class + end + + def connection + @connection ||= Gitlab::Database + .database_base_models + .fetch(database, Gitlab::Database::PRIMARY_DATABASE_NAME) + .connection + end + end + end +end diff --git a/lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb b/lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb index 1c60473750d..36a339c6b80 100644 --- a/lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb +++ b/lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb @@ -34,7 +34,7 @@ module Gitlab end end - Gitlab::Database.main.bulk_insert(TEMP_TABLE, fingerprints) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(TEMP_TABLE, fingerprints) # rubocop:disable Gitlab/BulkInsert execute("ANALYZE #{TEMP_TABLE}") diff --git a/lib/gitlab/background_migration/migrate_issue_trackers_sensitive_data.rb b/lib/gitlab/background_migration/migrate_issue_trackers_sensitive_data.rb deleted file mode 100644 index 14c72bb4a72..00000000000 --- a/lib/gitlab/background_migration/migrate_issue_trackers_sensitive_data.rb +++ /dev/null @@ -1,146 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # This migration takes all issue trackers - # and move data from properties to data field tables (jira_tracker_data and issue_tracker_data) - class MigrateIssueTrackersSensitiveData - delegate :select_all, :execute, :quote_string, to: :connection - - # we need to define this class and set fields encryption - class IssueTrackerData < ApplicationRecord - self.table_name = 'issue_tracker_data' - - def self.encryption_options - { - key: Settings.attr_encrypted_db_key_base_32, - encode: true, - mode: :per_attribute_iv, - algorithm: 'aes-256-gcm' - } - end - - 
attr_encrypted :project_url, encryption_options - attr_encrypted :issues_url, encryption_options - attr_encrypted :new_issue_url, encryption_options - end - - # we need to define this class and set fields encryption - class JiraTrackerData < ApplicationRecord - self.table_name = 'jira_tracker_data' - - def self.encryption_options - { - key: Settings.attr_encrypted_db_key_base_32, - encode: true, - mode: :per_attribute_iv, - algorithm: 'aes-256-gcm' - } - end - - attr_encrypted :url, encryption_options - attr_encrypted :api_url, encryption_options - attr_encrypted :username, encryption_options - attr_encrypted :password, encryption_options - end - - def perform(start_id, stop_id) - columns = 'id, properties, title, description, type' - batch_condition = "id >= #{start_id} AND id <= #{stop_id} AND category = 'issue_tracker' \ - AND properties IS NOT NULL AND properties != '{}' AND properties != ''" - - data_subselect = "SELECT 1 \ - FROM jira_tracker_data \ - WHERE jira_tracker_data.service_id = services.id \ - UNION SELECT 1 \ - FROM issue_tracker_data \ - WHERE issue_tracker_data.service_id = services.id" - - query = "SELECT #{columns} FROM services WHERE #{batch_condition} AND NOT EXISTS (#{data_subselect})" - - migrated_ids = [] - data_to_insert(query).each do |table, data| - service_ids = data.map { |s| s['service_id'] } - - next if service_ids.empty? - - migrated_ids += service_ids - Gitlab::Database.main.bulk_insert(table, data) # rubocop:disable Gitlab/BulkInsert - end - - return if migrated_ids.empty? 
- - move_title_description(migrated_ids) - end - - private - - def data_to_insert(query) - data = { 'jira_tracker_data' => [], 'issue_tracker_data' => [] } - select_all(query).each do |service| - begin - properties = Gitlab::Json.parse(service['properties']) - rescue JSON::ParserError - logger.warn( - message: 'Properties data not parsed - invalid json', - service_id: service['id'], - properties: service['properties'] - ) - next - end - - if service['type'] == 'JiraService' - row = data_row(JiraTrackerData, jira_mapping(properties), service) - key = 'jira_tracker_data' - else - row = data_row(IssueTrackerData, issue_tracker_mapping(properties), service) - key = 'issue_tracker_data' - end - - data[key] << row if row - end - - data - end - - def data_row(klass, mapping, service) - base_params = { service_id: service['id'], created_at: Time.current, updated_at: Time.current } - klass.new(mapping).slice(*klass.column_names).compact.merge(base_params) - end - - def move_title_description(service_ids) - query = "UPDATE services SET \ - title = cast(properties as json)->>'title', \ - description = cast(properties as json)->>'description' \ - WHERE id IN (#{service_ids.join(',')}) AND title IS NULL AND description IS NULL" - - execute(query) - end - - def jira_mapping(properties) - { - url: properties['url'], - api_url: properties['api_url'], - username: properties['username'], - password: properties['password'] - } - end - - def issue_tracker_mapping(properties) - { - project_url: properties['project_url'], - issues_url: properties['issues_url'], - new_issue_url: properties['new_issue_url'] - } - end - - def connection - @connection ||= ActiveRecord::Base.connection - end - - def logger - @logger ||= Gitlab::BackgroundMigration::Logger.build - end - end - end -end diff --git a/lib/gitlab/background_migration/migrate_requirements_to_work_items.rb b/lib/gitlab/background_migration/migrate_requirements_to_work_items.rb new file mode 100644 index 00000000000..017791f197c --- 
/dev/null +++ b/lib/gitlab/background_migration/migrate_requirements_to_work_items.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # No op on CE + class MigrateRequirementsToWorkItems + def perform(start_id, end_id) + end + end + end +end + +Gitlab::BackgroundMigration::MigrateRequirementsToWorkItems.prepend_mod_with('Gitlab::BackgroundMigration::MigrateRequirementsToWorkItems') diff --git a/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb b/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb deleted file mode 100644 index bbe2164ae4e..00000000000 --- a/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class MigrateUsersBioToUserDetails - class User < ActiveRecord::Base - self.table_name = 'users' - end - - class UserDetails < ActiveRecord::Base - self.table_name = 'user_details' - end - - def perform(start_id, stop_id) - relation = User - .select("id AS user_id", "substring(COALESCE(bio, '') from 1 for 255) AS bio") - .where("(COALESCE(bio, '') IS DISTINCT FROM '')") - .where(id: (start_id..stop_id)) - - ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc - INSERT INTO user_details - (user_id, bio) - #{relation.to_sql} - ON CONFLICT (user_id) - DO UPDATE SET - "bio" = EXCLUDED."bio"; - EOF - end - end - end -end diff --git a/lib/gitlab/background_migration/populate_issue_email_participants.rb b/lib/gitlab/background_migration/populate_issue_email_participants.rb index 0a56ac1dae8..2b959b81f45 100644 --- a/lib/gitlab/background_migration/populate_issue_email_participants.rb +++ b/lib/gitlab/background_migration/populate_issue_email_participants.rb @@ -21,7 +21,7 @@ module Gitlab } end - Gitlab::Database.main.bulk_insert(:issue_email_participants, rows, on_conflict: :do_nothing) # rubocop:disable 
Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(:issue_email_participants, rows, on_conflict: :do_nothing) # rubocop:disable Gitlab/BulkInsert end end end diff --git a/lib/gitlab/background_migration/populate_user_highest_roles_table.rb b/lib/gitlab/background_migration/populate_user_highest_roles_table.rb deleted file mode 100644 index 16386ebf9c3..00000000000 --- a/lib/gitlab/background_migration/populate_user_highest_roles_table.rb +++ /dev/null @@ -1,58 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # This background migration creates records on user_highest_roles according to - # the given user IDs range. IDs will load users with a left outer joins to - # have a record for users without a Group or Project. One INSERT per ID is - # issued. - class PopulateUserHighestRolesTable - BATCH_SIZE = 100 - - # rubocop:disable Style/Documentation - class User < ActiveRecord::Base - self.table_name = 'users' - - scope :active, -> { - where(state: 'active', user_type: nil, bot_type: nil) - .where('ghost IS NOT TRUE') - } - end - - def perform(from_id, to_id) - return unless User.column_names.include?('bot_type') - - (from_id..to_id).each_slice(BATCH_SIZE) do |ids| - execute( - <<-EOF - INSERT INTO user_highest_roles (updated_at, user_id, highest_access_level) - #{select_sql(from_id, to_id)} - ON CONFLICT (user_id) DO - UPDATE SET highest_access_level = EXCLUDED.highest_access_level - EOF - ) - end - end - - private - - def select_sql(from_id, to_id) - User - .select('NOW() as updated_at, users.id, MAX(access_level) AS highest_access_level') - .joins('LEFT OUTER JOIN members ON members.user_id = users.id AND members.requested_at IS NULL') - .where(users: { id: active_user_ids(from_id, to_id) }) - .group('users.id') - .to_sql - end - - def active_user_ids(from_id, to_id) - User.active.where(users: { id: from_id..to_id }).pluck(:id) - end - - def execute(sql) - @connection ||= ActiveRecord::Base.connection - 
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      # Back-fill project namespaces for projects that do not yet have a namespace.
      #
      # TODO: remove this comment when an actual backfill migration is added.
      #
      # This is first being added without an actual migration as we need to initially test
      # if backfilling project namespaces affects performance in any significant way.
      #
      # One job handles one range of project ids, optionally narrowed to the projects whose
      # `namespace_id` lies in the group hierarchy rooted at a given namespace:
      #
      # * 'up'   inserts a `namespaces` row of type 'Project' per project (keyed by
      #          `tmp_project_id`), points `projects.project_namespace_id` at it and fills in
      #          the new row's `traversal_ids`.
      # * 'down' nullifies `projects.project_namespace_id` and deletes those `namespaces` rows.
      # rubocop: disable Metrics/ClassLength
      class BackfillProjectNamespaces
        # Number of projects handled per INSERT/UPDATE round.
        BATCH_SIZE = 100
        # Number of projects whose namespace rows are removed per DELETE statement.
        DELETE_BATCH_SIZE = 10
        # Value written to / matched against `namespaces.type` for project namespaces.
        PROJECT_NAMESPACE_STI_NAME = 'Project'

        IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models

        # Entry point of the job.
        #
        # start_id, end_id - inclusive bounds of the `projects.id` range to process.
        # namespace_id     - root group id used to restrict the projects; when nil (or false)
        #                    every project in the id range is processed.
        # migration_type   - 'up' to back-fill, 'down' to remove what 'up' created.
        #
        # Raises RuntimeError ("Unknown migration type") for any other migration_type. Note that
        # the project ids are loaded before the type is checked.
        def perform(start_id, end_id, namespace_id, migration_type = 'up')
          load_project_ids(start_id, end_id, namespace_id)

          case migration_type
          when 'up'
            backfill_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'up')
          when 'down'
            cleanup_backfilled_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'down')
          else
            raise "Unknown migration type"
          end
        end

        private

        # Ids collected by `load_project_ids`; every other step works on this list.
        attr_accessor :project_ids

        # Creates and links project namespaces for the loaded projects, BATCH_SIZE at a time.
        # `namespace_id` is accepted for symmetry with `perform` but is not used here.
        def backfill_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # We need to lock these project records for the period when we create project namespaces
            # and link them to projects so that if a project is modified in the time between creating
            # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
            # we do not get them out of sync.
            #
            # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
            #
            # NOTE(review): `Project` is the constant found by normal lookup from this class, not
            # `IsolatedModels::Project` - presumably the application model; confirm this is intended.
            Project.transaction do
              # `lock!` only adds the locking clause to the relation. Without `load` the
              # SELECT ... FOR UPDATE is never sent and no row is actually locked.
              Project.where(id: batch_ids).select(:id).lock!('FOR UPDATE').load

              batch_insert_namespaces(batch_ids)
              batch_update_projects(batch_ids)
            end

            # Runs after the transaction above has finished, as a separate statement.
            batch_update_project_namespaces_traversal_ids(batch_ids)
          end
        end

        # Reverts the back-fill for the loaded projects, BATCH_SIZE at a time.
        # `namespace_id` is accepted for symmetry with `perform` but is not used here.
        def cleanup_backfilled_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
            # from namespaces are deleted due to FK/triggers
            nullify_project_namespaces_in_projects(batch_ids)
            delete_project_namespace_records(batch_ids)
          end
        end

        # Inserts one `namespaces` row per given project with a single INSERT ... SELECT.
        # The SELECT column order must match the INSERT column list; rows that would
        # conflict with an existing one are skipped (ON CONFLICT DO NOTHING).
        def batch_insert_namespaces(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .select("projects.id, projects.name, projects.path, projects.namespace_id, projects.visibility_level, shared_runners_enabled, '#{PROJECT_NAMESPACE_STI_NAME}', now(), now()")

          ActiveRecord::Base.connection.execute <<~SQL
            INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
            #{projects.to_sql}
            ON CONFLICT DO NOTHING;
          SQL
        end

        # Sets `projects.project_namespace_id` to the id of the namespace row whose
        # `tmp_project_id` matches the project. Rows that already hold the right value
        # are left alone (IS DISTINCT FROM).
        def batch_update_projects(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
            .select("namespaces.id, namespaces.tmp_project_id")

          ActiveRecord::Base.connection.execute <<~SQL
            WITH cte(project_namespace_id, project_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
              #{projects.to_sql}
            )
            UPDATE projects
            SET project_namespace_id = cte.project_namespace_id
            FROM cte
            WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
          SQL
        end

        # Sets each project namespace's `traversal_ids` to its parent's `traversal_ids`
        # with the project namespace's own id appended.
        #
        # NOTE(review): `Namespace` is the constant found by normal lookup, not
        # `IsolatedModels::Namespace` - confirm this is intended.
        def batch_update_project_namespaces_traversal_ids(project_ids)
          namespaces = Namespace.where(tmp_project_id: project_ids)
            .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
            .select("namespaces.id as project_namespace_id, n2.traversal_ids")

          ActiveRecord::Base.connection.execute <<~SQL
            UPDATE namespaces
            SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
            FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
            WHERE id = project_namespaces.project_namespace_id
          SQL
        end

        # Clears the link from the given projects to their project namespaces.
        def nullify_project_namespaces_in_projects(project_ids)
          IsolatedModels::Project.where(id: project_ids).update_all(project_namespace_id: nil)
        end

        # Deletes the 'Project'-typed namespace rows of the given projects in chunks of
        # DELETE_BATCH_SIZE.
        def delete_project_namespace_records(project_ids)
          project_ids.each_slice(DELETE_BATCH_SIZE) do |p_ids|
            IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
          end
        end

        # Stores in @project_ids the ids of projects within start_id..end_id, restricted
        # to the hierarchy returned by `hierarchy_cte` when namespace_id is given.
        def load_project_ids(start_id, end_id, namespace_id)
          projects = IsolatedModels::Project.arel_table
          relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
          relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id)))) if namespace_id

          @project_ids = relation.pluck(:id)
        end

        # Records the job identified by `arguments` as succeeded under the
        # 'BackfillProjectNamespaces' class name.
        def mark_job_as_succeeded(*arguments)
          ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
        end

        # Returns SQL selecting the id of the 'Group' namespace root_namespace_id and of
        # all 'Group' namespaces below it. The id is coerced with `to_i` before being
        # interpolated, so only an integer can reach the query text.
        def hierarchy_cte(root_namespace_id)
          <<-SQL
            WITH RECURSIVE "base_and_descendants" AS (
              (
                SELECT "namespaces"."id"
                FROM "namespaces"
                WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_namespace_id.to_i}
              )
              UNION
              (
                SELECT "namespaces"."id"
                FROM "namespaces", "base_and_descendants"
                WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
              )
            )
            SELECT "id" FROM "base_and_descendants" AS "namespaces"
          SQL
        end
      end
      # rubocop: enable Metrics/ClassLength
    end
  end
end
a/lib/gitlab/background_migration/project_namespaces/models/namespace.rb b/lib/gitlab/background_migration/project_namespaces/models/namespace.rb new file mode 100644 index 00000000000..5576c34cf65 --- /dev/null +++ b/lib/gitlab/background_migration/project_namespaces/models/namespace.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module ProjectNamespaces + module Models + # isolated Namespace model + class Namespace < ActiveRecord::Base + include EachBatch + + self.table_name = 'namespaces' + self.inheritance_column = :_type_disabled + end + end + end + end +end diff --git a/lib/gitlab/background_migration/project_namespaces/models/project.rb b/lib/gitlab/background_migration/project_namespaces/models/project.rb new file mode 100644 index 00000000000..4a6a309e289 --- /dev/null +++ b/lib/gitlab/background_migration/project_namespaces/models/project.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module ProjectNamespaces + module Models + # isolated Project model + class Project < ActiveRecord::Base + include EachBatch + + self.table_name = 'projects' + end + end + end + end +end diff --git a/lib/gitlab/background_migration/remove_duplicate_vulnerabilities_findings.rb b/lib/gitlab/background_migration/remove_duplicate_vulnerabilities_findings.rb index ca61118a06c..15799659b55 100644 --- a/lib/gitlab/background_migration/remove_duplicate_vulnerabilities_findings.rb +++ b/lib/gitlab/background_migration/remove_duplicate_vulnerabilities_findings.rb @@ -2,7 +2,7 @@ # rubocop: disable Style/Documentation class Gitlab::BackgroundMigration::RemoveDuplicateVulnerabilitiesFindings - DELETE_BATCH_SIZE = 100 + DELETE_BATCH_SIZE = 50 # rubocop:disable Gitlab/NamespacedClass class VulnerabilitiesFinding < ActiveRecord::Base @@ -10,6 +10,12 @@ class Gitlab::BackgroundMigration::RemoveDuplicateVulnerabilitiesFindings end # rubocop:enable Gitlab/NamespacedClass + # rubocop:disable 
Gitlab/NamespacedClass + class Vulnerability < ActiveRecord::Base + self.table_name = "vulnerabilities" + end + # rubocop:enable Gitlab/NamespacedClass + def perform(start_id, end_id) batch = VulnerabilitiesFinding.where(id: start_id..end_id) @@ -40,11 +46,19 @@ class Gitlab::BackgroundMigration::RemoveDuplicateVulnerabilitiesFindings ids_to_delete.concat(duplicate_ids) if ids_to_delete.size == DELETE_BATCH_SIZE - VulnerabilitiesFinding.where(id: ids_to_delete).delete_all + delete_findings_and_vulnerabilities(ids_to_delete) ids_to_delete.clear end end - VulnerabilitiesFinding.where(id: ids_to_delete).delete_all if ids_to_delete.any? + delete_findings_and_vulnerabilities(ids_to_delete) if ids_to_delete.any? + end + + private + + def delete_findings_and_vulnerabilities(ids) + vulnerability_ids = VulnerabilitiesFinding.where(id: ids).pluck(:vulnerability_id).compact + VulnerabilitiesFinding.where(id: ids).delete_all + Vulnerability.where(id: vulnerability_ids).delete_all end end diff --git a/lib/gitlab/background_migration/remove_undefined_occurrence_confidence_level.rb b/lib/gitlab/background_migration/remove_undefined_occurrence_confidence_level.rb deleted file mode 100644 index 540ffc6f548..00000000000 --- a/lib/gitlab/background_migration/remove_undefined_occurrence_confidence_level.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class RemoveUndefinedOccurrenceConfidenceLevel - def perform(start_id, stop_id) - end - end - end -end - -Gitlab::BackgroundMigration::RemoveUndefinedOccurrenceConfidenceLevel.prepend_mod_with('Gitlab::BackgroundMigration::RemoveUndefinedOccurrenceConfidenceLevel') diff --git a/lib/gitlab/background_migration/remove_undefined_occurrence_severity_level.rb b/lib/gitlab/background_migration/remove_undefined_occurrence_severity_level.rb deleted file mode 100644 index cecb385afa0..00000000000 --- 
a/lib/gitlab/background_migration/remove_undefined_occurrence_severity_level.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class RemoveUndefinedOccurrenceSeverityLevel - def perform(start_id, stop_id) - end - end - end -end - -Gitlab::BackgroundMigration::RemoveUndefinedOccurrenceSeverityLevel.prepend_mod_with('Gitlab::BackgroundMigration::RemoveUndefinedOccurrenceSeverityLevel') diff --git a/lib/gitlab/background_migration/remove_undefined_vulnerability_severity_level.rb b/lib/gitlab/background_migration/remove_undefined_vulnerability_severity_level.rb deleted file mode 100644 index 1ea483f929f..00000000000 --- a/lib/gitlab/background_migration/remove_undefined_vulnerability_severity_level.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class RemoveUndefinedVulnerabilitySeverityLevel - def perform(start_id, stop_id) - end - end - end -end - -Gitlab::BackgroundMigration::RemoveUndefinedVulnerabilitySeverityLevel.prepend_mod_with('Gitlab::BackgroundMigration::RemoveUndefinedVulnerabilitySeverityLevel') diff --git a/lib/gitlab/background_migration/set_default_iteration_cadences.rb b/lib/gitlab/background_migration/set_default_iteration_cadences.rb deleted file mode 100644 index 42f9d33ab71..00000000000 --- a/lib/gitlab/background_migration/set_default_iteration_cadences.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # rubocop:disable Style/Documentation - class SetDefaultIterationCadences - class Iteration < ApplicationRecord - self.table_name = 'sprints' - end - - class IterationCadence < ApplicationRecord - self.table_name = 'iterations_cadences' - - include BulkInsertSafe - end - - class Group < ApplicationRecord - self.table_name = 'namespaces' - - self.inheritance_column = :_type_disabled - 
end - - def perform(*group_ids) - create_iterations_cadences(group_ids) - assign_iterations_cadences(group_ids) - end - - private - - def create_iterations_cadences(group_ids) - groups_with_cadence = IterationCadence.select(:group_id) - - new_cadences = Group.where(id: group_ids).where.not(id: groups_with_cadence).map do |group| - last_iteration = Iteration.where(group_id: group.id).order(:start_date)&.last - - next unless last_iteration - - time = Time.now - IterationCadence.new( - group_id: group.id, - title: "#{group.name} Iterations", - start_date: last_iteration.start_date, - last_run_date: last_iteration.start_date, - automatic: false, - created_at: time, - updated_at: time - ) - end - - IterationCadence.bulk_insert!(new_cadences.compact, skip_duplicates: true) - end - - def assign_iterations_cadences(group_ids) - IterationCadence.where(group_id: group_ids).each do |cadence| - Iteration.where(iterations_cadence_id: nil).where(group_id: cadence.group_id).update_all(iterations_cadence_id: cadence.id) - end - end - end - end -end diff --git a/lib/gitlab/background_migration/set_null_external_diff_store_to_local_value.rb b/lib/gitlab/background_migration/set_null_external_diff_store_to_local_value.rb deleted file mode 100644 index 71f3483987e..00000000000 --- a/lib/gitlab/background_migration/set_null_external_diff_store_to_local_value.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # This class is responsible for migrating a range of merge request diffs - # with external_diff_store == NULL to 1. - # - # The index `index_merge_request_diffs_external_diff_store_is_null` is - # expected to be used to find the rows here and in the migration scheduling - # the jobs that run this class. 
- class SetNullExternalDiffStoreToLocalValue - LOCAL_STORE = 1 # equal to ObjectStorage::Store::LOCAL - - # Temporary AR class for merge request diffs - class MergeRequestDiff < ActiveRecord::Base - self.table_name = 'merge_request_diffs' - end - - def perform(start_id, stop_id) - MergeRequestDiff.where(external_diff_store: nil, id: start_id..stop_id).update_all(external_diff_store: LOCAL_STORE) - end - end - end -end diff --git a/lib/gitlab/background_migration/set_null_package_files_file_store_to_local_value.rb b/lib/gitlab/background_migration/set_null_package_files_file_store_to_local_value.rb deleted file mode 100644 index c485c23f3be..00000000000 --- a/lib/gitlab/background_migration/set_null_package_files_file_store_to_local_value.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # This class is responsible for migrating a range of package files - # with file_store == NULL to 1. - # - # The index `index_packages_package_files_file_store_is_null` is - # expected to be used to find the rows here and in the migration scheduling - # the jobs that run this class. 
- class SetNullPackageFilesFileStoreToLocalValue - LOCAL_STORE = 1 # equal to ObjectStorage::Store::LOCAL - - module Packages - # Temporary AR class for package files - class PackageFile < ActiveRecord::Base - self.table_name = 'packages_package_files' - end - end - - def perform(start_id, stop_id) - Packages::PackageFile.where(file_store: nil, id: start_id..stop_id).update_all(file_store: LOCAL_STORE) - end - end - end -end diff --git a/lib/gitlab/background_migration/update_vulnerabilities_to_dismissed.rb b/lib/gitlab/background_migration/update_vulnerabilities_to_dismissed.rb deleted file mode 100644 index 60adb6b7e3e..00000000000 --- a/lib/gitlab/background_migration/update_vulnerabilities_to_dismissed.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # rubocop: disable Style/Documentation - class UpdateVulnerabilitiesToDismissed - def perform(project_id) - end - end - end -end - -Gitlab::BackgroundMigration::UpdateVulnerabilitiesToDismissed.prepend_mod_with('Gitlab::BackgroundMigration::UpdateVulnerabilitiesToDismissed') diff --git a/lib/gitlab/background_migration/update_vulnerability_confidence.rb b/lib/gitlab/background_migration/update_vulnerability_confidence.rb deleted file mode 100644 index 40d29978dd4..00000000000 --- a/lib/gitlab/background_migration/update_vulnerability_confidence.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # rubocop: disable Style/Documentation - class UpdateVulnerabilityConfidence - def perform(start_id, stop_id) - end - end - end -end - -Gitlab::BackgroundMigration::UpdateVulnerabilityConfidence.prepend_mod_with('Gitlab::BackgroundMigration::UpdateVulnerabilityConfidence') diff --git a/lib/gitlab/background_migration/update_vulnerability_occurrences_location.rb b/lib/gitlab/background_migration/update_vulnerability_occurrences_location.rb new file mode 100644 index 00000000000..458e0537f1c --- 
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Background migration whose `perform` is intentionally empty in this file.
    #
    # NOTE(review): the `prepend_mod_with` call at the bottom presumably lets an
    # extension module of the same name supply the real work - confirm against
    # that helper.
    class UpdateVulnerabilityOccurrencesLocation
      # Receives the id range of a batch and does nothing with it here.
      def perform(start_id, stop_id); end
    end
  end
end

Gitlab::BackgroundMigration::UpdateVulnerabilityOccurrencesLocation
  .prepend_mod_with('Gitlab::BackgroundMigration::UpdateVulnerabilityOccurrencesLocation')