diff options
Diffstat (limited to 'lib')
13 files changed, 902 insertions, 0 deletions
# File: lib/gitlab/background_migration/backfill_ci_queuing_tables.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Ensure queuing entries are present even if admins skip upgrades.
    #
    # For every pending build in the given id range that has no row in
    # `ci_pending_builds` yet, a row is upserted from the build's attributes.
    class BackfillCiQueuingTables
      class Namespace < ActiveRecord::Base # rubocop:disable Style/Documentation
        self.table_name = 'namespaces'
        self.inheritance_column = :_type_disabled
      end

      class Project < ActiveRecord::Base # rubocop:disable Style/Documentation
        self.table_name = 'projects'

        belongs_to :namespace
        has_one :ci_cd_settings, class_name: 'Gitlab::BackgroundMigration::BackfillCiQueuingTables::ProjectCiCdSetting'

        # A project without a settings row is treated as having group runners disabled.
        def group_runners_enabled?
          return false unless ci_cd_settings

          ci_cd_settings.group_runners_enabled?
        end
      end

      class ProjectCiCdSetting < ActiveRecord::Base # rubocop:disable Style/Documentation
        self.table_name = 'project_ci_cd_settings'
      end

      class Taggings < ActiveRecord::Base # rubocop:disable Style/Documentation
        self.table_name = 'taggings'
      end

      module Ci
        class Build < ActiveRecord::Base # rubocop:disable Style/Documentation
          include EachBatch

          self.table_name = 'ci_builds'
          self.inheritance_column = :_type_disabled

          belongs_to :project

          scope :pending, -> do
            where(status: :pending, type: 'Ci::Build', runner_id: nil)
          end

          # Replaces the `each_batch` provided by EachBatch with a variant whose
          # default ordering is `runner_id, id`. Yields `(relation, index)` for
          # each batch of at most `of` rows; `order_hint` is accepted but not
          # used by this implementation.
          def self.each_batch(of: 1000, column: :id, order: { runner_id: :asc, id: :asc }, order_hint: nil)
            start = except(:select).select(column).reorder(order)
            start = start.take
            return unless start

            start_id = start[column]
            arel_table = self.arel_table

            1.step do |index|
              start_cond = arel_table[column].gteq(start_id)
              stop = except(:select).select(column).where(start_cond).reorder(order)
              stop = stop.offset(of).limit(1).take
              relation = where(start_cond)

              if stop
                stop_id = stop[column]
                start_id = stop_id
                stop_cond = arel_table[column].lt(stop_id)
                relation = relation.where(stop_cond)
              end

              # Any ORDER BYs are useless for this relation and can lead to less
              # efficient UPDATE queries, hence we get rid of it.
              relation = relation.except(:order)

              # Using unscoped is necessary to prevent leaking the current scope used by
              # ActiveRecord to chain `each_batch` method.
              unscoped { yield relation, index }

              break unless stop
            end
          end

          # Ids of the tags attached to this build through the `taggings` table.
          def tags_ids
            BackfillCiQueuingTables::Taggings
              .where(taggable_id: id, taggable_type: 'CommitStatus')
              .pluck(:tag_id)
          end
        end

        class PendingBuild < ActiveRecord::Base # rubocop:disable Style/Documentation
          self.table_name = 'ci_pending_builds'

          class << self
            # Upserts one `ci_pending_builds` row for `build`, keyed on `build_id`.
            # Nil attributes are dropped before the upsert.
            def upsert_from_build!(build)
              entry = self.new(args_from_build(build))

              self.upsert(
                entry.attributes.compact,
                returning: %w[build_id],
                unique_by: :build_id)
            end

            # Attribute hash for a pending-build row derived from `build` and its project.
            def args_from_build(build)
              project = build.project

              {
                build_id: build.id,
                project_id: build.project_id,
                protected: build.protected?,
                namespace_id: project.namespace_id,
                tag_ids: build.tags_ids,
                instance_runners_enabled: project.shared_runners_enabled?,
                namespace_traversal_ids: namespace_traversal_ids(project)
              }
            end

            # Traversal ids of the project's namespace, or an empty list when
            # group runners are disabled for the project.
            def namespace_traversal_ids(project)
              if project.group_runners_enabled?
                project.namespace.traversal_ids
              else
                []
              end
            end
          end
        end
      end

      BATCH_SIZE = 100

      # Backfills pending-build rows for builds with ids in `start_id..end_id`,
      # then marks the job as succeeded.
      def perform(start_id, end_id)
        scope = BackfillCiQueuingTables::Ci::Build.pending.where(id: start_id..end_id)
        pending_builds_query = BackfillCiQueuingTables::Ci::PendingBuild
          .where('ci_builds.id = ci_pending_builds.build_id')
          .select(1)

        scope.each_batch(of: BATCH_SIZE) do |builds|
          # Only builds that do not already have a pending-build row.
          builds = builds.where('NOT EXISTS (?)', pending_builds_query)
          builds = builds.includes(:project, project: [:namespace, :ci_cd_settings])

          builds.each do |build|
            BackfillCiQueuingTables::Ci::PendingBuild.upsert_from_build!(build)
          end
        end

        mark_job_as_succeeded(start_id, end_id)
      end

      private

      def mark_job_as_succeeded(*arguments)
        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
          self.class.name.demodulize,
          arguments)
      end
    end
  end
end

# File: lib/gitlab/background_migration/backfill_integrations_type_new.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Backfills the new `integrations.type_new` column, which contains
    # the real class name, rather than the legacy class name in `type`
    # which is mapped via `Gitlab::Integrations::StiType`.
    class BackfillIntegrationsTypeNew
      include Gitlab::Database::DynamicModelHelpers

      # Walks `batch_table` rows whose `batch_column` lies in `start_id..stop_id`
      # in sub-batches of `sub_batch_size`, sleeping `pause_ms` milliseconds
      # after each sub-batch when `pause_ms` is positive.
      def perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
        parent_batch_relation = define_batchable_model(batch_table, connection: connection)
          .where(batch_column => start_id..stop_id)

        parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
          process_sub_batch(sub_batch)

          sleep(pause_ms * 0.001) if pause_ms > 0
        end
      end

      private

      def connection
        ApplicationRecord.connection
      end

      # Sets `type_new` for every integration in the sub-batch's id range whose
      # legacy `type` appears in the mapping below.
      def process_sub_batch(sub_batch)
        # Extract the start/stop IDs from the current sub-batch
        sub_start_id, sub_stop_id = sub_batch.pick(Arel.sql('MIN(id), MAX(id)'))

        # This matches the mapping from the INSERT trigger added in
        # db/migrate/20210721135638_add_triggers_to_integrations_type_new.rb
        connection.execute(<<~SQL)
          WITH mapping(old_type, new_type) AS (VALUES
            ('AsanaService',                   'Integrations::Asana'),
            ('AssemblaService',                'Integrations::Assembla'),
            ('BambooService',                  'Integrations::Bamboo'),
            ('BugzillaService',                'Integrations::Bugzilla'),
            ('BuildkiteService',               'Integrations::Buildkite'),
            ('CampfireService',                'Integrations::Campfire'),
            ('ConfluenceService',              'Integrations::Confluence'),
            ('CustomIssueTrackerService',      'Integrations::CustomIssueTracker'),
            ('DatadogService',                 'Integrations::Datadog'),
            ('DiscordService',                 'Integrations::Discord'),
            ('DroneCiService',                 'Integrations::DroneCi'),
            ('EmailsOnPushService',            'Integrations::EmailsOnPush'),
            ('EwmService',                     'Integrations::Ewm'),
            ('ExternalWikiService',            'Integrations::ExternalWiki'),
            ('FlowdockService',                'Integrations::Flowdock'),
            ('HangoutsChatService',            'Integrations::HangoutsChat'),
            ('IrkerService',                   'Integrations::Irker'),
            ('JenkinsService',                 'Integrations::Jenkins'),
            ('JiraService',                    'Integrations::Jira'),
            ('MattermostService',              'Integrations::Mattermost'),
            ('MattermostSlashCommandsService', 'Integrations::MattermostSlashCommands'),
            ('MicrosoftTeamsService',          'Integrations::MicrosoftTeams'),
            ('MockCiService',                  'Integrations::MockCi'),
            ('MockMonitoringService',          'Integrations::MockMonitoring'),
            ('PackagistService',               'Integrations::Packagist'),
            ('PipelinesEmailService',          'Integrations::PipelinesEmail'),
            ('PivotaltrackerService',          'Integrations::Pivotaltracker'),
            ('PrometheusService',              'Integrations::Prometheus'),
            ('PushoverService',                'Integrations::Pushover'),
            ('RedmineService',                 'Integrations::Redmine'),
            ('SlackService',                   'Integrations::Slack'),
            ('SlackSlashCommandsService',      'Integrations::SlackSlashCommands'),
            ('TeamcityService',                'Integrations::Teamcity'),
            ('UnifyCircuitService',            'Integrations::UnifyCircuit'),
            ('WebexTeamsService',              'Integrations::WebexTeams'),
            ('YoutrackService',                'Integrations::Youtrack'),

            -- EE-only integrations
            ('GithubService',                  'Integrations::Github'),
            ('GitlabSlackApplicationService',  'Integrations::GitlabSlackApplication')
          )

          UPDATE integrations SET type_new = mapping.new_type
          FROM mapping
          WHERE integrations.id BETWEEN #{sub_start_id} AND #{sub_stop_id}
            AND integrations.type = mapping.old_type
        SQL
      end
    end
  end
end

# File: lib/gitlab/background_migration/encrypt_static_object_token.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Populates "static_object_token_encrypted" field with encrypted versions
    # of values from "static_object_token" field
    class EncryptStaticObjectToken
      # rubocop:disable Style/Documentation
      class User < ActiveRecord::Base
        include ::EachBatch
        self.table_name = 'users'
        scope :with_static_object_token, -> { where.not(static_object_token: nil) }
        scope :without_static_object_token_encrypted, -> { where(static_object_token_encrypted: nil) }
      end
      # rubocop:enable Style/Documentation

      BATCH_SIZE = 100

      # Encrypts the plaintext token of every user in `start_id..end_id` that
      # has a token but no encrypted token yet, then marks the job as succeeded.
      def perform(start_id, end_id)
        ranged_query = User
          .where(id: start_id..end_id)
          .with_static_object_token
          .without_static_object_token_encrypted

        ranged_query.each_batch(of: BATCH_SIZE) do |sub_batch|
          first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))

          batch_query = User.unscoped
            .where(id: first..last)
            .with_static_object_token
            .without_static_object_token_encrypted

          user_tokens = batch_query.pluck(:id, :static_object_token)

          # Blank (e.g. empty-string) tokens are not encrypted; they yield nil
          # in the map and are dropped by `compact`.
          user_encrypted_tokens = user_tokens.map do |(id, plaintext_token)|
            next if plaintext_token.blank?

            [id, Gitlab::CryptoHelper.aes256_gcm_encrypt(plaintext_token)]
          end.compact

          # The emptiness check must run on the compacted list: a sub-batch in
          # which every token is blank would otherwise produce an empty VALUES
          # list and therefore invalid SQL.
          next if user_encrypted_tokens.empty?

          encrypted_tokens_sql = user_encrypted_tokens.map { |(id, token)| "(#{id}, '#{token}')" }.join(',')

          User.connection.execute(<<~SQL)
            WITH cte(cte_id, cte_token) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
              SELECT *
              FROM (VALUES #{encrypted_tokens_sql}) AS t (id, token)
            )
            UPDATE #{User.table_name}
            SET static_object_token_encrypted = cte_token
            FROM cte
            WHERE cte_id = id
          SQL
        end

        mark_job_as_succeeded(start_id, end_id)
      end

      private

      def mark_job_as_succeeded(*arguments)
        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
          self.class.name.demodulize,
          arguments
        )
      end
    end
  end
end

# File: lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # rubocop: disable Style/Documentation
    class FixIncorrectMaxSeatsUsed
      # Intentionally empty in this class body.
      def perform(batch = nil)
      end
    end
  end
end

# Tail of lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb
Gitlab::BackgroundMigration::FixIncorrectMaxSeatsUsed.prepend_mod_with('Gitlab::BackgroundMigration::FixIncorrectMaxSeatsUsed')

# File: lib/gitlab/background_migration/merge_topics_with_same_name.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # The class to merge project topics with the same case insensitive name
    class MergeTopicsWithSameName
      # Temporary AR model for topics
      class Topic < ActiveRecord::Base
        self.table_name = 'topics'
      end

      # Temporary AR model for project topic assignment
      class ProjectTopic < ActiveRecord::Base
        self.table_name = 'project_topics'
      end

      # For each name, loads all topics whose lower-cased name equals it, keeps
      # the first one (most projects, then most non-private projects, then
      # lowest id) and merges the rest into it.
      #
      # `topic_names` are compared against `LOWER(name)` as given.
      def perform(topic_names)
        topic_names.each do |topic_name|
          topics = Topic.where('LOWER(name) = ?', topic_name)
            .order(total_projects_count: :desc, non_private_projects_count: :desc, id: :asc)
            .to_a
          topic_to_keep = topics.shift
          merge_topics(topic_to_keep, topics) if topics.any?
        end
      end

      private

      # Moves project assignments, avatar and description from
      # `topics_to_remove` onto `topic_to_keep`, deletes the removed topics and
      # refreshes the kept topic's counters.
      def merge_topics(topic_to_keep, topics_to_remove)
        description = topic_to_keep.description

        topics_to_remove.each do |topic|
          # Adopt the first non-blank description. Checking `blank?` rather than
          # relying on `||=` also covers a kept topic whose description is an
          # empty string, which `||=` would never replace.
          description = topic.description if description.blank? && topic.description.present?
          process_avatar(topic_to_keep, topic) if topic.avatar.present?

          ProjectTopic.transaction do
            # Re-point assignments to the kept topic, except for projects that
            # are already assigned to it; whatever is left is deleted.
            ProjectTopic.where(topic_id: topic.id)
              .where.not(project_id: ProjectTopic.where(topic_id: topic_to_keep.id).select(:project_id))
              .update_all(topic_id: topic_to_keep.id)
            ProjectTopic.where(topic_id: topic.id).delete_all
          end
        end

        Topic.where(id: topics_to_remove.map(&:id)).delete_all

        topic_to_keep.update(
          description: description,
          total_projects_count: total_projects_count(topic_to_keep.id),
          non_private_projects_count: non_private_projects_count(topic_to_keep.id)
        )
      end

      # We intentionally use application code here because we need to copy/remove avatar files
      def process_avatar(topic_to_keep, topic_to_remove)
        topic_to_remove = ::Projects::Topic.find(topic_to_remove.id)
        topic_to_keep = ::Projects::Topic.find(topic_to_keep.id)
        unless topic_to_keep.avatar.present?
          topic_to_keep.avatar = topic_to_remove.avatar
          topic_to_keep.save!
        end

        topic_to_remove.remove_avatar!
        topic_to_remove.save!
      end

      def total_projects_count(topic_id)
        ProjectTopic.where(topic_id: topic_id).count
      end

      # Counts assignments whose project has visibility level 10 or 20.
      def non_private_projects_count(topic_id)
        ProjectTopic.joins('INNER JOIN projects ON project_topics.project_id = projects.id')
          .where(project_topics: { topic_id: topic_id }).where('projects.visibility_level in (10, 20)').count
      end
    end
  end
end

# File: lib/gitlab/background_migration/populate_namespace_statistics.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # This class creates/updates those namespace statistics
    # that haven't been created nor initialized.
    # It also updates the related namespace statistics
    class PopulateNamespaceStatistics
      # Runs `Groups::UpdateStatisticsService` for each group in `group_ids`,
      # passing `statistics` through to the service.
      def perform(group_ids, statistics)
        # Updating group statistics might involve calling Gitaly.
        # For example, when calculating `wiki_size`, we will need
        # to perform the request to check if the repo exists and
        # also the repository size.
        #
        # The `allow_n_plus_1_calls` method is only intended for
        # dev and test. It won't be raised in prod.
        ::Gitlab::GitalyClient.allow_n_plus_1_calls do
          relation(group_ids).each do |group|
            upsert_namespace_statistics(group, statistics)
          end
        end
      end

      private

      # Service errors are logged, not raised, so one failing group does not
      # stop the remaining groups from being processed.
      def upsert_namespace_statistics(group, statistics)
        response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute

        error_message("#{response.message} group: #{group.id}") if response.error?
      end

      def logger
        @logger ||= ::Gitlab::BackgroundMigration::Logger.build
      end

      def error_message(message)
        logger.error(message: "Namespace Statistics Migration: #{message}")
      end

      def relation(group_ids)
        Group.includes(:namespace_statistics).where(id: group_ids)
      end
    end
  end
end

Gitlab::BackgroundMigration::PopulateNamespaceStatistics.prepend_mod_with('Gitlab::BackgroundMigration::PopulateNamespaceStatistics')

# File: lib/gitlab/background_migration/populate_test_reports_issue_id.rb
# frozen_string_literal: true
# rubocop: disable Style/Documentation

module Gitlab
  module BackgroundMigration
    class PopulateTestReportsIssueId
      def perform(start_id, stop_id)
        # NO OP
      end
    end
  end
end

Gitlab::BackgroundMigration::PopulateTestReportsIssueId.prepend_mod

# File: lib/gitlab/background_migration/populate_topics_non_private_projects_count.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # The class to populate the non private projects counter of topics
    class PopulateTopicsNonPrivateProjectsCount
      SUB_BATCH_SIZE = 100

      # Temporary AR model for topics
      class Topic < ActiveRecord::Base
        include EachBatch

        self.table_name = 'topics'
      end

      # Recomputes `topics.non_private_projects_count` for topics with ids in
      # `start_id..stop_id`, one sub-batch per UPDATE statement. A project is
      # counted when its `visibility_level` is greater than 0.
      def perform(start_id, stop_id)
        Topic.where(id: start_id..stop_id).each_batch(of: SUB_BATCH_SIZE) do |batch|
          ApplicationRecord.connection.execute(<<~SQL)
            WITH batched_relation AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} (#{batch.select(:id).limit(SUB_BATCH_SIZE).to_sql})
            UPDATE topics
            SET non_private_projects_count = (
              SELECT COUNT(*)
              FROM project_topics
              INNER JOIN projects
              ON project_topics.project_id = projects.id
              WHERE project_topics.topic_id = batched_relation.id
              AND projects.visibility_level > 0
            )
            FROM batched_relation
            WHERE topics.id = batched_relation.id
          SQL
        end
      end
    end
  end
end

# File: lib/gitlab/background_migration/populate_vulnerability_reads.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # rubocop:disable Style/Documentation
    class PopulateVulnerabilityReads
      include Gitlab::Database::DynamicModelHelpers

      PAUSE_SECONDS = 0.1

      # Inserts `vulnerability_reads` rows for vulnerabilities with ids in
      # `start_id..end_id`, in sub-batches of `sub_batch_size`, pausing
      # PAUSE_SECONDS after each sub-batch, then marks the job as succeeded.
      def perform(start_id, end_id, sub_batch_size)
        vulnerability_model.where(id: start_id..end_id).each_batch(of: sub_batch_size) do |sub_batch|
          first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
          connection.execute(insert_query(first, last))

          sleep PAUSE_SECONDS
        end

        mark_job_as_succeeded(start_id, end_id, sub_batch_size)
      end

      private

      def vulnerability_model
        define_batchable_model('vulnerabilities', connection: connection)
      end

      def connection
        ApplicationRecord.connection
      end

      # INSERT ... SELECT for the given id range. Rows whose `vulnerability_id`
      # already exists in `vulnerability_reads` are skipped (ON CONFLICT DO NOTHING).
      def insert_query(start_id, end_id)
        <<~SQL
          INSERT INTO vulnerability_reads (
            vulnerability_id,
            project_id,
            scanner_id,
            report_type,
            severity,
            state,
            has_issues,
            resolved_on_default_branch,
            uuid,
            location_image
          )
          SELECT
            vulnerabilities.id,
            vulnerabilities.project_id,
            vulnerability_scanners.id,
            vulnerabilities.report_type,
            vulnerabilities.severity,
            vulnerabilities.state,
            CASE
            WHEN
              vulnerability_issue_links.vulnerability_id IS NOT NULL
            THEN
              true
            ELSE
              false
            END
            has_issues,
            vulnerabilities.resolved_on_default_branch,
            vulnerability_occurrences.uuid::uuid,
            vulnerability_occurrences.location ->> 'image'
          FROM
            vulnerabilities
          INNER JOIN vulnerability_occurrences ON vulnerability_occurrences.vulnerability_id = vulnerabilities.id
          INNER JOIN vulnerability_scanners ON vulnerability_scanners.id = vulnerability_occurrences.scanner_id
          LEFT JOIN vulnerability_issue_links ON vulnerability_issue_links.vulnerability_id = vulnerabilities.id
          WHERE vulnerabilities.id BETWEEN #{start_id} AND #{end_id}
          ON CONFLICT(vulnerability_id) DO NOTHING;
        SQL
      end

      def mark_job_as_succeeded(*arguments)
        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
          self.class.name.demodulize,
          arguments
        )
      end
    end
    # rubocop:enable Style/Documentation
  end
end

# File: lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
# frozen_string_literal: true

# Recalculates the UUIDv5 of vulnerability findings in an id range, removing
# findings that duplicate another finding or whose recalculated UUID collides.
# rubocop: disable Style/Documentation
class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength
  # rubocop: disable Gitlab/NamespacedClass
  class VulnerabilitiesIdentifier < ActiveRecord::Base
    self.table_name = "vulnerability_identifiers"
    has_many :primary_findings, class_name: 'VulnerabilitiesFinding', inverse_of: :primary_identifier, foreign_key: 'primary_identifier_id'
  end

  class VulnerabilitiesFinding < ActiveRecord::Base
    include EachBatch
    include ShaAttribute

    self.table_name = "vulnerability_occurrences"

    has_many :signatures, foreign_key: 'finding_id', class_name: 'VulnerabilityFindingSignature', inverse_of: :finding
    belongs_to :primary_identifier, class_name: 'VulnerabilitiesIdentifier', inverse_of: :primary_findings, foreign_key: 'primary_identifier_id'

    REPORT_TYPES = {
      sast: 0,
      dependency_scanning: 1,
      container_scanning: 2,
      dast: 3,
      secret_detection: 4,
      coverage_fuzzing: 5,
      api_fuzzing: 6,
      cluster_image_scanning: 7,
      generic: 99
    }.with_indifferent_access.freeze
    enum report_type: REPORT_TYPES

    sha_attribute :fingerprint
    sha_attribute :location_fingerprint
  end

  class VulnerabilityFindingSignature < ActiveRecord::Base
    include ShaAttribute

    self.table_name = 'vulnerability_finding_signatures'
    belongs_to :finding, foreign_key: 'finding_id', inverse_of: :signatures, class_name: 'VulnerabilitiesFinding'

    sha_attribute :signature_sha
  end

  class VulnerabilitiesFindingPipeline < ActiveRecord::Base
    include EachBatch
    self.table_name = "vulnerability_occurrence_pipelines"
  end

  class Vulnerability < ActiveRecord::Base
    include EachBatch
    self.table_name = "vulnerabilities"
  end

  class CalculateFindingUUID
    FINDING_NAMESPACES_IDS = {
      development: "a143e9e2-41b3-47bc-9a19-081d089229f4",
      test: "a143e9e2-41b3-47bc-9a19-081d089229f4",
      staging: "a6930898-a1b2-4365-ab18-12aa474d9b26",
      production: "58dc0f06-936c-43b3-93bb-71693f1b6570"
    }.freeze

    NAMESPACE_REGEX = /(\h{8})-(\h{4})-(\h{4})-(\h{4})-(\h{4})(\h{8})/.freeze
    PACK_PATTERN = "NnnnnN"

    # UUIDv5 of `value` under the namespace for the current Rails environment.
    def self.call(value)
      Digest::UUID.uuid_v5(namespace_id, value)
    end

    # Packed binary form of the namespace UUID for the current environment.
    # Raises KeyError (from `fetch`) for an environment not listed above.
    def self.namespace_id
      namespace_uuid = FINDING_NAMESPACES_IDS.fetch(Rails.env.to_sym)
      # Digest::UUID is broken when using an UUID in namespace_id
      # https://github.com/rails/rails/issues/37681#issue-520718028
      namespace_uuid.scan(NAMESPACE_REGEX).flatten.map { |s| s.to_i(16) }.pack(PACK_PATTERN)
    end
  end
  # rubocop: enable Gitlab/NamespacedClass

  # Processes findings with ids in `start_id..end_id` in batches of 50:
  # removes duplicates, recalculates UUIDs, bulk-updates them and finally marks
  # the job as succeeded. Any StandardError is logged, tracked and re-raised.
  # rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
  def perform(start_id, end_id)
    log_info('Migration started', start_id: start_id, end_id: end_id)

    VulnerabilitiesFinding
      .joins(:primary_identifier)
      .includes(:signatures)
      .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
      .where(id: start_id..end_id)
      .each_batch(of: 50) do |relation|
        duplicates = find_duplicates(relation)
        remove_findings(ids: duplicates) if duplicates.present?

        to_update = relation.reject { |finding| duplicates.include?(finding.id) }

        begin
          known_uuids = Set.new
          to_be_deleted = []

          mappings = to_update.each_with_object({}) do |finding, hash|
            uuid = calculate_uuid_v5_for_finding(finding)

            # `Set#add?` returns nil when the uuid was already seen in this batch.
            if known_uuids.add?(uuid)
              hash[finding] = { uuid: uuid }
            else
              to_be_deleted << finding.id
            end
          end

          # It is technically still possible to have duplicate uuids
          # if the data integrity is broken somehow and the primary identifiers of
          # the findings are pointing to different projects with the same fingerprint values.
          if to_be_deleted.present?
            log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)

            remove_findings(ids: to_be_deleted)
          end

          ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?

          log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
        rescue ActiveRecord::RecordNotUnique => error
          log_info('RecordNotUnique error received')

          match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)

          # This exception returns the **correct** UUIDv5 which probably comes from a later record
          # and it's the one we can drop in the easiest way before retrying the UPDATE query
          if match_data
            uuid = match_data[:uuid]
            log_info('Conflicting UUID found', uuid: uuid)

            id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
            remove_findings(ids: id) if id
            # NOTE(review): `retry` also runs when no finding with that uuid was
            # found, so nothing bounds the number of attempts in that case.
            retry
          else
            log_error('Couldnt find conflicting uuid')

            Gitlab::ErrorTracking.track_and_raise_exception(error)
          end
        end
      end

    mark_job_as_succeeded(start_id, end_id)
  rescue StandardError => error
    log_error('An exception happened')

    Gitlab::ErrorTracking.track_and_raise_exception(error)
  end
  # rubocop: enable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength

  private

  # Ids of findings that share report type, location fingerprint, primary
  # identifier and project with a record of `relation`.
  def find_duplicates(relation)
    to_exclude = []
    relation.flat_map do |record|
      # Assuming we're scanning id 31 and the duplicate is id 40
      # first we'd process 31 and add 40 to the list of ids to remove
      # then we would process record 40 and add 31 to the list of removals
      # so we would drop both records
      # Records already visited are therefore excluded from later lookups.
      to_exclude << record.id

      VulnerabilitiesFinding.where(
        report_type: record.report_type,
        location_fingerprint: record.location_fingerprint,
        primary_identifier_id: record.primary_identifier_id,
        project_id: record.project_id
      ).where.not(id: to_exclude).pluck(:id)
    end
  end

  # Deletes the findings with the given id(s) together with their pipeline
  # rows and the vulnerabilities they point to.
  def remove_findings(ids:)
    ids = Array(ids)
    log_info('Removing Findings and associated records', ids: ids)

    vulnerability_ids = VulnerabilitiesFinding.where(id: ids).pluck(:vulnerability_id).uniq.compact

    VulnerabilitiesFindingPipeline.where(occurrence_id: ids).each_batch { |batch| batch.delete_all }
    Vulnerability.where(id: vulnerability_ids).each_batch { |batch| batch.delete_all }
    VulnerabilitiesFinding.where(id: ids).delete_all
  end

  # UUIDv5 built from report type, primary identifier fingerprint, location
  # fingerprint and project id, joined with '-'. The location fingerprint is
  # the `signature_sha` of the signature that sorts last by algorithm type,
  # falling back to the finding's own `location_fingerprint`.
  def calculate_uuid_v5_for_finding(vulnerability_finding)
    return unless vulnerability_finding

    signatures = vulnerability_finding.signatures.sort_by { |signature| signature.algorithm_type_before_type_cast }
    location_fingerprint = signatures.last&.signature_sha || vulnerability_finding.location_fingerprint

    uuid_v5_name_components = {
      report_type: vulnerability_finding.report_type,
      primary_identifier_fingerprint: vulnerability_finding.fingerprint,
      location_fingerprint: location_fingerprint,
      project_id: vulnerability_finding.project_id
    }

    name = uuid_v5_name_components.values.join('-')

    CalculateFindingUUID.call(name)
  end

  def log_info(message, **extra)
    logger.info(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
  end

  def log_error(message, **extra)
    logger.error(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
  end

  def logger
    @logger ||= Gitlab::BackgroundMigration::Logger.build
  end

  def mark_job_as_succeeded(*arguments)
    Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
      'RecalculateVulnerabilitiesOccurrencesUuid',
      arguments
    )
  end
end

# File: lib/gitlab/background_migration/recalculate_vulnerability_finding_signatures_for_findings.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # rubocop: disable Style/Documentation
    class RecalculateVulnerabilityFindingSignaturesForFindings
      # Intentionally empty in this class body.
      def perform(start_id, stop_id)
      end
    end
  end
end

Gitlab::BackgroundMigration::RecalculateVulnerabilityFindingSignaturesForFindings.prepend_mod

# File: lib/gitlab/background_migration/remove_all_trace_expiration_dates.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Removing expire_at timestamps that shouldn't have
    # been written to traces on gitlab.com.
    class RemoveAllTraceExpirationDates
      include Gitlab::Database::MigrationHelpers

      BATCH_SIZE = 1_000

      # Stubbed class to connect to the CI database
      # connects_to has to be called in abstract classes.
      class MultiDbAdaptableClass < ActiveRecord::Base
        self.abstract_class = true

        if Gitlab::Database.has_config?(:ci)
          connects_to database: { writing: :ci, reading: :ci }
        end
      end

      # Stubbed class to access the ci_job_artifacts table
      class JobArtifact < MultiDbAdaptableClass
        include EachBatch

        self.table_name = 'ci_job_artifacts'

        # Plain decimal month literals are used here; a leading zero makes Ruby
        # read an integer literal as octal.
        TARGET_TIMESTAMPS = [
          Date.new(2021, 4, 22).midnight.utc,
          Date.new(2021, 5, 22).midnight.utc,
          Date.new(2021, 6, 22).midnight.utc,
          Date.new(2022, 1, 22).midnight.utc,
          Date.new(2022, 2, 22).midnight.utc,
          Date.new(2022, 3, 22).midnight.utc,
          Date.new(2022, 4, 22).midnight.utc
        ].freeze

        scope :traces, -> { where(file_type: 3) }
        scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
        scope :in_targeted_timestamps, -> { where(expire_at: TARGET_TIMESTAMPS) }
      end

      # On GitLab.com only: clears `expire_at` on trace artifacts with ids in
      # `start_id..end_id` whose `expire_at` equals one of TARGET_TIMESTAMPS.
      # Does nothing on other installations.
      def perform(start_id, end_id)
        return unless Gitlab.com?

        JobArtifact.traces
          .between(start_id, end_id)
          .in_targeted_timestamps
          .each_batch(of: BATCH_SIZE) { |batch| batch.update_all(expire_at: nil) }
      end
    end
  end
end

# File: lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Class to populate spent_at for timelogs
    class UpdateTimelogsNullSpentAt
      include Gitlab::Database::DynamicModelHelpers

      BATCH_SIZE = 100

      # Sets `spent_at` to `created_at` for timelogs with ids in
      # `start_id..stop_id` whose `spent_at` is NULL, BATCH_SIZE rows at a time.
      def perform(start_id, stop_id)
        define_batchable_model('timelogs', connection: connection)
          .where(spent_at: nil, id: start_id..stop_id)
          .each_batch(of: BATCH_SIZE) do |subbatch|
            # The aggregate expression is raw SQL, so it is marked as such
            # explicitly with Arel.sql.
            batch_start, batch_end = subbatch.pick(Arel.sql('min(id), max(id)'))

            update_timelogs(batch_start, batch_end)
          end
      end

      # Runs the UPDATE for ids between `batch_start` and `batch_stop` (inclusive).
      def update_timelogs(batch_start, batch_stop)
        execute(<<~SQL)
          UPDATE timelogs
          SET spent_at = created_at
          WHERE spent_at IS NULL
          AND timelogs.id BETWEEN #{batch_start} AND #{batch_stop};
        SQL
      end

      def connection
        @connection ||= ApplicationRecord.connection
      end

      def execute(sql)
        connection.execute(sql)
      end
    end
  end
end