Diffstat (limited to 'lib')
-rw-r--r--  lib/gitlab/background_migration/backfill_ci_queuing_tables.rb  153
-rw-r--r--  lib/gitlab/background_migration/backfill_integrations_type_new.rb  86
-rw-r--r--  lib/gitlab/background_migration/encrypt_static_object_token.rb  70
-rw-r--r--  lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb  13
-rw-r--r--  lib/gitlab/background_migration/merge_topics_with_same_name.rb  76
-rw-r--r--  lib/gitlab/background_migration/populate_namespace_statistics.rb  47
-rw-r--r--  lib/gitlab/background_migration/populate_test_reports_issue_id.rb  14
-rw-r--r--  lib/gitlab/background_migration/populate_topics_non_private_projects_count.rb  36
-rw-r--r--  lib/gitlab/background_migration/populate_vulnerability_reads.rb  84
-rw-r--r--  lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb  218
-rw-r--r--  lib/gitlab/background_migration/recalculate_vulnerability_finding_signatures_for_findings.rb  13
-rw-r--r--  lib/gitlab/background_migration/remove_all_trace_expiration_dates.rb  53
-rw-r--r--  lib/gitlab/background_migration/update_timelogs_null_spent_at.rb  39
13 files changed, 902 insertions, 0 deletions
diff --git a/lib/gitlab/background_migration/backfill_ci_queuing_tables.rb b/lib/gitlab/background_migration/backfill_ci_queuing_tables.rb
new file mode 100644
index 00000000000..63112b52584
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_ci_queuing_tables.rb
@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Ensure queuing entries are present even if admins skip upgrades.
+ class BackfillCiQueuingTables
+ class Namespace < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'namespaces'
+ self.inheritance_column = :_type_disabled
+ end
+
+ class Project < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'projects'
+
+ belongs_to :namespace
+ has_one :ci_cd_settings, class_name: 'Gitlab::BackgroundMigration::BackfillCiQueuingTables::ProjectCiCdSetting'
+
+ def group_runners_enabled?
+ return false unless ci_cd_settings
+
+ ci_cd_settings.group_runners_enabled?
+ end
+ end
+
+ class ProjectCiCdSetting < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'project_ci_cd_settings'
+ end
+
+ class Taggings < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'taggings'
+ end
+
+ module Ci
+ class Build < ActiveRecord::Base # rubocop:disable Style/Documentation
+ include EachBatch
+
+ self.table_name = 'ci_builds'
+ self.inheritance_column = :_type_disabled
+
+ belongs_to :project
+
+ scope :pending, -> do
+ where(status: :pending, type: 'Ci::Build', runner_id: nil)
+ end
+
+ def self.each_batch(of: 1000, column: :id, order: { runner_id: :asc, id: :asc }, order_hint: nil)
+ start = except(:select).select(column).reorder(order)
+ start = start.take
+ return unless start
+
+ start_id = start[column]
+ arel_table = self.arel_table
+
+ 1.step do |index|
+ start_cond = arel_table[column].gteq(start_id)
+ stop = except(:select).select(column).where(start_cond).reorder(order)
+ stop = stop.offset(of).limit(1).take
+ relation = where(start_cond)
+
+ if stop
+ stop_id = stop[column]
+ start_id = stop_id
+ stop_cond = arel_table[column].lt(stop_id)
+ relation = relation.where(stop_cond)
+ end
+
+ # Any ORDER BYs are useless for this relation and can lead to less
+ # efficient UPDATE queries, hence we get rid of them.
+ relation = relation.except(:order)
+
+ # Using unscoped is necessary to prevent leaking the current scope used by
+ # ActiveRecord to chain the `each_batch` method.
+ unscoped { yield relation, index }
+
+ break unless stop
+ end
+ end
+
+ def tags_ids
+ BackfillCiQueuingTables::Taggings
+ .where(taggable_id: id, taggable_type: 'CommitStatus')
+ .pluck(:tag_id)
+ end
+ end
+
+ class PendingBuild < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'ci_pending_builds'
+
+ class << self
+ def upsert_from_build!(build)
+ entry = self.new(args_from_build(build))
+
+ self.upsert(
+ entry.attributes.compact,
+ returning: %w[build_id],
+ unique_by: :build_id)
+ end
+
+ def args_from_build(build)
+ project = build.project
+
+ {
+ build_id: build.id,
+ project_id: build.project_id,
+ protected: build.protected?,
+ namespace_id: project.namespace_id,
+ tag_ids: build.tags_ids,
+ instance_runners_enabled: project.shared_runners_enabled?,
+ namespace_traversal_ids: namespace_traversal_ids(project)
+ }
+ end
+
+ def namespace_traversal_ids(project)
+ if project.group_runners_enabled?
+ project.namespace.traversal_ids
+ else
+ []
+ end
+ end
+ end
+ end
+ end
+
+ BATCH_SIZE = 100
+
+ def perform(start_id, end_id)
+ scope = BackfillCiQueuingTables::Ci::Build.pending.where(id: start_id..end_id)
+ pending_builds_query = BackfillCiQueuingTables::Ci::PendingBuild
+ .where('ci_builds.id = ci_pending_builds.build_id')
+ .select(1)
+
+ scope.each_batch(of: BATCH_SIZE) do |builds|
+ builds = builds.where('NOT EXISTS (?)', pending_builds_query)
+ builds = builds.includes(:project, project: [:namespace, :ci_cd_settings])
+
+ builds.each do |build|
+ BackfillCiQueuingTables::Ci::PendingBuild.upsert_from_build!(build)
+ end
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ end
+
+ private
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ self.class.name.demodulize,
+ arguments)
+ end
+ end
+ end
+end
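A minimal sketch of invoking this job directly (for example from a Rails console or spec), assuming a made-up ID range; builds that already have a `ci_pending_builds` row are skipped by the `NOT EXISTS` filter in `perform`:

    # Hypothetical invocation; the ID range is an assumption for illustration.
    Gitlab::BackgroundMigration::BackfillCiQueuingTables.new.perform(1, 10_000)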
diff --git a/lib/gitlab/background_migration/backfill_integrations_type_new.rb b/lib/gitlab/background_migration/backfill_integrations_type_new.rb
new file mode 100644
index 00000000000..b07d9371c19
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_integrations_type_new.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Backfills the new `integrations.type_new` column, which contains
+ # the real class name, rather than the legacy class name in `type`
+ # which is mapped via `Gitlab::Integrations::StiType`.
+ class BackfillIntegrationsTypeNew
+ include Gitlab::Database::DynamicModelHelpers
+
+ def perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
+ parent_batch_relation = define_batchable_model(batch_table, connection: connection)
+ .where(batch_column => start_id..stop_id)
+
+ parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
+ process_sub_batch(sub_batch)
+
+ sleep(pause_ms * 0.001) if pause_ms > 0
+ end
+ end
+
+ private
+
+ def connection
+ ApplicationRecord.connection
+ end
+
+ def process_sub_batch(sub_batch)
+ # Extract the start/stop IDs from the current sub-batch
+ sub_start_id, sub_stop_id = sub_batch.pick(Arel.sql('MIN(id), MAX(id)'))
+
+ # This matches the mapping from the INSERT trigger added in
+ # db/migrate/20210721135638_add_triggers_to_integrations_type_new.rb
+ connection.execute(<<~SQL)
+ WITH mapping(old_type, new_type) AS (VALUES
+ ('AsanaService', 'Integrations::Asana'),
+ ('AssemblaService', 'Integrations::Assembla'),
+ ('BambooService', 'Integrations::Bamboo'),
+ ('BugzillaService', 'Integrations::Bugzilla'),
+ ('BuildkiteService', 'Integrations::Buildkite'),
+ ('CampfireService', 'Integrations::Campfire'),
+ ('ConfluenceService', 'Integrations::Confluence'),
+ ('CustomIssueTrackerService', 'Integrations::CustomIssueTracker'),
+ ('DatadogService', 'Integrations::Datadog'),
+ ('DiscordService', 'Integrations::Discord'),
+ ('DroneCiService', 'Integrations::DroneCi'),
+ ('EmailsOnPushService', 'Integrations::EmailsOnPush'),
+ ('EwmService', 'Integrations::Ewm'),
+ ('ExternalWikiService', 'Integrations::ExternalWiki'),
+ ('FlowdockService', 'Integrations::Flowdock'),
+ ('HangoutsChatService', 'Integrations::HangoutsChat'),
+ ('IrkerService', 'Integrations::Irker'),
+ ('JenkinsService', 'Integrations::Jenkins'),
+ ('JiraService', 'Integrations::Jira'),
+ ('MattermostService', 'Integrations::Mattermost'),
+ ('MattermostSlashCommandsService', 'Integrations::MattermostSlashCommands'),
+ ('MicrosoftTeamsService', 'Integrations::MicrosoftTeams'),
+ ('MockCiService', 'Integrations::MockCi'),
+ ('MockMonitoringService', 'Integrations::MockMonitoring'),
+ ('PackagistService', 'Integrations::Packagist'),
+ ('PipelinesEmailService', 'Integrations::PipelinesEmail'),
+ ('PivotaltrackerService', 'Integrations::Pivotaltracker'),
+ ('PrometheusService', 'Integrations::Prometheus'),
+ ('PushoverService', 'Integrations::Pushover'),
+ ('RedmineService', 'Integrations::Redmine'),
+ ('SlackService', 'Integrations::Slack'),
+ ('SlackSlashCommandsService', 'Integrations::SlackSlashCommands'),
+ ('TeamcityService', 'Integrations::Teamcity'),
+ ('UnifyCircuitService', 'Integrations::UnifyCircuit'),
+ ('WebexTeamsService', 'Integrations::WebexTeams'),
+ ('YoutrackService', 'Integrations::Youtrack'),
+
+ -- EE-only integrations
+ ('GithubService', 'Integrations::Github'),
+ ('GitlabSlackApplicationService', 'Integrations::GitlabSlackApplication')
+ )
+
+ UPDATE integrations SET type_new = mapping.new_type
+ FROM mapping
+ WHERE integrations.id BETWEEN #{sub_start_id} AND #{sub_stop_id}
+ AND integrations.type = mapping.old_type
+ SQL
+ end
+ end
+ end
+end
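This job follows the batched background migration calling convention, so a hypothetical invocation passes the table, column, and throttling arguments explicitly (all values below are assumptions for illustration):

    # Hypothetical invocation with made-up bounds and throttling values.
    Gitlab::BackgroundMigration::BackfillIntegrationsTypeNew.new.perform(
      1,              # start_id
      10_000,         # stop_id
      :integrations,  # batch_table
      :id,            # batch_column
      100,            # sub_batch_size
      100             # pause_ms between sub-batches
    )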
diff --git a/lib/gitlab/background_migration/encrypt_static_object_token.rb b/lib/gitlab/background_migration/encrypt_static_object_token.rb
new file mode 100644
index 00000000000..961dea028c9
--- /dev/null
+++ b/lib/gitlab/background_migration/encrypt_static_object_token.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Populates "static_object_token_encrypted" field with encrypted versions
+ # of values from "static_object_token" field
+ class EncryptStaticObjectToken
+ # rubocop:disable Style/Documentation
+ class User < ActiveRecord::Base
+ include ::EachBatch
+ self.table_name = 'users'
+ scope :with_static_object_token, -> { where.not(static_object_token: nil) }
+ scope :without_static_object_token_encrypted, -> { where(static_object_token_encrypted: nil) }
+ end
+ # rubocop:enable Style/Documentation
+
+ BATCH_SIZE = 100
+
+ def perform(start_id, end_id)
+ ranged_query = User
+ .where(id: start_id..end_id)
+ .with_static_object_token
+ .without_static_object_token_encrypted
+
+ ranged_query.each_batch(of: BATCH_SIZE) do |sub_batch|
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
+
+ batch_query = User.unscoped
+ .where(id: first..last)
+ .with_static_object_token
+ .without_static_object_token_encrypted
+
+ user_tokens = batch_query.pluck(:id, :static_object_token)
+
+ user_encrypted_tokens = user_tokens.map do |(id, plaintext_token)|
+ next if plaintext_token.blank?
+
+ [id, Gitlab::CryptoHelper.aes256_gcm_encrypt(plaintext_token)]
+ end
+
+ encrypted_tokens_sql = user_encrypted_tokens.compact.map { |(id, token)| "(#{id}, '#{token}')" }.join(',')
+
+ next unless user_encrypted_tokens.present?
+
+ User.connection.execute(<<~SQL)
+ WITH cte(cte_id, cte_token) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
+ SELECT *
+ FROM (VALUES #{encrypted_tokens_sql}) AS t (id, token)
+ )
+ UPDATE #{User.table_name}
+ SET static_object_token_encrypted = cte_token
+ FROM cte
+ WHERE cte_id = id
+ SQL
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ end
+
+ private
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ self.class.name.demodulize,
+ arguments
+ )
+ end
+ end
+ end
+end
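A sketch of running the encryption job over an assumed ID range; the `with_static_object_token` and `without_static_object_token_encrypted` scopes ensure only users that still need encryption are touched:

    # Hypothetical invocation; the ID range is an assumption for illustration.
    Gitlab::BackgroundMigration::EncryptStaticObjectToken.new.perform(1, 5_000)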
diff --git a/lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb b/lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb
new file mode 100644
index 00000000000..2c09b8c0b24
--- /dev/null
+++ b/lib/gitlab/background_migration/fix_incorrect_max_seats_used.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # rubocop: disable Style/Documentation
+ class FixIncorrectMaxSeatsUsed
+ def perform(batch = nil)
+ end
+ end
+ end
+end
+
+Gitlab::BackgroundMigration::FixIncorrectMaxSeatsUsed.prepend_mod_with('Gitlab::BackgroundMigration::FixIncorrectMaxSeatsUsed')
diff --git a/lib/gitlab/background_migration/merge_topics_with_same_name.rb b/lib/gitlab/background_migration/merge_topics_with_same_name.rb
new file mode 100644
index 00000000000..07231098a5f
--- /dev/null
+++ b/lib/gitlab/background_migration/merge_topics_with_same_name.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # The class to merge project topics with the same case-insensitive name
+ class MergeTopicsWithSameName
+ # Temporary AR model for topics
+ class Topic < ActiveRecord::Base
+ self.table_name = 'topics'
+ end
+
+ # Temporary AR model for project topic assignment
+ class ProjectTopic < ActiveRecord::Base
+ self.table_name = 'project_topics'
+ end
+
+ def perform(topic_names)
+ topic_names.each do |topic_name|
+ topics = Topic.where('LOWER(name) = ?', topic_name)
+ .order(total_projects_count: :desc, non_private_projects_count: :desc, id: :asc)
+ .to_a
+ topic_to_keep = topics.shift
+ merge_topics(topic_to_keep, topics) if topics.any?
+ end
+ end
+
+ private
+
+ def merge_topics(topic_to_keep, topics_to_remove)
+ description = topic_to_keep.description
+
+ topics_to_remove.each do |topic|
+ description ||= topic.description if topic.description.present?
+ process_avatar(topic_to_keep, topic) if topic.avatar.present?
+
+ ProjectTopic.transaction do
+ ProjectTopic.where(topic_id: topic.id)
+ .where.not(project_id: ProjectTopic.where(topic_id: topic_to_keep).select(:project_id))
+ .update_all(topic_id: topic_to_keep.id)
+ ProjectTopic.where(topic_id: topic.id).delete_all
+ end
+ end
+
+ Topic.where(id: topics_to_remove).delete_all
+
+ topic_to_keep.update(
+ description: description,
+ total_projects_count: total_projects_count(topic_to_keep.id),
+ non_private_projects_count: non_private_projects_count(topic_to_keep.id)
+ )
+ end
+
+ # We intentionally use application code here because we need to copy/remove avatar files
+ def process_avatar(topic_to_keep, topic_to_remove)
+ topic_to_remove = ::Projects::Topic.find(topic_to_remove.id)
+ topic_to_keep = ::Projects::Topic.find(topic_to_keep.id)
+ unless topic_to_keep.avatar.present?
+ topic_to_keep.avatar = topic_to_remove.avatar
+ topic_to_keep.save!
+ end
+
+ topic_to_remove.remove_avatar!
+ topic_to_remove.save!
+ end
+
+ def total_projects_count(topic_id)
+ ProjectTopic.where(topic_id: topic_id).count
+ end
+
+ def non_private_projects_count(topic_id)
+ ProjectTopic.joins('INNER JOIN projects ON project_topics.project_id = projects.id')
+ .where(project_topics: { topic_id: topic_id }).where('projects.visibility_level in (10, 20)').count
+ end
+ end
+ end
+end
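A sketch of calling the merge job; the topic names are assumptions for illustration and are expected to be lowercase already, since `perform` compares them against `LOWER(name)`:

    # Hypothetical invocation with made-up topic names.
    Gitlab::BackgroundMigration::MergeTopicsWithSameName.new.perform(%w[ruby rails])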
diff --git a/lib/gitlab/background_migration/populate_namespace_statistics.rb b/lib/gitlab/background_migration/populate_namespace_statistics.rb
new file mode 100644
index 00000000000..97927ef48c2
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_namespace_statistics.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # This class creates/updates those namespace statistics
+ # that haven't been created or initialized yet.
+ # It also updates the related namespace statistics.
+ class PopulateNamespaceStatistics
+ def perform(group_ids, statistics)
+ # Updating group statistics might involve calling Gitaly.
+ # For example, when calculating `wiki_size`, we will need
+ # to perform the request to check if the repo exists and
+ # also the repository size.
+ #
+ # The `allow_n_plus_1_calls` block is only enforced in
+ # dev and test. The N+1 exception won't be raised in prod.
+ ::Gitlab::GitalyClient.allow_n_plus_1_calls do
+ relation(group_ids).each do |group|
+ upsert_namespace_statistics(group, statistics)
+ end
+ end
+ end
+
+ private
+
+ def upsert_namespace_statistics(group, statistics)
+ response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute
+
+ error_message("#{response.message} group: #{group.id}") if response.error?
+ end
+
+ def logger
+ @logger ||= ::Gitlab::BackgroundMigration::Logger.build
+ end
+
+ def error_message(message)
+ logger.error(message: "Namespace Statistics Migration: #{message}")
+ end
+
+ def relation(group_ids)
+ Group.includes(:namespace_statistics).where(id: group_ids)
+ end
+ end
+ end
+end
+
+Gitlab::BackgroundMigration::PopulateNamespaceStatistics.prepend_mod_with('Gitlab::BackgroundMigration::PopulateNamespaceStatistics')
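A sketch of invoking the statistics backfill; the group IDs and the statistics list are assumptions for illustration (`wiki_size` being one statistic `Groups::UpdateStatisticsService` can refresh):

    # Hypothetical invocation with made-up group IDs and statistics columns.
    Gitlab::BackgroundMigration::PopulateNamespaceStatistics.new.perform([42, 43], [:wiki_size])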
diff --git a/lib/gitlab/background_migration/populate_test_reports_issue_id.rb b/lib/gitlab/background_migration/populate_test_reports_issue_id.rb
new file mode 100644
index 00000000000..301efd0c943
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_test_reports_issue_id.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+# rubocop: disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class PopulateTestReportsIssueId
+ def perform(start_id, stop_id)
+ # NO OP
+ end
+ end
+ end
+end
+
+Gitlab::BackgroundMigration::PopulateTestReportsIssueId.prepend_mod
diff --git a/lib/gitlab/background_migration/populate_topics_non_private_projects_count.rb b/lib/gitlab/background_migration/populate_topics_non_private_projects_count.rb
new file mode 100644
index 00000000000..1f2b55004e4
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_topics_non_private_projects_count.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # The class to populate the non-private projects counter of topics
+ class PopulateTopicsNonPrivateProjectsCount
+ SUB_BATCH_SIZE = 100
+
+ # Temporary AR model for topics
+ class Topic < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'topics'
+ end
+
+ def perform(start_id, stop_id)
+ Topic.where(id: start_id..stop_id).each_batch(of: SUB_BATCH_SIZE) do |batch|
+ ApplicationRecord.connection.execute(<<~SQL)
+ WITH batched_relation AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} (#{batch.select(:id).limit(SUB_BATCH_SIZE).to_sql})
+ UPDATE topics
+ SET non_private_projects_count = (
+ SELECT COUNT(*)
+ FROM project_topics
+ INNER JOIN projects
+ ON project_topics.project_id = projects.id
+ WHERE project_topics.topic_id = batched_relation.id
+ AND projects.visibility_level > 0
+ )
+ FROM batched_relation
+ WHERE topics.id = batched_relation.id
+ SQL
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/populate_vulnerability_reads.rb b/lib/gitlab/background_migration/populate_vulnerability_reads.rb
new file mode 100644
index 00000000000..656c62d9ee5
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_vulnerability_reads.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # rubocop:disable Style/Documentation
+ class PopulateVulnerabilityReads
+ include Gitlab::Database::DynamicModelHelpers
+
+ PAUSE_SECONDS = 0.1
+
+ def perform(start_id, end_id, sub_batch_size)
+ vulnerability_model.where(id: start_id..end_id).each_batch(of: sub_batch_size) do |sub_batch|
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
+ connection.execute(insert_query(first, last))
+
+ sleep PAUSE_SECONDS
+ end
+
+ mark_job_as_succeeded(start_id, end_id, sub_batch_size)
+ end
+
+ private
+
+ def vulnerability_model
+ define_batchable_model('vulnerabilities', connection: connection)
+ end
+
+ def connection
+ ApplicationRecord.connection
+ end
+
+ def insert_query(start_id, end_id)
+ <<~SQL
+ INSERT INTO vulnerability_reads (
+ vulnerability_id,
+ project_id,
+ scanner_id,
+ report_type,
+ severity,
+ state,
+ has_issues,
+ resolved_on_default_branch,
+ uuid,
+ location_image
+ )
+ SELECT
+ vulnerabilities.id,
+ vulnerabilities.project_id,
+ vulnerability_scanners.id,
+ vulnerabilities.report_type,
+ vulnerabilities.severity,
+ vulnerabilities.state,
+ CASE
+ WHEN
+ vulnerability_issue_links.vulnerability_id IS NOT NULL
+ THEN
+ true
+ ELSE
+ false
+ END
+ has_issues,
+ vulnerabilities.resolved_on_default_branch,
+ vulnerability_occurrences.uuid::uuid,
+ vulnerability_occurrences.location ->> 'image'
+ FROM
+ vulnerabilities
+ INNER JOIN vulnerability_occurrences ON vulnerability_occurrences.vulnerability_id = vulnerabilities.id
+ INNER JOIN vulnerability_scanners ON vulnerability_scanners.id = vulnerability_occurrences.scanner_id
+ LEFT JOIN vulnerability_issue_links ON vulnerability_issue_links.vulnerability_id = vulnerabilities.id
+ WHERE vulnerabilities.id BETWEEN #{start_id} AND #{end_id}
+ ON CONFLICT(vulnerability_id) DO NOTHING;
+ SQL
+ end
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ self.class.name.demodulize,
+ arguments
+ )
+ end
+ end
+ # rubocop:enable Style/Documentation
+ end
+end
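A sketch of invoking the backfill; the range and sub-batch size are assumptions for illustration, and rows that already exist in `vulnerability_reads` are left untouched by the `ON CONFLICT ... DO NOTHING` clause:

    # Hypothetical invocation; all values are assumptions for illustration.
    Gitlab::BackgroundMigration::PopulateVulnerabilityReads.new.perform(1, 10_000, 100)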
diff --git a/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
new file mode 100644
index 00000000000..9a42d035285
--- /dev/null
+++ b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
@@ -0,0 +1,218 @@
+# frozen_string_literal: true
+
+# rubocop: disable Style/Documentation
+class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength
+ # rubocop: disable Gitlab/NamespacedClass
+ class VulnerabilitiesIdentifier < ActiveRecord::Base
+ self.table_name = "vulnerability_identifiers"
+ has_many :primary_findings, class_name: 'VulnerabilitiesFinding', inverse_of: :primary_identifier, foreign_key: 'primary_identifier_id'
+ end
+
+ class VulnerabilitiesFinding < ActiveRecord::Base
+ include EachBatch
+ include ShaAttribute
+
+ self.table_name = "vulnerability_occurrences"
+
+ has_many :signatures, foreign_key: 'finding_id', class_name: 'VulnerabilityFindingSignature', inverse_of: :finding
+ belongs_to :primary_identifier, class_name: 'VulnerabilitiesIdentifier', inverse_of: :primary_findings, foreign_key: 'primary_identifier_id'
+
+ REPORT_TYPES = {
+ sast: 0,
+ dependency_scanning: 1,
+ container_scanning: 2,
+ dast: 3,
+ secret_detection: 4,
+ coverage_fuzzing: 5,
+ api_fuzzing: 6,
+ cluster_image_scanning: 7,
+ generic: 99
+ }.with_indifferent_access.freeze
+ enum report_type: REPORT_TYPES
+
+ sha_attribute :fingerprint
+ sha_attribute :location_fingerprint
+ end
+
+ class VulnerabilityFindingSignature < ActiveRecord::Base
+ include ShaAttribute
+
+ self.table_name = 'vulnerability_finding_signatures'
+ belongs_to :finding, foreign_key: 'finding_id', inverse_of: :signatures, class_name: 'VulnerabilitiesFinding'
+
+ sha_attribute :signature_sha
+ end
+
+ class VulnerabilitiesFindingPipeline < ActiveRecord::Base
+ include EachBatch
+ self.table_name = "vulnerability_occurrence_pipelines"
+ end
+
+ class Vulnerability < ActiveRecord::Base
+ include EachBatch
+ self.table_name = "vulnerabilities"
+ end
+
+ class CalculateFindingUUID
+ FINDING_NAMESPACES_IDS = {
+ development: "a143e9e2-41b3-47bc-9a19-081d089229f4",
+ test: "a143e9e2-41b3-47bc-9a19-081d089229f4",
+ staging: "a6930898-a1b2-4365-ab18-12aa474d9b26",
+ production: "58dc0f06-936c-43b3-93bb-71693f1b6570"
+ }.freeze
+
+ NAMESPACE_REGEX = /(\h{8})-(\h{4})-(\h{4})-(\h{4})-(\h{4})(\h{8})/.freeze
+ PACK_PATTERN = "NnnnnN"
+
+ def self.call(value)
+ Digest::UUID.uuid_v5(namespace_id, value)
+ end
+
+ def self.namespace_id
+ namespace_uuid = FINDING_NAMESPACES_IDS.fetch(Rails.env.to_sym)
+ # Digest::UUID is broken when using a UUID in namespace_id
+ # https://github.com/rails/rails/issues/37681#issue-520718028
+ namespace_uuid.scan(NAMESPACE_REGEX).flatten.map { |s| s.to_i(16) }.pack(PACK_PATTERN)
+ end
+ end
+ # rubocop: enable Gitlab/NamespacedClass
+
+ # rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
+ def perform(start_id, end_id)
+ log_info('Migration started', start_id: start_id, end_id: end_id)
+
+ VulnerabilitiesFinding
+ .joins(:primary_identifier)
+ .includes(:signatures)
+ .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
+ .where(id: start_id..end_id)
+ .each_batch(of: 50) do |relation|
+ duplicates = find_duplicates(relation)
+ remove_findings(ids: duplicates) if duplicates.present?
+
+ to_update = relation.reject { |finding| duplicates.include?(finding.id) }
+
+ begin
+ known_uuids = Set.new
+ to_be_deleted = []
+
+ mappings = to_update.each_with_object({}) do |finding, hash|
+ uuid = calculate_uuid_v5_for_finding(finding)
+
+ if known_uuids.add?(uuid)
+ hash[finding] = { uuid: uuid }
+ else
+ to_be_deleted << finding.id
+ end
+ end
+
+ # It is technically still possible to have duplicate uuids
+ # if the data integrity is broken somehow and the primary identifiers of
+ # the findings are pointing to different projects with the same fingerprint values.
+ if to_be_deleted.present?
+ log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)
+
+ remove_findings(ids: to_be_deleted)
+ end
+
+ ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?
+
+ log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
+ rescue ActiveRecord::RecordNotUnique => error
+ log_info('RecordNotUnique error received')
+
+ match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)
+
+ # This exception message contains the **correct** UUIDv5, which probably comes from a later record;
+ # that record is the one we can drop most easily before retrying the UPDATE query
+ if match_data
+ uuid = match_data[:uuid]
+ log_info('Conflicting UUID found', uuid: uuid)
+
+ id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
+ remove_findings(ids: id) if id
+ retry
+ else
+ log_error("Couldn't find conflicting uuid")
+
+ Gitlab::ErrorTracking.track_and_raise_exception(error)
+ end
+ end
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ rescue StandardError => error
+ log_error('An exception happened')
+
+ Gitlab::ErrorTracking.track_and_raise_exception(error)
+ end
+ # rubocop: enable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
+
+ private
+
+ def find_duplicates(relation)
+ to_exclude = []
+ relation.flat_map do |record|
+ # Assuming we're scanning id 31 and the duplicate is id 40
+ # first we'd process 31 and add 40 to the list of ids to remove
+ # then we would process record 40 and add 31 to the list of removals
+ # so we would drop both records
+ to_exclude << record.id
+
+ VulnerabilitiesFinding.where(
+ report_type: record.report_type,
+ location_fingerprint: record.location_fingerprint,
+ primary_identifier_id: record.primary_identifier_id,
+ project_id: record.project_id
+ ).where.not(id: to_exclude).pluck(:id)
+ end
+ end
+
+ def remove_findings(ids:)
+ ids = Array(ids)
+ log_info('Removing Findings and associated records', ids: ids)
+
+ vulnerability_ids = VulnerabilitiesFinding.where(id: ids).pluck(:vulnerability_id).uniq.compact
+
+ VulnerabilitiesFindingPipeline.where(occurrence_id: ids).each_batch { |batch| batch.delete_all }
+ Vulnerability.where(id: vulnerability_ids).each_batch { |batch| batch.delete_all }
+ VulnerabilitiesFinding.where(id: ids).delete_all
+ end
+
+ def calculate_uuid_v5_for_finding(vulnerability_finding)
+ return unless vulnerability_finding
+
+ signatures = vulnerability_finding.signatures.sort_by { |signature| signature.algorithm_type_before_type_cast }
+ location_fingerprint = signatures.last&.signature_sha || vulnerability_finding.location_fingerprint
+
+ uuid_v5_name_components = {
+ report_type: vulnerability_finding.report_type,
+ primary_identifier_fingerprint: vulnerability_finding.fingerprint,
+ location_fingerprint: location_fingerprint,
+ project_id: vulnerability_finding.project_id
+ }
+
+ name = uuid_v5_name_components.values.join('-')
+
+ CalculateFindingUUID.call(name)
+ end
+
+ def log_info(message, **extra)
+ logger.info(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
+ end
+
+ def log_error(message, **extra)
+ logger.error(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
+ end
+
+ def logger
+ @logger ||= Gitlab::BackgroundMigration::Logger.build
+ end
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ 'RecalculateVulnerabilitiesOccurrencesUuid',
+ arguments
+ )
+ end
+end
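A minimal sketch of the UUID derivation itself, mirroring `calculate_uuid_v5_for_finding` above: the name components are joined with `-` and hashed into a UUIDv5 under the per-environment namespace (all component values below are made up for illustration):

    # All values are assumptions for illustration.
    components = {
      report_type: 'sast',
      primary_identifier_fingerprint: 'e6dd15eda2137be0034977a85b300a94a4f243a3',
      location_fingerprint: '838574be0210968bf6b9f569df9c2576242cbf0a',
      project_id: 1234
    }

    name = components.values.join('-')
    Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid::CalculateFindingUUID.call(name)
    # => a deterministic UUIDv5, stable for the same components and Rails environment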
diff --git a/lib/gitlab/background_migration/recalculate_vulnerability_finding_signatures_for_findings.rb b/lib/gitlab/background_migration/recalculate_vulnerability_finding_signatures_for_findings.rb
new file mode 100644
index 00000000000..20200a1d508
--- /dev/null
+++ b/lib/gitlab/background_migration/recalculate_vulnerability_finding_signatures_for_findings.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # rubocop: disable Style/Documentation
+ class RecalculateVulnerabilityFindingSignaturesForFindings
+ def perform(start_id, stop_id)
+ end
+ end
+ end
+end
+
+Gitlab::BackgroundMigration::RecalculateVulnerabilityFindingSignaturesForFindings.prepend_mod
diff --git a/lib/gitlab/background_migration/remove_all_trace_expiration_dates.rb b/lib/gitlab/background_migration/remove_all_trace_expiration_dates.rb
new file mode 100644
index 00000000000..d47aa76f24b
--- /dev/null
+++ b/lib/gitlab/background_migration/remove_all_trace_expiration_dates.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Removing expire_at timestamps that shouldn't have
+ # been written to traces on gitlab.com.
+ class RemoveAllTraceExpirationDates
+ include Gitlab::Database::MigrationHelpers
+
+ BATCH_SIZE = 1_000
+
+ # Stubbed class to connect to the CI database
+ # connects_to has to be called in abstract classes.
+ class MultiDbAdaptableClass < ActiveRecord::Base
+ self.abstract_class = true
+
+ if Gitlab::Database.has_config?(:ci)
+ connects_to database: { writing: :ci, reading: :ci }
+ end
+ end
+
+ # Stubbed class to access the ci_job_artifacts table
+ class JobArtifact < MultiDbAdaptableClass
+ include EachBatch
+
+ self.table_name = 'ci_job_artifacts'
+
+ TARGET_TIMESTAMPS = [
+ Date.new(2021, 04, 22).midnight.utc,
+ Date.new(2021, 05, 22).midnight.utc,
+ Date.new(2021, 06, 22).midnight.utc,
+ Date.new(2022, 01, 22).midnight.utc,
+ Date.new(2022, 02, 22).midnight.utc,
+ Date.new(2022, 03, 22).midnight.utc,
+ Date.new(2022, 04, 22).midnight.utc
+ ].freeze
+
+ scope :traces, -> { where(file_type: 3) }
+ scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
+ scope :in_targeted_timestamps, -> { where(expire_at: TARGET_TIMESTAMPS) }
+ end
+
+ def perform(start_id, end_id)
+ return unless Gitlab.com?
+
+ JobArtifact.traces
+ .between(start_id, end_id)
+ .in_targeted_timestamps
+ .each_batch(of: BATCH_SIZE) { |batch| batch.update_all(expire_at: nil) }
+ end
+ end
+ end
+end
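A sketch of invoking the cleanup; the ID range is an assumption for illustration. The job is a no-op outside GitLab.com and only clears `expire_at` values matching `TARGET_TIMESTAMPS`:

    # Hypothetical invocation; the ID range is an assumption for illustration.
    Gitlab::BackgroundMigration::RemoveAllTraceExpirationDates.new.perform(1, 50_000)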
diff --git a/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
new file mode 100644
index 00000000000..b61f2ee7f4c
--- /dev/null
+++ b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Class to populate spent_at for timelogs
+ class UpdateTimelogsNullSpentAt
+ include Gitlab::Database::DynamicModelHelpers
+
+ BATCH_SIZE = 100
+
+ def perform(start_id, stop_id)
+ define_batchable_model('timelogs', connection: connection)
+ .where(spent_at: nil, id: start_id..stop_id)
+ .each_batch(of: 100) do |subbatch|
+ batch_start, batch_end = subbatch.pick('min(id), max(id)')
+
+ update_timelogs(batch_start, batch_end)
+ end
+ end
+
+ def update_timelogs(batch_start, batch_stop)
+ execute(<<~SQL)
+ UPDATE timelogs
+ SET spent_at = created_at
+ WHERE spent_at IS NULL
+ AND timelogs.id BETWEEN #{batch_start} AND #{batch_stop};
+ SQL
+ end
+
+ def connection
+ @connection ||= ApplicationRecord.connection
+ end
+
+ def execute(sql)
+ connection.execute(sql)
+ end
+ end
+ end
+end
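A sketch of invoking the timelog backfill; the ID range is an assumption for illustration. Timelogs with a NULL `spent_at` get the value copied from `created_at`:

    # Hypothetical invocation; the ID range is an assumption for illustration.
    Gitlab::BackgroundMigration::UpdateTimelogsNullSpentAt.new.perform(1, 20_000)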