diff options
Diffstat (limited to 'lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb')
-rw-r--r-- | lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb | 148 |
1 files changed, 134 insertions, 14 deletions
diff --git a/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb index 84ff7423254..c1b8de1f6aa 100644 --- a/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb +++ b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # rubocop: disable Style/Documentation -class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid +class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength # rubocop: disable Gitlab/NamespacedClass class VulnerabilitiesIdentifier < ActiveRecord::Base self.table_name = "vulnerability_identifiers" @@ -9,10 +9,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid end class VulnerabilitiesFinding < ActiveRecord::Base + include EachBatch include ShaAttribute self.table_name = "vulnerability_occurrences" + + has_many :signatures, foreign_key: 'finding_id', class_name: 'VulnerabilityFindingSignature', inverse_of: :finding belongs_to :primary_identifier, class_name: 'VulnerabilitiesIdentifier', inverse_of: :primary_findings, foreign_key: 'primary_identifier_id' + REPORT_TYPES = { sast: 0, dependency_scanning: 1, @@ -20,7 +24,9 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid dast: 3, secret_detection: 4, coverage_fuzzing: 5, - api_fuzzing: 6 + api_fuzzing: 6, + cluster_image_scanning: 7, + generic: 99 }.with_indifferent_access.freeze enum report_type: REPORT_TYPES @@ -28,6 +34,25 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid sha_attribute :location_fingerprint end + class VulnerabilityFindingSignature < ActiveRecord::Base + include ShaAttribute + + self.table_name = 'vulnerability_finding_signatures' + belongs_to :finding, foreign_key: 'finding_id', inverse_of: :signatures, class_name: 'VulnerabilitiesFinding' + + sha_attribute :signature_sha + end + + class VulnerabilitiesFindingPipeline < ActiveRecord::Base + include EachBatch + self.table_name = "vulnerability_occurrence_pipelines" + end + + class Vulnerability < ActiveRecord::Base + include EachBatch + self.table_name = "vulnerabilities" + end + class CalculateFindingUUID FINDING_NAMESPACES_IDS = { development: "a143e9e2-41b3-47bc-9a19-081d089229f4", @@ -52,35 +77,122 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid end # rubocop: enable Gitlab/NamespacedClass + # rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength def perform(start_id, end_id) - findings = VulnerabilitiesFinding - .joins(:primary_identifier) - .select(:id, :report_type, :fingerprint, :location_fingerprint, :project_id) - .where(id: start_id..end_id) - - mappings = findings.each_with_object({}) do |finding, hash| - hash[finding] = { uuid: calculate_uuid_v5_for_finding(finding) } + unless Feature.enabled?(:migrate_vulnerability_finding_uuids, default_enabled: true) + return log_info('Migration is disabled by the feature flag', start_id: start_id, end_id: end_id) end - ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) + log_info('Migration started', start_id: start_id, end_id: end_id) - logger.info(message: 'RecalculateVulnerabilitiesOccurrencesUuid Migration: recalculation is done for:', - finding_ids: mappings.keys.pluck(:id)) + VulnerabilitiesFinding + .joins(:primary_identifier) + .includes(:signatures) + .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid) + .where(id: start_id..end_id) + .each_batch(of: 50) do |relation| + duplicates = find_duplicates(relation) + remove_findings(ids: duplicates) if duplicates.present? + + to_update = relation.reject { |finding| duplicates.include?(finding.id) } + + begin + known_uuids = Set.new + to_be_deleted = [] + + mappings = to_update.each_with_object({}) do |finding, hash| + uuid = calculate_uuid_v5_for_finding(finding) + + if known_uuids.add?(uuid) + hash[finding] = { uuid: uuid } + else + to_be_deleted << finding.id + end + end + + # It is technically still possible to have duplicate uuids + # if the data integrity is broken somehow and the primary identifiers of + # the findings are pointing to different projects with the same fingerprint values. + if to_be_deleted.present? + log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted) + + remove_findings(ids: to_be_deleted) + end + + ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present? + + log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id)) + rescue ActiveRecord::RecordNotUnique => error + log_info('RecordNotUnique error received') + + match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message) + + # This exception returns the **correct** UUIDv5 which probably comes from a later record + # and it's the one we can drop in the easiest way before retrying the UPDATE query + if match_data + uuid = match_data[:uuid] + log_info('Conflicting UUID found', uuid: uuid) + + id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id + remove_findings(ids: id) if id + retry + else + log_error('Couldnt find conflicting uuid') + + Gitlab::ErrorTracking.track_and_raise_exception(error) + end + end + end mark_job_as_succeeded(start_id, end_id) rescue StandardError => error - Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error) + log_error('An exception happened') + + Gitlab::ErrorTracking.track_and_raise_exception(error) end + # rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength private + def find_duplicates(relation) + to_exclude = [] + relation.flat_map do |record| + # Assuming we're scanning id 31 and the duplicate is id 40 + # first we'd process 31 and add 40 to the list of ids to remove + # then we would process record 40 and add 31 to the list of removals + # so we would drop both records + to_exclude << record.id + + VulnerabilitiesFinding.where( + report_type: record.report_type, + location_fingerprint: record.location_fingerprint, + primary_identifier_id: record.primary_identifier_id, + project_id: record.project_id + ).where.not(id: to_exclude).pluck(:id) + end + end + + def remove_findings(ids:) + ids = Array(ids) + log_info('Removing Findings and associated records', ids: ids) + + vulnerability_ids = VulnerabilitiesFinding.where(id: ids).pluck(:vulnerability_id).uniq.compact + + VulnerabilitiesFindingPipeline.where(occurrence_id: ids).each_batch { |batch| batch.delete_all } + Vulnerability.where(id: vulnerability_ids).each_batch { |batch| batch.delete_all } + VulnerabilitiesFinding.where(id: ids).delete_all + end + def calculate_uuid_v5_for_finding(vulnerability_finding) return unless vulnerability_finding + signatures = vulnerability_finding.signatures.sort_by { |signature| signature.algorithm_type_before_type_cast } + location_fingerprint = signatures.last&.signature_sha || vulnerability_finding.location_fingerprint + uuid_v5_name_components = { report_type: vulnerability_finding.report_type, primary_identifier_fingerprint: vulnerability_finding.fingerprint, - location_fingerprint: vulnerability_finding.location_fingerprint, + location_fingerprint: location_fingerprint, project_id: vulnerability_finding.project_id } @@ -89,6 +201,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid CalculateFindingUUID.call(name) end + def log_info(message, **extra) + logger.info(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra) + end + + def log_error(message, **extra) + logger.error(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra) + end + def logger @logger ||= Gitlab::BackgroundMigration::Logger.build end |