diff options
Diffstat (limited to 'spec/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid_spec.rb')
-rw-r--r-- | spec/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid_spec.rb | 530 |
1 files changed, 530 insertions, 0 deletions
# frozen_string_literal: true

require 'spec_helper'

# Inserts a `background_migration_jobs` row for this migration class.
#
# Defined at the top level, so it relies on the calling example exposing
# `background_migration_jobs` (a `let` in the describe block below).
#
# @param ids [Integer, Array<Integer>] job arguments; normalised with `Array()`
# @param status [Symbol] either `:pending` or `:succeeded`
# @raise [ArgumentError] when `status` is anything else
def create_background_migration_job(ids, status)
  proper_status = case status
                  when :pending
                    Gitlab::Database::BackgroundMigrationJob.statuses['pending']
                  when :succeeded
                    Gitlab::Database::BackgroundMigrationJob.statuses['succeeded']
                  else
                    raise ArgumentError, "unsupported status: #{status.inspect}"
                  end

  background_migration_jobs.create!(
    class_name: 'RecalculateVulnerabilitiesOccurrencesUuid',
    arguments: Array(ids),
    status: proper_status,
    created_at: Time.now.utc
  )
end

RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid, :suppress_gitlab_schemas_validate_connection, schema: 20211202041233 do
  # Table handles and scopes used throughout the examples.
  let(:background_migration_jobs) { table(:background_migration_jobs) }
  let(:pending_jobs) { background_migration_jobs.where(status: Gitlab::Database::BackgroundMigrationJob.statuses['pending']) }
  let(:succeeded_jobs) { background_migration_jobs.where(status: Gitlab::Database::BackgroundMigrationJob.statuses['succeeded']) }
  let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
  let(:users) { table(:users) }
  let(:user) { create_user! }
  let(:project) { table(:projects).create!(id: 123, namespace_id: namespace.id) }
  let(:scanners) { table(:vulnerability_scanners) }
  let(:scanner) { scanners.create!(project_id: project.id, external_id: 'test 1', name: 'test scanner 1') }
  let(:scanner2) { scanners.create!(project_id: project.id, external_id: 'test 2', name: 'test scanner 2') }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:vulnerability_findings) { table(:vulnerability_occurrences) }
  let(:vulnerability_finding_pipelines) { table(:vulnerability_occurrence_pipelines) }
  let(:vulnerability_finding_signatures) { table(:vulnerability_finding_signatures) }
  let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }

  # Three identifiers in the same project, each with its own fingerprint.
  let(:identifier_1) { 'identifier-1' }
  let!(:vulnerability_identifier) do
    vulnerability_identifiers.create!(
      project_id: project.id,
      external_type: identifier_1,
      external_id: identifier_1,
      fingerprint: Gitlab::Database::ShaAttribute.serialize('ff9ef548a6e30a0462795d916f3f00d1e2b082ca'),
      name: 'Identifier 1')
  end

  let(:identifier_2) { 'identifier-2' }
  let!(:vulnerability_identifier2) do
    vulnerability_identifiers.create!(
      project_id: project.id,
      external_type: identifier_2,
      external_id: identifier_2,
      fingerprint: Gitlab::Database::ShaAttribute.serialize('4299e8ddd819f9bde9cfacf45716724c17b5ddf7'),
      name: 'Identifier 2')
  end

  let(:identifier_3) { 'identifier-3' }
  let!(:vulnerability_identifier3) do
    vulnerability_identifiers.create!(
      project_id: project.id,
      external_type: identifier_3,
      external_id: identifier_3,
      fingerprint: Gitlab::Database::ShaAttribute.serialize('8e91632f9c6671e951834a723ee221c44cc0d844'),
      name: 'Identifier 3')
  end

  let(:known_uuid_v4) { "b3cc2518-5446-4dea-871c-89d5e999c1ac" }
  let(:known_uuid_v5) { "05377088-dc26-5161-920e-52a7159fdaa1" }
  let(:desired_uuid_v5) { "f3e9a23f-9181-54bf-a5ab-c5bc7a9b881a" }

  # Each context supplies its own `start_id` / `end_id` batch boundaries.
  subject { described_class.new.perform(start_id, end_id) }

  context "when finding has a UUIDv4" do
    before do
      @uuid_v4 = create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner2.id,
        primary_identifier_id: vulnerability_identifier2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize("fa18f432f1d56675f4098d318739c3cd5b14eb3e"),
        uuid: known_uuid_v4
      )
    end

    let(:start_id) { @uuid_v4.id }
    let(:end_id) { @uuid_v4.id }

    it "replaces it with UUIDv5" do
      expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v4])

      subject

      expect(vulnerability_findings.pluck(:uuid)).to match_array([desired_uuid_v5])
    end

    it 'logs recalculation' do
      expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
        expect(instance).to receive(:info).twice
      end

      subject
    end
  end

  context "when finding has a UUIDv5" do
    before do
      @uuid_v5 = create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize("838574be0210968bf6b9f569df9c2576242cbf0a"),
        uuid: known_uuid_v5
      )
    end

    let(:start_id) { @uuid_v5.id }
    let(:end_id) { @uuid_v5.id }

    it "stays the same" do
      expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v5])

      subject

      expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v5])
    end
  end

  context 'if a duplicate UUID would be generated' do # rubocop: disable RSpec/MultipleMemoizedHelpers
    # Findings come in pairs sharing project, identifier and location
    # fingerprint. They differ by scanner and by stored UUID.
    let(:v1) do
      create_vulnerability!(
        project_id: project.id,
        author_id: user.id
      )
    end

    let!(:finding_with_incorrect_uuid) do
      create_finding!(
        vulnerability_id: v1.id,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: 'bd95c085-71aa-51d7-9bb6-08ae669c262e'
      )
    end

    let(:v2) do
      create_vulnerability!(
        project_id: project.id,
        author_id: user.id
      )
    end

    let!(:finding_with_correct_uuid) do
      create_finding!(
        vulnerability_id: v2.id,
        project_id: project.id,
        primary_identifier_id: vulnerability_identifier.id,
        scanner_id: scanner2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '91984483-5efe-5215-b471-d524ac5792b1'
      )
    end

    let(:v3) do
      create_vulnerability!(
        project_id: project.id,
        author_id: user.id
      )
    end

    let!(:finding_with_incorrect_uuid2) do
      create_finding!(
        vulnerability_id: v3.id,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '00000000-1111-2222-3333-444444444444'
      )
    end

    let(:v4) do
      create_vulnerability!(
        project_id: project.id,
        author_id: user.id
      )
    end

    let!(:finding_with_correct_uuid2) do
      create_finding!(
        vulnerability_id: v4.id,
        project_id: project.id,
        scanner_id: scanner2.id,
        primary_identifier_id: vulnerability_identifier2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '1edd751e-ef9a-5391-94db-a832c8635bfc'
      )
    end

    let!(:finding_with_incorrect_uuid3) do
      create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier3.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '22222222-3333-4444-5555-666666666666'
      )
    end

    # Explicit id 99999 keeps this record above `end_id`, i.e. outside the
    # processed id range.
    let!(:duplicate_not_in_the_same_batch) do
      create_finding!(
        id: 99999,
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner2.id,
        primary_identifier_id: vulnerability_identifier3.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '4564f9d5-3c6b-5cc3-af8c-7c25285362a7'
      )
    end

    let(:start_id) { finding_with_incorrect_uuid.id }
    let(:end_id) { finding_with_incorrect_uuid3.id }

    before do
      # 4 iterations x 4 findings = the 16 pipeline links asserted below.
      4.times do
        create_finding_pipeline!(project_id: project.id, finding_id: finding_with_incorrect_uuid.id)
        create_finding_pipeline!(project_id: project.id, finding_id: finding_with_correct_uuid.id)
        create_finding_pipeline!(project_id: project.id, finding_id: finding_with_incorrect_uuid2.id)
        create_finding_pipeline!(project_id: project.id, finding_id: finding_with_correct_uuid2.id)
      end
    end

    it 'drops duplicates and related records', :aggregate_failures do
      expect(vulnerability_findings.pluck(:id)).to match_array(
        [
          finding_with_correct_uuid.id,
          finding_with_incorrect_uuid.id,
          finding_with_correct_uuid2.id,
          finding_with_incorrect_uuid2.id,
          finding_with_incorrect_uuid3.id,
          duplicate_not_in_the_same_batch.id
        ])

      expect { subject }.to change(vulnerability_finding_pipelines, :count).from(16).to(8)
        .and change(vulnerability_findings, :count).from(6).to(3)
        .and change(vulnerabilities, :count).from(4).to(2)

      expect(vulnerability_findings.pluck(:id)).to match_array([finding_with_incorrect_uuid.id, finding_with_incorrect_uuid2.id, finding_with_incorrect_uuid3.id])
    end

    context 'if there are conflicting UUID values within the batch' do # rubocop: disable RSpec/MultipleMemoizedHelpers
      let(:end_id) { finding_with_broken_data_integrity.id }
      let(:vulnerability_5) { create_vulnerability!(project_id: project.id, author_id: user.id) }
      let(:different_project) { table(:projects).create!(namespace_id: namespace.id) }

      # Same external type/id and fingerprint as `vulnerability_identifier2`,
      # but owned by a different project.
      let!(:identifier_with_broken_data_integrity) do
        vulnerability_identifiers.create!(
          project_id: different_project.id,
          external_type: identifier_2,
          external_id: identifier_2,
          fingerprint: Gitlab::Database::ShaAttribute.serialize('4299e8ddd819f9bde9cfacf45716724c17b5ddf7'),
          name: 'Identifier 2')
      end

      # The finding lives in `project` while its primary identifier belongs
      # to `different_project`.
      let(:finding_with_broken_data_integrity) do
        create_finding!(
          vulnerability_id: vulnerability_5.id,
          project_id: project.id,
          scanner_id: scanner.id,
          primary_identifier_id: identifier_with_broken_data_integrity.id,
          report_type: 0, # "sast"
          location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
          uuid: SecureRandom.uuid
        )
      end

      it 'deletes the conflicting record' do
        expect { subject }.to change { vulnerability_findings.find_by_id(finding_with_broken_data_integrity.id) }.to(nil)
      end
    end

    context 'if a conflicting UUID is found during the migration' do # rubocop:disable RSpec/MultipleMemoizedHelpers
      let(:finding_class) { Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid::VulnerabilitiesFinding }
      let(:uuid) { '4564f9d5-3c6b-5cc3-af8c-7c25285362a7' }

      before do
        exception = ActiveRecord::RecordNotUnique.new("(uuid)=(#{uuid})")

        # The first bulk update raises a uniqueness error naming the UUID.
        # Every later call returns an empty hash.
        call_count = 0
        allow(::Gitlab::Database::BulkUpdate).to receive(:execute) do
          call_count += 1
          call_count.eql?(1) ? raise(exception) : {}
        end

        allow(finding_class).to receive(:find_by).with(uuid: uuid).and_return(duplicate_not_in_the_same_batch)
      end

      it 'retries the recalculation' do
        subject

        expect(finding_class).to have_received(:find_by).with(uuid: uuid).once
      end

      it 'logs the conflict' do
        expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
          expect(instance).to receive(:info).exactly(6).times
        end

        subject
      end

      it 'marks the job as done' do
        create_background_migration_job([start_id, end_id], :pending)

        subject

        expect(pending_jobs.count).to eq(0)
        expect(succeeded_jobs.count).to eq(1)
      end
    end

    it 'logs an exception if a different uniqueness problem was found' do
      exception = ActiveRecord::RecordNotUnique.new("Totally not an UUID uniqueness problem")
      allow(::Gitlab::Database::BulkUpdate).to receive(:execute).and_raise(exception)
      allow(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)

      subject

      expect(Gitlab::ErrorTracking).to have_received(:track_and_raise_exception).with(exception).once
    end

    it 'logs a duplicate found message' do
      expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
        expect(instance).to receive(:info).exactly(3).times
      end

      subject
    end
  end

  context 'when finding has a signature' do
    before do
      # Two findings, each with a "location" and a "hash" signature row.
      @f1 = create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: 'd15d774d-e4b1-5a1b-929b-19f2a53e35ec'
      )

      vulnerability_finding_signatures.create!(
        finding_id: @f1.id,
        algorithm_type: 2, # location
        signature_sha: Gitlab::Database::ShaAttribute.serialize('57d4e05205f6462a73f039a5b2751aa1ab344e6e') # sha1('youshouldusethis')
      )

      vulnerability_finding_signatures.create!(
        finding_id: @f1.id,
        algorithm_type: 1, # hash
        signature_sha: Gitlab::Database::ShaAttribute.serialize('c554d8d8df1a7a14319eafdaae24af421bf5b587') # sha1('andnotthis')
      )

      @f2 = create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: vulnerability_identifier2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
        uuid: '4be029b5-75e5-5ac0-81a2-50ab41726135'
      )

      vulnerability_finding_signatures.create!(
        finding_id: @f2.id,
        algorithm_type: 2, # location
        signature_sha: Gitlab::Database::ShaAttribute.serialize('57d4e05205f6462a73f039a5b2751aa1ab344e6e') # sha1('youshouldusethis')
      )

      vulnerability_finding_signatures.create!(
        finding_id: @f2.id,
        algorithm_type: 1, # hash
        signature_sha: Gitlab::Database::ShaAttribute.serialize('c554d8d8df1a7a14319eafdaae24af421bf5b587') # sha1('andnotthis')
      )
    end

    let(:start_id) { @f1.id }
    let(:end_id) { @f2.id }

    let(:uuids_before) { [@f1.uuid, @f2.uuid] }
    let(:uuids_after) { %w[d3b60ddd-d312-5606-b4d3-ad058eebeacb 349d9bec-c677-5530-a8ac-5e58889c3b1a] }

    it 'is recalculated using signature' do
      expect(vulnerability_findings.pluck(:uuid)).to match_array(uuids_before)

      subject

      expect(vulnerability_findings.pluck(:uuid)).to match_array(uuids_after)
    end
  end

  context 'if all records are removed before the job ran' do
    # No findings are created in this context, so the id range is empty.
    let(:start_id) { 1 }
    let(:end_id) { 9 }

    before do
      create_background_migration_job([start_id, end_id], :pending)
    end

    it 'does not error out' do
      expect { subject }.not_to raise_error
    end

    it 'marks the job as done' do
      subject

      expect(pending_jobs.count).to eq(0)
      expect(succeeded_jobs.count).to eq(1)
    end
  end

  context 'when recalculation fails' do
    before do
      @uuid_v4 = create_finding!(
        vulnerability_id: nil,
        project_id: project.id,
        scanner_id: scanner2.id,
        primary_identifier_id: vulnerability_identifier2.id,
        report_type: 0, # "sast"
        location_fingerprint: Gitlab::Database::ShaAttribute.serialize("fa18f432f1d56675f4098d318739c3cd5b14eb3e"),
        uuid: known_uuid_v4
      )

      # Stubbing the tracker keeps it from re-raising, so the example can
      # assert on what it received.
      allow(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
      allow(::Gitlab::Database::BulkUpdate).to receive(:execute).and_raise(expected_error)
    end

    let(:start_id) { @uuid_v4.id }
    let(:end_id) { @uuid_v4.id }
    let(:expected_error) { RuntimeError.new }

    it 'captures the errors and does not crash entirely' do
      expect { subject }.not_to raise_error

      expect(Gitlab::ErrorTracking).to have_received(:track_and_raise_exception).with(expected_error).once
    end

    it_behaves_like 'marks background migration job records' do
      let(:arguments) { [1, 4] }
      subject { described_class.new }
    end
  end

  it_behaves_like 'marks background migration job records' do
    let(:arguments) { [1, 4] }
    subject { described_class.new }
  end

  private

  # Creates a row in `vulnerabilities`, using defaults for every attribute
  # the caller does not supply.
  def create_vulnerability!(project_id:, author_id:, title: 'test', severity: 7, confidence: 7, report_type: 0)
    vulnerabilities.create!(
      project_id: project_id,
      author_id: author_id,
      title: title,
      severity: severity,
      confidence: confidence,
      report_type: report_type
    )
  end

  # Creates a row in `vulnerability_occurrences`.
  #
  # Attributes left as nil (for example an unset `id` or `vulnerability_id`)
  # are removed by `compact` before the hash reaches `create!`.
  # rubocop:disable Metrics/ParameterLists
  def create_finding!(
    vulnerability_id:, project_id:, scanner_id:, primary_identifier_id:, id: nil,
    name: "test", severity: 7, confidence: 7, report_type: 0,
    project_fingerprint: '123qweasdzxc', location_fingerprint: 'test',
    metadata_version: 'test', raw_metadata: 'test', uuid: SecureRandom.uuid)
    vulnerability_findings.create!({
      id: id,
      vulnerability_id: vulnerability_id,
      project_id: project_id,
      name: name,
      severity: severity,
      confidence: confidence,
      report_type: report_type,
      project_fingerprint: project_fingerprint,
      scanner_id: scanner_id,
      primary_identifier_id: primary_identifier_id,
      location_fingerprint: location_fingerprint,
      metadata_version: metadata_version,
      raw_metadata: raw_metadata,
      uuid: uuid
    }.compact
    )
  end
  # rubocop:enable Metrics/ParameterLists

  # Creates a row in `users`; `name` doubles as the username.
  def create_user!(name: "Example User", email: "user@example.com", user_type: nil, created_at: Time.zone.now, confirmed_at: Time.zone.now)
    users.create!(
      name: name,
      email: email,
      username: name,
      projects_limit: 0,
      user_type: user_type,
      created_at: created_at,
      confirmed_at: confirmed_at
    )
  end

  # Creates a fresh `ci_pipelines` row and links it to the given finding
  # through `vulnerability_occurrence_pipelines`.
  def create_finding_pipeline!(project_id:, finding_id:)
    pipeline = table(:ci_pipelines).create!(project_id: project_id)
    vulnerability_finding_pipelines.create!(pipeline_id: pipeline.id, occurrence_id: finding_id)
  end
end