diff options
-rw-r--r-- | changelogs/unreleased/mk-rake-task-verify-remote-files.yml | 5 | ||||
-rw-r--r-- | doc/administration/raketasks/check.md | 7 | ||||
-rw-r--r-- | lib/gitlab/verify/batch_verifier.rb | 59 | ||||
-rw-r--r-- | lib/gitlab/verify/job_artifacts.rb | 10 | ||||
-rw-r--r-- | lib/gitlab/verify/lfs_objects.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/verify/rake_task.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/verify/uploads.rb | 12 | ||||
-rw-r--r-- | spec/lib/gitlab/verify/job_artifacts_spec.rb | 29 | ||||
-rw-r--r-- | spec/lib/gitlab/verify/lfs_objects_spec.rb | 25 | ||||
-rw-r--r-- | spec/lib/gitlab/verify/uploads_spec.rb | 27 |
10 files changed, 147 insertions, 41 deletions
diff --git a/changelogs/unreleased/mk-rake-task-verify-remote-files.yml b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml new file mode 100644 index 00000000000..772aa11d89b --- /dev/null +++ b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml @@ -0,0 +1,5 @@ +--- +title: Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks +merge_request: 19501 +author: +type: added diff --git a/doc/administration/raketasks/check.md b/doc/administration/raketasks/check.md index 7d34d35e7d1..2649bf61d74 100644 --- a/doc/administration/raketasks/check.md +++ b/doc/administration/raketasks/check.md @@ -78,9 +78,10 @@ Example output: ## Uploaded Files Integrity -Various types of file can be uploaded to a GitLab installation by users. -Checksums are generated and stored in the database upon upload, and integrity -checks using those checksums can be run. These checks also detect missing files. +Various types of files can be uploaded to a GitLab installation by users. +These integrity checks can detect missing files. Additionally, for locally +stored files, checksums are generated and stored in the database upon upload, +and these checks will verify them against current files. Currently, integrity checks are supported for the following types of file: diff --git a/lib/gitlab/verify/batch_verifier.rb b/lib/gitlab/verify/batch_verifier.rb index 1ef369a4b67..167ba1b3149 100644 --- a/lib/gitlab/verify/batch_verifier.rb +++ b/lib/gitlab/verify/batch_verifier.rb @@ -7,13 +7,15 @@ module Gitlab @batch_size = batch_size @start = start @finish = finish + + fix_google_api_logger end # Yields a Range of IDs and a Hash of failed verifications (object => error) def run_batches(&blk) - relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches - range = relation.first.id..relation.last.id - failures = run_batch(relation) + all_relation.in_batches(of: batch_size, start: start, finish: finish) do |batch| # rubocop: disable Cop/InBatches + range = batch.first.id..batch.last.id + failures = run_batch_for(batch) yield(range, failures) end @@ -29,24 +31,56 @@ module Gitlab private - def run_batch(relation) - relation.map { |upload| verify(upload) }.compact.to_h + def run_batch_for(batch) + batch.map { |upload| verify(upload) }.compact.to_h end def verify(object) + local?(object) ? verify_local(object) : verify_remote(object) + rescue => err + failure(object, err.inspect) + end + + def verify_local(object) expected = expected_checksum(object) actual = actual_checksum(object) - raise 'Checksum missing' unless expected.present? - raise 'Checksum mismatch' unless expected == actual + return failure(object, 'Checksum missing') unless expected.present? + return failure(object, 'Checksum mismatch') unless expected == actual + + success + end + # We don't calculate checksum for remote objects, so just check existence + def verify_remote(object) + return failure(object, 'Remote object does not exist') unless remote_object_exists?(object) + + success + end + + def success nil - rescue => err - [object, err] + end + + def failure(object, message) + [object, message] + end + + # It's already set to Logger::INFO, but acts as if it is set to + # Logger::DEBUG, and this fixes it... + def fix_google_api_logger + if Object.const_defined?('Google::Apis') + Google::Apis.logger.level = Logger::INFO + end end # This should return an ActiveRecord::Relation suitable for calling #in_batches on - def relation + def all_relation + raise NotImplementedError.new + end + + # Should return true if the object is stored locally + def local?(_object) raise NotImplementedError.new end @@ -59,6 +93,11 @@ module Gitlab def actual_checksum(_object) raise NotImplementedError.new end + + # Be sure to perform a hard check of the remote object (don't just check DB value) + def remote_object_exists?(object) + raise NotImplementedError.new + end end end end diff --git a/lib/gitlab/verify/job_artifacts.rb b/lib/gitlab/verify/job_artifacts.rb index 03500a61074..dbadfbde9e3 100644 --- a/lib/gitlab/verify/job_artifacts.rb +++ b/lib/gitlab/verify/job_artifacts.rb @@ -11,10 +11,14 @@ module Gitlab private - def relation + def all_relation ::Ci::JobArtifact.all end + def local?(artifact) + artifact.local_store? + end + def expected_checksum(artifact) artifact.file_sha256 end @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(artifact) Digest::SHA256.file(artifact.file.path).hexdigest end + + def remote_object_exists?(artifact) + artifact.file.file.exists? + end end end end diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb index 970e2a7b718..d3f58a73ac7 100644 --- a/lib/gitlab/verify/lfs_objects.rb +++ b/lib/gitlab/verify/lfs_objects.rb @@ -11,8 +11,12 @@ module Gitlab private - def relation - LfsObject.with_files_stored_locally + def all_relation + LfsObject.all + end + + def local?(lfs_object) + lfs_object.local_store? end def expected_checksum(lfs_object) @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(lfs_object) LfsObject.calculate_oid(lfs_object.file.path) end + + def remote_object_exists?(lfs_object) + lfs_object.file.file.exists? + end end end end diff --git a/lib/gitlab/verify/rake_task.rb b/lib/gitlab/verify/rake_task.rb index dd138e6b92b..e190eaddc79 100644 --- a/lib/gitlab/verify/rake_task.rb +++ b/lib/gitlab/verify/rake_task.rb @@ -45,7 +45,7 @@ module Gitlab return unless verbose? failures.each do |object, error| - say " - #{verifier.describe(object)}: #{error.inspect}".color(:red) + say " - #{verifier.describe(object)}: #{error}".color(:red) end end end diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb index 0ffa71a6d72..01f09ab8df7 100644 --- a/lib/gitlab/verify/uploads.rb +++ b/lib/gitlab/verify/uploads.rb @@ -11,8 +11,12 @@ module Gitlab private - def relation - Upload.with_files_stored_locally + def all_relation + Upload.all + end + + def local?(upload) + upload.local? end def expected_checksum(upload) @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(upload) Upload.hexdigest(upload.absolute_path) end + + def remote_object_exists?(upload) + upload.build_uploader.file.exists? + end end end end diff --git a/spec/lib/gitlab/verify/job_artifacts_spec.rb b/spec/lib/gitlab/verify/job_artifacts_spec.rb index ec490bdfde2..6e916a56564 100644 --- a/spec/lib/gitlab/verify/job_artifacts_spec.rb +++ b/spec/lib/gitlab/verify/job_artifacts_spec.rb @@ -21,15 +21,38 @@ describe Gitlab::Verify::JobArtifacts do FileUtils.rm_f(artifact.file.path) expect(failures.keys).to contain_exactly(artifact) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(artifact.file.path) + expect(failure).to include('No such file or directory') + expect(failure).to include(artifact.file.path) end it 'fails artifacts with a mismatched checksum' do File.truncate(artifact.file.path, 0) expect(failures.keys).to contain_exactly(artifact) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') + end + + context 'with remote files' do + let(:file) { double(:file) } + + before do + stub_artifacts_object_storage + artifact.update!(file_store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) + end + + it 'passes artifacts in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end + + it 'fails artifacts in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) + + expect(failures.keys).to contain_exactly(artifact) + expect(failure).to include('Remote object does not exist') + end end end end diff --git a/spec/lib/gitlab/verify/lfs_objects_spec.rb b/spec/lib/gitlab/verify/lfs_objects_spec.rb index 0f890e2c7ce..2feaedd6f14 100644 --- a/spec/lib/gitlab/verify/lfs_objects_spec.rb +++ b/spec/lib/gitlab/verify/lfs_objects_spec.rb @@ -21,30 +21,37 @@ describe Gitlab::Verify::LfsObjects do FileUtils.rm_f(lfs_object.file.path) expect(failures.keys).to contain_exactly(lfs_object) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(lfs_object.file.path) + expect(failure).to include('No such file or directory') + expect(failure).to include(lfs_object.file.path) end it 'fails LFS objects with a mismatched oid' do File.truncate(lfs_object.file.path, 0) expect(failures.keys).to contain_exactly(lfs_object) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') end context 'with remote files' do + let(:file) { double(:file) } + before do stub_lfs_object_storage + lfs_object.update!(file_store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) end - it 'skips LFS objects in object storage' do - local_failure = create(:lfs_object) - create(:lfs_object, :object_storage) + it 'passes LFS objects in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end - failures = {} - described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) } + it 'fails LFS objects in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) - expect(failures.keys).to contain_exactly(local_failure) + expect(failures.keys).to contain_exactly(lfs_object) + expect(failure).to include('Remote object does not exist') end end end diff --git a/spec/lib/gitlab/verify/uploads_spec.rb b/spec/lib/gitlab/verify/uploads_spec.rb index 85768308edc..296866d3319 100644 --- a/spec/lib/gitlab/verify/uploads_spec.rb +++ b/spec/lib/gitlab/verify/uploads_spec.rb @@ -23,37 +23,44 @@ describe Gitlab::Verify::Uploads do FileUtils.rm_f(upload.absolute_path) expect(failures.keys).to contain_exactly(upload) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(upload.absolute_path) + expect(failure).to include('No such file or directory') + expect(failure).to include(upload.absolute_path) end it 'fails uploads with a mismatched checksum' do upload.update!(checksum: 'something incorrect') expect(failures.keys).to contain_exactly(upload) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') end it 'fails uploads with a missing precalculated checksum' do upload.update!(checksum: '') expect(failures.keys).to contain_exactly(upload) - expect(failure.to_s).to include('Checksum missing') + expect(failure).to include('Checksum missing') end context 'with remote files' do + let(:file) { double(:file) } + before do stub_uploads_object_storage(AvatarUploader) + upload.update!(store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) end - it 'skips uploads in object storage' do - local_failure = create(:upload) - create(:upload, :object_storage) + it 'passes uploads in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end - failures = {} - described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) } + it 'fails uploads in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) - expect(failures.keys).to contain_exactly(local_failure) + expect(failures.keys).to contain_exactly(upload) + expect(failure).to include('Remote object does not exist') end end end |