summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-06-13 17:11:43 +0000
committerStan Hu <stanhu@gmail.com>2018-06-13 17:11:43 +0000
commit61b0577d918748e6957d63e10cd33219756d48d7 (patch)
tree86cd1a740cd6531b13c6f7a73f4766985cbf8cdb
parentcc1b03545c83c83e5791b5d04d7ff0b86f892364 (diff)
parent2f40fb456bbc99fa4d3816696dd9ac612a21482c (diff)
downloadgitlab-ce-61b0577d918748e6957d63e10cd33219756d48d7.tar.gz
Merge branch 'mk/rake-task-verify-remote-files' into 'master'
Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks Closes #47139 See merge request gitlab-org/gitlab-ce!19501
-rw-r--r--changelogs/unreleased/mk-rake-task-verify-remote-files.yml5
-rw-r--r--doc/administration/raketasks/check.md7
-rw-r--r--lib/gitlab/verify/batch_verifier.rb59
-rw-r--r--lib/gitlab/verify/job_artifacts.rb10
-rw-r--r--lib/gitlab/verify/lfs_objects.rb12
-rw-r--r--lib/gitlab/verify/rake_task.rb2
-rw-r--r--lib/gitlab/verify/uploads.rb12
-rw-r--r--spec/lib/gitlab/verify/job_artifacts_spec.rb29
-rw-r--r--spec/lib/gitlab/verify/lfs_objects_spec.rb25
-rw-r--r--spec/lib/gitlab/verify/uploads_spec.rb27
10 files changed, 147 insertions, 41 deletions
diff --git a/changelogs/unreleased/mk-rake-task-verify-remote-files.yml b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml
new file mode 100644
index 00000000000..772aa11d89b
--- /dev/null
+++ b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml
@@ -0,0 +1,5 @@
+---
+title: Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks
+merge_request: 19501
+author:
+type: added
diff --git a/doc/administration/raketasks/check.md b/doc/administration/raketasks/check.md
index 7d34d35e7d1..2649bf61d74 100644
--- a/doc/administration/raketasks/check.md
+++ b/doc/administration/raketasks/check.md
@@ -78,9 +78,10 @@ Example output:
## Uploaded Files Integrity
-Various types of file can be uploaded to a GitLab installation by users.
-Checksums are generated and stored in the database upon upload, and integrity
-checks using those checksums can be run. These checks also detect missing files.
+Various types of files can be uploaded to a GitLab installation by users.
+These integrity checks can detect missing files. Additionally, for locally
+stored files, checksums are generated and stored in the database upon upload,
+and these checks will verify them against current files.
Currently, integrity checks are supported for the following types of file:
diff --git a/lib/gitlab/verify/batch_verifier.rb b/lib/gitlab/verify/batch_verifier.rb
index 1ef369a4b67..167ba1b3149 100644
--- a/lib/gitlab/verify/batch_verifier.rb
+++ b/lib/gitlab/verify/batch_verifier.rb
@@ -7,13 +7,15 @@ module Gitlab
@batch_size = batch_size
@start = start
@finish = finish
+
+ fix_google_api_logger
end
# Yields a Range of IDs and a Hash of failed verifications (object => error)
def run_batches(&blk)
- relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches
- range = relation.first.id..relation.last.id
- failures = run_batch(relation)
+ all_relation.in_batches(of: batch_size, start: start, finish: finish) do |batch| # rubocop: disable Cop/InBatches
+ range = batch.first.id..batch.last.id
+ failures = run_batch_for(batch)
yield(range, failures)
end
@@ -29,24 +31,56 @@ module Gitlab
private
- def run_batch(relation)
- relation.map { |upload| verify(upload) }.compact.to_h
+ def run_batch_for(batch)
+ batch.map { |upload| verify(upload) }.compact.to_h
end
def verify(object)
+ local?(object) ? verify_local(object) : verify_remote(object)
+ rescue => err
+ failure(object, err.inspect)
+ end
+
+ def verify_local(object)
expected = expected_checksum(object)
actual = actual_checksum(object)
- raise 'Checksum missing' unless expected.present?
- raise 'Checksum mismatch' unless expected == actual
+ return failure(object, 'Checksum missing') unless expected.present?
+ return failure(object, 'Checksum mismatch') unless expected == actual
+
+ success
+ end
+ # We don't calculate checksum for remote objects, so just check existence
+ def verify_remote(object)
+ return failure(object, 'Remote object does not exist') unless remote_object_exists?(object)
+
+ success
+ end
+
+ def success
nil
- rescue => err
- [object, err]
+ end
+
+ def failure(object, message)
+ [object, message]
+ end
+
+ # It's already set to Logger::INFO, but acts as if it is set to
+ # Logger::DEBUG, and this fixes it...
+ def fix_google_api_logger
+ if Object.const_defined?('Google::Apis')
+ Google::Apis.logger.level = Logger::INFO
+ end
end
# This should return an ActiveRecord::Relation suitable for calling #in_batches on
- def relation
+ def all_relation
+ raise NotImplementedError.new
+ end
+
+ # Should return true if the object is stored locally
+ def local?(_object)
raise NotImplementedError.new
end
@@ -59,6 +93,11 @@ module Gitlab
def actual_checksum(_object)
raise NotImplementedError.new
end
+
+ # Be sure to perform a hard check of the remote object (don't just check DB value)
+ def remote_object_exists?(object)
+ raise NotImplementedError.new
+ end
end
end
end
diff --git a/lib/gitlab/verify/job_artifacts.rb b/lib/gitlab/verify/job_artifacts.rb
index 03500a61074..dbadfbde9e3 100644
--- a/lib/gitlab/verify/job_artifacts.rb
+++ b/lib/gitlab/verify/job_artifacts.rb
@@ -11,10 +11,14 @@ module Gitlab
private
- def relation
+ def all_relation
::Ci::JobArtifact.all
end
+ def local?(artifact)
+ artifact.local_store?
+ end
+
def expected_checksum(artifact)
artifact.file_sha256
end
@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(artifact)
Digest::SHA256.file(artifact.file.path).hexdigest
end
+
+ def remote_object_exists?(artifact)
+ artifact.file.file.exists?
+ end
end
end
end
diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb
index 970e2a7b718..d3f58a73ac7 100644
--- a/lib/gitlab/verify/lfs_objects.rb
+++ b/lib/gitlab/verify/lfs_objects.rb
@@ -11,8 +11,12 @@ module Gitlab
private
- def relation
- LfsObject.with_files_stored_locally
+ def all_relation
+ LfsObject.all
+ end
+
+ def local?(lfs_object)
+ lfs_object.local_store?
end
def expected_checksum(lfs_object)
@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(lfs_object)
LfsObject.calculate_oid(lfs_object.file.path)
end
+
+ def remote_object_exists?(lfs_object)
+ lfs_object.file.file.exists?
+ end
end
end
end
diff --git a/lib/gitlab/verify/rake_task.rb b/lib/gitlab/verify/rake_task.rb
index dd138e6b92b..e190eaddc79 100644
--- a/lib/gitlab/verify/rake_task.rb
+++ b/lib/gitlab/verify/rake_task.rb
@@ -45,7 +45,7 @@ module Gitlab
return unless verbose?
failures.each do |object, error|
- say " - #{verifier.describe(object)}: #{error.inspect}".color(:red)
+ say " - #{verifier.describe(object)}: #{error}".color(:red)
end
end
end
diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb
index 0ffa71a6d72..01f09ab8df7 100644
--- a/lib/gitlab/verify/uploads.rb
+++ b/lib/gitlab/verify/uploads.rb
@@ -11,8 +11,12 @@ module Gitlab
private
- def relation
- Upload.with_files_stored_locally
+ def all_relation
+ Upload.all
+ end
+
+ def local?(upload)
+ upload.local?
end
def expected_checksum(upload)
@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(upload)
Upload.hexdigest(upload.absolute_path)
end
+
+ def remote_object_exists?(upload)
+ upload.build_uploader.file.exists?
+ end
end
end
end
diff --git a/spec/lib/gitlab/verify/job_artifacts_spec.rb b/spec/lib/gitlab/verify/job_artifacts_spec.rb
index ec490bdfde2..6e916a56564 100644
--- a/spec/lib/gitlab/verify/job_artifacts_spec.rb
+++ b/spec/lib/gitlab/verify/job_artifacts_spec.rb
@@ -21,15 +21,38 @@ describe Gitlab::Verify::JobArtifacts do
FileUtils.rm_f(artifact.file.path)
expect(failures.keys).to contain_exactly(artifact)
- expect(failure).to be_a(Errno::ENOENT)
- expect(failure.to_s).to include(artifact.file.path)
+ expect(failure).to include('No such file or directory')
+ expect(failure).to include(artifact.file.path)
end
it 'fails artifacts with a mismatched checksum' do
File.truncate(artifact.file.path, 0)
expect(failures.keys).to contain_exactly(artifact)
- expect(failure.to_s).to include('Checksum mismatch')
+ expect(failure).to include('Checksum mismatch')
+ end
+
+ context 'with remote files' do
+ let(:file) { double(:file) }
+
+ before do
+ stub_artifacts_object_storage
+ artifact.update!(file_store: ObjectStorage::Store::REMOTE)
+ expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
+ end
+
+ it 'passes artifacts in object storage that exist' do
+ expect(file).to receive(:exists?).and_return(true)
+
+ expect(failures).to eq({})
+ end
+
+ it 'fails artifacts in object storage that do not exist' do
+ expect(file).to receive(:exists?).and_return(false)
+
+ expect(failures.keys).to contain_exactly(artifact)
+ expect(failure).to include('Remote object does not exist')
+ end
end
end
end
diff --git a/spec/lib/gitlab/verify/lfs_objects_spec.rb b/spec/lib/gitlab/verify/lfs_objects_spec.rb
index 0f890e2c7ce..2feaedd6f14 100644
--- a/spec/lib/gitlab/verify/lfs_objects_spec.rb
+++ b/spec/lib/gitlab/verify/lfs_objects_spec.rb
@@ -21,30 +21,37 @@ describe Gitlab::Verify::LfsObjects do
FileUtils.rm_f(lfs_object.file.path)
expect(failures.keys).to contain_exactly(lfs_object)
- expect(failure).to be_a(Errno::ENOENT)
- expect(failure.to_s).to include(lfs_object.file.path)
+ expect(failure).to include('No such file or directory')
+ expect(failure).to include(lfs_object.file.path)
end
it 'fails LFS objects with a mismatched oid' do
File.truncate(lfs_object.file.path, 0)
expect(failures.keys).to contain_exactly(lfs_object)
- expect(failure.to_s).to include('Checksum mismatch')
+ expect(failure).to include('Checksum mismatch')
end
context 'with remote files' do
+ let(:file) { double(:file) }
+
before do
stub_lfs_object_storage
+ lfs_object.update!(file_store: ObjectStorage::Store::REMOTE)
+ expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
end
- it 'skips LFS objects in object storage' do
- local_failure = create(:lfs_object)
- create(:lfs_object, :object_storage)
+ it 'passes LFS objects in object storage that exist' do
+ expect(file).to receive(:exists?).and_return(true)
+
+ expect(failures).to eq({})
+ end
- failures = {}
- described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
+ it 'fails LFS objects in object storage that do not exist' do
+ expect(file).to receive(:exists?).and_return(false)
- expect(failures.keys).to contain_exactly(local_failure)
+ expect(failures.keys).to contain_exactly(lfs_object)
+ expect(failure).to include('Remote object does not exist')
end
end
end
diff --git a/spec/lib/gitlab/verify/uploads_spec.rb b/spec/lib/gitlab/verify/uploads_spec.rb
index 85768308edc..296866d3319 100644
--- a/spec/lib/gitlab/verify/uploads_spec.rb
+++ b/spec/lib/gitlab/verify/uploads_spec.rb
@@ -23,37 +23,44 @@ describe Gitlab::Verify::Uploads do
FileUtils.rm_f(upload.absolute_path)
expect(failures.keys).to contain_exactly(upload)
- expect(failure).to be_a(Errno::ENOENT)
- expect(failure.to_s).to include(upload.absolute_path)
+ expect(failure).to include('No such file or directory')
+ expect(failure).to include(upload.absolute_path)
end
it 'fails uploads with a mismatched checksum' do
upload.update!(checksum: 'something incorrect')
expect(failures.keys).to contain_exactly(upload)
- expect(failure.to_s).to include('Checksum mismatch')
+ expect(failure).to include('Checksum mismatch')
end
it 'fails uploads with a missing precalculated checksum' do
upload.update!(checksum: '')
expect(failures.keys).to contain_exactly(upload)
- expect(failure.to_s).to include('Checksum missing')
+ expect(failure).to include('Checksum missing')
end
context 'with remote files' do
+ let(:file) { double(:file) }
+
before do
stub_uploads_object_storage(AvatarUploader)
+ upload.update!(store: ObjectStorage::Store::REMOTE)
+ expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
end
- it 'skips uploads in object storage' do
- local_failure = create(:upload)
- create(:upload, :object_storage)
+ it 'passes uploads in object storage that exist' do
+ expect(file).to receive(:exists?).and_return(true)
+
+ expect(failures).to eq({})
+ end
- failures = {}
- described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
+ it 'fails uploads in object storage that do not exist' do
+ expect(file).to receive(:exists?).and_return(false)
- expect(failures.keys).to contain_exactly(local_failure)
+ expect(failures.keys).to contain_exactly(upload)
+ expect(failure).to include('Remote object does not exist')
end
end
end