diff options
4 files changed, 109 insertions, 0 deletions
diff --git a/changelogs/unreleased/da-verify-integrity-of-uploaded-files.yml b/changelogs/unreleased/da-verify-integrity-of-uploaded-files.yml
new file mode 100644
index 00000000000..5b850c92d17
--- /dev/null
+++ b/changelogs/unreleased/da-verify-integrity-of-uploaded-files.yml
@@ -0,0 +1,5 @@
+title: Add rake task to check integrity of uploaded files
+type: added
diff --git a/doc/administration/raketasks/ b/doc/administration/raketasks/
index c39cb49b1c6..d1ed152b58c 100644
--- a/doc/administration/raketasks/
+++ b/doc/administration/raketasks/
@@ -76,6 +76,39 @@ Example output:
![gitlab:user:check_repos output](../img/raketasks/check_repos_output.png)
+## Uploaded Files Integrity
+The uploads check Rake task will loop through all uploads in the database
+and run two checks to determine the integrity of each file:
+1. Check if the file exists on the file system.
+1. Check if the checksum of the file on the file system matches the checksum in the database.
+**Omnibus Installation**
+sudo gitlab-rake gitlab:uploads:check
+**Source Installation**
+sudo -u git -H bundle exec rake gitlab:uploads:check RAILS_ENV=production
+This task also accepts some environment variables which you can use to override
+certain values:
+Variable | Type | Description
+-------- | ---- | -----------
+`BATCH` | integer | Specifies the size of the batch. Defaults to 200.
+`ID_FROM` | integer | Specifies the ID to start from, inclusive of the value.
+`ID_TO` | integer | Specifies the ID value to end at, inclusive of the value.
+sudo gitlab-rake gitlab:uploads:check BATCH=100 ID_FROM=50 ID_TO=250
## LDAP Check
The LDAP check Rake task will test the bind_dn and password credentials
diff --git a/lib/tasks/gitlab/uploads.rake b/lib/tasks/gitlab/uploads.rake
new file mode 100644
index 00000000000..df31567ce64
--- /dev/null
+++ b/lib/tasks/gitlab/uploads.rake
@@ -0,0 +1,44 @@
+namespace :gitlab do
+ namespace :uploads do
+ desc 'GitLab | Uploads | Check integrity of uploaded files'
+ task check: :environment do
+ puts 'Checking integrity of uploaded files'
+ uploads_batches do |batch|
+ batch.each do |upload|
+ puts "- Checking file (#{}): #{upload.absolute_path}".color(:green)
+ if upload.exist?
+ check_checksum(upload)
+ else
+ puts " * File does not exist on the file system".color(:red)
+ end
+ end
+ end
+ puts 'Done!'
+ end
+ def batch_size
+ ENV.fetch('BATCH', 200).to_i
+ end
+ def calculate_checksum(absolute_path)
+ Digest::SHA256.file(absolute_path).hexdigest
+ end
+ def check_checksum(upload)
+ checksum = calculate_checksum(upload.absolute_path)
+ if checksum != upload.checksum
+ puts " * File checksum (#{checksum}) does not match the one in the database (#{upload.checksum})".color(:red)
+ end
+ end
+ def uploads_batches(&block)
+ Upload.all.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
+ yield relation
+ end
+ end
+ end
diff --git a/spec/tasks/gitlab/uploads_rake_spec.rb b/spec/tasks/gitlab/uploads_rake_spec.rb
new file mode 100644
index 00000000000..ac0005e51e0
--- /dev/null
+++ b/spec/tasks/gitlab/uploads_rake_spec.rb
@@ -0,0 +1,27 @@
+require 'rake_helper'
+describe 'gitlab:uploads rake tasks' do
+ describe 'check' do
+ let!(:upload) { create(:upload, path: Rails.root.join('spec/fixtures/banana_sample.gif')) }
+ before do
+ Rake.application.rake_require 'tasks/gitlab/uploads'
+ end
+ it 'outputs the integrity check for each uploaded file' do
+ expect { run_rake_task('gitlab:uploads:check') }.to output(/Checking file \(#{}\): #{Regexp.quote(upload.absolute_path)}/).to_stdout
+ end
+ it 'errors out about missing files on the file system' do
+ create(:upload)
+ expect { run_rake_task('gitlab:uploads:check') }.to output(/File does not exist on the file system/).to_stdout
+ end
+ it 'errors out about invalid checksum' do
+ upload.update_column(:checksum, '01a3156db2cf4f67ec823680b40b7302f89ab39179124ad219f94919b8a1769e')
+ expect { run_rake_task('gitlab:uploads:check') }.to output(/File checksum \(9e697aa09fe196909813ee36103e34f721fe47a5fdc8aac0e4e4ac47b9b38282\) does not match the one in the database \(#{upload.checksum}\)/).to_stdout
+ end
+ end