summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Kozono <mkozono@gmail.com>2018-07-26 14:23:33 -0700
committerMichael Kozono <mkozono@gmail.com>2018-07-31 11:09:42 -0700
commit3cbd8b13436be25074db17dfbd555588e917279d (patch)
tree483da69a036ae655db97ac56b056e0b811f25b15
parent400925c480d95df735862d7edd58cc36cb8fbf68 (diff)
downloadgitlab-ce-3cbd8b13436be25074db17dfbd555588e917279d.tar.gz
Add local project uploads cleanup task
-rw-r--r--changelogs/unreleased/mk-add-local-project-uploads-cleanup-task.yml5
-rw-r--r--doc/raketasks/cleanup.md31
-rw-r--r--lib/gitlab/cleanup/project_upload_file_finder.rb66
-rw-r--r--lib/gitlab/cleanup/project_uploads.rb125
-rw-r--r--lib/tasks/gitlab/cleanup.rake39
-rw-r--r--spec/tasks/gitlab/cleanup_rake_spec.rb317
6 files changed, 578 insertions, 5 deletions
diff --git a/changelogs/unreleased/mk-add-local-project-uploads-cleanup-task.yml b/changelogs/unreleased/mk-add-local-project-uploads-cleanup-task.yml
new file mode 100644
index 00000000000..9d38b353a41
--- /dev/null
+++ b/changelogs/unreleased/mk-add-local-project-uploads-cleanup-task.yml
@@ -0,0 +1,5 @@
+---
+title: Add local project uploads cleanup task
+merge_request: 20863
+author:
+type: added
diff --git a/doc/raketasks/cleanup.md b/doc/raketasks/cleanup.md
index cf891cd90ad..e2eb342361a 100644
--- a/doc/raketasks/cleanup.md
+++ b/doc/raketasks/cleanup.md
@@ -22,3 +22,34 @@ sudo gitlab-rake gitlab:cleanup:repos
# installation from source
bundle exec rake gitlab:cleanup:repos RAILS_ENV=production
```
+
+Clean up local project upload files if they don't exist in GitLab database. The
+task attempts to fix the file if it can find its project, otherwise it moves the
+file to a lost and found directory.
+
+```
+# omnibus-gitlab
+sudo gitlab-rake gitlab:cleanup:project_uploads
+
+# installation from source
+bundle exec rake gitlab:cleanup:project_uploads RAILS_ENV=production
+```
+
+Example output:
+
+```
+$ sudo gitlab-rake gitlab:cleanup:project_uploads
+I, [2018-07-27T12:08:27.671559 #89817] INFO -- : Looking for orphaned project uploads to clean up. Dry run...
+D, [2018-07-27T12:08:28.293568 #89817] DEBUG -- : Processing batch of 500 project upload file paths, starting with /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out
+I, [2018-07-27T12:08:28.689869 #89817] INFO -- : Can move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out
+I, [2018-07-27T12:08:28.755624 #89817] INFO -- : Can fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt
+I, [2018-07-27T12:08:28.760257 #89817] INFO -- : Can move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png
+I, [2018-07-27T12:08:28.764470 #89817] INFO -- : To cleanup these files run this command with DRY_RUN=false
+
+$ sudo gitlab-rake gitlab:cleanup:project_uploads DRY_RUN=false
+I, [2018-07-27T12:08:32.944414 #89936] INFO -- : Looking for orphaned project uploads to clean up...
+D, [2018-07-27T12:08:33.293568 #89817] DEBUG -- : Processing batch of 500 project upload file paths, starting with /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out
+I, [2018-07-27T12:08:33.689869 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out
+I, [2018-07-27T12:08:33.755624 #89817] INFO -- : Did fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt
+I, [2018-07-27T12:08:33.760257 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png
+``` \ No newline at end of file
diff --git a/lib/gitlab/cleanup/project_upload_file_finder.rb b/lib/gitlab/cleanup/project_upload_file_finder.rb
new file mode 100644
index 00000000000..2ee8b60e76a
--- /dev/null
+++ b/lib/gitlab/cleanup/project_upload_file_finder.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class ProjectUploadFileFinder
+ FIND_BATCH_SIZE = 500
+ ABSOLUTE_UPLOAD_DIR = FileUploader.root.freeze
+ EXCLUDED_SYSTEM_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/-/*".freeze
+ EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
+ EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
+
+ # Paths are relative to the upload directory
+ def each_file_batch(batch_size: FIND_BATCH_SIZE, &block)
+ cmd = build_find_command(ABSOLUTE_UPLOAD_DIR)
+
+ Open3.popen2(*cmd) do |stdin, stdout, status_thread|
+ yield_paths_in_batches(stdout, batch_size, &block)
+
+ raise "Find command failed" unless status_thread.value.success?
+ end
+ end
+
+ private
+
+ def yield_paths_in_batches(stdout, batch_size, &block)
+ paths = []
+
+ stdout.each_line("\0") do |line|
+ paths << line.chomp("\0")
+
+ if paths.size >= batch_size
+ yield(paths)
+ paths = []
+ end
+ end
+
+ yield(paths) if paths.any?
+ end
+
+ def build_find_command(search_dir)
+ cmd = %W[find -L #{search_dir}
+ -type f
+ ! ( -path #{EXCLUDED_SYSTEM_UPLOADS_PATH} -prune )
+ ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune )
+ ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune )
+ -print0]
+
+ ionice = which_ionice
+ cmd = %W[#{ionice} -c Idle] + cmd if ionice
+
+ log_msg = "find command: \"#{cmd.join(' ')}\""
+ Rails.logger.info log_msg
+
+ cmd
+ end
+
+ def which_ionice
+ Gitlab::Utils.which('ionice')
+ rescue StandardError
+ # In this case, returning false is relatively safe,
+ # even though it isn't very nice
+ false
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/project_uploads.rb b/lib/gitlab/cleanup/project_uploads.rb
new file mode 100644
index 00000000000..b88e00311d5
--- /dev/null
+++ b/lib/gitlab/cleanup/project_uploads.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class ProjectUploads
+ LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
+
+ attr_reader :logger
+
+ def initialize(logger: nil)
+ @logger = logger || Rails.logger
+ end
+
+ def run!(dry_run: true)
+ logger.info "Looking for orphaned project uploads to clean up#{'. Dry run' if dry_run}..."
+
+ each_orphan_file do |path, upload_path|
+ result = cleanup(path, upload_path, dry_run)
+
+ logger.info result
+ end
+ end
+
+ private
+
+ def cleanup(path, upload_path, dry_run)
+ # This happened in staging:
+ # `find` returned a path on which `File.delete` raised `Errno::ENOENT`
+ return "Cannot find file: #{path}" unless File.exist?(path)
+
+ correct_path = upload_path && find_correct_path(upload_path)
+
+ if correct_path
+ move(path, correct_path, 'fix', dry_run)
+ else
+ move_to_lost_and_found(path, dry_run)
+ end
+ end
+
+ # Accepts a path in the form of "#{hex_secret}/#{filename}"
+ def find_correct_path(upload_path)
+ upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
+ return unless upload && upload.local?
+
+ upload.absolute_path
+ rescue => e
+ logger.error e.message
+
+ # absolute_path depends on a lot of code. If it doesn't work, then it
+ # it doesn't matter if the upload file is in the right place. Treat it
+ # as uncorrectable.
+ # I.e. the project record might be missing, which raises an exception.
+ nil
+ end
+
+ def move_to_lost_and_found(path, dry_run)
+ new_path = path.sub(/\A#{ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR}/, LOST_AND_FOUND)
+
+ move(path, new_path, 'move to lost and found', dry_run)
+ end
+
+ def move(path, new_path, prefix, dry_run)
+ action = "#{prefix} #{path} -> #{new_path}"
+
+ if dry_run
+ "Can #{action}"
+ else
+ begin
+ FileUtils.mkdir_p(File.dirname(new_path))
+ FileUtils.mv(path, new_path)
+
+ "Did #{action}"
+ rescue => e
+ "Error during #{action}: #{e.inspect}"
+ end
+ end
+ end
+
+ # Yields absolute paths of project upload files that are not in the
+ # uploads table
+ def each_orphan_file
+ ProjectUploadFileFinder.new.each_file_batch do |file_paths|
+ logger.debug "Processing batch of #{file_paths.size} project upload file paths, starting with #{file_paths.first}"
+
+ file_paths.each do |path|
+ pup = ProjectUploadPath.from_path(path)
+
+ yield(path, pup.upload_path) if pup.orphan?
+ end
+ end
+ end
+
+ class ProjectUploadPath
+ PROJECT_FULL_PATH_REGEX = %r{\A#{FileUploader.root}/(.+)/(\h+/[^/]+)\z}.freeze
+
+ attr_reader :full_path, :upload_path
+
+ def initialize(full_path, upload_path)
+ @full_path = full_path
+ @upload_path = upload_path
+ end
+
+ def self.from_path(path)
+ path_matched = path.match(PROJECT_FULL_PATH_REGEX)
+ return new(nil, nil) unless path_matched
+
+ new(path_matched[1], path_matched[2])
+ end
+
+ def orphan?
+ return true if full_path.nil? || upload_path.nil?
+
+ # It's possible to reduce to one query, but `where_full_path_in` is complex
+ !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
+ end
+
+ private
+
+ def project_id
+ @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake
index 5e07b12ee1c..a2feb074b1d 100644
--- a/lib/tasks/gitlab/cleanup.rake
+++ b/lib/tasks/gitlab/cleanup.rake
@@ -7,9 +7,8 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
warn_user_is_not_gitlab
- remove_flag = ENV['REMOVE']
- namespaces = Namespace.pluck(:path)
+ namespaces = Namespace.pluck(:path)
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
Gitlab.config.repositories.storages.each do |name, repository_storage|
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
@@ -31,7 +30,7 @@ namespace :gitlab do
end
all_dirs.each do |dir_path|
- if remove_flag
+ if remove?
if FileUtils.rm_rf dir_path
puts "Removed...#{dir_path}".color(:red)
else
@@ -43,7 +42,7 @@ namespace :gitlab do
end
end
- unless remove_flag
+ unless remove?
puts "To cleanup this directories run this command with REMOVE=true".color(:yellow)
end
end
@@ -104,5 +103,37 @@ namespace :gitlab do
puts "To block these users run this command with BLOCK=true".color(:yellow)
end
end
+
+ desc "GitLab | Cleanup | Clean orphaned project uploads"
+ task project_uploads: :gitlab_environment do
+ warn_user_is_not_gitlab
+
+ cleaner = Gitlab::Cleanup::ProjectUploads.new(logger: logger)
+ cleaner.run!(dry_run: dry_run?)
+
+ if dry_run?
+ logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow)
+ end
+ end
+
+ def remove?
+ ENV['REMOVE'] == 'true'
+ end
+
+ def dry_run?
+ ENV['DRY_RUN'] != 'false'
+ end
+
+ def logger
+ return @logger if defined?(@logger)
+
+ @logger = if Rails.env.development? || Rails.env.production?
+ Logger.new(STDOUT).tap do |stdout_logger|
+ stdout_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger))
+ end
+ else
+ Rails.logger
+ end
+ end
end
end
diff --git a/spec/tasks/gitlab/cleanup_rake_spec.rb b/spec/tasks/gitlab/cleanup_rake_spec.rb
index 2bf873c923f..ba08ece1b4b 100644
--- a/spec/tasks/gitlab/cleanup_rake_spec.rb
+++ b/spec/tasks/gitlab/cleanup_rake_spec.rb
@@ -5,7 +5,7 @@ describe 'gitlab:cleanup rake tasks' do
Rake.application.rake_require 'tasks/gitlab/cleanup'
end
- describe 'cleanup' do
+ describe 'cleanup namespaces and repos' do
let(:storages) do
{
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
@@ -67,4 +67,319 @@ describe 'gitlab:cleanup rake tasks' do
end
end
end
+
+ describe 'cleanup:project_uploads' do
+ context 'orphaned project upload file' do
+ context 'when an upload record matching the secret and filename is found' do
+ context 'when the project is still in legacy storage' do
+ let!(:orphaned) { create(:upload, :issuable_upload, :with_file, model: build(:project, :legacy_storage)) }
+ let!(:correct_path) { orphaned.absolute_path }
+ let!(:other_project) { create(:project, :legacy_storage) }
+ let!(:orphaned_path) { correct_path.sub(/#{orphaned.model.full_path}/, other_project.full_path) }
+
+ before do
+ FileUtils.mkdir_p(File.dirname(orphaned_path))
+ FileUtils.mv(correct_path, orphaned_path)
+ end
+
+ it 'moves the file to its proper location' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Did fix #{orphaned_path} -> #{correct_path}")
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_falsey
+ expect(File.exist?(correct_path)).to be_truthy
+ end
+
+ it 'a dry run does not move the file' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Can fix #{orphaned_path} -> #{correct_path}")
+ expect(Rails.logger).to receive(:info)
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+ end
+
+ context 'when the project record is missing (Upload#absolute_path raises error)' do
+ let!(:lost_and_found_path) { File.join(FileUploader.root, '-', 'project-lost-found', other_project.full_path, orphaned.path) }
+
+ before do
+ orphaned.model.delete
+ end
+
+ it 'moves the file to lost and found' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Did move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_falsey
+ expect(File.exist?(lost_and_found_path)).to be_truthy
+ end
+
+ it 'a dry run does not move the file' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Can move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+ expect(Rails.logger).to receive(:info)
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+ end
+ end
+ end
+
+ context 'when the project was moved to hashed storage' do
+ let!(:orphaned) { create(:upload, :issuable_upload, :with_file) }
+ let!(:correct_path) { orphaned.absolute_path }
+ let!(:orphaned_path) { File.join(FileUploader.root, 'foo', 'bar', orphaned.path) }
+
+ before do
+ FileUtils.mkdir_p(File.dirname(orphaned_path))
+ FileUtils.mv(correct_path, orphaned_path)
+ end
+
+ it 'moves the file to its proper location' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Did fix #{orphaned_path} -> #{correct_path}")
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_falsey
+ expect(File.exist?(correct_path)).to be_truthy
+ end
+
+ it 'a dry run does not move the file' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Can fix #{orphaned_path} -> #{correct_path}")
+ expect(Rails.logger).to receive(:info)
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(correct_path)).to be_falsey
+ end
+ end
+ end
+
+ context 'when a matching upload record can not be found' do
+ context 'when the file path fits the known pattern' do
+ let!(:orphaned) { create(:upload, :issuable_upload, :with_file, model: build(:project, :legacy_storage)) }
+ let!(:orphaned_path) { orphaned.absolute_path }
+ let!(:lost_and_found_path) { File.join(FileUploader.root, '-', 'project-lost-found', orphaned.model.full_path, orphaned.path) }
+
+ before do
+ orphaned.delete
+ end
+
+ it 'moves the file to lost and found' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Did move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_falsey
+ expect(File.exist?(lost_and_found_path)).to be_truthy
+ end
+
+ it 'a dry run does not move the file' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Can move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+ expect(Rails.logger).to receive(:info)
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+ end
+ end
+
+ context 'when the file path does not fit the known pattern' do
+ let!(:invalid_path) { File.join('group', 'file.jpg') }
+ let!(:orphaned_path) { File.join(FileUploader.root, invalid_path) }
+ let!(:lost_and_found_path) { File.join(FileUploader.root, '-', 'project-lost-found', invalid_path) }
+
+ before do
+ FileUtils.mkdir_p(File.dirname(orphaned_path))
+ FileUtils.touch(orphaned_path)
+ end
+
+ after do
+ File.delete(orphaned_path) if File.exist?(orphaned_path)
+ end
+
+ it 'moves the file to lost and found' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Did move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_falsey
+ expect(File.exist?(lost_and_found_path)).to be_truthy
+ end
+
+ it 'a dry run does not move the file' do
+ expect(Rails.logger).to receive(:info).twice
+ expect(Rails.logger).to receive(:info).with("Can move to lost and found #{orphaned_path} -> #{lost_and_found_path}")
+ expect(Rails.logger).to receive(:info)
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ expect(File.exist?(lost_and_found_path)).to be_falsey
+ end
+ end
+ end
+ end
+
+ context 'non-orphaned project upload file' do
+ it 'does not move the file' do
+ tracked = create(:upload, :issuable_upload, :with_file, model: build(:project, :legacy_storage))
+ tracked_path = tracked.absolute_path
+
+ expect(Rails.logger).not_to receive(:info).with(/move|fix/i)
+ expect(File.exist?(tracked_path)).to be_truthy
+
+ stub_env('DRY_RUN', 'false')
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(tracked_path)).to be_truthy
+ end
+ end
+
+ context 'ignorable cases' do
+ shared_examples_for 'does not move anything' do
+ it 'does not move even an orphan file' do
+ orphaned = create(:upload, :issuable_upload, :with_file, model: project)
+ orphaned_path = orphaned.absolute_path
+ orphaned.delete
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(orphaned_path)).to be_truthy
+ end
+ end
+
+ # Because we aren't concerned about these, and can save a lot of
+ # processing time by ignoring them. If we wish to cleanup hashed storage
+ # directories, it should simply require removing this test and modifying
+ # the find command.
+ context 'when the file is already in hashed storage' do
+ let(:project) { create(:project) }
+
+ before do
+ stub_env('DRY_RUN', 'false')
+ expect(Rails.logger).not_to receive(:info).with(/move|fix/i)
+ end
+
+ it_behaves_like 'does not move anything'
+ end
+
+ context 'when DRY_RUN env var is unset' do
+ let(:project) { create(:project, :legacy_storage) }
+
+ it_behaves_like 'does not move anything'
+ end
+
+ context 'when DRY_RUN env var is true' do
+ let(:project) { create(:project, :legacy_storage) }
+
+ before do
+ stub_env('DRY_RUN', 'true')
+ end
+
+ it_behaves_like 'does not move anything'
+ end
+
+ context 'when DRY_RUN env var is foo' do
+ let(:project) { create(:project, :legacy_storage) }
+
+ before do
+ stub_env('DRY_RUN', 'foo')
+ end
+
+ it_behaves_like 'does not move anything'
+ end
+
+ it 'does not move any non-project (FileUploader) uploads' do
+ stub_env('DRY_RUN', 'false')
+
+ paths = []
+ orphaned1 = create(:upload, :personal_snippet_upload, :with_file)
+ orphaned2 = create(:upload, :namespace_upload, :with_file)
+ orphaned3 = create(:upload, :attachment_upload, :with_file)
+ paths << orphaned1.absolute_path
+ paths << orphaned2.absolute_path
+ paths << orphaned3.absolute_path
+ Upload.delete_all
+
+ expect(Rails.logger).not_to receive(:info).with(/move|fix/i)
+ paths.each do |path|
+ expect(File.exist?(path)).to be_truthy
+ end
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ paths.each do |path|
+ expect(File.exist?(path)).to be_truthy
+ end
+ end
+
+ it 'does not move any uploads in tmp (which would interfere with ongoing upload activity)' do
+ stub_env('DRY_RUN', 'false')
+
+ path = File.join(FileUploader.root, 'tmp', 'foo.jpg')
+ FileUtils.mkdir_p(File.dirname(path))
+ FileUtils.touch(path)
+
+ expect(Rails.logger).not_to receive(:info).with(/move|fix/i)
+ expect(File.exist?(path)).to be_truthy
+
+ run_rake_task('gitlab:cleanup:project_uploads')
+
+ expect(File.exist?(path)).to be_truthy
+ end
+ end
+ end
end