summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarka Kadlecová <jarka@gitlab.com>2018-07-30 20:14:38 +0200
committerJarka Kadlecová <jarka@gitlab.com>2018-08-07 12:51:58 +0200
commit6aaeb6dc411d6a92e9dc8d7968aa774c9e8ae044 (patch)
treef9877c182926456459693cd09c0aa63da21baa01
parentab08f9986de070b8d6bc15c36115653bc3ef3000 (diff)
downloadgitlab-ce-6aaeb6dc411d6a92e9dc8d7968aa774c9e8ae044.tar.gz
Clean orphaned files in object storage
-rw-r--r--changelogs/unreleased/46535-orphaned-uploads.yml5
-rw-r--r--doc/raketasks/cleanup.md31
-rw-r--r--lib/gitlab/cleanup/remote_uploads.rb80
-rw-r--r--lib/tasks/gitlab/cleanup.rake10
-rw-r--r--spec/lib/gitlab/cleanup/remote_uploads_spec.rb74
5 files changed, 199 insertions, 1 deletions
diff --git a/changelogs/unreleased/46535-orphaned-uploads.yml b/changelogs/unreleased/46535-orphaned-uploads.yml
new file mode 100644
index 00000000000..1cd087a6aad
--- /dev/null
+++ b/changelogs/unreleased/46535-orphaned-uploads.yml
@@ -0,0 +1,5 @@
+---
+title: Clean orphaned files in object storage
+merge_request: 20918
+author:
+type: added
diff --git a/doc/raketasks/cleanup.md b/doc/raketasks/cleanup.md
index e2eb342361a..e70a009323e 100644
--- a/doc/raketasks/cleanup.md
+++ b/doc/raketasks/cleanup.md
@@ -52,4 +52,33 @@ D, [2018-07-27T12:08:33.293568 #89817] DEBUG -- : Processing batch of 500 projec
I, [2018-07-27T12:08:33.689869 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out
I, [2018-07-27T12:08:33.755624 #89817] INFO -- : Did fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt
I, [2018-07-27T12:08:33.760257 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png
-``` \ No newline at end of file
+```
+
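+Move object store upload files to the `lost_and_found` directory if they don't exist in the GitLab database. By default the task only lists the affected files (dry run); pass `DRY_RUN=false` to actually move them.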
+
+```
+# omnibus-gitlab
+sudo gitlab-rake gitlab:cleanup:remote_upload_files
+
+# installation from source
+bundle exec rake gitlab:cleanup:remote_upload_files RAILS_ENV=production
+```
+
+Example output:
+
+```
+$ sudo gitlab-rake gitlab:cleanup:remote_upload_files
+
+I, [2018-08-02T10:26:13.995978 #45011] INFO -- : Looking for orphaned remote uploads to remove. Dry run...
+I, [2018-08-02T10:26:14.120400 #45011] INFO -- : Can be moved to lost and found: @hashed/6b/DSC_6152.JPG
+I, [2018-08-02T10:26:14.120482 #45011] INFO -- : Can be moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
+I, [2018-08-02T10:26:14.120634 #45011] INFO -- : To cleanup these files run this command with DRY_RUN=false
+```
+
+```
+$ sudo gitlab-rake gitlab:cleanup:remote_upload_files DRY_RUN=false
+
+I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote uploads to remove...
+I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG
+I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
+```
diff --git a/lib/gitlab/cleanup/remote_uploads.rb b/lib/gitlab/cleanup/remote_uploads.rb
new file mode 100644
index 00000000000..45a5aea4fcd
--- /dev/null
+++ b/lib/gitlab/cleanup/remote_uploads.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+module Gitlab
+  module Cleanup
+    # Finds files in the uploads object storage directory that have no
+    # matching remote `Upload` record and moves them under the
+    # `lost_and_found/` prefix of the same directory.
+    class RemoteUploads
+      attr_reader :logger
+
+      # Number of remote files checked against the database per query.
+      BATCH_SIZE = 100
+
+      # @param logger [#info, #warn, nil] receives progress messages;
+      #   falls back to `Rails.logger` when nil
+      def initialize(logger: nil)
+        @logger = logger || Rails.logger
+      end
+
+      # Logs every orphaned remote file and, unless `dry_run` is set, moves
+      # it to the lost and found directory. Returns early with a warning
+      # when uploads object storage is not enabled.
+      #
+      # @param dry_run [Boolean] when true, only report what would be moved
+      def run!(dry_run: false)
+        unless configuration.enabled
+          logger.warn "Object storage not enabled. Exit".color(:yellow)
+
+          return
+        end
+
+        logger.info "Looking for orphaned remote uploads to remove#{'. Dry run' if dry_run}..."
+
+        each_orphan_file do |file|
+          info = if dry_run
+                   "Can be moved to lost and found: #{file.key}"
+                 else
+                   new_path = move_to_lost_and_found(file)
+                   "Moved to lost and found: #{file.key} -> #{new_path}"
+                 end
+
+          logger.info(info)
+        end
+      end
+
+      private
+
+      # Yields each remote file whose key is not recorded as the path of an
+      # `Upload` stored remotely. Files are checked in batches of BATCH_SIZE.
+      def each_orphan_file
+        # Files already moved to the lost and found directory are skipped.
+        # A plain prefix check is used instead of an interpolated regexp:
+        # `^` matches at any line start, not only at the start of the key.
+        lost_dir_prefix = "#{lost_and_found_dir}/"
+
+        remote_directory.files.each_slice(BATCH_SIZE) do |remote_files|
+          candidates = remote_files.reject { |file| file.key.start_with?(lost_dir_prefix) }
+          # Nothing to look up when the whole batch is already in lost and found.
+          next if candidates.empty?
+
+          tracked_paths = Upload
+            .where(store: ObjectStorage::Store::REMOTE, path: candidates.map(&:key))
+            .pluck(:path)
+
+          candidates.each do |file|
+            yield file unless tracked_paths.include?(file.key)
+          end
+        end
+      end
+
+      # Copies the file to `lost_and_found/<key>` in the configured remote
+      # directory, destroys the original and returns the new key.
+      def move_to_lost_and_found(file)
+        new_path = "#{lost_and_found_dir}/#{file.key}"
+
+        file.copy(configuration['remote_directory'], new_path)
+        file.destroy
+
+        new_path
+      end
+
+      def lost_and_found_dir
+        'lost_and_found'
+      end
+
+      # NOTE(review): Fog's `directories.get` presumably returns nil for a
+      # missing directory, which would raise on `.files` - confirm.
+      def remote_directory
+        connection.directories.get(configuration['remote_directory'])
+      end
+
+      def connection
+        ::Fog::Storage.new(configuration['connection'].symbolize_keys)
+      end
+
+      def configuration
+        Gitlab.config.uploads.object_store
+      end
+    end
+  end
+end
diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake
index a2feb074b1d..c8a8863443e 100644
--- a/lib/tasks/gitlab/cleanup.rake
+++ b/lib/tasks/gitlab/cleanup.rake
@@ -116,6 +116,16 @@ namespace :gitlab do
end
end
+    desc 'GitLab | Cleanup | Clean orphan remote upload files that do not exist in the db'
+    task remote_upload_files: :environment do
+      # Report orphaned remote uploads, moving them only when this is not a dry run.
+      Gitlab::Cleanup::RemoteUploads.new(logger: logger).run!(dry_run: dry_run?)
+
+      # After a dry run, tell the operator how to perform the real cleanup.
+      logger.info "To cleanup these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
+    end
+
def remove?
ENV['REMOVE'] == 'true'
end
diff --git a/spec/lib/gitlab/cleanup/remote_uploads_spec.rb b/spec/lib/gitlab/cleanup/remote_uploads_spec.rb
new file mode 100644
index 00000000000..8d03baeb07b
--- /dev/null
+++ b/spec/lib/gitlab/cleanup/remote_uploads_spec.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+require 'spec_helper'
+
+describe Gitlab::Cleanup::RemoteUploads do
+  context 'when object_storage is enabled' do
+    let(:connection) { double }
+    let(:directory) { double }
+    # Only the first upload is tracked as remote; the second one is stored
+    # locally, so its remote counterpart counts as an orphan.
+    let!(:uploads) do
+      [
+        create(:upload, path: 'dir/file1', store: ObjectStorage::Store::REMOTE),
+        create(:upload, path: 'dir/file2', store: ObjectStorage::Store::LOCAL)
+      ]
+    end
+    let(:remote_files) do
+      [
+        double(key: 'dir/file1'),
+        double(key: 'dir/file2'),
+        double(key: 'dir/file3'),
+        double(key: 'lost_and_found/dir/file3')
+      ]
+    end
+
+    before do
+      stub_uploads_object_storage(FileUploader)
+
+      expect(::Fog::Storage).to receive(:new).and_return(connection)
+
+      expect(connection).to receive(:directories).and_return(double(get: directory))
+      expect(directory).to receive(:files).and_return(remote_files)
+    end
+
+    context 'when dry_run is set to false' do
+      subject { described_class.new.run!(dry_run: false) }
+
+      it 'moves files that are not in uploads table' do
+        expect(remote_files[0]).not_to receive(:copy)
+        expect(remote_files[0]).not_to receive(:destroy)
+        expect(remote_files[1]).to receive(:copy)
+        expect(remote_files[1]).to receive(:destroy)
+        expect(remote_files[2]).to receive(:copy)
+        expect(remote_files[2]).to receive(:destroy)
+        expect(remote_files[3]).not_to receive(:copy)
+        expect(remote_files[3]).not_to receive(:destroy)
+
+        subject
+      end
+    end
+
+    context 'when dry_run is set to true' do
+      subject { described_class.new.run!(dry_run: true) }
+
+      it 'does not move files' do
+        expect(remote_files[0]).not_to receive(:copy)
+        expect(remote_files[0]).not_to receive(:destroy)
+        expect(remote_files[1]).not_to receive(:copy)
+        expect(remote_files[1]).not_to receive(:destroy)
+        expect(remote_files[2]).not_to receive(:copy)
+        expect(remote_files[2]).not_to receive(:destroy)
+        expect(remote_files[3]).not_to receive(:copy)
+        expect(remote_files[3]).not_to receive(:destroy)
+
+        subject
+      end
+    end
+  end
+
+  context 'when object_storage is not enabled' do
+    # Without an explicit subject, `subject` is only `described_class.new`,
+    # so `run!` would never be called and the expectation below would pass
+    # trivially.
+    subject { described_class.new.run! }
+
+    it 'does not connect to any storage' do
+      expect(::Fog::Storage).not_to receive(:new)
+
+      subject
+    end
+  end
+end