summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouglas Barbosa Alexandre <dbalexandre@gmail.com>2016-07-18 16:38:36 -0300
committerDouglas Barbosa Alexandre <dbalexandre@gmail.com>2016-07-21 10:31:49 -0300
commita6de806498c405355380be4b80f63d134658b779 (patch)
tree16729b67fb209feb247fa7812a0fc6f48147a082
parent6ae177ef53e57d98f95238df990da2225bca3fab (diff)
downloadgitlab-ce-a6de806498c405355380be4b80f63d134658b779.tar.gz
Add service to clean up repository archive cache
Replace invocation of `find` with Ruby code that matches old cached files in a better, and safe way to avoid data-integrity issues.
-rw-r--r--app/models/repository.rb10
-rw-r--r--app/services/repository_archive_clean_up_service.rb42
-rw-r--r--app/workers/repository_archive_cache_worker.rb2
-rw-r--r--spec/models/repository_spec.rb24
-rw-r--r--spec/services/repository_archive_clean_up_service_spec.rb31
5 files changed, 74 insertions, 35 deletions
diff --git a/app/models/repository.rb b/app/models/repository.rb
index 511df2d67c6..a6580e85498 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -11,16 +11,6 @@ class Repository
attr_accessor :path_with_namespace, :project
- def self.clean_old_archives
- Gitlab::Metrics.measure(:clean_old_archives) do
- repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path
-
- return unless File.directory?(repository_downloads_path)
-
- Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete))
- end
- end
-
def initialize(path_with_namespace, project)
@path_with_namespace = path_with_namespace
@project = project
diff --git a/app/services/repository_archive_clean_up_service.rb b/app/services/repository_archive_clean_up_service.rb
new file mode 100644
index 00000000000..9764df6492d
--- /dev/null
+++ b/app/services/repository_archive_clean_up_service.rb
@@ -0,0 +1,42 @@
+class RepositoryArchiveCleanUpService
+ ALLOWED_ARCHIVE_EXTENSIONS = %w[tar tar.bz2 tar.gz zip].join(',').freeze
+ LAST_MODIFIED_TIME_IN_MINUTES = 120
+
+ def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES)
+ @mmin = mmin
+ @path = Gitlab.config.gitlab.repository_downloads_path
+ end
+
+ def execute
+ Gitlab::Metrics.measure(:repository_archive_clean_up) do
+ return unless File.directory?(path)
+
+ clean_up_old_archives
+ clean_up_empty_directories
+ end
+ end
+
+ private
+
+ attr_reader :mmin, :path
+
+ def clean_up_old_archives
+ Dir.glob("#{path}/**.git/*{#{ALLOWED_ARCHIVE_EXTENSIONS}}") do |filename|
+ File.delete(filename) if older?(filename)
+ end
+ end
+
+ def older?(filename)
+ File.exist?(filename) && File.new(filename).mtime < (Time.now - mmin * 60)
+ end
+
+ def clean_up_empty_directories
+ Dir.glob("#{path}/**.git/").reverse_each do |dir|
+ Dir.rmdir(dir) if empty?(dir)
+ end
+ end
+
+ def empty?(dir)
+ (Dir.entries(dir) - %w[. ..]).empty?
+ end
+end
diff --git a/app/workers/repository_archive_cache_worker.rb b/app/workers/repository_archive_cache_worker.rb
index 47c5a670ed4..a2e49c61f59 100644
--- a/app/workers/repository_archive_cache_worker.rb
+++ b/app/workers/repository_archive_cache_worker.rb
@@ -4,6 +4,6 @@ class RepositoryArchiveCacheWorker
sidekiq_options queue: :default
def perform
- Repository.clean_old_archives
+ RepositoryArchiveCleanUpService.new.execute
end
end
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index 59c5732c075..38b0c345b48 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -1172,30 +1172,6 @@ describe Repository, models: true do
end
end
- describe '.clean_old_archives' do
- let(:path) { Gitlab.config.gitlab.repository_downloads_path }
-
- context 'when the downloads directory does not exist' do
- it 'does not remove any archives' do
- expect(File).to receive(:directory?).with(path).and_return(false)
-
- expect(Gitlab::Popen).not_to receive(:popen)
-
- described_class.clean_old_archives
- end
- end
-
- context 'when the downloads directory exists' do
- it 'removes old archives' do
- expect(File).to receive(:directory?).with(path).and_return(true)
-
- expect(Gitlab::Popen).to receive(:popen)
-
- described_class.clean_old_archives
- end
- end
- end
-
describe "#keep_around" do
it "stores a reference to the specified commit sha so it isn't garbage collected" do
repository.keep_around(sample_commit.id)
diff --git a/spec/services/repository_archive_clean_up_service_spec.rb b/spec/services/repository_archive_clean_up_service_spec.rb
new file mode 100644
index 00000000000..a9ac21258da
--- /dev/null
+++ b/spec/services/repository_archive_clean_up_service_spec.rb
@@ -0,0 +1,31 @@
+require 'spec_helper'
+
+describe RepositoryArchiveCleanUpService, services: true do
+ describe '#execute' do
+ let(:path) { Gitlab.config.gitlab.repository_downloads_path }
+
+ subject(:service) { described_class.new }
+
+ context 'when the downloads directory does not exist' do
+ it 'does not remove any archives' do
+ expect(File).to receive(:directory?).with(path).and_return(false)
+
+ expect(service).not_to receive(:clean_up_old_archives)
+ expect(service).not_to receive(:clean_up_empty_directories)
+
+ service.execute
+ end
+ end
+
+ context 'when the downloads directory exists' do
+ it 'removes old archives' do
+ expect(File).to receive(:directory?).with(path).and_return(true)
+
+ expect(service).to receive(:clean_up_old_archives)
+ expect(service).to receive(:clean_up_empty_directories)
+
+ service.execute
+ end
+ end
+ end
+end