From a6de806498c405355380be4b80f63d134658b779 Mon Sep 17 00:00:00 2001 From: Douglas Barbosa Alexandre Date: Mon, 18 Jul 2016 16:38:36 -0300 Subject: Add service to clean up repository archive cache Replace invocation of `find` with Ruby code that matches old cached files in a better, and safe way to avoid data-integrity issues. --- app/models/repository.rb | 10 ------ .../repository_archive_clean_up_service.rb | 42 ++++++++++++++++++++++ app/workers/repository_archive_cache_worker.rb | 2 +- spec/models/repository_spec.rb | 24 ------------- .../repository_archive_clean_up_service_spec.rb | 31 ++++++++++++++++ 5 files changed, 74 insertions(+), 35 deletions(-) create mode 100644 app/services/repository_archive_clean_up_service.rb create mode 100644 spec/services/repository_archive_clean_up_service_spec.rb diff --git a/app/models/repository.rb b/app/models/repository.rb index 511df2d67c6..a6580e85498 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -11,16 +11,6 @@ class Repository attr_accessor :path_with_namespace, :project - def self.clean_old_archives - Gitlab::Metrics.measure(:clean_old_archives) do - repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path - - return unless File.directory?(repository_downloads_path) - - Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete)) - end - end - def initialize(path_with_namespace, project) @path_with_namespace = path_with_namespace @project = project diff --git a/app/services/repository_archive_clean_up_service.rb b/app/services/repository_archive_clean_up_service.rb new file mode 100644 index 00000000000..9764df6492d --- /dev/null +++ b/app/services/repository_archive_clean_up_service.rb @@ -0,0 +1,42 @@ +class RepositoryArchiveCleanUpService + ALLOWED_ARCHIVE_EXTENSIONS = %w[tar tar.bz2 tar.gz zip].join(',').freeze + LAST_MODIFIED_TIME_IN_MINUTES = 120 + + def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES) + @mmin = mmin + @path = Gitlab.config.gitlab.repository_downloads_path + end + + def execute + Gitlab::Metrics.measure(:repository_archive_clean_up) do + return unless File.directory?(path) + + clean_up_old_archives + clean_up_empty_directories + end + end + + private + + attr_reader :mmin, :path + + def clean_up_old_archives + Dir.glob("#{path}/**.git/*{#{ALLOWED_ARCHIVE_EXTENSIONS}}") do |filename| + File.delete(filename) if older?(filename) + end + end + + def older?(filename) + File.exist?(filename) && File.new(filename).mtime < (Time.now - mmin * 60) + end + + def clean_up_empty_directories + Dir.glob("#{path}/**.git/").reverse_each do |dir| + Dir.rmdir(dir) if empty?(dir) + end + end + + def empty?(dir) + (Dir.entries(dir) - %w[. ..]).empty? + end +end diff --git a/app/workers/repository_archive_cache_worker.rb b/app/workers/repository_archive_cache_worker.rb index 47c5a670ed4..a2e49c61f59 100644 --- a/app/workers/repository_archive_cache_worker.rb +++ b/app/workers/repository_archive_cache_worker.rb @@ -4,6 +4,6 @@ class RepositoryArchiveCacheWorker sidekiq_options queue: :default def perform - Repository.clean_old_archives + RepositoryArchiveCleanUpService.new.execute end end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 59c5732c075..38b0c345b48 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1172,30 +1172,6 @@ describe Repository, models: true do end end - describe '.clean_old_archives' do - let(:path) { Gitlab.config.gitlab.repository_downloads_path } - - context 'when the downloads directory does not exist' do - it 'does not remove any archives' do - expect(File).to receive(:directory?).with(path).and_return(false) - - expect(Gitlab::Popen).not_to receive(:popen) - - described_class.clean_old_archives - end - end - - context 'when the downloads directory exists' do - it 'removes old archives' do - expect(File).to receive(:directory?).with(path).and_return(true) - - expect(Gitlab::Popen).to receive(:popen) - - described_class.clean_old_archives - end - end - end - describe "#keep_around" do it "stores a reference to the specified commit sha so it isn't garbage collected" do repository.keep_around(sample_commit.id) diff --git a/spec/services/repository_archive_clean_up_service_spec.rb b/spec/services/repository_archive_clean_up_service_spec.rb new file mode 100644 index 00000000000..a9ac21258da --- /dev/null +++ b/spec/services/repository_archive_clean_up_service_spec.rb @@ -0,0 +1,31 @@ +require 'spec_helper' + +describe RepositoryArchiveCleanUpService, services: true do + describe '#execute' do + let(:path) { Gitlab.config.gitlab.repository_downloads_path } + + subject(:service) { described_class.new } + + context 'when the downloads directory does not exist' do + it 'does not remove any archives' do + expect(File).to receive(:directory?).with(path).and_return(false) + + expect(service).not_to receive(:clean_up_old_archives) + expect(service).not_to receive(:clean_up_empty_directories) + + service.execute + end + end + + context 'when the downloads directory exists' do + it 'removes old archives' do + expect(File).to receive(:directory?).with(path).and_return(true) + + expect(service).to receive(:clean_up_old_archives) + expect(service).to receive(:clean_up_empty_directories) + + service.execute + end + end + end +end -- cgit v1.2.1