summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Cai <jcai@gitlab.com>2019-04-23 10:32:06 -0700
committerJohn Cai <jcai@gitlab.com>2019-05-21 13:34:31 -0700
commit6c35fb59b79e7abc321cee65fc1730ce67908b6b (patch)
treedad50d3e2e3d3df05ac48fc81a93c37ea8c8c482
parentbb5bbbaa1a64d18108470132bb2ed8ed5da52ce7 (diff)
downloadgitlab-ce-jc-git-deduplication-service.tar.gz
Add GitDeduplicationService for deduplication housekeepingjc-git-deduplication-service
GitDeduplicationService performs idempotent operations on deduplicated projects.
-rw-r--r--app/services/projects/git_deduplication_service.rb64
-rw-r--r--app/workers/git_garbage_collect_worker.rb4
-rw-r--r--spec/services/projects/git_deduplication_service_spec.rb90
3 files changed, 157 insertions, 1 deletions
diff --git a/app/services/projects/git_deduplication_service.rb b/app/services/projects/git_deduplication_service.rb
new file mode 100644
index 00000000000..74d469ecf37
--- /dev/null
+++ b/app/services/projects/git_deduplication_service.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Projects
+ class GitDeduplicationService < BaseService
+ include ExclusiveLeaseGuard
+
+ LEASE_TIMEOUT = 86400
+
+ delegate :pool_repository, to: :project
+ attr_reader :project
+
+ def initialize(project)
+ @project = project
+ end
+
+ def execute
+ try_obtain_lease do
+ unless project.has_pool_repository?
+ disconnect_git_alternates
+ break
+ end
+
+ if source_project? && pool_can_fetch_from_source?
+ fetch_from_source
+ end
+
+ project.link_pool_repository if same_storage_as_pool?(project.repository)
+ end
+ end
+
+ private
+
+ def disconnect_git_alternates
+ project.repository.disconnect_alternates
+ end
+
+ def pool_can_fetch_from_source?
+ project.git_objects_poolable? &&
+ same_storage_as_pool?(pool_repository.source_project.repository)
+ end
+
+ def same_storage_as_pool?(repository)
+ pool_repository.object_pool.repository.storage == repository.storage
+ end
+
+ def fetch_from_source
+ project.pool_repository.object_pool.fetch
+ end
+
+ def source_project?
+ return unless project.has_pool_repository?
+
+ project.pool_repository.source_project == project
+ end
+
+ def lease_timeout
+ LEASE_TIMEOUT
+ end
+
+ def lease_key
+ "git_deduplication:#{project.id}"
+ end
+ end
+end
diff --git a/app/workers/git_garbage_collect_worker.rb b/app/workers/git_garbage_collect_worker.rb
index d4a6f53dae5..489d6215774 100644
--- a/app/workers/git_garbage_collect_worker.rb
+++ b/app/workers/git_garbage_collect_worker.rb
@@ -23,7 +23,9 @@ class GitGarbageCollectWorker
end
task = task.to_sym
- project.link_pool_repository
+
+ ::Projects::GitDeduplicationService.new(project).execute
+
gitaly_call(task, project.repository.raw_repository)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail
diff --git a/spec/services/projects/git_deduplication_service_spec.rb b/spec/services/projects/git_deduplication_service_spec.rb
new file mode 100644
index 00000000000..3acbc46b473
--- /dev/null
+++ b/spec/services/projects/git_deduplication_service_spec.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Projects::GitDeduplicationService do
+ include ExclusiveLeaseHelpers
+
+ let(:pool) { create(:pool_repository, :ready) }
+ let(:project) { create(:project, :repository) }
+ let(:lease_key) { "git_deduplication:#{project.id}" }
+ let(:lease_timeout) { Projects::GitDeduplicationService::LEASE_TIMEOUT }
+
+ subject(:service) { described_class.new(project) }
+
+ describe '#execute' do
+ context 'when there is not already a lease' do
+ context 'when the project does not have a pool repository' do
+ it 'calls disconnect_git_alternates' do
+ stub_exclusive_lease(lease_key, timeout: lease_timeout)
+
+ expect(project.repository).to receive(:disconnect_alternates)
+
+ service.execute
+ end
+ end
+
+ context 'when the project has a pool repository' do
+ let(:project) { create(:project, :repository, pool_repository: pool) }
+
+ context 'when the project is a source project' do
+ let(:lease_key) { "git_deduplication:#{pool.source_project.id}" }
+
+ subject(:service) { described_class.new(pool.source_project) }
+
+ it 'calls fetch' do
+ stub_exclusive_lease(lease_key, timeout: lease_timeout)
+ allow(pool.source_project).to receive(:git_objects_poolable?).and_return(true)
+
+ expect(pool.object_pool).to receive(:fetch)
+
+ service.execute
+ end
+
+ it 'does not call fetch if git objects are not poolable' do
+ stub_exclusive_lease(lease_key, timeout: lease_timeout)
+ allow(pool.source_project).to receive(:git_objects_poolable?).and_return(false)
+
+ expect(pool.object_pool).not_to receive(:fetch)
+
+ service.execute
+ end
+
+ it 'does not call fetch if pool and project are not on the same storage' do
+ stub_exclusive_lease(lease_key, timeout: lease_timeout)
+ allow(pool.source_project.repository).to receive(:storage).and_return('special_storage_001')
+
+ expect(pool.object_pool).not_to receive(:fetch)
+
+ service.execute
+ end
+ end
+
+ it 'links the repository to the object pool' do
+ expect(project).to receive(:link_pool_repository)
+
+ service.execute
+ end
+
+ it 'does not link the repository to the object pool if they are not on the same storage' do
+ allow(project.repository).to receive(:storage).and_return('special_storage_001')
+ expect(project).not_to receive(:link_pool_repository)
+
+ service.execute
+ end
+ end
+
+ context 'when a lease is already out' do
+ before do
+ stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
+ end
+
+ it 'fails when a lease is already out' do
+ expect(service).to receive(:log_error).with('Cannot obtain an exclusive lease. There must be another instance already in execution.')
+
+ service.execute
+ end
+ end
+ end
+ end
+end