summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelogs/unreleased/detect-orphaned-repositories.yml5
-rw-r--r--lib/system_check/orphans/namespace_check.rb54
-rw-r--r--lib/system_check/orphans/repository_check.rb68
-rw-r--r--lib/tasks/gitlab/check.rake29
-rw-r--r--spec/lib/system_check/orphans/namespace_check_spec.rb61
-rw-r--r--spec/lib/system_check/orphans/repository_check_spec.rb68
6 files changed, 285 insertions, 0 deletions
diff --git a/changelogs/unreleased/detect-orphaned-repositories.yml b/changelogs/unreleased/detect-orphaned-repositories.yml
new file mode 100644
index 00000000000..101c1897826
--- /dev/null
+++ b/changelogs/unreleased/detect-orphaned-repositories.yml
@@ -0,0 +1,5 @@
+---
+title: Scripts to detect orphaned repositories
+merge_request: 14204
+author:
+type: added
diff --git a/lib/system_check/orphans/namespace_check.rb b/lib/system_check/orphans/namespace_check.rb
new file mode 100644
index 00000000000..b8446300f72
--- /dev/null
+++ b/lib/system_check/orphans/namespace_check.rb
@@ -0,0 +1,54 @@
+module SystemCheck
+ module Orphans
+ class NamespaceCheck < SystemCheck::BaseCheck
+ set_name 'Orphaned namespaces:'
+
+ def multi_check
+ Gitlab.config.repositories.storages.each do |storage_name, repository_storage|
+ $stdout.puts
+ $stdout.puts "* Storage: #{storage_name} (#{repository_storage['path']})".color(:yellow)
+ toplevel_namespace_dirs = disk_namespaces(repository_storage['path'])
+
+ orphans = (toplevel_namespace_dirs - existing_namespaces)
+ print_orphans(orphans, storage_name)
+ end
+
+ clear_namespaces! # releases memory when check finishes
+ end
+
+ private
+
+ def print_orphans(orphans, storage_name)
+ if orphans.empty?
+ $stdout.puts "* No orphaned namespaces for #{storage_name} storage".color(:green)
+ return
+ end
+
+ orphans.each do |orphan|
+ $stdout.puts " - #{orphan}".color(:red)
+ end
+ end
+
+ def disk_namespaces(storage_path)
+ fetch_disk_namespaces(storage_path).each_with_object([]) do |namespace_path, result|
+ namespace = File.basename(namespace_path)
+ next if namespace.eql?('@hashed')
+
+ result << namespace
+ end
+ end
+
+ def fetch_disk_namespaces(storage_path)
+ Dir.glob(File.join(storage_path, '*'))
+ end
+
+ def existing_namespaces
+ @namespaces ||= Namespace.where(parent: nil).all.pluck(:path)
+ end
+
+ def clear_namespaces!
+ @namespaces = nil
+ end
+ end
+ end
+end
diff --git a/lib/system_check/orphans/repository_check.rb b/lib/system_check/orphans/repository_check.rb
new file mode 100644
index 00000000000..9b6b2429783
--- /dev/null
+++ b/lib/system_check/orphans/repository_check.rb
@@ -0,0 +1,68 @@
+module SystemCheck
+ module Orphans
+ class RepositoryCheck < SystemCheck::BaseCheck
+ set_name 'Orphaned repositories:'
+ attr_accessor :orphans
+
+ def multi_check
+ Gitlab.config.repositories.storages.each do |storage_name, repository_storage|
+ $stdout.puts
+ $stdout.puts "* Storage: #{storage_name} (#{repository_storage['path']})".color(:yellow)
+
+ repositories = disk_repositories(repository_storage['path'])
+ orphans = (repositories - fetch_repositories(storage_name))
+
+ print_orphans(orphans, storage_name)
+ end
+ end
+
+ private
+
+ def print_orphans(orphans, storage_name)
+ if orphans.empty?
+ $stdout.puts "* No orphaned repositories for #{storage_name} storage".color(:green)
+ return
+ end
+
+ orphans.each do |orphan|
+ $stdout.puts " - #{orphan}".color(:red)
+ end
+ end
+
+ def disk_repositories(storage_path)
+ fetch_disk_namespaces(storage_path).each_with_object([]) do |namespace_path, result|
+ namespace = File.basename(namespace_path)
+ next if namespace.eql?('@hashed')
+
+ fetch_disk_repositories(namespace_path).each do |repo|
+ result << "#{namespace}/#{File.basename(repo)}"
+ end
+ end
+ end
+
+ def fetch_repositories(storage_name)
+ sql = "
+ SELECT
+ CONCAT(n.path, '/', p.path, '.git') repo,
+ CONCAT(n.path, '/', p.path, '.wiki.git') wiki
+ FROM projects p
+ JOIN namespaces n
+ ON (p.namespace_id = n.id AND
+ n.parent_id IS NULL)
+ WHERE (p.repository_storage LIKE ?)
+ "
+
+ query = ActiveRecord::Base.send(:sanitize_sql_array, [sql, storage_name]) # rubocop:disable GitlabSecurity/PublicSend
+ ActiveRecord::Base.connection.select_all(query).rows.try(:flatten!) || []
+ end
+
+ def fetch_disk_namespaces(storage_path)
+ Dir.glob(File.join(storage_path, '*'))
+ end
+
+ def fetch_disk_repositories(namespace_path)
+ Dir.glob(File.join(namespace_path, '*'))
+ end
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/check.rake b/lib/tasks/gitlab/check.rake
index 654f638c454..dfade1f3885 100644
--- a/lib/tasks/gitlab/check.rake
+++ b/lib/tasks/gitlab/check.rake
@@ -398,6 +398,35 @@ namespace :gitlab do
end
end
+ namespace :orphans do
+ desc 'Gitlab | Check for orphaned namespaces and repositories'
+ task check: :environment do
+ warn_user_is_not_gitlab
+ checks = [
+ SystemCheck::Orphans::NamespaceCheck,
+ SystemCheck::Orphans::RepositoryCheck
+ ]
+
+ SystemCheck.run('Orphans', checks)
+ end
+
+ desc 'GitLab | Check for orphaned namespaces in the repositories path'
+ task check_namespaces: :environment do
+ warn_user_is_not_gitlab
+ checks = [SystemCheck::Orphans::NamespaceCheck]
+
+ SystemCheck.run('Orphans', checks)
+ end
+
+ desc 'GitLab | Check for orphaned repositories in the repositories path'
+ task check_repositories: :environment do
+ warn_user_is_not_gitlab
+ checks = [SystemCheck::Orphans::RepositoryCheck]
+
+ SystemCheck.run('Orphans', checks)
+ end
+ end
+
namespace :user do
desc "GitLab | Check the integrity of a specific user's repositories"
task :check_repos, [:username] => :environment do |t, args|
diff --git a/spec/lib/system_check/orphans/namespace_check_spec.rb b/spec/lib/system_check/orphans/namespace_check_spec.rb
new file mode 100644
index 00000000000..2a61ff3ad65
--- /dev/null
+++ b/spec/lib/system_check/orphans/namespace_check_spec.rb
@@ -0,0 +1,61 @@
+require 'spec_helper'
+require 'rake_helper'
+
+describe SystemCheck::Orphans::NamespaceCheck do
+ let(:storages) { Gitlab.config.repositories.storages.reject { |key, _| key.eql? 'broken' } }
+
+ before do
+ allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
+ allow(subject).to receive(:fetch_disk_namespaces).and_return(disk_namespaces)
+ silence_output
+ end
+
+ describe '#multi_check' do
+ context 'all orphans' do
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 repos/@hashed) }
+
+ it 'prints list of all orphaned namespaces except @hashed' do
+ expect_list_of_orphans(%w(orphan1 orphan2))
+
+ subject.multi_check
+ end
+ end
+
+ context 'few orphans with existing namespace' do
+ let!(:first_level) { create(:group, path: 'my-namespace') }
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/@hashed) }
+
+ it 'prints list of orphaned namespaces' do
+ expect_list_of_orphans(%w(orphan1 orphan2))
+
+ subject.multi_check
+ end
+ end
+
+ context 'few orphans with existing namespace and parents with same name as orphans' do
+ let!(:first_level) { create(:group, path: 'my-namespace') }
+ let!(:second_level) { create(:group, path: 'second-level', parent: first_level) }
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/second-level /repos/@hashed) }
+
+ it 'prints list of orphaned namespaces ignoring parents with same namespace as orphans' do
+ expect_list_of_orphans(%w(orphan1 orphan2 second-level))
+
+ subject.multi_check
+ end
+ end
+
+ context 'no orphans' do
+ let(:disk_namespaces) { %w(@hashed) }
+
+ it 'prints an empty list ignoring @hashed' do
+ expect_list_of_orphans([])
+
+ subject.multi_check
+ end
+ end
+ end
+
+ def expect_list_of_orphans(orphans)
+ expect(subject).to receive(:print_orphans).with(orphans, 'default')
+ end
+end
diff --git a/spec/lib/system_check/orphans/repository_check_spec.rb b/spec/lib/system_check/orphans/repository_check_spec.rb
new file mode 100644
index 00000000000..b0c2267d177
--- /dev/null
+++ b/spec/lib/system_check/orphans/repository_check_spec.rb
@@ -0,0 +1,68 @@
+require 'spec_helper'
+require 'rake_helper'
+
+describe SystemCheck::Orphans::RepositoryCheck do
+ let(:storages) { Gitlab.config.repositories.storages.reject { |key, _| key.eql? 'broken' } }
+
+ before do
+ allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
+ allow(subject).to receive(:fetch_disk_namespaces).and_return(disk_namespaces)
+ allow(subject).to receive(:fetch_disk_repositories).and_return(disk_repositories)
+ # silence_output
+ end
+
+ describe '#multi_check' do
+ context 'all orphans' do
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 repos/@hashed) }
+ let(:disk_repositories) { %w(repo1.git repo2.git) }
+
+ it 'prints list of all orphaned namespaces except @hashed' do
+ expect_list_of_orphans(%w(orphan1/repo1.git orphan1/repo2.git orphan2/repo1.git orphan2/repo2.git))
+
+ subject.multi_check
+ end
+ end
+
+ context 'few orphans with existing namespace' do
+ let!(:first_level) { create(:group, path: 'my-namespace') }
+ let!(:project) { create(:project, path: 'repo', namespace: first_level) }
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/@hashed) }
+ let(:disk_repositories) { %w(repo.git) }
+
+ it 'prints list of orphaned namespaces' do
+ expect_list_of_orphans(%w(orphan1/repo.git orphan2/repo.git))
+
+ subject.multi_check
+ end
+ end
+
+ context 'few orphans with existing namespace and parents with same name as orphans' do
+ let!(:first_level) { create(:group, path: 'my-namespace') }
+ let!(:second_level) { create(:group, path: 'second-level', parent: first_level) }
+ let!(:project) { create(:project, path: 'repo', namespace: first_level) }
+ let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/second-level /repos/@hashed) }
+ let(:disk_repositories) { %w(repo.git) }
+
+ it 'prints list of orphaned namespaces ignoring parents with same namespace as orphans' do
+ expect_list_of_orphans(%w(orphan1/repo.git orphan2/repo.git second-level/repo.git))
+
+ subject.multi_check
+ end
+ end
+
+ context 'no orphans' do
+ let(:disk_namespaces) { %w(@hashed) }
+ let(:disk_repositories) { %w(repo.git) }
+
+ it 'prints an empty list ignoring @hashed' do
+ expect_list_of_orphans([])
+
+ subject.multi_check
+ end
+ end
+ end
+
+ def expect_list_of_orphans(orphans)
+ expect(subject).to receive(:print_orphans).with(orphans, 'default')
+ end
+end