diff options
-rw-r--r-- | changelogs/unreleased/detect-orphaned-repositories.yml | 5 | ||||
-rw-r--r-- | lib/system_check/orphans/namespace_check.rb | 54 | ||||
-rw-r--r-- | lib/system_check/orphans/repository_check.rb | 68 | ||||
-rw-r--r-- | lib/tasks/gitlab/check.rake | 29 | ||||
-rw-r--r-- | spec/lib/system_check/orphans/namespace_check_spec.rb | 61 | ||||
-rw-r--r-- | spec/lib/system_check/orphans/repository_check_spec.rb | 68 |
6 files changed, 285 insertions, 0 deletions
diff --git a/changelogs/unreleased/detect-orphaned-repositories.yml b/changelogs/unreleased/detect-orphaned-repositories.yml new file mode 100644 index 00000000000..101c1897826 --- /dev/null +++ b/changelogs/unreleased/detect-orphaned-repositories.yml @@ -0,0 +1,5 @@ +--- +title: Scripts to detect orphaned repositories +merge_request: 14204 +author: +type: added diff --git a/lib/system_check/orphans/namespace_check.rb b/lib/system_check/orphans/namespace_check.rb new file mode 100644 index 00000000000..b8446300f72 --- /dev/null +++ b/lib/system_check/orphans/namespace_check.rb @@ -0,0 +1,54 @@ +module SystemCheck + module Orphans + class NamespaceCheck < SystemCheck::BaseCheck + set_name 'Orphaned namespaces:' + + def multi_check + Gitlab.config.repositories.storages.each do |storage_name, repository_storage| + $stdout.puts + $stdout.puts "* Storage: #{storage_name} (#{repository_storage['path']})".color(:yellow) + toplevel_namespace_dirs = disk_namespaces(repository_storage['path']) + + orphans = (toplevel_namespace_dirs - existing_namespaces) + print_orphans(orphans, storage_name) + end + + clear_namespaces! # releases memory when check finishes + end + + private + + def print_orphans(orphans, storage_name) + if orphans.empty? + $stdout.puts "* No orphaned namespaces for #{storage_name} storage".color(:green) + return + end + + orphans.each do |orphan| + $stdout.puts " - #{orphan}".color(:red) + end + end + + def disk_namespaces(storage_path) + fetch_disk_namespaces(storage_path).each_with_object([]) do |namespace_path, result| + namespace = File.basename(namespace_path) + next if namespace.eql?('@hashed') + + result << namespace + end + end + + def fetch_disk_namespaces(storage_path) + Dir.glob(File.join(storage_path, '*')) + end + + def existing_namespaces + @namespaces ||= Namespace.where(parent: nil).all.pluck(:path) + end + + def clear_namespaces! + @namespaces = nil + end + end + end +end diff --git a/lib/system_check/orphans/repository_check.rb b/lib/system_check/orphans/repository_check.rb new file mode 100644 index 00000000000..9b6b2429783 --- /dev/null +++ b/lib/system_check/orphans/repository_check.rb @@ -0,0 +1,68 @@ +module SystemCheck + module Orphans + class RepositoryCheck < SystemCheck::BaseCheck + set_name 'Orphaned repositories:' + attr_accessor :orphans + + def multi_check + Gitlab.config.repositories.storages.each do |storage_name, repository_storage| + $stdout.puts + $stdout.puts "* Storage: #{storage_name} (#{repository_storage['path']})".color(:yellow) + + repositories = disk_repositories(repository_storage['path']) + orphans = (repositories - fetch_repositories(storage_name)) + + print_orphans(orphans, storage_name) + end + end + + private + + def print_orphans(orphans, storage_name) + if orphans.empty? + $stdout.puts "* No orphaned repositories for #{storage_name} storage".color(:green) + return + end + + orphans.each do |orphan| + $stdout.puts " - #{orphan}".color(:red) + end + end + + def disk_repositories(storage_path) + fetch_disk_namespaces(storage_path).each_with_object([]) do |namespace_path, result| + namespace = File.basename(namespace_path) + next if namespace.eql?('@hashed') + + fetch_disk_repositories(namespace_path).each do |repo| + result << "#{namespace}/#{File.basename(repo)}" + end + end + end + + def fetch_repositories(storage_name) + sql = " + SELECT + CONCAT(n.path, '/', p.path, '.git') repo, + CONCAT(n.path, '/', p.path, '.wiki.git') wiki + FROM projects p + JOIN namespaces n + ON (p.namespace_id = n.id AND + n.parent_id IS NULL) + WHERE (p.repository_storage LIKE ?) + " + + query = ActiveRecord::Base.send(:sanitize_sql_array, [sql, storage_name]) # rubocop:disable GitlabSecurity/PublicSend + ActiveRecord::Base.connection.select_all(query).rows.try(:flatten!) || [] + end + + def fetch_disk_namespaces(storage_path) + Dir.glob(File.join(storage_path, '*')) + end + + def fetch_disk_repositories(namespace_path) + Dir.glob(File.join(namespace_path, '*')) + end + end + end +end diff --git a/lib/tasks/gitlab/check.rake b/lib/tasks/gitlab/check.rake index 654f638c454..dfade1f3885 100644 --- a/lib/tasks/gitlab/check.rake +++ b/lib/tasks/gitlab/check.rake @@ -398,6 +398,35 @@ namespace :gitlab do end end + namespace :orphans do + desc 'Gitlab | Check for orphaned namespaces and repositories' + task check: :environment do + warn_user_is_not_gitlab + checks = [ + SystemCheck::Orphans::NamespaceCheck, + SystemCheck::Orphans::RepositoryCheck + ] + + SystemCheck.run('Orphans', checks) + end + + desc 'GitLab | Check for orphaned namespaces in the repositories path' + task check_namespaces: :environment do + warn_user_is_not_gitlab + checks = [SystemCheck::Orphans::NamespaceCheck] + + SystemCheck.run('Orphans', checks) + end + + desc 'GitLab | Check for orphaned repositories in the repositories path' + task check_repositories: :environment do + warn_user_is_not_gitlab + checks = [SystemCheck::Orphans::RepositoryCheck] + + SystemCheck.run('Orphans', checks) + end + end + namespace :user do desc "GitLab | Check the integrity of a specific user's repositories" task :check_repos, [:username] => :environment do |t, args| diff --git a/spec/lib/system_check/orphans/namespace_check_spec.rb b/spec/lib/system_check/orphans/namespace_check_spec.rb new file mode 100644 index 00000000000..2a61ff3ad65 --- /dev/null +++ b/spec/lib/system_check/orphans/namespace_check_spec.rb @@ -0,0 +1,61 @@ +require 'spec_helper' +require 'rake_helper' + +describe SystemCheck::Orphans::NamespaceCheck do + let(:storages) { Gitlab.config.repositories.storages.reject { |key, _| key.eql? 'broken' } } + + before do + allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) + allow(subject).to receive(:fetch_disk_namespaces).and_return(disk_namespaces) + silence_output + end + + describe '#multi_check' do + context 'all orphans' do + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 repos/@hashed) } + + it 'prints list of all orphaned namespaces except @hashed' do + expect_list_of_orphans(%w(orphan1 orphan2)) + + subject.multi_check + end + end + + context 'few orphans with existing namespace' do + let!(:first_level) { create(:group, path: 'my-namespace') } + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/@hashed) } + + it 'prints list of orphaned namespaces' do + expect_list_of_orphans(%w(orphan1 orphan2)) + + subject.multi_check + end + end + + context 'few orphans with existing namespace and parents with same name as orphans' do + let!(:first_level) { create(:group, path: 'my-namespace') } + let!(:second_level) { create(:group, path: 'second-level', parent: first_level) } + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/second-level /repos/@hashed) } + + it 'prints list of orphaned namespaces ignoring parents with same namespace as orphans' do + expect_list_of_orphans(%w(orphan1 orphan2 second-level)) + + subject.multi_check + end + end + + context 'no orphans' do + let(:disk_namespaces) { %w(@hashed) } + + it 'prints an empty list ignoring @hashed' do + expect_list_of_orphans([]) + + subject.multi_check + end + end + end + + def expect_list_of_orphans(orphans) + expect(subject).to receive(:print_orphans).with(orphans, 'default') + end +end diff --git a/spec/lib/system_check/orphans/repository_check_spec.rb b/spec/lib/system_check/orphans/repository_check_spec.rb new file mode 100644 index 00000000000..b0c2267d177 --- /dev/null +++ b/spec/lib/system_check/orphans/repository_check_spec.rb @@ -0,0 +1,68 @@ +require 'spec_helper' +require 'rake_helper' + +describe SystemCheck::Orphans::RepositoryCheck do + let(:storages) { Gitlab.config.repositories.storages.reject { |key, _| key.eql? 'broken' } } + + before do + allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) + allow(subject).to receive(:fetch_disk_namespaces).and_return(disk_namespaces) + allow(subject).to receive(:fetch_disk_repositories).and_return(disk_repositories) + # silence_output + end + + describe '#multi_check' do + context 'all orphans' do + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 repos/@hashed) } + let(:disk_repositories) { %w(repo1.git repo2.git) } + + it 'prints list of all orphaned namespaces except @hashed' do + expect_list_of_orphans(%w(orphan1/repo1.git orphan1/repo2.git orphan2/repo1.git orphan2/repo2.git)) + + subject.multi_check + end + end + + context 'few orphans with existing namespace' do + let!(:first_level) { create(:group, path: 'my-namespace') } + let!(:project) { create(:project, path: 'repo', namespace: first_level) } + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/@hashed) } + let(:disk_repositories) { %w(repo.git) } + + it 'prints list of orphaned namespaces' do + expect_list_of_orphans(%w(orphan1/repo.git orphan2/repo.git)) + + subject.multi_check + end + end + + context 'few orphans with existing namespace and parents with same name as orphans' do + let!(:first_level) { create(:group, path: 'my-namespace') } + let!(:second_level) { create(:group, path: 'second-level', parent: first_level) } + let!(:project) { create(:project, path: 'repo', namespace: first_level) } + let(:disk_namespaces) { %w(/repos/orphan1 /repos/orphan2 /repos/my-namespace /repos/second-level /repos/@hashed) } + let(:disk_repositories) { %w(repo.git) } + + it 'prints list of orphaned namespaces ignoring parents with same namespace as orphans' do + expect_list_of_orphans(%w(orphan1/repo.git orphan2/repo.git second-level/repo.git)) + + subject.multi_check + end + end + + context 'no orphans' do + let(:disk_namespaces) { %w(@hashed) } + let(:disk_repositories) { %w(repo.git) } + + it 'prints an empty list ignoring @hashed' do + expect_list_of_orphans([]) + + subject.multi_check + end + end + end + + def expect_list_of_orphans(orphans) + expect(subject).to receive(:print_orphans).with(orphans, 'default') + end +end |