summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorZeger-Jan van de Weg <git@zjvandeweg.nl>2018-09-07 11:16:34 +0200
committerZeger-Jan van de Weg <git@zjvandeweg.nl>2018-09-07 13:58:31 +0200
commit3aedccb17a5dbee40b5a08014c92cab8ea11e9fb (patch)
treeacd5edd14ea25f8a7bb7db251f1de1dbaa673065 /lib
parentc380d3acebd181f13629a25d2e2acca46ffe1e00 (diff)
downloadgitlab-ce-3aedccb17a5dbee40b5a08014c92cab8ea11e9fb.tar.gz
Port cleanup tasks to use Gitaly
Rake tasks cleaning up the Git storage were still using direct disk access, which won't work if the storage disks aren't attached. To mitigate this, a migration issue was created. To port gitlab:cleanup:dirs and gitlab:cleanup:repos, a new RPC was required: ListDirectories. This was implemented in Gitaly through https://gitlab.com/gitlab-org/gitaly/merge_requests/868. To be able to use the new RPC, the Gitaly server was bumped to v0.120. This is an RPC that will not use feature gates, as this doesn't scale on .com so there is no way to test it at scale. Furthermore, we _know_ it doesn't scale, but this might be a useful task for smaller instances. Lastly, the tests are slightly updated to also work when the disk isn't attached. Even though this is not planned, it was very little effort and thus I applied the boy scout rule. Closes https://gitlab.com/gitlab-org/gitaly/issues/954 Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/gitaly_client/storage_service.rb8
-rw-r--r--lib/tasks/gitlab/cleanup.rake89
2 files changed, 47 insertions, 50 deletions
diff --git a/lib/gitlab/gitaly_client/storage_service.rb b/lib/gitlab/gitaly_client/storage_service.rb
index eb0e910665b..3a26dd58ff4 100644
--- a/lib/gitlab/gitaly_client/storage_service.rb
+++ b/lib/gitlab/gitaly_client/storage_service.rb
@@ -5,6 +5,14 @@ module Gitlab
@storage = storage
end
+ # Returns all directories in the git storage directory, lexically ordered
+ def list_directories(depth: 1)
+ request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
+
+ GitalyClient.call(@storage, :storage_service, :list_directories, request)
+ .flat_map(&:paths)
+ end
+
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake
index c8a8863443e..e8ae5dfa540 100644
--- a/lib/tasks/gitlab/cleanup.rake
+++ b/lib/tasks/gitlab/cleanup.rake
@@ -1,40 +1,29 @@
-# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954
-#
+# frozen_string_literal: true
+require 'set'
+
namespace :gitlab do
namespace :cleanup do
- HASHED_REPOSITORY_NAME = '@hashed'.freeze
-
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
- warn_user_is_not_gitlab
+ namespaces = Set.new(Namespace.pluck(:path))
+ namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
- namespaces = Namespace.pluck(:path)
- namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
- Gitlab.config.repositories.storages.each do |name, repository_storage|
- git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
- all_dirs = Dir.glob(git_base_path + '/*')
+ Gitaly::Server.all.each do |server|
+ all_dirs = Gitlab::GitalyClient::StorageService
+ .new(server.storage)
+ .list_directories(depth: 0)
+ .reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
- puts git_base_path.color(:yellow)
puts "Looking for directories to remove... "
-
- all_dirs.reject! do |dir|
- # skip if git repo
- dir =~ /.git$/
- end
-
- all_dirs.reject! do |dir|
- dir_name = File.basename dir
-
- # skip if namespace present
- namespaces.include?(dir_name)
- end
-
all_dirs.each do |dir_path|
if remove?
- if FileUtils.rm_rf dir_path
- puts "Removed...#{dir_path}".color(:red)
- else
- puts "Cannot remove #{dir_path}".color(:red)
+ begin
+ Gitlab::GitalyClient::NamespaceService.new(server.storage)
+ .remove(dir_path)
+
+ puts "Removed...#{dir_path}"
+ rescue StandardError => e
+ puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end
else
puts "Can be removed: #{dir_path}".color(:red)
@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do
- warn_user_is_not_gitlab
-
move_suffix = "+orphaned+#{Time.now.to_i}"
- Gitlab.config.repositories.storages.each do |name, repository_storage|
- repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
-
- # Look for global repos (legacy, depth 1) and normal repos (depth 2)
- IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find|
- find.each_line do |path|
- path.chomp!
- repo_with_namespace = path
- .sub(repo_root, '')
- .sub(%r{^/*}, '')
- .chomp('.git')
- .chomp('.wiki')
-
- # TODO ignoring hashed repositories for now. But revisit to fully support
- # possible orphaned hashed repos
- next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
-
- new_path = path + move_suffix
- puts path.inspect + ' -> ' + new_path.inspect
- File.rename(path, new_path)
+
+ Gitaly::Server.all.each do |server|
+ Gitlab::GitalyClient::StorageService
+ .new(server.storage)
+ .list_directories
+ .each do |path|
+ repo_with_namespace = path.chomp('.git').chomp('.wiki')
+
+ # TODO ignoring hashed repositories for now. But revisit to fully support
+ # possible orphaned hashed repos
+ next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
+ next if Project.find_by_full_path(repo_with_namespace)
+
+ new_path = path + move_suffix
+ puts path.inspect + ' -> ' + new_path.inspect
+
+ begin
+ Gitlab::GitalyClient::NamespaceService
+ .new(server.storage)
+ .rename(path, new_path)
+ rescue StandardError => e
+ puts "Error occured while moving the repository: #{e.message}".color(:red)
end
end
end