From 0ad80cab40097658b1eec5b87d440cfcd60d2755 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 8 May 2017 17:13:02 +0200 Subject: Use worker to destroy namespaceless projects in post-deploy Destroying projects can be very time consuming. So instead of destroying them in the post-deploy, just schedule them and make Sidekiq do the hard work. They are scheduled in batches of 5000 records. This way the number of database requests is limited while also the amount data read to memory is limited. --- .../namespaceless_project_destroy_worker.rb | 46 ++++++++++++ config/sidekiq_queues.yml | 1 + ...70502101023_clean_up_pending_delete_projects.rb | 44 ------------ ...leanup_namespaceless_pending_delete_projects.rb | 50 +++++++++++++ .../clean_up_pending_delete_projects_spec.rb | 72 ------------------- ...p_namespaceless_pending_delete_projects_spec.rb | 24 +++++++ .../namespaceless_project_destroy_worker_spec.rb | 81 ++++++++++++++++++++++ 7 files changed, 202 insertions(+), 116 deletions(-) create mode 100644 app/workers/namespaceless_project_destroy_worker.rb delete mode 100644 db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb create mode 100644 db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb delete mode 100644 spec/migrations/clean_up_pending_delete_projects_spec.rb create mode 100644 spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb create mode 100644 spec/workers/namespaceless_project_destroy_worker_spec.rb diff --git a/app/workers/namespaceless_project_destroy_worker.rb b/app/workers/namespaceless_project_destroy_worker.rb new file mode 100644 index 00000000000..da02084c688 --- /dev/null +++ b/app/workers/namespaceless_project_destroy_worker.rb @@ -0,0 +1,46 @@ +# Worker to destroy projects that do not have a namespace +# +# It destroys everything it can without having the info about the namespace it +# used to belong to. Projects in this state should be rare. +# The worker will reject doing anything for projects that *do* have a +# namespace. For those use ProjectDestroyWorker instead. +class NamespacelessProjectDestroyWorker + include Sidekiq::Worker + include DedicatedSidekiqQueue + + def self.bulk_perform_async(args_list) + Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => args_list) + end + + def perform(project_id, user_id, params) + begin + project = Project.unscoped.find(project_id) + rescue ActiveRecord::RecordNotFound + return + end + return unless project.namespace_id.nil? # Reject doing anything for projects that *do* have a namespace + + user = User.find(user_id) + return unless user.can?(:remove_project, project) + + project.team.truncate + + unlink_fork(project) if project.forked? + + # Override Project#remove_pages for this instance so it doesn't do anything + def project.remove_pages + end + + project.destroy! + end + + private + + def unlink_fork(project) + merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) + + merge_requests.update_all(state: 'closed') + + project.forked_project_link.destroy + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 433381e79d3..0ca1f565185 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -40,6 +40,7 @@ - [expire_build_instance_artifacts, 1] - [group_destroy, 1] - [irker, 1] + - [namespaceless_project_destroy, 1] - [project_cache, 1] - [project_destroy, 1] - [project_export, 1] diff --git a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb deleted file mode 100644 index dd812964902..00000000000 --- a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb +++ /dev/null @@ -1,44 +0,0 @@ -# See http://doc.gitlab.com/ce/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - -class CleanUpPendingDeleteProjects < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - DOWNTIME = false - - disable_ddl_transaction! - - def up - Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) } - end - - def down - # noop - end - - private - - def delete_project(project) - project.team.truncate - - unlink_fork(project) if project.forked? - - [:events, :issues, :merge_requests, :labels, :milestones, :notes, :snippets].each do |thing| - project.send(thing).delete_all - end - - # Override Project#remove_pages for this instance so it doesn't do anything - def project.remove_pages - end - - project.destroy! - end - - def unlink_fork(project) - merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) - - merge_requests.update_all(state: 'closed') - - project.forked_project_link.destroy - end -end diff --git a/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb new file mode 100644 index 00000000000..76ed109898b --- /dev/null +++ b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb @@ -0,0 +1,50 @@ +# This is the counterpart of RequeuePendingDeleteProjects and cleans all +# projects with `pending_delete = true` and that do not have a namespace. +class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + admin = User.find_by(admin: true) + return unless admin + + @offset = 0 + + loop do + ids = pending_delete_batch + + break if ids.rows.count.zero? + + args = ids.map { |id| [id['id'], admin.id, {}] } + + NamespacelessProjectDestroyWorker.bulk_perform_async(args) + + @offset += 1 + end + end + + def down + # noop + end + + private + + def pending_delete_batch + connection.exec_query(find_batch) + end + + BATCH_SIZE = 5000 + + def find_batch + projects = Arel::Table.new(:projects) + projects.project(projects[:id]). + where(projects[:pending_delete].eq(true)). + where(projects[:namespace_id].eq(nil)). + skip(@offset * BATCH_SIZE). + take(BATCH_SIZE). + to_sql + end +end diff --git a/spec/migrations/clean_up_pending_delete_projects_spec.rb b/spec/migrations/clean_up_pending_delete_projects_spec.rb deleted file mode 100644 index a7c13f91245..00000000000 --- a/spec/migrations/clean_up_pending_delete_projects_spec.rb +++ /dev/null @@ -1,72 +0,0 @@ -# encoding: utf-8 - -require 'spec_helper' -require Rails.root.join('db', 'post_migrate', '20170502101023_clean_up_pending_delete_projects.rb') - -describe CleanUpPendingDeleteProjects do - let(:migration) { described_class.new } - let!(:admin) { create(:admin) } - let!(:project) { create(:empty_project, pending_delete: true) } - - describe '#up' do - it 'only cleans up pending delete projects' do - create(:empty_project) - - expect do - migration.up - end.to change { Project.unscoped.count }.by(-1) - end - - it "truncates the project's team" do - project.add_master(admin) - - expect_any_instance_of(ProjectTeam).to receive(:truncate) - - migration.up - end - - it 'calls Project#destroy!' do - expect_any_instance_of(Project).to receive(:destroy!) - - migration.up - end - - it 'does not do anything in Project#remove_pages method' do - expect(Gitlab::PagesTransfer).not_to receive(:new) - - migration.up - end - - context 'project not a fork of another project' do - it "doesn't call unlink_fork" do - expect(migration).not_to receive(:unlink_fork) - - migration.up - end - end - - context 'project forked from another' do - let!(:parent_project) { create(:empty_project) } - - before do - create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project) - end - - it 'closes open merge requests' do - project.update_attribute(:pending_delete, false) # needed to create the MR - merge_request = create(:merge_request, source_project: project, target_project: parent_project) - project.update_attribute(:pending_delete, true) - - migration.up - - expect(merge_request.reload).to be_closed - end - - it 'destroys the link' do - migration.up - - expect(parent_project.forked_project_links).to be_empty - end - end - end -end diff --git a/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb new file mode 100644 index 00000000000..cee12d521dc --- /dev/null +++ b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb @@ -0,0 +1,24 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170502101023_cleanup_namespaceless_pending_delete_projects.rb') + +describe CleanupNamespacelessPendingDeleteProjects do + before do + # Stub after_save callbacks that will fail when Project has no namespace + allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil) + allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil) + end + + describe '#up' do + it 'only cleans up pending delete projects' do + admin = create(:admin) + create(:empty_project) + create(:empty_project, pending_delete: true) + project = build(:empty_project, pending_delete: true, namespace_id: nil) + project.save(validate: false) + + expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s, admin.id, {}]]) + + described_class.new.up + end + end +end diff --git a/spec/workers/namespaceless_project_destroy_worker_spec.rb b/spec/workers/namespaceless_project_destroy_worker_spec.rb new file mode 100644 index 00000000000..ed78236ebc0 --- /dev/null +++ b/spec/workers/namespaceless_project_destroy_worker_spec.rb @@ -0,0 +1,81 @@ +require 'spec_helper' + +describe NamespacelessProjectDestroyWorker do + subject { described_class.new } + + before do + # Stub after_save callbacks that will fail when Project has no namespace + allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil) + allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil) + end + + describe '#perform' do + context 'project has namespace' do + it 'does not do anything' do + project = create(:empty_project) + + subject.perform(project.id, project.owner.id, {}) + + expect(Project.unscoped.all).to include(project) + end + end + + context 'project has no namespace' do + let(:admin) { create(:admin) } + + let!(:project) do + project = build(:empty_project, namespace_id: nil) + project.save(validate: false) + project + end + + context 'project not a fork of another project' do + it "truncates the project's team" do + expect_any_instance_of(ProjectTeam).to receive(:truncate) + + subject.perform(project.id, admin.id, {}) + end + + it 'deletes the project' do + subject.perform(project.id, admin.id, {}) + + expect(Project.unscoped.all).not_to include(project) + end + + it 'does not call unlink_fork' do + is_expected.not_to receive(:unlink_fork) + + subject.perform(project.id, admin.id, {}) + end + + it 'does not do anything in Project#remove_pages method' do + expect(Gitlab::PagesTransfer).not_to receive(:new) + + subject.perform(project.id, admin.id, {}) + end + end + + context 'project forked from another' do + let!(:parent_project) { create(:empty_project) } + + before do + create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project) + end + + it 'closes open merge requests' do + merge_request = create(:merge_request, source_project: project, target_project: parent_project) + + subject.perform(project.id, admin.id, {}) + + expect(merge_request.reload).to be_closed + end + + it 'destroys the link' do + subject.perform(project.id, admin.id, {}) + + expect(parent_project.forked_project_links).to be_empty + end + end + end + end +end -- cgit v1.2.1