From 2cfd0b59aeb410c1541530ca3eb5f5c472b978e0 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 24 Mar 2015 13:15:59 +0100 Subject: Archive repositories in background worker. --- Procfile | 2 +- .../projects/repositories_controller.rb | 8 ++- app/models/repository.rb | 3 + app/services/archive_repository_service.rb | 67 +++++++++++++++++++--- app/workers/repository_archive_worker.rb | 46 +++++++++++++++ lib/api/repositories.rb | 11 ++-- 6 files changed, 121 insertions(+), 16 deletions(-) create mode 100644 app/workers/repository_archive_worker.rb diff --git a/Procfile b/Procfile index a0ab4a734a4..799b92729fa 100644 --- a/Procfile +++ b/Procfile @@ -1,2 +1,2 @@ web: bundle exec unicorn_rails -p ${PORT:="3000"} -E ${RAILS_ENV:="development"} -c ${UNICORN_CONFIG:="config/unicorn.rb"} -worker: bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default +worker: bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default diff --git a/app/controllers/projects/repositories_controller.rb b/app/controllers/projects/repositories_controller.rb index cbb888b25e8..112365ba91a 100644 --- a/app/controllers/projects/repositories_controller.rb +++ b/app/controllers/projects/repositories_controller.rb @@ -15,14 +15,18 @@ class Projects::RepositoriesController < Projects::ApplicationController render_404 and return end - file_path = ArchiveRepositoryService.new.execute(@project, params[:ref], params[:format]) + begin + file_path = ArchiveRepositoryService.new(@project, params[:ref], params[:format]).execute + rescue + return render_404 + end if file_path # Send file to user response.headers["Content-Length"] = File.open(file_path).size.to_s send_file file_path else - render_404 + redirect_to request.fullpath end end end diff --git a/app/models/repository.rb b/app/models/repository.rb index 77765cae1a0..72769498872 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -267,6 +267,9 @@ class Repository # Remove archives older than 2 hours def clean_old_archives repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path + + return unless File.directory?(repository_downloads_path) + Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete)) end diff --git a/app/services/archive_repository_service.rb b/app/services/archive_repository_service.rb index 8823f6fdc67..cb2026fedba 100644 --- a/app/services/archive_repository_service.rb +++ b/app/services/archive_repository_service.rb @@ -1,14 +1,65 @@ class ArchiveRepositoryService - def execute(project, ref, format) - storage_path = Gitlab.config.gitlab.repository_downloads_path + attr_reader :project, :ref, :format - unless File.directory?(storage_path) - FileUtils.mkdir_p(storage_path) + def initialize(project, ref, format) + format ||= 'tar.gz' + @project, @ref, @format = project, ref, format + end + + def execute + project.repository.clean_old_archives + + raise "No archive file path" unless file_path + + return file_path if archived? + + unless archiving? + RepositoryArchiveWorker.perform_async(project.id, ref, format) end - format ||= 'tar.gz' - repository = project.repository - repository.clean_old_archives - repository.archive_repo(ref, storage_path, format.downcase) + archived = wait_until_archived + + file_path if archived + end + + private + + def storage_path + Gitlab.config.gitlab.repository_downloads_path + end + + def archive_args + @archive_args ||= [ref, storage_path, format.downcase] + end + + def file_path + @file_path ||= project.repository.archive_file_path(*archive_args) + end + + def pid_file_path + @pid_file_path ||= project.repository.archive_pid_file_path(*archive_args) + end + + def archived? + File.exist?(file_path) + end + + def archiving? + File.exist?(pid_file_path) + end + + def wait_until_archived + timeout = 5.0 + t1 = Time.now + + begin + sleep 0.1 + + success = archived? + + t2 = Time.now + end until success || t2 - t1 >= timeout + + success end end diff --git a/app/workers/repository_archive_worker.rb b/app/workers/repository_archive_worker.rb new file mode 100644 index 00000000000..3f4681a80f4 --- /dev/null +++ b/app/workers/repository_archive_worker.rb @@ -0,0 +1,46 @@ +class RepositoryArchiveWorker + include Sidekiq::Worker + + sidekiq_options queue: :archive_repo + + attr_accessor :project, :ref, :format + + def perform(project_id, ref, format) + @project = Project.find(project_id) + @ref, @format = ref, format + + repository = project.repository + + repository.clean_old_archives + + return if archived? || archiving? + + repository.archive_repo(*archive_args) + end + + private + + def storage_path + Gitlab.config.gitlab.repository_downloads_path + end + + def archive_args + @archive_args ||= [ref, storage_path, format.downcase] + end + + def file_path + @file_path ||= project.repository.archive_file_path(*archive_args) + end + + def pid_file_path + @pid_file_path ||= project.repository.archive_pid_file_path(*archive_args) + end + + def archived? + File.exist?(file_path) + end + + def archiving? + File.exist?(pid_file_path) + end +end diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb index b259914a01c..1fbf3dca3c6 100644 --- a/lib/api/repositories.rb +++ b/lib/api/repositories.rb @@ -133,10 +133,11 @@ module API authorize! :download_code, user_project begin - file_path = ArchiveRepositoryService.new.execute( - user_project, - params[:sha], - params[:format]) + file_path = ArchiveRepositoryService.new( + user_project, + params[:sha], + params[:format] + ).execute rescue not_found!('File') end @@ -149,7 +150,7 @@ module API env['api.format'] = :binary present data else - not_found!('File') + redirect request.fullpath end end -- cgit v1.2.1 From e7bbd85b3424ec64cb7f56452fd7f5ee48663a60 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 24 Mar 2015 13:16:11 +0100 Subject: Limit number of concurrent archive_repo jobs to 2. --- Gemfile | 1 + Gemfile.lock | 3 +++ config/initializers/4_sidekiq.rb | 2 ++ 3 files changed, 6 insertions(+) diff --git a/Gemfile b/Gemfile index e767aec5053..0dd285d0fdd 100644 --- a/Gemfile +++ b/Gemfile @@ -122,6 +122,7 @@ gem 'slim' gem 'sinatra', require: nil gem 'sidekiq', '~> 3.3' gem 'sidetiq', '0.6.3' +gem 'sidekiq-limit_fetch' # HTTP requests gem "httparty" diff --git a/Gemfile.lock b/Gemfile.lock index ed8663b358b..c6ecdb4db0a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -547,6 +547,8 @@ GEM json redis (>= 3.0.6) redis-namespace (>= 1.3.1) + sidekiq-limit_fetch (2.4.1) + sidekiq (>= 2.6.5, < 4.0) sidetiq (0.6.3) celluloid (>= 0.14.1) ice_cube (= 0.11.1) @@ -769,6 +771,7 @@ DEPENDENCIES settingslogic shoulda-matchers (~> 2.7.0) sidekiq (~> 3.3) + sidekiq-limit_fetch sidetiq (= 0.6.3) simplecov sinatra diff --git a/config/initializers/4_sidekiq.rb b/config/initializers/4_sidekiq.rb index e856499732e..da2e8a08b07 100644 --- a/config/initializers/4_sidekiq.rb +++ b/config/initializers/4_sidekiq.rb @@ -25,3 +25,5 @@ Sidekiq.configure_client do |config| namespace: 'resque:gitlab' } end + +Sidekiq::Queue["archive_repo"].limit = 2 -- cgit v1.2.1 From ccc01b36ba3cd0e1f9c3a6e0201207114e3d1d41 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 24 Mar 2015 13:24:35 +0100 Subject: Small code cleanup. --- app/services/archive_repository_service.rb | 13 ++++--------- app/workers/repository_archive_worker.rb | 12 ++++-------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/app/services/archive_repository_service.rb b/app/services/archive_repository_service.rb index cb2026fedba..40b0a64fb73 100644 --- a/app/services/archive_repository_service.rb +++ b/app/services/archive_repository_service.rb @@ -3,7 +3,7 @@ class ArchiveRepositoryService def initialize(project, ref, format) format ||= 'tar.gz' - @project, @ref, @format = project, ref, format + @project, @ref, @format = project, ref, format.downcase end def execute @@ -28,16 +28,12 @@ class ArchiveRepositoryService Gitlab.config.gitlab.repository_downloads_path end - def archive_args - @archive_args ||= [ref, storage_path, format.downcase] - end - def file_path - @file_path ||= project.repository.archive_file_path(*archive_args) + @file_path ||= project.repository.archive_file_path(ref, storage_path, format) end def pid_file_path - @pid_file_path ||= project.repository.archive_pid_file_path(*archive_args) + @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format) end def archived? @@ -48,8 +44,7 @@ class ArchiveRepositoryService File.exist?(pid_file_path) end - def wait_until_archived - timeout = 5.0 + def wait_until_archived(timeout = 5.0) t1 = Time.now begin diff --git a/app/workers/repository_archive_worker.rb b/app/workers/repository_archive_worker.rb index 3f4681a80f4..42ac77c588e 100644 --- a/app/workers/repository_archive_worker.rb +++ b/app/workers/repository_archive_worker.rb @@ -7,7 +7,7 @@ class RepositoryArchiveWorker def perform(project_id, ref, format) @project = Project.find(project_id) - @ref, @format = ref, format + @ref, @format = ref, format.downcase repository = project.repository @@ -15,7 +15,7 @@ class RepositoryArchiveWorker return if archived? || archiving? - repository.archive_repo(*archive_args) + repository.archive_repo(ref, storage_path, format) end private @@ -24,16 +24,12 @@ class RepositoryArchiveWorker Gitlab.config.gitlab.repository_downloads_path end - def archive_args - @archive_args ||= [ref, storage_path, format.downcase] - end - def file_path - @file_path ||= project.repository.archive_file_path(*archive_args) + @file_path ||= project.repository.archive_file_path(ref, storage_path, format) end def pid_file_path - @pid_file_path ||= project.repository.archive_pid_file_path(*archive_args) + @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format) end def archived? -- cgit v1.2.1 From 2fdae52fd1048779a84770b39fe2961f31b1bd79 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 24 Mar 2015 13:24:38 +0100 Subject: Add item to changelog. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 37054da46b8..03c20e8f3ed 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -48,6 +48,7 @@ v 7.10.0 (unreleased) - Prevent note form from being cleared when submitting failed. - Improve file icons rendering on tree (Sullivan Sénéchal) - API: Add pagination to project events + - Archive repositories in background worker. v 7.9.0 - Send EmailsOnPush email when branch or tag is created or deleted. -- cgit v1.2.1 From 91761b06851832be643258f390ea7593beacae16 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 31 Mar 2015 13:37:21 +0200 Subject: Add tests. --- .../projects/repositories_controller.rb | 6 +- app/services/archive_repository_service.rb | 6 +- app/workers/repository_archive_worker.rb | 1 + .../projects/repositories_controller_spec.rb | 65 +++++++++++++++ spec/services/archive_repository_service_spec.rb | 93 ++++++++++++++++++++++ spec/workers/repository_archive_worker_spec.rb | 80 +++++++++++++++++++ 6 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 spec/controllers/projects/repositories_controller_spec.rb create mode 100644 spec/services/archive_repository_service_spec.rb create mode 100644 spec/workers/repository_archive_worker_spec.rb diff --git a/app/controllers/projects/repositories_controller.rb b/app/controllers/projects/repositories_controller.rb index 112365ba91a..96defb0c721 100644 --- a/app/controllers/projects/repositories_controller.rb +++ b/app/controllers/projects/repositories_controller.rb @@ -11,14 +11,10 @@ class Projects::RepositoriesController < Projects::ApplicationController end def archive - unless can?(current_user, :download_code, @project) - render_404 and return - end - begin file_path = ArchiveRepositoryService.new(@project, params[:ref], params[:format]).execute rescue - return render_404 + return head :not_found end if file_path diff --git a/app/services/archive_repository_service.rb b/app/services/archive_repository_service.rb index 40b0a64fb73..e1b41527d8d 100644 --- a/app/services/archive_repository_service.rb +++ b/app/services/archive_repository_service.rb @@ -6,7 +6,7 @@ class ArchiveRepositoryService @project, @ref, @format = project, ref, format.downcase end - def execute + def execute(options = {}) project.repository.clean_old_archives raise "No archive file path" unless file_path @@ -17,7 +17,7 @@ class ArchiveRepositoryService RepositoryArchiveWorker.perform_async(project.id, ref, format) end - archived = wait_until_archived + archived = wait_until_archived(options[:timeout] || 5.0) file_path if archived end @@ -45,6 +45,8 @@ class ArchiveRepositoryService end def wait_until_archived(timeout = 5.0) + return archived? if timeout == 0.0 + t1 = Time.now begin diff --git a/app/workers/repository_archive_worker.rb b/app/workers/repository_archive_worker.rb index 42ac77c588e..021c1139568 100644 --- a/app/workers/repository_archive_worker.rb +++ b/app/workers/repository_archive_worker.rb @@ -13,6 +13,7 @@ class RepositoryArchiveWorker repository.clean_old_archives + return unless file_path return if archived? || archiving? repository.archive_repo(ref, storage_path, format) diff --git a/spec/controllers/projects/repositories_controller_spec.rb b/spec/controllers/projects/repositories_controller_spec.rb new file mode 100644 index 00000000000..91856ed0cc0 --- /dev/null +++ b/spec/controllers/projects/repositories_controller_spec.rb @@ -0,0 +1,65 @@ +require "spec_helper" + +describe Projects::RepositoriesController do + let(:project) { create(:project) } + let(:user) { create(:user) } + + describe "GET archive" do + before do + sign_in(user) + project.team << [user, :developer] + + allow(ArchiveRepositoryService).to receive(:new).and_return(service) + end + + let(:service) { ArchiveRepositoryService.new(project, "master", "zip") } + + it "executes ArchiveRepositoryService" do + expect(ArchiveRepositoryService).to receive(:new).with(project, "master", "zip") + expect(service).to receive(:execute) + + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + end + + context "when the service raises an error" do + + before do + allow(service).to receive(:execute).and_raise("Archive failed") + end + + it "renders Not Found" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response.status).to eq(404) + end + end + + context "when the service doesn't return a path" do + + before do + allow(service).to receive(:execute).and_return(nil) + end + + it "reloads the page" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response).to redirect_to(archive_namespace_project_repository_path(project.namespace, project, ref: "master", format: "zip")) + end + end + + context "when the service returns a path" do + + let(:path) { Rails.root.join("spec/fixtures/dk.png").to_s } + + before do + allow(service).to receive(:execute).and_return(path) + end + + it "sends the file" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response.body).to eq(File.binread(path)) + end + end + end +end diff --git a/spec/services/archive_repository_service_spec.rb b/spec/services/archive_repository_service_spec.rb new file mode 100644 index 00000000000..f168a913976 --- /dev/null +++ b/spec/services/archive_repository_service_spec.rb @@ -0,0 +1,93 @@ +require 'spec_helper' + +describe ArchiveRepositoryService do + let(:project) { create(:project) } + subject { ArchiveRepositoryService.new(project, "master", "zip") } + + describe "#execute" do + it "cleans old archives" do + expect(project.repository).to receive(:clean_old_archives) + + subject.execute(timeout: 0.0) + end + + context "when the repository doesn't have an archive file path" do + before do + allow(project.repository).to receive(:archive_file_path).and_return(nil) + end + + it "raises an error" do + expect { + subject.execute(timeout: 0.0) + }.to raise_error + end + end + + context "when the repository has an archive file path" do + let(:file_path) { "/archive.zip" } + let(:pid_file_path) { "/archive.zip.pid" } + + before do + allow(project.repository).to receive(:archive_file_path).and_return(file_path) + allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path) + end + + context "when the archive file already exists" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + + it "returns the file path" do + expect(subject.execute(timeout: 0.0)).to eq(file_path) + end + end + + context "when the archive file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(false) + allow(File).to receive(:exist?).with(pid_file_path).and_return(true) + end + + context "when the archive pid file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(pid_file_path).and_return(false) + end + + it "queues the RepositoryArchiveWorker" do + expect(RepositoryArchiveWorker).to receive(:perform_async) + + subject.execute(timeout: 0.0) + end + end + + context "when the archive pid file already exists" do + it "doesn't queue the RepositoryArchiveWorker" do + expect(RepositoryArchiveWorker).not_to receive(:perform_async) + + subject.execute(timeout: 0.0) + end + end + + context "when the archive file exists after a little while" do + before do + Thread.new do + sleep 0.1 + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + end + + it "returns the file path" do + expect(subject.execute(timeout: 0.2)).to eq(file_path) + end + end + + context "when the archive file doesn't exist after the timeout" do + it "returns nil" do + expect(subject.execute(timeout: 0.0)).to eq(nil) + end + end + end + end + end +end + diff --git a/spec/workers/repository_archive_worker_spec.rb b/spec/workers/repository_archive_worker_spec.rb new file mode 100644 index 00000000000..c2362058cfd --- /dev/null +++ b/spec/workers/repository_archive_worker_spec.rb @@ -0,0 +1,80 @@ +require 'spec_helper' + +describe RepositoryArchiveWorker do + let(:project) { create(:project) } + subject { RepositoryArchiveWorker.new } + + before do + allow(Project).to receive(:find).and_return(project) + end + + describe "#perform" do + it "cleans old archives" do + expect(project.repository).to receive(:clean_old_archives) + + subject.perform(project.id, "master", "zip") + end + + context "when the repository doesn't have an archive file path" do + before do + allow(project.repository).to receive(:archive_file_path).and_return(nil) + end + + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the repository has an archive file path" do + let(:file_path) { "/archive.zip" } + let(:pid_file_path) { "/archive.zip.pid" } + + before do + allow(project.repository).to receive(:archive_file_path).and_return(file_path) + allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path) + end + + context "when the archive file already exists" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the archive file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(false) + allow(File).to receive(:exist?).with(pid_file_path).and_return(true) + end + + context "when the archive pid file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(pid_file_path).and_return(false) + end + + it "archives the repo" do + expect(project.repository).to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the archive pid file already exists" do + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + end + end + end +end + -- cgit v1.2.1 From 85b29f99f0c0bc8cad71d323b744c740d8be7ce0 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Wed, 1 Apr 2015 14:36:17 +0200 Subject: Make sure Sidekiq picks up archive_repo queue in production. --- bin/background_jobs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/background_jobs b/bin/background_jobs index 59a51c5c868..a041a4b0433 100755 --- a/bin/background_jobs +++ b/bin/background_jobs @@ -37,7 +37,7 @@ start_no_deamonize() start_sidekiq() { - bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1 + bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1 } load_ok() -- cgit v1.2.1