diff options
author | Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com> | 2015-04-02 20:30:55 +0000 |
---|---|---|
committer | Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com> | 2015-04-02 20:30:55 +0000 |
commit | 424cbf46d5d84e604a8404bb0af67a99fe21d337 (patch) | |
tree | 094de5dfc3d6088a336beb5b3309e2dd2df00986 | |
parent | f8f9750323b47d6d9d3c65936a4dfe87fe0c2a60 (diff) | |
parent | 85b29f99f0c0bc8cad71d323b744c740d8be7ce0 (diff) | |
download | gitlab-ce-424cbf46d5d84e604a8404bb0af67a99fe21d337.tar.gz |
Merge branch 'repository-archive-worker' into 'master'
Archive repositories in background worker.
Depends on https://gitlab.com/gitlab-org/gitlab_git/merge_requests/17 being merged, a new `gitlab_git` being released and this MR's `Gemfile.lock` being updated..
See private issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2173.
To do after this is merged: Update https://gitlab.com/gitlab-org/omnibus-gitlab/blob/master/files/gitlab-cookbooks/gitlab/templates/default/sv-sidekiq-run.erb in omnibus.
See merge request !436
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | Gemfile | 1 | ||||
-rw-r--r-- | Gemfile.lock | 3 | ||||
-rw-r--r-- | Procfile | 2 | ||||
-rw-r--r-- | app/controllers/projects/repositories_controller.rb | 10 | ||||
-rw-r--r-- | app/models/repository.rb | 3 | ||||
-rw-r--r-- | app/services/archive_repository_service.rb | 64 | ||||
-rw-r--r-- | app/workers/repository_archive_worker.rb | 43 | ||||
-rwxr-xr-x | bin/background_jobs | 2 | ||||
-rw-r--r-- | config/initializers/4_sidekiq.rb | 2 | ||||
-rw-r--r-- | lib/api/repositories.rb | 11 | ||||
-rw-r--r-- | spec/controllers/projects/repositories_controller_spec.rb | 65 | ||||
-rw-r--r-- | spec/services/archive_repository_service_spec.rb | 93 | ||||
-rw-r--r-- | spec/workers/repository_archive_worker_spec.rb | 80 |
14 files changed, 360 insertions, 20 deletions
diff --git a/CHANGELOG b/CHANGELOG index 6e963fe965d..268e14d08b0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -54,6 +54,7 @@ v 7.10.0 (unreleased) - Don't leak private group existence by redirecting from namespace controller to group controller. - Ability to skip some items from backup (database, respositories or uploads) - Fix "Hello @username." references not working by no longer allowing usernames to end in period. + - Archive repositories in background worker. v 7.9.2 @@ -122,6 +122,7 @@ gem 'slim' gem 'sinatra', require: nil gem 'sidekiq', '~> 3.3' gem 'sidetiq', '0.6.3' +gem 'sidekiq-limit_fetch' # HTTP requests gem "httparty" diff --git a/Gemfile.lock b/Gemfile.lock index ed8663b358b..c6ecdb4db0a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -547,6 +547,8 @@ GEM json redis (>= 3.0.6) redis-namespace (>= 1.3.1) + sidekiq-limit_fetch (2.4.1) + sidekiq (>= 2.6.5, < 4.0) sidetiq (0.6.3) celluloid (>= 0.14.1) ice_cube (= 0.11.1) @@ -769,6 +771,7 @@ DEPENDENCIES settingslogic shoulda-matchers (~> 2.7.0) sidekiq (~> 3.3) + sidekiq-limit_fetch sidetiq (= 0.6.3) simplecov sinatra @@ -1,2 +1,2 @@ web: bundle exec unicorn_rails -p ${PORT:="3000"} -E ${RAILS_ENV:="development"} -c ${UNICORN_CONFIG:="config/unicorn.rb"} -worker: bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default +worker: bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default diff --git a/app/controllers/projects/repositories_controller.rb b/app/controllers/projects/repositories_controller.rb index cbb888b25e8..96defb0c721 100644 --- a/app/controllers/projects/repositories_controller.rb +++ b/app/controllers/projects/repositories_controller.rb @@ -11,18 +11,18 @@ class Projects::RepositoriesController < Projects::ApplicationController end def archive - unless can?(current_user, :download_code, @project) - render_404 and return + begin + file_path = ArchiveRepositoryService.new(@project, params[:ref], params[:format]).execute + rescue + return head :not_found end - file_path = ArchiveRepositoryService.new.execute(@project, params[:ref], params[:format]) - if file_path # Send file to user response.headers["Content-Length"] = File.open(file_path).size.to_s send_file file_path else - render_404 + redirect_to request.fullpath end end end diff --git a/app/models/repository.rb b/app/models/repository.rb index 77765cae1a0..72769498872 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -267,6 +267,9 @@ class Repository # Remove archives older than 2 hours def clean_old_archives repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path + + return unless File.directory?(repository_downloads_path) + Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete)) end diff --git a/app/services/archive_repository_service.rb b/app/services/archive_repository_service.rb index 8823f6fdc67..e1b41527d8d 100644 --- a/app/services/archive_repository_service.rb +++ b/app/services/archive_repository_service.rb @@ -1,14 +1,62 @@ class ArchiveRepositoryService - def execute(project, ref, format) - storage_path = Gitlab.config.gitlab.repository_downloads_path + attr_reader :project, :ref, :format - unless File.directory?(storage_path) - FileUtils.mkdir_p(storage_path) + def initialize(project, ref, format) + format ||= 'tar.gz' + @project, @ref, @format = project, ref, format.downcase + end + + def execute(options = {}) + project.repository.clean_old_archives + + raise "No archive file path" unless file_path + + return file_path if archived? + + unless archiving? + RepositoryArchiveWorker.perform_async(project.id, ref, format) end - format ||= 'tar.gz' - repository = project.repository - repository.clean_old_archives - repository.archive_repo(ref, storage_path, format.downcase) + archived = wait_until_archived(options[:timeout] || 5.0) + + file_path if archived + end + + private + + def storage_path + Gitlab.config.gitlab.repository_downloads_path + end + + def file_path + @file_path ||= project.repository.archive_file_path(ref, storage_path, format) + end + + def pid_file_path + @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format) + end + + def archived? + File.exist?(file_path) + end + + def archiving? + File.exist?(pid_file_path) + end + + def wait_until_archived(timeout = 5.0) + return archived? if timeout == 0.0 + + t1 = Time.now + + begin + sleep 0.1 + + success = archived? + + t2 = Time.now + end until success || t2 - t1 >= timeout + + success end end diff --git a/app/workers/repository_archive_worker.rb b/app/workers/repository_archive_worker.rb new file mode 100644 index 00000000000..021c1139568 --- /dev/null +++ b/app/workers/repository_archive_worker.rb @@ -0,0 +1,43 @@ +class RepositoryArchiveWorker + include Sidekiq::Worker + + sidekiq_options queue: :archive_repo + + attr_accessor :project, :ref, :format + + def perform(project_id, ref, format) + @project = Project.find(project_id) + @ref, @format = ref, format.downcase + + repository = project.repository + + repository.clean_old_archives + + return unless file_path + return if archived? || archiving? + + repository.archive_repo(ref, storage_path, format) + end + + private + + def storage_path + Gitlab.config.gitlab.repository_downloads_path + end + + def file_path + @file_path ||= project.repository.archive_file_path(ref, storage_path, format) + end + + def pid_file_path + @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format) + end + + def archived? + File.exist?(file_path) + end + + def archiving? + File.exist?(pid_file_path) + end +end diff --git a/bin/background_jobs b/bin/background_jobs index 59a51c5c868..a041a4b0433 100755 --- a/bin/background_jobs +++ b/bin/background_jobs @@ -37,7 +37,7 @@ start_no_deamonize() start_sidekiq() { - bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1 + bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1 } load_ok() diff --git a/config/initializers/4_sidekiq.rb b/config/initializers/4_sidekiq.rb index e856499732e..da2e8a08b07 100644 --- a/config/initializers/4_sidekiq.rb +++ b/config/initializers/4_sidekiq.rb @@ -25,3 +25,5 @@ Sidekiq.configure_client do |config| namespace: 'resque:gitlab' } end + +Sidekiq::Queue["archive_repo"].limit = 2 diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb index b259914a01c..1fbf3dca3c6 100644 --- a/lib/api/repositories.rb +++ b/lib/api/repositories.rb @@ -133,10 +133,11 @@ module API authorize! :download_code, user_project begin - file_path = ArchiveRepositoryService.new.execute( - user_project, - params[:sha], - params[:format]) + file_path = ArchiveRepositoryService.new( + user_project, + params[:sha], + params[:format] + ).execute rescue not_found!('File') end @@ -149,7 +150,7 @@ module API env['api.format'] = :binary present data else - not_found!('File') + redirect request.fullpath end end diff --git a/spec/controllers/projects/repositories_controller_spec.rb b/spec/controllers/projects/repositories_controller_spec.rb new file mode 100644 index 00000000000..91856ed0cc0 --- /dev/null +++ b/spec/controllers/projects/repositories_controller_spec.rb @@ -0,0 +1,65 @@ +require "spec_helper" + +describe Projects::RepositoriesController do + let(:project) { create(:project) } + let(:user) { create(:user) } + + describe "GET archive" do + before do + sign_in(user) + project.team << [user, :developer] + + allow(ArchiveRepositoryService).to receive(:new).and_return(service) + end + + let(:service) { ArchiveRepositoryService.new(project, "master", "zip") } + + it "executes ArchiveRepositoryService" do + expect(ArchiveRepositoryService).to receive(:new).with(project, "master", "zip") + expect(service).to receive(:execute) + + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + end + + context "when the service raises an error" do + + before do + allow(service).to receive(:execute).and_raise("Archive failed") + end + + it "renders Not Found" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response.status).to eq(404) + end + end + + context "when the service doesn't return a path" do + + before do + allow(service).to receive(:execute).and_return(nil) + end + + it "reloads the page" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response).to redirect_to(archive_namespace_project_repository_path(project.namespace, project, ref: "master", format: "zip")) + end + end + + context "when the service returns a path" do + + let(:path) { Rails.root.join("spec/fixtures/dk.png").to_s } + + before do + allow(service).to receive(:execute).and_return(path) + end + + it "sends the file" do + get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip" + + expect(response.body).to eq(File.binread(path)) + end + end + end +end diff --git a/spec/services/archive_repository_service_spec.rb b/spec/services/archive_repository_service_spec.rb new file mode 100644 index 00000000000..f168a913976 --- /dev/null +++ b/spec/services/archive_repository_service_spec.rb @@ -0,0 +1,93 @@ +require 'spec_helper' + +describe ArchiveRepositoryService do + let(:project) { create(:project) } + subject { ArchiveRepositoryService.new(project, "master", "zip") } + + describe "#execute" do + it "cleans old archives" do + expect(project.repository).to receive(:clean_old_archives) + + subject.execute(timeout: 0.0) + end + + context "when the repository doesn't have an archive file path" do + before do + allow(project.repository).to receive(:archive_file_path).and_return(nil) + end + + it "raises an error" do + expect { + subject.execute(timeout: 0.0) + }.to raise_error + end + end + + context "when the repository has an archive file path" do + let(:file_path) { "/archive.zip" } + let(:pid_file_path) { "/archive.zip.pid" } + + before do + allow(project.repository).to receive(:archive_file_path).and_return(file_path) + allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path) + end + + context "when the archive file already exists" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + + it "returns the file path" do + expect(subject.execute(timeout: 0.0)).to eq(file_path) + end + end + + context "when the archive file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(false) + allow(File).to receive(:exist?).with(pid_file_path).and_return(true) + end + + context "when the archive pid file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(pid_file_path).and_return(false) + end + + it "queues the RepositoryArchiveWorker" do + expect(RepositoryArchiveWorker).to receive(:perform_async) + + subject.execute(timeout: 0.0) + end + end + + context "when the archive pid file already exists" do + it "doesn't queue the RepositoryArchiveWorker" do + expect(RepositoryArchiveWorker).not_to receive(:perform_async) + + subject.execute(timeout: 0.0) + end + end + + context "when the archive file exists after a little while" do + before do + Thread.new do + sleep 0.1 + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + end + + it "returns the file path" do + expect(subject.execute(timeout: 0.2)).to eq(file_path) + end + end + + context "when the archive file doesn't exist after the timeout" do + it "returns nil" do + expect(subject.execute(timeout: 0.0)).to eq(nil) + end + end + end + end + end +end + diff --git a/spec/workers/repository_archive_worker_spec.rb b/spec/workers/repository_archive_worker_spec.rb new file mode 100644 index 00000000000..c2362058cfd --- /dev/null +++ b/spec/workers/repository_archive_worker_spec.rb @@ -0,0 +1,80 @@ +require 'spec_helper' + +describe RepositoryArchiveWorker do + let(:project) { create(:project) } + subject { RepositoryArchiveWorker.new } + + before do + allow(Project).to receive(:find).and_return(project) + end + + describe "#perform" do + it "cleans old archives" do + expect(project.repository).to receive(:clean_old_archives) + + subject.perform(project.id, "master", "zip") + end + + context "when the repository doesn't have an archive file path" do + before do + allow(project.repository).to receive(:archive_file_path).and_return(nil) + end + + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the repository has an archive file path" do + let(:file_path) { "/archive.zip" } + let(:pid_file_path) { "/archive.zip.pid" } + + before do + allow(project.repository).to receive(:archive_file_path).and_return(file_path) + allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path) + end + + context "when the archive file already exists" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(true) + end + + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the archive file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(file_path).and_return(false) + allow(File).to receive(:exist?).with(pid_file_path).and_return(true) + end + + context "when the archive pid file doesn't exist yet" do + before do + allow(File).to receive(:exist?).with(pid_file_path).and_return(false) + end + + it "archives the repo" do + expect(project.repository).to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + + context "when the archive pid file already exists" do + it "doesn't archive the repo" do + expect(project.repository).not_to receive(:archive_repo) + + subject.perform(project.id, "master", "zip") + end + end + end + end + end +end + |