summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>2015-04-02 20:30:55 +0000
committerDmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>2015-04-02 20:30:55 +0000
commit424cbf46d5d84e604a8404bb0af67a99fe21d337 (patch)
tree094de5dfc3d6088a336beb5b3309e2dd2df00986
parentf8f9750323b47d6d9d3c65936a4dfe87fe0c2a60 (diff)
parent85b29f99f0c0bc8cad71d323b744c740d8be7ce0 (diff)
downloadgitlab-ce-424cbf46d5d84e604a8404bb0af67a99fe21d337.tar.gz
Merge branch 'repository-archive-worker' into 'master'
Archive repositories in background worker. Depends on https://gitlab.com/gitlab-org/gitlab_git/merge_requests/17 being merged, a new `gitlab_git` being released and this MR's `Gemfile.lock` being updated.. See private issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2173. To do after this is merged: Update https://gitlab.com/gitlab-org/omnibus-gitlab/blob/master/files/gitlab-cookbooks/gitlab/templates/default/sv-sidekiq-run.erb in omnibus. See merge request !436
-rw-r--r--CHANGELOG1
-rw-r--r--Gemfile1
-rw-r--r--Gemfile.lock3
-rw-r--r--Procfile2
-rw-r--r--app/controllers/projects/repositories_controller.rb10
-rw-r--r--app/models/repository.rb3
-rw-r--r--app/services/archive_repository_service.rb64
-rw-r--r--app/workers/repository_archive_worker.rb43
-rwxr-xr-xbin/background_jobs2
-rw-r--r--config/initializers/4_sidekiq.rb2
-rw-r--r--lib/api/repositories.rb11
-rw-r--r--spec/controllers/projects/repositories_controller_spec.rb65
-rw-r--r--spec/services/archive_repository_service_spec.rb93
-rw-r--r--spec/workers/repository_archive_worker_spec.rb80
14 files changed, 360 insertions, 20 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 6e963fe965d..268e14d08b0 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -54,6 +54,7 @@ v 7.10.0 (unreleased)
- Don't leak private group existence by redirecting from namespace controller to group controller.
- Ability to skip some items from backup (database, respositories or uploads)
- Fix "Hello @username." references not working by no longer allowing usernames to end in period.
+ - Archive repositories in background worker.
v 7.9.2
diff --git a/Gemfile b/Gemfile
index e767aec5053..0dd285d0fdd 100644
--- a/Gemfile
+++ b/Gemfile
@@ -122,6 +122,7 @@ gem 'slim'
gem 'sinatra', require: nil
gem 'sidekiq', '~> 3.3'
gem 'sidetiq', '0.6.3'
+gem 'sidekiq-limit_fetch'
# HTTP requests
gem "httparty"
diff --git a/Gemfile.lock b/Gemfile.lock
index ed8663b358b..c6ecdb4db0a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -547,6 +547,8 @@ GEM
json
redis (>= 3.0.6)
redis-namespace (>= 1.3.1)
+ sidekiq-limit_fetch (2.4.1)
+ sidekiq (>= 2.6.5, < 4.0)
sidetiq (0.6.3)
celluloid (>= 0.14.1)
ice_cube (= 0.11.1)
@@ -769,6 +771,7 @@ DEPENDENCIES
settingslogic
shoulda-matchers (~> 2.7.0)
sidekiq (~> 3.3)
+ sidekiq-limit_fetch
sidetiq (= 0.6.3)
simplecov
sinatra
diff --git a/Procfile b/Procfile
index a0ab4a734a4..799b92729fa 100644
--- a/Procfile
+++ b/Procfile
@@ -1,2 +1,2 @@
web: bundle exec unicorn_rails -p ${PORT:="3000"} -E ${RAILS_ENV:="development"} -c ${UNICORN_CONFIG:="config/unicorn.rb"}
-worker: bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default
+worker: bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default
diff --git a/app/controllers/projects/repositories_controller.rb b/app/controllers/projects/repositories_controller.rb
index cbb888b25e8..96defb0c721 100644
--- a/app/controllers/projects/repositories_controller.rb
+++ b/app/controllers/projects/repositories_controller.rb
@@ -11,18 +11,18 @@ class Projects::RepositoriesController < Projects::ApplicationController
end
def archive
- unless can?(current_user, :download_code, @project)
- render_404 and return
+ begin
+ file_path = ArchiveRepositoryService.new(@project, params[:ref], params[:format]).execute
+ rescue
+ return head :not_found
end
- file_path = ArchiveRepositoryService.new.execute(@project, params[:ref], params[:format])
-
if file_path
# Send file to user
response.headers["Content-Length"] = File.open(file_path).size.to_s
send_file file_path
else
- render_404
+ redirect_to request.fullpath
end
end
end
diff --git a/app/models/repository.rb b/app/models/repository.rb
index 77765cae1a0..72769498872 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -267,6 +267,9 @@ class Repository
# Remove archives older than 2 hours
def clean_old_archives
repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path
+
+ return unless File.directory?(repository_downloads_path)
+
Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete))
end
diff --git a/app/services/archive_repository_service.rb b/app/services/archive_repository_service.rb
index 8823f6fdc67..e1b41527d8d 100644
--- a/app/services/archive_repository_service.rb
+++ b/app/services/archive_repository_service.rb
@@ -1,14 +1,62 @@
class ArchiveRepositoryService
- def execute(project, ref, format)
- storage_path = Gitlab.config.gitlab.repository_downloads_path
+ attr_reader :project, :ref, :format
- unless File.directory?(storage_path)
- FileUtils.mkdir_p(storage_path)
+ def initialize(project, ref, format)
+ format ||= 'tar.gz'
+ @project, @ref, @format = project, ref, format.downcase
+ end
+
+ def execute(options = {})
+ project.repository.clean_old_archives
+
+ raise "No archive file path" unless file_path
+
+ return file_path if archived?
+
+ unless archiving?
+ RepositoryArchiveWorker.perform_async(project.id, ref, format)
end
- format ||= 'tar.gz'
- repository = project.repository
- repository.clean_old_archives
- repository.archive_repo(ref, storage_path, format.downcase)
+ archived = wait_until_archived(options[:timeout] || 5.0)
+
+ file_path if archived
+ end
+
+ private
+
+ def storage_path
+ Gitlab.config.gitlab.repository_downloads_path
+ end
+
+ def file_path
+ @file_path ||= project.repository.archive_file_path(ref, storage_path, format)
+ end
+
+ def pid_file_path
+ @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format)
+ end
+
+ def archived?
+ File.exist?(file_path)
+ end
+
+ def archiving?
+ File.exist?(pid_file_path)
+ end
+
+ def wait_until_archived(timeout = 5.0)
+ return archived? if timeout == 0.0
+
+ t1 = Time.now
+
+ begin
+ sleep 0.1
+
+ success = archived?
+
+ t2 = Time.now
+ end until success || t2 - t1 >= timeout
+
+ success
end
end
diff --git a/app/workers/repository_archive_worker.rb b/app/workers/repository_archive_worker.rb
new file mode 100644
index 00000000000..021c1139568
--- /dev/null
+++ b/app/workers/repository_archive_worker.rb
@@ -0,0 +1,43 @@
+class RepositoryArchiveWorker
+ include Sidekiq::Worker
+
+ sidekiq_options queue: :archive_repo
+
+ attr_accessor :project, :ref, :format
+
+ def perform(project_id, ref, format)
+ @project = Project.find(project_id)
+ @ref, @format = ref, format.downcase
+
+ repository = project.repository
+
+ repository.clean_old_archives
+
+ return unless file_path
+ return if archived? || archiving?
+
+ repository.archive_repo(ref, storage_path, format)
+ end
+
+ private
+
+ def storage_path
+ Gitlab.config.gitlab.repository_downloads_path
+ end
+
+ def file_path
+ @file_path ||= project.repository.archive_file_path(ref, storage_path, format)
+ end
+
+ def pid_file_path
+ @pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format)
+ end
+
+ def archived?
+ File.exist?(file_path)
+ end
+
+ def archiving?
+ File.exist?(pid_file_path)
+ end
+end
diff --git a/bin/background_jobs b/bin/background_jobs
index 59a51c5c868..a041a4b0433 100755
--- a/bin/background_jobs
+++ b/bin/background_jobs
@@ -37,7 +37,7 @@ start_no_deamonize()
start_sidekiq()
{
- bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1
+ bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1
}
load_ok()
diff --git a/config/initializers/4_sidekiq.rb b/config/initializers/4_sidekiq.rb
index e856499732e..da2e8a08b07 100644
--- a/config/initializers/4_sidekiq.rb
+++ b/config/initializers/4_sidekiq.rb
@@ -25,3 +25,5 @@ Sidekiq.configure_client do |config|
namespace: 'resque:gitlab'
}
end
+
+Sidekiq::Queue["archive_repo"].limit = 2
diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb
index b259914a01c..1fbf3dca3c6 100644
--- a/lib/api/repositories.rb
+++ b/lib/api/repositories.rb
@@ -133,10 +133,11 @@ module API
authorize! :download_code, user_project
begin
- file_path = ArchiveRepositoryService.new.execute(
- user_project,
- params[:sha],
- params[:format])
+ file_path = ArchiveRepositoryService.new(
+ user_project,
+ params[:sha],
+ params[:format]
+ ).execute
rescue
not_found!('File')
end
@@ -149,7 +150,7 @@ module API
env['api.format'] = :binary
present data
else
- not_found!('File')
+ redirect request.fullpath
end
end
diff --git a/spec/controllers/projects/repositories_controller_spec.rb b/spec/controllers/projects/repositories_controller_spec.rb
new file mode 100644
index 00000000000..91856ed0cc0
--- /dev/null
+++ b/spec/controllers/projects/repositories_controller_spec.rb
@@ -0,0 +1,65 @@
+require "spec_helper"
+
+describe Projects::RepositoriesController do
+ let(:project) { create(:project) }
+ let(:user) { create(:user) }
+
+ describe "GET archive" do
+ before do
+ sign_in(user)
+ project.team << [user, :developer]
+
+ allow(ArchiveRepositoryService).to receive(:new).and_return(service)
+ end
+
+ let(:service) { ArchiveRepositoryService.new(project, "master", "zip") }
+
+ it "executes ArchiveRepositoryService" do
+ expect(ArchiveRepositoryService).to receive(:new).with(project, "master", "zip")
+ expect(service).to receive(:execute)
+
+ get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
+ end
+
+ context "when the service raises an error" do
+
+ before do
+ allow(service).to receive(:execute).and_raise("Archive failed")
+ end
+
+ it "renders Not Found" do
+ get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
+
+ expect(response.status).to eq(404)
+ end
+ end
+
+ context "when the service doesn't return a path" do
+
+ before do
+ allow(service).to receive(:execute).and_return(nil)
+ end
+
+ it "reloads the page" do
+ get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
+
+ expect(response).to redirect_to(archive_namespace_project_repository_path(project.namespace, project, ref: "master", format: "zip"))
+ end
+ end
+
+ context "when the service returns a path" do
+
+ let(:path) { Rails.root.join("spec/fixtures/dk.png").to_s }
+
+ before do
+ allow(service).to receive(:execute).and_return(path)
+ end
+
+ it "sends the file" do
+ get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
+
+ expect(response.body).to eq(File.binread(path))
+ end
+ end
+ end
+end
diff --git a/spec/services/archive_repository_service_spec.rb b/spec/services/archive_repository_service_spec.rb
new file mode 100644
index 00000000000..f168a913976
--- /dev/null
+++ b/spec/services/archive_repository_service_spec.rb
@@ -0,0 +1,93 @@
+require 'spec_helper'
+
+describe ArchiveRepositoryService do
+ let(:project) { create(:project) }
+ subject { ArchiveRepositoryService.new(project, "master", "zip") }
+
+ describe "#execute" do
+ it "cleans old archives" do
+ expect(project.repository).to receive(:clean_old_archives)
+
+ subject.execute(timeout: 0.0)
+ end
+
+ context "when the repository doesn't have an archive file path" do
+ before do
+ allow(project.repository).to receive(:archive_file_path).and_return(nil)
+ end
+
+ it "raises an error" do
+ expect {
+ subject.execute(timeout: 0.0)
+ }.to raise_error
+ end
+ end
+
+ context "when the repository has an archive file path" do
+ let(:file_path) { "/archive.zip" }
+ let(:pid_file_path) { "/archive.zip.pid" }
+
+ before do
+ allow(project.repository).to receive(:archive_file_path).and_return(file_path)
+ allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path)
+ end
+
+ context "when the archive file already exists" do
+ before do
+ allow(File).to receive(:exist?).with(file_path).and_return(true)
+ end
+
+ it "returns the file path" do
+ expect(subject.execute(timeout: 0.0)).to eq(file_path)
+ end
+ end
+
+ context "when the archive file doesn't exist yet" do
+ before do
+ allow(File).to receive(:exist?).with(file_path).and_return(false)
+ allow(File).to receive(:exist?).with(pid_file_path).and_return(true)
+ end
+
+ context "when the archive pid file doesn't exist yet" do
+ before do
+ allow(File).to receive(:exist?).with(pid_file_path).and_return(false)
+ end
+
+ it "queues the RepositoryArchiveWorker" do
+ expect(RepositoryArchiveWorker).to receive(:perform_async)
+
+ subject.execute(timeout: 0.0)
+ end
+ end
+
+ context "when the archive pid file already exists" do
+ it "doesn't queue the RepositoryArchiveWorker" do
+ expect(RepositoryArchiveWorker).not_to receive(:perform_async)
+
+ subject.execute(timeout: 0.0)
+ end
+ end
+
+ context "when the archive file exists after a little while" do
+ before do
+ Thread.new do
+ sleep 0.1
+ allow(File).to receive(:exist?).with(file_path).and_return(true)
+ end
+ end
+
+ it "returns the file path" do
+ expect(subject.execute(timeout: 0.2)).to eq(file_path)
+ end
+ end
+
+ context "when the archive file doesn't exist after the timeout" do
+ it "returns nil" do
+ expect(subject.execute(timeout: 0.0)).to eq(nil)
+ end
+ end
+ end
+ end
+ end
+end
+
diff --git a/spec/workers/repository_archive_worker_spec.rb b/spec/workers/repository_archive_worker_spec.rb
new file mode 100644
index 00000000000..c2362058cfd
--- /dev/null
+++ b/spec/workers/repository_archive_worker_spec.rb
@@ -0,0 +1,80 @@
+require 'spec_helper'
+
+describe RepositoryArchiveWorker do
+ let(:project) { create(:project) }
+ subject { RepositoryArchiveWorker.new }
+
+ before do
+ allow(Project).to receive(:find).and_return(project)
+ end
+
+ describe "#perform" do
+ it "cleans old archives" do
+ expect(project.repository).to receive(:clean_old_archives)
+
+ subject.perform(project.id, "master", "zip")
+ end
+
+ context "when the repository doesn't have an archive file path" do
+ before do
+ allow(project.repository).to receive(:archive_file_path).and_return(nil)
+ end
+
+ it "doesn't archive the repo" do
+ expect(project.repository).not_to receive(:archive_repo)
+
+ subject.perform(project.id, "master", "zip")
+ end
+ end
+
+ context "when the repository has an archive file path" do
+ let(:file_path) { "/archive.zip" }
+ let(:pid_file_path) { "/archive.zip.pid" }
+
+ before do
+ allow(project.repository).to receive(:archive_file_path).and_return(file_path)
+ allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path)
+ end
+
+ context "when the archive file already exists" do
+ before do
+ allow(File).to receive(:exist?).with(file_path).and_return(true)
+ end
+
+ it "doesn't archive the repo" do
+ expect(project.repository).not_to receive(:archive_repo)
+
+ subject.perform(project.id, "master", "zip")
+ end
+ end
+
+ context "when the archive file doesn't exist yet" do
+ before do
+ allow(File).to receive(:exist?).with(file_path).and_return(false)
+ allow(File).to receive(:exist?).with(pid_file_path).and_return(true)
+ end
+
+ context "when the archive pid file doesn't exist yet" do
+ before do
+ allow(File).to receive(:exist?).with(pid_file_path).and_return(false)
+ end
+
+ it "archives the repo" do
+ expect(project.repository).to receive(:archive_repo)
+
+ subject.perform(project.id, "master", "zip")
+ end
+ end
+
+ context "when the archive pid file already exists" do
+ it "doesn't archive the repo" do
+ expect(project.repository).not_to receive(:archive_repo)
+
+ subject.perform(project.id, "master", "zip")
+ end
+ end
+ end
+ end
+ end
+end
+