summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Vosmaer <contact@jacobvosmaer.nl>2016-04-13 15:56:05 +0200
committerJacob Vosmaer <contact@jacobvosmaer.nl>2016-04-13 15:56:05 +0200
commit0f602be99f99f1ae493478a8a28df2907cfa0082 (patch)
treea49a9a7ac70ca6825547db88a32f8b4c00343aa3
parent9a30d3b5aef732e782e9496b2e8ae62069ba521a (diff)
downloadgitlab-ce-0f602be99f99f1ae493478a8a28df2907cfa0082.tar.gz
Clear repository check columns asynchronously
-rw-r--r--app/controllers/admin/application_settings_controller.rb8
-rw-r--r--app/controllers/admin/projects_controller.rb2
-rw-r--r--app/workers/repository_check/batch_worker.rb63
-rw-r--r--app/workers/repository_check/clear_worker.rb17
-rw-r--r--app/workers/repository_check/single_repository_worker.rb36
-rw-r--r--app/workers/repository_check_worker.rb61
-rw-r--r--app/workers/single_repository_check_worker.rb34
-rw-r--r--config/initializers/1_settings.rb2
-rw-r--r--spec/features/admin/admin_uses_repository_checks_spec.rb27
-rw-r--r--spec/workers/repository_check/batch_worker_spec.rb (renamed from spec/workers/repository_check_worker_spec.rb)4
-rw-r--r--spec/workers/repository_check/clear_worker_spec.rb17
11 files changed, 150 insertions, 121 deletions
diff --git a/app/controllers/admin/application_settings_controller.rb b/app/controllers/admin/application_settings_controller.rb
index a54864480a2..b4a28b8dd3f 100644
--- a/app/controllers/admin/application_settings_controller.rb
+++ b/app/controllers/admin/application_settings_controller.rb
@@ -20,18 +20,14 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController
end
def clear_repository_check_states
- Project.update_all(
- last_repository_check_failed: false,
- last_repository_check_at: nil
- )
+ RepositoryCheck::ClearWorker.perform_async
redirect_to(
admin_application_settings_path,
- notice: 'All repository check states were cleared.'
+ notice: 'Started asynchronous removal of all repository check states.'
)
end
-
private
def set_application_setting
diff --git a/app/controllers/admin/projects_controller.rb b/app/controllers/admin/projects_controller.rb
index 6854e57b650..87986fdf8b1 100644
--- a/app/controllers/admin/projects_controller.rb
+++ b/app/controllers/admin/projects_controller.rb
@@ -32,7 +32,7 @@ class Admin::ProjectsController < Admin::ApplicationController
end
def repository_check
- SingleRepositoryCheckWorker.perform_async(@project.id)
+ RepositoryCheck::SingleRepositoryWorker.perform_async(@project.id)
redirect_to(
admin_namespace_project_path(@project.namespace, @project),
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb
new file mode 100644
index 00000000000..16cd77a9bf0
--- /dev/null
+++ b/app/workers/repository_check/batch_worker.rb
@@ -0,0 +1,63 @@
+module RepositoryCheck
+ class BatchWorker
+ include Sidekiq::Worker
+
+ RUN_TIME = 3600
+
+ sidekiq_options retry: false
+
+ def perform
+ start = Time.now
+
+ # This loop will break after a little more than one hour ('a little
+ # more' because `git fsck` may take a few minutes), or if it runs out of
+ # projects to check. By default sidekiq-cron will start a new
+ # RepositoryCheckWorker each hour so that as long as there are repositories to
+ # check, only one (or two) will be checked at a time.
+ project_ids.each do |project_id|
+ break if Time.now - start >= RUN_TIME
+ break unless current_settings.repository_checks_enabled
+
+ next unless try_obtain_lease(project_id)
+
+ SingleRepositoryWorker.new.perform(project_id)
+ end
+ end
+
+ private
+
+ # Project.find_each does not support WHERE clauses and
+ # Project.find_in_batches does not support ordering. So we just build an
+ # array of ID's. This is OK because we do it only once an hour, because
+ # getting ID's from Postgres is not terribly slow, and because no user
+ # has to sit and wait for this query to finish.
+ def project_ids
+ limit = 10_000
+ never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
+ pluck(:id)
+ old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago).
+ reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
+ never_checked_projects + old_check_projects
+ end
+
+ def try_obtain_lease(id)
+ # Use a 24-hour timeout because on servers/projects where 'git fsck' is
+ # super slow we definitely do not want to run it twice in parallel.
+ Gitlab::ExclusiveLease.new(
+ "project_repository_check:#{id}",
+ timeout: 24.hours
+ ).try_obtain
+ end
+
+ def current_settings
+ # No caching of the settings! If we cache them and an admin disables
+ # this feature, an active RepositoryCheckWorker would keep going for up
+ # to 1 hour after the feature was disabled.
+ if Rails.env.test?
+ Gitlab::CurrentSettings.fake_application_settings
+ else
+ ApplicationSetting.current
+ end
+ end
+ end
+end
diff --git a/app/workers/repository_check/clear_worker.rb b/app/workers/repository_check/clear_worker.rb
new file mode 100644
index 00000000000..fe0cce9aab7
--- /dev/null
+++ b/app/workers/repository_check/clear_worker.rb
@@ -0,0 +1,17 @@
+module RepositoryCheck
+ class ClearWorker
+ include Sidekiq::Worker
+
+ sidekiq_options retry: false
+
+ def perform
+ # Do batched updates because these updates will be slow and locking
+ Project.select(:id).find_in_batches(batch_size: 1000) do |batch|
+ Project.where(id: batch.map(&:id)).update_all(
+ last_repository_check_failed: nil,
+ last_repository_check_at: nil,
+ )
+ end
+ end
+ end
+end \ No newline at end of file
diff --git a/app/workers/repository_check/single_repository_worker.rb b/app/workers/repository_check/single_repository_worker.rb
new file mode 100644
index 00000000000..e54ae86d06c
--- /dev/null
+++ b/app/workers/repository_check/single_repository_worker.rb
@@ -0,0 +1,36 @@
+module RepositoryCheck
+ class SingleRepositoryWorker
+ include Sidekiq::Worker
+
+ sidekiq_options retry: false
+
+ def perform(project_id)
+ project = Project.find(project_id)
+ project.update_columns(
+ last_repository_check_failed: !check(project),
+ last_repository_check_at: Time.now,
+ )
+ end
+
+ private
+
+ def check(project)
+ # Use 'map do', not 'all? do', to prevent short-circuiting
+ [project.repository, project.wiki.repository].map do |repository|
+ git_fsck(repository.path_to_repo)
+ end.all?
+ end
+
+ def git_fsck(path)
+ cmd = %W(nice git --git-dir=#{path} fsck)
+ output, status = Gitlab::Popen.popen(cmd)
+
+ if status.zero?
+ true
+ else
+ Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}")
+ false
+ end
+ end
+ end
+end
diff --git a/app/workers/repository_check_worker.rb b/app/workers/repository_check_worker.rb
deleted file mode 100644
index d7ead91f94e..00000000000
--- a/app/workers/repository_check_worker.rb
+++ /dev/null
@@ -1,61 +0,0 @@
-class RepositoryCheckWorker
- include Sidekiq::Worker
-
- RUN_TIME = 3600
-
- sidekiq_options retry: false
-
- def perform
- start = Time.now
-
- # This loop will break after a little more than one hour ('a little
- # more' because `git fsck` may take a few minutes), or if it runs out of
- # projects to check. By default sidekiq-cron will start a new
- # RepositoryCheckWorker each hour so that as long as there are repositories to
- # check, only one (or two) will be checked at a time.
- project_ids.each do |project_id|
- break if Time.now - start >= RUN_TIME
- break unless current_settings.repository_checks_enabled
-
- next unless try_obtain_lease(project_id)
-
- SingleRepositoryCheckWorker.new.perform(project_id)
- end
- end
-
- private
-
- # Project.find_each does not support WHERE clauses and
- # Project.find_in_batches does not support ordering. So we just build an
- # array of ID's. This is OK because we do it only once an hour, because
- # getting ID's from Postgres is not terribly slow, and because no user
- # has to sit and wait for this query to finish.
- def project_ids
- limit = 10_000
- never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
- pluck(:id)
- old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago).
- reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
- never_checked_projects + old_check_projects
- end
-
- def try_obtain_lease(id)
- # Use a 24-hour timeout because on servers/projects where 'git fsck' is
- # super slow we definitely do not want to run it twice in parallel.
- Gitlab::ExclusiveLease.new(
- "project_repository_check:#{id}",
- timeout: 24.hours
- ).try_obtain
- end
-
- def current_settings
- # No caching of the settings! If we cache them and an admin disables
- # this feature, an active RepositoryCheckWorker would keep going for up
- # to 1 hour after the feature was disabled.
- if Rails.env.test?
- Gitlab::CurrentSettings.fake_application_settings
- else
- ApplicationSetting.current
- end
- end
-end
diff --git a/app/workers/single_repository_check_worker.rb b/app/workers/single_repository_check_worker.rb
deleted file mode 100644
index f6c345df8b5..00000000000
--- a/app/workers/single_repository_check_worker.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-class SingleRepositoryCheckWorker
- include Sidekiq::Worker
-
- sidekiq_options retry: false
-
- def perform(project_id)
- project = Project.find(project_id)
- project.update_columns(
- last_repository_check_failed: !check(project),
- last_repository_check_at: Time.now,
- )
- end
-
- private
-
- def check(project)
- # Use 'map do', not 'all? do', to prevent short-circuiting
- [project.repository, project.wiki.repository].map do |repository|
- git_fsck(repository.path_to_repo)
- end.all?
- end
-
- def git_fsck(path)
- cmd = %W(nice git --git-dir=#{path} fsck)
- output, status = Gitlab::Popen.popen(cmd)
-
- if status.zero?
- true
- else
- Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}")
- false
- end
- end
-end
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index 5eb7fdff551..3124dc43065 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -241,7 +241,7 @@ Settings.cron_jobs['stuck_ci_builds_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['stuck_ci_builds_worker']['job_class'] = 'StuckCiBuildsWorker'
Settings.cron_jobs['repository_check_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['repository_check_worker']['cron'] ||= '20 * * * *'
-Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheckWorker'
+Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::BatchWorker'
Settings.cron_jobs['admin_email_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['admin_email_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['admin_email_worker']['job_class'] = 'AdminEmailWorker'
diff --git a/spec/features/admin/admin_uses_repository_checks_spec.rb b/spec/features/admin/admin_uses_repository_checks_spec.rb
index 69b82441916..661fb761809 100644
--- a/spec/features/admin/admin_uses_repository_checks_spec.rb
+++ b/spec/features/admin/admin_uses_repository_checks_spec.rb
@@ -1,9 +1,7 @@
require 'rails_helper'
feature 'Admin uses repository checks', feature: true do
- before do
- login_as :admin
- end
+ before { login_as :admin }
scenario 'to trigger a single check' do
project = create(:empty_project)
@@ -17,7 +15,12 @@ feature 'Admin uses repository checks', feature: true do
end
scenario 'to see a single failed repository check' do
- visit_admin_project_page(broken_project)
+ project = create(:empty_project)
+ project.update_columns(
+ last_repository_check_failed: true,
+ last_repository_check_at: Time.now,
+ )
+ visit_admin_project_page(project)
page.within('.alert') do
expect(page.text).to match(/Last repository check \(.* ago\) failed/)
@@ -25,24 +28,16 @@ feature 'Admin uses repository checks', feature: true do
end
scenario 'to clear all repository checks', js: true do
- project = broken_project
visit admin_application_settings_path
+
+ expect(RepositoryCheck::ClearWorker).to receive(:perform_async)
- click_link 'Clear all repository checks' # pop-up should be auto confirmed
+ click_link 'Clear all repository checks'
- expect(project.reload.last_repository_check_failed).to eq(false)
+ expect(page).to have_content('Started asynchronous removal of all repository check states.')
end
def visit_admin_project_page(project)
visit admin_namespace_project_path(project.namespace, project)
end
-
- def broken_project
- project = create(:empty_project)
- project.update_columns(
- last_repository_check_failed: true,
- last_repository_check_at: Time.now,
- )
- project
- end
end
diff --git a/spec/workers/repository_check_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb
index 7a757658e97..51caa645f47 100644
--- a/spec/workers/repository_check_worker_spec.rb
+++ b/spec/workers/repository_check/batch_worker_spec.rb
@@ -1,7 +1,7 @@
require 'spec_helper'
-describe RepositoryCheckWorker do
- subject { RepositoryCheckWorker.new }
+describe RepositoryCheck::BatchWorker do
+ subject { described_class.new }
it 'prefers projects that have never been checked' do
projects = create_list(:project, 3)
diff --git a/spec/workers/repository_check/clear_worker_spec.rb b/spec/workers/repository_check/clear_worker_spec.rb
new file mode 100644
index 00000000000..a3b70c74787
--- /dev/null
+++ b/spec/workers/repository_check/clear_worker_spec.rb
@@ -0,0 +1,17 @@
+require 'spec_helper'
+
+describe RepositoryCheck::ClearWorker do
+ it 'clears repository check columns' do
+ project = create(:empty_project)
+ project.update_columns(
+ last_repository_check_failed: true,
+ last_repository_check_at: Time.now,
+ )
+
+ described_class.new.perform
+ project.reload
+
+ expect(project.last_repository_check_failed).to be_nil
+ expect(project.last_repository_check_at).to be_nil
+ end
+end