summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2018-05-07 13:50:00 +0000
committerDouwe Maan <douwe@gitlab.com>2018-05-07 13:50:00 +0000
commit6fb1dc67853225058954af7a93067fa2842fe43b (patch)
treee776caef21bee75f01b939b10337c6d120b2fb94
parent965d0394d371f427ad0423bda2a6dc8867b31563 (diff)
parente076bd9b17ed0b5f603b53c3b9f27603d7bb3feb (diff)
downloadgitlab-ce-6fb1dc67853225058954af7a93067fa2842fe43b.tar.gz
Merge branch 'tc-repo-verify-mails' into 'master'
Various improvements to repository checks See merge request gitlab-org/gitlab-ce!18484
-rw-r--r--app/views/admin/application_settings/_repository_check.html.haml2
-rw-r--r--app/workers/admin_email_worker.rb6
-rw-r--r--app/workers/repository_check/batch_worker.rb35
-rw-r--r--app/workers/repository_check/single_repository_worker.rb48
-rw-r--r--changelogs/unreleased/tc-repo-verify-mails.yml5
-rw-r--r--doc/administration/repository_checks.md26
-rw-r--r--spec/workers/admin_email_worker_spec.rb41
-rw-r--r--spec/workers/repository_check/batch_worker_spec.rb4
-rw-r--r--spec/workers/repository_check/single_repository_worker_spec.rb79
9 files changed, 169 insertions, 77 deletions
diff --git a/app/views/admin/application_settings/_repository_check.html.haml b/app/views/admin/application_settings/_repository_check.html.haml
index f33769b23c2..fe335f30a62 100644
--- a/app/views/admin/application_settings/_repository_check.html.haml
+++ b/app/views/admin/application_settings/_repository_check.html.haml
@@ -12,7 +12,7 @@
Enable Repository Checks
.help-block
GitLab will periodically run
- %a{ href: 'https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html', target: 'blank' } 'git fsck'
+ %a{ href: 'https://git-scm.com/docs/git-fsck', target: 'blank' } 'git fsck'
in all project and wiki repositories to look for silent disk corruption issues.
.form-group
.col-sm-offset-2.col-sm-10
diff --git a/app/workers/admin_email_worker.rb b/app/workers/admin_email_worker.rb
index bec0a003a1c..044e470141e 100644
--- a/app/workers/admin_email_worker.rb
+++ b/app/workers/admin_email_worker.rb
@@ -3,6 +3,12 @@ class AdminEmailWorker
include CronjobQueue
def perform
+ send_repository_check_mail if Gitlab::CurrentSettings.repository_checks_enabled
+ end
+
+ private
+
+ def send_repository_check_mail
repository_check_failed_count = Project.where(last_repository_check_failed: true).count
return if repository_check_failed_count.zero?
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb
index 76688cf51c1..72f0a9b0619 100644
--- a/app/workers/repository_check/batch_worker.rb
+++ b/app/workers/repository_check/batch_worker.rb
@@ -4,8 +4,11 @@ module RepositoryCheck
include CronjobQueue
RUN_TIME = 3600
+ BATCH_SIZE = 10_000
def perform
+ return unless Gitlab::CurrentSettings.repository_checks_enabled
+
start = Time.now
# This loop will break after a little more than one hour ('a little
@@ -15,7 +18,6 @@ module RepositoryCheck
# check, only one (or two) will be checked at a time.
project_ids.each do |project_id|
break if Time.now - start >= RUN_TIME
- break unless current_settings.repository_checks_enabled
next unless try_obtain_lease(project_id)
@@ -31,12 +33,20 @@ module RepositoryCheck
# getting ID's from Postgres is not terribly slow, and because no user
# has to sit and wait for this query to finish.
def project_ids
- limit = 10_000
- never_checked_projects = Project.where('last_repository_check_at IS NULL AND created_at < ?', 24.hours.ago)
- .limit(limit).pluck(:id)
- old_check_projects = Project.where('last_repository_check_at < ?', 1.month.ago)
- .reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
- never_checked_projects + old_check_projects
+ never_checked_project_ids(BATCH_SIZE) + old_checked_project_ids(BATCH_SIZE)
+ end
+
+ def never_checked_project_ids(batch_size)
+ Project.where(last_repository_check_at: nil)
+ .where('created_at < ?', 24.hours.ago)
+ .limit(batch_size).pluck(:id)
+ end
+
+ def old_checked_project_ids(batch_size)
+ Project.where.not(last_repository_check_at: nil)
+ .where('last_repository_check_at < ?', 1.month.ago)
+ .reorder(last_repository_check_at: :asc)
+ .limit(batch_size).pluck(:id)
end
def try_obtain_lease(id)
@@ -47,16 +57,5 @@ module RepositoryCheck
timeout: 24.hours
).try_obtain
end
-
- def current_settings
- # No caching of the settings! If we cache them and an admin disables
- # this feature, an active RepositoryCheckWorker would keep going for up
- # to 1 hour after the feature was disabled.
- if Rails.env.test?
- Gitlab::CurrentSettings.fake_application_settings
- else
- ApplicationSetting.current
- end
- end
end
end
diff --git a/app/workers/repository_check/single_repository_worker.rb b/app/workers/repository_check/single_repository_worker.rb
index 116bc185b38..3cffb8b14e4 100644
--- a/app/workers/repository_check/single_repository_worker.rb
+++ b/app/workers/repository_check/single_repository_worker.rb
@@ -5,27 +5,34 @@ module RepositoryCheck
def perform(project_id)
project = Project.find(project_id)
+ healthy = project_healthy?(project)
+
+ update_repository_check_status(project, healthy)
+ end
+
+ private
+
+ def update_repository_check_status(project, healthy)
project.update_columns(
- last_repository_check_failed: !check(project),
+ last_repository_check_failed: !healthy,
last_repository_check_at: Time.now
)
end
- private
+ def project_healthy?(project)
+ repo_healthy?(project) && wiki_repo_healthy?(project)
+ end
- def check(project)
- if has_pushes?(project) && !git_fsck(project.repository)
- false
- elsif project.wiki_enabled?
- # Historically some projects never had their wiki repos initialized;
- # this happens on project creation now. Let's initialize an empty repo
- # if it is not already there.
- project.create_wiki
+ def repo_healthy?(project)
+ return true unless has_changes?(project)
- git_fsck(project.wiki.repository)
- else
- true
- end
+ git_fsck(project.repository)
+ end
+
+ def wiki_repo_healthy?(project)
+ return true unless has_wiki_changes?(project)
+
+ git_fsck(project.wiki.repository)
end
def git_fsck(repository)
@@ -39,8 +46,19 @@ module RepositoryCheck
false
end
- def has_pushes?(project)
+ def has_changes?(project)
Project.with_push.exists?(project.id)
end
+
+ def has_wiki_changes?(project)
+ return false unless project.wiki_enabled?
+
+ # Historically some projects never had their wiki repos initialized;
+ # this happens on project creation now. Let's initialize an empty repo
+ # if it is not already there.
+ return false unless project.create_wiki
+
+ has_changes?(project)
+ end
end
end
diff --git a/changelogs/unreleased/tc-repo-verify-mails.yml b/changelogs/unreleased/tc-repo-verify-mails.yml
new file mode 100644
index 00000000000..b4d3c4b1596
--- /dev/null
+++ b/changelogs/unreleased/tc-repo-verify-mails.yml
@@ -0,0 +1,5 @@
+---
+title: Small improvements to repository checks
+merge_request: 18484
+author:
+type: changed
diff --git a/doc/administration/repository_checks.md b/doc/administration/repository_checks.md
index ee37ea49874..efeec9db517 100644
--- a/doc/administration/repository_checks.md
+++ b/doc/administration/repository_checks.md
@@ -13,12 +13,12 @@ checks failed you can see their output on the admin log page under
## Periodic checks
-When enabled, GitLab periodically runs a repository check on all project
-repositories and wiki repositories in order to detect data corruption problems.
+When enabled, GitLab periodically runs a repository check on all project
+repositories and wiki repositories in order to detect data corruption.
A project will be checked no more than once per month. If any projects
fail their repository checks all GitLab administrators will receive an email
-notification of the situation. This notification is sent out once a week on
-Sunday, by default.
+notification of the situation. This notification is sent out once a week,
+by default, midnight at the start of Sunday.
## Disabling periodic checks
@@ -28,16 +28,18 @@ panel.
## What to do if a check failed
If the repository check fails for some repository you should look up the error
-in repocheck.log (in the admin panel or on disk; see
-`/var/log/gitlab/gitlab-rails` for Omnibus installations or
-`/home/git/gitlab/log` for installations from source). Once you have
-resolved the issue use the admin panel to trigger a new repository check on
-the project. This will clear the 'check failed' state.
+in `repocheck.log`:
+
+- in the [admin panel](logs.md#repocheck.log)
+- or on disk, see:
+ - `/var/log/gitlab/gitlab-rails` for Omnibus installations
+ - `/home/git/gitlab/log` for installations from source
If for some reason the periodic repository check caused a lot of false
-alarms you can choose to clear ALL repository check states from the
-'Settings' page of the admin panel.
+alarms you can choose to clear *all* repository check states by
+clicking "Clear all repository checks" on the **Settings** page of the
+admin panel (`/admin/application_settings`).
---
[ce-3232]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/3232 "Auto git fsck"
-[git-fsck]: https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html "git fsck documentation"
+[git-fsck]: https://git-scm.com/docs/git-fsck "git fsck documentation"
diff --git a/spec/workers/admin_email_worker_spec.rb b/spec/workers/admin_email_worker_spec.rb
new file mode 100644
index 00000000000..27687f069ea
--- /dev/null
+++ b/spec/workers/admin_email_worker_spec.rb
@@ -0,0 +1,41 @@
+require 'spec_helper'
+
+describe AdminEmailWorker do
+ subject(:worker) { described_class.new }
+
+ describe '.perform' do
+ it 'does not attempt to send repository check mail when they are disabled' do
+ stub_application_setting(repository_checks_enabled: false)
+
+ expect(worker).not_to receive(:send_repository_check_mail)
+
+ worker.perform
+ end
+
+ context 'repository_checks enabled' do
+ before do
+ stub_application_setting(repository_checks_enabled: true)
+ end
+
+ it 'checks if repository check mail should be sent' do
+ expect(worker).to receive(:send_repository_check_mail)
+
+ worker.perform
+ end
+
+ it 'does not send mail when there are no failed repos' do
+ expect(RepositoryCheckMailer).not_to receive(:notify)
+
+ worker.perform
+ end
+
+ it 'send mail when there is a failed repo' do
+ create(:project, last_repository_check_failed: true, last_repository_check_at: Date.yesterday)
+
+ expect(RepositoryCheckMailer).to receive(:notify).and_return(spy)
+
+ worker.perform
+ end
+ end
+ end
+end
diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb
index 850b8cd8f5c..6cd27d2fafb 100644
--- a/spec/workers/repository_check/batch_worker_spec.rb
+++ b/spec/workers/repository_check/batch_worker_spec.rb
@@ -31,8 +31,8 @@ describe RepositoryCheck::BatchWorker do
it 'does nothing when repository checks are disabled' do
create(:project, created_at: 1.week.ago)
- current_settings = double('settings', repository_checks_enabled: false)
- expect(subject).to receive(:current_settings) { current_settings }
+
+ stub_application_setting(repository_checks_enabled: false)
expect(subject.perform).to eq(nil)
end
diff --git a/spec/workers/repository_check/single_repository_worker_spec.rb b/spec/workers/repository_check/single_repository_worker_spec.rb
index 1d9bbf2ca62..a021235aed6 100644
--- a/spec/workers/repository_check/single_repository_worker_spec.rb
+++ b/spec/workers/repository_check/single_repository_worker_spec.rb
@@ -2,44 +2,60 @@ require 'spec_helper'
require 'fileutils'
describe RepositoryCheck::SingleRepositoryWorker do
- subject { described_class.new }
+ subject(:worker) { described_class.new }
- it 'passes when the project has no push events' do
- project = create(:project_empty_repo, :wiki_disabled)
+ it 'skips when the project has no push events' do
+ project = create(:project, :repository, :wiki_disabled)
project.events.destroy_all
- break_repo(project)
+ break_project(project)
- subject.perform(project.id)
+ expect(worker).not_to receive(:git_fsck)
+
+ worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(false)
end
it 'fails when the project has push events and a broken repository' do
- project = create(:project_empty_repo)
+ project = create(:project, :repository)
create_push_event(project)
- break_repo(project)
+ break_project(project)
- subject.perform(project.id)
+ worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(true)
end
+ it 'succeeds when the project repo is valid' do
+ project = create(:project, :repository, :wiki_disabled)
+ create_push_event(project)
+
+ expect(worker).to receive(:git_fsck).and_call_original
+
+ expect do
+ worker.perform(project.id)
+ end.to change { project.reload.last_repository_check_at }
+
+ expect(project.reload.last_repository_check_failed).to eq(false)
+ end
+
it 'fails if the wiki repository is broken' do
- project = create(:project_empty_repo, :wiki_enabled)
+ project = create(:project, :repository, :wiki_enabled)
project.create_wiki
+ create_push_event(project)
# Test sanity: everything should be fine before the wiki repo is broken
- subject.perform(project.id)
+ worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(false)
break_wiki(project)
- subject.perform(project.id)
+ worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(true)
end
it 'skips wikis when disabled' do
- project = create(:project_empty_repo, :wiki_disabled)
+ project = create(:project, :wiki_disabled)
# Make sure the test would fail if the wiki repo was checked
break_wiki(project)
@@ -49,8 +65,8 @@ describe RepositoryCheck::SingleRepositoryWorker do
end
it 'creates missing wikis' do
- project = create(:project_empty_repo, :wiki_enabled)
- FileUtils.rm_rf(wiki_path(project))
+ project = create(:project, :wiki_enabled)
+ Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path)
subject.perform(project.id)
@@ -58,34 +74,39 @@ describe RepositoryCheck::SingleRepositoryWorker do
end
it 'does not create a wiki if the main repo does not exist at all' do
- project = create(:project_empty_repo)
- create_push_event(project)
- FileUtils.rm_rf(project.repository.path_to_repo)
- FileUtils.rm_rf(wiki_path(project))
+ project = create(:project, :repository)
+ Gitlab::Shell.new.rm_directory(project.repository_storage, project.path)
+ Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path)
subject.perform(project.id)
- expect(File.exist?(wiki_path(project))).to eq(false)
+ expect(Gitlab::Shell.new.exists?(project.repository_storage, project.wiki.path)).to eq(false)
end
- def break_wiki(project)
- objects_dir = wiki_path(project) + '/objects'
+ def create_push_event(project)
+ project.events.create(action: Event::PUSHED, author_id: create(:user).id)
+ end
- # Replace the /objects directory with a file so that the repo is
- # invalid, _and_ 'git init' cannot fix it.
- FileUtils.rm_rf(objects_dir)
- FileUtils.touch(objects_dir) if File.directory?(wiki_path(project))
+ def break_wiki(project)
+ break_repo(wiki_path(project))
end
def wiki_path(project)
project.wiki.repository.path_to_repo
end
- def create_push_event(project)
- project.events.create(action: Event::PUSHED, author_id: create(:user).id)
+ def break_project(project)
+ break_repo(project.repository.path_to_repo)
end
- def break_repo(project)
- FileUtils.rm_rf(File.join(project.repository.path_to_repo, 'objects'))
+ def break_repo(repo)
+ # Create or replace blob ffffffffffffffffffffffffffffffffffffffff with an empty file
+ # This will make the repo invalid, _and_ 'git init' cannot fix it.
+ path = File.join(repo, 'objects', 'ff')
+ file = File.join(path, 'ffffffffffffffffffffffffffffffffffffff')
+
+ FileUtils.mkdir_p(path)
+ FileUtils.rm_f(file)
+ FileUtils.touch(file)
end
end