diff options
author | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2016-04-13 15:56:05 +0200 |
---|---|---|
committer | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2016-04-13 15:56:05 +0200 |
commit | 0f602be99f99f1ae493478a8a28df2907cfa0082 (patch) | |
tree | a49a9a7ac70ca6825547db88a32f8b4c00343aa3 /app/workers | |
parent | 9a30d3b5aef732e782e9496b2e8ae62069ba521a (diff) | |
download | gitlab-ce-0f602be99f99f1ae493478a8a28df2907cfa0082.tar.gz |
Clear repository check columns asynchronously
Diffstat (limited to 'app/workers')
-rw-r--r-- | app/workers/repository_check/batch_worker.rb | 63 | ||||
-rw-r--r-- | app/workers/repository_check/clear_worker.rb | 17 | ||||
-rw-r--r-- | app/workers/repository_check/single_repository_worker.rb | 36 | ||||
-rw-r--r-- | app/workers/repository_check_worker.rb | 61 | ||||
-rw-r--r-- | app/workers/single_repository_check_worker.rb | 34 |
5 files changed, 116 insertions, 95 deletions
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb new file mode 100644 index 00000000000..16cd77a9bf0 --- /dev/null +++ b/app/workers/repository_check/batch_worker.rb @@ -0,0 +1,63 @@ +module RepositoryCheck + class BatchWorker + include Sidekiq::Worker + + RUN_TIME = 3600 + + sidekiq_options retry: false + + def perform + start = Time.now + + # This loop will break after a little more than one hour ('a little + # more' because `git fsck` may take a few minutes), or if it runs out of + # projects to check. By default sidekiq-cron will start a new + # RepositoryCheckWorker each hour so that as long as there are repositories to + # check, only one (or two) will be checked at a time. + project_ids.each do |project_id| + break if Time.now - start >= RUN_TIME + break unless current_settings.repository_checks_enabled + + next unless try_obtain_lease(project_id) + + SingleRepositoryWorker.new.perform(project_id) + end + end + + private + + # Project.find_each does not support WHERE clauses and + # Project.find_in_batches does not support ordering. So we just build an + # array of ID's. This is OK because we do it only once an hour, because + # getting ID's from Postgres is not terribly slow, and because no user + # has to sit and wait for this query to finish. + def project_ids + limit = 10_000 + never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit). + pluck(:id) + old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago). + reorder('last_repository_check_at ASC').limit(limit).pluck(:id) + never_checked_projects + old_check_projects + end + + def try_obtain_lease(id) + # Use a 24-hour timeout because on servers/projects where 'git fsck' is + # super slow we definitely do not want to run it twice in parallel. + Gitlab::ExclusiveLease.new( + "project_repository_check:#{id}", + timeout: 24.hours + ).try_obtain + end + + def current_settings + # No caching of the settings! If we cache them and an admin disables + # this feature, an active RepositoryCheckWorker would keep going for up + # to 1 hour after the feature was disabled. + if Rails.env.test? + Gitlab::CurrentSettings.fake_application_settings + else + ApplicationSetting.current + end + end + end +end diff --git a/app/workers/repository_check/clear_worker.rb b/app/workers/repository_check/clear_worker.rb new file mode 100644 index 00000000000..fe0cce9aab7 --- /dev/null +++ b/app/workers/repository_check/clear_worker.rb @@ -0,0 +1,17 @@ +module RepositoryCheck + class ClearWorker + include Sidekiq::Worker + + sidekiq_options retry: false + + def perform + # Do batched updates because these updates will be slow and locking + Project.select(:id).find_in_batches(batch_size: 1000) do |batch| + Project.where(id: batch.map(&:id)).update_all( + last_repository_check_failed: nil, + last_repository_check_at: nil, + ) + end + end + end +end
\ No newline at end of file diff --git a/app/workers/repository_check/single_repository_worker.rb b/app/workers/repository_check/single_repository_worker.rb new file mode 100644 index 00000000000..e54ae86d06c --- /dev/null +++ b/app/workers/repository_check/single_repository_worker.rb @@ -0,0 +1,36 @@ +module RepositoryCheck + class SingleRepositoryWorker + include Sidekiq::Worker + + sidekiq_options retry: false + + def perform(project_id) + project = Project.find(project_id) + project.update_columns( + last_repository_check_failed: !check(project), + last_repository_check_at: Time.now, + ) + end + + private + + def check(project) + # Use 'map do', not 'all? do', to prevent short-circuiting + [project.repository, project.wiki.repository].map do |repository| + git_fsck(repository.path_to_repo) + end.all? + end + + def git_fsck(path) + cmd = %W(nice git --git-dir=#{path} fsck) + output, status = Gitlab::Popen.popen(cmd) + + if status.zero? + true + else + Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}") + false + end + end + end +end diff --git a/app/workers/repository_check_worker.rb b/app/workers/repository_check_worker.rb deleted file mode 100644 index d7ead91f94e..00000000000 --- a/app/workers/repository_check_worker.rb +++ /dev/null @@ -1,61 +0,0 @@ -class RepositoryCheckWorker - include Sidekiq::Worker - - RUN_TIME = 3600 - - sidekiq_options retry: false - - def perform - start = Time.now - - # This loop will break after a little more than one hour ('a little - # more' because `git fsck` may take a few minutes), or if it runs out of - # projects to check. By default sidekiq-cron will start a new - # RepositoryCheckWorker each hour so that as long as there are repositories to - # check, only one (or two) will be checked at a time. - project_ids.each do |project_id| - break if Time.now - start >= RUN_TIME - break unless current_settings.repository_checks_enabled - - next unless try_obtain_lease(project_id) - - SingleRepositoryCheckWorker.new.perform(project_id) - end - end - - private - - # Project.find_each does not support WHERE clauses and - # Project.find_in_batches does not support ordering. So we just build an - # array of ID's. This is OK because we do it only once an hour, because - # getting ID's from Postgres is not terribly slow, and because no user - # has to sit and wait for this query to finish. - def project_ids - limit = 10_000 - never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit). - pluck(:id) - old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago). - reorder('last_repository_check_at ASC').limit(limit).pluck(:id) - never_checked_projects + old_check_projects - end - - def try_obtain_lease(id) - # Use a 24-hour timeout because on servers/projects where 'git fsck' is - # super slow we definitely do not want to run it twice in parallel. - Gitlab::ExclusiveLease.new( - "project_repository_check:#{id}", - timeout: 24.hours - ).try_obtain - end - - def current_settings - # No caching of the settings! If we cache them and an admin disables - # this feature, an active RepositoryCheckWorker would keep going for up - # to 1 hour after the feature was disabled. - if Rails.env.test? - Gitlab::CurrentSettings.fake_application_settings - else - ApplicationSetting.current - end - end -end diff --git a/app/workers/single_repository_check_worker.rb b/app/workers/single_repository_check_worker.rb deleted file mode 100644 index f6c345df8b5..00000000000 --- a/app/workers/single_repository_check_worker.rb +++ /dev/null @@ -1,34 +0,0 @@ -class SingleRepositoryCheckWorker - include Sidekiq::Worker - - sidekiq_options retry: false - - def perform(project_id) - project = Project.find(project_id) - project.update_columns( - last_repository_check_failed: !check(project), - last_repository_check_at: Time.now, - ) - end - - private - - def check(project) - # Use 'map do', not 'all? do', to prevent short-circuiting - [project.repository, project.wiki.repository].map do |repository| - git_fsck(repository.path_to_repo) - end.all? - end - - def git_fsck(path) - cmd = %W(nice git --git-dir=#{path} fsck) - output, status = Gitlab::Popen.popen(cmd) - - if status.zero? - true - else - Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}") - false - end - end -end |