From 58371efbb0bd051d3a82f82acac98ad4692efeb4 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Fri, 24 Mar 2017 12:08:34 +0100 Subject: Periodically mark projects that are stuck in importing as failed Adds import jid to projects Refactor middleware to set custom expiration time via sidekiq options Add completed_jids option to sidekiq status and a few other changes --- app/workers/repository_import_worker.rb | 4 +- app/workers/stuck_import_jobs_worker.rb | 50 ++++++++++++++++++++++ .../unreleased/fix-gh-import-status-check.yml | 4 ++ config/initializers/1_settings.rb | 3 ++ .../20170324110346_add_import_jid_to_projects.rb | 9 ++++ db/schema.rb | 1 + lib/gitlab/sidekiq_status.rb | 13 ++++++ lib/gitlab/sidekiq_status/client_middleware.rb | 4 +- .../sidekiq_status/client_middleware_spec.rb | 2 +- spec/lib/gitlab/sidekiq_status_spec.rb | 13 ++++++ spec/workers/repository_import_worker_spec.rb | 2 + spec/workers/stuck_import_jobs_worker_spec.rb | 36 ++++++++++++++++ 12 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 app/workers/stuck_import_jobs_worker.rb create mode 100644 changelogs/unreleased/fix-gh-import-status-check.yml create mode 100644 db/migrate/20170324110346_add_import_jid_to_projects.rb create mode 100644 spec/workers/stuck_import_jobs_worker_spec.rb diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb index 68a6fd76e70..b33ba2ed7c1 100644 --- a/app/workers/repository_import_worker.rb +++ b/app/workers/repository_import_worker.rb @@ -2,6 +2,8 @@ class RepositoryImportWorker include Sidekiq::Worker include DedicatedSidekiqQueue + sidekiq_options status_expiration: StuckImportJobsWorker::IMPORT_EXPIRATION + attr_accessor :project, :current_user def perform(project_id) @@ -12,7 +14,7 @@ class RepositoryImportWorker import_url: @project.import_url, path: @project.path_with_namespace) - project.update_column(:import_error, nil) + project.update_columns(import_jid: self.jid, import_error: nil) result = Projects::ImportService.new(project, current_user).execute diff --git a/app/workers/stuck_import_jobs_worker.rb b/app/workers/stuck_import_jobs_worker.rb new file mode 100644 index 00000000000..c7871d99492 --- /dev/null +++ b/app/workers/stuck_import_jobs_worker.rb @@ -0,0 +1,50 @@ +class StuckImportJobsWorker + include Sidekiq::Worker + include CronjobQueue + + EXCLUSIVE_LEASE_KEY = 'fail_stuck_imports_worker_lease'.freeze + IMPORT_EXPIRATION = 15.hours.to_i + + def perform + return unless try_obtain_lease + + stuck_projects.find_in_batches(batch_size: 500) do |group| + jids = group.map(&:import_jid) + + # Find the jobs that aren't currently running or that exceeded the threshold. + completed_jids = Gitlab::SidekiqStatus.completed_jids(jids) + + if completed_jids.any? + completed_ids = group.select { |project| completed_jids.include?(project.import_jid) }.map(&:id) + + fail_batch!(completed_jids, completed_ids) + end + end + + remove_lease + end + + private + + def stuck_projects + Project.select('id, import_jid').with_import_status(:started).where.not(import_jid: nil) + end + + def fail_batch!(completed_jids, completed_ids) + Project.where(id: completed_ids).update_all(import_status: 'failed', import_error: error_message) + + Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_jids.join(', ')}") + end + + def error_message + "Import timed out. Import took longer than #{IMPORT_EXPIRATION} seconds" + end + + def try_obtain_lease + @uuid = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes).try_obtain + end + + def remove_lease + Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) + end +end diff --git a/changelogs/unreleased/fix-gh-import-status-check.yml b/changelogs/unreleased/fix-gh-import-status-check.yml new file mode 100644 index 00000000000..d04bc2954a0 --- /dev/null +++ b/changelogs/unreleased/fix-gh-import-status-check.yml @@ -0,0 +1,4 @@ +--- +title: Periodically mark projects that are stuck in importing as failed +merge_request: +author: diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index e8fef0000c1..f7cae84088e 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -349,6 +349,9 @@ Settings.cron_jobs['trending_projects_worker']['job_class'] = 'TrendingProjectsW Settings.cron_jobs['remove_unreferenced_lfs_objects_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['remove_unreferenced_lfs_objects_worker']['cron'] ||= '20 0 * * *' Settings.cron_jobs['remove_unreferenced_lfs_objects_worker']['job_class'] = 'RemoveUnreferencedLfsObjectsWorker' +Settings.cron_jobs['stuck_import_jobs_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['stuck_import_jobs_worker']['cron'] ||= '15 * * * *' +Settings.cron_jobs['stuck_import_jobs_worker']['job_class'] = 'StuckImportJobsWorker' # # GitLab Shell diff --git a/db/migrate/20170324110346_add_import_jid_to_projects.rb b/db/migrate/20170324110346_add_import_jid_to_projects.rb new file mode 100644 index 00000000000..55b87b9d56d --- /dev/null +++ b/db/migrate/20170324110346_add_import_jid_to_projects.rb @@ -0,0 +1,9 @@ +class AddImportJidToProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :projects, :import_jid, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index ccf18d07179..76f6a511561 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -920,6 +920,7 @@ ActiveRecord::Schema.define(version: 20170402231018) do t.text "description_html" t.boolean "only_allow_merge_if_all_discussions_are_resolved" t.boolean "printing_merge_request_link_enabled", default: true, null: false + t.string "import_jid" end add_index "projects", ["ci_id"], name: "index_projects_on_ci_id", using: :btree diff --git a/lib/gitlab/sidekiq_status.rb b/lib/gitlab/sidekiq_status.rb index 11e5f1b645c..ca8d3271541 100644 --- a/lib/gitlab/sidekiq_status.rb +++ b/lib/gitlab/sidekiq_status.rb @@ -72,6 +72,8 @@ module Gitlab # job_ids - The Sidekiq job IDs to check. # # Returns an array of true or false indicating job completion. + # true = job is still running + # false = job completed def self.job_status(job_ids) keys = job_ids.map { |jid| key_for(jid) } @@ -82,6 +84,17 @@ module Gitlab end end + # Returns the JIDs that are completed + # + # job_ids - The Sidekiq job IDs to check. + # + # Returns an array of completed JIDs + def self.completed_jids(job_ids) + Sidekiq.redis do |redis| + job_ids.reject { |jid| redis.exists(key_for(jid)) } + end + end + def self.key_for(jid) STATUS_KEY % jid end diff --git a/lib/gitlab/sidekiq_status/client_middleware.rb b/lib/gitlab/sidekiq_status/client_middleware.rb index d47609f490d..00983b3284a 100644 --- a/lib/gitlab/sidekiq_status/client_middleware.rb +++ b/lib/gitlab/sidekiq_status/client_middleware.rb @@ -2,7 +2,9 @@ module Gitlab module SidekiqStatus class ClientMiddleware def call(_, job, _, _) - Gitlab::SidekiqStatus.set(job['jid']) + status_expiration = job['status_expiration'] || Gitlab::SidekiqStatus::DEFAULT_EXPIRATION + + Gitlab::SidekiqStatus.set(job['jid'], status_expiration) yield end end diff --git a/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb b/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb index 287bf62d9bd..6307f8c16a3 100644 --- a/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb +++ b/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb @@ -3,7 +3,7 @@ require 'spec_helper' describe Gitlab::SidekiqStatus::ClientMiddleware do describe '#call' do it 'tracks the job in Redis' do - expect(Gitlab::SidekiqStatus).to receive(:set).with('123') + expect(Gitlab::SidekiqStatus).to receive(:set).with('123', Gitlab::SidekiqStatus::DEFAULT_EXPIRATION) described_class.new. call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil } diff --git a/spec/lib/gitlab/sidekiq_status_spec.rb b/spec/lib/gitlab/sidekiq_status_spec.rb index 56f06b61afb..496e50fbae4 100644 --- a/spec/lib/gitlab/sidekiq_status_spec.rb +++ b/spec/lib/gitlab/sidekiq_status_spec.rb @@ -73,4 +73,17 @@ describe Gitlab::SidekiqStatus do expect(key).to include('123') end end + + describe 'completed', :redis do + it 'returns the completed job' do + expect(described_class.completed_jids(%w(123))).to eq(['123']) + end + + it 'returns only the jobs completed' do + described_class.set('123') + described_class.set('456') + + expect(described_class.completed_jids(%w(123 456 789))).to eq(['789']) + end + end end diff --git a/spec/workers/repository_import_worker_spec.rb b/spec/workers/repository_import_worker_spec.rb index fbb22439f33..5a2c0671dac 100644 --- a/spec/workers/repository_import_worker_spec.rb +++ b/spec/workers/repository_import_worker_spec.rb @@ -23,10 +23,12 @@ describe RepositoryImportWorker do error = %q{remote: Not Found fatal: repository 'https://user:pass@test.com/root/repoC.git/' not found } expect_any_instance_of(Projects::ImportService).to receive(:execute). and_return({ status: :error, message: error }) + allow(subject).to receive(:jid).and_return('123') subject.perform(project.id) expect(project.reload.import_error).to include("https://*****:*****@test.com/root/repoC.git/") + expect(project.reload.import_jid).not_to be_nil end end end diff --git a/spec/workers/stuck_import_jobs_worker_spec.rb b/spec/workers/stuck_import_jobs_worker_spec.rb new file mode 100644 index 00000000000..466277a5e5e --- /dev/null +++ b/spec/workers/stuck_import_jobs_worker_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +describe StuckImportJobsWorker do + let(:worker) { described_class.new } + let(:exclusive_lease_uuid) { SecureRandom.uuid } + + before do + allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(exclusive_lease_uuid) + end + + describe 'long running import' do + let(:project) { create(:empty_project, import_jid: '123', import_status: 'started') } + + before do + allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return(['123']) + end + + it 'marks the project as failed' do + expect { worker.perform }.to change { project.reload.import_status }.to('failed') + end + end + + describe 'running import' do + let(:project) { create(:empty_project, import_jid: '123', import_status: 'started') } + + before do + allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([]) + end + + it 'does not mark the project as failed' do + worker.perform + + expect(project.reload.import_status).to eq('started') + end + end +end -- cgit v1.2.1 From 189da6b4df2853adcbf66ea0004b23d6788cc3c3 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Tue, 4 Apr 2017 16:27:23 +0000 Subject: attempt to fix db failure --- db/schema.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/schema.rb b/db/schema.rb index 76f6a511561..0ab4a4eabef 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170402231018) do +ActiveRecord::Schema.define(version: 20170324110346) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" -- cgit v1.2.1 From 7c9a46bc3e9b4ce5bb9ba5ab287176e34ed0a0c6 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Wed, 5 Apr 2017 06:28:43 +0000 Subject: Revert schema.rb --- db/schema.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/schema.rb b/db/schema.rb index 0ab4a4eabef..76f6a511561 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170324110346) do +ActiveRecord::Schema.define(version: 20170402231018) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" -- cgit v1.2.1 From 0bd4c17ee6480e88e05bb1a614c0d88819a37c74 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Wed, 5 Apr 2017 10:13:21 +0200 Subject: attempt to fix migration --- db/migrate/20170324110346_add_import_jid_to_projects.rb | 9 --------- db/migrate/20170405080720_add_import_jid_to_projects.rb | 9 +++++++++ db/schema.rb | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) delete mode 100644 db/migrate/20170324110346_add_import_jid_to_projects.rb create mode 100644 db/migrate/20170405080720_add_import_jid_to_projects.rb diff --git a/db/migrate/20170324110346_add_import_jid_to_projects.rb b/db/migrate/20170324110346_add_import_jid_to_projects.rb deleted file mode 100644 index 55b87b9d56d..00000000000 --- a/db/migrate/20170324110346_add_import_jid_to_projects.rb +++ /dev/null @@ -1,9 +0,0 @@ -class AddImportJidToProjects < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - DOWNTIME = false - - def change - add_column :projects, :import_jid, :string - end -end diff --git a/db/migrate/20170405080720_add_import_jid_to_projects.rb b/db/migrate/20170405080720_add_import_jid_to_projects.rb new file mode 100644 index 00000000000..55b87b9d56d --- /dev/null +++ b/db/migrate/20170405080720_add_import_jid_to_projects.rb @@ -0,0 +1,9 @@ +class AddImportJidToProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :projects, :import_jid, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index 76f6a511561..582f68cbee7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170402231018) do +ActiveRecord::Schema.define(version: 20170405080720) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" -- cgit v1.2.1 From 44ec40028c60b4f3a5d3f1bb049996f4d2caf9af Mon Sep 17 00:00:00 2001 From: James Lopez Date: Wed, 5 Apr 2017 16:59:28 +0200 Subject: fix project authorizations migration issue --- db/migrate/20170130204620_add_index_to_project_authorizations.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db/migrate/20170130204620_add_index_to_project_authorizations.rb b/db/migrate/20170130204620_add_index_to_project_authorizations.rb index 629b49436e3..8c593c39440 100644 --- a/db/migrate/20170130204620_add_index_to_project_authorizations.rb +++ b/db/migrate/20170130204620_add_index_to_project_authorizations.rb @@ -6,7 +6,9 @@ class AddIndexToProjectAuthorizations < ActiveRecord::Migration disable_ddl_transaction! def up - add_concurrent_index(:project_authorizations, :project_id) + unless index_exists?(:project_authorizations, :project_id) + add_concurrent_index(:project_authorizations, :project_id) + end end def down -- cgit v1.2.1 From 94716c279f35c787d5b41ed626ad9bfc1acf83aa Mon Sep 17 00:00:00 2001 From: James Lopez Date: Thu, 6 Apr 2017 09:48:58 +0200 Subject: remove unnecessary lease as cron job --- app/workers/stuck_import_jobs_worker.rb | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/app/workers/stuck_import_jobs_worker.rb b/app/workers/stuck_import_jobs_worker.rb index c7871d99492..bfc5e667bb6 100644 --- a/app/workers/stuck_import_jobs_worker.rb +++ b/app/workers/stuck_import_jobs_worker.rb @@ -2,12 +2,9 @@ class StuckImportJobsWorker include Sidekiq::Worker include CronjobQueue - EXCLUSIVE_LEASE_KEY = 'fail_stuck_imports_worker_lease'.freeze IMPORT_EXPIRATION = 15.hours.to_i def perform - return unless try_obtain_lease - stuck_projects.find_in_batches(batch_size: 500) do |group| jids = group.map(&:import_jid) @@ -20,8 +17,6 @@ class StuckImportJobsWorker fail_batch!(completed_jids, completed_ids) end end - - remove_lease end private @@ -39,12 +34,4 @@ class StuckImportJobsWorker def error_message "Import timed out. Import took longer than #{IMPORT_EXPIRATION} seconds" end - - def try_obtain_lease - @uuid = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes).try_obtain - end - - def remove_lease - Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) - end end -- cgit v1.2.1