diff options
author | Kamil Trzciński <ayufan@ayufan.eu> | 2018-02-07 12:44:23 +0000 |
---|---|---|
committer | Kamil Trzciński <ayufan@ayufan.eu> | 2018-02-07 12:44:23 +0000 |
commit | 9bd044d9a74b6dd2fa252ed085562d56aef5a6ac (patch) | |
tree | 1aea80a68828ad50fcf0fab08e5280a63bd613ec | |
parent | d08bf247bc2a99120498e939ba573f44e5f27f07 (diff) | |
parent | e27ea805457a3b794d7a8b3b6b0355eddb1c1eca (diff) | |
download | gitlab-ce-9bd044d9a74b6dd2fa252ed085562d56aef5a6ac.tar.gz |
Merge branch '38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour' into 'master'
Resolve "StuckCiJobsWorker wrongly detects, cancels 'stuck' builds when per-job timeout is more than an hour"
Closes #38265, #42196, and #42750
See merge request gitlab-org/gitlab-ce!16756
-rw-r--r-- | app/models/ci/runner.rb | 27 | ||||
-rw-r--r-- | app/models/concerns/redis_cacheable.rb | 41 | ||||
-rw-r--r-- | changelogs/unreleased/38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour.yml | 5 | ||||
-rw-r--r-- | lib/api/helpers/runner.rb | 21 | ||||
-rw-r--r-- | lib/api/runner.rb | 1 | ||||
-rw-r--r-- | spec/models/ci/runner_spec.rb | 108 | ||||
-rw-r--r-- | spec/models/concerns/redis_cacheable_spec.rb | 39 | ||||
-rw-r--r-- | spec/requests/api/runner_spec.rb | 3 |
8 files changed, 212 insertions, 33 deletions
diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb index dcbb397fb78..13c784bea0d 100644 --- a/app/models/ci/runner.rb +++ b/app/models/ci/runner.rb @@ -2,9 +2,11 @@ module Ci class Runner < ActiveRecord::Base extend Gitlab::Ci::Model include Gitlab::SQL::Pattern + include RedisCacheable RUNNER_QUEUE_EXPIRY_TIME = 60.minutes ONLINE_CONTACT_TIMEOUT = 1.hour + UPDATE_DB_RUNNER_INFO_EVERY = 40.minutes AVAILABLE_SCOPES = %w[specific shared active paused online].freeze FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level].freeze @@ -47,6 +49,8 @@ module Ci ref_protected: 1 } + cached_attr_reader :version, :revision, :platform, :architecture, :contacted_at + # Searches for runners matching the given query. # # This method uses ILIKE on PostgreSQL and LIKE on MySQL. @@ -152,6 +156,18 @@ module Ci ensure_runner_queue_value == value if value.present? end + def update_cached_info(values) + values = values&.slice(:version, :revision, :platform, :architecture) || {} + values[:contacted_at] = Time.now + + cache_attributes(values) + + if persist_cached_data? + self.assign_attributes(values) + self.save if self.changed? + end + end + private def cleanup_runner_queue @@ -164,6 +180,17 @@ module Ci "runner:build_queue:#{self.token}" end + def persist_cached_data? + # Use a random threshold to prevent beating DB updates. + # It generates a distribution between [40m, 80m]. + + contacted_at_max_age = UPDATE_DB_RUNNER_INFO_EVERY + Random.rand(UPDATE_DB_RUNNER_INFO_EVERY) + + real_contacted_at = read_attribute(:contacted_at) + real_contacted_at.nil? || + (Time.now - real_contacted_at) >= contacted_at_max_age + end + def tag_constraints unless has_tags? || run_untagged? errors.add(:tags_list, diff --git a/app/models/concerns/redis_cacheable.rb b/app/models/concerns/redis_cacheable.rb new file mode 100644 index 00000000000..b889f4202dc --- /dev/null +++ b/app/models/concerns/redis_cacheable.rb @@ -0,0 +1,41 @@ +module RedisCacheable + extend ActiveSupport::Concern + include Gitlab::Utils::StrongMemoize + + CACHED_ATTRIBUTES_EXPIRY_TIME = 24.hours + + class_methods do + def cached_attr_reader(*attributes) + attributes.each do |attribute| + define_method("#{attribute}") do + cached_attribute(attribute) || read_attribute(attribute) + end + end + end + end + + def cached_attribute(attribute) + (cached_attributes || {})[attribute] + end + + def cache_attributes(values) + Gitlab::Redis::SharedState.with do |redis| + redis.set(cache_attribute_key, values.to_json, ex: CACHED_ATTRIBUTES_EXPIRY_TIME) + end + end + + private + + def cache_attribute_key + "cache:#{self.class.name}:#{self.id}:attributes" + end + + def cached_attributes + strong_memoize(:cached_attributes) do + Gitlab::Redis::SharedState.with do |redis| + data = redis.get(cache_attribute_key) + JSON.parse(data, symbolize_names: true) if data + end + end + end +end diff --git a/changelogs/unreleased/38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour.yml b/changelogs/unreleased/38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour.yml new file mode 100644 index 00000000000..4d8e6acfcb7 --- /dev/null +++ b/changelogs/unreleased/38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour.yml @@ -0,0 +1,5 @@ +--- +title: Update runner info on all authenticated requests +merge_request: 16756 +author: +type: changed diff --git a/lib/api/helpers/runner.rb b/lib/api/helpers/runner.rb index 3d0d1287407..fbe30192a16 100644 --- a/lib/api/helpers/runner.rb +++ b/lib/api/helpers/runner.rb @@ -3,7 +3,6 @@ module API module Runner JOB_TOKEN_HEADER = 'HTTP_JOB_TOKEN'.freeze JOB_TOKEN_PARAM = :token - UPDATE_RUNNER_EVERY = 10 * 60 def runner_registration_token_valid? ActiveSupport::SecurityUtils.variable_size_secure_compare(params[:token], @@ -18,30 +17,14 @@ module API def authenticate_runner! forbidden! unless current_runner + + current_runner.update_cached_info(get_runner_version_from_params) end def current_runner @runner ||= ::Ci::Runner.find_by_token(params[:token].to_s) end - def update_runner_info - return unless update_runner? - - current_runner.contacted_at = Time.now - current_runner.assign_attributes(get_runner_version_from_params) - current_runner.save if current_runner.changed? - end - - def update_runner? - # Use a random threshold to prevent beating DB updates. - # It generates a distribution between [40m, 80m]. - # - contacted_at_max_age = UPDATE_RUNNER_EVERY + Random.rand(UPDATE_RUNNER_EVERY) - - current_runner.contacted_at.nil? || - (Time.now - current_runner.contacted_at) >= contacted_at_max_age - end - def validate_job!(job) not_found! unless job diff --git a/lib/api/runner.rb b/lib/api/runner.rb index 1f80646a2ea..5469cba69a6 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -78,7 +78,6 @@ module API post '/request' do authenticate_runner! no_content! unless current_runner.active? - update_runner_info if current_runner.runner_queue_value_latest?(params[:last_update]) header 'X-GitLab-Last-Update', params[:last_update] diff --git a/spec/models/ci/runner_spec.rb b/spec/models/ci/runner_spec.rb index b2b64e6ff48..ab170e6351c 100644 --- a/spec/models/ci/runner_spec.rb +++ b/spec/models/ci/runner_spec.rb @@ -95,28 +95,68 @@ describe Ci::Runner do subject { runner.online? } - context 'never contacted' do + before do + allow_any_instance_of(described_class).to receive(:cached_attribute).and_call_original + allow_any_instance_of(described_class).to receive(:cached_attribute) + .with(:platform).and_return("darwin") + end + + context 'no cache value' do before do - runner.contacted_at = nil + stub_redis_runner_contacted_at(nil) end - it { is_expected.to be_falsey } - end + context 'never contacted' do + before do + runner.contacted_at = nil + end - context 'contacted long time ago time' do - before do - runner.contacted_at = 1.year.ago + it { is_expected.to be_falsey } + end + + context 'contacted long time ago time' do + before do + runner.contacted_at = 1.year.ago + end + + it { is_expected.to be_falsey } end - it { is_expected.to be_falsey } + context 'contacted 1s ago' do + before do + runner.contacted_at = 1.second.ago + end + + it { is_expected.to be_truthy } + end end - context 'contacted 1s ago' do - before do - runner.contacted_at = 1.second.ago + context 'with cache value' do + context 'contacted long time ago time' do + before do + runner.contacted_at = 1.year.ago + stub_redis_runner_contacted_at(1.year.ago.to_s) + end + + it { is_expected.to be_falsey } + end + + context 'contacted 1s ago' do + before do + runner.contacted_at = 50.minutes.ago + stub_redis_runner_contacted_at(1.second.ago.to_s) + end + + it { is_expected.to be_truthy } end + end - it { is_expected.to be_truthy } + def stub_redis_runner_contacted_at(value) + Gitlab::Redis::SharedState.with do |redis| + cache_key = runner.send(:cache_attribute_key) + expect(redis).to receive(:get).with(cache_key) + .and_return({ contacted_at: value }.to_json).at_least(:once) + end end end @@ -361,6 +401,50 @@ describe Ci::Runner do end end + describe '#update_cached_info' do + let(:runner) { create(:ci_runner) } + + subject { runner.update_cached_info(architecture: '18-bit') } + + context 'when database was updated recently' do + before do + runner.contacted_at = Time.now + end + + it 'updates cache' do + expect_redis_update + + subject + end + end + + context 'when database was not updated recently' do + before do + runner.contacted_at = 2.hours.ago + end + + it 'updates database' do + expect_redis_update + + expect { subject }.to change { runner.reload.read_attribute(:contacted_at) } + .and change { runner.reload.read_attribute(:architecture) } + end + + it 'updates cache' do + expect_redis_update + + subject + end + end + + def expect_redis_update + Gitlab::Redis::SharedState.with do |redis| + redis_key = runner.send(:cache_attribute_key) + expect(redis).to receive(:set).with(redis_key, anything, any_args) + end + end + end + describe '#destroy' do let(:runner) { create(:ci_runner) } diff --git a/spec/models/concerns/redis_cacheable_spec.rb b/spec/models/concerns/redis_cacheable_spec.rb new file mode 100644 index 00000000000..3d7963120b6 --- /dev/null +++ b/spec/models/concerns/redis_cacheable_spec.rb @@ -0,0 +1,39 @@ +require 'spec_helper' + +describe RedisCacheable do + let(:model) { double } + + before do + model.extend(described_class) + allow(model).to receive(:cache_attribute_key).and_return('key') + end + + describe '#cached_attribute' do + let(:payload) { { attribute: 'value' } } + + subject { model.cached_attribute(payload.keys.first) } + + it 'gets the cache attribute' do + Gitlab::Redis::SharedState.with do |redis| + expect(redis).to receive(:get).with('key') + .and_return(payload.to_json) + end + + expect(subject).to eq(payload.values.first) + end + end + + describe '#cache_attributes' do + let(:values) { { name: 'new_name' } } + + subject { model.cache_attributes(values) } + + it 'sets the cache attributes' do + Gitlab::Redis::SharedState.with do |redis| + expect(redis).to receive(:set).with('key', values.to_json, anything) + end + + subject + end + end +end diff --git a/spec/requests/api/runner_spec.rb b/spec/requests/api/runner_spec.rb index 0bd88748479..f10b6e43d09 100644 --- a/spec/requests/api/runner_spec.rb +++ b/spec/requests/api/runner_spec.rb @@ -8,6 +8,7 @@ describe API::Runner do before do stub_gitlab_calls stub_application_setting(runners_registration_token: registration_token) + allow_any_instance_of(Ci::Runner).to receive(:cache_attributes) end describe '/api/v4/runners' do @@ -408,7 +409,7 @@ describe API::Runner do expect { request_job }.to change { runner.reload.contacted_at } end - %w(name version revision platform architecture).each do |param| + %w(version revision platform architecture).each do |param| context "when info parameter '#{param}' is present" do let(:value) { "#{param}_value" } |