diff options
Diffstat (limited to 'app/services/ci/update_build_state_service.rb')
-rw-r--r-- | app/services/ci/update_build_state_service.rb | 118 |
1 files changed, 88 insertions, 30 deletions
diff --git a/app/services/ci/update_build_state_service.rb b/app/services/ci/update_build_state_service.rb index 61e4c77c1e5..cc8e2060888 100644 --- a/app/services/ci/update_build_state_service.rb +++ b/app/services/ci/update_build_state_service.rb @@ -2,7 +2,10 @@ module Ci class UpdateBuildStateService - Result = Struct.new(:status, keyword_init: true) + include ::Gitlab::Utils::StrongMemoize + include ::Gitlab::ExclusiveLeaseHelpers + + Result = Struct.new(:status, :backoff, keyword_init: true) ACCEPT_TIMEOUT = 5.minutes.freeze @@ -17,44 +20,65 @@ module Ci def execute overwrite_trace! if has_trace? - if accept_request? - accept_build_state! - else - check_migration_state - update_build_state! + unless accept_available? + return update_build_state! + end + + ensure_pending_state! + + in_build_trace_lock do + process_build_state! end end private - def accept_build_state! - if Time.current - ensure_pending_state.created_at > ACCEPT_TIMEOUT - metrics.increment_trace_operation(operation: :discarded) + def overwrite_trace! + metrics.increment_trace_operation(operation: :overwrite) - return update_build_state! + build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite? + end + + def ensure_pending_state! + pending_state.created_at + end + + def process_build_state! + if live_chunks_pending? + if pending_state_outdated? + discard_build_trace! + update_build_state! + else + accept_build_state! + end + else + validate_build_trace! + update_build_state! end + end + def accept_build_state! build.trace_chunks.live.find_each do |chunk| chunk.schedule_to_persist! end metrics.increment_trace_operation(operation: :accepted) - Result.new(status: 202) - end - - def overwrite_trace! - metrics.increment_trace_operation(operation: :overwrite) - - build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite? + ::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at).then do |backoff| + Result.new(status: 202, backoff: backoff.to_seconds) + end end - def check_migration_state - return unless accept_available? + def validate_build_trace! + return unless has_chunks? - if has_chunks? && !live_chunks_pending? + unless live_chunks_pending? metrics.increment_trace_operation(operation: :finalized) end + + unless ::Gitlab::Ci::Trace::Checksum.new(build).valid? + metrics.increment_trace_operation(operation: :invalid) + end end def update_build_state! @@ -76,12 +100,24 @@ module Ci end end + def discard_build_trace! + metrics.increment_trace_operation(operation: :discarded) + end + def accept_available? !build_running? && has_checksum? && chunks_migration_enabled? end - def accept_request? - accept_available? && live_chunks_pending? + def live_chunks_pending? + build.trace_chunks.live.any? + end + + def has_chunks? + build.trace_chunks.any? + end + + def pending_state_outdated? + Time.current - pending_state.created_at > ACCEPT_TIMEOUT end def build_state @@ -96,18 +132,14 @@ module Ci params.dig(:checksum).present? end - def has_chunks? - build.trace_chunks.any? - end - - def live_chunks_pending? - build.trace_chunks.live.any? - end - def build_running? build_state == 'running' end + def pending_state + strong_memoize(:pending_state) { ensure_pending_state } + end + def ensure_pending_state Ci::BuildPendingState.create_or_find_by!( build_id: build.id, @@ -121,6 +153,32 @@ module Ci build.pending_state end + ## + # This method is releasing an exclusive lock on a build trace the moment we + # conclude that build status has been written and the build state update + # has been committed to the database. + # + # Because a build state machine schedules a bunch of workers to run after + # build status transition to complete, we do not want to keep the lease + # until all the workers are scheduled because it opens a possibility of + # race conditions happening. + # + # Instead of keeping the lease until the transition is fully done and + # workers are scheduled, we immediately release the lock after the database + # commit happens. + # + def in_build_trace_lock(&block) + build.trace.lock do |_, lease| # rubocop:disable CodeReuse/ActiveRecord + build.run_on_status_commit { lease.cancel } + + yield + end + rescue ::Gitlab::Ci::Trace::LockedError + metrics.increment_trace_operation(operation: :locked) + + accept_build_state! + end + def chunks_migration_enabled? ::Gitlab::Ci::Features.accept_trace?(build.project) end |