1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
|
# frozen_string_literal: true
module Ci
class UpdateBuildStateService
include ::Gitlab::Utils::StrongMemoize
include ::Gitlab::ExclusiveLeaseHelpers
Result = Struct.new(:status, :backoff, keyword_init: true)
InvalidTraceError = Class.new(StandardError)
ACCEPT_TIMEOUT = 5.minutes.freeze
attr_reader :build, :params, :metrics
# Stores the collaborators used while handling a build state update.
#
# build   - the CI build whose state is being updated.
# params  - request parameters; this class reads :state, :trace, :checksum,
#           :output, :failure_reason and :exit_code from it.
# metrics - trace metrics recorder; defaults to a fresh
#           ::Gitlab::Ci::Trace::Metrics instance.
def initialize(build, params, metrics = ::Gitlab::Ci::Trace::Metrics.new)
  @metrics = metrics
  @params = params
  @build = build
end
# Entry point of the service.
#
# Overwrites the trace when one was sent. If the "accept" flow is not
# available the build state is updated right away; otherwise a pending state
# is ensured and the update is processed under the build trace lock.
#
# Returns the value produced by whichever path handled the request.
def execute
  overwrite_trace! if has_trace?

  return update_build_state! unless accept_available?

  ensure_pending_state!

  in_build_trace_lock { process_build_state! }
end
private
# Records an :overwrite trace operation and, when the trace_overwrite
# feature is enabled, replaces the build trace with params[:trace].
#
# Note that the metric is incremented even when the feature is disabled.
def overwrite_trace!
  metrics.increment_trace_operation(operation: :overwrite)

  return unless Gitlab::Ci::Features.trace_overwrite?

  build.trace.set(params[:trace])
end
# Forces the memoized pending state to be resolved by reading its
# creation timestamp, which is also the return value.
def ensure_pending_state!
  state = pending_state
  state.created_at
end
# Decides how to handle the state update:
#
# * no live chunks pending      -> validate the trace, then update the build;
# * live chunks, state outdated -> record the discard, then update the build;
# * live chunks, state fresh    -> accept the update for later.
def process_build_state!
  unless live_chunks_pending?
    validate_build_trace!
    return update_build_state!
  end

  return accept_build_state! unless pending_state_outdated?

  discard_build_trace!
  update_build_state!
end
# Accepts the state update without applying it yet.
#
# Schedules every live trace chunk to be persisted, records an :accepted
# trace operation and returns a Result with status 202 whose backoff is
# derived from the pending state's creation time.
def accept_build_state!
  build.trace_chunks.live.find_each(&:schedule_to_persist!)

  metrics.increment_trace_operation(operation: :accepted)

  backoff = ::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at)
  Result.new(status: 202, backoff: backoff.to_seconds)
end
# Validates the checksum of the build trace and records metrics about it.
#
# Does nothing when the build has no trace chunks. When no live chunks are
# pending, a :finalized operation and the pending-state duration are
# recorded. An invalid checksum records :invalid (and :corrupted when the
# checksum reports corruption) and, if logging of invalid chunks is enabled,
# reports an InvalidTraceError with the checksum details to error tracking.
#
# The build state itself is never changed here.
def validate_build_trace!
  return unless has_chunks?

  unless live_chunks_pending?
    metrics.increment_trace_operation(operation: :finalized)
    metrics.observe_migration_duration(pending_state_seconds)
  end

  checksum = ::Gitlab::Ci::Trace::Checksum.new(build)
  return if checksum.valid?

  metrics.increment_trace_operation(operation: :invalid)
  metrics.increment_trace_operation(operation: :corrupted) if checksum.corrupted?

  return unless log_invalid_chunks?

  ::Gitlab::ErrorTracking.log_exception(
    InvalidTraceError.new,
    project_path: build.project.full_path,
    build_id: build.id,
    state_crc32: checksum.state_crc32,
    chunks_crc32: checksum.chunks_crc32,
    chunks_count: checksum.chunks_count,
    chunks_corrupted: checksum.corrupted?
  )
end
# Applies the requested state to the build.
#
# * 'running' - touches the build when it needs it;
# * 'success' - marks the build as successful;
# * 'failed'  - drops the build with params[:failure_reason] (falling back
#               to :unknown_failure) and params[:exit_code].
#
# Returns a Result with status 200 for the states above and status 400 for
# any other state.
def update_build_state!
  case build_state
  when 'running'
    build.touch if build.needs_touch?
  when 'success'
    build.success!
  when 'failed'
    build.drop_with_exit_code!(params[:failure_reason] || :unknown_failure, params[:exit_code])
  else
    return Result.new(status: 400)
  end

  Result.new(status: 200)
end
# Records a :discarded trace operation. Only the metric is recorded here;
# this method does not itself touch any trace data.
def discard_build_trace!
  operation = :discarded
  metrics.increment_trace_operation(operation: operation)
end
# Whether the update may go through the "accept" flow: the requested state
# is not 'running', a trace checksum was sent, and chunk migration is
# enabled for the build's project.
def accept_available?
  return false if build_running?
  return false unless has_checksum?

  chunks_migration_enabled?
end
# True when the build still has at least one trace chunk in the `live`
# scope.
def live_chunks_pending?
  live_chunks = build.trace_chunks.live
  live_chunks.any?
end
# True when the build has any trace chunks at all.
def has_chunks?
  chunks = build.trace_chunks
  chunks.any?
end
# True once the pending state has existed for strictly longer than
# ACCEPT_TIMEOUT.
def pending_state_outdated?
  elapsed = pending_state_duration
  elapsed > ACCEPT_TIMEOUT
end
# Time elapsed between the pending state's creation and now.
def pending_state_duration
  now = Time.current
  created_at = pending_state.created_at
  now - created_at
end
# The pending state's age, converted with `#seconds`.
def pending_state_seconds
  elapsed = pending_state_duration
  elapsed.seconds
end
# The requested build state as a String; an absent :state yields ''.
def build_state
  params[:state].to_s
end
# True when the request carries a non-blank :trace.
def has_trace?
  params[:trace].present?
end
# True when the request carries a non-blank trace checksum.
def has_checksum?
  checksum = trace_checksum
  checksum.present?
end
# True when the requested state is 'running'.
def build_running?
  requested_state = build_state
  requested_state == 'running'
end
# The trace checksum sent with the request. The nested
# params[:output][:checksum] wins; the top-level params[:checksum] is used
# only when the nested value is nil or false.
def trace_checksum
  nested_checksum = params.dig(:output, :checksum)
  return nested_checksum if nested_checksum

  params[:checksum]
end
# The trace size read from params[:output][:bytesize], or nil when absent.
def trace_bytesize
  key_path = %i[output bytesize]
  params.dig(*key_path)
end
# The pending state for this update, memoized under :pending_state so that
# `ensure_pending_state` is evaluated through `strong_memoize` only.
def pending_state
  strong_memoize(:pending_state) do
    ensure_pending_state
  end
end
# Finds or creates the Ci::BuildPendingState record describing this update
# (build id, requested state, trace checksum/bytesize and failure reason).
#
# When `safe_find_or_create_by` yields nothing usable, a :conflict trace
# operation is recorded and the build's existing pending state is returned
# instead (presumably another request stored one first - TODO confirm).
#
# Raises KeyError when params has no :state key.
def ensure_pending_state
  attributes = {
    build_id: build.id,
    state: params.fetch(:state),
    trace_checksum: trace_checksum,
    trace_bytesize: trace_bytesize,
    failure_reason: params[:failure_reason]
  }

  record = Ci::BuildPendingState.safe_find_or_create_by(**attributes)

  metrics.increment_trace_operation(operation: :conflict) unless record.present?

  record || build.pending_state
end
##
# Runs the given block while holding the exclusive lock on the build trace.
#
# The lease is cancelled from a `run_on_status_commit` callback, that is, the
# moment we conclude that the build status has been written and the build
# state update has been committed to the database.
#
# Because the build state machine schedules a bunch of workers after a
# status transition completes, we do not want to keep the lease until all of
# those workers are scheduled: doing so would open up a possibility of race
# conditions.
#
# Instead of holding the lease until the transition is fully done and the
# workers are scheduled, we release the lock immediately after the database
# commit happens.
#
# If the trace is already locked (::Gitlab::Ci::Trace::LockedError), a
# :locked trace operation is recorded and the update is accepted via
# `accept_build_state!` instead of being processed now.
#
def in_build_trace_lock(&block)
build.trace.lock do |_, lease| # rubocop:disable CodeReuse/ActiveRecord
# Register the lease release before yielding so that it is in place when
# the block changes the build status.
build.run_on_status_commit { lease.cancel }
yield
end
rescue ::Gitlab::Ci::Trace::LockedError
metrics.increment_trace_operation(operation: :locked)
accept_build_state!
end
# Whether the accept_trace feature is enabled for the build's project.
def chunks_migration_enabled?
  project = build.project
  ::Gitlab::Ci::Features.accept_trace?(project)
end
# Whether logging of invalid trace chunks is enabled for the build's
# project.
def log_invalid_chunks?
  project = build.project
  ::Gitlab::Ci::Features.log_invalid_trace_chunks?(project)
end
end
end
|