1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
# frozen_string_literal: true
module Gitlab
module Ci
module Queue
class Metrics
extend Gitlab::Utils::StrongMemoize
QUEUE_DURATION_SECONDS_BUCKETS = [1, 3, 10, 30, 60, 300, 900, 1800, 3600].freeze
QUEUE_ACTIVE_RUNNERS_BUCKETS = [1, 3, 10, 30, 60, 300, 900, 1800, 3600].freeze
QUEUE_DEPTH_TOTAL_BUCKETS = [1, 2, 3, 5, 8, 16, 32, 50, 100, 250, 500, 1000, 2000, 5000].freeze
QUEUE_SIZE_TOTAL_BUCKETS = [1, 5, 10, 50, 100, 500, 1000, 2000, 5000, 7500, 10000, 15000, 20000].freeze
QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS = [0.01, 0.05, 0.1, 0.3, 0.5, 1, 5, 10, 30, 60, 180, 300].freeze
METRICS_SHARD_TAG_PREFIX = 'metrics_shard::'
DEFAULT_METRICS_SHARD = 'default'
JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET = 5
OPERATION_COUNTERS = [
:build_can_pick,
:build_not_pick,
:build_not_pending,
:build_temporary_locked,
:build_conflict_lock,
:build_conflict_exception,
:build_conflict_transition,
:queue_attempt,
:queue_conflict,
:queue_iteration,
:queue_depth_limit,
:queue_replication_lag,
:runner_pre_assign_checks_failed,
:runner_pre_assign_checks_success,
:runner_queue_tick
].to_set.freeze
QUEUE_DEPTH_HISTOGRAMS = [
:found,
:not_found,
:conflict
].to_set.freeze
attr_reader :runner
def initialize(runner)
@runner = runner
end
def register_failure
self.class.failed_attempt_counter.increment
self.class.attempt_counter.increment
end
def register_success(job)
labels = { shared_runner: runner.instance_type?,
jobs_running_for_project: jobs_running_for_project(job),
shard: DEFAULT_METRICS_SHARD }
if runner.instance_type?
shard = runner.tag_list.sort.find { |name| name.starts_with?(METRICS_SHARD_TAG_PREFIX) }
labels[:shard] = shard.gsub(METRICS_SHARD_TAG_PREFIX, '') if shard
end
self.class.job_queue_duration_seconds.observe(labels, Time.current - job.queued_at) unless job.queued_at.nil?
self.class.attempt_counter.increment
end
# rubocop: disable CodeReuse/ActiveRecord
def jobs_running_for_project(job)
return '+Inf' unless runner.instance_type?
# excluding currently started job
running_jobs_count = job.project.builds.running.where(runner: ::Ci::Runner.instance_type)
.limit(JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET + 1).count - 1
running_jobs_count < JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET ? running_jobs_count : "#{JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET}+"
end
# rubocop: enable CodeReuse/ActiveRecord
def increment_queue_operation(operation)
if !Rails.env.production? && !OPERATION_COUNTERS.include?(operation)
raise ArgumentError, "unknown queue operation: #{operation}"
end
self.class.queue_operations_total.increment(operation: operation)
end
def observe_queue_depth(queue, size)
return unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false)
if !Rails.env.production? && !QUEUE_DEPTH_HISTOGRAMS.include?(queue)
raise ArgumentError, "unknown queue depth label: #{queue}"
end
self.class.queue_depth_total.observe({ queue: queue }, size.to_f)
end
def observe_queue_size(size_proc, runner_type)
return unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false)
self.class.queue_size_total.observe({ runner_type: runner_type }, size_proc.call.to_f)
end
def observe_queue_time(metric, runner_type)
start_time = ::Gitlab::Metrics::System.monotonic_time
result = yield
return result unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false)
seconds = ::Gitlab::Metrics::System.monotonic_time - start_time
case metric
when :process
self.class.queue_iteration_duration_seconds.observe({ runner_type: runner_type }, seconds.to_f)
when :retrieve
self.class.queue_retrieval_duration_seconds.observe({ runner_type: runner_type }, seconds.to_f)
else
raise ArgumentError unless Rails.env.production?
end
result
end
def self.observe_active_runners(runners_proc)
return unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false)
queue_active_runners_total.observe({}, runners_proc.call.to_f)
end
def self.increment_runner_tick(runner)
self.new(runner).increment_queue_operation(:runner_queue_tick)
end
def self.failed_attempt_counter
strong_memoize(:failed_attempt_counter) do
name = :job_register_attempts_failed_total
comment = 'Counts the times a runner tries to register a job'
Gitlab::Metrics.counter(name, comment)
end
end
def self.attempt_counter
strong_memoize(:attempt_counter) do
name = :job_register_attempts_total
comment = 'Counts the times a runner tries to register a job'
Gitlab::Metrics.counter(name, comment)
end
end
def self.job_queue_duration_seconds
strong_memoize(:job_queue_duration_seconds) do
name = :job_queue_duration_seconds
comment = 'Request handling execution time'
buckets = QUEUE_DURATION_SECONDS_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_operations_total
strong_memoize(:queue_operations_total) do
name = :gitlab_ci_queue_operations_total
comment = 'Counts all the operations that are happening inside a queue'
Gitlab::Metrics.counter(name, comment)
end
end
def self.queue_depth_total
strong_memoize(:queue_depth_total) do
name = :gitlab_ci_queue_depth_total
comment = 'Size of a CI/CD builds queue in relation to the operation result'
buckets = QUEUE_DEPTH_TOTAL_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_size_total
strong_memoize(:queue_size_total) do
name = :gitlab_ci_queue_size_total
comment = 'Size of initialized CI/CD builds queue'
buckets = QUEUE_SIZE_TOTAL_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_iteration_duration_seconds
strong_memoize(:queue_iteration_duration_seconds) do
name = :gitlab_ci_queue_iteration_duration_seconds
comment = 'Time it takes to find a build in CI/CD queue'
buckets = QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_retrieval_duration_seconds
strong_memoize(:queue_retrieval_duration_seconds) do
name = :gitlab_ci_queue_retrieval_duration_seconds
comment = 'Time it takes to execute a SQL query to retrieve builds queue'
buckets = QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_active_runners_total
strong_memoize(:queue_active_runners_total) do
name = :gitlab_ci_queue_active_runners_total
comment = 'The amount of active runners that can process queue in a project'
buckets = QUEUE_ACTIVE_RUNNERS_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
end
end
end
end
|