1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
# frozen_string_literal: true
module Gitlab
module Database
# This class provides a way to automatically execute code that relies on acquiring a database lock in a way
# designed to minimize impact on a busy production database.
#
# A default timing configuration is provided that makes repeated attempts to acquire the necessary lock, with
# varying lock_timeout settings, and also serves to limit the maximum number of attempts.
class WithLockRetries
AttemptsExhaustedError = Class.new(StandardError)
NULL_LOGGER = Gitlab::JsonLogger.new('/dev/null')
# Each element of the array represents a retry iteration.
# - DEFAULT_TIMING_CONFIGURATION.size provides the iteration count.
# - First element: DB lock_timeout
# - Second element: Sleep time after unsuccessful lock attempt (LockWaitTimeout error raised)
# - Worst case, this configuration would retry for about 40 minutes.
DEFAULT_TIMING_CONFIGURATION = [
[0.1.seconds, 0.05.seconds], # short timings, lock_timeout: 100ms, sleep after LockWaitTimeout: 50ms
[0.1.seconds, 0.05.seconds],
[0.2.seconds, 0.05.seconds],
[0.3.seconds, 0.10.seconds],
[0.4.seconds, 0.15.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[1.second, 5.seconds], # probably high traffic, increase timings
[1.second, 1.minute],
[0.1.seconds, 0.05.seconds],
[0.1.seconds, 0.05.seconds],
[0.2.seconds, 0.05.seconds],
[0.3.seconds, 0.10.seconds],
[0.4.seconds, 0.15.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[3.seconds, 3.minutes], # probably high traffic or long locks, increase timings
[0.1.seconds, 0.05.seconds],
[0.1.seconds, 0.05.seconds],
[0.5.seconds, 2.seconds],
[0.5.seconds, 2.seconds],
[5.seconds, 2.minutes],
[0.5.seconds, 0.5.seconds],
[0.5.seconds, 0.5.seconds],
[7.seconds, 5.minutes],
[0.5.seconds, 0.5.seconds],
[0.5.seconds, 0.5.seconds],
[7.seconds, 5.minutes],
[0.5.seconds, 0.5.seconds],
[0.5.seconds, 0.5.seconds],
[7.seconds, 5.minutes],
[0.1.seconds, 0.05.seconds],
[0.1.seconds, 0.05.seconds],
[0.5.seconds, 2.seconds],
[10.seconds, 10.minutes],
[0.1.seconds, 0.05.seconds],
[0.5.seconds, 2.seconds],
[10.seconds, 10.minutes]
].freeze
def initialize(logger: NULL_LOGGER, allow_savepoints: true, timing_configuration: DEFAULT_TIMING_CONFIGURATION, klass: nil, env: ENV, connection: ActiveRecord::Base.connection)
@logger = logger
@klass = klass
@allow_savepoints = allow_savepoints
@timing_configuration = timing_configuration
@env = env
@current_iteration = 1
@log_params = { method: 'with_lock_retries', class: klass.to_s }
@connection = connection
end
# Executes a block of code, retrying it whenever a database lock can't be acquired in time
#
# When a database lock can't be acquired, ActiveRecord throws ActiveRecord::LockWaitTimeout
# exception which we intercept to re-execute the block of code, until it finishes or we reach the
# max attempt limit. The default behavior when max attempts have been reached is to make a final attempt with the
# lock_timeout disabled, but this can be altered with the raise_on_exhaustion parameter.
#
# @see DEFAULT_TIMING_CONFIGURATION for the timings used when attempting a retry
# @param [Boolean] raise_on_exhaustion whether to raise `AttemptsExhaustedError` when exhausting max attempts
# @param [Proc] block of code that will be executed
def run(raise_on_exhaustion: false, &block)
raise 'no block given' unless block_given?
@block = block
if lock_retries_disabled?
log(message: 'DISABLE_LOCK_RETRIES environment variable is true, executing the block without retry')
return run_block
end
begin
run_block_with_lock_timeout
rescue ActiveRecord::LockWaitTimeout
if retry_with_lock_timeout?
disable_idle_in_transaction_timeout if connection.transaction_open?
wait_until_next_retry
reset_db_settings
retry
else
reset_db_settings
raise AttemptsExhaustedError, 'configured attempts to obtain locks are exhausted' if raise_on_exhaustion
run_block_without_lock_timeout
end
ensure
reset_db_settings
end
end
private
attr_reader :logger, :env, :block, :current_iteration, :log_params, :timing_configuration, :connection
def run_block
block.call
end
def run_block_with_lock_timeout
raise "WithLockRetries should not run inside already open transaction" if connection.transaction_open? && @allow_savepoints.blank?
connection.transaction(requires_new: true) do # rubocop:disable Performance/ActiveRecordSubtransactions
execute("SET LOCAL lock_timeout TO '#{current_lock_timeout_in_ms}ms'")
log(message: 'Lock timeout is set', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
run_block
log(message: 'Migration finished', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
end
end
def retry_with_lock_timeout?
current_iteration != retry_count
end
def wait_until_next_retry
log(message: 'ActiveRecord::LockWaitTimeout error, retrying after sleep', current_iteration: current_iteration, sleep_time_in_seconds: current_sleep_time_in_seconds)
sleep(current_sleep_time_in_seconds)
@current_iteration += 1
end
def run_block_without_lock_timeout
log(message: "Couldn't acquire lock to perform the migration", current_iteration: current_iteration)
log(message: "Executing the migration without lock timeout", current_iteration: current_iteration)
disable_lock_timeout if connection.transaction_open?
run_block
log(message: 'Migration finished', current_iteration: current_iteration)
end
def lock_retries_disabled?
Gitlab::Utils.to_boolean(env['DISABLE_LOCK_RETRIES'])
end
def log(params)
logger.info(log_params.merge(params))
end
def execute(statement)
connection.execute(statement)
end
def retry_count
timing_configuration.size
end
def current_lock_timeout_in_ms
Integer(timing_configuration[current_iteration - 1][0].in_milliseconds)
end
def current_sleep_time_in_seconds
timing_configuration[current_iteration - 1][1].to_f
end
def disable_idle_in_transaction_timeout
execute("SET LOCAL idle_in_transaction_session_timeout TO '0'")
end
def disable_lock_timeout
execute("SET LOCAL lock_timeout TO '0'")
end
def reset_db_settings
execute('RESET idle_in_transaction_session_timeout; RESET lock_timeout')
end
end
end
end
|