blob: a0433689e99af0b1bd40978f769e4165663778f0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
# frozen_string_literal: true
module QA
module Service
class PraefectManager
include Service::Shellout
def initialize
@gitlab = 'gitlab-gitaly-ha'
@praefect = 'praefect'
@postgres = 'postgres'
@primary_node = 'gitaly1'
@secondary_node = 'gitaly2'
@tertiary_node = 'gitaly3'
@virtual_storage = 'default'
end
def enable_writes
shell "docker exec #{@praefect} bash -c '/opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml enable-writes -virtual-storage #{@virtual_storage}'"
end
def replicated?(project_id)
shell %(docker exec gitlab-gitaly-ha bash -c 'gitlab-rake "gitlab:praefect:replicas[#{project_id}]"') do |line|
# The output of the rake task looks something like this:
#
# Project name | gitaly1 (primary) | gitaly2 | gitaly3
# ----------------------------------------------------------------------------------------------------------------------------------------------------------------
# gitaly_cluster-3aff1f2bd14e6c98 | 23c4422629234d62b62adacafd0a33a8364e8619 | 23c4422629234d62b62adacafd0a33a8364e8619 | 23c4422629234d62b62adacafd0a33a8364e8619
#
# We want to confirm that the checksums are identical
break line.split('|').map(&:strip)[1..3].uniq.one? if line.start_with?("gitaly_cluster")
end
end
def start_praefect
start_node(@praefect)
end
def stop_praefect
stop_node(@praefect)
end
def start_node(name)
shell "docker start #{name}"
end
def stop_node(name)
shell "docker stop #{name}"
end
def trigger_failover_by_stopping_primary_node
stop_node(@primary_node)
end
def clear_replication_queue
QA::Runtime::Logger.debug("Clearing the replication queue")
shell <<~CMD
docker exec --env PGPASSWORD=SQL_PASSWORD #{@postgres} \
bash -c "psql -U postgres -d praefect_production -h postgres.test \
-c \\"delete from replication_queue_job_lock; delete from replication_queue_lock; delete from replication_queue;\\""
CMD
end
def create_stalled_replication_queue
QA::Runtime::Logger.debug("Setting jobs in replication queue to `in_progress` and acquiring locks")
shell <<~CMD
docker exec --env PGPASSWORD=SQL_PASSWORD #{@postgres} \
bash -c "psql -U postgres -d praefect_production -h postgres.test \
-c \\"update replication_queue set state = 'in_progress';
insert into replication_queue_job_lock (job_id, lock_id, triggered_at)
select id, rq.lock_id, created_at from replication_queue rq
left join replication_queue_job_lock rqjl on rq.id = rqjl.job_id
where state = 'in_progress' and rqjl.job_id is null;
update replication_queue_lock set acquired = 't';\\""
CMD
end
def replication_queue_lock_count
result = []
cmd = <<~CMD
docker exec --env PGPASSWORD=SQL_PASSWORD #{@postgres} \
bash -c "psql -U postgres -d praefect_production -h postgres.test \
-c \\"select count(*) from replication_queue_lock where acquired = 't';\\""
CMD
shell cmd do |line|
result << line
end
# The result looks like:
# count
# -----
# 1
result[2].to_i
end
def reset_cluster
start_node(@praefect)
start_node(@primary_node)
start_node(@secondary_node)
start_node(@tertiary_node)
enable_writes
end
def wait_for_praefect
wait_until_shell_command_matches(
"docker exec #{@praefect} bash -c 'cat /var/log/gitlab/praefect/current'",
/listening at tcp address/
)
end
def wait_for_sql_ping
wait_until_shell_command_matches(
"docker exec #{@praefect} bash -c '/opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml sql-ping'",
/praefect sql-ping: OK/
)
end
def wait_for_storage_nodes
nodes_confirmed = {
@primary_node => false,
@secondary_node => false,
@tertiary_node => false
}
wait_until_shell_command("docker exec #{@praefect} bash -c '/opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml dial-nodes'") do |line|
QA::Runtime::Logger.info(line.chomp)
nodes_confirmed.each_key do |node|
nodes_confirmed[node] = true if line =~ /SUCCESS: confirmed Gitaly storage "#{node}" in virtual storages \[#{@virtual_storage}\] is served/
end
nodes_confirmed.values.all?
end
end
def wait_for_gitaly_check
storage_ok = false
check_finished = false
wait_until_shell_command("docker exec #{@gitlab} bash -c 'gitlab-rake gitlab:gitaly:check'") do |line|
QA::Runtime::Logger.info(line.chomp)
storage_ok = true if line =~ /Gitaly: ... #{@virtual_storage} ... OK/
check_finished = true if line =~ /Checking Gitaly ... Finished/
storage_ok && check_finished
end
end
def wait_for_gitlab_shell_check
wait_until_shell_command_matches(
"docker exec #{@gitlab} bash -c 'gitlab-rake gitlab:gitlab_shell:check'",
/Checking GitLab Shell ... Finished/
)
end
def wait_for_reliable_connection
wait_for_praefect
wait_for_sql_ping
wait_for_storage_nodes
wait_for_gitaly_check
wait_for_gitlab_shell_check
end
private
def wait_until_shell_command(cmd)
Support::Waiter.wait_until do
shell cmd do |line|
break true if yield line
end
end
end
def wait_until_shell_command_matches(cmd, regex)
wait_until_shell_command(cmd) do |line|
QA::Runtime::Logger.info(line.chomp)
line =~ regex
end
end
end
end
end
|