summary | refs | log | tree | commit | diff
path: root/qa/qa/service
diff options
context:
space:
mode:
Diffstat (limited to 'qa/qa/service')
-rw-r--r-- qa/qa/service/kubernetes_cluster.rb 2
-rw-r--r-- qa/qa/service/praefect_manager.rb 50
2 files changed, 20 insertions, 32 deletions
diff --git a/qa/qa/service/kubernetes_cluster.rb b/qa/qa/service/kubernetes_cluster.rb
index ec53b9d8163..dafce4acc33 100644
--- a/qa/qa/service/kubernetes_cluster.rb
+++ b/qa/qa/service/kubernetes_cluster.rb
@@ -77,7 +77,7 @@ module QA
install_ingress
# need to wait since the ingress-nginx service has an initial delay set of 10 seconds
- sleep 10
+ sleep 12
ingress_ip = `kubectl get svc --all-namespaces --no-headers=true -l app.kubernetes.io/name=ingress-nginx -o custom-columns=:'status.loadBalancer.ingress[0].ip' | grep -v 'none'`
QA::Runtime::Logger.debug "Has ingress address set to: #{ingress_ip}"
ingress_ip
diff --git a/qa/qa/service/praefect_manager.rb b/qa/qa/service/praefect_manager.rb
index dbb49f18881..dd4cce5d0b0 100644
--- a/qa/qa/service/praefect_manager.rb
+++ b/qa/qa/service/praefect_manager.rb
@@ -58,6 +58,7 @@ module QA
def start_praefect
start_node(@praefect)
+ wait_for_praefect
end
def stop_praefect
@@ -176,6 +177,7 @@ module QA
start_node(@primary_node)
start_node(@secondary_node)
start_node(@tertiary_node)
+ start_node(@praefect)
wait_for_health_check_all_nodes
wait_for_reliable_connection
@@ -198,13 +200,13 @@ module QA
retry_on_exception: true
)
- # Praefect can fail to start if unable to dial one of the gitaly nodes
- # See https://gitlab.com/gitlab-org/gitaly/-/issues/2847
- # We tail the logs to allow us to confirm if that is the problem if tests fail
+ QA::Runtime::Logger.info('Wait until Praefect starts and is listening')
+ wait_until_shell_command_matches(
+ "docker exec #{@praefect} bash -c 'cat /var/log/gitlab/praefect/current'",
+ /listening at tcp address/
+ )
- shell "docker exec #{@praefect} bash -c 'tail /var/log/gitlab/praefect/current'" do |line|
- QA::Runtime::Logger.debug(line.chomp)
- end
+ wait_for_gitaly_check
end
def wait_for_sql_ping
@@ -244,7 +246,7 @@ module QA
def wait_for_storage_nodes
wait_for_no_praefect_storage_error
- Support::Waiter.repeat_until(max_attempts: 3) do
+ Support::Waiter.repeat_until(max_attempts: 3, max_duration: 120, sleep_interval: 1) do
nodes_confirmed = {
@primary_node => false,
@secondary_node => false,
@@ -304,7 +306,7 @@ module QA
end
def wait_until_node_is_removed_from_healthy_storages(node)
- Support::Waiter.wait_until(max_duration: 60, sleep_interval: 3, raise_on_failure: false) do
+ Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
result = []
shell sql_to_docker_exec_cmd("SELECT count(*) FROM healthy_storages WHERE storage = '#{node}';") do |line|
result << line
@@ -315,7 +317,7 @@ module QA
end
def wait_until_node_is_marked_as_healthy_storage(node)
- Support::Waiter.wait_until(max_duration: 60, sleep_interval: 3, raise_on_failure: false) do
+ Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
result = []
shell sql_to_docker_exec_cmd("SELECT count(*) FROM healthy_storages WHERE storage = '#{node}';") do |line|
result << line
@@ -327,17 +329,10 @@ module QA
end
def wait_for_gitaly_check
- Support::Waiter.repeat_until(max_attempts: 3) do
- storage_ok = false
- check_finished = false
-
- wait_until_shell_command("docker exec #{@gitlab} bash -c 'gitlab-rake gitlab:gitaly:check'") do |line|
+ Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
+ wait_until_shell_command("docker exec #{@gitlab} bash -c 'gitlab-rake gitlab:git:fsck'") do |line|
QA::Runtime::Logger.debug(line.chomp)
-
- storage_ok = true if line =~ /Gitaly: ... #{@virtual_storage} ... OK/
- check_finished = true if line =~ /Checking Gitaly ... Finished/
-
- storage_ok && check_finished
+ line.include?('Done')
end
end
end
@@ -347,7 +342,7 @@ module QA
# has no pre-read data, consider it to have had zero reads.
def wait_for_read_count_change(pre_read_data)
diff_found = false
- Support::Waiter.wait_until(sleep_interval: 5) do
+ Support::Waiter.wait_until(sleep_interval: 1, max_duration: 60) do
query_read_distribution.each_with_index do |data, index|
diff_found = true if data[:value] > value_for_node(pre_read_data, data[:node])
end
@@ -361,10 +356,8 @@ module QA
def wait_for_reliable_connection
QA::Runtime::Logger.info('Wait until GitLab and Praefect can communicate reliably')
- wait_for_praefect
wait_for_sql_ping
wait_for_storage_nodes
- wait_for_gitaly_check
end
def wait_for_replication(project_id)
@@ -400,20 +393,15 @@ module QA
def list_untracked_repositories
untracked_repositories = []
shell "docker exec #{@praefect} bash -c 'gitlab-ctl praefect list-untracked-repositories'" do |line|
- # Results look like this depending on whether untracked items found or not
- # Running list-untracked-repositories
- # Done.
-
- # Running list-untracked-repositories
+ # Results look like this
+ # The following repositories were found on disk, but missing from the tracking database:
# {"relative_path":"@hashed/aa/bb.git","storage":"gitaly1","virtual_storage":"default"}
# {"relative_path":"@hashed/bb/cc.git","storage":"gitaly3","virtual_storage":"default"}
- # Done.
QA::Runtime::Logger.debug(line.chomp)
- next if line.start_with?('Running list-untracked-repositories')
- next if line.start_with?('Done.')
-
untracked_repositories.append(JSON.parse(line))
+ rescue JSON::ParserError
+ # Ignore lines that can't be parsed as JSON
end
QA::Runtime::Logger.debug("list_untracked_repositories --- #{untracked_repositories}")