diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-10-20 08:43:02 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-10-20 08:43:02 +0000 |
commit | d9ab72d6080f594d0b3cae15f14b3ef2c6c638cb (patch) | |
tree | 2341ef426af70ad1e289c38036737e04b0aa5007 /qa/qa/service | |
parent | d6e514dd13db8947884cd58fe2a9c2a063400a9b (diff) | |
download | gitlab-ce-d9ab72d6080f594d0b3cae15f14b3ef2c6c638cb.tar.gz |
Add latest changes from gitlab-org/gitlab@14-4-stable-ee (v14.4.0-rc42)
Diffstat (limited to 'qa/qa/service')
-rw-r--r-- | qa/qa/service/praefect_manager.rb | 126 | ||||
-rw-r--r-- | qa/qa/service/shellout.rb | 2 |
2 files changed, 70 insertions, 58 deletions
diff --git a/qa/qa/service/praefect_manager.rb b/qa/qa/service/praefect_manager.rb index 5adc52680f0..71e3383a534 100644 --- a/qa/qa/service/praefect_manager.rb +++ b/qa/qa/service/praefect_manager.rb @@ -46,6 +46,10 @@ module QA end end + def stop_primary_node + stop_node(@primary_node) + end + def start_primary_node start_node(@primary_node) end @@ -66,20 +70,29 @@ module QA start_node(@secondary_node) end + def stop_tertiary_node + stop_node(@tertiary_node) + end + + def start_tertiary_node + start_node(@tertiary_node) + end + def start_node(name) shell "docker start #{name}" + wait_until_shell_command_matches( + "docker inspect -f {{.State.Running}} #{name}", + /true/, + sleep_interval: 3, + max_duration: 180, + retry_on_exception: true + ) end def stop_node(name) shell "docker stop #{name}" end - def trigger_failover_by_stopping_primary_node - QA::Runtime::Logger.info("Stopping node #{@primary_node} to trigger failover") - stop_node(@primary_node) - wait_for_new_primary - end - def clear_replication_queue QA::Runtime::Logger.info("Clearing the replication queue") shell sql_to_docker_exec_cmd( @@ -157,22 +170,8 @@ module QA result[2].to_i end - # Makes the original primary (gitaly1) the primary again by - # stopping the other nodes, waiting for gitaly1 to be made the - # primary again, and then it starts the other nodes and enables - # writes - def reset_primary_to_original - QA::Runtime::Logger.info("Checking primary node...") - - return if @primary_node == current_primary_node - - QA::Runtime::Logger.info("Reset primary node to #{@primary_node}") + def start_all_nodes start_node(@primary_node) - stop_node(@secondary_node) - stop_node(@tertiary_node) - - wait_for_new_primary_node(@primary_node) - start_node(@secondary_node) start_node(@tertiary_node) @@ -189,10 +188,12 @@ module QA end def wait_for_praefect - QA::Runtime::Logger.info('Wait until Praefect starts and is listening') wait_until_shell_command_matches( - "docker exec #{@praefect} bash -c 'cat 
/var/log/gitlab/praefect/current'", - /listening at tcp address/ + "docker inspect -f {{.State.Running}} #{@praefect}", + /true/, + sleep_interval: 3, + max_duration: 180, + retry_on_exception: true ) # Praefect can fail to start if unable to dial one of the gitaly nodes @@ -204,20 +205,6 @@ module QA end end - def wait_for_new_primary_node(node) - QA::Runtime::Logger.info("Wait until #{node} is the primary node") - with_praefect_log(max_duration: 120) do |log| - break true if log['msg'] == 'primary node changed' && log['newPrimary'] == node - end - end - - def wait_for_new_primary - QA::Runtime::Logger.info("Wait until a new primary node is selected") - with_praefect_log(max_duration: 120) do |log| - break true if log['msg'] == 'primary node changed' - end - end - def wait_for_sql_ping wait_until_shell_command_matches( "docker exec #{@praefect} bash -c '/opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml sql-ping'", @@ -274,10 +261,6 @@ module QA end end - def wait_for_health_check_current_primary_node - wait_for_health_check(current_primary_node) - end - def wait_for_health_check_all_nodes wait_for_health_check(@primary_node) wait_for_health_check(@secondary_node) @@ -286,29 +269,58 @@ module QA def wait_for_health_check(node) QA::Runtime::Logger.info("Waiting for health check on #{node}") - wait_until_shell_command("docker exec #{node} bash -c 'cat /var/log/gitlab/gitaly/current'") do |line| - QA::Runtime::Logger.debug(line.chomp) - log = JSON.parse(line) + wait_until_node_is_marked_as_healthy_storage(node) + end - log['grpc.request.fullMethod'] == '/grpc.health.v1.Health/Check' && log['grpc.code'] == 'OK' - rescue JSON::ParserError - # Ignore lines that can't be parsed as JSON - end + def wait_for_primary_node_health_check + wait_for_health_check(@primary_node) + end + + def wait_for_secondary_node_health_check + wait_for_health_check(@secondary_node) + end + + def wait_for_tertiary_node_health_check + 
wait_for_health_check(@tertiary_node) + end + + def wait_for_health_check_failure(node) + QA::Runtime::Logger.info("Waiting for health check failure on #{node}") + wait_until_node_is_removed_from_healthy_storages(node) + end + + def wait_for_primary_node_health_check_failure + wait_for_health_check_failure(@primary_node) end def wait_for_secondary_node_health_check_failure wait_for_health_check_failure(@secondary_node) end - def wait_for_health_check_failure(node) - QA::Runtime::Logger.info("Waiting for Praefect to record a health check failure on #{node}") - wait_until_shell_command("docker exec #{@praefect} bash -c 'tail -n 1 /var/log/gitlab/praefect/current'") do |line| - QA::Runtime::Logger.debug(line.chomp) - log = JSON.parse(line) + def wait_for_tertiary_node_health_check_failure + wait_for_health_check_failure(@tertiary_node) + end - health_check_failure_message?(log['msg']) && log['storage'] == node - rescue JSON::ParserError - # Ignore lines that can't be parsed as JSON + def wait_until_node_is_removed_from_healthy_storages(node) + Support::Waiter.wait_until(max_duration: 60, sleep_interval: 3, raise_on_failure: false) do + result = [] + shell sql_to_docker_exec_cmd("SELECT count(*) FROM healthy_storages WHERE storage = '#{node}';") do |line| + result << line + end + QA::Runtime::Logger.debug("result is ---#{result}") + result[2].to_i == 0 + end + end + + def wait_until_node_is_marked_as_healthy_storage(node) + Support::Waiter.wait_until(max_duration: 60, sleep_interval: 3, raise_on_failure: false) do + result = [] + shell sql_to_docker_exec_cmd("SELECT count(*) FROM healthy_storages WHERE storage = '#{node}';") do |line| + result << line + end + + QA::Runtime::Logger.debug("result is ---#{result}") + result[2].to_i == 1 end end diff --git a/qa/qa/service/shellout.rb b/qa/qa/service/shellout.rb index 81cfaa125a9..5a35d8c251e 100644 --- a/qa/qa/service/shellout.rb +++ b/qa/qa/service/shellout.rb @@ -52,7 +52,7 @@ module QA end def 
wait_until_shell_command_matches(cmd, regex, **kwargs) - wait_until_shell_command(cmd, kwargs) do |line| + wait_until_shell_command(cmd, **kwargs) do |line| QA::Runtime::Logger.debug(line.chomp) line =~ regex |