diff options
Diffstat (limited to 'qa/qa/service')
-rw-r--r-- | qa/qa/service/cluster_provider/gcloud.rb | 2 | ||||
-rw-r--r-- | qa/qa/service/docker_run/smocker.rb | 56 | ||||
-rw-r--r-- | qa/qa/service/praefect_manager.rb | 102 | ||||
-rw-r--r-- | qa/qa/service/shellout.rb | 11 |
4 files changed, 153 insertions, 18 deletions
diff --git a/qa/qa/service/cluster_provider/gcloud.rb b/qa/qa/service/cluster_provider/gcloud.rb index c6d1f6cfe88..77677745f7a 100644 --- a/qa/qa/service/cluster_provider/gcloud.rb +++ b/qa/qa/service/cluster_provider/gcloud.rb @@ -49,7 +49,7 @@ module QA if account.empty? raise "Failed to login to gcloud. No credentials provided in environment and no credentials found locally." else - puts "gcloud account found. Using: #{account} for creating K8s cluster." + QA::Runtime::Logger.debug("gcloud account found. Using: #{account} for creating K8s cluster.") end end end diff --git a/qa/qa/service/docker_run/smocker.rb b/qa/qa/service/docker_run/smocker.rb new file mode 100644 index 00000000000..83ab58887da --- /dev/null +++ b/qa/qa/service/docker_run/smocker.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module QA + module Service + module DockerRun + class Smocker < Base + def initialize + @image = 'thiht/smocker:0.17.1' + @name = 'smocker-server' + @public_port = '8080' + @admin_port = '8081' + super + @network_cache = network + end + + def host_name + return '127.0.0.1' unless QA::Runtime::Env.running_in_ci? || QA::Runtime::Env.qa_hostname + + "#{@name}.#{@network_cache}" + end + + def base_url + "http://#{host_name}:#{@public_port}" + end + + def admin_url + "http://#{host_name}:#{@admin_port}" + end + + def wait_for_running + Support::Waiter.wait_until(raise_on_failure: false, reload_page: false) do + running? + end + end + + def register! + command = <<~CMD.tr("\n", ' ') + docker run -d --rm + --network #{@network_cache} + --hostname #{host_name} + --name #{@name} + --publish #{@public_port}:8080 + --publish #{@admin_port}:8081 + #{@image} + CMD + + unless QA::Runtime::Env.running_in_ci? || QA::Runtime::Env.qa_hostname + command.gsub!("--network #{@network_cache} ", '') + end + + shell command + end + end + end + end +end diff --git a/qa/qa/service/praefect_manager.rb b/qa/qa/service/praefect_manager.rb index 7e47049d446..8ffb7c47652 100644 --- a/qa/qa/service/praefect_manager.rb +++ b/qa/qa/service/praefect_manager.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'digest' + module QA module Service class PraefectManager @@ -50,6 +52,7 @@ module QA def stop_primary_node stop_node(@primary_node) + wait_until_node_is_removed_from_healthy_storages(@primary_node) end def start_primary_node @@ -67,6 +70,7 @@ module QA def stop_secondary_node stop_node(@secondary_node) + wait_until_node_is_removed_from_healthy_storages(@secondary_node) end def start_secondary_node @@ -75,6 +79,7 @@ module QA def stop_tertiary_node stop_node(@tertiary_node) + wait_until_node_is_removed_from_healthy_storages(@tertiary_node) end def start_tertiary_node @@ -82,20 +87,41 @@ module QA end def start_node(name) - shell "docker start #{name}" - end + state = node_state(name) + return if state == "running" + + if state == "paused" + shell "docker unpause #{name}" + end + + if state == "stopped" + shell "docker start #{name}" + end - def stop_node(name) - shell "docker stop #{name}" wait_until_shell_command_matches( "docker inspect -f {{.State.Running}} #{name}", - /false/, + /true/, sleep_interval: 3, max_duration: 180, retry_on_exception: true ) end + def stop_node(name) + return if node_state(name) == 'paused' + + shell "docker pause #{name}" + end + + def node_state(name) + state = "stopped" + wait_until_shell_command("docker inspect -f {{.State.Status}} #{name}") do |line| + QA::Runtime::Logger.debug(line) + break state = "running" if line.include?("running") + break state = "paused" if line.include?("paused") + end + end + def clear_replication_queue QA::Runtime::Logger.info("Clearing the replication queue") shell sql_to_docker_exec_cmd( @@ -174,15 +200,25 @@ module QA end def start_all_nodes - start_node(@postgres) + start_postgres start_node(@primary_node) start_node(@secondary_node) start_node(@tertiary_node) - start_node(@praefect) + start_praefect wait_for_health_check_all_nodes end + def start_postgres + start_node(@postgres) + + Support::Waiter.repeat_until(max_attempts: 60, sleep_interval: 1) do + shell(sql_to_docker_exec_cmd("SELECT 1 as healthy_database"), fail_on_exception: false) do |line| + break true if line.include?("healthy_database") + end + end + end + def verify_storage_move(source_storage, destination_storage, repo_type: :project) return if Specs::Helpers::ContextSelector.dot_com? @@ -194,9 +230,8 @@ module QA def wait_for_praefect QA::Runtime::Logger.info("Waiting for health check on praefect") Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do - # praefect runs a grpc server on port 2305, which will return an error 'Connection refused' until such time it is ready - wait_until_shell_command("docker exec #{@gitaly_cluster} bash -c 'curl #{@praefect}:2305'") do |line| - break if line.include?('curl: (1) Received HTTP/0.9 when not allowed') + wait_until_shell_command("docker exec #{@praefect} gitlab-ctl status praefect") do |line| + break true if line.include?('run: praefect: ') QA::Runtime::Logger.debug(line.chomp) end @@ -250,6 +285,48 @@ module QA end end + def praefect_dataloss_information(project_id) + dataloss_info = [] + cmd = "docker exec #{@praefect} praefect -config /var/opt/gitlab/praefect/config.toml dataloss --partially-unavailable=true" + shell(cmd) { |line| dataloss_info << line.strip } + + # Expected will have a record for each repository in the storage, in the following format + # @hashed/bc/52/bc52dd634277c4a34a2d6210994a9a5e2ab6d33bb4a3a8963410e00ca6c15a02.git: + # Primary: gitaly1 + # In-Sync Storages: + # gitaly1, assigned host + # gitaly3, assigned host + # Outdated Storages: + # gitaly2 is behind by 1 change or less, assigned host + # + # Alternatively, if all repositories are in sync, a concise message is returned + # Virtual storage: default + # All repositories are fully available on all assigned storages! + + # extract the relevant project under test info if it is identified + start_index = dataloss_info.index { |line| line.include?("#{Digest::SHA256.hexdigest(project_id.to_s)}.git") } + unless start_index.nil? + dataloss_info = dataloss_info[start_index, 7] + end + + dataloss_info&.each { |info| QA::Runtime::Logger.debug(info) } + dataloss_info + end + + def praefect_dataloss_info_for_project(project_id) + dataloss_info = [] + Support::Retrier.retry_until(max_duration: 60) do + dataloss_info = praefect_dataloss_information(project_id) + dataloss_info.include?("#{Digest::SHA256.hexdigest(project_id.to_s)}.git") + end + end + + def wait_for_project_synced_across_all_storages(project_id) + Support::Retrier.retry_until(max_duration: 60) do + praefect_dataloss_information(project_id).include?('All repositories are fully available on all assigned storages!') + end + end + def wait_for_health_check_all_nodes wait_for_gitaly_health_check(@primary_node) wait_for_gitaly_health_check(@secondary_node) @@ -259,9 +336,8 @@ module QA def wait_for_gitaly_health_check(node) QA::Runtime::Logger.info("Waiting for health check on #{node}") Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do - # gitaly runs a grpc server on port 8075, which will return an error 'Connection refused' until such time it is ready - wait_until_shell_command("docker exec #{@praefect} bash -c 'curl #{node}:8075'") do |line| - break if line.include?('curl: (1) Received HTTP/0.9 when not allowed') + wait_until_shell_command("docker exec #{node} gitlab-ctl status gitaly") do |line| + break true if line.include?('run: gitaly: ') QA::Runtime::Logger.debug(line.chomp) end diff --git a/qa/qa/service/shellout.rb b/qa/qa/service/shellout.rb index 5a35d8c251e..33d1d10b515 100644 --- a/qa/qa/service/shellout.rb +++ b/qa/qa/service/shellout.rb @@ -5,6 +5,7 @@ require 'open3' module QA module Service module Shellout + using Rainbow CommandError = Class.new(StandardError) module_function @@ -13,23 +14,25 @@ module QA # TODO, make it possible to use generic QA framework classes # as a library - gitlab-org/gitlab-qa#94 # - def shell(command, stdin_data: nil) - puts "Executing `#{command}`" + def shell(command, stdin_data: nil, fail_on_exception: true) + QA::Runtime::Logger.info("Executing `#{command}`".cyan) Open3.popen2e(*command) do |stdin, out, wait| stdin.puts(stdin_data) if stdin_data stdin.close if stdin_data + cmd_output = '' if block_given? out.each do |line| + cmd_output += line yield line end end out.each_char { |char| print char } - if wait.value.exited? && wait.value.exitstatus.nonzero? - raise CommandError, "Command `#{command}` failed!" + if wait.value.exited? && wait.value.exitstatus.nonzero? && fail_on_exception + raise CommandError, "Command failed: #{command} \nCommand Output: #{cmd_output}" end end end |