Diffstat (limited to 'qa/qa/specs/features/api/3_create')
-rw-r--r--  qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb  100
-rw-r--r--  qa/qa/specs/features/api/3_create/gitaly/backend_node_recovery_spec.rb  71
-rw-r--r--  qa/qa/specs/features/api/3_create/gitaly/changing_repository_storage_spec.rb (renamed from qa/qa/specs/features/api/3_create/repository/changing_repository_storage_spec.rb)  26
-rw-r--r--  qa/qa/specs/features/api/3_create/gitaly/distributed_reads_spec.rb  99
-rw-r--r--  qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb (renamed from qa/qa/specs/features/api/3_create/repository/praefect_replication_queue_spec.rb)  5
5 files changed, 290 insertions, 11 deletions
diff --git a/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb
new file mode 100644
index 00000000000..064f5280625
--- /dev/null
+++ b/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb
@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+module QA
+ RSpec.describe 'Create' do
+ context 'Gitaly automatic failover and manual recovery', :orchestrated, :gitaly_cluster, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/238953', type: :flaky } do
+ # Variables shared between contexts. Their values need to persist across
+ # contexts, so they can't be `let` variables.
+ praefect_manager = Service::PraefectManager.new
+ project = nil
+
+ let(:initial_commit_message) { 'Initial commit' }
+ let(:first_added_commit_message) { 'pushed to primary gitaly node' }
+ let(:second_added_commit_message) { 'commit to failover node' }
+
+ before(:context) do
+ # Reset the cluster in case previous tests left it in a bad state
+ praefect_manager.reset_primary_to_original
+
+ project = Resource::Project.fabricate! do |project|
+ project.name = "gitaly_cluster"
+ project.initialize_with_readme = true
+ end
+ end
+
+ after(:context) do
+ # Leave the cluster in a suitable state for subsequent tests,
+ # in case something went wrong during the tests here
+ praefect_manager.reset_primary_to_original
+ end
+
+ it 'automatically fails over' do
+ # Push a new commit and wait for it to replicate to all nodes
+ Resource::Repository::ProjectPush.fabricate! do |push|
+ push.project = project
+ push.commit_message = first_added_commit_message
+ push.new_branch = false
+ push.file_content = "This should exist on both nodes"
+ end
+
+ praefect_manager.wait_for_replication(project.id)
+
+ # Stop the primary node to trigger failover, and then wait
+ # for Gitaly to be ready for writes again
+ praefect_manager.trigger_failover_by_stopping_primary_node
+ praefect_manager.wait_for_new_primary
+ praefect_manager.wait_for_health_check_current_primary_node
+ praefect_manager.wait_for_gitaly_check
+
+ Resource::Repository::Commit.fabricate_via_api! do |commit|
+ commit.project = project
+ commit.commit_message = second_added_commit_message
+ commit.add_files([
+ {
+ file_path: "file-#{SecureRandom.hex(8)}",
+ content: 'This should exist on one node before reconciliation'
+ }
+ ])
+ end
+
+ # Confirm that we have access to the repo after failover,
+ # including the commit we just added
+ expect(project.commits.map { |commit| commit[:message].chomp })
+ .to include(initial_commit_message)
+ .and include(first_added_commit_message)
+ .and include(second_added_commit_message)
+ end
+
+ context 'when recovering from dataloss after failover' do
+ it 'allows reconciliation', quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/238187', type: :stale } do
+ # Start the old primary node again
+ praefect_manager.start_primary_node
+ praefect_manager.wait_for_health_check_current_primary_node
+
+ # Confirm dataloss (i.e., inconsistent nodes)
+ expect(praefect_manager.replicated?(project.id)).to be false
+
+ # Reconcile nodes to recover from dataloss
+ praefect_manager.reconcile_nodes
+ praefect_manager.wait_for_replication(project.id)
+
+ # Confirm that all commits are available after reconciliation
+ expect(project.commits.map { |commit| commit[:message].chomp })
+ .to include(initial_commit_message)
+ .and include(first_added_commit_message)
+ .and include(second_added_commit_message)
+
+ # Restore the original primary node
+ praefect_manager.reset_primary_to_original
+
+ # Check that all commits are still available even though the primary
+ # node was offline when one was made
+ expect(project.commits.map { |commit| commit[:message].chomp })
+ .to include(initial_commit_message)
+ .and include(first_added_commit_message)
+ .and include(second_added_commit_message)
+ end
+ end
+ end
+ end
+end
diff --git a/qa/qa/specs/features/api/3_create/gitaly/backend_node_recovery_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/backend_node_recovery_spec.rb
new file mode 100644
index 00000000000..52674f08e15
--- /dev/null
+++ b/qa/qa/specs/features/api/3_create/gitaly/backend_node_recovery_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+module QA
+ RSpec.describe 'Create' do
+ context 'Gitaly' do
+ describe 'Backend node recovery', :orchestrated, :gitaly_cluster, :skip_live_env do
+ let(:praefect_manager) { Service::PraefectManager.new }
+ let(:project) do
+ Resource::Project.fabricate! do |project|
+ project.name = "gitaly_cluster"
+ project.initialize_with_readme = true
+ end
+ end
+
+ before do
+ # Reset the cluster in case previous tests left it in a bad state
+ praefect_manager.reset_primary_to_original
+ end
+
+ after do
+ # Leave the cluster in a suitable state for subsequent tests
+ praefect_manager.reset_primary_to_original
+ end
+
+ it 'recovers from dataloss', quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/238186', type: :investigating } do
+ # Create a new project with a commit and wait for it to replicate
+ praefect_manager.wait_for_replication(project.id)
+
+ # Stop the primary node to trigger failover, and then wait
+ # for Gitaly to be ready for writes again
+ praefect_manager.trigger_failover_by_stopping_primary_node
+ praefect_manager.wait_for_new_primary
+ praefect_manager.wait_for_health_check_current_primary_node
+ praefect_manager.wait_for_gitaly_check
+
+ # Confirm that we have access to the repo after failover
+ Support::Waiter.wait_until(retry_on_exception: true, sleep_interval: 5) do
+ Resource::Repository::Commit.fabricate_via_api! do |commits|
+ commits.project = project
+ commits.sha = 'master'
+ end
+ end
+
+ # Push a commit to the new primary
+ Resource::Repository::ProjectPush.fabricate! do |push|
+ push.project = project
+ push.new_branch = false
+ push.commit_message = 'pushed after failover'
+ push.file_name = 'new_file'
+ push.file_content = 'new file'
+ end
+
+ # Start the old primary node again
+ praefect_manager.start_primary_node
+ praefect_manager.wait_for_health_check_current_primary_node
+
+ # Confirm dataloss (i.e., inconsistent nodes)
+ expect(praefect_manager.replicated?(project.id)).to be false
+
+ # Reconcile nodes to recover from dataloss
+ praefect_manager.reconcile_nodes
+ praefect_manager.wait_for_replication(project.id)
+
+ # Confirm that both commits are available after reconciliation
+ expect(project.commits.map { |commit| commit[:message].chomp })
+ .to include("Initial commit").and include("pushed after failover")
+ end
+ end
+ end
+ end
+end
diff --git a/qa/qa/specs/features/api/3_create/repository/changing_repository_storage_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/changing_repository_storage_spec.rb
index 11e7db5b097..432598d1cb3 100644
--- a/qa/qa/specs/features/api/3_create/repository/changing_repository_storage_spec.rb
+++ b/qa/qa/specs/features/api/3_create/gitaly/changing_repository_storage_spec.rb
@@ -2,14 +2,15 @@
module QA
RSpec.describe 'Create' do
- describe 'Changing Gitaly repository storage', :requires_admin do
+ describe 'Changing Gitaly repository storage', :requires_admin, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/236195', type: :investigating } do
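+ # Praefect manager shared by the examples below; the shared examples use it
+ # to verify each repository storage move (it runs against the 'gitlab' service)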
+ praefect_manager = Service::PraefectManager.new
+ praefect_manager.gitlab = 'gitlab'
+
shared_examples 'repository storage move' do
it 'confirms a `finished` status after moving project repository storage' do
expect(project).to have_file('README.md')
-
- project.change_repository_storage(destination_storage)
-
- expect(Runtime::API::RepositoryStorageMoves).to have_status(project, 'finished', destination_storage)
+ expect { project.change_repository_storage(destination_storage[:name]) }.not_to raise_error
+ expect { praefect_manager.verify_storage_move(source_storage, destination_storage) }.not_to raise_error
Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project
@@ -25,28 +26,35 @@ module QA
end
context 'when moving from one Gitaly storage to another', :orchestrated, :repository_storage do
+ let(:source_storage) { { type: :gitaly, name: 'default' } }
+ let(:destination_storage) { { type: :gitaly, name: QA::Runtime::Env.additional_repository_storage } }
+
let(:project) do
Resource::Project.fabricate_via_api! do |project|
project.name = 'repo-storage-move-status'
project.initialize_with_readme = true
+ project.api_client = Runtime::API::Client.as_admin
end
end
- let(:destination_storage) { QA::Runtime::Env.additional_repository_storage }
it_behaves_like 'repository storage move'
end
# Note: This test doesn't have the :orchestrated tag because it runs in the Test::Integration::Praefect
# scenario with other tests that aren't considered orchestrated.
- context 'when moving from Gitaly to Gitaly Cluster', :requires_praefect, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/227127', type: :investigating } do
+ # It also runs on staging using nfs-file07 as non-cluster storage and nfs-file22 as cluster/praefect storage
+ context 'when moving from Gitaly to Gitaly Cluster', :requires_praefect do
+ let(:source_storage) { { type: :gitaly, name: QA::Runtime::Env.non_cluster_repository_storage } }
+ let(:destination_storage) { { type: :praefect, name: QA::Runtime::Env.praefect_repository_storage } }
+
let(:project) do
Resource::Project.fabricate_via_api! do |project|
project.name = 'repo-storage-move'
project.initialize_with_readme = true
- project.repository_storage = 'gitaly'
+ project.repository_storage = source_storage[:name]
+ project.api_client = Runtime::API::Client.as_admin
end
end
- let(:destination_storage) { QA::Runtime::Env.praefect_repository_storage }
it_behaves_like 'repository storage move'
end
diff --git a/qa/qa/specs/features/api/3_create/gitaly/distributed_reads_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/distributed_reads_spec.rb
new file mode 100644
index 00000000000..6292ca821ca
--- /dev/null
+++ b/qa/qa/specs/features/api/3_create/gitaly/distributed_reads_spec.rb
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+require 'parallel'
+
+module QA
+ RSpec.describe 'Create' do
+ context 'Gitaly' do
+ # Issue to track removal of feature flag: https://gitlab.com/gitlab-org/quality/team-tasks/-/issues/602
+ describe 'Distributed reads', :orchestrated, :gitaly_cluster, :skip_live_env, :requires_admin do
+ let(:number_of_reads_per_loop) { 9 }
+ let(:praefect_manager) { Service::PraefectManager.new }
+ let(:project) do
+ Resource::Project.fabricate! do |project|
+ project.name = "gitaly_cluster"
+ project.initialize_with_readme = true
+ end
+ end
+
+ before do
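+ # Distributed reads are gated behind a feature flag; enable it for these tests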
+ Runtime::Feature.enable_and_verify('gitaly_distributed_reads')
+ praefect_manager.wait_for_replication(project.id)
+ end
+
+ after do
+ Runtime::Feature.disable_and_verify('gitaly_distributed_reads')
+ end
+
+ it 'reads from each node' do
+ pre_read_data = praefect_manager.query_read_distribution
+
+ wait_for_reads_to_increase(project, number_of_reads_per_loop, pre_read_data)
+
+ aggregate_failures "each gitaly node" do
+ praefect_manager.query_read_distribution.each do |data|
+ pre_read_count = praefect_manager.value_for_node(pre_read_data, data[:node])
+ QA::Runtime::Logger.debug("Node: #{data[:node]}; before: #{pre_read_count}; now: #{data[:value]}")
+ expect(data[:value]).to be > pre_read_count,
+ "Read count did not increase for node #{data[:node]}"
+ end
+ end
+ end
+
+ context 'when a node is unhealthy' do
+ before do
+ praefect_manager.stop_secondary_node
+ praefect_manager.wait_for_secondary_node_health_check_failure
+ end
+
+ after do
+ # Leave the cluster in a suitable state for subsequent tests
+ praefect_manager.start_secondary_node
+ praefect_manager.wait_for_health_check_all_nodes
+ praefect_manager.wait_for_reliable_connection
+ end
+
+ it 'does not read from the unhealthy node' do
+ pre_read_data = praefect_manager.query_read_distribution
+
+ read_from_project(project, number_of_reads_per_loop * 10)
+
+ praefect_manager.wait_for_read_count_change(pre_read_data)
+
+ post_read_data = praefect_manager.query_read_distribution
+
+ aggregate_failures "each gitaly node" do
+ expect(praefect_manager.value_for_node(post_read_data, 'gitaly1')).to be > praefect_manager.value_for_node(pre_read_data, 'gitaly1')
+ expect(praefect_manager.value_for_node(post_read_data, 'gitaly2')).to eq praefect_manager.value_for_node(pre_read_data, 'gitaly2')
+ expect(praefect_manager.value_for_node(post_read_data, 'gitaly3')).to be > praefect_manager.value_for_node(pre_read_data, 'gitaly3')
+ end
+ end
+ end
+
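+ # Clone the project repository in parallel, `number_of_reads` times, to
+ # generate read traffic across the Gitaly nodes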
+ def read_from_project(project, number_of_reads)
+ QA::Runtime::Logger.info('Reading from the repository')
+ Parallel.each((1..number_of_reads)) do
+ Git::Repository.perform do |repository|
+ repository.uri = project.repository_http_location.uri
+ repository.use_default_credentials
+ repository.clone
+ end
+ end
+ end
+
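+ # Read from the project repeatedly until every node reports a higher read
+ # count than in `pre_read_data`, or the waiter gives up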
+ def wait_for_reads_to_increase(project, number_of_reads, pre_read_data)
+ diff_found = pre_read_data.map(&:dup)
+
+ Support::Waiter.wait_until(sleep_interval: 5, raise_on_failure: false) do
+ read_from_project(project, number_of_reads)
+
+ praefect_manager.query_read_distribution.each_with_index do |data, index|
+ diff_found[index][:diff] = true if data[:value] > praefect_manager.value_for_node(pre_read_data, data[:node])
+ end
+ diff_found.all? { |node| node.key?(:diff) && node[:diff] }
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/qa/qa/specs/features/api/3_create/repository/praefect_replication_queue_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb
index a4040a46b84..78c8639a029 100644
--- a/qa/qa/specs/features/api/3_create/repository/praefect_replication_queue_spec.rb
+++ b/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb
@@ -4,7 +4,7 @@ require 'parallel'
module QA
RSpec.describe 'Create' do
- context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_ha, :skip_live_env do
+ context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
@@ -14,7 +14,8 @@ module QA
end
after do
- praefect_manager.reset_cluster
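+ # Ensure Praefect is running and the replication queue is empty for subsequent tests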
+ praefect_manager.start_praefect
+ praefect_manager.wait_for_reliable_connection
praefect_manager.clear_replication_queue
end