blob: 89bf92cd3af3c94eec03194b65840e7eece140b3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# frozen_string_literal: true
module QA
RSpec.describe 'Create' do
context 'Gitaly' do
describe 'Backend node recovery', :orchestrated, :gitaly_cluster, :skip_live_env, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/238186', type: :investigating } do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
project.name = "gitaly_cluster"
project.initialize_with_readme = true
end
end
before do
# Reset the cluster in case previous tests left it in a bad state
praefect_manager.reset_primary_to_original
end
after do
# Leave the cluster in a suitable state for subsequent tests
praefect_manager.reset_primary_to_original
end
it 'recovers from dataloss', testcase: 'https://gitlab.com/gitlab-org/quality/testcases/-/issues/978' do
# Create a new project with a commit and wait for it to replicate
praefect_manager.wait_for_replication(project.id)
# Stop the primary node to trigger failover, and then wait
# for Gitaly to be ready for writes again
praefect_manager.trigger_failover_by_stopping_primary_node
praefect_manager.wait_for_new_primary
praefect_manager.wait_for_health_check_current_primary_node
praefect_manager.wait_for_gitaly_check
# Confirm that we have access to the repo after failover
Support::Waiter.wait_until(retry_on_exception: true, sleep_interval: 5) do
Resource::Repository::Commit.fabricate_via_api! do |commits|
commits.project = project
commits.sha = project.default_branch
end
end
# Push a commit to the new primary
Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project
push.new_branch = false
push.commit_message = 'pushed after failover'
push.file_name = 'new_file'
push.file_content = 'new file'
end
# Start the old primary node again
praefect_manager.start_primary_node
praefect_manager.wait_for_health_check_current_primary_node
# Confirm dataloss (i.e., inconsistent nodes)
expect(praefect_manager.replicated?(project.id)).to be false
# Reconcile nodes to recover from dataloss
praefect_manager.reconcile_nodes
praefect_manager.wait_for_replication(project.id)
# Confirm that both commits are available after reconciliation
expect(project.commits.map { |commit| commit[:message].chomp })
.to include("Initial commit").and include("pushed after failover")
end
end
end
end
end
|