summaryrefslogtreecommitdiff
path: root/jstests/replsets/reconfig_avoids_diverging_configs.js
blob: 6697b0497a55378143ed92904412f9dd8fbf519f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/**
 * In a 4-node set, verify that two diverging non-force replica set reconfigs
 * are not allowed to succeed. Diverging reconfigs contain non-overlapping quorums. For example,
 * C1: {n1,n2,n3}
 * C2: {n1,n3,n4}
 * The C1 quorum {n1,n2} and the C2 quorum {n3,n4} do not overlap.
 *
 * 1. Node0 is the initial primary.
 * 2. Disconnect node0 from all other nodes.
 * 3. Issue a reconfig to node0 that removes node3.
 * 4. Step up node1, which creates a two-primary scenario.
 * 5. Issue a reconfig to node1 that removes node2. We now have diverging configs
 *   from two different primaries.
 * 6. Reconnect node0 to the rest of the set and verify that its reconfig fails.
 *
 * @tags: [requires_fcv_44]
 */
(function() {
"use strict";
// Test body: partition the primary (node0), send it reconfig C1, elect node1 on the other side
// of the partition, commit reconfig C2 there, then heal the partition and check that C1 was
// rejected and that C2 is the config that survived.
load("jstests/libs/parallel_shell_helpers.js");
load('jstests/libs/test_background_ops.js');
load("jstests/replsets/rslib.js");
load('jstests/aggregation/extras/utils.js');

// NOTE(review): useBridge is presumably what makes the disconnect()/reconnect() calls on node0
// below possible -- confirm against ReplSetTest.
let rst = new ReplSetTest({nodes: 4, useBridge: true});
rst.startSet();
rst.initiateWithHighElectionTimeout();

// Name the nodes by role: node0 is the primary at startup, node1..node3 are the secondaries in
// the order returned by getSecondaries().
const node0 = rst.getPrimary();
const [node1, node2, node3] = rst.getSecondaries();
jsTestLog("Current replica set topology: [node0 (Primary), node1, node2, node3]");

// The quorum check places stricter bounds on the safe reconfig
// protocol and won't allow this specific scenario of diverging configs
// to happen. However, it's still worth testing the original reconfig
// protocol that omitted the check for correctness.
configureFailPoint(rst.getPrimary(), "omitConfigQuorumCheck");

// Build (but do not yet issue) the reconfig that removes node3. The new config, C1, is
// {node0, node1, node2}. It is only sent to node0 later, from the parallel shell.
const C1 = Object.assign({}, rst.getReplSetConfigFromNode());
C1.members = C1.members.slice(0, 3);  // Remove the last node.
// Increase the C1 version by a high number to ensure the following config
// C2 will win the propagation by having a higher term.
C1.version = C1.version + 1000;
// Wait on node0's config and on replication before the partition is created.
// NOTE(review): presumably so every node starts from the same config and data -- confirm.
waitForConfigReplication(node0);
rst.awaitReplication();

jsTestLog("Disconnecting the primary from other nodes");
// Sanity check: node0 must still be the primary when it gets partitioned.
assert.eq(rst.getPrimary(), node0);
node0.disconnect([node1, node2, node3]);
jsTestLog("Current replica set topology: [node0 (Primary)] [node1, node2, node3]");
// Create parallel shell to execute reconfig on partitioned primary.
// This reconfig will not get propagated.
// The shell asserts that the reconfig command fails with InterruptedDueToReplStateChange.
// The handle returned here is invoked at the very end of the test.
const parallelShell = startParallelShell(
    funWithArgs(function(config) {
        assert.commandFailedWithCode(db.getMongo().adminCommand({replSetReconfig: config}),
                                     ErrorCodes.InterruptedDueToReplStateChange,
                                     "Reconfig C1 should fail");
    }, C1), node0.port);

// Elect node1 on the other side of the partition. Only node1, node2 and node3 are asked to
// agree on the primary, since node0 is still disconnected from them.
assert.commandWorked(node1.adminCommand({replSetStepUp: 1}));
rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, [node1, node2, node3], 1);
jsTestLog("Current replica set topology: [node0 (Primary)] [node1 (Primary), node2, node3]");
// Wait until node1 reports ismaster and isConfigCommitted(node1) holds before reconfiguring.
assert.soon(() => node1.getDB('admin').runCommand({ismaster: 1}).ismaster);
assert.soon(() => isConfigCommitted(node1));

// Reconfig to remove a secondary. We need to specify the node to get the original
// config from as there are two primaries, node0 and node1, in the replset.
// The new config is now {node0, node1, node3}.
let C2 = Object.assign({}, rst.getReplSetConfigFromNode(1));
// splice() removes the member at index 2 in place and returns it, so it can be re-added in C3.
const removedSecondary = C2.members.splice(2, 1);
C2.version++;
assert.commandWorked(node1.adminCommand({replSetReconfig: C2}));
assert.soon(() => isConfigCommitted(node1));

// Reconnect the partitioned primary, node0, to the other nodes.
node0.reconnect([node1, node2, node3]);
// The newly connected node will receive a heartbeat with a higher term, and
// step down from being primary. The reconfig command issued to this node, C1, will fail.
rst.waitForState(node0, ReplSetTest.State.SECONDARY);
// node2 is left out of this check because C2 removed it from the set.
rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, [node0, node1, node3]);
waitForConfigReplication(node1);
// NOTE(review): called without a node argument, so the config is read from the helper's default
// node -- presumably node0, which would show that node0 adopted C2 rather than C1; confirm.
assert.eq(C2, rst.getReplSetConfigFromNode());

// Re-add the member removed by C2 so the set is back to full membership.
// The new config is now {node0, node1, node2, node3}.
let C3 = Object.assign({}, rst.getReplSetConfigFromNode(1));
C3.members.push(removedSecondary[0]);
C3.version++;

assert.commandWorked(node1.adminCommand({replSetReconfig: C3}));
assert.soon(() => isConfigCommitted(node1));
rst.awaitNodesAgreeOnPrimary();
// Invoke the parallel shell handle.
// NOTE(review): presumably this joins the shell and surfaces a failure of its assertion -- confirm.
parallelShell();
rst.stopSet();
}());