// path: root/jstests/replsets/catchup_takeover_two_nodes_ahead.js
// blob: 4cdefc8fcdc680c9d40b504f7709aac9ef9d0927
// Test to ensure that a catchup takeover happens when the primary is lagged.
// Make sure that when two nodes are more caught up than the primary,
// the most up-to-date node becomes the primary.

// Test outline (5-node replica set):
// 1. Start the replica set; node 0 is expected to become primary.
// 2. Stop replication on nodes 2-4 and have the primary do a w:2 write, so that only
//    nodes 0 and 1 have it.
// 3. Stop replication on node 1 and have the primary do a w:1 write, so that node 0 is
//    ahead of node 1.
// 4. Now the primary (node 0) is the most up-to-date node, and node 1 is more up-to-date
//    than nodes 2-4.
// 5. Step up a lagged node (node 2) so that it becomes the next primary.
// 6. Confirm that the most up-to-date node (node 0) becomes primary again.

(function() {
'use strict';

load('jstests/replsets/rslib.js');
load('jstests/replsets/libs/election_metrics.js');

// Runs serverStatus on the given connection and returns the reply, asserting that it succeeded.
const serverStatusOf = (conn) => assert.commandWorked(conn.adminCommand({serverStatus: 1}));

// Upper bound, in milliseconds, on how long to wait for node 0 to take over as primary.
const kTakeoverWaitMS = 60 * 1000;

const testName = 'catchup_takeover_two_nodes_ahead';
const rst = new ReplSetTest({name: testName, nodes: 5});
const members = rst.startSet();

// Turn chaining off so that no node syncs from another secondary.
const replConfig = rst.getReplSetConfig();
replConfig.settings = {chainingAllowed: false};
rst.initiate(replConfig);
rst.awaitReplication();

// Freeze replication on nodes 2-4, then do a w:2 write on the primary so that nodes 0 and 1
// end up ahead of the rest.
stopServerReplication(members.slice(2, 5));
const initialPrimary = rst.getPrimary();
const testColl = initialPrimary.getDB(testName).bar;
const twoNodeWrite = {writeConcern: {w: 2, wtimeout: rst.kDefaultTimeoutMS}};
assert.writeOK(testColl.insert({x: 100}, twoNodeWrite));

// Freeze replication on node 1 too, then do a w:1 write so that node 0 is ahead of node 1.
stopServerReplication(members[1]);
const singleNodeWrite = {writeConcern: {w: 1, wtimeout: rst.kDefaultTimeoutMS}};
assert.writeOK(testColl.insert({y: 100}, singleNodeWrite));

// Snapshot serverStatus on both nodes of interest so the election metrics can be diffed later.
const laggedNode = members[2];
const primaryStatusBefore = serverStatusOf(initialPrimary);
const laggedStatusBefore = serverStatusOf(laggedNode);

// Force one of the lagged nodes (node 2) to step up and check that it reports PRIMARY state.
assert.commandWorked(laggedNode.adminCommand({replSetStepUp: 1}));
rst.awaitNodesAgreeOnPrimary();
const laggedReplStatus = assert.commandWorked(laggedNode.adminCommand('replSetGetStatus'));
assert.eq(ReplSetTest.State.PRIMARY,
          laggedReplStatus.myState,
          laggedNode.host + " was not primary after step-up");
jsTestLog('node 2 is now primary, but cannot accept writes');

// Although node 2 reports PRIMARY state, a write to it must be rejected with NotMaster: it is
// lagged and its replication is stopped, so it is not expected to start accepting writes.
assert.commandFailedWithCode(laggedNode.getDB(testName).bar.insert({z: 100}, singleNodeWrite),
                             ErrorCodes.NotMaster);

// Node 0, the most up-to-date member, should become primary again (after the default catchup
// takeover delay, per the test's intent).
rst.waitForState(0, ReplSetTest.State.PRIMARY, kTakeoverWaitMS);

// The 'catchUpTakeover' election reason counter on node 0 should have moved by 1 (both its
// 'called' and 'successful' fields, per the helper's intent) relative to the earlier snapshot.
const primaryStatusAfter = serverStatusOf(initialPrimary);
verifyServerStatusElectionReasonCounterChange(
    primaryStatusBefore.electionMetrics, primaryStatusAfter.electionMetrics, "catchUpTakeover", 1);

// Wait for node 2, the node that was stepped up, to return to SECONDARY state.
rst.waitForState(2, ReplSetTest.State.SECONDARY, rst.kDefaultTimeoutMS);

// On node 2, 'numCatchUpsFailedWithNewTerm' should be the only catchup conclusion reason that
// changed since the earlier snapshot.
const laggedStatusAfter = serverStatusOf(laggedNode);
verifyCatchUpConclusionReason(laggedStatusBefore.electionMetrics,
                              laggedStatusAfter.electionMetrics,
                              'numCatchUpsFailedWithNewTerm');

// Resume replication on nodes 1-4 so they can catch up, then shut the set down.
restartServerReplication(members.slice(1, 5));
rst.stopSet();
})();