1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
// Test the catch-up behavior of new primaries.
(function() {
"use strict";
load("jstests/libs/check_log.js");
load("jstests/libs/write_concern_util.js");
load("jstests/replsets/libs/election_metrics.js");
load("jstests/replsets/rslib.js");
var name = "catch_up";
var rst = new ReplSetTest({name: name, nodes: 3, useBridge: true, waitForKeys: true});
rst.startSet();
var conf = rst.getReplSetConfig();
conf.members[2].priority = 0;
conf.settings = {
heartbeatIntervalMillis: 500,
electionTimeoutMillis: 10000,
catchUpTimeoutMillis: 4 * 60 * 1000
};
rst.initiate(conf);
rst.awaitSecondaryNodes();
var primary = rst.getPrimary();
var primaryColl = primary.getDB("test").coll;
// Set verbosity for replication on all nodes.
var verbosity = {
"setParameter": 1,
"logComponentVerbosity": {
"replication": {"verbosity": 2},
}
};
rst.nodes.forEach(function(node) {
node.adminCommand(verbosity);
});
function stepUpNode(node) {
assert.soonNoExcept(function() {
assert.commandWorked(node.adminCommand({replSetStepUp: 1}));
rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, rst.nodes, rst.getNodeId(node));
return node.adminCommand('replSetGetStatus').myState == ReplSetTest.State.PRIMARY;
}, 'failed to step up node ' + node.host, rst.kDefaultTimeoutMS);
return node;
}
function checkOpInOplog(node, op, count) {
node.getDB("admin").getMongo().setSlaveOk();
var oplog = node.getDB("local")['oplog.rs'];
var oplogArray = oplog.find().toArray();
assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray));
}
// Stop replication on secondaries, do writes and step up one of the secondaries.
//
// The old primary has extra writes that are not replicated to the other nodes yet,
// but the new primary steps up, getting the vote from the the third node "voter".
function stopReplicationAndEnforceNewPrimaryToCatchUp() {
// Write documents that cannot be replicated to secondaries in time.
var oldSecondaries = rst.getSecondaries();
var oldPrimary = rst.getPrimary();
stopServerReplication(oldSecondaries);
for (var i = 0; i < 3; i++) {
assert.writeOK(oldPrimary.getDB("test").foo.insert({x: i}));
}
var latestOpOnOldPrimary = getLatestOp(oldPrimary);
// New primary wins immediately, but needs to catch up.
var newPrimary = stepUpNode(oldSecondaries[0]);
var latestOpOnNewPrimary = getLatestOp(newPrimary);
// Check this node is not writable.
assert.eq(newPrimary.getDB("test").isMaster().ismaster, false);
return {
oldSecondaries: oldSecondaries,
oldPrimary: oldPrimary,
newPrimary: newPrimary,
voter: oldSecondaries[1],
latestOpOnOldPrimary: latestOpOnOldPrimary,
latestOpOnNewPrimary: latestOpOnNewPrimary
};
}
function reconfigElectionAndCatchUpTimeout(electionTimeout, catchupTimeout) {
// Reconnect all nodes to make sure reconfig succeeds.
rst.nodes.forEach(reconnect);
// Reconfigure replica set to decrease catchup timeout.
var newConfig = rst.getReplSetConfigFromNode();
newConfig.version++;
newConfig.settings.catchUpTimeoutMillis = catchupTimeout;
newConfig.settings.electionTimeoutMillis = electionTimeout;
reconfig(rst, newConfig);
rst.awaitReplication();
rst.awaitNodesAgreeOnPrimary();
}
rst.awaitReplication();
jsTest.log("Case 1: The primary is up-to-date after refreshing heartbeats.");
let initialNewPrimaryStatus =
assert.commandWorked(rst.getSecondary().adminCommand({serverStatus: 1}));
// Should complete transition to primary immediately.
var newPrimary = stepUpNode(rst.getSecondary());
// Should win an election and finish the transition very quickly.
assert.eq(newPrimary, rst.getPrimary());
rst.awaitReplication();
// Check that the 'numCatchUps' field has not been incremented in serverStatus.
let newNewPrimaryStatus = assert.commandWorked(newPrimary.adminCommand({serverStatus: 1}));
verifyServerStatusChange(
initialNewPrimaryStatus.electionMetrics, newNewPrimaryStatus.electionMetrics, 'numCatchUps', 0);
// Check that the 'numCatchUpsAlreadyCaughtUp' field has been incremented in serverStatus, and
// that none of the other reasons for catchup concluding has been incremented.
verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
newNewPrimaryStatus.electionMetrics,
'numCatchUpsAlreadyCaughtUp');
// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response does not have
// a 'targetCatchupOpTime' field if the target opTime for catchup is not set.
let res = assert.commandWorked(newPrimary.adminCommand({replSetGetStatus: 1}));
assert(res.electionCandidateMetrics,
() => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
assert(!res.electionCandidateMetrics.targetCatchupOpTime,
() => "Response should not have an 'electionCandidateMetrics.targetCatchupOpTime' field: " +
tojson(res.electionCandidateMetrics));
// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has a
// 'numCatchUpOps' field once the primary is caught up, and that it has the correct value.
assert(res.electionCandidateMetrics.numCatchUpOps,
() => "Response should have an 'electionCandidateMetrics.numCatchUpOps' field: " +
tojson(res.electionCandidateMetrics));
assert.eq(res.electionCandidateMetrics.numCatchUpOps, 0);
jsTest.log("Case 2: The primary needs to catch up, succeeds in time.");
initialNewPrimaryStatus =
assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response does not have
// a 'newTermStartDate' field before the transition to primary is complete.
res = assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1}));
assert(res.electionCandidateMetrics,
() => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
assert(!res.electionCandidateMetrics.newTermStartDate,
() => "Response should not have an 'electionCandidateMetrics.newTermStartDate' field: " +
tojson(res.electionCandidateMetrics));
// Disable fail point to allow replication.
restartServerReplication(stepUpResults.oldSecondaries);
// getPrimary() blocks until the primary finishes drain mode.
assert.eq(stepUpResults.newPrimary, rst.getPrimary());
// Wait until the new primary completes the transition to primary and writes a no-op.
checkLog.contains(stepUpResults.newPrimary, "transition to primary complete");
// Check that the new primary's term has been updated because of the no-op.
assert.eq(getLatestOp(stepUpResults.newPrimary).t, stepUpResults.latestOpOnNewPrimary.t + 1);
// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has a
// 'newTermStartDate' field once the transition to primary is complete.
res = assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1}));
assert(res.electionCandidateMetrics,
() => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
assert(res.electionCandidateMetrics.newTermStartDate,
() => "Response should have an 'electionCandidateMetrics.newTermStartDate' field: " +
tojson(res.electionCandidateMetrics));
// Check that the 'numCatchUps' field has been incremented in serverStatus.
newNewPrimaryStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
verifyServerStatusChange(
initialNewPrimaryStatus.electionMetrics, newNewPrimaryStatus.electionMetrics, 'numCatchUps', 1);
// Check that the 'numCatchUpsSucceeded' field has been incremented in serverStatus, and that
// none of the other reasons for catchup concluding has been incremented.
verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
newNewPrimaryStatus.electionMetrics,
'numCatchUpsSucceeded');
// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has
// 'targetCatchupOpTime' and 'numCatchUpOps' fields once the primary is caught up, and that they
// have the correct values.
assert(res.electionCandidateMetrics.targetCatchupOpTime,
() => "Response should have an 'electionCandidateMetrics.targetCatchupOpTime' field: " +
tojson(res.electionCandidateMetrics));
assert.eq(res.electionCandidateMetrics.targetCatchupOpTime.ts,
stepUpResults.latestOpOnOldPrimary.ts);
assert.eq(res.electionCandidateMetrics.targetCatchupOpTime.t, stepUpResults.latestOpOnOldPrimary.t);
assert(res.electionCandidateMetrics.numCatchUpOps,
() => "Response should have an 'electionCandidateMetrics.numCatchUpOps' field: " +
tojson(res.electionCandidateMetrics));
// numCatchUpOps should be 4 because the 'foo' collection is implicitly created during the 3
// inserts, and that's where the additional oplog entry comes from.
assert.eq(res.electionCandidateMetrics.numCatchUpOps, 4);
// Wait for all secondaries to catch up
rst.awaitReplication();
// Check the latest op on old primary is preserved on the new one.
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1);
rst.awaitReplication();
jsTest.log("Case 3: The primary needs to catch up, but has to change sync source to catch up.");
// Reconfig the election timeout to be longer than 1 minute so that the third node will no
// longer be blacklisted by the new primary if it happened to be at the beginning of the test.
reconfigElectionAndCatchUpTimeout(3 * 60 * 1000, conf.settings.catchUpTimeoutMillis);
stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
// Disable fail point on the voter. Wait until it catches up with the old primary.
restartServerReplication(stepUpResults.voter);
assert.commandWorked(
stepUpResults.voter.adminCommand({replSetSyncFrom: stepUpResults.oldPrimary.host}));
// Wait until the new primary knows the last applied optime on the voter, so it will keep
// catching up after the old primary is disconnected.
assert.soon(function() {
var replSetStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1}));
var voterStatus = replSetStatus.members.filter(m => m.name == stepUpResults.voter.host)[0];
return rs.compareOpTimes(voterStatus.optime, stepUpResults.latestOpOnOldPrimary) == 0;
});
// Disconnect the new primary and the old one.
stepUpResults.oldPrimary.disconnect(stepUpResults.newPrimary);
// Disable the failpoint, the new primary should sync from the other secondary.
restartServerReplication(stepUpResults.newPrimary);
assert.eq(stepUpResults.newPrimary, rst.getPrimary());
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1);
// Restore the broken connection
stepUpResults.oldPrimary.reconnect(stepUpResults.newPrimary);
rst.awaitReplication();
jsTest.log("Case 4: The primary needs to catch up, fails due to timeout.");
initialNewPrimaryStatus =
assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
// Reconfig to make the catchup timeout shorter.
reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, 10 * 1000);
stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
// Wait until the new primary completes the transition to primary and writes a no-op.
checkLog.contains(stepUpResults.newPrimary, "Catchup timed out after becoming primary");
restartServerReplication(stepUpResults.newPrimary);
assert.eq(stepUpResults.newPrimary, rst.getPrimary());
// Check that the 'numCatchUpsTimedOut' field has been incremented in serverStatus, and that
// none of the other reasons for catchup concluding has been incremented.
newNewPrimaryStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
newNewPrimaryStatus.electionMetrics,
'numCatchUpsTimedOut');
// Wait for the no-op "new primary" after winning an election, so that we know it has
// finished transition to primary.
assert.soon(function() {
return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary,
getLatestOp(stepUpResults.newPrimary)) < 0;
});
// The extra oplog entries on the old primary are not replicated to the new one.
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0);
restartServerReplication(stepUpResults.voter);
rst.awaitReplication();
jsTest.log("Case 5: The primary needs to catch up with no timeout, then gets aborted.");
// Reconfig to make the catchup timeout infinite.
reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, -1);
stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
initialNewPrimaryStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
// Abort catchup.
assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetAbortPrimaryCatchUp: 1}));
// Check that the 'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd' field has been
// incremented in serverStatus, and that none of the other reasons for catchup concluding has
// been incremented.
newNewPrimaryStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
newNewPrimaryStatus.electionMetrics,
'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd');
// Wait for the no-op "new primary" after winning an election, so that we know it has
// finished transition to primary.
assert.soon(function() {
return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary,
getLatestOp(stepUpResults.newPrimary)) < 0;
});
// The extra oplog entries on the old primary are not replicated to the new one.
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0);
restartServerReplication(stepUpResults.oldSecondaries);
rst.awaitReplication();
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0);
jsTest.log("Case 6: The primary needs to catch up with no timeout, but steps down.");
initialNewPrimaryStatus =
assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
// Step-down command should abort catchup.
assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetStepDown: 60}));
// Check that the 'numCatchUpsFailedWithError' field has been incremented in serverStatus, and
// that none of the other reasons for catchup concluding has been incremented.
newNewPrimaryStatus =
assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
newNewPrimaryStatus.electionMetrics,
'numCatchUpsFailedWithError');
// Rename the primary.
var steppedDownPrimary = stepUpResults.newPrimary;
var newPrimary = rst.getPrimary();
assert.neq(newPrimary, steppedDownPrimary);
// Enable data replication on the stepped down primary and make sure it syncs old writes.
rst.nodes.forEach(reconnect);
restartServerReplication(stepUpResults.oldSecondaries);
rst.awaitReplication();
checkOpInOplog(steppedDownPrimary, stepUpResults.latestOpOnOldPrimary, 1);
rst.stopSet();
})();
|