1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
/**
* Tests the fields returned by hello responses as a node goes through a step down and step up.
*/
(function() {
"use strict";
load("jstests/libs/parallel_shell_helpers.js");
load("jstests/libs/fail_point_util.js");
// Exercise the hello parameters against a single-node replica set.
const replSetName = "awaitable_hello_stepup";
const replTest = new ReplSetTest({name: replSetName, nodes: 1});
replTest.startSet();
replTest.initiate();

const dbName = "awaitable_hello_test";
const node = replTest.getPrimary();
const db = node.getDB(dbName);

// A plain hello - one whose request carries neither maxAwaitTimeMS nor topologyVersion - must
// still report a topologyVersion, and that topologyVersion must have a processId and a counter.
const res = assert.commandWorked(db.runCommand({hello: 1}));
assert(res.hasOwnProperty("topologyVersion"), tojson(res));
const topologyVersionField = res.topologyVersion;
for (const requiredField of ["processId", "counter"]) {
    assert(topologyVersionField.hasOwnProperty(requiredField), tojson(topologyVersionField));
}
/**
 * Follows a step down through two consecutive awaitable hello requests. Handed to
 * startParallelShell through funWithArgs before replSetStepDown is issued.
 * NOTE(review): the helper below is kept local on the assumption that funWithArgs ships only this
 * function's own source to the parallel shell, where 'db' is that shell's connection - confirm.
 *
 * @param {Object} topologyVersionField - the TopologyVersion reported before the step down.
 */
function runAwaitableHelloBeforeStepDown(topologyVersionField) {
    // Sends a hello that carries 'knownVersion' together with a very large maxAwaitTimeMS.
    const awaitHello = (knownVersion) => assert.commandWorked(db.runCommand({
        hello: 1,
        topologyVersion: knownVersion,
        maxAwaitTimeMS: 99999999,
    }));

    // First reply: expected as soon as writes have been disabled on the primary, i.e. before the
    // node has transitioned to secondary. The counter must have advanced by exactly one, the node
    // is neither writable primary nor secondary, and 'primary' is still reported.
    const writesDisabledReply = awaitHello(topologyVersionField);
    assert.eq(topologyVersionField.counter + 1, writesDisabledReply.topologyVersion.counter);
    assert.eq(false, writesDisabledReply.isWritablePrimary, writesDisabledReply);
    assert.eq(false, writesDisabledReply.secondary, writesDisabledReply);
    assert.hasFields(writesDisabledReply, ["primary"]);

    // Second reply: the TopologyVersion taken from the first reply is expected to be stale once
    // the old primary has finished its transition to secondary, so this request should respond
    // immediately with the counter advanced by one more and no 'primary' field.
    const secondaryReply = awaitHello(writesDisabledReply.topologyVersion);
    assert.eq(writesDisabledReply.topologyVersion.counter + 1,
              secondaryReply.topologyVersion.counter,
              secondaryReply);
    assert.eq(false, secondaryReply.isWritablePrimary, secondaryReply);
    assert.eq(true, secondaryReply.secondary, secondaryReply);
    assert(!secondaryReply.hasOwnProperty("primary"), secondaryReply);
}
/**
 * Follows the first half of a step up through two consecutive awaitable hello requests. Handed
 * to startParallelShell through funWithArgs before the node is unfrozen.
 * NOTE(review): the helper below is kept local on the assumption that funWithArgs ships only this
 * function's own source to the parallel shell, where 'db' is that shell's connection - confirm.
 *
 * @param {Object} topologyVersionField - the TopologyVersion reported after the step down.
 */
function runAwaitableHelloBeforeStepUp(topologyVersionField) {
    // Sends a hello that carries 'knownVersion' together with a very large maxAwaitTimeMS.
    const awaitHello = (knownVersion) => assert.commandWorked(db.runCommand({
        hello: 1,
        topologyVersion: knownVersion,
        maxAwaitTimeMS: 99999999,
    }));

    // First reply: expected once the primary enters drain mode. At this point the 'primary' field
    // should exist while 'isWritablePrimary' is still false and 'secondary' is still true.
    const drainModeReply = awaitHello(topologyVersionField);
    assert.eq(topologyVersionField.counter + 1, drainModeReply.topologyVersion.counter);
    assert.eq(false, drainModeReply.isWritablePrimary, drainModeReply);
    assert.eq(true, drainModeReply.secondary, drainModeReply);
    assert.hasFields(drainModeReply, ["primary"]);

    // Second reply: the TopologyVersion from the first reply is expected to be stale because the
    // primary should bump the config term and increment the counter once more. The node state
    // reported here is the same as in the first reply.
    const reconfigReply = awaitHello(drainModeReply.topologyVersion);
    assert.eq(drainModeReply.topologyVersion.counter + 1,
              reconfigReply.topologyVersion.counter,
              reconfigReply);
    assert.eq(false, reconfigReply.isWritablePrimary, reconfigReply);
    assert.eq(true, reconfigReply.secondary, reconfigReply);
    assert.hasFields(reconfigReply, ["primary"]);
}
/**
 * Follows the final stage of a step up with a single awaitable hello request. Handed to
 * startParallelShell through funWithArgs while the step up thread is held by a failpoint.
 * NOTE(review): 'db' is assumed to be the parallel shell's own connection - confirm.
 *
 * @param {Object} topologyVersionField - the TopologyVersion reported after the reconfig that is
 *     done on step up.
 */
function runAwaitableHelloAfterStepUp(topologyVersionField) {
    // The TopologyVersion passed in is expected to become stale when the primary exits drain
    // mode and increments the counter once again.
    const awaitableHello = {
        hello: 1,
        topologyVersion: topologyVersionField,
        maxAwaitTimeMS: 99999999,
    };
    const writablePrimaryReply = assert.commandWorked(db.runCommand(awaitableHello));

    // The counter advanced by exactly one and the node now reports itself as writable primary.
    assert.eq(topologyVersionField.counter + 1, writablePrimaryReply.topologyVersion.counter);
    assert.eq(true, writablePrimaryReply.isWritablePrimary, writablePrimaryReply);
    assert.eq(false, writablePrimaryReply.secondary, writablePrimaryReply);
    assert.hasFields(writablePrimaryReply, ["primary"]);
}
// Main flow: drive the node through a step down and a step up, and at each stage run one of the
// runAwaitableHello* functions in a parallel shell so that it is already waiting on the server
// when the topology changes.

// A failpoint signalling that the server has received the hello request and is waiting for a
// topology change.
let failPoint = configureFailPoint(node, "waitForHelloResponse");
// Send an awaitable hello request. This will block until maxAwaitTimeMS has elapsed or a
// topology change happens.
const awaitHelloBeforeStepDown = startParallelShell(
    funWithArgs(runAwaitableHelloBeforeStepDown, topologyVersionField), node.port);
failPoint.wait();
// Call stepdown to increment the server TopologyVersion and respond to the waiting hello
// request. We expect stepDown to increment the TopologyVersion twice - once for when the writes
// are disabled and once again for when the primary completes its transition to secondary.
assert.commandWorked(db.adminCommand({replSetStepDown: 60, force: true}));
// Join the parallel shell started above.
awaitHelloBeforeStepDown();

// Fetch the TopologyVersion the node reports now that the step down has finished. On failure,
// print the response that was actually checked rather than the hello reply from the setup phase.
let response = assert.commandWorked(node.getDB(dbName).runCommand({hello: 1}));
assert(response.hasOwnProperty("topologyVersion"), tojson(response));
const topologyVersionAfterStepDown = response.topologyVersion;

// Reconfigure the failpoint to refresh the number of times the failpoint has been entered.
failPoint = configureFailPoint(node, "waitForHelloResponse");
// This second failpoint is waited on below to hold the step up thread after its reconfig.
const hangFailPoint = configureFailPoint(node, "hangAfterReconfigOnDrainComplete");
// Send an awaitable hello request. This will block until maxAwaitTimeMS has elapsed or a
// topology change happens.
const awaitHelloBeforeStepUp = startParallelShell(
    funWithArgs(runAwaitableHelloBeforeStepUp, topologyVersionAfterStepDown), node.port);
failPoint.wait();
// Unfreezing the old primary will cause the node to step up in a single node replica set.
assert.commandWorked(node.adminCommand({replSetFreeze: 0}));
// Wait until stepup thread hangs after the reconfig.
hangFailPoint.wait();
awaitHelloBeforeStepUp();

// Fetch the TopologyVersion reported while the step up thread is still held by the failpoint.
response = assert.commandWorked(node.getDB(dbName).runCommand({hello: 1}));
assert(response.hasOwnProperty("topologyVersion"), tojson(response));
const topologyVersionAfterStepUp = response.topologyVersion;

// Reconfigure the failpoint to refresh the number of times the failpoint has been entered.
failPoint = configureFailPoint(node, "waitForHelloResponse");
// Send an awaitable hello request. This will block until maxAwaitTimeMS has elapsed or a
// topology change happens.
const awaitHelloAfterStepUp = startParallelShell(
    funWithArgs(runAwaitableHelloAfterStepUp, topologyVersionAfterStepUp), node.port);
failPoint.wait();
// Let the stepup thread continue.
hangFailPoint.off();
awaitHelloAfterStepUp();

replTest.stopSet();
})();
|