// path: root/jstests/replsets/step_down_during_draining2.js
// blob: ad37a858dc4f8a38b248490b5281002e5032b4a7 (plain)
// Test stepdown during drain mode
// 1. Set up a 3-node set. Assume Node 0 is the primary at the beginning for simplicity.
// 2. Prevent applying retrieved ops on all secondaries, including Node 1.
// 3. Insert data to ensure Node 1 has ops to apply in its queue.
// 4. Step up Node 1. Now it enters drain mode, but cannot proceed.
// 5. Block Node 1's ability to process stepdowns.
// 6. Shut down nodes 0 and 2. Wait until Node 1 begins stepping down due to no longer having a
//    majority
// 7. Re-enable Node 1's ability to apply operations, ensure that clearing its buffer doesn't
//    cause it to finish drain mode because of the pending stepdown request.
// 8. Allow Node 1 to finish stepping down.

(function() {
"use strict";

load("jstests/replsets/rslib.js");
load("jstests/libs/fail_point_util.js");

// Start a three-node replica set named 'testSet'. Before initiating, the config is adjusted so
// that member 2 has priority 0, chaining is disabled, and the catch-up timeout is 0 ms.
var replSet = new ReplSetTest({name: 'testSet', nodes: 3});
replSet.startSet();
var conf = replSet.getReplSetConfig();
conf.members[2].priority = 0;
// Keep any settings the generated config already carries and only override the two below.
conf.settings = conf.settings || {};
conf.settings.chainingAllowed = false;
conf.settings.catchUpTimeoutMillis = 0;
replSet.initiate(conf);

// 'primary' is the initial primary. 'secondary' is the node that this test later steps up, so it
// keeps that name even while it is the primary-elect.
var primary = replSet.getPrimary();
var secondary = replSet.getSecondary();

// Raise the 'replication' log component to verbosity 3 on every node of the set. The command
// replies are not inspected.
var replVerbosityCmd = {
    "setParameter": 1,
    "logComponentVerbosity": {"replication": {"verbosity": 3}}
};
for (var nodeIdx = 0; nodeIdx < replSet.nodes.length; ++nodeIdx) {
    replSet.nodes[nodeIdx].adminCommand(replVerbosityCmd);
}

// Logs the target host, then switches the 'rsSyncApplyStop' fail point to 'alwaysOn' on 'node'
// and asserts that the configureFailPoint command succeeded.
//
// node: connection object exposing 'host' and 'adminCommand()'.
function enableFailPoint(node) {
    var failPointCmd = {configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'};
    jsTest.log("enable failpoint " + node.host);
    var reply = node.adminCommand(failPointCmd);
    assert.commandWorked(reply);
}

// Logs the target host, then switches the 'rsSyncApplyStop' fail point to 'off' on 'node' and
// asserts that the configureFailPoint command succeeded.
//
// node: connection object exposing 'host' and 'adminCommand()'.
function disableFailPoint(node) {
    var failPointCmd = {configureFailPoint: 'rsSyncApplyStop', mode: 'off'};
    jsTest.log("disable failpoint " + node.host);
    var reply = node.adminCommand(failPointCmd);
    assert.commandWorked(reply);
}

// Do an initial insert to prevent the secondary from going into recovery. This {x: 0} document
// counts toward numDocuments, which is the total number of documents 'foo.foo' is expected to
// hold once the later insert loop has run.
var numDocuments = 20;
var coll = primary.getDB("foo").foo;
// The insert uses write concern w: 3, and awaitReplication() is called before any fail point is
// enabled.
assert.commandWorked(coll.insert({x: 0}, {writeConcern: {w: 3}}));
replSet.awaitReplication();

// Turn on the rsSyncApplyStop fail point on every secondary to stop replication.
replSet.getSecondaries().forEach(enableFailPoint);

// Record the buffer count before inserting so the growth caused by the inserts below can be
// measured later.
var bufferCountBefore = secondary.getDB('foo').serverStatus().metrics.repl.buffer.count;

// Insert documents {x: 1} through {x: numDocuments - 1}; together with the earlier {x: 0} this
// brings the collection to numDocuments documents.
var nextX = 1;
while (nextX < numDocuments) {
    assert.commandWorked(coll.insert({x: nextX}));
    ++nextX;
}
jsTestLog('Number of documents inserted into collection on primary: ' + numDocuments);
var primaryDocCount = primary.getDB("foo").foo.find().itcount();
assert.eq(numDocuments, primaryDocCount);

// Predicate for assert.soon: true once the secondary's buffer count has grown by exactly the
// number of documents inserted after the baseline was taken (numDocuments - 1).
var hasBufferedAllInserts = function() {
    var bufferedSinceStop =
        secondary.getDB('foo').serverStatus().metrics.repl.buffer.count - bufferCountBefore;
    jsTestLog('Number of operations buffered on secondary since stopping applier: ' +
              bufferedSinceStop);
    return bufferedSinceStop == numDocuments - 1;
};

// Retry the predicate with the set's default timeout and a value of 1000 for the interval.
assert.soon(hasBufferedAllInserts,
            'secondary did not buffer operations for new inserts on primary',
            replSet.kDefaultTimeoutMs,
            1000);

// Step up the node whose applier is still blocked.
reconnect(secondary);
replSet.stepUpNoAwaitReplication(secondary);

// Secondary doesn't allow writes yet.
var isMasterRes = secondary.getDB("admin").runCommand({"isMaster": 1});
assert(!isMasterRes.ismaster);

// With the applier blocked, waiting for drain completion is expected to hit the time limit.
var blockedDrainReply = secondary.adminCommand({replSetTest: 1, waitForDrainFinish: 5000});
assert.commandFailedWithCode(
    blockedDrainReply,
    ErrorCodes.ExceededTimeLimit,
    'replSetTest waitForDrainFinish should time out when draining is not allowed to complete');

// Prevent the current primary from stepping down. Here that is the primary-elect, which this
// script still refers to as 'secondary'. The handle returned by configureFailPoint() is kept so
// the fail point can be waited on and later turned off.
jsTest.log("disallowing heartbeat stepdown " + secondary.host);
var blockHeartbeatStepdownFailPoint = configureFailPoint(secondary, "blockHeartbeatStepdown");
jsTestLog("Shut down the rest of the set so the primary-elect has to step down");
// Stop the original primary first, then node 2 once its rsSyncApplyStop fail point is cleared.
replSet.stop(primary);
disableFailPoint(replSet.nodes[2]);  // Fail point needs to be off when node is shut down.
replSet.stop(2);

jsTestLog("Waiting for secondary to begin stepping down while in drain mode");
// NOTE(review): wait() presumably blocks until the fail point has been hit - confirm in
// jstests/libs/fail_point_util.js.
blockHeartbeatStepdownFailPoint.wait();

// Disable the rsSyncApplyStop fail point so the node can apply its buffered operations while it
// is in the process of stepping down. The count check beforehand confirms that the node has not
// yet applied all of the documents.
jsTestLog("Re-enabling replication on secondary");
assert.gt(numDocuments, secondary.getDB("foo").foo.find().itcount());
disableFailPoint(secondary);

// The node should now be able to apply the writes in its buffer.
jsTestLog("Waiting for node to drain its apply buffer");
assert.soon(function() {
    return secondary.getDB("foo").foo.find().itcount() == numDocuments;
}, 'node did not apply all ' + numDocuments + ' documents after rsSyncApplyStop was disabled');

// Even though it finished draining its buffer, it shouldn't be able to exit drain mode due to
// pending stepdown, so waiting for drain completion is expected to hit the time limit again.
var pendingStepdownDrainReply =
    secondary.adminCommand({replSetTest: 1, waitForDrainFinish: 5000});
assert.commandFailedWithCode(
    pendingStepdownDrainReply,
    ErrorCodes.ExceededTimeLimit,
    'replSetTest waitForDrainFinish should time out when in the middle of stepping down');

// The node must report the PRIMARY member state while still not reporting ismaster.
jsTestLog("Checking that node is PRIMARY but not master");
var replStatus = secondary.adminCommand({replSetGetStatus: 1});
assert.eq(ReplSetTest.State.PRIMARY, replStatus.myState);
var drainModeIsMaster = secondary.adminCommand('ismaster');
assert(!drainModeIsMaster.ismaster);

// Turn off the blockHeartbeatStepdown fail point that was configured earlier so the pending
// stepdown can proceed.
jsTest.log("allowing heartbeat stepdown " + secondary.host);
blockHeartbeatStepdownFailPoint.off();

jsTestLog("Checking that node successfully stepped down");
// Wait for the node to reach the SECONDARY state, then confirm it still does not report ismaster.
replSet.waitForState(secondary, ReplSetTest.State.SECONDARY);
assert(!secondary.adminCommand('ismaster').ismaster);

// Now ensure that the node can successfully become primary again.
// Nodes 0 and 2 are restarted by index. This relies on the header comment's assumption that node 0
// was the initial primary that was stopped earlier.
replSet.restart(0);
replSet.restart(2);
replSet.stepUpNoAwaitReplication(secondary);

assert.soon(function() {
    return secondary.adminCommand('ismaster').ismaster;
}, 'node ' + secondary.host + ' did not report ismaster after being stepped up again');

jsTestLog('Ensure new primary is writable.');
assert.commandWorked(secondary.getDB("foo").flag.insert({sentinel: 1}, {writeConcern: {w: 3}}));
// Check that no writes were lost.
assert.eq(secondary.getDB("foo").foo.find().itcount(), numDocuments);
replSet.stopSet();
})();