summaryrefslogtreecommitdiff
path: root/jstests/replsets/step_down_during_draining.js
blob: 47c8ee2651a01d37a7f36d89c599e8598c28e8e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Test stepdown during drain mode
// 1. Set up a 3-node set. Assume Node 1 is the primary at the beginning for simplicity.
// 2. Prevent applying retrieved ops on all secondaries, including Node 2.
// 3. Insert data to ensure Node 2 has ops to apply in its queue.
// 4. Step up Node 2. Now it enters drain mode, but cannot proceed.
// 5. Step up Node 1. Wait until Node 2 knows of a higher term and steps down.
//    Node 2 re-enables bgsync producer while it's still in drain mode.
// 6. Step up Node 2 again. It enters drain mode again.
// 7. Enable applying ops.
// 8. Ensure the ops in queue are applied and that Node 2 begins to accept writes as usual.

load("jstests/replsets/rslib.js");

(function() {
"use strict";
// Create the three-node replica set used by this test and start it.
var replSet = new ReplSetTest({name: 'testSet', nodes: 3});
// NOTE(review): `nodes` is not read anywhere else in this test.
var nodes = replSet.nodeList();
replSet.startSet();

// Adjust the generated config before initiating: the member at index 2 gets priority 0,
// chaining is disallowed, and catchUpTimeoutMillis is set to 0.
var replSetConfig = replSet.getReplSetConfig();
replSetConfig.members[2].priority = 0;
if (!replSetConfig.settings) {
    replSetConfig.settings = {};
}
replSetConfig.settings.chainingAllowed = false;
replSetConfig.settings.catchUpTimeoutMillis = 0;
replSet.initiate(replSetConfig);

// `primary` plays "Node 1" and `secondary` plays "Node 2" in the outline at the top of the file.
var primary = replSet.getPrimary();
var secondary = replSet.getSecondary();

// Raise the replication log component to verbosity 3 on every node in the set.
var replVerbosityCmd = {
    setParameter: 1,
    logComponentVerbosity: {replication: {verbosity: 3}}
};
for (var nodeIdx = 0; nodeIdx < replSet.nodes.length; ++nodeIdx) {
    replSet.nodes[nodeIdx].adminCommand(replVerbosityCmd);
}

// Switches the 'rsSyncApplyStop' failpoint to 'alwaysOn' on `node` and asserts that the
// configureFailPoint command succeeded. `node` must expose `host` and `adminCommand`.
function enableFailPoint(node) {
    jsTest.log("enable failpoint " + node.host);
    var failPointCmd = {configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'};
    var cmdReply = node.adminCommand(failPointCmd);
    assert.commandWorked(cmdReply);
}

// Switches the 'rsSyncApplyStop' failpoint to 'off' on `node` and asserts that the
// configureFailPoint command succeeded. `node` must expose `host` and `adminCommand`.
function disableFailPoint(node) {
    jsTest.log("disable failpoint " + node.host);
    var failPointCmd = {configureFailPoint: 'rsSyncApplyStop', mode: 'off'};
    var cmdReply = node.adminCommand(failPointCmd);
    assert.commandWorked(cmdReply);
}

// The ReplSetTest stepUp helper waits for the node to leave drain mode, which this test
// deliberately prevents, so it cannot be used here. Instead, retry replSetStepUp until the
// set agrees on a primary and `node` itself reports PRIMARY state.
function stepUpNode(node) {
    var failureMsg = 'failed to step up node ' + node.host;

    // One attempt; an exception or a false return makes assert.soonNoExcept try again.
    function attemptStepUp() {
        var stepUpReply = node.adminCommand({replSetStepUp: 1});
        assert.commandWorked(stepUpReply);
        // No expected primary is passed, so that if a different node wins the election due
        // to unfortunate timing we simply loop and try again.
        replSet.awaitNodesAgreeOnPrimary();
        var status = node.adminCommand('replSetGetStatus');
        return status.myState === ReplSetTest.State.PRIMARY;
    }

    assert.soonNoExcept(attemptStepUp, failureMsg, replSet.kDefaultTimeoutMS);
}

// Do an initial insert to prevent the secondary from going into recovery. The write uses
// w: 3 and is followed by awaitReplication() before the failpoints below are enabled.
var numDocuments = 20;
var coll = primary.getDB("foo").foo;
assert.writeOK(coll.insert({x: 0}, {writeConcern: {w: 3}}));
replSet.awaitReplication();

// Enable fail point to stop replication.
var secondaries = replSet.getSecondaries();
secondaries.forEach(enableFailPoint);

// Record the buffer count before the new writes, so the wait below measures only the
// operations that arrive after the failpoint was enabled.
var bufferCountBefore = secondary.getDB('foo').serverStatus().metrics.repl.buffer.count;
// Document {x: 0} was inserted above, so start at 1 to end up with numDocuments in total.
for (var i = 1; i < numDocuments; ++i) {
    assert.writeOK(coll.insert({x: i}));
}
jsTestLog('Number of documents inserted into collection on primary: ' + numDocuments);
assert.eq(numDocuments, primary.getDB("foo").foo.find().itcount());

// Wait until the secondary has buffered (but, with the failpoint on, not applied) every one
// of the numDocuments - 1 inserts issued after the failpoint was enabled. Poll once per
// second and use the same ReplSetTest timeout as the other waits in this test.
assert.soon(
    function() {
        var serverStatus = secondary.getDB('foo').serverStatus();
        var bufferCount = serverStatus.metrics.repl.buffer.count;
        // Subtraction always produces a primitive number, so strict equality is safe below.
        var bufferCountChange = bufferCount - bufferCountBefore;
        jsTestLog('Number of operations buffered on secondary since stopping applier: ' +
                  bufferCountChange);
        return bufferCountChange === numDocuments - 1;
    },
    'secondary did not buffer operations for new inserts on primary',
    replSet.kDefaultTimeoutMS,
    1000);

// Step up Node 2 while its applier is still stopped, so it cannot finish draining.
// NOTE(review): reconnect() is not defined in this file; presumably it comes from the
// rslib.js load at the top.
reconnect(secondary);
stepUpNode(secondary);

// Secondary doesn't allow writes yet.
var isMasterReply = secondary.getDB("admin").runCommand({"isMaster": 1});
assert(!isMasterReply.ismaster);

// With the applier blocked, waiting 5000 ms for the drain to finish must time out.
var blockedDrainCmd = {
    replSetTest: 1,
    waitForDrainFinish: 5000,
};
var blockedDrainReply = secondary.adminCommand(blockedDrainCmd);
assert.commandFailedWithCode(
    blockedDrainReply,
    ErrorCodes.ExceededTimeLimit,
    'replSetTest waitForDrainFinish should time out when draining is not allowed to complete');

// Original primary steps up.
reconnect(primary);
stepUpNode(primary);

// Node 2 is stepped up a second time, with the failpoint still enabled.
reconnect(secondary);
stepUpNode(secondary);

// Disable fail point to allow replication.
secondaries.forEach(disableFailPoint);

// Now that ops can be applied, the drain is expected to finish within the default timeout.
var unblockedDrainCmd = {
    replSetTest: 1,
    waitForDrainFinish: replSet.kDefaultTimeoutMS,
};
var unblockedDrainReply = secondary.adminCommand(unblockedDrainCmd);
assert.commandWorked(
    unblockedDrainReply,
    'replSetTest waitForDrainFinish should work when draining is allowed to complete');

// Ensure new primary is writable.
jsTestLog('New primary should be writable after draining is complete');
var sentinelColl = secondary.getDB("foo").flag;
var sentinelWrite = sentinelColl.insert({sentinel: 1});
assert.writeOK(sentinelWrite);

// Check that all writes reached the secondary's op queue prior to
// stepping down the original primary and got applied.
var testColl = secondary.getDB("foo").foo;
var appliedDocCount = testColl.find().itcount();
assert.eq(appliedDocCount, numDocuments);

// Shut the replica set down now that all checks have passed.
replSet.stopSet();
})();