summaryrefslogtreecommitdiff
path: root/jstests/replsets/step_down_during_draining.js
blob: a52602ea3d72feb250e1d21ac757e16aaa62becf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Test stepdown during drain mode
// 1. Set up a 3-node set. Assume Node 1 is the primary at the beginning for simplicity.
// 2. Prevent applying retrieved ops on all secondaries, including Node 2.
// 3. Insert data to ensure Node 2 has ops to apply in its queue.
// 4. Step up Node 2. Now it enters drain mode, but cannot proceed.
// 5. Step up Node 1. Wait until Node 2 knows of a higher term and steps down.
//    Node 2 re-enables bgsync producer while it's still in drain mode.
// 6. Step up Node 2 again. It enters drain mode again.
// 7. Enable applying ops.
// 8. Ensure the ops in queue are applied and that Node 2 begins to accept writes as usual.

load("jstests/replsets/rslib.js");

(function() {
"use strict";
var replSet = new ReplSetTest({name: 'testSet', nodes: 3});
var nodes = replSet.nodeList();
replSet.startSet();
var conf = replSet.getReplSetConfig();
conf.members[2].priority = 0;
conf.settings = conf.settings || {};
conf.settings.chainingAllowed = false;
conf.settings.catchUpTimeoutMillis = 0;
replSet.initiate(conf);

var primary = replSet.getPrimary();
var secondary = replSet.getSecondary();

// Set verbosity for replication on all nodes.
var verbosity = {
    "setParameter": 1,
    "logComponentVerbosity": {
        "replication": {"verbosity": 3},
    }
};
replSet.nodes.forEach(function(node) {
    node.adminCommand(verbosity);
});

function enableFailPoint(node) {
    jsTest.log("enable failpoint " + node.host);
    assert.commandWorked(
        node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));
}

function disableFailPoint(node) {
    jsTest.log("disable failpoint " + node.host);
    assert.commandWorked(node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
}

// Do an initial insert to prevent the secondary from going into recovery
var numDocuments = 20;
var coll = primary.getDB("foo").foo;
assert.commandWorked(coll.insert({x: 0}, {writeConcern: {w: 3}}));
replSet.awaitReplication();

// Enable fail point to stop replication.
var secondaries = replSet.getSecondaries();
secondaries.forEach(enableFailPoint);

var bufferCountBefore = secondary.getDB('foo').serverStatus().metrics.repl.buffer.count;
for (var i = 1; i < numDocuments; ++i) {
    assert.commandWorked(coll.insert({x: i}));
}
jsTestLog('Number of documents inserted into collection on primary: ' + numDocuments);
assert.eq(numDocuments, primary.getDB("foo").foo.find().itcount());

assert.soon(
    function() {
        var serverStatus = secondary.getDB('foo').serverStatus();
        var bufferCount = serverStatus.metrics.repl.buffer.count;
        var bufferCountChange = bufferCount - bufferCountBefore;
        jsTestLog('Number of operations buffered on secondary since stopping applier: ' +
                  bufferCountChange);
        return bufferCountChange == numDocuments - 1;
    },
    'secondary did not buffer operations for new inserts on primary',
    replSet.kDefaultTimeoutMs,
    1000);

reconnect(secondary);
replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});

// Secondary doesn't allow writes yet.
var res = secondary.getDB("admin").runCommand({"hello": 1});
assert(!res.isWritablePrimary);

// Original primary steps up.
reconnect(primary);
replSet.stepUp(primary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});

reconnect(secondary);
replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});

// Disable fail point to allow replication.
secondaries.forEach(disableFailPoint);

// Wait for draining to complete.
const newPrimary = replSet.getPrimary();
assert.eq(secondary, newPrimary);

// Ensure new primary is writable, and wait until the write has been replicated to all nodes, to
// allow rollback (should it occur), to complete.
jsTestLog('New primary should be writable after draining is complete');
assert.commandWorked(newPrimary.getDB("foo").flag.insert({sentinel: 1}, {writeConcern: {w: 3}}));
// Check that all writes reached the secondary's op queue prior to
// stepping down the original primary and got applied.
assert.eq(newPrimary.getDB("foo").foo.find().itcount(), numDocuments);
replSet.stopSet();
})();