path: root/jstests/sharding/config_version_rollback.js
/**
 * Tests that if the config.version document on a config server is rolled back, that config server
 * will detect the new config.version document when it gets recreated.
 * @tags: [requires_persistence]
 */

(function() {
    "use strict";

    // Wait for fail point message to be logged.
    var checkLog = function(node, msg) {
        assert.soon(
            function() {
                var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
                for (var i = 0; i < logMessages.length; i++) {
                    if (logMessages[i].indexOf(msg) != -1) {
                        return true;
                    }
                }
                return false;
            },
            'Did not see a log entry for ' + node + ' containing the following message: ' + msg,
            60000,
            1000);
    };

    // The config.version document is written on transition to primary. We need to ensure this
    // config.version document is rolled back for this test.
    //
    // This means we have to guarantee the config.version document is not replicated by a secondary
    // during any of 1) initial sync, 2) steady state replication, or 3) catchup after election.
    //
    // 1) initial sync
    // We need non-primaries to finish initial sync so that they are electable, but without
    // replicating the config.version document. Since we can't control when the config.version
    // document is written (it's an internal write, not a client write), we turn on a failpoint
    // that stalls the write of the config.version document until we have ascertained that the
    // secondaries have finished initial sync.
    //
    // 2) steady state replication
    // Once the non-primaries have transitioned to secondary, we stop the secondaries from
    // replicating anything further by turning on a failpoint that stops the OplogFetcher. We then
    // allow the primary to write the config.version document.
    //
    // 3) catchup after election
    // When the primary is stepped down and one of the secondaries is elected, the new primary will
    // notice that it is behind the original primary and try to catch up for a short period. So, we
    // also ensure that this short period is 0 by setting catchUpTimeoutMillis to 0 earlier in the
    // ReplSetConfig passed to initiate().
    //
    // Thus, we guarantee the new primary will not have replicated the config.version document in
    // initial sync, steady state replication, or catchup, so the document will be rolled back.

    jsTest.log("Starting the replica set and waiting for secondaries to finish initial sync");
    var configRS = new ReplSetTest({nodes: 3});
    var nodes = configRS.startSet({
        configsvr: '',
        storageEngine: 'wiredTiger',
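        // Enabling the failpoint via a startup setParameter ensures it is active before any node
        // can transition to primary and write the config.version document.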
        setParameter: {
            "failpoint.transitionToPrimaryHangBeforeTakingGlobalExclusiveLock":
                "{'mode':'alwaysOn'}"
        }
    });
    var conf = configRS.getReplSetConfig();
    conf.settings = {catchUpTimeoutMillis: 0};

    // Ensure conf.members[0] is the only node that can become primary at first, so we know on which
    // nodes to wait for transition to SECONDARY.
    conf.members[1].priority = 0;
    conf.members[2].priority = 0;
    assert.commandWorked(configRS.nodes[0].adminCommand({replSetInitiate: conf}));

    jsTest.log("Waiting for " + nodes[1] + " and " + nodes[2] + " to transition to SECONDARY.");
    configRS.waitForState([nodes[1], nodes[2]], ReplSetTest.State.SECONDARY);

    jsTest.log("Stopping the OplogFetcher on all nodes");
    // Now that the secondaries have finished initial sync and are electable, stop replication.
    nodes.forEach(function(node) {
        assert.commandWorked(node.getDB('admin').runCommand(
            {configureFailPoint: 'stopOplogFetcher', mode: 'alwaysOn'}));
    });

    jsTest.log("Allowing the primary to write the config.version doc");
    nodes.forEach(function(node) {
        assert.commandWorked(node.adminCommand({
            configureFailPoint: "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock",
            mode: "off"
        }));
    });

    var origPriConn = configRS.getPrimary();
    var secondaries = configRS.getSecondaries();

    jsTest.log("Confirming that the primary has the config.version doc but the secondaries do not");
    var origConfigVersionDoc;
    assert.soon(function() {
        origConfigVersionDoc = origPriConn.getCollection('config.version').findOne();
        return null !== origConfigVersionDoc;
    });
    secondaries.forEach(function(secondary) {
        secondary.setSlaveOk();
        assert.eq(null, secondary.getCollection('config.version').findOne());
    });

    jsTest.log("Checking that manually deleting the config.version document is not allowed.");
    assert.writeErrorWithCode(origPriConn.getCollection('config.version').remove({}), 40302);
    assert.commandFailedWithCode(origPriConn.getDB('config').runCommand({drop: 'version'}), 40303);

    jsTest.log("Making the secondaries electable by giving all nodes non-zero, equal priority.");
    var res = configRS.getPrimary().adminCommand({replSetGetConfig: 1});
    assert.commandWorked(res);
    conf = res.config;
    conf.members[0].priority = 1;
    conf.members[1].priority = 1;
    conf.members[2].priority = 1;
    conf.version++;
    assert.commandWorked(configRS.getPrimary().adminCommand({replSetReconfig: conf}));

    jsTest.log("Stepping down original primary");
    try {
        origPriConn.adminCommand({replSetStepDown: 60, force: true});
    } catch (x) {
        // replSetStepDown closes all connections, thus a network exception is expected here.
    }

    jsTest.log("Waiting for new primary to be elected and write a new config.version document");
    var newPriConn = configRS.getPrimary();
    assert.neq(newPriConn, origPriConn);

    // Because the new primary never replicated the original config.version document, it writes a
    // fresh one with a new clusterId on its own transition to primary.
    var newConfigVersionDoc = newPriConn.getCollection('config.version').findOne();
    assert.neq(null, newConfigVersionDoc);
    assert.neq(origConfigVersionDoc.clusterId, newConfigVersionDoc.clusterId);

    jsTest.log("Re-enabling replication on all nodes");
    nodes.forEach(function(node) {
        assert.commandWorked(
            node.getDB('admin').runCommand({configureFailPoint: 'stopOplogFetcher', mode: 'off'}));
    });

    jsTest.log(
        "Waiting for original primary to rollback and replicate new config.version document");
    configRS.waitForState(origPriConn, ReplSetTest.State.SECONDARY);
    origPriConn.setSlaveOk();
    assert.soonNoExcept(function() {
        var foundClusterId = origPriConn.getCollection('config.version').findOne().clusterId;
        return friendlyEqual(newConfigVersionDoc.clusterId, foundClusterId);
    });

    jsTest.log("Forcing original primary to step back up and become primary again.");

    // Do prep work to smooth the original primary's transition back to primary: wait for all
    // nodes to catch up so that they are eligible to become primary, and step down the current
    // primary so that it stops generating new oplog entries.
    configRS.awaitReplication();

    try {
        newPriConn.adminCommand({replSetStepDown: 60, force: true});
    } catch (x) {
        // replSetStepDown closes all connections, thus a network exception is expected here.
    }

    // Ensure former primary is eligible to become primary once more.
    assert.commandWorked(origPriConn.adminCommand({replSetFreeze: 0}));

    // Keep on trying until this node becomes the primary. One reason it can fail is when the other
    // nodes have newer oplog entries and will thus refuse to vote for this node.
    assert.soon(function() {
        return (origPriConn.adminCommand({replSetStepUp: 1})).ok;
    });

    assert.soon(function() {
        return origPriConn == configRS.getPrimary();
    });

    // Now we just need to start up a mongos and add a shard to confirm that the shard gets added
    // with the proper clusterId value.
    jsTest.log("Starting mongos");
    var mongos = MongoRunner.runMongos({configdb: configRS.getURL()});

    jsTest.log("Starting shard mongod");
    var shard = MongoRunner.runMongod({shardsvr: ""});

    jsTest.log("Adding shard to cluster");
    assert.commandWorked(mongos.adminCommand({addShard: shard.host}));

    jsTest.log("Verifying that shard was provided the proper clusterId");
    var shardIdentityDoc = shard.getDB('admin').system.version.findOne({_id: 'shardIdentity'});
    printjson(shardIdentityDoc);
    assert.eq(newConfigVersionDoc.clusterId,
              shardIdentityDoc.clusterId,
              "oldPriClusterId: " + origConfigVersionDoc.clusterId);
    MongoRunner.stopMongos(mongos);
    MongoRunner.stopMongod(shard);
    configRS.stopSet();

})();