path: root/jstests/replsets/clean_shutdown_oplog_state.js
// SERVER-24933: a clean shutdown of a 3.2 node could leave entries in the oplog that had not yet
// been applied, which could lead to data loss following a downgrade to 3.0. This tests that after
// a clean shutdown all ops in the oplog have been applied. This only applies to 3.2 and is not a
// requirement for 3.4.
// WARNING: this test does not always fail deterministically. It is possible for the bug to be
// present without this test failing. In particular, if rst.stop(1) doesn't execute mid-batch, the
// test isn't actually exercising the bug. However, if the test fails, there is definitely a bug.
//
// @tags: [requires_persistence]
(function() {
    "use strict";

    var rst = new ReplSetTest({
        name: "name",
        nodes: 2,
    });

    rst.startSet();
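    // Make the second node non-voting and unelectable so node 0 stays primary for the whole test.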
    var conf = rst.getReplSetConfig();
    conf.members[1].votes = 0;
    conf.members[1].priority = 0;
    printjson(conf);
    rst.initiate(conf);

    var primary = rst.getPrimary();  // Waits for PRIMARY state.
    var slave = rst.nodes[1];

    // Pause oplog application on the secondary.
    assert.commandWorked(
        slave.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));

    // Prime the main collection.
    assert.writeOK(primary.getCollection("test.coll").insert({_id: -1}));

    // Start a w:2 write that will block until replication is resumed.
    var waitForReplStart = startParallelShell(function() {
        printjson(assert.writeOK(db.getCollection('side').insert({}, {writeConcern: {w: 2}})));
    }, primary.host.split(':')[1]);
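    // startParallelShell returns a join function; calling it below blocks until the w:2 write has
    // been acknowledged, i.e. until the secondary is applying oplog entries again.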

    // Insert a lot of data in increasing order to test.coll.
    var op = primary.getCollection("test.coll").initializeUnorderedBulkOp();
    for (var i = 0; i < 100 * 1000; i++) {
        op.insert({_id: i});
    }
    assert.writeOK(op.execute());

    // Resume replication and wait for ops to start replicating, then do a clean shutdown on the
    // secondary.
    assert.commandWorked(slave.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
    waitForReplStart();
    sleep(100);  // wait a bit to increase the chances of killing mid-batch.
    rst.stop(1);

    // Restart the secondary as a standalone node.
    var options = slave.savedOptions;
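    // Reuse the existing dbpath (noCleanData) so the on-disk oplog and minValid state can be
    // inspected, and drop the replSet option so the node comes up as a standalone.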
    options.noCleanData = true;
    delete options.replSet;
    var conn = MongoRunner.runMongod(options);
    assert.neq(null, conn, "secondary failed to start");

    // Following a clean shutdown of a 3.2 node, the oplog must exactly match the applied
    // operations. Additionally, the begin field must not be in the minValid document and the ts
    // must match the top of the oplog (SERVER-25353).
    var oplogDoc = conn.getCollection('local.oplog.rs')
                       .find({ns: 'test.coll'})
                       .sort({$natural: -1})
                       .limit(1)[0];
    var collDoc = conn.getCollection('test.coll').find().sort({_id: -1}).limit(1)[0];
    var minValidDoc =
        conn.getCollection('local.replset.minvalid').find().sort({$natural: -1}).limit(1)[0];
    printjson({oplogDoc: oplogDoc, collDoc: collDoc, minValidDoc: minValidDoc});
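    // The newest document in test.coll must match the newest test.coll oplog entry; a mismatch
    // means the clean shutdown left unapplied entries behind in the oplog.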
    assert.eq(collDoc._id, oplogDoc.o._id);
    assert(!('begin' in minValidDoc), 'begin in minValidDoc');
    assert.eq(minValidDoc.ts, oplogDoc.ts);

    rst.stopSet();
})();