1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
/*
* Tests that we don't read an oplog hole when we step up while waiting for a tailable oplog query.
* This test creates a configuration where one secondary, 'secondary', is syncing from a different
* secondary, 'newPrimary', which is soon to become primary. As the new node becomes primary, the
* other secondary oplog tailer should not observe any oplog holes.
*
* @tags: [
* multiversion_incompatible,
* ]
*/
(function() {
'use strict';
load("jstests/replsets/rslib.js");
load("jstests/libs/fail_point_util.js");
// Replica set with chaining enabled (so 'secondary' may sync from 'newPrimary' rather
// than the primary) and a very long election timeout.
var rst = new ReplSetTest({
name: TestData.testName,
// The long election timeout results in a 30-second getMore, plenty of time to hit bugs.
settings: {chainingAllowed: true, electionTimeoutMillis: 60 * 1000},
nodes: [
{},
{},
],
});
const nodes = rst.startSet();
// Initiate in two steps so that the first two nodes finish initial sync before the third begins
// its initial sync. This prevents the long getMore timeout from causing the first initial sync to
// take so much time that the second cannot succeed.
rst.initiate();
const oldPrimary = nodes[0];
const newPrimary = nodes[1];
// Third node, added after the initial initiate (see comment above). priority: 0 ensures it
// can never be elected, so the step-up below deterministically targets 'newPrimary'.
const secondary = rst.add({rsConfig: {priority: 0}});
// Make sure this secondary syncs only from the node bound to be the new primary.
assert.commandWorked(secondary.adminCommand({
configureFailPoint: "forceSyncSourceCandidate",
mode: "alwaysOn",
data: {hostAndPort: newPrimary.host}
}));
rst.reInitiate();
// Make sure when the original primary syncs, it's only from the secondary; this avoids spurious log
// messages.
assert.commandWorked(oldPrimary.adminCommand({
configureFailPoint: "forceSyncSourceCandidate",
mode: "alwaysOn",
data: {hostAndPort: secondary.host}
}));
// Seed a write and wait for it so every node starts from a common replicated state.
assert.commandWorked(oldPrimary.getDB(TestData.testName).test.insert({x: 1}));
rst.awaitReplication();
// Force the secondary tailing the newPrimary to yield its getMore.
const planExecFP = configureFailPoint(newPrimary, "planExecutorHangWhileYieldedInWaitForInserts");
jsTestLog("Stepping up new primary");
assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1}));
// Wait for the node to transition to primary and accept writes.
assert.eq(newPrimary, rst.getPrimary());
// Hang collection creation before its create entry is logged to the oplog. Combined with
// the {x: 2} insert below, this is what manufactures the oplog hole (per the
// "Creating hole" step); the exact hole mechanics are internal to the server.
const createCollFP = configureFailPoint(newPrimary, "hangBeforeLoggingCreateCollection");
const createShell = startParallelShell(() => {
// Implicitly creates the collection.
assert.commandWorked(db.getSiblingDB(TestData.testName).newcoll.insert({y: 2}));
}, newPrimary.port);
jsTestLog("Waiting for oplog tailer to yield");
planExecFP.wait();
jsTestLog("Waiting for collection creation to hang");
createCollFP.wait();
jsTestLog("Creating hole and resuming oplog tail");
// This insert commits while the collection creation above is still hung; then release the
// yielded oplog tailer so it resumes reading while the hole exists.
assert.commandWorked(newPrimary.getDB(TestData.testName).test.insert({x: 2}));
planExecFP.off();
// Give enough time for the oplog tailer to resume and observe the oplog hole. The expectation is
// that the secondary oplog tailer should not see any holes. If it does, and misses the collection
// creation oplog entry, then it will crash because it will attempt to apply the insert operation on
// a non-existent namespace. While this specific scenario produces a crash, in general this type of
// bug can introduce data corruption.
sleep(3000);
// Release the hung create, join the parallel shell, and verify full replication before
// shutting down (awaitReplication would fail if the secondary crashed applying the hole).
createCollFP.off();
createShell();
rst.awaitReplication();
rst.stopSet();
}());
|