summaryrefslogtreecommitdiff
path: root/jstests/replsets/read_operations_during_step_up.js
blob: f3a7bb96008bbb0ced34ad16aaa4652c954dd016 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
 * Test that the read operations are not killed and their connections are also not
 * closed during step up.
 */
load('jstests/libs/parallelTester.js');
load("jstests/libs/curop_helpers.js");  // for waitForCurOpByFailPoint().
load("jstests/replsets/rslib.js");

(function() {

"use strict";

// Names used throughout the test: the replica set is named after the test itself and all reads
// target a single collection.
const testName = jsTestName();
const dbName = "test";
const collName = "coll";

// Start a two-node replica set.
const rst = new ReplSetTest({nodes: 2, name: testName});
rst.startSet();
rst.initiateWithHighElectionTimeout();

// Handles on the current primary; it is only used to seed the collection.
const primary = rst.getPrimary();
const primaryDB = primary.getDB(dbName);
const primaryColl = primaryDB.getCollection(collName);

// Handles on the secondary, which is the node that is stepped up later in the test.
const secondary = rst.getSecondary();
const secondaryAdmin = secondary.getDB("admin");
const secondaryDB = secondary.getDB(dbName);
const secondaryColl = secondaryDB.getCollection(collName);
const secondaryCollNss = secondaryColl.getFullName();

// The parallel shell callbacks read the namespace from TestData.
Object.assign(TestData, {dbName, collName});

jsTestLog("1. Do a document write");
const insertRes = primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}});
assert.commandWorked(insertRes);
rst.awaitReplication();

// notPrimaryUnacknowledgedWrites may already be non-zero because of mirrored reads during
// initial sync, so record its current value as the baseline for the final comparison.
const statusBeforeStepUp = assert.commandWorked(secondaryAdmin.adminCommand({serverStatus: 1}));
let replMetrics = statusBeforeStepUp.metrics.repl;
const startingNumNotMasterErrors = replMetrics.network.notPrimaryUnacknowledgedWrites;

// Open a cursor on the secondary with an empty first batch; it is read from only after the
// step up has completed.
const openCursorRes =
    assert.commandWorked(secondaryDB.runCommand({"find": collName, batchSize: 0}));
const cursorIdToBeReadAfterStepUp = openCursorRes.cursor.id;

jsTestLog("2. Start blocking getMore cmd before step up");
// Issue a getMore against the node at secondary.port from a parallel shell. The callback takes
// its namespace from TestData and its connection from the shell's `db`, so every variable it
// uses is declared locally instead of leaking into the global scope.
const joinGetMoreThread = startParallelShell(() => {
    // Open another cursor on secondary before step up.
    const secondaryDB = db.getSiblingDB(TestData.dbName);
    secondaryDB.getMongo().setSecondaryOk();

    // batchSize: 0 returns only the cursor id, leaving the document for the getMore below.
    const cursorIdToBeReadDuringStepUp =
        assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName, batchSize: 0}))
            .cursor.id;

    // Enable the fail point for get more cmd.
    assert.commandWorked(db.adminCommand(
        {configureFailPoint: "waitAfterPinningCursorBeforeGetMoreBatch", mode: "alwaysOn"}));

    // This call is expected to block on the fail point above until the main shell turns it off.
    const getMoreRes = assert.commandWorked(secondaryDB.runCommand(
        {"getMore": cursorIdToBeReadDuringStepUp, collection: TestData.collName}));
    assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch);
}, secondary.port);

// Wait for getmore cmd to reach the fail point.
waitForCurOpByFailPoint(
    secondaryAdmin, secondaryCollNss, "waitAfterPinningCursorBeforeGetMoreBatch");

jsTestLog("2. Start blocking find cmd before step up");
// Issue a find against the node at secondary.port from a parallel shell. The callback takes its
// namespace from TestData and its connection from the shell's `db`; `secondaryDB` is declared
// locally so that it does not become an implicit global.
const joinFindThread = startParallelShell(() => {
    const secondaryDB = db.getSiblingDB(TestData.dbName);
    secondaryDB.getMongo().setSecondaryOk();

    // Enable the fail point for find cmd.
    assert.commandWorked(
        db.adminCommand({configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"}));

    // This call is expected to block on the fail point above until the main shell turns it off.
    const findRes = assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName}));
    assert.docEq([{_id: 0}], findRes.cursor.firstBatch);
}, secondary.port);

// Wait for find cmd to reach the fail point.
waitForCurOpByFailPoint(secondaryAdmin, secondaryCollNss, "waitInFindBeforeMakingBatch");

// The step up command is sent to secondary.port, so it is the secondary that steps up.
jsTestLog("3. Make secondary step up");
const joinStepUpThread = startParallelShell(() => {
    assert.commandWorked(db.adminCommand({"replSetStepUp": 100, "force": true}));
}, secondary.port);

// Wait until the step up has started to kill user operations.
checkLog.contains(secondary, "Starting to kill user operations");

// Enable "waitAfterCommandFinishesExecution" fail point to make sure the find and get more
// commands on the test collection do not complete before step up.
configureFailPoint(secondaryAdmin,
                   "waitAfterCommandFinishesExecution",
                   {ns: secondaryCollNss, commands: ["find", "getMore"]});

// Release the two fail points that the find and getMore commands are currently blocked on.
jsTestLog("4. Disable fail points");
configureFailPoint(secondaryAdmin, "waitInFindBeforeMakingBatch", {} /* data */, "off");
configureFailPoint(
    secondaryAdmin, "waitAfterPinningCursorBeforeGetMoreBatch", {} /* data */, "off");

// Wait until the secondary transitioned to PRIMARY state.
joinStepUpThread();
rst.waitForState(secondary, ReplSetTest.State.PRIMARY);

// We don't want to check if we have reached "waitAfterCommandFinishesExecution" fail point
// because we already know that the secondary has stepped up successfully. This implies that
// the find and get more commands are still running even after the node stepped up.
configureFailPoint(secondaryAdmin, "waitAfterCommandFinishesExecution", {} /* data */, "off");

// Wait for the getMore and find parallel shells to finish, in that order.
for (const joinThread of [joinGetMoreThread, joinFindThread]) {
    joinThread();
}

jsTestLog("5. Start get more cmd after step up");
// Read from the cursor that was opened before the step up.
const getMoreCmd = {"getMore": cursorIdToBeReadAfterStepUp, collection: collName};
const getMoreRes = assert.commandWorked(secondaryDB.runCommand(getMoreCmd));
assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch);

// Check the replication metrics: the last state transition was a step up, it killed no user
// operations, and no network disconnection happened due to failed unacknowledged operations.
const statusAfterStepUp = assert.commandWorked(secondaryAdmin.adminCommand({serverStatus: 1}));
replMetrics = statusAfterStepUp.metrics.repl;
const {stateTransition, network} = replMetrics;
assert.eq(stateTransition.lastStateTransition, "stepUp");
assert.eq(stateTransition.userOperationsKilled, 0);
// At least the find and getmore commands issued before step up must be counted as running.
assert.gte(stateTransition.userOperationsRunning, 2);
// The counter must not have moved from the baseline captured before the step up.
assert.eq(network.notPrimaryUnacknowledgedWrites, startingNumNotMasterErrors);

rst.stopSet();
})();