summaryrefslogtreecommitdiff
path: root/jstests/replsets/read_operations_during_step_down.js
blob: 4f9507ff902f276fab44e39384d4d5b3bd119e08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
 * Test that the read operations are not killed and their connections are also not
 * closed during step down.
 */
load("jstests/libs/check_log.js");
load('jstests/libs/parallelTester.js');
load("jstests/libs/curop_helpers.js");  // for waitForCurOpByFailPoint().

(function() {

    "use strict";

    // Test outline:
    //   - Insert one document and open a cursor that is read only after the step down.
    //   - Park a getMore and a find on fail points so that both are in flight.
    //   - Force the primary to step down while those reads are blocked.
    //   - Release the reads and verify that they return the document, that the pre-opened
    //     cursor is still readable, and that the step down metrics report no killed operations.

    const testName = "readOpsDuringStepDown";
    const dbName = "test";
    const collName = "coll";

    // Two-node replica set; the second node is configured with priority 0.
    const rst = new ReplSetTest({name: testName, nodes: [{}, {rsConfig: {priority: 0}}]});
    rst.startSet();
    rst.initiate();

    const primary = rst.getPrimary();
    const primaryDB = primary.getDB(dbName);
    const primaryAdmin = primary.getDB("admin");
    const primaryColl = primaryDB[collName];
    const collNss = primaryColl.getFullName();

    // The functions handed to startParallelShell() below read the database and collection names
    // from TestData rather than from the local constants of this function.
    TestData.dbName = dbName;
    TestData.collName = collName;

    jsTestLog("1. Do a document write");
    assert.writeOK(
        primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
    rst.awaitReplication();

    // Open a cursor on primary. batchSize 0 returns no documents in the first batch, so the
    // inserted document is still left to be fetched by the getMore issued after the step down.
    const cursorIdToBeReadAfterStepDown =
        assert.commandWorked(primaryDB.runCommand({"find": collName, batchSize: 0})).cursor.id;

    jsTestLog("2. Start blocking getMore cmd before step down");
    const joinGetMoreThread = startParallelShell(() => {
        // Open another cursor on primary before step down. Everything in here is declared
        // locally so that the parallel shell does not create implicit globals.
        const testDB = db.getSiblingDB(TestData.dbName);
        const cursorIdToBeReadDuringStepDown =
            assert.commandWorked(testDB.runCommand({"find": TestData.collName, batchSize: 0}))
                .cursor.id;

        // Enable the fail point for get more cmd.
        assert.commandWorked(db.adminCommand(
            {configureFailPoint: "waitAfterPinningCursorBeforeGetMoreBatch", mode: "alwaysOn"}));

        // This call is expected to block on the fail point until the main thread disables it.
        const getMoreRes = assert.commandWorked(testDB.runCommand(
            {"getMore": cursorIdToBeReadDuringStepDown, collection: TestData.collName}));
        assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch);
    }, primary.port);

    // Wait for getmore cmd to reach the fail point.
    waitForCurOpByFailPoint(primaryAdmin, collNss, "waitAfterPinningCursorBeforeGetMoreBatch");

    jsTestLog("2. Start blocking find cmd before step down");
    const joinFindThread = startParallelShell(() => {
        // Enable the fail point for find cmd.
        assert.commandWorked(
            db.adminCommand({configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"}));

        // This call is expected to block on the fail point until the main thread disables it.
        const findRes = assert.commandWorked(
            db.getSiblingDB(TestData.dbName).runCommand({"find": TestData.collName}));
        assert.docEq([{_id: 0}], findRes.cursor.firstBatch);

    }, primary.port);

    // Wait for find cmd to reach the fail point.
    waitForCurOpByFailPoint(primaryAdmin, collNss, "waitInFindBeforeMakingBatch");

    jsTestLog("3. Make primary step down");
    // The step down is issued from its own shell so that this thread stays free to release the
    // blocked read commands while the step down is in progress.
    const joinStepDownThread = startParallelShell(() => {
        assert.commandWorked(db.adminCommand({"replSetStepDown": 100, "force": true}));
    }, primary.port);

    // Wait until the step down has started to kill user operations.
    checkLog.contains(primary, "Starting to kill user operations");

    // Enable "waitAfterReadCommandFinishesExecution" fail point to make sure the find and get more
    // commands on database 'test' does not complete before step down.
    assert.commandWorked(primaryAdmin.runCommand({
        configureFailPoint: "waitAfterReadCommandFinishesExecution",
        data: {db: dbName},
        mode: "alwaysOn"
    }));

    jsTestLog("4. Disable fail points");
    assert.commandWorked(
        primaryAdmin.runCommand({configureFailPoint: "waitInFindBeforeMakingBatch", mode: "off"}));
    assert.commandWorked(primaryAdmin.runCommand(
        {configureFailPoint: "waitAfterPinningCursorBeforeGetMoreBatch", mode: "off"}));

    // Wait until the primary transitioned to SECONDARY state.
    joinStepDownThread();
    rst.waitForState(primary, ReplSetTest.State.SECONDARY);

    // We don't want to check if we have reached "waitAfterReadCommandFinishesExecution" fail point
    // because we already know that the primary has stepped down successfully. This implies that
    // the find and get more commands are still running even after the node stepped down.
    assert.commandWorked(primaryAdmin.runCommand(
        {configureFailPoint: "waitAfterReadCommandFinishesExecution", mode: "off"}));

    // Wait for find & getmore thread to join. Their own assertions verify that both reads
    // returned the inserted document.
    joinGetMoreThread();
    joinFindThread();

    jsTestLog("5. Start get more cmd after step down");
    // Reuses the connection and the cursor that were opened before the step down.
    const getMoreRes = assert.commandWorked(
        primaryDB.runCommand({"getMore": cursorIdToBeReadAfterStepDown, collection: collName}));
    assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch);

    // Validate that no operations got killed on step down and no network disconnection happened due
    // to failed unacknowledged operations.
    const replMetrics =
        assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1})).metrics.repl;
    assert.eq(replMetrics.stepDown.userOperationsKilled, 0);
    // Should account for find and getmore commands issued before step down.
    assert.gte(replMetrics.stepDown.userOperationsRunning, 2);
    assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0);

    rst.stopSet();
})();