summaryrefslogtreecommitdiff
path: root/jstests/replsets/step_down_on_secondary.js
blob: 98ff70506a4f141d964f05f75a473782fed57f4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
 * Tests that we don't hit a three-way deadlock when a step down thread waits for the RSTL in
 * SECONDARY state. This occurs when two stepdowns begin concurrently and both attempt to acquire
 * the RSTL.
 *
 * This test creates a scenario where:
 * 1) Read thread acquires RSTL in MODE_IX and is blocked by a prepared txn (from secondary oplog
 *    application) due to a prepare conflict.
 * 2) Step down enqueues the RSTL in MODE_X and is blocked behind the read thread.
 * 3) Oplog applier is blocked trying to apply a 'commitTransaction' oplog entry. The commit is
 *    attempting to acquire the RSTL lock in MODE_IX but is blocked behind the step down thread.
 *
 * @tags: [uses_transactions, uses_prepare_transaction]
 */
(function() {

"use strict";
load('jstests/libs/parallelTester.js');
load("jstests/libs/curop_helpers.js");  // for waitForCurOpByFailPoint().
load("jstests/core/txns/libs/prepare_helpers.js");
load("jstests/libs/fail_point_util.js");

const dbName = "test";
const collName = "coll";

// Two-node replica set. The second node starts with priority 0, so node 0 is the only electable
// member until the forced reconfig below swaps the priorities.
const rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]});
rst.startSet();
rst.initiate();

const primary = rst.getPrimary();
const primaryDB = primary.getDB(dbName);
const primaryColl = primaryDB[collName];
const collNss = primaryColl.getFullName();
const secondary = rst.getSecondary();

// TestData is how values are handed to the functions run via startParallelShell() below.
TestData.dbName = dbName;
TestData.collName = collName;
TestData.collNss = collNss;

jsTestLog("Do a document write");
assert.commandWorked(primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
rst.awaitReplication();

jsTestLog("Hang primary on step down");
// First stepdown: issued in a parallel shell with the 'stepdownHangBeforeRSTLEnqueue' failpoint
// enabled, so the command stays parked until the failpoint is switched off further down.
// The command is expected to fail with NotWritablePrimary -- presumably because the node has
// already become a secondary by the time the command resumes.
const joinStepDownThread = startParallelShell(() => {
    assert.commandWorked(
        db.adminCommand({configureFailPoint: "stepdownHangBeforeRSTLEnqueue", mode: "alwaysOn"}));

    const freezeSecs = 24 * 60 * 60;  // 24 hours
    assert.commandFailedWithCode(db.adminCommand({"replSetStepDown": freezeSecs, "force": true}),
                                 ErrorCodes.NotWritablePrimary);
}, primary.port);

// Do not proceed until the stepdown command is actually sitting on the failpoint.
waitForCurOpByFailPointNoNS(primaryDB, "stepdownHangBeforeRSTLEnqueue");

jsTestLog("Force reconfig to swap the electable node");
// We must specify the node in getReplSetConfigFromNode, because we do not have a writable primary.
const newConfig = rst.getReplSetConfigFromNode(0);
const oldPrimaryId = rst.getNodeId(primary);
const newPrimaryId = rst.getNodeId(secondary);
newConfig.members[newPrimaryId].priority = 1;
newConfig.members[oldPrimaryId].priority = 0;
newConfig.version++;
assert.commandWorked(secondary.adminCommand({"replSetReconfig": newConfig, force: true}));

jsTestLog("Step up the new electable node");
rst.stepUp(secondary);

jsTestLog("Wait for step up to complete");
// Wait until the primary successfully steps down via heartbeat reconfig.
rst.waitForState(secondary, ReplSetTest.State.PRIMARY);
rst.waitForState(primary, ReplSetTest.State.SECONDARY);
const newPrimary = rst.getPrimary();

jsTestLog("Prepare a transaction on the new primary");
// The prepared (but not yet committed) update to {_id: 0} is what the read on the old primary
// will conflict with.
const session = newPrimary.startSession();
const sessionDb = session.getDatabase(dbName);
const sessionColl = sessionDb[collName];
session.startTransaction({writeConcern: {w: "majority"}});
assert.commandWorked(sessionColl.update({_id: 0}, {$set: {"b": 1}}));
const prepareTimestamp = PrepareHelpers.prepareTransaction(session);

jsTestLog("Get a cluster time for afterClusterTime reads");
// The operationTime of an insert issued after the prepare is used as the afterClusterTime of the
// conflicting read below.
TestData.clusterTimeAfterPrepare =
    assert
        .commandWorked(newPrimary.getDB(dbName)[collName].runCommand(
            "insert", {documents: [{_id: "clusterTimeAfterPrepare"}]}))
        .operationTime;

// Make sure the insert gets replicated to the old primary (current secondary) so that its
// clusterTime advances before we try to do an afterClusterTime read at the time of the insert.
rst.awaitReplication();

jsTestLog("Do a read that hits a prepare conflict on the old primary");
const wTPrintPrepareConflictLogFailPoint = configureFailPoint(primary, "WTPrintPrepareConflictLog");

// The read targets the document modified by the prepared transaction and is expected to be
// interrupted with InterruptedDueToReplStateChange once the hung stepdown resumes.
const joinReadThread = startParallelShell(() => {
    db.getMongo().setSecondaryOk();
    // Declared with 'const' so the parallel shell does not leak an implicit global.
    const oldPrimaryDB = db.getSiblingDB(TestData.dbName);

    assert.commandFailedWithCode(oldPrimaryDB.runCommand({
        find: TestData.collName,
        filter: {_id: 0},
        readConcern: {level: "local", afterClusterTime: TestData.clusterTimeAfterPrepare},
    }),
                                 ErrorCodes.InterruptedDueToReplStateChange);
}, primary.port);

jsTestLog("Wait to hit a prepare conflict");
wTPrintPrepareConflictLogFailPoint.wait();

jsTestLog("Allow step down to complete");
assert.commandWorked(
    primary.adminCommand({configureFailPoint: "stepdownHangBeforeRSTLEnqueue", mode: "off"}));

jsTestLog("Wait for step down to start killing operations");
checkLog.contains(primary, "Starting to kill user operations");

// Committing only after the stepdown has started killing operations sets up the third leg of the
// scenario: the 'commitTransaction' oplog entry reaches the old primary while the stepdown is
// in progress.
jsTestLog("Commit the prepared transaction");
assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));

jsTestLog("Join parallel shells");
joinStepDownThread();
joinReadThread();

// Validate that the read operation got killed during step down.
const replMetrics = assert.commandWorked(primary.adminCommand({serverStatus: 1})).metrics.repl;
assert.eq(replMetrics.stateTransition.lastStateTransition, "stepDown");
assert.eq(replMetrics.stateTransition.userOperationsKilled, 1, replMetrics);

jsTestLog("Check nodes have correct data");
assert.docEq(newPrimary.getDB(dbName)[collName].find({_id: 0}).toArray(), [{_id: 0, b: 1}]);
rst.awaitReplication();
assert.docEq(primary.getDB(dbName)[collName].find({_id: 0}).toArray(), [{_id: 0, b: 1}]);

rst.stopSet();
})();