summaryrefslogtreecommitdiff
path: root/jstests/replsets/initial_sync_fcv_downgrade.js
blob: e5e5c6f7b740fc0a55924a624e186ebaa69009bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/*
 * Tests the following scenarios.
 * 1. A replica set config containing 'newlyAdded' members should cause FCV downgrade to fail.
 * 2. FCV downgrade blocks after a new config got mutated with 'newlyAdded' field (due to
 * addition of voters) until the mutated new config gets stored locally.
 * 3. FCV downgrade blocks until all nodes in the replica set have latest config without
 * 'newlyAdded' field.
 *
 * This tests behavior centered around downgrading FCV.
 * @tags: [multiversion_incompatible]
 */
(function() {
'use strict';

load('jstests/replsets/rslib.js');
load("jstests/libs/parallel_shell_helpers.js");  // for funWithArgs()

// Bring up a one-node replica set. Chaining is turned off so that nodes performing initial sync
// always sync from the primary.
const rst = new ReplSetTest({
    nodes: 1,
    settings: {chainingAllowed: false},
});
rst.startSet();
rst.initiateWithHighElectionTimeout();

const dbName = jsTest.name();
const collName = "coll";

// Handles on the primary that are shared by the helpers and scenarios in this test.
const primary = rst.getPrimary();
const primaryAdminDB = primary.getDB("admin");
const db = primary.getDB(dbName);
const primaryColl = db.getCollection(collName);

/**
 * Restores the replica set to a steady state at the end of a scenario.
 *
 * Turns off the 'initialSyncHangAfterDataCloning' failpoint on 'conn', waits until 'conn' is no
 * longer 'newlyAdded' and has reached SECONDARY, replicates one document to all nodes, and
 * finally clears the primary's global log.
 *
 * @param conn - connection to the node that was added during the scenario.
 */
function testCleanup(conn) {
    jsTestLog("Perform test cleanup");

    // Let the new node's initial sync proceed.
    const disableHangCmd = {configureFailPoint: "initialSyncHangAfterDataCloning", mode: "off"};
    assert.commandWorked(conn.adminCommand(disableHangCmd));

    // The node must lose its 'newlyAdded' marker and become a secondary before continuing.
    const newNodeId = rst.getNodeId(conn);
    waitForNewlyAddedRemovalForNodeToBeCommitted(primary, newNodeId);
    rst.waitForState(conn, ReplSetTest.State.SECONDARY);

    // Write one document and make sure every node has it.
    const insertRes = primaryColl.insert({x: "somedoc"});
    assert.commandWorked(insertRes);
    rst.awaitReplication();

    // Clear the primary's in-memory log before the next scenario's log checks.
    assert.commandWorked(primary.adminCommand({clearLog: "global"}));
}

/**
 * Asserts that the 'featureCompatibilityVersion' document in the primary's admin.system.version
 * collection carries the expected 'version' and 'targetVersion' values.
 *
 * The document is fetched once so both fields are compared against the same read, and a missing
 * document fails with an explicit assertion rather than a TypeError on property access.
 *
 * @param {Object} expected
 * @param expected.version - value the document's 'version' field must equal.
 * @param expected.targetVersion - value the document's 'targetVersion' field must equal (the
 *     scenarios in this test pass null).
 */
function checkFCV({version, targetVersion}) {
    const fcvDoc = primaryAdminDB.system.version.findOne({_id: "featureCompatibilityVersion"});
    assert(fcvDoc, "featureCompatibilityVersion document not found in admin.system.version");

    assert.eq(fcvDoc.version, version, "unexpected FCV 'version'");
    assert.eq(fcvDoc.targetVersion, targetVersion, "unexpected FCV 'targetVersion'");
}

/**
 * Adds a new voting, priority-0 node to the replica set and issues the reconfig that installs it.
 *
 * @param {Object} options
 * @param {boolean} [options.parallel=false] - when true, the reconfig is issued through
 *     startParallelShell() against the primary's port; otherwise it runs inline.
 * @param {Object} [options.startupParams={}] - passed as 'setParameter' for the new node.
 * @returns {{conn, reconfigThread}} 'conn' is the new node's connection. 'reconfigThread' is
 *     whatever startParallelShell() returned in the parallel case, and undefined otherwise.
 */
function addNewVotingNode({parallel = false, startupParams = {}}) {
    const conn = rst.add({rsConfig: {priority: 0, votes: 1}, setParameter: startupParams});
    jsTestLog(`Adding a new voting node {${conn.host}} to the replica set`);

    // Start from the config currently on the node, swap in the member list that includes the new
    // node, and bump the version.
    const nextConfig = rst.getReplSetConfigFromNode();
    nextConfig.members = rst.getReplSetConfig().members;
    nextConfig.version += 1;

    // Uses only its argument and shell globals because it may be handed to funWithArgs().
    // NOTE(review): in the parallel case 'db' presumably resolves to the parallel shell's own
    // global connection rather than this file's 'db' - confirm.
    const runReconfig = (config) => {
        assert.adminCommandWorkedAllowingNetworkError(
            db, {replSetReconfig: config, maxTimeMS: ReplSetTest.kDefaultTimeoutMS});
    };

    let reconfigThread;
    if (parallel) {
        reconfigThread = startParallelShell(funWithArgs(runReconfig, nextConfig), primary.port);
    } else {
        reconfigThread = runReconfig(nextConfig);
    }

    return {conn, reconfigThread};
}

/**
 * Logs a progress message and then uses checkLog.containsJson() to look for log id 21184 on
 * 'conn'.
 * NOTE(review): 21184 is presumably the message logged when the
 * 'initialSyncHangAfterDataCloning' failpoint is hit - confirm.
 *
 * @param conn - connection to the node expected to hang during initial sync.
 */
function waitForInitialSyncHang(conn) {
    const initialSyncHangLogId = 21184;
    jsTestLog(`Wait for ${conn.host} to hang during initial sync`);
    checkLog.containsJson(conn, initialSyncHangLogId);
}

// Scenario # 1: A repl config that contains a 'newlyAdded' member makes FCV downgrade fail.
let newNode = addNewVotingNode({
    startupParams: {"failpoint.initialSyncHangAfterDataCloning": tojson({mode: "alwaysOn"})},
});
waitForInitialSyncHang(newNode.conn);

// Member 1 must still carry the 'newlyAdded' field: two members in total, only one voter.
assert(isMemberNewlyAdded(primary, 1));
const scenario1VoteCounts = {
    votingMembersCount: 1,
    majorityVoteCount: 1,
    writableVotingMembersCount: 1,
    writeMajorityCount: 1,
    totalMembersCount: 2,
};
assertVoteCount(primary, scenario1VoteCounts);

jsTestLog(`Downgrade FCV to ${lastLTSFCV}`);
const scenario1DowngradeRes = primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV});
assert.commandFailedWithCode(scenario1DowngradeRes, ErrorCodes.ConflictingOperationInProgress);

// The failed downgrade must leave the FCV document unchanged.
checkFCV({version: latestFCV, targetVersion: null});

// Release the hung node and return the set to a steady state.
testCleanup(newNode.conn);

// Scenario # 2: FCV downgrade blocks after a new config got mutated with 'newlyAdded' field
// (due to addition of voters) until the mutated new config gets stored locally.
//
// Make the reconfig command hang.
assert.commandWorked(primary.adminCommand(
    {configureFailPoint: "ReconfigHangBeforeConfigValidationCheck", mode: 'alwaysOn'}));

// Start the reconfig command in a parallel shell; it will block on the failpoint above.
newNode = addNewVotingNode({
    parallel: true,
    startupParams: {"failpoint.initialSyncHangAfterDataCloning": tojson({mode: 'alwaysOn'})}
});

jsTestLog("Wait for reconfig command on primary to hang before storing the new config locally.");
checkLog.containsJson(primary, 4637900);

// Issue the downgrade from a second parallel shell while the reconfig is still hung. The function
// body only uses shell globals ('db', 'lastLTSFCV', ...) since it runs outside this scope.
const fcvDowngradeThread = startParallelShell(() => {
    jsTestLog("Downgrade FCV to " + lastLTSFCV);
    assert.commandFailedWithCode(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}),
                                 ErrorCodes.ConflictingOperationInProgress);
}, primary.port);

jsTestLog("Wait for 'setFeatureCompatibilityVersion' cmd to hang on fcv resource mutex lock");
assert.soon(
    () => {
        return primaryAdminDB
                   .currentOp({
                       "command.setFeatureCompatibilityVersion": lastLTSFCV,
                       waitingForLock: true,
                       "lockStats.Mutex.acquireWaitCount.W": NumberLong(1)
                   })
                   .inprog.length === 1;
    },
    () => {
        // currentOp() returns a document whose operations live in its 'inprog' array (as read by
        // the predicate above); it is not a cursor, so there is no toArray() to call.
        return "Failed to find a matching op" + tojson(primaryAdminDB.currentOp().inprog);
    });

jsTestLog("Resume reconfig to unblock fcv command.");
assert.commandWorked(primary.adminCommand(
    {configureFailPoint: "ReconfigHangBeforeConfigValidationCheck", mode: 'off'}));

waitForInitialSyncHang(newNode.conn);

// Check that 'newlyAdded' field is set for the member at index 2: three members, two voters.
assert(isMemberNewlyAdded(primary, 2));
assertVoteCount(primary, {
    votingMembersCount: 2,
    majorityVoteCount: 2,
    writableVotingMembersCount: 2,
    writeMajorityCount: 2,
    totalMembersCount: 3,
});

// Wait for both parallel shells to finish.
fcvDowngradeThread();
newNode.reconfigThread();

// The failed downgrade must leave the FCV document unchanged.
checkFCV({version: latestFCV, targetVersion: null});

// Cleanup the test.
testCleanup(newNode.conn);

// Scenario # 3: FCV downgrade blocks until all nodes in the replica set have latest config without
// 'newlyAdded' field.
const secondary = rst.getSecondary();
// Enable fail point on secondary0 to block secondary0 from receiving new config via heartbeat.
assert.commandWorked(
    secondary.adminCommand({configureFailPoint: "blockHeartbeatReconfigFinish", mode: 'alwaysOn'}));

// No initial-sync failpoint this time, so the new node is allowed to finish initial sync.
newNode = addNewVotingNode({});

// Wait until primary removed the 'newlyAdded' field from repl config.
// addNewVotingNode() returns a {conn, reconfigThread} wrapper; the node id must be looked up from
// the connection itself, as at every other call site in this test.
waitForNewlyAddedRemovalForNodeToBeCommitted(primary, rst.getNodeId(newNode.conn));

// Check that 'newlyAdded' field is not set: all four members now vote.
assert(!isMemberNewlyAdded(primary, 3));
assertVoteCount(primary, {
    votingMembersCount: 4,
    majorityVoteCount: 3,
    writableVotingMembersCount: 4,
    writeMajorityCount: 3,
    totalMembersCount: 4,
});

// FCV downgrade should fail as secondary0 config version is not up-to-date with primary's config
// version. The 5 second wtimeout bounds how long the command waits before giving up.
jsTestLog("Downgrade FCV to " + lastLTSFCV);
const res = assert.commandFailedWithCode(
    primary.adminCommand(
        {setFeatureCompatibilityVersion: lastLTSFCV, "writeConcern": {wtimeout: 5 * 1000}}),
    ErrorCodes.WriteConcernFailed);
assert(res.errmsg.startsWith(
           "Failed to wait for the current replica set config to propagate to all nodes"),
       res.errmsg);

// The failed downgrade must leave the FCV document unchanged.
checkFCV({version: latestFCV, targetVersion: null});

// Let secondary0 install the latest config again before cleaning up.
assert.commandWorked(
    secondary.adminCommand({configureFailPoint: "blockHeartbeatReconfigFinish", mode: 'off'}));
// Cleanup the test.
testCleanup(newNode.conn);

rst.stopSet();
}());