diff options
author | XueruiFa <xuerui.fa@mongodb.com> | 2021-05-03 17:35:34 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-09-22 15:29:57 +0000 |
commit | 25d41df0440ea5bc136f255f980816f506c0d72a (patch) | |
tree | 6cbf41c035db5d08891ca55f46a01d0cde5c3b8f | |
parent | 75e59101ebc9d8a4633b270a3fd44d6e2058a1b9 (diff) | |
download | mongo-25d41df0440ea5bc136f255f980816f506c0d72a.tar.gz |
SERVER-55376: Ensure reconfig cannot roll back committed writes in PSA sets
(cherry picked from commit b5261275423215d567599f3d9862416f09e05aa2)
-rw-r--r-- | jstests/noPassthrough/reconfig_for_psa_set_shell.js | 42 | ||||
-rw-r--r-- | jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js | 133 | ||||
-rw-r--r-- | jstests/replsets/rslib.js | 16 | ||||
-rw-r--r-- | jstests/replsets/unsafe_reconfig_to_psa_set_fails.js | 169 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_server_parameters.idl | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_config.h | 7 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_config_checks.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 6 | ||||
-rw-r--r-- | src/mongo/shell/replsettest.js | 57 | ||||
-rw-r--r-- | src/mongo/shell/servers.js | 1 | ||||
-rw-r--r-- | src/mongo/shell/utils.js | 57 |
11 files changed, 522 insertions, 20 deletions
diff --git a/jstests/noPassthrough/reconfig_for_psa_set_shell.js b/jstests/noPassthrough/reconfig_for_psa_set_shell.js new file mode 100644 index 00000000000..2b0ba125432 --- /dev/null +++ b/jstests/noPassthrough/reconfig_for_psa_set_shell.js @@ -0,0 +1,42 @@ +/** + * Tests the 'reconfigForPSASet()' shell function and makes sure that reconfig will succeed while + * preserving majority reads. + * + * @tags: [requires_journaling] + */ + +(function() { +'use strict'; + +// Start up a PSA set with the secondary having 'votes: 0' and 'priority: 0'. +const rst = new ReplSetTest({ + name: jsTestName(), + nodes: [{}, {rsConfig: {votes: 0, priority: 0}}, {rsConfig: {arbiterOnly: true}}], +}); +rst.startSet(); +rst.initiateWithHighElectionTimeout(); + +const primary = rst.getPrimary(); +assert.eq(primary, rst.nodes[0], "the primary should be the node at index 0"); + +// Verify that a reconfig that directly gives the secondary 'votes: 1' and 'priority: 1' will fail. +const config = rst.getReplSetConfigFromNode(); +config.members[1].votes = 1; +config.members[1].priority = 1; + +let reconfigScript = `assert.commandFailedWithCode(rs.reconfig(${ + tojson(config)}), ErrorCodes.NewReplicaSetConfigurationIncompatible)`; +let result = runMongoProgram('mongo', '--port', primary.port, '--eval', reconfigScript); +assert.eq(0, result, `reconfig did not fail with expected error code`); + +// Verify that calling 'reconfigForPSASet()' will succeed. 
+reconfigScript = `assert.commandWorked(rs.reconfigForPSASet(1, ${tojson(config)}))`; +result = runMongoProgram('mongo', '--port', primary.port, '--eval', reconfigScript); +assert.eq(0, result, `reconfig did not succeed as expected`); + +const replSetGetConfig = assert.commandWorked(primary.adminCommand({replSetGetConfig: 1})).config; +assert.eq(1, replSetGetConfig.members[1].votes); +assert.eq(1, replSetGetConfig.members[1].priority); + +rst.stopSet(); +})(); diff --git a/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js b/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js new file mode 100644 index 00000000000..29603ee32ee --- /dev/null +++ b/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js @@ -0,0 +1,133 @@ +/** + * Tests the correct workflow for adding a voting electable node in a PSA set and ensures that no + * committed writes will be rolled back after that workflow. We start with a PSA set. We shut down + * the secondary and reconfigure it to have votes 0. Then, we do a majority write that will commit + * only on the primary, so that the secondary is now missing a majority committed write. Next, we + * test the correct workflow, which involves two reconfigs: + * + * 1) Give the secondary votes: 1 but priority: 0. This will not allow the stale secondary to run + * for election + * + * 2) Increase the priority on the secondary. With this reconfig, because of the Oplog + * Commitment rule, the secondary must have the previously committed write, and so it can safely + * become the primary + * + * Finally, we step up the secondary and verify that the oplog entry was not rolled back. 
+ * + * @tags: [requires_fcv_44] + */ + +(function() { +"use strict"; +load("jstests/replsets/rslib.js"); +load("jstests/libs/write_concern_util.js"); + +const rst = new ReplSetTest({ + name: jsTestName(), + nodes: [{}, {}, {rsConfig: {arbiterOnly: true}}], +}); +rst.startSet(); +rst.initiateWithHighElectionTimeout(); + +const collName = jsTestName(); +const primary = rst.getPrimary(); +assert.eq(primary, rst.nodes[0], "the primary should be the node at index 0"); + +const testDb = primary.getDB("test"); +assert.commandWorked(testDb[collName].insert({a: 1}, {writeConcern: {w: "majority"}})); + +assertVoteCount(primary, { + votingMembersCount: 3, + majorityVoteCount: 2, + writableVotingMembersCount: 2, + writeMajorityCount: 2, + totalMembersCount: 3, +}); + +jsTestLog("Stop the secondary, which should be node 1"); +rst.stop(1); + +jsTestLog("Do a majority write that fails waiting for write concern"); +let res = testDb.runCommand( + {insert: collName, documents: [{a: 2}], writeConcern: {w: "majority", wtimeout: 3 * 1000}}); +assert.commandWorkedIgnoringWriteConcernErrors(res); +checkWriteConcernTimedOut(res); + +// In config C0, the secondary will have 'votes: 0' and 'priority: 0'. +let config = rst.getReplSetConfigFromNode(); +jsTestLog("Original config: " + tojson(config)); +config.members[1].votes = 0; +config.members[1].priority = 0; +config.version += 1; +jsTestLog("Reconfiguring set to remove the secondary's vote. Config C0: " + tojson(config)); +assert.commandWorked(primary.adminCommand({replSetReconfig: config})); + +assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 3, +}); + +jsTestLog("Do a majority write that succeeds"); +// The secondary will not have this write because it was shut down. 
+assert.commandWorked(testDb[collName].insert({a: 3}, {writeConcern: {w: "majority"}})); + +// At this point, the majority vote count is greater than the writable voting members count, since +// the secondary still has 'votes: 0'. This indicates that there may not be an overlap between the +// election quorum and the write quorum. + +// As a result, if we make the secondary a voter AND electable in a future reconfig, it is possible +// for the secondary to be elected without the recent majority committed write. To avoid this, when +// making the secondary a voting node again, first configure the secondary to have 'priority: 0', so +// that it is not electable. Label this config 'C1'. +config = rst.getReplSetConfigFromNode(primary.nodeId); +config.members[1].votes = 1; +config.members[1].priority = 0; +config.version += 1; +jsTestLog( + "Reconfiguring set to re-enable the secondary's vote and make it unelectable. Config C1: " + + tojson(config)); +assert.commandWorked(primary.adminCommand({replSetReconfig: config})); + +// The next reconfig, C2, will increase the priority of the secondary, so that it can +// run for election. This is safe due to the Oplog Commitment rule, which guarantees that anything +// committed in C0 will also be committed in C1. +config = rst.getReplSetConfigFromNode(primary.nodeId); +config.members[1].priority = 1; +config.version += 1; +jsTestLog("Reconfiguring set to allow the secondary to run for election. Config C2: " + + tojson(config)); + +// Since the secondary is currently down, this reconfig will hang on waiting for the previous +// majority write to be committed in the current config, C1. +assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config, maxTimeMS: 3 * 1000}), + ErrorCodes.CurrentConfigNotCommittedYet); + +// After restarting the secondary, this reconfig should succeed. 
+jsTestLog("Restarting the secondary"); +rst.restart(1); +assert.commandWorked(primary.adminCommand({replSetReconfig: config})); + +assertVoteCount(primary, { + votingMembersCount: 3, + majorityVoteCount: 2, + writableVotingMembersCount: 2, + writeMajorityCount: 2, + totalMembersCount: 3, +}); + +jsTestLog("Stepping up the secondary"); +assert.soonNoExcept(() => { + assert.commandWorked(rst.nodes[1].adminCommand({replSetStepUp: 1})); + assert.eq(rst.getPrimary(), rst.nodes[1]); + return true; +}); + +// Verify that the committed write was not rolled back. +assert.eq(rst.nodes[0].getDB("test")[collName].find({a: 3}).itcount(), 1); +assert.eq(rst.nodes[1].getDB("test")[collName].find({a: 3}).itcount(), 1); + +rst.stopSet(); +})(); diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js index 5ea11ccc00d..fc7b4fad1f5 100644 --- a/jstests/replsets/rslib.js +++ b/jstests/replsets/rslib.js @@ -21,6 +21,7 @@ var clearFailPoint; var isConfigCommitted; var waitForConfigReplication; var assertSameConfigContent; +var assertVoteCount; (function() { "use strict"; @@ -738,4 +739,19 @@ assertSameConfigContent = function(configA, configB) { configB.version = versionB; configB.term = termB; }; + +assertVoteCount = function(node, { + votingMembersCount, + majorityVoteCount, + writableVotingMembersCount, + writeMajorityCount, + totalMembersCount +}) { + const status = assert.commandWorked(node.adminCommand({replSetGetStatus: 1})); + assert.eq(status["votingMembersCount"], votingMembersCount, status); + assert.eq(status["majorityVoteCount"], majorityVoteCount, status); + assert.eq(status["writableVotingMembersCount"], writableVotingMembersCount, status); + assert.eq(status["writeMajorityCount"], writeMajorityCount, status); + assert.eq(status["members"].length, totalMembersCount, status); +}; }()); diff --git a/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js b/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js new file mode 100644 index 00000000000..165489f7e33 --- 
/dev/null +++ b/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js @@ -0,0 +1,169 @@ +/** + * Asserts that a reconfig from a replica set with one writable voting node to a + * Primary-Secondary-Arbiter (PSA) topology fails if the secondary is electable. We test two + * reconfig scenarios, both of which should fail: + * + * 1) PA set to PSA set + * 2) PSA set with S having {votes: 0, priority: 0} to S with {votes: 1, priority: 1} + * + * Finally, we test the correct workflow for converting a replica set with only one writable voting + * node to a PSA architecture. This involves running two reconfigs. The first reconfig should + * add/configure the secondary to have {votes: 1, priority: 0}, to prevent it from being electable. + * The second reconfig should then increase its priority to the desired level. + * + * @tags: [requires_fcv_44] + */ + +(function() { +"use strict"; +load("jstests/replsets/rslib.js"); + +{ + jsTestLog("Testing reconfig from PA set to PSA set fails"); + const rst = new ReplSetTest({ + name: jsTestName(), + nodes: [{}, {rsConfig: {arbiterOnly: true}}], + }); + rst.startSet(); + rst.initiateWithHighElectionTimeout(); + + const primary = rst.getPrimary(); + assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 2, + }); + + const config = rst.getReplSetConfigFromNode(); + jsTestLog("Original config: " + tojson(config)); + + // This new node will be a secondary with {votes: 1, priority: 1}, which should not be able to + // be added in reconfig if the new topology has a PSA architecture. 
+ rst.add({}); + const newConfig = rst.getReplSetConfig(); + config.members = newConfig.members; + config.version += 1; + jsTestLog(`New config with secondary added: ${tojson(config)}`); + + assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config}), + ErrorCodes.NewReplicaSetConfigurationIncompatible); + + // Verify that the vote counts have not changed, since the reconfig did not successfully + // complete. + assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 2, + }); + + // The node was not successfully added to the config, so we should not run validation checks on + // it when we shut down the replica set. + rst.stopSet(null, null, {skipCheckDBHashes: true}); +} + +{ + jsTestLog("Testing reconfig to remove {votes: 0} from secondary in PSA set fails"); + const rst = new ReplSetTest({ + nodes: [{}, {rsConfig: {votes: 0, priority: 0}}, {rsConfig: {arbiterOnly: true}}], + }); + rst.startSet(); + rst.initiateWithHighElectionTimeout(); + + const primary = rst.getPrimary(); + assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 3, + }); + + const config = rst.getReplSetConfigFromNode(); + jsTestLog("Original config: " + tojson(config)); + + // Modify the secondary to have {votes: 1, priority: 1}. This will also fail the reconfig. + config.members[1].votes = 1; + config.members[1].priority = 1; + jsTestLog( + `New config with secondary reconfigured to have {votes: 1, priority: 1}: + ${tojson(config)}`); + + assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config}), + ErrorCodes.NewReplicaSetConfigurationIncompatible); + + // Verify that the vote counts have not changed, since the reconfig did not successfully + // complete. 
+ assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 3, + }); + + rst.stopSet(); +} + +{ + jsTestLog( + "Testing that the correct workflow for converting a replica set with only one writable voting node to a PSA architecture succeeds"); + const rst = new ReplSetTest({ + nodes: [{}, {rsConfig: {arbiterOnly: true}}], + }); + rst.startSet(); + rst.initiateWithHighElectionTimeout(); + + const primary = rst.getPrimary(); + assertVoteCount(primary, { + votingMembersCount: 2, + majorityVoteCount: 2, + writableVotingMembersCount: 1, + writeMajorityCount: 1, + totalMembersCount: 2, + }); + + let config = rst.getReplSetConfigFromNode(); + jsTestLog("Original config: " + tojson(config)); + + // First, add the secondary with {priority: 0}, so that it is not electable. + rst.add({rsConfig: {votes: 1, priority: 0}}); + const newConfig = rst.getReplSetConfig(); + config.members = newConfig.members; + config.version += 1; + jsTestLog(`Reconfiguring set to add a secondary with {votes: 1: priority: 0. New config: ${ + tojson(config)}`); + assert.commandWorked(primary.adminCommand({replSetReconfig: config})); + rst.awaitReplication(); + + assertVoteCount(primary, { + votingMembersCount: 3, + majorityVoteCount: 2, + writableVotingMembersCount: 2, + writeMajorityCount: 2, + totalMembersCount: 3 + }); + + // Second, give the secondary a non-zero priority level. + config = rst.getReplSetConfigFromNode(); + config.members[1].priority = 1; + config.version += 1; + jsTestLog(`Reconfiguring set to give the secondary a positive priority. 
New config: ${ + tojson(config)}`); + assert.commandWorked(primary.adminCommand({replSetReconfig: config})); + rst.awaitReplication(); + + assertVoteCount(primary, { + votingMembersCount: 3, + majorityVoteCount: 2, + writableVotingMembersCount: 2, + writeMajorityCount: 2, + totalMembersCount: 3 + }); + + rst.stopSet(); +} +})(); diff --git a/src/mongo/db/repl/repl_server_parameters.idl b/src/mongo/db/repl/repl_server_parameters.idl index 2e06b7101f7..607141d502b 100644 --- a/src/mongo/db/repl/repl_server_parameters.idl +++ b/src/mongo/db/repl/repl_server_parameters.idl @@ -335,3 +335,13 @@ server_parameters: cpp_vartype: AtomicWord<bool> cpp_varname: gStoreFindAndModifyImagesInSideCollection default: false + + enableReconfigRollbackCommittedWritesCheck: + description: >- + Enables the reconfig check to ensure that committed writes cannot be rolled back in + sets with arbiters. Enabled by default. Test-only. + test_only: true + set_at: [ startup, runtime ] + cpp_vartype: AtomicWord<bool> + cpp_varname: enableReconfigRollbackCommittedWritesCheck + default: true diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h index 257fb9b11f2..d4a1c262955 100644 --- a/src/mongo/db/repl/repl_set_config.h +++ b/src/mongo/db/repl/repl_set_config.h @@ -489,6 +489,13 @@ public: */ bool containsArbiter() const; + /** + * Returns true if the config consists of a Primary-Secondary-Arbiter (PSA) architecture. + */ + bool isPSASet() const { + return getNumMembers() == 3 && getNumDataBearingMembers() == 2; + } + private: /** * Parses the "settings" subdocument of a replica set configuration. 
diff --git a/src/mongo/db/repl/repl_set_config_checks.cpp b/src/mongo/db/repl/repl_set_config_checks.cpp index 0462c840fbc..d5a2e2d7364 100644 --- a/src/mongo/db/repl/repl_set_config_checks.cpp +++ b/src/mongo/db/repl/repl_set_config_checks.cpp @@ -259,6 +259,50 @@ Status validateOldAndNewConfigsCompatible(const ReplSetConfig& oldConfig, } } } + + if (!enableReconfigRollbackCommittedWritesCheck.load()) { + // Skip the following check. This parameter can only be set to false in tests. + return Status::OK(); + } + + const int numVotersOldConfig = std::count_if(oldConfig.membersBegin(), + oldConfig.membersEnd(), + [](const auto& x) { return x.isVoter(); }); + const int numArbitersOldConfig = std::count_if(oldConfig.membersBegin(), + oldConfig.membersEnd(), + [](const auto& x) { return x.isArbiter(); }); + const int majorityVoteCountOldConfig = numVotersOldConfig / 2 + 1; + const int writableVotingMembersCountOldConfig = numVotersOldConfig - numArbitersOldConfig; + + // An overlap between an election and write quorum is guaranteed to exist if the number of + // writable voting members is greater than or equal to the majority of voters. This is because + // at least one writable voting member will be a part of the majority in any election. This + // overlap is important so that if a candidate node that has not replicated recently committed + // writes decides to run for election, the writable voting member participating in the election + // will not vote for the candidate. As a result, the candidate cannot successfully become the + // primary. + const auto overlapBetweenElectionAndWriteQuorumOldConfig = + majorityVoteCountOldConfig <= writableVotingMembersCountOldConfig; + const auto numElectableNodesNewConfig = std::count_if( + newConfig.membersBegin(), + newConfig.membersEnd(), + // Use 'getBasePriority()' since newly added nodes also temporarily have 'priority: 0'. 
+ [](const MemberConfig& mem) { return mem.getPriority() > 0.0; }); + + // If the aforementioned overlap doesn't exist, and we have a PSA set where the secondary can + // run for election, there is a risk that the secondary will not have replicated recent majority + // committed writes, but will be elected primary with the help of the arbiter. To prevent this + // from happening, we fail the reconfig and refer the user to the appropriate next steps. + if (!overlapBetweenElectionAndWriteQuorumOldConfig && newConfig.isPSASet() && + numElectableNodesNewConfig > 1) { + return Status( + ErrorCodes::NewReplicaSetConfigurationIncompatible, + // TODO (SERVER-56801): Add placeholder link. + str::stream() + << "Rejecting reconfig where the new config has a PSA topology and the secondary " + "is electable, but the old config contains only one writable node"); + } + return Status::OK(); } } // namespace diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 66c36a993dd..d12ed99f13a 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -581,8 +581,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig( myIndex = StatusWith<int>(-1); } - if (serverGlobalParams.enableMajorityReadConcern && localConfig.getNumMembers() == 3 && - localConfig.getNumDataBearingMembers() == 2) { + if (serverGlobalParams.enableMajorityReadConcern && localConfig.isPSASet()) { LOGV2_OPTIONS(21315, {logv2::LogTag::kStartupWarnings}, ""); LOGV2_OPTIONS( 21316, @@ -3338,7 +3337,8 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx, "replSetReconfig got {error} while validating {newConfig}", "replSetReconfig error while validating new config", "error"_attr = validateStatus, - "newConfig"_attr = newConfigObj); + "newConfig"_attr = newConfigObj, + "oldConfig"_attr = oldConfigObj); return 
Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, validateStatus.reason()); } diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index db481bd0b4f..f15d13a4839 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -1368,6 +1368,57 @@ var ReplSetTest = function(opts) { }); } + asCluster(self.nodes, () => { + for (let node of self.nodes) { + // asCluster() currently does not validate connections with X509 authentication. + // If the test is using X509, we skip disabling the server parameter as the + // 'setParameter' command will fail. + const nodeId = "n" + self.getNodeId(node); + const nodeOptions = self.nodeOptions[nodeId] || {}; + const options = + (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions; + const authMode = options.clusterAuthMode; + const notX509 = + authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile"; + + // We should only be checking the binary version if we are not using X509 auth, + // as any server command will fail if the 'authMode' is X509. + if (notX509) { + let serverStatus; + try { + serverStatus = + assert.commandWorked(node.getDB("admin").runCommand({serverStatus: 1})); + } catch (e) { + // If we are not authorized, skip resetting the flag to enable reconfig + // checks. This is safe as we should have sufficient coverage across + // non-auth tests. + assert.eq(ErrorCodes.Unauthorized, e.code, tojson(e)); + continue; + } + + const currVersion = serverStatus.version; + const binVersionLatest = + MongoRunner.areBinVersionsTheSame(MongoRunner.getBinVersionFor(currVersion), + MongoRunner.getBinVersionFor("latest")); + + // Only set the following server parameters for nodes running on the latest + // binary version. + if (!binVersionLatest) { + continue; + } + + // Re-enable the reconfig check to ensure that committed writes cannot be rolled + // back. 
We disabled this check during initialization to ensure that replica + // sets will not fail to start up. + if (jsTestOptions().enableTestCommands && + !jsTestOptions().networkErrorAndTxnOverrideConfig.retryOnNetworkErrors) { + assert.commandWorked(node.adminCommand( + {setParameter: 1, enableReconfigRollbackCommittedWritesCheck: true})); + } + } + } + }); + const awaitTsStart = new Date(); // Measure duration of awaitLastStableRecoveryTimestamp. if (!doNotWaitForStableRecoveryTimestamp) { // Speed up the polling interval so we can detect recovery timestamps more quickly. @@ -2835,6 +2886,12 @@ var ReplSetTest = function(opts) { options.setParameter.numInitialSyncConnectAttempts = options.setParameter.numInitialSyncConnectAttempts || 60; + // Disable a check in reconfig that will prevent certain configs with arbiters from + // spinning up. We will re-enable this check after the replica set has finished initiating. + if (jsTestOptions().enableTestCommands) { + options.setParameter.enableReconfigRollbackCommittedWritesCheck = false; + } + if (tojson(options) != tojson({})) printjson(options); diff --git a/src/mongo/shell/servers.js b/src/mongo/shell/servers.js index b5ee35bab4d..002bc1fce21 100644 --- a/src/mongo/shell/servers.js +++ b/src/mongo/shell/servers.js @@ -679,6 +679,7 @@ MongoRunner.mongodOptions = function(opts = {}) { _removeSetParameterIfBeforeVersion(opts, "numInitialSyncAttempts", "3.3.12"); _removeSetParameterIfBeforeVersion(opts, "numInitialSyncConnectAttempts", "3.3.12"); _removeSetParameterIfBeforeVersion(opts, "migrationLockAcquisitionMaxWaitMS", "4.1.7"); + _removeSetParameterIfBeforeVersion(opts, "enableReconfigRollbackCommittedWritesCheck", "4.4.7"); if (!opts.logFile && opts.useLogFiles) { opts.logFile = opts.dbpath + "/mongod.log"; diff --git a/src/mongo/shell/utils.js b/src/mongo/shell/utils.js index 14170901732..a65ad7cb933 100644 --- a/src/mongo/shell/utils.js +++ b/src/mongo/shell/utils.js @@ -1445,35 +1445,42 @@ _awaitRSHostViaRSMonitor 
= function(hostAddr, desiredState, rsName, timeout) { rs.help = function() { print( - "\trs.status() { replSetGetStatus : 1 } checks repl set status"); + "\trs.status() { replSetGetStatus : 1 } checks repl set status"); print( - "\trs.initiate() { replSetInitiate : null } initiates set with default settings"); + "\trs.initiate() { replSetInitiate : null } initiates set with default settings"); print( - "\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg"); + "\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg"); print( - "\trs.conf() get the current configuration object from local.system.replset"); + "\trs.conf() get the current configuration object from local.system.replset"); print( - "\trs.reconfig(cfg) updates the configuration of a running replica set with cfg (disconnects)"); + "\trs.reconfig(cfg, opts) updates the configuration of a running replica set with cfg, using the given opts (disconnects)"); print( - "\trs.add(hostportstr) add a new member to the set with default attributes (disconnects)"); + "\trs.reconfigForPSASet(memberIndex, cfg, opts) updates the configuration of a Primary-Secondary-Arbiter (PSA) replica set while preserving majority writes"); print( - "\trs.add(membercfgobj) add a new member to the set with extra attributes (disconnects)"); + "\t memberIndex: index of the node being updated; cfg: the desired new config; opts: options passed in with the reconfig"); + // TODO (SERVER-56801): Add placeholder link. 
print( - "\trs.addArb(hostportstr) add a new member which is arbiterOnly:true (disconnects)"); - print("\trs.stepDown([stepdownSecs, catchUpSecs]) step down as primary (disconnects)"); + "\t Not to be used with every configuration"); print( - "\trs.syncFrom(hostportstr) make a secondary sync from the given member"); + "\trs.add(hostportstr) add a new member to the set with default attributes (disconnects)"); print( - "\trs.freeze(secs) make a node ineligible to become primary for the time specified"); + "\trs.add(membercfgobj) add a new member to the set with extra attributes (disconnects)"); print( - "\trs.remove(hostportstr) remove a host from the replica set (disconnects)"); - print("\trs.secondaryOk() allow queries on secondary nodes"); + "\trs.addArb(hostportstr) add a new member which is arbiterOnly:true (disconnects)"); + print("\trs.stepDown([stepdownSecs, catchUpSecs]) step down as primary (disconnects)"); + print( + "\trs.syncFrom(hostportstr) make a secondary sync from the given member"); + print( + "\trs.freeze(secs) make a node ineligible to become primary for the time specified"); + print( + "\trs.remove(hostportstr) remove a host from the replica set (disconnects)"); + print("\trs.secondaryOk() allow queries on secondary nodes"); print(); - print("\trs.printReplicationInfo() check oplog size and time range"); + print("\trs.printReplicationInfo() check oplog size and time range"); print( - "\trs.printSecondaryReplicationInfo() check replica set members and replication lag"); - print("\tdb.isMaster() check who is primary"); - print("\tdb.hello() check who is primary"); + "\trs.printSecondaryReplicationInfo() check replica set members and replication lag"); + print("\tdb.isMaster() check who is primary"); + print("\tdb.hello() check who is primary"); print(); print("\treconfiguration helpers disconnect from the database so the shell will display"); print("\tan error, even if the command succeeds."); @@ -1543,6 +1550,22 @@ rs.reconfig = function(cfg, 
options) { } return this._runCmd(cmd); }; +rs.reconfigForPSASet = function(memberIndex, cfg, options) { + const memberPriority = cfg.members[memberIndex].priority; + print( + `Running first reconfig to give member at index ${memberIndex} { votes: 1, priority: 0 }`); + cfg.members[memberIndex].votes = 1; + cfg.members[memberIndex].priority = 0; + const res = rs.reconfig(cfg, options); + if (!res.ok) { + return res; + } + + print(`Running second reconfig to give member at index ${memberIndex} { priority: ${ + memberPriority} }`); + cfg.members[memberIndex].priority = memberPriority; + return rs.reconfig(cfg, options); +}; rs.add = function(hostport, arb) { var cfg = hostport; |