summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXueruiFa <xuerui.fa@mongodb.com>2021-05-03 17:35:34 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-09-22 15:29:57 +0000
commit25d41df0440ea5bc136f255f980816f506c0d72a (patch)
tree6cbf41c035db5d08891ca55f46a01d0cde5c3b8f
parent75e59101ebc9d8a4633b270a3fd44d6e2058a1b9 (diff)
downloadmongo-25d41df0440ea5bc136f255f980816f506c0d72a.tar.gz
SERVER-55376: Ensure reconfig cannot roll back committed writes in PSA sets
(cherry picked from commit b5261275423215d567599f3d9862416f09e05aa2)
-rw-r--r--jstests/noPassthrough/reconfig_for_psa_set_shell.js42
-rw-r--r--jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js133
-rw-r--r--jstests/replsets/rslib.js16
-rw-r--r--jstests/replsets/unsafe_reconfig_to_psa_set_fails.js169
-rw-r--r--src/mongo/db/repl/repl_server_parameters.idl10
-rw-r--r--src/mongo/db/repl/repl_set_config.h7
-rw-r--r--src/mongo/db/repl/repl_set_config_checks.cpp44
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp6
-rw-r--r--src/mongo/shell/replsettest.js57
-rw-r--r--src/mongo/shell/servers.js1
-rw-r--r--src/mongo/shell/utils.js57
11 files changed, 522 insertions, 20 deletions
diff --git a/jstests/noPassthrough/reconfig_for_psa_set_shell.js b/jstests/noPassthrough/reconfig_for_psa_set_shell.js
new file mode 100644
index 00000000000..2b0ba125432
--- /dev/null
+++ b/jstests/noPassthrough/reconfig_for_psa_set_shell.js
@@ -0,0 +1,42 @@
+/**
+ * Tests the 'reconfigForPSASet()' shell function and makes sure that reconfig will succeed while
+ * preserving majority reads.
+ *
+ * @tags: [requires_journaling]
+ */
+
+(function() {
+'use strict';
+
+// Start up a PSA set with the secondary having 'votes: 0' and 'priority: 0'.
+const rst = new ReplSetTest({
+ name: jsTestName(),
+ nodes: [{}, {rsConfig: {votes: 0, priority: 0}}, {rsConfig: {arbiterOnly: true}}],
+});
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const primary = rst.getPrimary();
+assert.eq(primary, rst.nodes[0], "the primary should be the node at index 0");
+
+// Verify that a reconfig that directly gives the secondary 'votes: 1' and 'priority: 1' will fail.
+const config = rst.getReplSetConfigFromNode();
+config.members[1].votes = 1;
+config.members[1].priority = 1;
+
+let reconfigScript = `assert.commandFailedWithCode(rs.reconfig(${
+ tojson(config)}), ErrorCodes.NewReplicaSetConfigurationIncompatible)`;
+let result = runMongoProgram('mongo', '--port', primary.port, '--eval', reconfigScript);
+assert.eq(0, result, `reconfig did not fail with expected error code`);
+
+// Verify that calling 'reconfigForPSASet()' will succeed.
+reconfigScript = `assert.commandWorked(rs.reconfigForPSASet(1, ${tojson(config)}))`;
+result = runMongoProgram('mongo', '--port', primary.port, '--eval', reconfigScript);
+assert.eq(0, result, `reconfig did not succeed as expected`);
+
+const replSetGetConfig = assert.commandWorked(primary.adminCommand({replSetGetConfig: 1})).config;
+assert.eq(1, replSetGetConfig.members[1].votes);
+assert.eq(1, replSetGetConfig.members[1].priority);
+
+rst.stopSet();
+})();
diff --git a/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js b/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js
new file mode 100644
index 00000000000..29603ee32ee
--- /dev/null
+++ b/jstests/replsets/reconfig_avoids_rolling_back_committed_writes_in_psa_sets.js
@@ -0,0 +1,133 @@
+/**
+ * Tests the correct workflow for adding a voting electable node in a PSA set and ensures that no
+ * committed writes will be rolled back after that workflow. We start with a PSA set. We shut down
+ * the secondary and reconfigure it to have votes 0. Then, we do a majority write that will commit
+ * only on the primary, so that the secondary is now missing a majority committed write. Next, we
+ * test the correct workflow, which involves two reconfigs:
+ *
+ * 1) Give the secondary votes: 1 but priority: 0. This will not allow the stale secondary to run
+ * for election
+ *
+ * 2) Increase the priority on the secondary. With this reconfig, because of the Oplog
+ * Commitment rule, the secondary must have the previously committed write, and so it can safely
+ * become the primary
+ *
+ * Finally, we step up the secondary and verify that the oplog entry was not rolled back.
+ *
+ * @tags: [requires_fcv_44]
+ */
+
+(function() {
+"use strict";
+load("jstests/replsets/rslib.js");
+load("jstests/libs/write_concern_util.js");
+
+const rst = new ReplSetTest({
+ name: jsTestName(),
+ nodes: [{}, {}, {rsConfig: {arbiterOnly: true}}],
+});
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const collName = jsTestName();
+const primary = rst.getPrimary();
+assert.eq(primary, rst.nodes[0], "the primary should be the node at index 0");
+
+const testDb = primary.getDB("test");
+assert.commandWorked(testDb[collName].insert({a: 1}, {writeConcern: {w: "majority"}}));
+
+assertVoteCount(primary, {
+ votingMembersCount: 3,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 2,
+ writeMajorityCount: 2,
+ totalMembersCount: 3,
+});
+
+jsTestLog("Stop the secondary, which should be node 1");
+rst.stop(1);
+
+jsTestLog("Do a majority write that fails waiting for write concern");
+let res = testDb.runCommand(
+ {insert: collName, documents: [{a: 2}], writeConcern: {w: "majority", wtimeout: 3 * 1000}});
+assert.commandWorkedIgnoringWriteConcernErrors(res);
+checkWriteConcernTimedOut(res);
+
+// In config C0, the secondary will have 'votes: 0' and 'priority: 0'.
+let config = rst.getReplSetConfigFromNode();
+jsTestLog("Original config: " + tojson(config));
+config.members[1].votes = 0;
+config.members[1].priority = 0;
+config.version += 1;
+jsTestLog("Reconfiguring set to remove the secondary's vote. Config C0: " + tojson(config));
+assert.commandWorked(primary.adminCommand({replSetReconfig: config}));
+
+assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 3,
+});
+
+jsTestLog("Do a majority write that succeeds");
+// The secondary will not have this write because it was shut down.
+assert.commandWorked(testDb[collName].insert({a: 3}, {writeConcern: {w: "majority"}}));
+
+// At this point, the majority vote count is greater than the writable voting members count, since
+// the secondary still has 'votes: 0'. This indicates that there may not be an overlap between the
+// election quorum and the write quorum.
+
+// As a result, if we make the secondary a voter AND electable in a future reconfig, it is possible
+// for the secondary to be elected without the recent majority committed write. To avoid this, when
+// making the secondary a voting node again, first configure the secondary to have 'priority: 0', so
+// that it is not electable. Label this config 'C1'.
+config = rst.getReplSetConfigFromNode(primary.nodeId);
+config.members[1].votes = 1;
+config.members[1].priority = 0;
+config.version += 1;
+jsTestLog(
+ "Reconfiguring set to re-enable the secondary's vote and make it unelectable. Config C1: " +
+ tojson(config));
+assert.commandWorked(primary.adminCommand({replSetReconfig: config}));
+
+// The next reconfig, C2, will increase the priority of the secondary, so that it can
+// run for election. This is safe due to the Oplog Commitment rule, which guarantees that anything
+// committed in C0 will also be committed in C1.
+config = rst.getReplSetConfigFromNode(primary.nodeId);
+config.members[1].priority = 1;
+config.version += 1;
+jsTestLog("Reconfiguring set to allow the secondary to run for election. Config C2: " +
+ tojson(config));
+
+// Since the secondary is currently down, this reconfig will hang on waiting for the previous
+// majority write to be committed in the current config, C1.
+assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config, maxTimeMS: 3 * 1000}),
+ ErrorCodes.CurrentConfigNotCommittedYet);
+
+// After restarting the secondary, this reconfig should succeed.
+jsTestLog("Restarting the secondary");
+rst.restart(1);
+assert.commandWorked(primary.adminCommand({replSetReconfig: config}));
+
+assertVoteCount(primary, {
+ votingMembersCount: 3,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 2,
+ writeMajorityCount: 2,
+ totalMembersCount: 3,
+});
+
+jsTestLog("Stepping up the secondary");
+assert.soonNoExcept(() => {
+ assert.commandWorked(rst.nodes[1].adminCommand({replSetStepUp: 1}));
+ assert.eq(rst.getPrimary(), rst.nodes[1]);
+ return true;
+});
+
+// Verify that the committed write was not rolled back.
+assert.eq(rst.nodes[0].getDB("test")[collName].find({a: 3}).itcount(), 1);
+assert.eq(rst.nodes[1].getDB("test")[collName].find({a: 3}).itcount(), 1);
+
+rst.stopSet();
+})();
diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js
index 5ea11ccc00d..fc7b4fad1f5 100644
--- a/jstests/replsets/rslib.js
+++ b/jstests/replsets/rslib.js
@@ -21,6 +21,7 @@ var clearFailPoint;
var isConfigCommitted;
var waitForConfigReplication;
var assertSameConfigContent;
+var assertVoteCount;
(function() {
"use strict";
@@ -738,4 +739,19 @@ assertSameConfigContent = function(configA, configB) {
configB.version = versionB;
configB.term = termB;
};
+
+assertVoteCount = function(node, {
+ votingMembersCount,
+ majorityVoteCount,
+ writableVotingMembersCount,
+ writeMajorityCount,
+ totalMembersCount
+}) {
+ const status = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
+ assert.eq(status["votingMembersCount"], votingMembersCount, status);
+ assert.eq(status["majorityVoteCount"], majorityVoteCount, status);
+ assert.eq(status["writableVotingMembersCount"], writableVotingMembersCount, status);
+ assert.eq(status["writeMajorityCount"], writeMajorityCount, status);
+ assert.eq(status["members"].length, totalMembersCount, status);
+};
}());
diff --git a/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js b/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js
new file mode 100644
index 00000000000..165489f7e33
--- /dev/null
+++ b/jstests/replsets/unsafe_reconfig_to_psa_set_fails.js
@@ -0,0 +1,169 @@
+/**
+ * Asserts that a reconfig from a replica set with one writable voting node to a
+ * Primary-Secondary-Arbiter (PSA) topology fails if the secondary is electable. We test two
+ * reconfig scenarios, both of which should fail:
+ *
+ * 1) PA set to PSA set
+ * 2) PSA set with S having {votes: 0, priority: 0} to S with {votes: 1, priority: 1}
+ *
+ * Finally, we test the correct workflow for converting a replica set with only one writable voting
+ * node to a PSA architecture. This involves running two reconfigs. The first reconfig should
+ * add/configure the secondary to have {votes: 1, priority: 0}, to prevent it from being electable.
+ * The second reconfig should then increase its priority to the desired level.
+ *
+ * @tags: [requires_fcv_44]
+ */
+
+(function() {
+"use strict";
+load("jstests/replsets/rslib.js");
+
+{
+ jsTestLog("Testing reconfig from PA set to PSA set fails");
+ const rst = new ReplSetTest({
+ name: jsTestName(),
+ nodes: [{}, {rsConfig: {arbiterOnly: true}}],
+ });
+ rst.startSet();
+ rst.initiateWithHighElectionTimeout();
+
+ const primary = rst.getPrimary();
+ assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 2,
+ });
+
+ const config = rst.getReplSetConfigFromNode();
+ jsTestLog("Original config: " + tojson(config));
+
+ // This new node will be a secondary with {votes: 1, priority: 1}, which should not be able to
+ // be added in reconfig if the new topology has a PSA architecture.
+ rst.add({});
+ const newConfig = rst.getReplSetConfig();
+ config.members = newConfig.members;
+ config.version += 1;
+ jsTestLog(`New config with secondary added: ${tojson(config)}`);
+
+ assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config}),
+ ErrorCodes.NewReplicaSetConfigurationIncompatible);
+
+ // Verify that the vote counts have not changed, since the reconfig did not successfully
+ // complete.
+ assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 2,
+ });
+
+ // The node was not successfully added to the config, so we should not run validation checks on
+ // it when we shut down the replica set.
+ rst.stopSet(null, null, {skipCheckDBHashes: true});
+}
+
+{
+ jsTestLog("Testing reconfig to remove {votes: 0} from secondary in PSA set fails");
+ const rst = new ReplSetTest({
+ nodes: [{}, {rsConfig: {votes: 0, priority: 0}}, {rsConfig: {arbiterOnly: true}}],
+ });
+ rst.startSet();
+ rst.initiateWithHighElectionTimeout();
+
+ const primary = rst.getPrimary();
+ assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 3,
+ });
+
+ const config = rst.getReplSetConfigFromNode();
+ jsTestLog("Original config: " + tojson(config));
+
+ // Modify the secondary to have {votes: 1, priority: 1}. This will also fail the reconfig.
+ config.members[1].votes = 1;
+ config.members[1].priority = 1;
+ jsTestLog(
+ `New config with secondary reconfigured to have {votes: 1, priority: 1}:
+ ${tojson(config)}`);
+
+ assert.commandFailedWithCode(primary.adminCommand({replSetReconfig: config}),
+ ErrorCodes.NewReplicaSetConfigurationIncompatible);
+
+ // Verify that the vote counts have not changed, since the reconfig did not successfully
+ // complete.
+ assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 3,
+ });
+
+ rst.stopSet();
+}
+
+{
+ jsTestLog(
+ "Testing that the correct workflow for converting a replica set with only one writable voting node to a PSA architecture succeeds");
+ const rst = new ReplSetTest({
+ nodes: [{}, {rsConfig: {arbiterOnly: true}}],
+ });
+ rst.startSet();
+ rst.initiateWithHighElectionTimeout();
+
+ const primary = rst.getPrimary();
+ assertVoteCount(primary, {
+ votingMembersCount: 2,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 1,
+ writeMajorityCount: 1,
+ totalMembersCount: 2,
+ });
+
+ let config = rst.getReplSetConfigFromNode();
+ jsTestLog("Original config: " + tojson(config));
+
+ // First, add the secondary with {priority: 0}, so that it is not electable.
+ rst.add({rsConfig: {votes: 1, priority: 0}});
+ const newConfig = rst.getReplSetConfig();
+ config.members = newConfig.members;
+ config.version += 1;
+ jsTestLog(`Reconfiguring set to add a secondary with {votes: 1, priority: 0}. New config: ${
+     tojson(config)}`);
+ assert.commandWorked(primary.adminCommand({replSetReconfig: config}));
+ rst.awaitReplication();
+
+ assertVoteCount(primary, {
+ votingMembersCount: 3,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 2,
+ writeMajorityCount: 2,
+ totalMembersCount: 3
+ });
+
+ // Second, give the secondary a non-zero priority level.
+ config = rst.getReplSetConfigFromNode();
+ config.members[1].priority = 1;
+ config.version += 1;
+ jsTestLog(`Reconfiguring set to give the secondary a positive priority. New config: ${
+ tojson(config)}`);
+ assert.commandWorked(primary.adminCommand({replSetReconfig: config}));
+ rst.awaitReplication();
+
+ assertVoteCount(primary, {
+ votingMembersCount: 3,
+ majorityVoteCount: 2,
+ writableVotingMembersCount: 2,
+ writeMajorityCount: 2,
+ totalMembersCount: 3
+ });
+
+ rst.stopSet();
+}
+})();
diff --git a/src/mongo/db/repl/repl_server_parameters.idl b/src/mongo/db/repl/repl_server_parameters.idl
index 2e06b7101f7..607141d502b 100644
--- a/src/mongo/db/repl/repl_server_parameters.idl
+++ b/src/mongo/db/repl/repl_server_parameters.idl
@@ -335,3 +335,13 @@ server_parameters:
cpp_vartype: AtomicWord<bool>
cpp_varname: gStoreFindAndModifyImagesInSideCollection
default: false
+
+ enableReconfigRollbackCommittedWritesCheck:
+ description: >-
+ Enables the reconfig check to ensure that committed writes cannot be rolled back in
+ sets with arbiters. Enabled by default. Test-only.
+ test_only: true
+ set_at: [ startup, runtime ]
+ cpp_vartype: AtomicWord<bool>
+ cpp_varname: enableReconfigRollbackCommittedWritesCheck
+ default: true
diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h
index 257fb9b11f2..d4a1c262955 100644
--- a/src/mongo/db/repl/repl_set_config.h
+++ b/src/mongo/db/repl/repl_set_config.h
@@ -489,6 +489,13 @@ public:
*/
bool containsArbiter() const;
+ /**
+ * Returns true if the config consists of a Primary-Secondary-Arbiter (PSA) architecture.
+ * This is detected as a config with exactly three members, exactly two of which are
+ * data-bearing.
+ */
+ bool isPSASet() const {
+ return getNumMembers() == 3 && getNumDataBearingMembers() == 2;
+ }
+
private:
/**
* Parses the "settings" subdocument of a replica set configuration.
diff --git a/src/mongo/db/repl/repl_set_config_checks.cpp b/src/mongo/db/repl/repl_set_config_checks.cpp
index 0462c840fbc..d5a2e2d7364 100644
--- a/src/mongo/db/repl/repl_set_config_checks.cpp
+++ b/src/mongo/db/repl/repl_set_config_checks.cpp
@@ -259,6 +259,50 @@ Status validateOldAndNewConfigsCompatible(const ReplSetConfig& oldConfig,
}
}
}
+
+ if (!enableReconfigRollbackCommittedWritesCheck.load()) {
+ // Skip the following check. This parameter can only be set to false in tests.
+ return Status::OK();
+ }
+
+ const int numVotersOldConfig = std::count_if(oldConfig.membersBegin(),
+ oldConfig.membersEnd(),
+ [](const auto& x) { return x.isVoter(); });
+ const int numArbitersOldConfig = std::count_if(oldConfig.membersBegin(),
+ oldConfig.membersEnd(),
+ [](const auto& x) { return x.isArbiter(); });
+ const int majorityVoteCountOldConfig = numVotersOldConfig / 2 + 1;
+ const int writableVotingMembersCountOldConfig = numVotersOldConfig - numArbitersOldConfig;
+
+ // An overlap between an election and write quorum is guaranteed to exist if the number of
+ // writable voting members is greater than or equal to the majority of voters. This is because
+ // at least one writable voting member will be a part of the majority in any election. This
+ // overlap is important so that if a candidate node that has not replicated recently committed
+ // writes decides to run for election, the writable voting member participating in the election
+ // will not vote for the candidate. As a result, the candidate cannot successfully become the
+ // primary.
+ const auto overlapBetweenElectionAndWriteQuorumOldConfig =
+ majorityVoteCountOldConfig <= writableVotingMembersCountOldConfig;
+ const auto numElectableNodesNewConfig = std::count_if(
+ newConfig.membersBegin(),
+ newConfig.membersEnd(),
+ // Count the members that can run for election, i.e. those with a positive priority.
+ [](const MemberConfig& mem) { return mem.getPriority() > 0.0; });
+
+ // If the aforementioned overlap doesn't exist, and we have a PSA set where the secondary can
+ // run for election, there is a risk that the secondary will not have replicated recent majority
+ // committed writes, but will be elected primary with the help of the arbiter. To prevent this
+ // from happening, we fail the reconfig and refer the user to the appropriate next steps.
+ if (!overlapBetweenElectionAndWriteQuorumOldConfig && newConfig.isPSASet() &&
+ numElectableNodesNewConfig > 1) {
+ return Status(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ // TODO (SERVER-56801): Add placeholder link.
+ str::stream()
+ << "Rejecting reconfig where the new config has a PSA topology and the secondary "
+ "is electable, but the old config contains only one writable node");
+ }
+
return Status::OK();
}
} // namespace
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 66c36a993dd..d12ed99f13a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -581,8 +581,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
myIndex = StatusWith<int>(-1);
}
- if (serverGlobalParams.enableMajorityReadConcern && localConfig.getNumMembers() == 3 &&
- localConfig.getNumDataBearingMembers() == 2) {
+ if (serverGlobalParams.enableMajorityReadConcern && localConfig.isPSASet()) {
LOGV2_OPTIONS(21315, {logv2::LogTag::kStartupWarnings}, "");
LOGV2_OPTIONS(
21316,
@@ -3338,7 +3337,8 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx,
"replSetReconfig got {error} while validating {newConfig}",
"replSetReconfig error while validating new config",
"error"_attr = validateStatus,
- "newConfig"_attr = newConfigObj);
+ "newConfig"_attr = newConfigObj,
+ "oldConfig"_attr = oldConfigObj);
return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, validateStatus.reason());
}
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index db481bd0b4f..f15d13a4839 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -1368,6 +1368,57 @@ var ReplSetTest = function(opts) {
});
}
+ asCluster(self.nodes, () => {
+ for (let node of self.nodes) {
+ // asCluster() currently does not validate connections with X509 authentication.
+ // If the test is using X509, we skip disabling the server parameter as the
+ // 'setParameter' command will fail.
+ const nodeId = "n" + self.getNodeId(node);
+ const nodeOptions = self.nodeOptions[nodeId] || {};
+ const options =
+ (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions;
+ const authMode = options.clusterAuthMode;
+ const notX509 =
+ authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile";
+
+ // We should only be checking the binary version if we are not using X509 auth,
+ // as any server command will fail if the 'authMode' is X509.
+ if (notX509) {
+ let serverStatus;
+ try {
+ serverStatus =
+ assert.commandWorked(node.getDB("admin").runCommand({serverStatus: 1}));
+ } catch (e) {
+ // If we are not authorized, skip resetting the flag to enable reconfig
+ // checks. This is safe as we should have sufficient coverage across
+ // non-auth tests.
+ assert.eq(ErrorCodes.Unauthorized, e.code, tojson(e));
+ continue;
+ }
+
+ const currVersion = serverStatus.version;
+ const binVersionLatest =
+ MongoRunner.areBinVersionsTheSame(MongoRunner.getBinVersionFor(currVersion),
+ MongoRunner.getBinVersionFor("latest"));
+
+ // Only set the following server parameters for nodes running on the latest
+ // binary version.
+ if (!binVersionLatest) {
+ continue;
+ }
+
+ // Re-enable the reconfig check to ensure that committed writes cannot be rolled
+ // back. We disabled this check during initialization to ensure that replica
+ // sets will not fail to start up.
+ if (jsTestOptions().enableTestCommands &&
+ !jsTestOptions().networkErrorAndTxnOverrideConfig.retryOnNetworkErrors) {
+ assert.commandWorked(node.adminCommand(
+ {setParameter: 1, enableReconfigRollbackCommittedWritesCheck: true}));
+ }
+ }
+ }
+ });
+
const awaitTsStart = new Date(); // Measure duration of awaitLastStableRecoveryTimestamp.
if (!doNotWaitForStableRecoveryTimestamp) {
// Speed up the polling interval so we can detect recovery timestamps more quickly.
@@ -2835,6 +2886,12 @@ var ReplSetTest = function(opts) {
options.setParameter.numInitialSyncConnectAttempts =
options.setParameter.numInitialSyncConnectAttempts || 60;
+ // Disable a check in reconfig that will prevent certain configs with arbiters from
+ // spinning up. We will re-enable this check after the replica set has finished initiating.
+ if (jsTestOptions().enableTestCommands) {
+ options.setParameter.enableReconfigRollbackCommittedWritesCheck = false;
+ }
+
if (tojson(options) != tojson({}))
printjson(options);
diff --git a/src/mongo/shell/servers.js b/src/mongo/shell/servers.js
index b5ee35bab4d..002bc1fce21 100644
--- a/src/mongo/shell/servers.js
+++ b/src/mongo/shell/servers.js
@@ -679,6 +679,7 @@ MongoRunner.mongodOptions = function(opts = {}) {
_removeSetParameterIfBeforeVersion(opts, "numInitialSyncAttempts", "3.3.12");
_removeSetParameterIfBeforeVersion(opts, "numInitialSyncConnectAttempts", "3.3.12");
_removeSetParameterIfBeforeVersion(opts, "migrationLockAcquisitionMaxWaitMS", "4.1.7");
+ _removeSetParameterIfBeforeVersion(opts, "enableReconfigRollbackCommittedWritesCheck", "4.4.7");
if (!opts.logFile && opts.useLogFiles) {
opts.logFile = opts.dbpath + "/mongod.log";
diff --git a/src/mongo/shell/utils.js b/src/mongo/shell/utils.js
index 14170901732..a65ad7cb933 100644
--- a/src/mongo/shell/utils.js
+++ b/src/mongo/shell/utils.js
@@ -1445,35 +1445,42 @@ _awaitRSHostViaRSMonitor = function(hostAddr, desiredState, rsName, timeout) {
rs.help = function() {
print(
- "\trs.status() { replSetGetStatus : 1 } checks repl set status");
+ "\trs.status() { replSetGetStatus : 1 } checks repl set status");
print(
- "\trs.initiate() { replSetInitiate : null } initiates set with default settings");
+ "\trs.initiate() { replSetInitiate : null } initiates set with default settings");
print(
- "\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg");
+ "\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg");
print(
- "\trs.conf() get the current configuration object from local.system.replset");
+ "\trs.conf() get the current configuration object from local.system.replset");
print(
- "\trs.reconfig(cfg) updates the configuration of a running replica set with cfg (disconnects)");
+ "\trs.reconfig(cfg, opts) updates the configuration of a running replica set with cfg, using the given opts (disconnects)");
print(
- "\trs.add(hostportstr) add a new member to the set with default attributes (disconnects)");
+ "\trs.reconfigForPSASet(memberIndex, cfg, opts) updates the configuration of a Primary-Secondary-Arbiter (PSA) replica set while preserving majority writes");
print(
- "\trs.add(membercfgobj) add a new member to the set with extra attributes (disconnects)");
+ "\t memberIndex: index of the node being updated; cfg: the desired new config; opts: options passed in with the reconfig");
+ // TODO (SERVER-56801): Add placeholder link.
print(
- "\trs.addArb(hostportstr) add a new member which is arbiterOnly:true (disconnects)");
- print("\trs.stepDown([stepdownSecs, catchUpSecs]) step down as primary (disconnects)");
+ "\t Not to be used with every configuration");
print(
- "\trs.syncFrom(hostportstr) make a secondary sync from the given member");
+ "\trs.add(hostportstr) add a new member to the set with default attributes (disconnects)");
print(
- "\trs.freeze(secs) make a node ineligible to become primary for the time specified");
+ "\trs.add(membercfgobj) add a new member to the set with extra attributes (disconnects)");
print(
- "\trs.remove(hostportstr) remove a host from the replica set (disconnects)");
- print("\trs.secondaryOk() allow queries on secondary nodes");
+ "\trs.addArb(hostportstr) add a new member which is arbiterOnly:true (disconnects)");
+ print("\trs.stepDown([stepdownSecs, catchUpSecs]) step down as primary (disconnects)");
+ print(
+ "\trs.syncFrom(hostportstr) make a secondary sync from the given member");
+ print(
+ "\trs.freeze(secs) make a node ineligible to become primary for the time specified");
+ print(
+ "\trs.remove(hostportstr) remove a host from the replica set (disconnects)");
+ print("\trs.secondaryOk() allow queries on secondary nodes");
print();
- print("\trs.printReplicationInfo() check oplog size and time range");
+ print("\trs.printReplicationInfo() check oplog size and time range");
print(
- "\trs.printSecondaryReplicationInfo() check replica set members and replication lag");
- print("\tdb.isMaster() check who is primary");
- print("\tdb.hello() check who is primary");
+ "\trs.printSecondaryReplicationInfo() check replica set members and replication lag");
+ print("\tdb.isMaster() check who is primary");
+ print("\tdb.hello() check who is primary");
print();
print("\treconfiguration helpers disconnect from the database so the shell will display");
print("\tan error, even if the command succeeds.");
@@ -1543,6 +1550,22 @@ rs.reconfig = function(cfg, options) {
}
return this._runCmd(cmd);
};
+rs.reconfigForPSASet = function(memberIndex, cfg, options) {
+ const memberPriority = cfg.members[memberIndex].priority;
+ print(
+ `Running first reconfig to give member at index ${memberIndex} { votes: 1, priority: 0 }`);
+ cfg.members[memberIndex].votes = 1;
+ cfg.members[memberIndex].priority = 0;
+ const res = rs.reconfig(cfg, options);
+ if (!res.ok) {
+ return res;
+ }
+
+ print(`Running second reconfig to give member at index ${memberIndex} { priority: ${
+ memberPriority} }`);
+ cfg.members[memberIndex].priority = memberPriority;
+ return rs.reconfig(cfg, options);
+};
rs.add = function(hostport, arb) {
var cfg = hostport;