summaryrefslogtreecommitdiff
path: root/jstests
diff options
context:
space:
mode:
author: William Schultz <william.schultz@mongodb.com>, 2020-04-14 11:48:15 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com>, 2020-04-14 16:14:41 +0000
commit: fcd1c0aae55c0b713ff329c18435c77867365748 (patch)
tree: 1db91bad86ca625ba9a06d59b6eefd8e814c678f /jstests
parent: 8b81217b65fa99c2391d248a879a43749e2f16b4 (diff)
download: mongo-fcd1c0aae55c0b713ff329c18435c77867365748.tar.gz
SERVER-45575 Add Javascript helper function to do automatic safe reconfigs
(cherry picked from commit b52c7b320bf6e6a031055e611fbb58cc76967352)
Diffstat (limited to 'jstests')
-rw-r--r-- jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js | 79
-rw-r--r-- jstests/noPassthrough/auto_safe_reconfig_helpers.js | 185
-rw-r--r-- jstests/replsets/libs/rename_across_dbs.js | 3
-rw-r--r-- jstests/replsets/libs/tags.js | 3
-rw-r--r-- jstests/replsets/remove1.js | 2
-rw-r--r-- jstests/replsets/rslib.js | 218
-rw-r--r-- jstests/sharding/primary_config_server_blackholed_from_mongos.js | 6
7 files changed, 462 insertions, 34 deletions
diff --git a/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js b/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js
new file mode 100644
index 00000000000..92bba2fc789
--- /dev/null
+++ b/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js
@@ -0,0 +1,79 @@
+/**
+ * Test that the 'reconfig' helper function correctly executes reconfigs between configs that have
+ * the maximum number of allowed voting nodes.
+ *
+ * The replica set has 8 members: 7 voting nodes plus 1 non-voting node. Every target config used
+ * below has exactly 7 voting members.
+ *
+ * @tags: [requires_replication]
+ */
+(function() {
+"use strict";
+
+// Make secondaries unelectable. Use 7 voting nodes (the maximum allowed) plus 1 non-voting node.
+const replTest = new ReplSetTest({
+ nodes: [
+ {},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0, votes: 0}}
+ ]
+});
+replTest.startSet();
+let conf = replTest.getReplSetConfig();
+conf.settings = {
+ // Speed up config propagation.
+ heartbeatIntervalMillis: 100,
+};
+replTest.initiate(conf);
+
+// Start out with config {n0,...,n7}, where n7 is the only non-voting member.
+let config = replTest.getReplSetConfigFromNode();
+let origConfig = Object.assign({}, config);
+let [m0, m1, m2, m3, m4, m5, m6, m7] = origConfig.members;
+
+//
+// Test max voting constraint.
+//
+
+jsTestLog("Test max voting constraint.");
+
+// Swap a vote between two members: make n6 non-voting and n7 voting.
+m6.votes = 0;
+m6.priority = 0;
+m7.votes = 1;
+m7.priority = 1;
+config.members = [m0, m1, m2, m3, m4, m5, m6, m7];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// And test switching the vote back.
+m6.votes = 1;
+m6.priority = 0;
+m7.votes = 0;
+m7.priority = 0;
+config.members = [m0, m1, m2, m3, m4, m5, m6, m7];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Test swapping out a voting member: drop n7 from the set, then replace n6 with a voting n7.
+m6.votes = 1;
+m6.priority = 0;
+config.members = [m0, m1, m2, m3, m4, m5, m6];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+m7.votes = 1;
+m7.priority = 1;
+config.members = [m0, m1, m2, m3, m4, m5, m7];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Restore the original config before shutting down.
+m7.votes = 0;
+m7.priority = 0;
+config.members = [m0, m1, m2, m3, m4, m5, m6, m7];
+reconfig(replTest, config);
+replTest.stopSet();
+})();
diff --git a/jstests/noPassthrough/auto_safe_reconfig_helpers.js b/jstests/noPassthrough/auto_safe_reconfig_helpers.js
new file mode 100644
index 00000000000..fc87c512171
--- /dev/null
+++ b/jstests/noPassthrough/auto_safe_reconfig_helpers.js
@@ -0,0 +1,185 @@
+/**
+ * Test that the 'reconfig' helper function correctly executes arbitrary reconfigs.
+ *
+ * Covers settings-only changes, member set changes, voting set changes, and simultaneous member
+ * set and voting set changes on a 3 node replica set.
+ *
+ * @tags: [requires_replication]
+ */
+(function() {
+"use strict";
+
+// Make secondaries unelectable.
+const replTest =
+ new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}]});
+replTest.startSet();
+let conf = replTest.getReplSetConfig();
+conf.settings = {
+ // Speed up config propagation.
+ heartbeatIntervalMillis: 100,
+};
+replTest.initiate(conf);
+
+// Start out with config {n0,n1,n2}. 'origConfig' is a shallow copy, so it shares member objects.
+let config = replTest.getReplSetConfigFromNode();
+let origConfig = Object.assign({}, config);
+let [m0, m1, m2] = origConfig.members;
+
+//
+// Test reconfigs that only change config settings but not the member set.
+//
+
+jsTestLog("Testing reconfigs that don't modify the member set.");
+
+// Change the 'electionTimeoutMillis' setting.
+config.settings.electionTimeoutMillis = config.settings.electionTimeoutMillis + 1;
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Do a reconfig that leaves out a config setting that will take on a default.
+delete config.settings.electionTimeoutMillis;
+reconfig(replTest, config);
+// The installed config should be the same as the given config except for the default value.
+let actualConfig = replTest.getReplSetConfigFromNode();
+assert(actualConfig.settings.hasOwnProperty("electionTimeoutMillis"));
+config.settings.electionTimeoutMillis = actualConfig.settings.electionTimeoutMillis;
+assertSameConfigContent(actualConfig, config);
+
+// Change a member config parameter.
+config.members[0].priority = 2;
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+//
+// Test member set changes.
+//
+
+jsTestLog("Testing member set changes.");
+
+// Reconfig to 'origConfig' and re-read the config. NOTE(review): m0 (shared) keeps priority 2?
+reconfig(replTest, origConfig);
+config = replTest.getReplSetConfigFromNode();
+
+// Remove 2 nodes, {n1, n2}.
+config.members = [m0];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Add 2 nodes, {n1, n2}.
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Remove one node so we can test swapping a node out.
+config.members = [m0, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Remove n2 and add n1 simultaneously (swap a node).
+config.members = [m0, m1];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Remove both existing nodes (n0, n1) and add a new node (n2). Removing the node that executes the
+// reconfig shouldn't be allowed. NOTE(review): nothing here fails if reconfig() does not throw.
+m2.priority = 1;
+config.members = [m2];
+try {
+ reconfig(replTest, config);
+} catch (e) {
+ assert.eq(e.code, ErrorCodes.NewReplicaSetConfigurationIncompatible, tojson(e));
+}
+
+// Reset the member's priority.
+m2.priority = 0;
+
+//
+// Test voting set changes that don't change the member set.
+//
+
+jsTestLog("Testing voting set changes.");
+
+// Start in the original config.
+reconfig(replTest, origConfig);
+
+// Remove two nodes, {n1,n2}, from the voting set.
+m1.votes = 0;
+m2.votes = 0;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Add two nodes, {n1,n2}, to the voting set.
+m1.votes = 1;
+m2.votes = 1;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Remove one node n1 from the voting set.
+m1.votes = 0;
+m2.votes = 1;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Add one node (n1) and remove one node (n2) from the voting set.
+m1.votes = 1;
+m2.votes = 0;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Make n2 voting by omitting a 'votes' field, which is allowed.
+delete m2.votes;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+actualConfig = replTest.getReplSetConfigFromNode();
+assert.eq(actualConfig.members[2].votes, 1);
+config.members[2].votes = 1;
+assertSameConfigContent(actualConfig, config);
+
+// Remove the primary (n0) and n2 from the voting set. We expect this to fail.
+m0.votes = 0;
+m0.priority = 0;
+m1.priority = 1;
+m1.votes = 1;
+m2.priority = 0;
+m2.votes = 0;
+config.members = [m0, m1, m2];
+try {
+ reconfig(replTest, config);
+} catch (e) {
+ assert.eq(e.code, ErrorCodes.NewReplicaSetConfigurationIncompatible, tojson(e));
+}
+
+//
+// Test simultaneous voting set and member set changes.
+//
+
+jsTestLog("Testing simultaneous voting set and member set changes.");
+
+// Start in the original config and reset vote counts.
+m0.votes = 1;
+m0.priority = 1;
+m1.votes = 1;
+m1.priority = 0;
+m2.votes = 1;
+m2.priority = 0;
+reconfig(replTest, origConfig);
+
+// Remove voting node n2 and make n1 non voting.
+m1.votes = 0;
+m2.votes = 1;
+config.members = [m0, m1];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Add voting node n2 and make n1 voting.
+m1.votes = 1;
+m2.votes = 1;
+config.members = [m0, m1, m2];
+reconfig(replTest, config);
+assertSameConfigContent(replTest.getReplSetConfigFromNode(), config);
+
+// Restore the original config before shutting down.
+reconfig(replTest, origConfig);
+replTest.stopSet();
+})();
diff --git a/jstests/replsets/libs/rename_across_dbs.js b/jstests/replsets/libs/rename_across_dbs.js
index ba9584c83aa..d32d6a11627 100644
--- a/jstests/replsets/libs/rename_across_dbs.js
+++ b/jstests/replsets/libs/rename_across_dbs.js
@@ -85,8 +85,7 @@ var RenameAcrossDatabasesTest = function(options) {
version: nextVersion,
};
- const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig.
- reconfig(replTest, replSetConfig, force);
+ reconfig(replTest, replSetConfig);
replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
replTest.awaitReplication();
diff --git a/jstests/replsets/libs/tags.js b/jstests/replsets/libs/tags.js
index d51683b2610..e5861ee0bad 100644
--- a/jstests/replsets/libs/tags.js
+++ b/jstests/replsets/libs/tags.js
@@ -133,8 +133,7 @@ var TagsTest = function(options) {
version: nextVersion,
};
- const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig.
- reconfig(replTest, replSetConfig, force);
+ reconfig(replTest, replSetConfig);
assert.soonNoExcept(() => replTest.nodes[2].adminCommand({replSetStepUp: 1}).ok);
replTest.waitForState(replTest.nodes[2], ReplSetTest.State.PRIMARY);
diff --git a/jstests/replsets/remove1.js b/jstests/replsets/remove1.js
index 62977c517d6..42f61d1665c 100644
--- a/jstests/replsets/remove1.js
+++ b/jstests/replsets/remove1.js
@@ -74,7 +74,7 @@ config.version = nextVersion;
// perception that the secondary is still "down".
assert.soon(function() {
try {
- reconfig(replTest, config);
+ assert.commandWorked(replTest.getPrimary().adminCommand({replSetReconfig: config}));
return true;
} catch (e) {
return false;
diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js
index bf643c6c76a..a95d548d87d 100644
--- a/jstests/replsets/rslib.js
+++ b/jstests/replsets/rslib.js
@@ -19,6 +19,7 @@ var setFailPoint;
var clearFailPoint;
var isConfigCommitted;
var waitForConfigReplication;
+var assertSameConfigContent;
(function() {
"use strict";
@@ -193,34 +194,183 @@ waitForAllMembers = function(master, timeout) {
print("All members are now in state PRIMARY, SECONDARY, or ARBITER");
};
-reconfig = function(rs, config, force) {
- "use strict";
- var admin = rs.getPrimary().getDB("admin");
- var e;
- var master;
- try {
- var reconfigCommand = {replSetReconfig: rs._updateConfigIfNotDurable(config), force: force};
- var res = admin.runCommand(reconfigCommand);
-
- // Retry reconfig if quorum check failed because not enough voting nodes responded.
- if (!res.ok && res.code === ErrorCodes.NodeNotFound) {
- print("Replset reconfig failed because quorum check failed. Retry reconfig once. " +
- "Error: " + tojson(res));
- res = admin.runCommand(reconfigCommand);
- }
+/**
+ * Runs a 'replSetReconfig' command against the 'admin' database of the given primary, retrying the
+ * command once if it fails with a NodeNotFound error. Asserts that the last attempt succeeded.
+ *
+ * @param primary - the node on which the reconfig command is run.
+ * @param config - the replica set config to install.
+ * @param force - whether to run a 'force' reconfig; a falsy value is sent as 'force: false'.
+ */
+function reconfigWithRetry(primary, config, force) {
+ var admin = primary.getDB("admin");
+ force = force || false;
+ var reconfigCommand = {
+ replSetReconfig: config,
+ force: force,
+ maxTimeMS: ReplSetTest.kDefaultTimeoutMS
+ };
+ var res = admin.runCommand(reconfigCommand);
- assert.commandWorked(res);
- } catch (e) {
- if (!isNetworkError(e)) {
- throw e;
- }
- print("Calling replSetReconfig failed. " + tojson(e));
+ // Retry reconfig if quorum check failed because not enough voting nodes responded.
+ if (!res.ok && res.code === ErrorCodes.NodeNotFound) {
+ print("Replset reconfig failed because quorum check failed. Retry reconfig once. " +
+ "Error: " + tojson(res));
+ res = admin.runCommand(reconfigCommand);
}
+ assert.commandWorked(res);
+}
+
+/**
+ * Executes an arbitrary reconfig as a sequence of non 'force' reconfigs.
+ *
+ * If this function fails for any reason, the replica set config may be left in an intermediate
+ * state i.e. in neither the original nor the target config.
+ *
+ * Note that 'targetConfig.version' is overwritten by this function before the final reconfig.
+ *
+ * @param rst - a ReplSetTest instance.
+ * @param targetConfig - the final, desired replica set config. After this function returns, the
+ * given replica set should be in 'targetConfig', except with a higher version.
+ */
+function autoReconfig(rst, targetConfig) {
+ //
+ // The goal of this function is to transform the source config (the current config on the
+ // primary) into the 'targetConfig' via a sequence of non 'force' reconfigurations. Non force
+ // reconfigs are only permitted to add or remove a single voting node, so we need to represent
+ // some given, arbitrary reconfig as a sequence of single node add/remove operations. We execute
+ // the overall transformation in the following steps:
+ //
+ // (1) Remove members present in the source but not in the target.
+ // (2) Update members present in both the source and target whose vote is removed.
+ // (3) Update members present in both the source and target whose vote is added or unmodified.
+ // (4) Add members present in the target but not in the source.
+ //
+ // After executing the above steps the config member set should be equal to the target config
+ // member set. We then execute one last reconfig that attempts to install the given
+ // targetConfig directly. This serves to update any top level properties of the config and it
+ // also ensures that the order of the final config member list matches the order in the given
+ // target config.
+ //
+ // Note that the order of the steps above is important to avoid passing through invalid configs
+ // during the config transformation sequence. There are certain constraints imposed on replica
+ // set configs e.g. there must be at least 1 electable node and no more than a certain maximum
+ // number of voting nodes. We know that the source and target configs are valid with respect to
+ // these constraints, but we must ensure that any sequence of reconfigs executed by this
+ // function never moves us to an intermediate config that violates one of these constraints.
+ // Since the primary, an electable node, can never be removed from the config, it is safe to do
+ // the removal of all voting nodes first, since we will be guaranteed to never go below the
+ // minimum number of electable nodes. Doing removals first similarly ensures that when adding
+ // nodes, we will never exceed an upper bound constraint, since we have already removed all
+ // necessary voting nodes.
+ //
+ // Note also that this procedure may not perform the desired config transformation in the
+ // minimal number of steps. For example, if the overall transformation removes 2 non-voting
+ // nodes from a config we could do this with a single reconfig, but the procedure implemented
+ // here will do it as a sequence of 2 reconfigs. We are not so worried about making this
+ // procedure optimal since each reconfig should be relatively quick and most reconfigs shouldn't
+ // take more than a few steps.
+ //
+
+ let primary = rst.getPrimary();
+ const sourceConfig = rst.getReplSetConfigFromNode();
+ let config = Object.assign({}, sourceConfig);
+
+ // Helpers to look up a member, its index, or its votes (default 1) in a config by member id.
+ const memberIndex = (cfg, id) => cfg.members.findIndex(m => m._id === id);
+ const memberInConfig = (cfg, id) => cfg.members.find(m => m._id === id);
+ const getMember = (cfg, id) => cfg.members[memberIndex(cfg, id)];
+ const getVotes = (cfg, id) =>
+ getMember(cfg, id).hasOwnProperty("votes") ? getMember(cfg, id).votes : 1;
+
+ print(`autoReconfig: source config: ${tojson(sourceConfig)}, target config: ${
+ tojson(targetConfig)}`);
+
+ // All the members in the target that aren't in the source.
+ let membersToAdd = targetConfig.members.filter(m => !memberInConfig(sourceConfig, m._id));
+ // All the members in the source that aren't in the target.
+ let membersToRemove = sourceConfig.members.filter(m => !memberInConfig(targetConfig, m._id));
+ // All the members that appear in both the source and target and have changed.
+ let membersToUpdate = targetConfig.members.filter(
+ (m) => memberInConfig(sourceConfig, m._id) &&
+ bsonWoCompare(m, memberInConfig(sourceConfig, m._id)) !== 0);
+
+ // Sort the members to ensure that we do updates that remove a node's vote first.
+ let membersToUpdateRemoveVote = membersToUpdate.filter(
+ (m) => (getVotes(targetConfig, m._id) < getVotes(sourceConfig, m._id)));
+ let membersToUpdateAddVote = membersToUpdate.filter(
+ (m) => (getVotes(targetConfig, m._id) >= getVotes(sourceConfig, m._id)));
+ membersToUpdate = membersToUpdateRemoveVote.concat(membersToUpdateAddVote);
+
+ print(`autoReconfig: Starting with membersToRemove: ${
+ tojsononeline(membersToRemove)}, membersToUpdate: ${
+ tojsononeline(membersToUpdate)}, membersToAdd: ${tojsononeline(membersToAdd)}`);
+
+ // Remove members.
+ membersToRemove.forEach(toRemove => {
+ config.members = config.members.filter(m => m._id !== toRemove._id);
+ config.version++;
+ print(`autoReconfig: remove member id ${toRemove._id}, reconfiguring to member set: ${
+ tojsononeline(config.members)}`);
+ reconfigWithRetry(primary, config);
+ });
- var master = rs.getPrimary().getDB("admin");
- waitForAllMembers(master);
+ // Update members.
+ membersToUpdate.forEach(toUpdate => {
+ let configIndex = memberIndex(config, toUpdate._id);
+ config.members[configIndex] = toUpdate;
+ config.version++;
+ print(`autoReconfig: update member id ${toUpdate._id}, reconfiguring to member set: ${
+ tojsononeline(config.members)}`);
+ reconfigWithRetry(primary, config);
+ });
- return master;
+ // Add members.
+ membersToAdd.forEach(toAdd => {
+ config.members.push(toAdd);
+ config.version++;
+ print(`autoReconfig: add member id ${toAdd._id}, reconfiguring to member set: ${
+ tojsononeline(config.members)}`);
+ reconfigWithRetry(primary, config);
+ });
+
+ // Verify that the final set of members is correct.
+ assert.sameMembers(targetConfig.members.map(m => m._id),
+ rst.getReplSetConfigFromNode().members.map(m => m._id),
+ "final config does not have the expected member set.");
+
+ // Do a final reconfig to update any other top level config fields. This also ensures the
+ // correct member order in the final config since the add/remove procedure above will result in
+ // a members array that has the correct set of members but the members may not be in the same
+ // order as the specified target config.
+ print("autoReconfig: doing final reconfig to reach target config.");
+ targetConfig.version = rst.getReplSetConfigFromNode().version + 1;
+ reconfigWithRetry(primary, targetConfig);
+}
+
+/**
+ * Executes a replica set reconfiguration on the given ReplSetTest instance.
+ *
+ * If this function fails for any reason while doing a non force reconfig, the replica set config
+ * may be left in an intermediate state i.e. neither in the original or target config.
+ *
+ * @param rst - a ReplSetTest instance.
+ * @param config - the desired target config. After this function returns, the
+ * given replica set should be in 'config', except with a higher version.
+ * @param force - should this be a 'force' reconfig or not.
+ */
+reconfig = function(rst, config, force) {
+ "use strict";
+ var primary = rst.getPrimary();
+ config = rst._updateConfigIfNotDurable(config);
+
+ // If this is a non 'force' reconfig, execute the reconfig as a series of reconfigs. Safe
+ // reconfigs only allow addition/removal of a single voting node at a time, so arbitrary
+ // reconfigs must be carried out in multiple steps. Using safe reconfigs guarantees that we
+ // don't violate correctness properties of the replication protocol.
+ if (!force) {
+ autoReconfig(rst, config);
+ } else {
+ // Force reconfigs can always be executed in one step.
+ reconfigWithRetry(primary, config, force);
+ }
+
+ var primaryAdminDB = rst.getPrimary().getDB("admin");
+ waitForAllMembers(primaryAdminDB);
+ return primaryAdminDB;
};
awaitOpTime = function(catchingUpNode, latestOpTimeNode) {
@@ -542,4 +692,24 @@ waitForConfigReplication = function(primary, nodes) {
return members.every((m) => hasSameConfig(m));
});
};
+
+/**
+ * Asserts that replica set config A is the same as replica set config B ignoring the 'version' and
+ * 'term' field.
+ */
+assertSameConfigContent = function(configA, configB) {
+ // Save original versions and terms.
+ const [versionA, termA] = [configA.version, configA.term];
+ const [versionB, termB] = [configB.version, configB.term];
+
+ configA.version = configA.term = 0;
+ configB.version = configB.term = 0;
+ assert.eq(configA, configB);
+
+ // Reset values so we don't modify the original objects.
+ configA.version = versionA;
+ configA.term = termA;
+ configB.version = versionB;
+ configB.term = termB;
+};
}());
diff --git a/jstests/sharding/primary_config_server_blackholed_from_mongos.js b/jstests/sharding/primary_config_server_blackholed_from_mongos.js
index 72985bf36f3..c13d6c12b8a 100644
--- a/jstests/sharding/primary_config_server_blackholed_from_mongos.js
+++ b/jstests/sharding/primary_config_server_blackholed_from_mongos.js
@@ -36,11 +36,7 @@ for (let i = 0; i < conf.members.length; i++) {
conf.members[i].priority = 0;
}
}
-conf.version++;
-// TODO (SERVER-45575): Update this to be a non-force reconfig.
-const response = admin.runCommand({replSetReconfig: conf, force: true});
-assert.commandWorked(response);
-
+reconfig(st.configRS, conf);
jsTest.log('Partitioning the config server primary from the mongos');
configPrimary.discardMessagesFrom(st.s, 1.0);
st.s.discardMessagesFrom(configPrimary, 1.0);