diff options
author | William Schultz <william.schultz@mongodb.com> | 2020-04-14 11:48:15 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-14 16:14:41 +0000 |
commit | fcd1c0aae55c0b713ff329c18435c77867365748 (patch) | |
tree | 1db91bad86ca625ba9a06d59b6eefd8e814c678f /jstests | |
parent | 8b81217b65fa99c2391d248a879a43749e2f16b4 (diff) | |
download | mongo-fcd1c0aae55c0b713ff329c18435c77867365748.tar.gz |
SERVER-45575 Add Javascript helper function to do automatic safe reconfigs
(cherry picked from commit b52c7b320bf6e6a031055e611fbb58cc76967352)
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js | 79 | ||||
-rw-r--r-- | jstests/noPassthrough/auto_safe_reconfig_helpers.js | 185 | ||||
-rw-r--r-- | jstests/replsets/libs/rename_across_dbs.js | 3 | ||||
-rw-r--r-- | jstests/replsets/libs/tags.js | 3 | ||||
-rw-r--r-- | jstests/replsets/remove1.js | 2 | ||||
-rw-r--r-- | jstests/replsets/rslib.js | 218 | ||||
-rw-r--r-- | jstests/sharding/primary_config_server_blackholed_from_mongos.js | 6 |
7 files changed, 462 insertions, 34 deletions
diff --git a/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js b/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js new file mode 100644 index 00000000000..92bba2fc789 --- /dev/null +++ b/jstests/noPassthrough/auto_safe_reconfig_helper_max_voting_nodes.js @@ -0,0 +1,79 @@ +/** + * Test that the 'reconfig' helper function correctly executes reconfigs between configs that have + * the maximum number of allowed voting nodes. + * + * @tags: [requires_replication] + */ +(function() { +"use strict"; + +// Make secondaries unelectable. Add 7 voting nodes, which is the maximum allowed. +const replTest = new ReplSetTest({ + nodes: [ + {}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0}}, + {rsConfig: {priority: 0, votes: 0}} + ] +}); +replTest.startSet(); +let conf = replTest.getReplSetConfig(); +conf.settings = { + // Speed up config propagation. + heartbeatIntervalMillis: 100, +}; +replTest.initiate(conf); + +// Start out with config {n0,n1,n2} +let config = replTest.getReplSetConfigFromNode(); +let origConfig = Object.assign({}, config); +let [m0, m1, m2, m3, m4, m5, m6, m7] = origConfig.members; + +// +// Test max voting constraint. +// + +jsTestLog("Test max voting constraint."); + +// Test making one node non voting and the other voting. +m6.votes = 0; +m6.priority = 0; +m7.votes = 1; +m7.priority = 1; +config.members = [m0, m1, m2, m3, m4, m5, m6, m7]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// And test switching the vote back. +m6.votes = 1; +m6.priority = 0; +m7.votes = 0; +m7.priority = 0; +config.members = [m0, m1, m2, m3, m4, m5, m6, m7]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Test swapping out a voting member. +m6.votes = 1; +m6.priority = 0; +config.members = [m0, m1, m2, m3, m4, m5, m6]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +m7.votes = 1; +m7.priority = 1; +config.members = [m0, m1, m2, m3, m4, m5, m7]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Restore the original config before shutting down. +m7.votes = 0; +m7.priority = 0; +config.members = [m0, m1, m2, m3, m4, m5, m6, m7]; +reconfig(replTest, config); +replTest.stopSet(); +})(); diff --git a/jstests/noPassthrough/auto_safe_reconfig_helpers.js b/jstests/noPassthrough/auto_safe_reconfig_helpers.js new file mode 100644 index 00000000000..fc87c512171 --- /dev/null +++ b/jstests/noPassthrough/auto_safe_reconfig_helpers.js @@ -0,0 +1,185 @@ +/** + * Test that the 'reconfig' helper function correctly executes arbitrary reconfigs. + * + * @tags: [requires_replication] + */ +(function() { +"use strict"; + +// Make secondaries unelectable. +const replTest = + new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}]}); +replTest.startSet(); +let conf = replTest.getReplSetConfig(); +conf.settings = { + // Speed up config propagation. + heartbeatIntervalMillis: 100, +}; +replTest.initiate(conf); + +// Start out with config {n0,n1,n2} +let config = replTest.getReplSetConfigFromNode(); +let origConfig = Object.assign({}, config); +let [m0, m1, m2] = origConfig.members; + +// +// Test reconfigs that only change config settings but not the member set. +// + +jsTestLog("Testing reconfigs that don't modify the member set."); + +// Change the 'electionTimeoutMillis' setting. +config.settings.electionTimeoutMillis = config.settings.electionTimeoutMillis + 1; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Do a reconfig that leaves out a config setting that will take on a default. +delete config.settings.electionTimeoutMillis; +reconfig(replTest, config); +// The installed config should be the same as the given config except for the default value. +let actualConfig = replTest.getReplSetConfigFromNode(); +assert(actualConfig.settings.hasOwnProperty("electionTimeoutMillis")); +config.settings.electionTimeoutMillis = actualConfig.settings.electionTimeoutMillis; +assertSameConfigContent(actualConfig, config); + +// Change a member config parameter. +config.members[0].priority = 2; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// +// Test member set changes. +// + +jsTestLog("Testing member set changes."); + +// Start in the original config and reset the config object. +reconfig(replTest, origConfig); +config = replTest.getReplSetConfigFromNode(); + +// Remove 2 nodes, {n1, n2}. +config.members = [m0]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Add 2 nodes, {n1, n2}. +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Remove one node so we can test swapping a node out. +config.members = [m0, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Remove n2 and add n1 simultaneously (swap a node). +config.members = [m0, m1]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Remove both existing nodes (n0, n1) and add a new node (n2). Removing a node that is executing +// the reconfig shouldn't be allowed, but we test it here to make sure it fails in an expected way. +m2.priority = 1; +config.members = [m2]; +try { + reconfig(replTest, config); +} catch (e) { + assert.eq(e.code, ErrorCodes.NewReplicaSetConfigurationIncompatible, tojson(e)); +} + +// Reset the member's priority. +m2.priority = 0; + +// +// Test voting set changes that don't change the member set. +// + +jsTestLog("Testing voting set changes."); + +// Start in the original config. +reconfig(replTest, origConfig); + +// Remove two nodes, {n1,n2}, from the voting set. +m1.votes = 0; +m2.votes = 0; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Add two nodes, {n1,n2}, to the voting set. +m1.votes = 1; +m2.votes = 1; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Remove one node n1 from the voting set. +m1.votes = 0; +m2.votes = 1; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Add one node (n1) and remove one node (n2) from the voting set. +m1.votes = 1; +m2.votes = 0; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Make n2 voting by omitting a 'votes' field, which is allowed. +delete m2.votes; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +actualConfig = replTest.getReplSetConfigFromNode(); +assert.eq(actualConfig.members[2].votes, 1); +config.members[2].votes = 1; +assertSameConfigContent(actualConfig, config); + +// Remove the primary (n0) from the voting set and remove n2. We expect this to fail. +m0.votes = 0; +m0.priority = 0; +m1.priority = 1; +m1.votes = 1; +m2.priority = 0; +m2.votes = 0; +config.members = [m0, m1, m2]; +try { + reconfig(replTest, config); +} catch (e) { + assert.eq(e.code, ErrorCodes.NewReplicaSetConfigurationIncompatible, tojson(e)); +} + +// +// Test simultaneous voting set and member set changes. +// + +jsTestLog("Testing simultaneous voting set and member set changes."); + +// Start in the original config and reset vote counts. +m0.votes = 1; +m0.priority = 1; +m1.votes = 1; +m1.priority = 0; +m2.votes = 1; +m2.priority = 0; +reconfig(replTest, origConfig); + +// Remove voting node n2 and make n1 non voting. +m1.votes = 0; +m2.votes = 1; +config.members = [m0, m1]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Add voting node n2 and make n1 voting. +m1.votes = 1; +m2.votes = 1; +config.members = [m0, m1, m2]; +reconfig(replTest, config); +assertSameConfigContent(replTest.getReplSetConfigFromNode(), config); + +// Restore the original config before shutting down. +reconfig(replTest, origConfig); +replTest.stopSet(); +})(); diff --git a/jstests/replsets/libs/rename_across_dbs.js b/jstests/replsets/libs/rename_across_dbs.js index ba9584c83aa..d32d6a11627 100644 --- a/jstests/replsets/libs/rename_across_dbs.js +++ b/jstests/replsets/libs/rename_across_dbs.js @@ -85,8 +85,7 @@ var RenameAcrossDatabasesTest = function(options) { version: nextVersion, }; - const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig. - reconfig(replTest, replSetConfig, force); + reconfig(replTest, replSetConfig); replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); replTest.awaitReplication(); diff --git a/jstests/replsets/libs/tags.js b/jstests/replsets/libs/tags.js index d51683b2610..e5861ee0bad 100644 --- a/jstests/replsets/libs/tags.js +++ b/jstests/replsets/libs/tags.js @@ -133,8 +133,7 @@ var TagsTest = function(options) { version: nextVersion, }; - const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig. - reconfig(replTest, replSetConfig, force); + reconfig(replTest, replSetConfig); assert.soonNoExcept(() => replTest.nodes[2].adminCommand({replSetStepUp: 1}).ok); replTest.waitForState(replTest.nodes[2], ReplSetTest.State.PRIMARY); diff --git a/jstests/replsets/remove1.js b/jstests/replsets/remove1.js index 62977c517d6..42f61d1665c 100644 --- a/jstests/replsets/remove1.js +++ b/jstests/replsets/remove1.js @@ -74,7 +74,7 @@ config.version = nextVersion; // perception that the secondary is still "down". assert.soon(function() { try { - reconfig(replTest, config); + assert.commandWorked(replTest.getPrimary().adminCommand({replSetReconfig: config})); return true; } catch (e) { return false; diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js index bf643c6c76a..a95d548d87d 100644 --- a/jstests/replsets/rslib.js +++ b/jstests/replsets/rslib.js @@ -19,6 +19,7 @@ var setFailPoint; var clearFailPoint; var isConfigCommitted; var waitForConfigReplication; +var assertSameConfigContent; (function() { "use strict"; @@ -193,34 +194,183 @@ waitForAllMembers = function(master, timeout) { print("All members are now in state PRIMARY, SECONDARY, or ARBITER"); }; -reconfig = function(rs, config, force) { - "use strict"; - var admin = rs.getPrimary().getDB("admin"); - var e; - var master; - try { - var reconfigCommand = {replSetReconfig: rs._updateConfigIfNotDurable(config), force: force}; - var res = admin.runCommand(reconfigCommand); - - // Retry reconfig if quorum check failed because not enough voting nodes responded. - if (!res.ok && res.code === ErrorCodes.NodeNotFound) { - print("Replset reconfig failed because quorum check failed. Retry reconfig once. " + - "Error: " + tojson(res)); - res = admin.runCommand(reconfigCommand); - } +/** + * Run a 'replSetReconfig' command with one retry. + */ +function reconfigWithRetry(primary, config, force) { + var admin = primary.getDB("admin"); + force = force || false; + var reconfigCommand = { + replSetReconfig: config, + force: force, + maxTimeMS: ReplSetTest.kDefaultTimeoutMS + }; + var res = admin.runCommand(reconfigCommand); - assert.commandWorked(res); - } catch (e) { - if (!isNetworkError(e)) { - throw e; - } - print("Calling replSetReconfig failed. " + tojson(e)); + // Retry reconfig if quorum check failed because not enough voting nodes responded. + if (!res.ok && res.code === ErrorCodes.NodeNotFound) { + print("Replset reconfig failed because quorum check failed. Retry reconfig once. " + + "Error: " + tojson(res)); + res = admin.runCommand(reconfigCommand); } + assert.commandWorked(res); +} + +/** + * Executes an arbitrary reconfig as a sequence of non 'force' reconfigs. + * + * If this function fails for any reason, the replica set config may be left in an intermediate + * state i.e. neither in the original or target config. + * + * @param rst - a ReplSetTest instance. + * @param targetConfig - the final, desired replica set config. After this function returns, the + * given replica set should be in 'targetConfig', except with a higher version. + */ +function autoReconfig(rst, targetConfig) { + // + // The goal of this function is to transform the source config (the current config on the + // primary) into the 'targetConfig' via a sequence of non 'force' reconfigurations. Non force + // reconfigs are only permitted to add or remove a single voting node, so we need to represent + // some given, arbitrary reconfig as a sequence of single node add/remove operations. We execute + // the overall transformation in the following steps: + // + // (1) Remove members present in the source but not in the target. + // (2) Update members present in both the source and target whose vote is removed. + // (3) Update members present in both the source and target whose vote is added or unmodified. + // (4) Add members present in the target but not in the source. + // + // After executing the above steps the config member set should be equal to the target config + // member set. We then execute one last reconfig that attempts to install the given + // targetConfig directly. This serves to update any top level properties of the config and it + // also ensures that the order of the final config member list matches the order in the given + // target config. + // + // Note that the order of the steps above is important to avoid passing through invalid configs + // during the config transformation sequence. There are certain constraints imposed on replica + // set configs e.g. there must be at least 1 electable node and less than a certain number of + // maximum voting nodes. We know that the source and target configs are valid with respect to + // these constraints, but we must ensure that any sequence of reconfigs executed by this + // function never moves us to an intermediate config that violates one of these constraints. + // Since the primary, an electable node, can never be removed from the config, it is safe to do + // the removal of all voting nodes first, since we will be guaranteed to never go below the + // minimum number of electable nodes. Doing removals first similarly ensures that when adding + // nodes, we will never exceed an upper bound constraint, since we have already removed all + // necessary voting nodes. + // + // Note also that this procedure may not perform the desired config transformation in the + // minimal number of steps. For example, if the overall transformation removes 2 non-voting + // nodes from a config we could do this with a single reconfig, but the procedure implemented + // here will do it as a sequence of 2 reconfigs. We are not so worried about making this + // procedure optimal since each reconfig should be relatively quick and most reconfigs shouldn't + // take more than a few steps. + // + + let primary = rst.getPrimary(); + const sourceConfig = rst.getReplSetConfigFromNode(); + let config = Object.assign({}, sourceConfig); + + // Look up the index of a given member in the given array by its member id. + const memberIndex = (cfg, id) => cfg.members.findIndex(m => m._id === id); + const memberInConfig = (cfg, id) => cfg.members.find(m => m._id === id); + const getMember = (cfg, id) => cfg.members[memberIndex(cfg, id)]; + const getVotes = (cfg, id) => + getMember(cfg, id).hasOwnProperty("votes") ? getMember(cfg, id).votes : 1; + + print(`autoReconfig: source config: ${tojson(sourceConfig)}, target config: ${ + tojson(targetConfig)}`); + + // All the members in the target that aren't in the source. + let membersToAdd = targetConfig.members.filter(m => !memberInConfig(sourceConfig, m._id)); + // All the members in the source that aren't in the target. + let membersToRemove = sourceConfig.members.filter(m => !memberInConfig(targetConfig, m._id)); + // All the members that appear in both the source and target and have changed. + let membersToUpdate = targetConfig.members.filter( + (m) => memberInConfig(sourceConfig, m._id) && + bsonWoCompare(m, memberInConfig(sourceConfig, m._id)) !== 0); + + // Sort the members to ensure that we do updates that remove a node's vote first. + let membersToUpdateRemoveVote = membersToUpdate.filter( + (m) => (getVotes(targetConfig, m._id) < getVotes(sourceConfig, m._id))); + let membersToUpdateAddVote = membersToUpdate.filter( + (m) => (getVotes(targetConfig, m._id) >= getVotes(sourceConfig, m._id))); + membersToUpdate = membersToUpdateRemoveVote.concat(membersToUpdateAddVote); + + print(`autoReconfig: Starting with membersToRemove: ${ + tojsononeline(membersToRemove)}, membersToUpdate: ${ + tojsononeline(membersToUpdate)}, membersToAdd: ${tojsononeline(membersToAdd)}`); + + // Remove members. + membersToRemove.forEach(toRemove => { + config.members = config.members.filter(m => m._id !== toRemove._id); + config.version++; + print(`autoReconfig: remove member id ${toRemove._id}, reconfiguring to member set: ${ + tojsononeline(config.members)}`); + reconfigWithRetry(primary, config); + }); - var master = rs.getPrimary().getDB("admin"); - waitForAllMembers(master); + // Update members. + membersToUpdate.forEach(toUpdate => { + let configIndex = memberIndex(config, toUpdate._id); + config.members[configIndex] = toUpdate; + config.version++; + print(`autoReconfig: update member id ${toUpdate._id}, reconfiguring to member set: ${ + tojsononeline(config.members)}`); + reconfigWithRetry(primary, config); + }); - return master; + // Add members. + membersToAdd.forEach(toAdd => { + config.members.push(toAdd); + config.version++; + print(`autoReconfig: add member id ${toAdd._id}, reconfiguring to member set: ${ + tojsononeline(config.members)}`); + reconfigWithRetry(primary, config); + }); + + // Verify that the final set of members is correct. + assert.sameMembers(targetConfig.members.map(m => m._id), + rst.getReplSetConfigFromNode().members.map(m => m._id), + "final config does not have the expected member set."); + + // Do a final reconfig to update any other top level config fields. This also ensures the + // correct member order in the final config since the add/remove procedure above will result in + // a members array that has the correct set of members but the members may not be in the same + // order as the specified target config. + print("autoReconfig: doing final reconfig to reach target config."); + targetConfig.version = rst.getReplSetConfigFromNode().version + 1; + reconfigWithRetry(primary, targetConfig); +} + +/** + * Executes a replica set reconfiguration on the given ReplSetTest instance. + * + * If this function fails for any reason while doing a non force reconfig, the replica set config + * may be left in an intermediate state i.e. neither in the original or target config. + * + * @param rst - a ReplSetTest instance. + * @param config - the desired target config. After this function returns, the + * given replica set should be in 'config', except with a higher version. + * @param force - should this be a 'force' reconfig or not. + */ +reconfig = function(rst, config, force) { + "use strict"; + var primary = rst.getPrimary(); + config = rst._updateConfigIfNotDurable(config); + + // If this is a non 'force' reconfig, execute the reconfig as a series of reconfigs. Safe + // reconfigs only allow addition/removal of a single voting node at a time, so arbitrary + // reconfigs must be carried out in multiple steps. Using safe reconfigs guarantees that we + // don't violate correctness properties of the replication protocol. + if (!force) { + autoReconfig(rst, config); + } else { + // Force reconfigs can always be executed in one step. + reconfigWithRetry(primary, config, force); + } + + var primaryAdminDB = rst.getPrimary().getDB("admin"); + waitForAllMembers(primaryAdminDB); + return primaryAdminDB; }; awaitOpTime = function(catchingUpNode, latestOpTimeNode) { @@ -542,4 +692,24 @@ waitForConfigReplication = function(primary, nodes) { return members.every((m) => hasSameConfig(m)); }); }; + +/** + * Asserts that replica set config A is the same as replica set config B ignoring the 'version' and + * 'term' field. + */ +assertSameConfigContent = function(configA, configB) { + // Save original versions and terms. + const [versionA, termA] = [configA.version, configA.term]; + const [versionB, termB] = [configB.version, configB.term]; + + configA.version = configA.term = 0; + configB.version = configB.term = 0; + assert.eq(configA, configB); + + // Reset values so we don't modify the original objects. + configA.version = versionA; + configA.term = termA; + configB.version = versionB; + configB.term = termB; +}; }()); diff --git a/jstests/sharding/primary_config_server_blackholed_from_mongos.js b/jstests/sharding/primary_config_server_blackholed_from_mongos.js index 72985bf36f3..c13d6c12b8a 100644 --- a/jstests/sharding/primary_config_server_blackholed_from_mongos.js +++ b/jstests/sharding/primary_config_server_blackholed_from_mongos.js @@ -36,11 +36,7 @@ for (let i = 0; i < conf.members.length; i++) { conf.members[i].priority = 0; } } -conf.version++; -// TODO (SERVER-45575): Update this to be a non-force reconfig. -const response = admin.runCommand({replSetReconfig: conf, force: true}); -assert.commandWorked(response); - +reconfig(st.configRS, conf); jsTest.log('Partitioning the config server primary from the mongos'); configPrimary.discardMessagesFrom(st.s, 1.0); st.s.discardMessagesFrom(configPrimary, 1.0); |