diff options
author | Ali Mir <ali.mir@mongodb.com> | 2020-02-24 18:01:15 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-10 23:49:38 +0000 |
commit | d3c55652b6cab504ad7acf32b16298809ac9f5e6 (patch) | |
tree | b989dadee347f45cc13701e56250cfcf495b1cdd | |
parent | 7a4da2ea31f9c699e9458853d9029bd3a1f7de75 (diff) | |
download | mongo-d3c55652b6cab504ad7acf32b16298809ac9f5e6.tar.gz |
SERVER-45088 Test safe reconfig avoids diverging configs
(cherry picked from commit 980acca9877377d9de08f82f6e36280085661805)
-rw-r--r-- | jstests/replsets/reconfig_avoids_diverging_configs.js | 122 |
1 files changed, 122 insertions, 0 deletions
// jstests/replsets/reconfig_avoids_diverging_configs.js
// (new file, mode 100644; reconstructed from the added lines of the diff hunk @@ -0,0 +1,122 @@)
/**
 * In a 4-node set, verify that two diverging non-force replica set reconfigs
 * are not allowed to succeed. Diverging reconfigs contain non-overlapping quorums. For example,
 *    C1: {n1,n2,n3}
 *    C2: {n1,n3,n4}
 * The C1 quorum {n1,n2} and the C2 quorum {n3,n4} do not overlap.
 *
 * 1. Node1 is the initial primary.
 * 2. Disconnect node4 from all three other nodes.
 * 3. Step down node1 and step up node2.
 * 4. Disconnect the current primary, node2, from all other nodes.
 * 5. Issue a reconfig to node2 that removes node4.
 * 6. Reconnect node4 to the current secondaries, node1 and node3.
 * 7. Step up node3, which creates a two primary scenario.
 * 8. Issue a reconfig to node3 that removes node2. We now have diverging configs
 *    from two different primaries.
 * 9. Reconnect node2 to the rest of the set and verify that its reconfig fails.
 *
 * @tags: [requires_fcv_44]
 */
(function() {
"use strict";
load("jstests/libs/parallel_shell_helpers.js");
load('jstests/libs/test_background_ops.js');
load("jstests/replsets/rslib.js");
load('jstests/aggregation/extras/utils.js');

// useBridge is required so that individual nodes can be disconnected/reconnected below.
const rst = new ReplSetTest({nodes: 4, useBridge: true});
rst.startSet();
rst.initiateWithHighElectionTimeout();

const node1 = rst.getPrimary();
const secondaries = rst.getSecondaries();
const node2 = secondaries[0];
const node3 = secondaries[1];
const node4 = secondaries[2];

// Partition the 4th node.
node4.disconnect([node1, node2, node3]);

jsTestLog("Current replica set topology: [Primary-Secondary-Secondary] [Secondary]");
assert.commandWorked(node1.adminCommand({replSetStepDown: 120}));
// Step up a new primary in the partitioned repl set.
assert.commandWorked(node2.adminCommand({replSetStepUp: 1}));

// Wait until the config has been committed.
assert.soon(() => isConfigCommitted(rst.getPrimary()));
// The quorum check places stricter bounds on the safe reconfig
// protocol and won't allow this specific scenario of diverging configs
// to happen. However, it's still worth testing the original reconfig
// protocol that omitted the check for correctness.
configureFailPoint(rst.getPrimary(), "omitConfigQuorumCheck");

// Reconfig to remove the 4th node.
const C1 = Object.assign({}, rst.getReplSetConfigFromNode());
C1.members = C1.members.slice(0, 3);  // Remove the last node.
C1.version++;

jsTestLog("Disconnecting the primary from other nodes");
assert.eq(rst.getPrimary(), node2);
node2.disconnect([node1, node3, node4]);
jsTestLog("Current replica set topology: [Primary] [Secondary-Secondary] [Secondary]");
// Create parallel shell to execute reconfig on partitioned primary.
// This reconfig will succeed due to the omission of the quorum check, but
// will not get propagated.
// NOTE(review): the value returned by startParallelShell is discarded. If it is a join
// handle, the assertion inside the shell is never checked by this script - confirm whether
// the shell should be joined before stopSet().
startParallelShell(funWithArgs(function(config) {
                       assert.commandWorked(
                           db.getMongo().adminCommand({replSetReconfig: config}));
                   }, C1), node2.port);

// Reconnect the 4th node to the secondaries.
node4.reconnect([node1, node3]);
// Check the step-up result, as is done for node2 above, so that a rejected step-up fails
// here rather than as a timeout while waiting for agreement on the primary.
assert.commandWorked(node3.adminCommand({replSetStepUp: 1}));
rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, [node1, node3, node4]);
jsTestLog("Current replica set topology: [Primary-Secondary-Secondary] [Primary]");
assert.soon(() => isConfigCommitted(node3));

// Reconfig to remove a secondary. We need to specify the node to get the original
// config from as there are two primaries, node2 and node3, in the replset.
const C2 = Object.assign({}, rst.getReplSetConfigFromNode(2));
// splice(0, 1) removes the first entry of the members array and returns it in an array.
// NOTE(review): the header comment says this reconfig removes node2; confirm which node
// sits at index 0 of the config's members array.
const removedSecondary = C2.members.splice(0, 1);
C2.version++;
assert.commandWorked(node3.adminCommand({replSetReconfig: C2}));
assert.soon(() => isConfigCommitted(node3));

// Reconnect partitioned node to the other nodes.
node2.reconnect([node3, node4]);
// The newly connected node will receive a heartbeat with a higher term, and
// step down from being primary. The reconfig command issued to this node will fail.
rst.waitForState(node2, ReplSetTest.State.SECONDARY);

// Make sure the newly connected secondary has updated its config.
assert.soon(function() {
    // Take one status snapshot per node per poll so that every comparison below reads
    // from the same response.
    const node2Status = assert.commandWorked(node2.adminCommand({replSetGetStatus: 1}));
    const node3Status = assert.commandWorked(node3.adminCommand({replSetGetStatus: 1}));

    // NOTE(review): the member indices (1 on node2's status, 2 on node3's status) are kept
    // from the original test; confirm they address the intended member entries.
    const node2Member = node2Status.members[1];
    const node3Member = node3Status.members[2];

    const node2TermUpdated = bsonWoCompare(node2Status.term, node3Status.term) === 0;
    const node2ConfigTermUpdated = node2Member.configTerm === node3Member.configTerm;
    const node2ConfigVersionUpdated = node2Member.configVersion === node3Member.configVersion;
    return node2TermUpdated && node2ConfigTermUpdated && node2ConfigVersionUpdated;
});

// Return to a stable state: re-add the member that C2 removed and reconnect node1 to
// every other node before issuing the final reconfig.
const C3 = Object.assign({}, rst.getReplSetConfigFromNode(2));
C3.members.push(removedSecondary[0]);
C3.version++;

node1.reconnect([node2, node3, node4]);
assert.commandWorked(node3.adminCommand({replSetReconfig: C3}));
assert.soon(() => isConfigCommitted(node3));
rst.awaitNodesAgreeOnPrimary();
rst.awaitNodesAgreeOnConfigVersion();
rst.stopSet();
}());