summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSuganthi Mani <suganthi.mani@mongodb.com>2020-06-30 02:37:34 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-20 06:45:42 +0000
commitdb72156b34591a37f98f1eeae0e5d0c67ed555ff (patch)
tree650efdc3a77533dc75c53d83798746b556a7546f
parent9870937b91b88348e619580f1050965b1006e33d (diff)
downloadmongo-db72156b34591a37f98f1eeae0e5d0c67ed555ff.tar.gz
SERVER-43847 Make ReplSetTest's stepUp function resilient to slow machines.
(cherry picked from commit c5a53e4882bd316dcb37141ccfab56f5acaec8f4) SERVER-49187 Make ReplSetTest.stepUp() robust to election failures. (cherry picked from commit 311b7982f61009fd08bd7b76b1638d62cc8703de)
-rw-r--r--jstests/replsets/avg_num_catchup_ops.js3
-rw-r--r--jstests/replsets/catchup.js4
-rw-r--r--jstests/replsets/change_stream_stepdown.js29
-rw-r--r--jstests/replsets/reconstruct_prepared_transactions_initial_sync.js4
-rw-r--r--jstests/replsets/rslib.js3
-rw-r--r--jstests/replsets/step_down_during_draining.js6
-rw-r--r--jstests/replsets/step_down_during_draining2.js4
-rw-r--r--jstests/replsets/step_down_during_draining3.js2
-rw-r--r--src/mongo/shell/assert.js14
-rw-r--r--src/mongo/shell/replsettest.js82
10 files changed, 87 insertions, 64 deletions
diff --git a/jstests/replsets/avg_num_catchup_ops.js b/jstests/replsets/avg_num_catchup_ops.js
index 5a3bd488722..418f7770ac4 100644
--- a/jstests/replsets/avg_num_catchup_ops.js
+++ b/jstests/replsets/avg_num_catchup_ops.js
@@ -46,8 +46,7 @@ assert(testNodeServerStatus.electionMetrics.averageCatchUpOps,
assert.eq(testNodeServerStatus.electionMetrics.averageCatchUpOps, 4);
// Step up another node temporarily.
-const tempPrimary = rst.stepUpNoAwaitReplication(rst.getSecondaries()[0]);
-assert.eq(tempPrimary, rst.getPrimary());
+const tempPrimary = rst.stepUp(rst.getSecondaries()[0]);
rst.awaitReplication();
// Step up the testNode and force it to catchup again.
diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js
index d565fa2b5d5..fd8f9b9a699 100644
--- a/jstests/replsets/catchup.js
+++ b/jstests/replsets/catchup.js
@@ -62,9 +62,7 @@ let initialNewPrimaryStatus =
assert.commandWorked(rst.getSecondary().adminCommand({serverStatus: 1}));
// Should complete transition to primary immediately.
-var newPrimary = rst.stepUpNoAwaitReplication(rst.getSecondary());
-// Should win an election and finish the transition very quickly.
-assert.eq(newPrimary, rst.getPrimary());
+var newPrimary = rst.stepUp(rst.getSecondary(), {awaitReplicationBeforeStepUp: false});
rst.awaitReplication();
// Check that the 'numCatchUps' field has not been incremented in serverStatus.
diff --git a/jstests/replsets/change_stream_stepdown.js b/jstests/replsets/change_stream_stepdown.js
index 68219931b6f..6cd1eaa2d60 100644
--- a/jstests/replsets/change_stream_stepdown.js
+++ b/jstests/replsets/change_stream_stepdown.js
@@ -18,11 +18,8 @@ replTest.initiateWithHighElectionTimeout();
function stepUp(replTest, conn) {
assert.commandWorked(conn.adminCommand({replSetFreeze: 0}));
- // Steps up the node in conn but this function does not wait for the new primary to be able to
- // accept writes.
- replTest.stepUpNoAwaitReplication(conn);
- // Waits for the new primary to accept new writes.
- return replTest.getPrimary();
+    // Steps up the node in conn and waits for the stepped-up node to become a writable primary.
+ return replTest.stepUp(conn, {awaitReplicationBeforeStepUp: false});
}
const dbName = name;
@@ -30,9 +27,7 @@ const collName = "change_stream_stepdown";
const changeStreamComment = collName + "_comment";
const primary = replTest.getPrimary();
-const secondary = replTest.getSecondary();
const primaryDb = primary.getDB(dbName);
-const secondaryDb = secondary.getDB(dbName);
const primaryColl = primaryDb[collName];
// Open a change stream.
@@ -96,9 +91,10 @@ jsTestLog("Testing that changestream waiting on old primary sees docs inserted o
replTest.awaitReplication(); // Ensure secondary is up to date and can win an election.
-function shellFn(secondaryHost, dbName, collName, changeStreamComment, stepUpFn) {
+function shellFn(dbName, collName, changeStreamComment, stepUpFn) {
// Wait for the getMore to be in progress.
- assert.soon(() => db.getSiblingDB("admin")
+ const primary = db.getMongo();
+ assert.soon(() => primary.getDB("admin")
.aggregate([
{'$currentOp': {}},
{
@@ -110,19 +106,18 @@ function shellFn(secondaryHost, dbName, collName, changeStreamComment, stepUpFn)
])
.itcount() == 1);
- const replTest = new ReplSetTest(secondaryHost);
- const secondary = new Mongo(secondaryHost);
- const secondaryDb = secondary.getDB(dbName);
+ const replTest = new ReplSetTest(primary.host);
+
// Step down the old primary and wait for new primary.
- jsTestLog(`Stepping up ${secondaryHost} and waiting for new primary`);
- stepUpFn(replTest, secondary);
+ const newPrimary = stepUpFn(replTest, replTest.getSecondary());
+ const newPrimaryDB = newPrimary.getDB(dbName);
+ assert.neq(newPrimary, primary, "Primary didn't change.");
jsTestLog("Inserting document on new primary");
- assert.commandWorked(secondaryDb[collName].insert({_id: 4}), {writeConcern: {w: "majority"}});
+ assert.commandWorked(newPrimaryDB[collName].insert({_id: 4}), {writeConcern: {w: "majority"}});
}
let waitForShell = startParallelShell(
- funWithArgs(shellFn, secondary.host, dbName, collName, changeStreamComment, stepUp),
- primary.port);
+ funWithArgs(shellFn, dbName, collName, changeStreamComment, stepUp), primary.port);
res = assert.commandWorked(primaryDb.runCommand({
getMore: cursorId,
diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
index 4458bbbc4b1..6257f066eab 100644
--- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
+++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js
@@ -172,9 +172,7 @@ jsTestLog("Stepping up the secondary");
// Step up the secondary after initial sync is done and make sure the other two transactions are
// properly prepared.
-replTest.stepUpNoAwaitReplication(secondary);
-replTest.waitForState(secondary, ReplSetTest.State.PRIMARY);
-const newPrimary = replTest.getPrimary();
+const newPrimary = replTest.stepUp(secondary, {awaitReplicationBeforeStepUp: false});
testDB = newPrimary.getDB(dbName);
testColl = testDB.getCollection(collName);
diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js
index 3b385e70e29..6f397b08ca4 100644
--- a/jstests/replsets/rslib.js
+++ b/jstests/replsets/rslib.js
@@ -500,7 +500,8 @@ stopReplicationAndEnforceNewPrimaryToCatchUp = function(rst, node) {
const latestOpOnOldPrimary = getLatestOp(oldPrimary);
// New primary wins immediately, but needs to catch up.
- const newPrimary = rst.stepUpNoAwaitReplication(node);
+ const newPrimary =
+ rst.stepUp(node, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
const latestOpOnNewPrimary = getLatestOp(newPrimary);
// Check this node is not writable.
assert.eq(newPrimary.getDB("test").isMaster().ismaster, false);
diff --git a/jstests/replsets/step_down_during_draining.js b/jstests/replsets/step_down_during_draining.js
index 53009c66f36..dde02adff58 100644
--- a/jstests/replsets/step_down_during_draining.js
+++ b/jstests/replsets/step_down_during_draining.js
@@ -79,7 +79,7 @@ assert.soon(
1000);
reconnect(secondary);
-replSet.stepUpNoAwaitReplication(secondary);
+replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
// Secondary doesn't allow writes yet.
var res = secondary.getDB("admin").runCommand({"isMaster": 1});
@@ -95,10 +95,10 @@ assert.commandFailedWithCode(
// Original primary steps up.
reconnect(primary);
-replSet.stepUpNoAwaitReplication(primary);
+replSet.stepUp(primary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
reconnect(secondary);
-replSet.stepUpNoAwaitReplication(secondary);
+replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
// Disable fail point to allow replication.
secondaries.forEach(disableFailPoint);
diff --git a/jstests/replsets/step_down_during_draining2.js b/jstests/replsets/step_down_during_draining2.js
index 5b972cfb51a..366854350c1 100644
--- a/jstests/replsets/step_down_during_draining2.js
+++ b/jstests/replsets/step_down_during_draining2.js
@@ -81,7 +81,7 @@ assert.soon(
1000);
reconnect(secondary);
-replSet.stepUpNoAwaitReplication(secondary);
+replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
// Secondary doesn't allow writes yet.
var res = secondary.getDB("admin").runCommand({"isMaster": 1});
@@ -134,7 +134,7 @@ assert(!secondary.adminCommand('ismaster').ismaster);
// Now ensure that the node can successfully become primary again.
replSet.restart(0);
replSet.restart(2);
-replSet.stepUpNoAwaitReplication(secondary);
+replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
assert.soon(function() {
return secondary.adminCommand('ismaster').ismaster;
diff --git a/jstests/replsets/step_down_during_draining3.js b/jstests/replsets/step_down_during_draining3.js
index ac1f2239498..9cd965d48cd 100644
--- a/jstests/replsets/step_down_during_draining3.js
+++ b/jstests/replsets/step_down_during_draining3.js
@@ -71,7 +71,7 @@ assert.soon(
1000);
reconnect(secondary);
-replSet.stepUpNoAwaitReplication(secondary);
+replSet.stepUp(secondary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
// Secondary doesn't allow writes yet.
var res = secondary.getDB("admin").runCommand({"isMaster": 1});
diff --git a/src/mongo/shell/assert.js b/src/mongo/shell/assert.js
index 20c8f2d481b..5c06110b4ad 100644
--- a/src/mongo/shell/assert.js
+++ b/src/mongo/shell/assert.js
@@ -351,7 +351,19 @@ assert = (function() {
assert.soonNoExcept = function(func, msg, timeout, interval) {
var safeFunc =
_convertExceptionToReturnStatus(func, "assert.soonNoExcept caught exception");
- assert.soon(safeFunc, msg, timeout, interval);
+ var safeFuncwithMinimizedNoise = () => {
+ // Turns off printing the JavaScript stacktrace in doassert() to avoid generating an
+ // overwhelming amount of log messages when handling transient errors.
+ const origTraceExceptions = TestData.traceExceptions;
+ TestData.traceExceptions = false;
+
+ const res = safeFunc();
+
+        // Restore its original value.
+ TestData.traceExceptions = origTraceExceptions;
+ return res;
+ };
+ assert.soon(safeFuncwithMinimizedNoise, msg, timeout, interval);
};
/*
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 1e5bf078159..481c1f60339 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -853,7 +853,7 @@ var ReplSetTest = function(opts) {
print("AwaitNodesAgreeOnPrimary: Nodes agreed on primary " + nodes[primary].name);
return true;
- }, "Awaiting nodes to agree on primary", timeout);
+ }, "Awaiting nodes to agree on primary timed out", timeout);
};
/**
@@ -1204,40 +1204,56 @@ var ReplSetTest = function(opts) {
};
/**
- * Steps up 'node' as primary.
- * Waits for all nodes to reach the same optime before sending the replSetStepUp command
- * to 'node'.
+     * Steps up 'node' as primary. By default, it first waits for all nodes to reach the same
+     * optime before sending the replSetStepUp command to 'node', and then waits for the
+     * stepped up node to become a writable primary.
+ *
* Calls awaitReplication() which requires all connections in 'nodes' to be authenticated.
+ * This stepUp() assumes that there is no network partition in the replica set.
*/
- this.stepUp = function(node) {
- this.awaitReplication();
- this.awaitNodesAgreeOnAppliedOpTime();
- this.awaitNodesAgreeOnPrimary();
- if (this.getPrimary() === node) {
- return;
- }
+ this.stepUp = function(node, {
+ awaitReplicationBeforeStepUp: awaitReplicationBeforeStepUp = true,
+ awaitWritablePrimary: awaitWritablePrimary = true
+ } = {}) {
+ jsTest.log("ReplSetTest stepUp: Stepping up " + node.host);
+
+ if (awaitReplicationBeforeStepUp) {
+ this.awaitReplication();
+ }
+
+ assert.soonNoExcept(() => {
+ const res = node.adminCommand({replSetStepUp: 1});
+ // This error is possible if we are running mongoDB binary < 3.4 as
+ // part of multi-version upgrade test. So, for those older branches,
+ // simply wait for the requested node to get elected as primary due
+ // to election timeout.
+ if (!res.ok && res.code === ErrorCodes.CommandNotFound) {
+ jsTest.log(
+ 'replSetStepUp command not supported on node ' + node.host +
+ " ; so wait for the requested node to get elected due to election timeout.");
+ if (this.getPrimary() === node) {
+ return true;
+ }
+ }
+ assert.commandWorked(res);
- jsTest.log("Stepping up: " + node.host + " in stepUp");
- assert.commandWorked(node.adminCommand({replSetStepUp: 1}));
- this.awaitNodesAgreeOnPrimary();
- assert.eq(this.getPrimary(), node, 'failed to step up node ' + node.host + ' in stepUp');
- };
+            // Since the assert.soon() timeout is 10 minutes (default), set the
+            // awaitNodesAgreeOnPrimary() timeout to 1 minute (60,000 ms) so that replSetStepUp
+            // can be retried if the replica set fails to agree on the primary.
+            const timeout = 60 * 1000;
+ this.awaitNodesAgreeOnPrimary(timeout, this.nodes, this.getNodeId(node));
- /**
- * Steps up 'node' as primary.
- */
- this.stepUpNoAwaitReplication = function(node) {
- jsTest.log("Stepping up: " + node.host + " in stepUpNoAwaitReplication");
- assert.soonNoExcept(
- function() {
- assert.commandWorked(node.adminCommand({replSetStepUp: 1}));
- self.awaitNodesAgreeOnPrimary(
- self.kDefaultTimeoutMS, self.nodes, self.getNodeId(node));
- return node.adminCommand('replSetGetStatus').myState === ReplSetTest.State.PRIMARY;
- },
- 'failed to step up node ' + node.host + ' in stepUpNoAwaitReplication',
- self.kDefaultTimeoutMS);
+ // getPrimary() guarantees that there will be only one writable primary for a replica
+ // set.
+ if (!awaitWritablePrimary || this.getPrimary() === node) {
+ return true;
+ }
+
+ jsTest.log(node.host + ' is not primary after stepUp command');
+ return false;
+ }, "Timed out while waiting for stepUp to succeed on node in port: " + node.port);
+ jsTest.log("ReplSetTest stepUp: Finished stepping up " + node.host);
return node;
};
@@ -2844,7 +2860,11 @@ var ReplSetTest = function(opts) {
var existingNodes = conf.members.map(member => member.host);
self.ports = existingNodes.map(node => node.split(':')[1]);
- self.nodes = existingNodes.map(node => new Mongo(node));
+ self.nodes = existingNodes.map(node => {
+ let conn = new Mongo(node);
+ conn.name = conn.host;
+ return conn;
+ });
self.waitForKeys = false;
self.host = existingNodes[0].split(':')[0];
self.name = conf._id;