summary | refs | log | tree | commit | diff
path: root/jstests
diff options
context:
space:
mode:
author: Suganthi Mani <suganthi.mani@mongodb.com> 2020-01-15 04:25:51 +0000
committer: evergreen <evergreen@mongodb.com> 2020-01-15 04:25:51 +0000
commit: 9b17593e40df04bc1e001bac2cf9cdda62570011 (patch)
tree: e7728d0b226cd44bb3c4c1ea836c546bfa6c38df /jstests
parent: 8457a35c063b50b9ab9c2ad50b8e66143609a27d (diff)
download: mongo-9b17593e40df04bc1e001bac2cf9cdda62570011.tar.gz
SERVER-37390 Run rollback test fixture with high election timeout to
avoid any unplanned election. (cherry picked from commit 02ce213b40c56096c9c57e093778b0889c335bb9) (cherry picked from commit 5b85b8787d6e8cfd4234b09304f3538506f70bd9) (cherry picked from commit 9b470eb73873f5db5c9fcee5df5316d477a1fa12)
Diffstat (limited to 'jstests')
-rw-r--r-- jstests/multiVersion/downgrade_after_rollback_via_refetch.js 2
-rw-r--r-- jstests/multiVersion/libs/multiversion_rollback.js 2
-rw-r--r-- jstests/multiVersion/mixed_version_transactions_during_rollback_via_refetch.js 2
-rw-r--r-- jstests/noPassthrough/rollback_wt_cache_full.js 2
-rw-r--r-- jstests/replsets/avg_num_catchup_ops.js 9
-rw-r--r-- jstests/replsets/change_stream_speculative_majority_rollback.js 2
-rw-r--r-- jstests/replsets/libs/rollback_test.js 142
-rw-r--r-- jstests/replsets/rollback_after_disabling_majority_reads.js 2
-rw-r--r-- jstests/replsets/rollback_after_enabling_majority_reads.js 2
-rw-r--r-- jstests/replsets/rollback_via_refetch_commit_transaction.js 2
-rw-r--r-- jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js 2
-rw-r--r-- jstests/replsets/transactions_after_rollback_via_refetch.js 2
12 files changed, 90 insertions, 81 deletions
diff --git a/jstests/multiVersion/downgrade_after_rollback_via_refetch.js b/jstests/multiVersion/downgrade_after_rollback_via_refetch.js
index 83de5b47a0e..e964cdab4f3 100644
--- a/jstests/multiVersion/downgrade_after_rollback_via_refetch.js
+++ b/jstests/multiVersion/downgrade_after_rollback_via_refetch.js
@@ -23,7 +23,7 @@ function testDowngrade(enableMajorityReadConcern) {
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
config.settings = {chainingAllowed: false};
- replTest.initiate(config);
+ replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);
// Set the featureCompatibilityVersion to 4.0, so that we can downgrade the rollback node.
diff --git a/jstests/multiVersion/libs/multiversion_rollback.js b/jstests/multiVersion/libs/multiversion_rollback.js
index 7ae61aaf732..2df9cc03e31 100644
--- a/jstests/multiVersion/libs/multiversion_rollback.js
+++ b/jstests/multiVersion/libs/multiversion_rollback.js
@@ -114,7 +114,7 @@ function setupReplicaSet(testName, rollbackNodeVersion, syncSourceVersion) {
var rst = new ReplSetTest(
{name: testName, nodes: initialNodes, useBridge: true, settings: {chainingAllowed: false}});
rst.startSet();
- rst.initiate();
+ rst.initiateWithHighElectionTimeout();
// Wait for both nodes to be up.
waitForState(rst.nodes[0], ReplSetTest.State.PRIMARY);
diff --git a/jstests/multiVersion/mixed_version_transactions_during_rollback_via_refetch.js b/jstests/multiVersion/mixed_version_transactions_during_rollback_via_refetch.js
index db3bcada64f..78372327d28 100644
--- a/jstests/multiVersion/mixed_version_transactions_during_rollback_via_refetch.js
+++ b/jstests/multiVersion/mixed_version_transactions_during_rollback_via_refetch.js
@@ -23,7 +23,7 @@ const config = rst.getReplSetConfig();
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
// A 4.2 binVersion primary with empty data files will set FCV to 4.2 when elected. This will
// cause an IncompatibleServerVersion error when connecting with a 4.0 binVersion node.
// Therefore, we wait until the replica set is initiated with FCV4.0 before switching the
diff --git a/jstests/noPassthrough/rollback_wt_cache_full.js b/jstests/noPassthrough/rollback_wt_cache_full.js
index 6ea271b1dba..c406ee94fb6 100644
--- a/jstests/noPassthrough/rollback_wt_cache_full.js
+++ b/jstests/noPassthrough/rollback_wt_cache_full.js
@@ -32,7 +32,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
// Prior to 4.0, rollback imposed a 300 MB limit on the total size of documents to refetch from
// the sync source. Therefore, we select values for numDocs and minDocSizeMB, while accounting
diff --git a/jstests/replsets/avg_num_catchup_ops.js b/jstests/replsets/avg_num_catchup_ops.js
index 4d3feae0d28..cd9828e7630 100644
--- a/jstests/replsets/avg_num_catchup_ops.js
+++ b/jstests/replsets/avg_num_catchup_ops.js
@@ -11,14 +11,11 @@ load("jstests/replsets/libs/election_metrics.js");
load("jstests/replsets/rslib.js");
const name = jsTestName();
-const rst = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+const rst = new ReplSetTest(
+ {name: name, nodes: 3, useBridge: true, settings: {catchUpTimeoutMillis: 4 * 60 * 1000}});
rst.startSet();
-const confSettings = {
- catchUpTimeoutMillis: 4 * 60 * 1000,
-};
-
-rst.initiateWithHighElectionTimeout(confSettings);
+rst.initiateWithHighElectionTimeout();
rst.awaitSecondaryNodes();
rst.awaitReplication();
diff --git a/jstests/replsets/change_stream_speculative_majority_rollback.js b/jstests/replsets/change_stream_speculative_majority_rollback.js
index 06e4fccc51d..70d340ec678 100644
--- a/jstests/replsets/change_stream_speculative_majority_rollback.js
+++ b/jstests/replsets/change_stream_speculative_majority_rollback.js
@@ -27,7 +27,7 @@ const replTest = new ReplSetTest({
replTest.startSet();
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(name, replTest);
const primary = rollbackTest.getPrimary();
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js
index 7ab83e9fa34..f138448abd9 100644
--- a/jstests/replsets/libs/rollback_test.js
+++ b/jstests/replsets/libs/rollback_test.js
@@ -12,6 +12,28 @@
* 4. kSyncSourceOpsDuringRollback: apply operations on the sync source after rollback has begun.
* 5. kSteadyStateOps: (same as stage 1) with the option of waiting for the rollback to finish.
*
+ * --------------------------------------------------
+ * | STATE TRANSITION | NETWORK TOPOLOGY |
+ * |-------------------------------------------------
+ * | kSteadyStateOps | T |
+ * | | / \ |
+ * | | P1 - S |
+ * |-----------------------------|------------------|
+ * | kRollbackOps | T |
+ * | | / |
+ * | | P1 S |
+ * |-----------------------------|------------------|
+ * | kSyncSourceOpsBeforeRollback| T |
+ * | | \ |
+ * | | P1 P2 |
+ * |-----------------------------|------------------|
+ * | kSyncSourceOpsDuringRollback| T |
+ * | | \ |
+ * | | R - P2 |
+ * |-------------------------------------------------
+ * Note: 'T' refers to tiebreaker node, 'S' refers to secondary, 'P[n]' refers to primary in
+ * nth term and 'R' refers to rollback node.
+ *
* Please refer to the various `transition*` functions for more information on the behavior
* of each stage.
*/
@@ -31,6 +53,8 @@ load("jstests/hooks/validate_collections.js");
* must be configured with priority: 0 so that it won't be elected primary. Throughout
* this file, this secondary will be referred to as the tiebreaker node.
* 2. It must be running with mongobridge.
+ * 3. Must initiate the replset with high election timeout to avoid unplanned elections in the
+ * rollback test.
*
* If the caller does not provide their own replica set, a standard three-node
* replset will be initialized instead, with all nodes running the latest version.
@@ -61,7 +85,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
const SIGTERM = 15;
const kNumDataBearingNodes = 3;
const kElectableNodes = 2;
- const kForeverSecs = 24 * 60 * 60;
let rst;
let curPrimary;
@@ -106,6 +129,12 @@ function RollbackTest(name = "RollbackTest", replSet) {
false,
"Must set up ReplSetTest with chaining disabled.");
+ // Make sure electionTimeoutMillis is set to high value to avoid unplanned elections in
+ // the rollback test.
+ assert.gte(config.settings.electionTimeoutMillis,
+ ReplSetTest.kForeverMillis,
+ "Must initiate the replset with high election timeout");
+
// Make sure the primary is not a priority: 0 node.
assert.neq(0, config.members[0].priority);
assert.eq(config.members[0].host, curPrimary.host);
@@ -146,7 +175,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
let config = replSet.getReplSetConfig();
config.members[2].priority = 0;
config.settings = {chainingAllowed: false};
- replSet.initiate(config);
+ replSet.initiateWithHighElectionTimeout(config);
assert.eq(replSet.nodes.length,
kNumDataBearingNodes,
@@ -200,15 +229,23 @@ function RollbackTest(name = "RollbackTest", replSet) {
}
}
+ function stepUp(conn) {
+ log(`Waiting for the new primary ${conn.host} to be elected`);
+ assert.soonNoExcept(() => {
+ const res = conn.adminCommand({replSetStepUp: 1});
+ return res.ok;
+ });
+
+ // Waits for the primary to accept new writes.
+ return rst.getPrimary();
+ }
+
/**
* Transition from a rollback state to a steady state. Operations applied in this phase will
* be replicated to all nodes and should not be rolled back.
*/
this.transitionToSteadyStateOperations = function({skipDataConsistencyChecks = false} = {}) {
- // If we shut down the primary before the secondary begins rolling back against it, then
- // the secondary may get elected and not actually roll back. In that case we do not check
- // the RBID and just await replication.
- if (!TestData.rollbackShutdowns) {
+ if (this.isMajorityReadConcernEnabledOnRollbackNode) {
log(`Waiting for rollback to complete on ${curSecondary.host}`, true);
let rbid = -1;
assert.soon(() => {
@@ -225,6 +262,8 @@ function RollbackTest(name = "RollbackTest", replSet) {
return rbid === lastRBID + 1;
}, "Timed out waiting for RBID to increment on " + curSecondary.host);
} else {
+ // TODO: After fixing SERVER-45178, we can remove the else block as we are guaranteed
+ // that the rollback id will get updated if the rollback has happened on that node.
log(`Skipping RBID check on ${curSecondary.host} because shutdowns ` +
`may prevent a rollback here.`);
}
@@ -241,9 +280,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
log(`Rollback on ${curSecondary.host} (if needed) and awaitReplication completed`, true);
- // Unfreeze the node if it was previously frozen, so that it can run for the election.
- assert.commandWorked(curSecondary.adminCommand({replSetFreeze: 0}));
-
// We call transition to steady state ops after awaiting replication has finished,
// otherwise it could be confusing to see operations being replicated when we're already
// in rollback complete state.
@@ -276,6 +312,12 @@ function RollbackTest(name = "RollbackTest", replSet) {
rst.awaitSecondaryNodes();
rst.awaitReplication(null, null, [curSecondary]);
+ // The current primary will be the node that rolls back. Check if it supports majority reads
+ // here while we are in a steady state.
+ this.isMajorityReadConcernEnabledOnRollbackNode =
+ assert.commandWorked(curPrimary.adminCommand({serverStatus: 1}))
+ .storageEngine.supportsCommittedReads;
+
transitionIfAllowed(State.kRollbackOps);
// Disconnect the secondary from the tiebreaker node before we disconnect the secondary from
@@ -329,13 +371,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
elected`);
curSecondary.reconnect([tiebreakerNode]);
- log(`Waiting for the new primary ${curSecondary.host} to be elected`);
- assert.soonNoExcept(() => {
- const res = curSecondary.adminCommand({replSetStepUp: 1});
- return res.ok;
- });
-
- const newPrimary = rst.getPrimary();
+ const newPrimary = stepUp(curSecondary);
// As a sanity check, ensure the new primary is the old secondary. The opposite scenario
// should never be possible with 2 electable nodes and the sequence of operations thus far.
@@ -347,6 +383,16 @@ function RollbackTest(name = "RollbackTest", replSet) {
curSecondary = curPrimary;
curPrimary = newPrimary;
+ // To ensure rollback won't be skipped for shutdowns, wait till the no-op oplog
+ // entry ("new primary") written in the new term gets persisted in the disk.
+ // Note: rollbackShutdowns are not allowed for in-memory/ephemeral storage engines.
+ if (TestData.rollbackShutdowns) {
+ const dbName = "TermGetsPersisted";
+ assert.commandWorked(curPrimary.getDB(dbName).ensureRollback.insert(
+ {thisDocument: 'is inserted to ensure rollback is not skipped'},
+ {writeConcern: {w: 1, j: true}}));
+ }
+
lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
// The current primary, which is the old secondary, will later become the sync source.
@@ -365,18 +411,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
this.transitionToSyncSourceOperationsDuringRollback = function() {
transitionIfAllowed(State.kSyncSourceOpsDuringRollback);
- // If the rollback node was restarted, make sure it has finished restarting and become a
- // secondary again. Otherwise, the subsequent 'replSetFreeze' command could fail with
- // NotYetInitialized if the node is still in the process of restarting (e.g. not yet loaded
- // the local config or reached the STARTUP2 state).
- waitForState(curSecondary, ReplSetTest.State.SECONDARY);
-
- // If the nodes are restarted after the rollback node is able to rollback successfully and
- // catch up to curPrimary's oplog, then the rollback node can become the new primary.
- // If so, it can lead to unplanned state transitions, like unconditional step down, during
- // the test. To avoid those problems, prevent rollback node from starting an election.
- assert.commandWorked(curSecondary.adminCommand({replSetFreeze: kForeverSecs}));
-
log(`Reconnecting the secondary ${curSecondary.host} so it'll go into rollback`);
// Reconnect the rollback node to the current primary, which is the node we want to sync
// from. If we reconnect to both the current primary and the tiebreaker node, the rollback
@@ -441,43 +475,21 @@ function RollbackTest(name = "RollbackTest", replSet) {
log(`Restarting node ${hostName}`);
rst.start(nodeId, startOptions, true /* restart */);
- // Freeze the node if the restarted node is the rollback node.
- if (curState === State.kSyncSourceOpsDuringRollback &&
- rst.getNodeId(curSecondary) === nodeId) {
- assert.soon(() => {
- try {
- // Try stepping down the rollback node if it became the primary after its
- // restart, as it might have caught up with the original primary and facing
- // arbitrary machine/network slowness.
- curSecondary.adminCommand({"replSetStepDown": kForeverSecs, "force": true});
- // Prevent rollback node from running election. There is a chance that this
- // node might have started running election or became primary after
- // 'replSetStepDown' cmd, so 'replSetFreeze' cmd can fail.
- assert.commandWorked(
- curSecondary.adminCommand({"replSetFreeze": kForeverSecs}));
- return true;
- } catch (e) {
- // Network error can happen if the node simultaneously tries to transition to
- // ROLLBACK state.
- if (isNetworkError(e) || e.code === ErrorCodes.NotSecondary ||
- e.code === ErrorCodes.NotYetInitialized) {
- log('Failed to freeze the node.' + tojson(e));
- return false;
- }
-
- throw e;
- }
- }, `Failed to run replSetFreeze cmd on ${curSecondary.host}`);
- }
-
- const oldPrimary = curPrimary;
- // Wait for the new primary to be elected and ready to take operations before continuing.
- curPrimary = rst.getPrimary();
-
- // The primary can change after node restarts only if all the 3 nodes are connected to each
- // other.
- if (curState !== State.kSteadyStateOps) {
- assert.eq(curPrimary, oldPrimary);
+ // Step up if the restarted node is the current primary.
+ if (rst.getNodeId(curPrimary) === nodeId) {
+ // To keep the step up below from being flaky, we step down and freeze the current
+ // secondary so that it cannot start a new election. If the server parameter
+ // "enableElectionHandoff" is set to true, the current primary explicitly asks the
+ // current secondary to step up while shutting down, which would start an election.
+ rst.freeze(curSecondary);
+
+ const newPrimary = stepUp(curPrimary);
+ // As a sanity check, ensure the new primary is the current primary. This is true,
+ // because we have configured the replica set with high electionTimeoutMillis.
+ assert.eq(newPrimary, curPrimary, "Did not elect the same node as primary");
+
+ // Unfreeze the current secondary so that it can step up again.
+ assert.commandWorked(curSecondary.adminCommand({replSetFreeze: 0}));
}
curSecondary = rst.getSecondary();
diff --git a/jstests/replsets/rollback_after_disabling_majority_reads.js b/jstests/replsets/rollback_after_disabling_majority_reads.js
index e8b2eeeebba..f1154068ee5 100644
--- a/jstests/replsets/rollback_after_disabling_majority_reads.js
+++ b/jstests/replsets/rollback_after_disabling_majority_reads.js
@@ -21,7 +21,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(name, replTest);
const rollbackNode = rollbackTest.transitionToRollbackOperations();
diff --git a/jstests/replsets/rollback_after_enabling_majority_reads.js b/jstests/replsets/rollback_after_enabling_majority_reads.js
index 6090d7967da..e13c0a3d5e4 100644
--- a/jstests/replsets/rollback_after_enabling_majority_reads.js
+++ b/jstests/replsets/rollback_after_enabling_majority_reads.js
@@ -27,7 +27,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);
jsTest.log("Ensure the stable timestamp is ahead of the common point on the rollback node.");
diff --git a/jstests/replsets/rollback_via_refetch_commit_transaction.js b/jstests/replsets/rollback_via_refetch_commit_transaction.js
index 317fc7b97f8..35e21b25bd1 100644
--- a/jstests/replsets/rollback_via_refetch_commit_transaction.js
+++ b/jstests/replsets/rollback_via_refetch_commit_transaction.js
@@ -31,7 +31,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
const primaryNode = rst.getPrimary();
diff --git a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
index 7cf47857d2a..3c8bd73da51 100644
--- a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
+++ b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
@@ -27,7 +27,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(collName, rst);
diff --git a/jstests/replsets/transactions_after_rollback_via_refetch.js b/jstests/replsets/transactions_after_rollback_via_refetch.js
index 463d70f4489..80ef4a8ded9 100644
--- a/jstests/replsets/transactions_after_rollback_via_refetch.js
+++ b/jstests/replsets/transactions_after_rollback_via_refetch.js
@@ -61,7 +61,7 @@ let replTest = new ReplSetTest({
replTest.startSet();
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);