summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/multiVersion/genericSetFCVUsage/downgrade_after_rollback_via_refetch.js2
-rw-r--r--jstests/multiVersion/libs/multiversion_rollback.js2
-rw-r--r--jstests/noPassthrough/rollback_wt_cache_full.js2
-rw-r--r--jstests/replsets/avg_num_catchup_ops.js9
-rw-r--r--jstests/replsets/change_stream_speculative_majority_rollback.js2
-rw-r--r--jstests/replsets/libs/rollback_test.js115
-rw-r--r--jstests/replsets/rollback_after_disabling_majority_reads.js2
-rw-r--r--jstests/replsets/rollback_after_enabling_majority_reads.js2
-rw-r--r--jstests/replsets/rollback_via_refetch_commit_transaction.js2
-rw-r--r--jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js2
-rw-r--r--jstests/replsets/transactions_after_rollback_via_refetch.js2
-rw-r--r--jstests/replsets/unrecoverable_rollback_early_exit.js2
-rw-r--r--src/mongo/shell/replsettest.js10
13 files changed, 93 insertions, 61 deletions
diff --git a/jstests/multiVersion/genericSetFCVUsage/downgrade_after_rollback_via_refetch.js b/jstests/multiVersion/genericSetFCVUsage/downgrade_after_rollback_via_refetch.js
index d3ffe4a3b48..9f48b3a82a0 100644
--- a/jstests/multiVersion/genericSetFCVUsage/downgrade_after_rollback_via_refetch.js
+++ b/jstests/multiVersion/genericSetFCVUsage/downgrade_after_rollback_via_refetch.js
@@ -23,7 +23,7 @@ function testDowngrade(enableMajorityReadConcern) {
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
config.settings = {chainingAllowed: false};
- replTest.initiate(config);
+ replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);
// Set the featureCompatibilityVersion to the last-stable version, so that we can downgrade
diff --git a/jstests/multiVersion/libs/multiversion_rollback.js b/jstests/multiVersion/libs/multiversion_rollback.js
index 66db2114ad9..9263052a956 100644
--- a/jstests/multiVersion/libs/multiversion_rollback.js
+++ b/jstests/multiVersion/libs/multiversion_rollback.js
@@ -114,7 +114,7 @@ function setupReplicaSet(testName, rollbackNodeVersion, syncSourceVersion) {
var rst = new ReplSetTest(
{name: testName, nodes: initialNodes, useBridge: true, settings: {chainingAllowed: false}});
rst.startSet();
- rst.initiate();
+ rst.initiateWithHighElectionTimeout();
// Wait for both nodes to be up.
waitForState(rst.nodes[0], ReplSetTest.State.PRIMARY);
diff --git a/jstests/noPassthrough/rollback_wt_cache_full.js b/jstests/noPassthrough/rollback_wt_cache_full.js
index f7733a0110b..c2c77bad07f 100644
--- a/jstests/noPassthrough/rollback_wt_cache_full.js
+++ b/jstests/noPassthrough/rollback_wt_cache_full.js
@@ -32,7 +32,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
// Prior to 4.0, rollback imposed a 300 MB limit on the total size of documents to refetch from
// the sync source. Therefore, we select values for numDocs and minDocSizeMB, while accounting
diff --git a/jstests/replsets/avg_num_catchup_ops.js b/jstests/replsets/avg_num_catchup_ops.js
index 3b194e700ab..5a3bd488722 100644
--- a/jstests/replsets/avg_num_catchup_ops.js
+++ b/jstests/replsets/avg_num_catchup_ops.js
@@ -10,14 +10,11 @@ load("jstests/replsets/libs/election_metrics.js");
load("jstests/replsets/rslib.js");
const name = jsTestName();
-const rst = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+const rst = new ReplSetTest(
+ {name: name, nodes: 3, useBridge: true, settings: {catchUpTimeoutMillis: 4 * 60 * 1000}});
rst.startSet();
-const confSettings = {
- catchUpTimeoutMillis: 4 * 60 * 1000,
-};
-
-rst.initiateWithHighElectionTimeout(confSettings);
+rst.initiateWithHighElectionTimeout();
rst.awaitSecondaryNodes();
rst.awaitReplication();
diff --git a/jstests/replsets/change_stream_speculative_majority_rollback.js b/jstests/replsets/change_stream_speculative_majority_rollback.js
index 3fad115cadd..e53b65ade88 100644
--- a/jstests/replsets/change_stream_speculative_majority_rollback.js
+++ b/jstests/replsets/change_stream_speculative_majority_rollback.js
@@ -27,7 +27,7 @@ const replTest = new ReplSetTest({
replTest.startSet();
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(name, replTest);
const primary = rollbackTest.getPrimary();
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js
index ba76c4885f8..eb37aa4f449 100644
--- a/jstests/replsets/libs/rollback_test.js
+++ b/jstests/replsets/libs/rollback_test.js
@@ -12,6 +12,28 @@
* 4. kSyncSourceOpsDuringRollback: apply operations on the sync source after rollback has begun.
* 5. kSteadyStateOps: (same as stage 1) with the option of waiting for the rollback to finish.
*
+ * --------------------------------------------------
+ * | STATE TRANSITION | NETWORK TOPOLOGY |
+ * |-------------------------------------------------
+ * | kSteadyStateOps | T |
+ * | | / \ |
+ * | | P1 - S |
+ * |-----------------------------|------------------|
+ * | kRollbackOps | T |
+ * | | / |
+ * | | P1 S |
+ * |-----------------------------|------------------|
+ * | kSyncSourceOpsBeforeRollback| T |
+ * | | \ |
+ * | | P1 P2 |
+ * |-----------------------------|------------------|
+ * | kSyncSourceOpsDuringRollback| T |
+ * | | \ |
+ * | | R - P2 |
+ * |-------------------------------------------------
+ * Note: 'T' refers to the tiebreaker node, 'S' refers to the secondary, 'P[n]' refers to the
+ * primary in the nth term and 'R' refers to the rollback node.
+ *
* Please refer to the various `transition*` functions for more information on the behavior
* of each stage.
*/
@@ -31,6 +53,8 @@ load("jstests/hooks/validate_collections.js");
* must be configured with priority: 0 so that it won't be elected primary. Throughout
* this file, this secondary will be referred to as the tiebreaker node.
* 2. It must be running with mongobridge.
+ * 3. It must be initiated with a high election timeout to avoid unplanned elections in the
+ * rollback test.
*
* If the caller does not provide their own replica set, a standard three-node
* replset will be initialized instead, with all nodes running the latest version.
@@ -112,6 +136,12 @@ function RollbackTest(name = "RollbackTest", replSet) {
false,
"Must set up ReplSetTest with chaining disabled.");
+ // Make sure electionTimeoutMillis is set to a high value to avoid unplanned elections in
+ // the rollback test.
+ assert.gte(config.settings.electionTimeoutMillis,
+ ReplSetTest.kForeverMillis,
+ "Must initiate the replset with high election timeout");
+
// Make sure the primary is not a priority: 0 node.
assert.neq(0, config.members[0].priority);
assert.eq(config.members[0].host, curPrimary.host);
@@ -160,7 +190,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
let config = replSet.getReplSetConfig();
config.members[2].priority = 0;
config.settings = {chainingAllowed: false};
- replSet.initiate(config);
+ replSet.initiateWithHighElectionTimeout(config);
assert.eq(replSet.nodes.length,
kNumDataBearingNodes,
@@ -214,6 +244,17 @@ function RollbackTest(name = "RollbackTest", replSet) {
}
}
+ function stepUp(conn) {
+ log(`Waiting for the new primary ${conn.host} to be elected`);
+ assert.soonNoExcept(() => {
+ const res = conn.adminCommand({replSetStepUp: 1});
+ return res.ok;
+ });
+
+ // Waits for the primary to accept new writes.
+ return rst.getPrimary();
+ }
+
/**
* Add a node to the ReplSetTest. It must be a non-voting node. If reInitiate is true,
* also run ReplSetTest.reInitiate to configure the replset to include the new node.
@@ -232,10 +273,10 @@ function RollbackTest(name = "RollbackTest", replSet) {
* be replicated to all nodes and should not be rolled back.
*/
this.transitionToSteadyStateOperations = function({skipDataConsistencyChecks = false} = {}) {
- // If we shut down the primary before the secondary begins rolling back against it, then
- // the secondary may get elected and not actually roll back. In that case we do not check
- // the RBID and just await replication.
- if (!TestData.rollbackShutdowns) {
+ const isMajorityReadConcernEnabledOnRollbackNode =
+ assert.commandWorked(curSecondary.adminCommand({serverStatus: 1}))
+ .storageEngine.supportsCommittedReads;
+ if (isMajorityReadConcernEnabledOnRollbackNode) {
log(`Waiting for rollback to complete on ${curSecondary.host}`, true);
let rbid = -1;
assert.soon(() => {
@@ -252,6 +293,8 @@ function RollbackTest(name = "RollbackTest", replSet) {
return rbid === lastRBID + 1;
}, "Timed out waiting for RBID to increment on " + curSecondary.host);
} else {
+ // TODO: After fixing SERVER-45178, we can remove the else block as we are guaranteed
+ // that the rollback id will get updated if the rollback has happened on that node.
log(`Skipping RBID check on ${curSecondary.host} because shutdowns ` +
`may prevent a rollback here.`);
}
@@ -287,9 +330,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
log(`Rollback on ${curSecondary.host} (if needed) and awaitReplication completed`, true);
- // Unfreeze the node if it was previously frozen, so that it can run for the election.
- assert.commandWorked(curSecondary.adminCommand({replSetFreeze: 0}));
-
// We call transition to steady state ops after awaiting replication has finished,
// otherwise it could be confusing to see operations being replicated when we're already
// in rollback complete state.
@@ -375,13 +415,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
elected`);
curSecondary.reconnect([tiebreakerNode]);
- log(`Waiting for the new primary ${curSecondary.host} to be elected`);
- assert.soonNoExcept(() => {
- const res = curSecondary.adminCommand({replSetStepUp: 1});
- return res.ok;
- });
-
- const newPrimary = rst.getPrimary();
+ const newPrimary = stepUp(curSecondary);
// As a sanity check, ensure the new primary is the old secondary. The opposite scenario
// should never be possible with 2 electable nodes and the sequence of operations thus far.
@@ -393,6 +427,16 @@ function RollbackTest(name = "RollbackTest", replSet) {
curSecondary = curPrimary;
curPrimary = newPrimary;
+ // To ensure rollback won't be skipped for shutdowns, wait until the no-op oplog
+ // entry ("new primary") written in the new term is persisted to disk.
+ // Note: rollbackShutdowns are not allowed for in-memory/ephemeral storage engines.
+ if (TestData.rollbackShutdowns) {
+ const dbName = "TermGetsPersisted";
+ assert.commandWorked(curPrimary.getDB(dbName).ensureRollback.insert(
+ {thisDocument: 'is inserted to ensure rollback is not skipped'},
+ {writeConcern: {w: 1, j: true}}));
+ }
+
lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
// The current primary, which is the old secondary, will later become the sync source.
@@ -411,18 +455,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
this.transitionToSyncSourceOperationsDuringRollback = function() {
transitionIfAllowed(State.kSyncSourceOpsDuringRollback);
- // If the rollback node was restarted, make sure it has finished restarting and become a
- // secondary again. Otherwise, the subsequent 'replSetFreeze' command could fail with
- // NotYetInitialized if the node is still in the process of restarting (e.g. not yet loaded
- // the local config or reached the STARTUP2 state).
- waitForState(curSecondary, ReplSetTest.State.SECONDARY);
-
- // If the nodes are restarted after the rollback node is able to rollback successfully and
- // catch up to curPrimary's oplog, then the rollback node can become the new primary.
- // If so, it can lead to unplanned state transitions, like unconditional step down, during
- // the test. To avoid those problems, prevent rollback node from starting an election.
- assert.commandWorked(curSecondary.adminCommand({replSetFreeze: ReplSetTest.kForeverSecs}));
-
log(`Reconnecting the secondary ${curSecondary.host} so it'll go into rollback`);
// Reconnect the rollback node to the current primary, which is the node we want to sync
// from. If we reconnect to both the current primary and the tiebreaker node, the rollback
@@ -487,20 +519,21 @@ function RollbackTest(name = "RollbackTest", replSet) {
log(`Restarting node ${hostName}`);
rst.start(nodeId, startOptions, true /* restart */);
- // Freeze the node if the restarted node is the rollback node.
- if (curState === State.kSyncSourceOpsDuringRollback &&
- rst.getNodeId(curSecondary) === nodeId) {
- rst.freeze(nodeId);
- }
-
- const oldPrimary = curPrimary;
- // Wait for the new primary to be elected and ready to take operations before continuing.
- curPrimary = rst.getPrimary();
-
- // The primary can change after node restarts only if all the 3 nodes are connected to each
- // other.
- if (curState !== State.kSteadyStateOps) {
- assert.eq(curPrimary, oldPrimary);
+ // Step up if the restarted node is the current primary.
+ if (rst.getNodeId(curPrimary) === nodeId) {
+ // To keep the step up below from being flaky, we step down and freeze the current
+ // secondary so that it cannot start a new election. If the server parameter
+ // "enableElectionHandoff" is set to true, the current primary may explicitly ask the
+ // current secondary to step up while shutting down.
+ rst.freeze(curSecondary);
+
+ const newPrimary = stepUp(curPrimary);
+ // As a sanity check, ensure the new primary is the current primary. This is true,
+ // because we have configured the replica set with high electionTimeoutMillis.
+ assert.eq(newPrimary, curPrimary, "Did not elect the same node as primary");
+
+ // Unfreeze the current secondary so that it can step up again.
+ assert.commandWorked(curSecondary.adminCommand({replSetFreeze: 0}));
}
curSecondary = rst.getSecondary();
diff --git a/jstests/replsets/rollback_after_disabling_majority_reads.js b/jstests/replsets/rollback_after_disabling_majority_reads.js
index e8b2eeeebba..f1154068ee5 100644
--- a/jstests/replsets/rollback_after_disabling_majority_reads.js
+++ b/jstests/replsets/rollback_after_disabling_majority_reads.js
@@ -21,7 +21,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(name, replTest);
const rollbackNode = rollbackTest.transitionToRollbackOperations();
diff --git a/jstests/replsets/rollback_after_enabling_majority_reads.js b/jstests/replsets/rollback_after_enabling_majority_reads.js
index 85093baa51f..f7c346aca9e 100644
--- a/jstests/replsets/rollback_after_enabling_majority_reads.js
+++ b/jstests/replsets/rollback_after_enabling_majority_reads.js
@@ -27,7 +27,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);
jsTest.log("Ensure the stable timestamp is ahead of the common point on the rollback node.");
diff --git a/jstests/replsets/rollback_via_refetch_commit_transaction.js b/jstests/replsets/rollback_via_refetch_commit_transaction.js
index 317fc7b97f8..35e21b25bd1 100644
--- a/jstests/replsets/rollback_via_refetch_commit_transaction.js
+++ b/jstests/replsets/rollback_via_refetch_commit_transaction.js
@@ -31,7 +31,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
const primaryNode = rst.getPrimary();
diff --git a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
index acb8bea802b..bd229838a62 100644
--- a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
+++ b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js
@@ -26,7 +26,7 @@ config.members[2].priority = 0;
config.settings = {
chainingAllowed: false
};
-rst.initiate(config);
+rst.initiateWithHighElectionTimeout(config);
const rollbackTest = new RollbackTest(collName, rst);
diff --git a/jstests/replsets/transactions_after_rollback_via_refetch.js b/jstests/replsets/transactions_after_rollback_via_refetch.js
index 463d70f4489..80ef4a8ded9 100644
--- a/jstests/replsets/transactions_after_rollback_via_refetch.js
+++ b/jstests/replsets/transactions_after_rollback_via_refetch.js
@@ -61,7 +61,7 @@ let replTest = new ReplSetTest({
replTest.startSet();
let config = replTest.getReplSetConfig();
config.members[2].priority = 0;
-replTest.initiate(config);
+replTest.initiateWithHighElectionTimeout(config);
let rollbackTest = new RollbackTest(name, replTest);
diff --git a/jstests/replsets/unrecoverable_rollback_early_exit.js b/jstests/replsets/unrecoverable_rollback_early_exit.js
index 96428a719a7..c709820eaa6 100644
--- a/jstests/replsets/unrecoverable_rollback_early_exit.js
+++ b/jstests/replsets/unrecoverable_rollback_early_exit.js
@@ -30,7 +30,7 @@ const rst = new ReplSetTest({
nodeOptions: {enableMajorityReadConcern: "false"}
});
rst.startSet();
-rst.initiate();
+rst.initiateWithHighElectionTimeout();
const rollbackTest = new RollbackTest(testName, rst);
const rollbackNode = rollbackTest.transitionToRollbackOperations();
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 1b3b9df19eb..ff2e713a695 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -1386,10 +1386,11 @@ var ReplSetTest = function(opts) {
* Modifies the election timeout to be 24 hours so that no unplanned elections happen. Then
* runs replSetInitiate on the replica set with the new config.
*/
- this.initiateWithHighElectionTimeout = function(opts = {}) {
- let cfg = this.getReplSetConfig();
- cfg.settings = Object.assign(opts, {"electionTimeoutMillis": 24 * 60 * 60 * 1000});
- this.initiate(cfg);
+ this.initiateWithHighElectionTimeout = function(config) {
+ config = config || this.getReplSetConfig();
+ config.settings = config.settings || {};
+ config.settings["electionTimeoutMillis"] = ReplSetTest.kForeverMillis;
+ this.initiate(config);
};
/**
@@ -3197,6 +3198,7 @@ ReplSetTest.kDefaultTimeoutMS = 10 * 60 * 1000;
* Global default number that's effectively infinite.
*/
ReplSetTest.kForeverSecs = 24 * 60 * 60;
+ReplSetTest.kForeverMillis = ReplSetTest.kForeverSecs * 1000;
/**
* Set of states that the replica set can be in. Used for the wait functions.