summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--etc/backports_required_for_multiversion_tests.yml2
-rw-r--r--jstests/replsets/libs/rollback_test.js20
-rw-r--r--jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js66
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp9
4 files changed, 76 insertions, 21 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 2fb4145c37c..c38b750f49e 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -53,6 +53,8 @@ replica_sets_multiversion:
test_file: jstests/replsets/rollback_dup_ids_clean_shutdown_during_rollback.js
- ticket: SERVER-45906
test_file: jstests/replsets/trigger_initial_stable_checkpoint.js
+- ticket: SERVER-45178
+ test_file: jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js
sharding_multiversion:
- ticket: SERVER-38691
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js
index 80a73fee1d4..6d0f87d4aeb 100644
--- a/jstests/replsets/libs/rollback_test.js
+++ b/jstests/replsets/libs/rollback_test.js
@@ -338,17 +338,9 @@ function RollbackTest(name = "RollbackTest", replSet) {
throw e;
}
- if (this.isMajorityReadConcernEnabledOnRollbackNode) {
- let rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
- assert(rbid > lastRBID,
- `Expected RBID to increment past ${lastRBID} on ${curSecondary.host}`);
- } else {
- // TODO: After fixing SERVER-45178, we can remove the else block as we are
- // guaranteed that the rollback id will get updated if the rollback has happened on
- // that node.
- log(`Skipping RBID check on ${curSecondary.host} because shutdowns ` +
- `may prevent a rollback here.`);
- }
+ let rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
+ assert(rbid > lastRBID,
+ `Expected RBID to increment past ${lastRBID} on ${curSecondary.host}`);
assert.eq(oplogTop(curPrimary), oplogTop(curSecondary));
@@ -396,12 +388,6 @@ function RollbackTest(name = "RollbackTest", replSet) {
rst.awaitSecondaryNodes(null, [curSecondary, tiebreakerNode]);
rst.awaitReplication(null, null, [curSecondary]);
- // The current primary will be the node that rolls back. Check if it supports majority reads
- // here while we are in a steady state.
- this.isMajorityReadConcernEnabledOnRollbackNode =
- assert.commandWorked(curPrimary.adminCommand({serverStatus: 1}))
- .storageEngine.supportsCommittedReads;
-
transitionIfAllowed(State.kRollbackOps);
// Disconnect the secondary from the tiebreaker node before we disconnect the secondary from
diff --git a/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js
new file mode 100644
index 00000000000..0effc3cc1f5
--- /dev/null
+++ b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js
@@ -0,0 +1,66 @@
+/**
+ * This test demonstrates that rollback via refetch always increments the rollback id as soon as it
+ * resolves the common point and before proceeding with other operations.
+ *
+ * This is a regression test that makes sure we avoid the scenario where we truncate our oplog (at
+ * which point the rollback is effectively finished), then shut down uncleanly before we get a
+ * chance to update the rollbackId.
+ *
+ * @tags: [requires_journaling]
+ */
+
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+load("jstests/replsets/libs/rollback_test.js");
+load("jstests/replsets/rslib.js");
+
+const name = jsTestName();
+TestData.allowUncleanShutdowns = true;
+
+jsTest.log("Set up a RollbackTest with enableMajorityReadConcern=false");
+const rst = new ReplSetTest({
+ name,
+ nodes: [{}, {}, {rsConfig: {priority: 0}}],
+ useBridge: true,
+ nodeOptions: {enableMajorityReadConcern: "false"},
+ settings: {chainingAllowed: false}
+});
+
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const rollbackTest = new RollbackTest(name, rst);
+const rollbackNode = rollbackTest.transitionToRollbackOperations();
+
+const baseRBID = assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid;
+
+rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
+
+jsTestLog("Make rollback-via-refetch exit early after truncating the oplog");
+assert.commandWorked(rollbackNode.adminCommand(
+ {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "alwaysOn"}));
+
+rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+
+jsTestLog("Wait until we hit the failpoint");
+assert.soonNoExcept(function() {
+ assert.commandWorked(rollbackNode.adminCommand({
+ waitForFailPoint: "rollbackExitEarlyAfterCollectionDrop",
+ timesEntered: 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+ }));
+ return true;
+}, "failed to reconnect for waitForFailPoint");
+
+// Check that the RBID has still managed to advance.
+// Looking at the RBID directly is our first line of defense.
+assert.eq(baseRBID + 1, assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid);
+
+assert.commandWorked(rollbackNode.adminCommand(
+ {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "off"}));
+
+// Verify that the node can rejoin the set as normal.
+rollbackTest.transitionToSteadyStateOperations();
+rollbackTest.stop();
+}()); \ No newline at end of file
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index aad64b6db3c..f11d195259d 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -1244,10 +1244,11 @@ Status _syncRollback(OperationContext* opCtx,
invariant(commonPoint >= committedSnapshot);
try {
- ON_BLOCK_EXIT([&] {
- auto status = replicationProcess->incrementRollbackID(opCtx);
- fassert(40497, status);
- });
+ // It is always safe to increment the rollback ID first, even if we fail to complete
+ // the rollback.
+ auto status = replicationProcess->incrementRollbackID(opCtx);
+ fassert(40497, status);
+
syncFixUp(opCtx, how, rollbackSource, replCoord, replicationProcess);
if (MONGO_unlikely(rollbackExitEarlyAfterCollectionDrop.shouldFail())) {