diff options
4 files changed, 76 insertions, 21 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 2fb4145c37c..c38b750f49e 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -53,6 +53,8 @@ replica_sets_multiversion: test_file: jstests/replsets/rollback_dup_ids_clean_shutdown_during_rollback.js - ticket: SERVER-45906 test_file: jstests/replsets/trigger_initial_stable_checkpoint.js +- ticket: SERVER-45178 + test_file: jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js sharding_multiversion: - ticket: SERVER-38691 diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js index 80a73fee1d4..6d0f87d4aeb 100644 --- a/jstests/replsets/libs/rollback_test.js +++ b/jstests/replsets/libs/rollback_test.js @@ -338,17 +338,9 @@ function RollbackTest(name = "RollbackTest", replSet) { throw e; } - if (this.isMajorityReadConcernEnabledOnRollbackNode) { - let rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; - assert(rbid > lastRBID, - `Expected RBID to increment past ${lastRBID} on ${curSecondary.host}`); - } else { - // TODO: After fixing SERVER-45178, we can remove the else block as we are - // guaranteed that the rollback id will get updated if the rollback has happened on - // that node. - log(`Skipping RBID check on ${curSecondary.host} because shutdowns ` + - `may prevent a rollback here.`); - } + let rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; + assert(rbid > lastRBID, + `Expected RBID to increment past ${lastRBID} on ${curSecondary.host}`); assert.eq(oplogTop(curPrimary), oplogTop(curSecondary)); @@ -396,12 +388,6 @@ function RollbackTest(name = "RollbackTest", replSet) { rst.awaitSecondaryNodes(null, [curSecondary, tiebreakerNode]); rst.awaitReplication(null, null, [curSecondary]); - // The current primary will be the node that rolls back. Check if it supports majority reads - // here while we are in a steady state. - this.isMajorityReadConcernEnabledOnRollbackNode = - assert.commandWorked(curPrimary.adminCommand({serverStatus: 1})) - .storageEngine.supportsCommittedReads; - transitionIfAllowed(State.kRollbackOps); // Disconnect the secondary from the tiebreaker node before we disconnect the secondary from diff --git a/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js new file mode 100644 index 00000000000..0effc3cc1f5 --- /dev/null +++ b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js @@ -0,0 +1,66 @@ +/** + * This test demonstrates that rollback via refetch always increments the rollback id as soon as it + * resolves the common point and before proceeding with other operations. + * + * This is a regression test that makes sure we avoid the scenario where we truncate our oplog (at + * which point the rollback is effectively finished), then shut down uncleanly before we get a + * chance to update the rollbackId. + * + * @tags: [requires_journaling] + */ + +(function() { +"use strict"; +load("jstests/libs/fail_point_util.js"); +load("jstests/replsets/libs/rollback_test.js"); +load("jstests/replsets/rslib.js"); + +const name = jsTestName(); +TestData.allowUncleanShutdowns = true; + +jsTest.log("Set up a RollbackTest with enableMajorityReadConcern=false"); +const rst = new ReplSetTest({ + name, + nodes: [{}, {}, {rsConfig: {priority: 0}}], + useBridge: true, + nodeOptions: {enableMajorityReadConcern: "false"}, + settings: {chainingAllowed: false} +}); + +rst.startSet(); +rst.initiateWithHighElectionTimeout(); + +const rollbackTest = new RollbackTest(name, rst); +const rollbackNode = rollbackTest.transitionToRollbackOperations(); + +const baseRBID = assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid; + +rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + +jsTestLog("Make rollback-via-refetch exit early after truncating the oplog"); +assert.commandWorked(rollbackNode.adminCommand( + {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "alwaysOn"})); + +rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + +jsTestLog("Wait until we hit the failpoint"); +assert.soonNoExcept(function() { + assert.commandWorked(rollbackNode.adminCommand({ + waitForFailPoint: "rollbackExitEarlyAfterCollectionDrop", + timesEntered: 1, + maxTimeMS: kDefaultWaitForFailPointTimeout + })); + return true; +}, "failed to reconnect for waitForFailPoint"); + +// Check that the RBID has still managed to advance. +// Looking at the RBID directly is our first line of defense. +assert.eq(baseRBID + 1, assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid); + +assert.commandWorked(rollbackNode.adminCommand( + {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "off"})); + +// Verify that the node can rejoin the set as normal. +rollbackTest.transitionToSteadyStateOperations(); +rollbackTest.stop(); +}());
\ No newline at end of file diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index aad64b6db3c..f11d195259d 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -1244,10 +1244,11 @@ Status _syncRollback(OperationContext* opCtx, invariant(commonPoint >= committedSnapshot); try { - ON_BLOCK_EXIT([&] { - auto status = replicationProcess->incrementRollbackID(opCtx); - fassert(40497, status); - }); + // It is always safe to increment the rollback ID first, even if we fail to complete + // the rollback. + auto status = replicationProcess->incrementRollbackID(opCtx); + fassert(40497, status); + syncFixUp(opCtx, how, rollbackSource, replCoord, replicationProcess); if (MONGO_unlikely(rollbackExitEarlyAfterCollectionDrop.shouldFail())) { |