diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2020-02-13 19:39:27 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-02-26 10:21:37 +0000 |
commit | 1939a3ca2a29d22e7b9976171bf2cc54bd9a1846 (patch) | |
tree | 64732f4be61fa7f988202e1d478dbd40590d9ac0 | |
parent | 24b02a2342f59e9f6f3d5ad775252d48a2731d7b (diff) | |
download | mongo-1939a3ca2a29d22e7b9976171bf2cc54bd9a1846.tar.gz |
SERVER-45178 Always update rollbackId before truncating oplog for rollback-via-refetch
create mode 100644 jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js
(cherry picked from commit 04a2c9acc7ca061fb86736b377b897b11f6c7c48)
5 files changed, 88 insertions, 8 deletions
diff --git a/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js new file mode 100644 index 00000000000..d560cc7d419 --- /dev/null +++ b/jstests/replsets/rollback_via_refetch_update_rollback_id_before_oplog_truncation.js @@ -0,0 +1,58 @@ +/** + * This test demonstrates that rollback via refetch always increments the rollback id as soon as it + * resolves the common point and before proceeding with other operations. + * + * This is a regression test that makes sure we avoid the scenario where we truncate our oplog (at + * which point the rollback is effectively finished), then shut down uncleanly before we get a + * chance to update the rollbackId. + * + * @tags: [requires_journaling] + */ + +(function() { + "use strict"; + load("jstests/replsets/libs/rollback_test.js"); + load("jstests/replsets/rslib.js"); + + const name = jsTestName(); + TestData.allowUncleanShutdowns = true; + + jsTest.log("Set up a RollbackTest with enableMajorityReadConcern=false"); + const rst = new ReplSetTest({ + name, + nodes: [{}, {}, {rsConfig: {arbiterOnly: true}}], + useBridge: true, + nodeOptions: {enableMajorityReadConcern: "false"}, + settings: {chainingAllowed: false} + }); + + rst.startSet(); + rst.initiate(); + + const rollbackTest = new RollbackTest(name, rst); + const rollbackNode = rollbackTest.transitionToRollbackOperations(); + + const baseRBID = assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid; + + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + + jsTestLog("Make rollback-via-refetch exit early after truncating the oplog"); + assert.commandWorked(rollbackNode.adminCommand( + {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "alwaysOn"})); + + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + + jsTestLog("Wait until we hit the failpoint"); + checkLog.contains(rollbackNode, "rollbackExitEarlyAfterCollectionDrop fail point enabled"); + + // Check that the RBID has still managed to advance. + // Looking at the RBID directly is our first line of defense. + assert.eq(baseRBID + 1, assert.commandWorked(rollbackNode.adminCommand("replSetGetRBID")).rbid); + + assert.commandWorked(rollbackNode.adminCommand( + {configureFailPoint: "rollbackExitEarlyAfterCollectionDrop", mode: "off"})); + + // Verify that the node can rejoin the set as normal. + rollbackTest.transitionToSteadyStateOperations(); + rollbackTest.stop(); +}());
\ No newline at end of file diff --git a/src/mongo/db/repl/roll_back_local_operations.cpp b/src/mongo/db/repl/roll_back_local_operations.cpp index 56bd2da369a..6bbd42387dd 100644 --- a/src/mongo/db/repl/roll_back_local_operations.cpp +++ b/src/mongo/db/repl/roll_back_local_operations.cpp @@ -48,6 +48,9 @@ namespace repl { // Failpoint which causes rollback to hang before finishing. MONGO_FP_DECLARE(rollbackHangBeforeFinish); +// Failpoint which exits early right after syncFixUp. +MONGO_FP_DECLARE(rollbackExitEarlyAfterCollectionDrop); + // Failpoint which causes rollback to hang and then fail after minValid is written. MONGO_FP_DECLARE(rollbackHangThenFailAfterWritingMinValid); diff --git a/src/mongo/db/repl/roll_back_local_operations.h b/src/mongo/db/repl/roll_back_local_operations.h index ba8dcfe79ae..db7f911a2b4 100644 --- a/src/mongo/db/repl/roll_back_local_operations.h +++ b/src/mongo/db/repl/roll_back_local_operations.h @@ -49,6 +49,7 @@ namespace repl { // MongoDB 3.8 is released, we no longer need to maintain rs_rollback_no_uuid // code and these forward declares can be removed. See SERVER-29766. MONGO_FP_FORWARD_DECLARE(rollbackHangBeforeFinish); +MONGO_FP_FORWARD_DECLARE(rollbackExitEarlyAfterCollectionDrop); MONGO_FP_FORWARD_DECLARE(rollbackHangThenFailAfterWritingMinValid); // This is needed by rs_rollback and rollback_impl. diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index 33e15f3ae85..c6f88408daf 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -941,11 +941,21 @@ Status _syncRollback(OperationContext* opCtx, invariant(commonPoint >= committedSnapshot); try { - ON_BLOCK_EXIT([&] { - auto status = replicationProcess->incrementRollbackID(opCtx); - fassertStatusOK(40497, status); - }); + // It is always safe to increment the rollback ID first, even if we fail to complete + // the rollback. + auto status = replicationProcess->incrementRollbackID(opCtx); + fassertStatusOK(40497, status); + syncFixUp(opCtx, how, rollbackSource, replCoord, replicationProcess); + + if (MONGO_FAIL_POINT(rollbackExitEarlyAfterCollectionDrop)) { + log() << "rollbackExitEarlyAfterCollectionDrop fail point enabled. Returning early " + "until fail point is disabled."; + return Status(ErrorCodes::NamespaceNotFound, + str::stream() << "Failing rollback because " + "rollbackExitEarlyAfterCollectionDrop fail point " + "enabled."); + } } catch (const RSFatalException& e) { return Status(ErrorCodes::UnrecoverableRollbackError, e.what()); } diff --git a/src/mongo/db/repl/rs_rollback_no_uuid.cpp b/src/mongo/db/repl/rs_rollback_no_uuid.cpp index d080315ffa2..e002d2e67d4 100644 --- a/src/mongo/db/repl/rs_rollback_no_uuid.cpp +++ b/src/mongo/db/repl/rs_rollback_no_uuid.cpp @@ -1007,11 +1007,19 @@ Status _syncRollback(OperationContext* opCtx, invariant(commonPoint >= committedSnapshot); try { - ON_BLOCK_EXIT([&] { - auto status = replicationProcess->incrementRollbackID(opCtx); - fassertStatusOK(40425, status); - }); + auto status = replicationProcess->incrementRollbackID(opCtx); + fassertStatusOK(40425, status); + syncFixUp(opCtx, how, rollbackSource, replCoord, replicationProcess); + + if (MONGO_FAIL_POINT(rollbackExitEarlyAfterCollectionDrop)) { + log() << "rollbackExitEarlyAfterCollectionDrop fail point enabled. Returning early " + "until fail point is disabled."; + return Status(ErrorCodes::NamespaceNotFound, + str::stream() << "Failing rollback because " + "rollbackExitEarlyAfterCollectionDrop fail point " + "enabled."); + } } catch (const RSFatalException& e) { return Status(ErrorCodes::UnrecoverableRollbackError, e.what()); } |