From 14b059e544578774d7f7ee3aacdc6ac47a15bb4c Mon Sep 17 00:00:00 2001 From: XueruiFa Date: Tue, 21 Sep 2021 21:25:51 +0000 Subject: =?UTF-8?q?SERVER-60096:=20Add=20rollbackHangCommonPointBeforeRepl?= =?UTF-8?q?CommitPoint=20failpo=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- jstests/replsets/resync_majority_member.js | 13 ++++++++++--- src/mongo/db/repl/rollback_impl.cpp | 9 +++++---- src/mongo/db/repl/rs_rollback.cpp | 9 +++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/jstests/replsets/resync_majority_member.js b/jstests/replsets/resync_majority_member.js index 72c191d8cc5..630cbd03d9a 100644 --- a/jstests/replsets/resync_majority_member.js +++ b/jstests/replsets/resync_majority_member.js @@ -14,8 +14,9 @@ (function() { "use strict"; -load("jstests/libs/write_concern_util.js"); load("jstests/libs/fail_point_util.js"); +load("jstests/libs/storage_engine_utils.js"); +load("jstests/libs/write_concern_util.js"); TestData.skipCheckDBHashes = true; // the set is not consistent when we shutdown the test @@ -97,8 +98,14 @@ assert.commandWorked(syncSource.getDB(dbName).getCollection(collName).insert( // This failpoint will only be hit if the node's rollback common point is before the replication // commit point, which triggers an invariant. This failpoint is used to verify the invariant // will be hit without having to search the logs. -const rollbackCommittedWritesFailPoint = - configureFailPoint(rollbackNode, "rollbackHangCommonPointBeforeReplCommitPoint"); +let rollbackCommittedWritesFailPoint; +if (storageEngineIsWiredTigerOrInMemory()) { + rollbackCommittedWritesFailPoint = + configureFailPoint(rollbackNode, "rollbackToTimestampHangCommonPointBeforeReplCommitPoint"); +} else { + rollbackCommittedWritesFailPoint = + configureFailPoint(rollbackNode, "rollbackViaRefetchHangCommonPointBeforeReplCommitPoint"); +} // Node 1 will have to roll back to rejoin the set. It will crash as it will refuse to roll back // majority committed data. diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index ebeda465968..a03e6793ea0 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -75,7 +75,7 @@ namespace repl { using namespace fmt::literals; MONGO_FAIL_POINT_DEFINE(rollbackHangAfterTransitionToRollback); -MONGO_FAIL_POINT_DEFINE(rollbackHangCommonPointBeforeReplCommitPoint); +MONGO_FAIL_POINT_DEFINE(rollbackToTimestampHangCommonPointBeforeReplCommitPoint); namespace { @@ -1116,10 +1116,11 @@ StatusWith RollbackImpl::_findComm "commonPointOpTime"_attr = commonPointOpTime); // This failpoint is used for testing the invariant below. - if (MONGO_unlikely(rollbackHangCommonPointBeforeReplCommitPoint.shouldFail()) && + if (MONGO_unlikely(rollbackToTimestampHangCommonPointBeforeReplCommitPoint.shouldFail()) && (commonPointOpTime.getTimestamp() < lastCommittedOpTime.getTimestamp())) { - LOGV2(5812200, "Hanging due to rollbackHangCommonPointBeforeReplCommitPoint failpoint"); - rollbackHangCommonPointBeforeReplCommitPoint.pauseWhileSet(opCtx); + LOGV2(5812200, + "Hanging due to rollbackToTimestampHangCommonPointBeforeReplCommitPoint failpoint"); + rollbackToTimestampHangCommonPointBeforeReplCommitPoint.pauseWhileSet(opCtx); } // Rollback common point should be >= the replication commit point. diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index 813c058e7c7..2bdf56d833f 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -96,6 +96,7 @@ using std::unique_ptr; namespace repl { MONGO_FAIL_POINT_DEFINE(rollbackExitEarlyAfterCollectionDrop); +MONGO_FAIL_POINT_DEFINE(rollbackViaRefetchHangCommonPointBeforeReplCommitPoint); using namespace rollback_internal; @@ -1264,6 +1265,14 @@ Status _syncRollback(OperationContext* opCtx, "Rollback common point", "commonPoint"_attr = commonPointOpTime); + // This failpoint is used for testing the invariant below. + if (MONGO_unlikely(rollbackViaRefetchHangCommonPointBeforeReplCommitPoint.shouldFail()) && + (commonPointOpTime.getTimestamp() < lastCommittedOpTime.getTimestamp())) { + LOGV2(6009600, + "Hanging due to rollbackViaRefetchHangCommonPointBeforeReplCommitPoint failpoint"); + rollbackViaRefetchHangCommonPointBeforeReplCommitPoint.pauseWhileSet(opCtx); + } + // Rollback common point should be >= the replication commit point. invariant(commonPointOpTime.getTimestamp() >= lastCommittedOpTime.getTimestamp()); invariant(commonPointOpTime >= lastCommittedOpTime); -- cgit v1.2.1