diff options
author | Moustafa Maher Khalil <m.maher@mongodb.com> | 2023-04-26 19:41:50 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-26 21:06:53 +0000 |
commit | 269961220e0a5b954a2e0d878c82bd58068030ae (patch) | |
tree | 339f5498a26af40fa213d61d38505b1fbadba7cc /src/mongo/db/repl/transaction_oplog_application.cpp | |
parent | 518f3df1276fa9c396b1384554e69dd96e633b6c (diff) | |
download | mongo-269961220e0a5b954a2e0d878c82bd58068030ae.tar.gz |
SERVER-54150 Recovery from a stable checkpoint should fassert on oplog application failures
Diffstat (limited to 'src/mongo/db/repl/transaction_oplog_application.cpp')
-rw-r--r-- | src/mongo/db/repl/transaction_oplog_application.cpp | 34 |
1 files changed, 27 insertions, 7 deletions
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 01ad3f0aa27..7506a9b2122 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -148,9 +148,26 @@ Status _applyOperationsForTransaction(OperationContext* opCtx, } } catch (const DBException& ex) { // Ignore NamespaceNotFound errors if we are in initial sync or recovering mode. + // During recovery we reconstruct prepared transactions at the end after applying all + // the oplogs, so 'NamespaceNotFound' error shouldn't be hit whether it is a stable or + // unstable recovery. However we have some scenarios when this error should be skipped: + // 1- This code path can be called while applying commit oplog during unstable recovery + // when 'startupRecoveryForRestore' is set. + // 2- During selective backup: + // - During restore when 'recoverFromOplogAsStandalone' is set which is usually + // done in a stable recovery mode. + // - After the restore finished as the standalone node started with the flag + // 'takeUnstableCheckpointOnShutdown' so after restarting the node as a replica + // set member it will go through unstable recovery. 
const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound && (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync || - oplogApplicationMode == repl::OplogApplication::Mode::kRecovering); + repl::OplogApplication::inRecovering(oplogApplicationMode)); + + if (ex.code() == ErrorCodes::NamespaceNotFound && + oplogApplicationMode == repl::OplogApplication::Mode::kStableRecovering) { + repl::OplogApplication::checkOnOplogFailureForRecovery( + opCtx, op.getNss(), redact(op.toBSONForLogging()), redact(ex)); + } if (!ignoreException) { LOGV2_DEBUG( @@ -190,7 +207,7 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx, repl::OplogApplication::Mode mode, Timestamp commitTimestamp, Timestamp durableTimestamp) { - invariant(mode == repl::OplogApplication::Mode::kRecovering); + invariant(repl::OplogApplication::inRecovering(mode)); auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {}); @@ -308,7 +325,8 @@ Status applyCommitTransaction(OperationContext* opCtx, auto commitTimestamp = *commitCommand.getCommitTimestamp(); switch (mode) { - case repl::OplogApplication::Mode::kRecovering: { + case repl::OplogApplication::Mode::kUnstableRecovering: + case repl::OplogApplication::Mode::kStableRecovering: { return _applyTransactionFromOplogChain( opCtx, *op, mode, commitTimestamp, op->getOpTime().getTimestamp()); } @@ -351,7 +369,8 @@ Status applyAbortTransaction(OperationContext* opCtx, const ApplierOperation& op, repl::OplogApplication::Mode mode) { switch (mode) { - case repl::OplogApplication::Mode::kRecovering: { + case repl::OplogApplication::Mode::kUnstableRecovering: + case repl::OplogApplication::Mode::kStableRecovering: { // We don't put transactions into the prepare state until the end of recovery, // so there is no transaction to abort. 
return Status::OK(); @@ -577,7 +596,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx, opCtx->recoveryUnit()->setPrepareConflictBehavior( PrepareConflictBehavior::kIgnoreConflictsAllowWrites); // We might replay a prepared transaction behind oldest timestamp. - if (mode == repl::OplogApplication::Mode::kRecovering || + if (repl::OplogApplication::inRecovering(mode) || mode == repl::OplogApplication::Mode::kInitialSync) { opCtx->recoveryUnit()->setRoundUpPreparedTimestamps(true); } @@ -596,7 +615,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx, // Set this in case the application of any ops needs to use the prepare timestamp // of this transaction. It should be cleared automatically when the txn finishes. - if (mode == repl::OplogApplication::Mode::kRecovering || + if (repl::OplogApplication::inRecovering(mode) || mode == repl::OplogApplication::Mode::kInitialSync) { txnParticipant.setPrepareOpTimeForRecovery(opCtx, prepareOp.getOpTime()); } @@ -675,7 +694,8 @@ Status applyPrepareTransaction(OperationContext* opCtx, const ApplierOperation& op, repl::OplogApplication::Mode mode) { switch (mode) { - case repl::OplogApplication::Mode::kRecovering: { + case repl::OplogApplication::Mode::kUnstableRecovering: + case repl::OplogApplication::Mode::kStableRecovering: { if (!serverGlobalParams.enableMajorityReadConcern) { LOGV2_ERROR(21850, "Cannot replay a prepared transaction when " |