summaryrefslogtreecommitdiff
path: root/src/mongo/db/repl/transaction_oplog_application.cpp
diff options
context:
space:
mode:
authorMoustafa Maher Khalil <m.maher@mongodb.com>2023-04-26 19:41:50 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-26 21:06:53 +0000
commit269961220e0a5b954a2e0d878c82bd58068030ae (patch)
tree339f5498a26af40fa213d61d38505b1fbadba7cc /src/mongo/db/repl/transaction_oplog_application.cpp
parent518f3df1276fa9c396b1384554e69dd96e633b6c (diff)
downloadmongo-269961220e0a5b954a2e0d878c82bd58068030ae.tar.gz
SERVER-54150 Recovery from a stable checkpoint should fassert on oplog application failures
Diffstat (limited to 'src/mongo/db/repl/transaction_oplog_application.cpp')
-rw-r--r--src/mongo/db/repl/transaction_oplog_application.cpp34
1 files changed, 27 insertions, 7 deletions
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index 01ad3f0aa27..7506a9b2122 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -148,9 +148,26 @@ Status _applyOperationsForTransaction(OperationContext* opCtx,
}
} catch (const DBException& ex) {
// Ignore NamespaceNotFound errors if we are in initial sync or recovering mode.
+ // During recovery we reconstruct prepared transactions at the end after applying all
+ // the oplogs, so 'NamespaceNotFound' error shouldn't be hit whether it is a stable or
+ // unstable recovery. However we have some scenarios when this error should be skipped:
+ // 1- This code path can be called while applying commit oplog during unstable recovery
+ // when 'startupRecoveryForRestore' is set.
+ // 2- During selective backup:
+ // - During restore when 'recoverFromOplogAsStandalone' is set, which is usually
+ // done in a stable recovery mode.
+ // - After the restore finished as the standalone node started with the flag
+ // 'takeUnstableCheckpointOnShutdown' so after restarting the node as a replica
+ // set member it will go through unstable recovery.
const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound &&
(oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync ||
- oplogApplicationMode == repl::OplogApplication::Mode::kRecovering);
+ repl::OplogApplication::inRecovering(oplogApplicationMode));
+
+ if (ex.code() == ErrorCodes::NamespaceNotFound &&
+ oplogApplicationMode == repl::OplogApplication::Mode::kStableRecovering) {
+ repl::OplogApplication::checkOnOplogFailureForRecovery(
+ opCtx, op.getNss(), redact(op.toBSONForLogging()), redact(ex));
+ }
if (!ignoreException) {
LOGV2_DEBUG(
@@ -190,7 +207,7 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx,
repl::OplogApplication::Mode mode,
Timestamp commitTimestamp,
Timestamp durableTimestamp) {
- invariant(mode == repl::OplogApplication::Mode::kRecovering);
+ invariant(repl::OplogApplication::inRecovering(mode));
auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {});
@@ -308,7 +325,8 @@ Status applyCommitTransaction(OperationContext* opCtx,
auto commitTimestamp = *commitCommand.getCommitTimestamp();
switch (mode) {
- case repl::OplogApplication::Mode::kRecovering: {
+ case repl::OplogApplication::Mode::kUnstableRecovering:
+ case repl::OplogApplication::Mode::kStableRecovering: {
return _applyTransactionFromOplogChain(
opCtx, *op, mode, commitTimestamp, op->getOpTime().getTimestamp());
}
@@ -351,7 +369,8 @@ Status applyAbortTransaction(OperationContext* opCtx,
const ApplierOperation& op,
repl::OplogApplication::Mode mode) {
switch (mode) {
- case repl::OplogApplication::Mode::kRecovering: {
+ case repl::OplogApplication::Mode::kUnstableRecovering:
+ case repl::OplogApplication::Mode::kStableRecovering: {
// We don't put transactions into the prepare state until the end of recovery,
// so there is no transaction to abort.
return Status::OK();
@@ -577,7 +596,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
opCtx->recoveryUnit()->setPrepareConflictBehavior(
PrepareConflictBehavior::kIgnoreConflictsAllowWrites);
// We might replay a prepared transaction behind oldest timestamp.
- if (mode == repl::OplogApplication::Mode::kRecovering ||
+ if (repl::OplogApplication::inRecovering(mode) ||
mode == repl::OplogApplication::Mode::kInitialSync) {
opCtx->recoveryUnit()->setRoundUpPreparedTimestamps(true);
}
@@ -596,7 +615,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
// Set this in case the application of any ops needs to use the prepare timestamp
// of this transaction. It should be cleared automatically when the txn finishes.
- if (mode == repl::OplogApplication::Mode::kRecovering ||
+ if (repl::OplogApplication::inRecovering(mode) ||
mode == repl::OplogApplication::Mode::kInitialSync) {
txnParticipant.setPrepareOpTimeForRecovery(opCtx, prepareOp.getOpTime());
}
@@ -675,7 +694,8 @@ Status applyPrepareTransaction(OperationContext* opCtx,
const ApplierOperation& op,
repl::OplogApplication::Mode mode) {
switch (mode) {
- case repl::OplogApplication::Mode::kRecovering: {
+ case repl::OplogApplication::Mode::kUnstableRecovering:
+ case repl::OplogApplication::Mode::kStableRecovering: {
if (!serverGlobalParams.enableMajorityReadConcern) {
LOGV2_ERROR(21850,
"Cannot replay a prepared transaction when "