author     Moustafa Maher Khalil <m.maher@mongodb.com>       2023-04-11 22:34:04 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-04-12 00:03:23 +0000
commit     e19b996a2d05a3a1a8f361b73f5d50fa36a451b3
tree       55550ee3c1591c18ef0b55dcbf6029a663b86cbd
parent     18e5e638baa2d289d42a4bb61ad06c696271ad3a
Revert "SERVER-54150 Recovery from a stable checkpoint should fassert on oplog application failures"
This reverts commit 347f059c439dfafe9e8a34365b4c5e7a17c22acf.
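
Context: the reverted commit had split recovery mode into kStableRecovering and kUnstableRecovering so that recovery from a stable checkpoint could fassert on oplog application failures under test settings. After this revert the enum collapses back to a single value; a minimal sketch of the resulting shape, reconstructed from the oplog.h hunk below (illustrative, not part of the patch):

    // OplogApplication::Mode after the revert: a single recovery value again.
    enum class Mode {
        kInitialSync,  // applying operations during initial sync
        kRecovering,   // recovering after an unclean shutdown or a rollback to a checkpoint
        kSecondary,    // steady-state oplog application on a secondary
        kApplyOpsCmd   // application driven by the applyOps command
    };

Call sites that used the removed OplogApplication::inRecovering(mode) helper go back to comparing mode == OplogApplication::Mode::kRecovering directly.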
-rw-r--r--  jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js  | 114
-rw-r--r--  src/mongo/db/repl/oplog.cpp                                                |  72
-rw-r--r--  src/mongo/db/repl/oplog.h                                                  |  20
-rw-r--r--  src/mongo/db/repl/oplog_applier.h                                          |   8
-rw-r--r--  src/mongo/db/repl/oplog_applier_impl_test.cpp                              |   6
-rw-r--r--  src/mongo/db/repl/oplog_applier_utils.cpp                                  |  15
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                         |   8
-rw-r--r--  src/mongo/db/repl/replication_recovery.cpp                                 |  26
-rw-r--r--  src/mongo/db/repl/replication_recovery.h                                   |  11
-rw-r--r--  src/mongo/db/repl/replication_recovery_mock.h                              |   6
-rw-r--r--  src/mongo/db/repl/replication_recovery_test.cpp                            |  37
-rw-r--r--  src/mongo/db/repl/rollback_impl.cpp                                        |   2
-rw-r--r--  src/mongo/db/repl/rs_rollback.cpp                                          |   2
-rw-r--r--  src/mongo/db/repl/transaction_oplog_application.cpp                        |  34
-rw-r--r--  src/mongo/dbtests/repltests.cpp                                            |   6
-rw-r--r--  src/mongo/dbtests/storage_timestamp_tests.cpp                              |   4
16 files changed, 55 insertions, 316 deletions
diff --git a/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js b/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js
deleted file mode 100644
index 07a68e02b7f..00000000000
--- a/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Tests that server fasserts on oplog applications failures in recovering mode.
- * This behavior is expected only in tests as we ignore these errors in production but output a
- * warning message.
- * @tags: [
- * requires_persistence,
- * requires_replication,
- * ]
- */
-(function() {
-"use strict";
-
-// This test appends 'insert' and 'delete' oplogs directly to the oplog system collection in
-// standalone mode, When restarting as a replica set member we assume the collections will play
-// forward to the appropriate count. But because we added a new oplog entry that's going to turn
-// into an insert, we play forward to have more documents than expected (or the reverse observation
-// for adding a delete oplog entry).
-TestData.skipEnforceFastCountOnValidate = true;
-
-const ops = ["Update", "Delete", "Insert"];
-const oplogApplicationResults = ["Success", "CrudError", "NamespaceNotFound"];
-
-function runTest(op, result) {
- jsTestLog(`Testing "${op}" oplog application during recovery that finishes with "${result}".`);
- const rst = new ReplSetTest({nodes: 1});
- rst.startSet();
- rst.initiateWithHighElectionTimeout();
- let conn = rst.getPrimary();
-
- // Construct a valid oplog entry.
- function constructOplogEntry(oplog) {
- const lastOplogEntry = oplog.find().sort({ts: -1}).limit(1).toArray()[0];
- const testCollOplogEntry =
- oplog.find({op: "i", ns: "test.coll"}).sort({ts: -1}).limit(1).toArray()[0];
- const highestTS = lastOplogEntry.ts;
- const oplogToInsertTS = Timestamp(highestTS.getTime(), highestTS.getInc() + 1);
- delete testCollOplogEntry.o2;
- if (op === "Delete") {
- return Object.extend(testCollOplogEntry,
- {op: "d", ns: "test.coll", o: {_id: 0}, ts: oplogToInsertTS});
- } else if (op === "Update") {
- return Object.extend(testCollOplogEntry, {
- op: "u",
- ns: "test.coll",
- o: {$v: 2, diff: {u: {a: 1}}},
- o2: {_id: 0},
- ts: oplogToInsertTS
- });
- } else if (op === "Insert") {
- return Object.extend(
- testCollOplogEntry,
- {op: "i", ns: "test.coll", o: {_id: 1, a: 1}, ts: oplogToInsertTS});
- }
- }
-
- // Do an initial insert.
- assert.commandWorked(
- conn.getDB("test").coll.insert({_id: 0, a: 0}, {writeConcern: {w: "majority"}}));
- if (result == "CrudError") {
- // For 'update' and 'delete' oplog to not find the document.
- assert.commandWorked(
- conn.getDB("test").coll.deleteOne({_id: 0}, {writeConcern: {w: "majority"}}));
- // For 'insert' to fail with duplicate key error.
- assert.commandWorked(
- conn.getDB("test").coll.insert({_id: 1}, {writeConcern: {w: "majority"}}));
- } else if (result == "NamespaceNotFound") {
- conn.getDB("test").coll.drop();
- }
-
- jsTestLog("Restart the node in standalone mode to append the new oplog.");
- rst.stop(0, undefined /*signal*/, undefined /*opts*/, {forRestart: true});
- conn = rst.start(0, {noReplSet: true, noCleanData: true});
- let oplog = conn.getDB("local").oplog.rs;
-
- // Construct a valid oplog entry using the highest timestamp in the oplog. The highest timestamp
- // may differ from the one above due to concurrent internal writes when the node was a primary.
- let oplogToInsert = constructOplogEntry(oplog);
- jsTestLog(`Inserting oplog entry: ${tojson(oplogToInsert)}`);
- assert.commandWorked(oplog.insert(oplogToInsert));
-
- jsTestLog("Restart the node with replication enabled to let the added oplog entry" +
- " get applied as part of replication recovery.");
- rst.stop(0, undefined /*signal*/, undefined /*opts*/, {forRestart: true});
- if (result == "Success") {
- jsTestLog("The added oplog is applied successfully during replication recovery.");
- rst.start(0, {noCleanData: true});
- conn = rst.getPrimary();
- if (op === "Update") {
- assert.eq({_id: 0, a: 1}, conn.getDB("test").coll.findOne());
- } else if (op === "Delete") {
- assert.eq(null, conn.getDB("test").coll.findOne());
- } else if (op === "Insert") {
- assert.eq({_id: 0, a: 0}, conn.getDB("test").coll.findOne({_id: 0}));
- assert.eq({_id: 1, a: 1}, conn.getDB("test").coll.findOne({_id: 1}));
- }
- } else {
- jsTestLog(
- "Server should crash while applying the added oplog during replication recovery.");
- const node = rst.start(0, {noCleanData: true, waitForConnect: false});
- const exitCode = waitProgram(node.pid);
- assert.eq(exitCode, MongoRunner.EXIT_ABORT);
- assert.soon(
- function() {
- return rawMongoProgramOutput().search(/Fatal assertion.*5415000/) >= 0;
- },
- "Node should have fasserted upon encountering a fatal error during startup",
- ReplSetTest.kDefaultTimeoutMS);
- }
-
- rst.stopSet();
-}
-
-ops.forEach(op => oplogApplicationResults.forEach(result => runTest(op, result)));
-}());
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index edcf21cebca..e2bb12b3c61 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -110,7 +110,6 @@ using std::string;
using std::stringstream;
using std::unique_ptr;
using std::vector;
-using namespace std::string_literals;
using IndexVersion = IndexDescriptor::IndexVersion;
@@ -1099,8 +1098,6 @@ const StringMap<ApplyOpMetadata> kOpsMap = {
constexpr StringData OplogApplication::kInitialSyncOplogApplicationMode;
constexpr StringData OplogApplication::kRecoveringOplogApplicationMode;
-constexpr StringData OplogApplication::kStableRecoveringOplogApplicationMode;
-constexpr StringData OplogApplication::kUnstableRecoveringOplogApplicationMode;
constexpr StringData OplogApplication::kSecondaryOplogApplicationMode;
constexpr StringData OplogApplication::kApplyOpsCmdOplogApplicationMode;
@@ -1108,10 +1105,8 @@ StringData OplogApplication::modeToString(OplogApplication::Mode mode) {
switch (mode) {
case OplogApplication::Mode::kInitialSync:
return OplogApplication::kInitialSyncOplogApplicationMode;
- case OplogApplication::Mode::kUnstableRecovering:
- return OplogApplication::kUnstableRecoveringOplogApplicationMode;
- case OplogApplication::Mode::kStableRecovering:
- return OplogApplication::kStableRecoveringOplogApplicationMode;
+ case OplogApplication::Mode::kRecovering:
+ return OplogApplication::kRecoveringOplogApplicationMode;
case OplogApplication::Mode::kSecondary:
return OplogApplication::kSecondaryOplogApplicationMode;
case OplogApplication::Mode::kApplyOpsCmd:
@@ -1124,9 +1119,7 @@ StatusWith<OplogApplication::Mode> OplogApplication::parseMode(const std::string
if (mode == OplogApplication::kInitialSyncOplogApplicationMode) {
return OplogApplication::Mode::kInitialSync;
} else if (mode == OplogApplication::kRecoveringOplogApplicationMode) {
- // This only being used in applyOps command which is controlled by the client, so it should
- // be unstable.
- return OplogApplication::Mode::kUnstableRecovering;
+ return OplogApplication::Mode::kRecovering;
} else if (mode == OplogApplication::kSecondaryOplogApplicationMode) {
return OplogApplication::Mode::kSecondary;
} else if (mode == OplogApplication::kApplyOpsCmdOplogApplicationMode) {
@@ -1138,33 +1131,6 @@ StatusWith<OplogApplication::Mode> OplogApplication::parseMode(const std::string
MONGO_UNREACHABLE;
}
-void OplogApplication::checkOnOplogFailureForRecovery(OperationContext* opCtx,
- const mongo::BSONObj& oplogEntry,
- const std::string& errorMsg) {
- const bool isReplicaSet =
- repl::ReplicationCoordinator::get(opCtx->getServiceContext())->getReplicationMode() ==
- repl::ReplicationCoordinator::modeReplSet;
- // Relax the constraints of oplog application if the node is not a replica set member.
- if (!isReplicaSet) {
- return;
- }
-
- // Only fassert in test environment.
- if (getTestCommandsEnabled()) {
- LOGV2_FATAL(5415000,
- "Error applying operation while recovering from stable "
- "checkpoint. This can lead to data corruption.",
- "oplogEntry"_attr = oplogEntry,
- "error"_attr = errorMsg);
- } else {
- LOGV2_WARNING(5415001,
- "Error applying operation while recovering from stable "
- "checkpoint. This can lead to data corruption.",
- "oplogEntry"_attr = oplogEntry,
- "error"_attr = errorMsg);
- }
-}
-
// @return failure status if an update should have happened and the document DNE.
// See replset initial sync code.
Status applyOperation_inlock(OperationContext* opCtx,
@@ -1201,21 +1167,11 @@ Status applyOperation_inlock(OperationContext* opCtx,
return Status::OK();
}
- const bool inStableRecovery = mode == OplogApplication::Mode::kStableRecovering;
NamespaceString requestNss;
CollectionPtr collection = nullptr;
if (auto uuid = op.getUuid()) {
auto catalog = CollectionCatalog::get(opCtx);
collection = catalog->lookupCollectionByUUID(opCtx, uuid.get());
- if (!collection && inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx,
- redact(opOrGroupedInserts.toBSON()),
- str::stream()
- << "(NamespaceNotFound): Failed to apply operation due to missing collection ("
- << uuid.value() << ")");
- }
-
uassert(ErrorCodes::NamespaceNotFound,
str::stream() << "Failed to apply operation due to missing collection ("
<< uuid.get() << "): " << redact(opOrGroupedInserts.toBSON()),
@@ -1283,7 +1239,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
case ReplicationCoordinator::modeNone: {
// Only assign timestamps on standalones during replication recovery when
// started with the 'recoverFromOplogAsStandalone' flag.
- return OplogApplication::inRecovering(mode);
+ return mode == OplogApplication::Mode::kRecovering;
}
}
}
@@ -1432,9 +1388,6 @@ Status applyOperation_inlock(OperationContext* opCtx,
if (oplogApplicationEnforcesSteadyStateConstraints) {
return status;
}
- } else if (inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx, redact(op.toBSONForLogging()), redact(status));
}
// Continue to the next block to retry the operation as an upsert.
needToDoUpsert = true;
@@ -1672,10 +1625,6 @@ Status applyOperation_inlock(OperationContext* opCtx,
});
if (!status.isOK()) {
- if (inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx, redact(op.toBSONForLogging()), redact(status));
- }
return status;
}
@@ -1743,17 +1692,6 @@ Status applyOperation_inlock(OperationContext* opCtx,
&upsertConfigImage);
}
- if (result.nDeleted == 0 && inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx,
- redact(op.toBSONForLogging()),
- !collection ? str::stream()
- << "(NamespaceNotFound): Failed to apply operation due "
- "to missing collection ("
- << requestNss << ")"
- : "Applied a delete which did not delete anything."s);
- }
-
if (result.nDeleted == 0 && mode == OplogApplication::Mode::kSecondary) {
LOGV2_WARNING(2170002,
"Applied a delete which did not delete anything in steady state "
@@ -1900,7 +1838,7 @@ Status applyCommand_inlock(OperationContext* opCtx,
case ReplicationCoordinator::modeNone: {
// Only assign timestamps on standalones during replication recovery when
// started with 'recoverFromOplogAsStandalone'.
- return OplogApplication::inRecovering(mode);
+ return mode == OplogApplication::Mode::kRecovering;
}
}
MONGO_UNREACHABLE;
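
With the StableRecovering/UnstableRecovering names gone, modeToString and parseMode are plain inverses for the remaining modes. A round-trip sketch under that assumption (illustrative only, not part of the patch):

    // "Recovering" parses to kRecovering and stringifies back to the same
    // constant; the old kUnstableRecovering special case in parseMode is gone.
    auto swMode = repl::OplogApplication::parseMode("Recovering");
    invariant(swMode.isOK());
    invariant(repl::OplogApplication::modeToString(swMode.getValue()) ==
              repl::OplogApplication::kRecoveringOplogApplicationMode);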
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index 635f3a3a116..e975ee42095 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -171,10 +171,7 @@ using IncrementOpsAppliedStatsFn = std::function<void()>;
class OplogApplication {
public:
static constexpr StringData kInitialSyncOplogApplicationMode = "InitialSync"_sd;
- // This only being used in 'applyOps' command when sent by client.
static constexpr StringData kRecoveringOplogApplicationMode = "Recovering"_sd;
- static constexpr StringData kStableRecoveringOplogApplicationMode = "StableRecovering"_sd;
- static constexpr StringData kUnstableRecoveringOplogApplicationMode = "UnstableRecovering"_sd;
static constexpr StringData kSecondaryOplogApplicationMode = "Secondary"_sd;
static constexpr StringData kApplyOpsCmdOplogApplicationMode = "ApplyOps"_sd;
@@ -183,12 +180,9 @@ public:
kInitialSync,
// Used when we are applying oplog operations to recover the database state following an
- // clean/unclean shutdown, or when we are recovering from the oplog after we rollback to a
+ // unclean shutdown, or when we are recovering from the oplog after we rollback to a
// checkpoint.
- // If recovering from a unstable stable checkpoint.
- kUnstableRecovering,
- // If recovering from a stable checkpoint.~
- kStableRecovering,
+ kRecovering,
// Used when a secondary node is applying oplog operations from the primary during steady
// state replication.
@@ -199,19 +193,9 @@ public:
kApplyOpsCmd
};
- static bool inRecovering(Mode mode) {
- return mode == Mode::kUnstableRecovering || mode == Mode::kStableRecovering;
- }
-
static StringData modeToString(Mode mode);
static StatusWith<Mode> parseMode(const std::string& mode);
-
- // Server will crash on oplog application failure during recovery from stable checkpoint in the
- // test environment.
- static void checkOnOplogFailureForRecovery(OperationContext* opCtx,
- const mongo::BSONObj& oplogEntry,
- const std::string& errorMsg);
};
inline std::ostream& operator<<(std::ostream& s, OplogApplication::Mode mode) {
diff --git a/src/mongo/db/repl/oplog_applier.h b/src/mongo/db/repl/oplog_applier.h
index 252070077a2..b582b282efd 100644
--- a/src/mongo/db/repl/oplog_applier.h
+++ b/src/mongo/db/repl/oplog_applier.h
@@ -67,10 +67,10 @@ public:
Options() = delete;
explicit Options(OplogApplication::Mode inputMode)
: mode(inputMode),
- allowNamespaceNotFoundErrorsOnCrudOps(inputMode ==
- OplogApplication::Mode::kInitialSync ||
- OplogApplication::inRecovering(inputMode)),
- skipWritesToOplog(OplogApplication::inRecovering(inputMode)) {}
+ allowNamespaceNotFoundErrorsOnCrudOps(
+ inputMode == OplogApplication::Mode::kInitialSync ||
+ inputMode == OplogApplication::Mode::kRecovering),
+ skipWritesToOplog(inputMode == OplogApplication::Mode::kRecovering) {}
// Used to determine which operations should be applied. Only initial sync will set this to
// be something other than the null optime.
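
Both flags derived in the Options constructor above come out true for recovery mode. A small usage sketch, assuming the members are publicly readable as the initializer list suggests:

    // Recovery applier options: NamespaceNotFound is tolerated on CRUD ops and
    // writes to the oplog are skipped, per the constructor above.
    repl::OplogApplier::Options options(repl::OplogApplication::Mode::kRecovering);
    invariant(options.allowNamespaceNotFoundErrorsOnCrudOps);
    invariant(options.skipWritesToOplog);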
diff --git a/src/mongo/db/repl/oplog_applier_impl_test.cpp b/src/mongo/db/repl/oplog_applier_impl_test.cpp
index 97e09475c24..5d517b148ad 100644
--- a/src/mongo/db/repl/oplog_applier_impl_test.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl_test.cpp
@@ -767,7 +767,7 @@ TEST_F(MultiOplogEntryOplogApplierImplTest, MultiApplyUnpreparedTransactionAllAt
ReplicationCoordinator::get(_opCtx.get()),
getConsistencyMarkers(),
getStorageInterface(),
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering),
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
_writerPool.get());
// Apply both inserts and the commit in a single batch. We expect no oplog entries to
@@ -1249,7 +1249,7 @@ TEST_F(MultiOplogEntryPreparedTransactionTest, MultiApplyPreparedTransactionReco
ReplicationCoordinator::get(_opCtx.get()),
getConsistencyMarkers(),
getStorageInterface(),
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering),
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
_writerPool.get());
// Apply a batch with the insert operations. This should have no effect, because this is
@@ -1483,7 +1483,7 @@ TEST_F(MultiOplogEntryPreparedTransactionTest,
ReplicationCoordinator::get(_opCtx.get()),
getConsistencyMarkers(),
getStorageInterface(),
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering),
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
_writerPool.get());
const auto expectedStartOpTime = _singlePrepareApplyOp->getOpTime();
diff --git a/src/mongo/db/repl/oplog_applier_utils.cpp b/src/mongo/db/repl/oplog_applier_utils.cpp
index 300b4f0308e..7484b500754 100644
--- a/src/mongo/db/repl/oplog_applier_utils.cpp
+++ b/src/mongo/db/repl/oplog_applier_utils.cpp
@@ -302,7 +302,6 @@ Status OplogApplierUtils::applyOplogBatchCommon(
InsertGroup insertGroup(
ops, opCtx, oplogApplicationMode, isDataConsistent, applyOplogEntryOrGroupedInserts);
- const bool inStableRecovery = oplogApplicationMode == OplogApplication::Mode::kStableRecovering;
for (auto it = ops->cbegin(); it != ops->cend(); ++it) {
const OplogEntry& entry = **it;
@@ -322,15 +321,9 @@ Status OplogApplierUtils::applyOplogBatchCommon(
if (!status.isOK()) {
// Tried to apply an update operation but the document is missing, there must be
// a delete operation for the document later in the oplog.
- // Server will crash on oplog application failure during recovery from stable
- // checkpoint in the test environment.
if (status == ErrorCodes::UpdateOperationFailed &&
(oplogApplicationMode == OplogApplication::Mode::kInitialSync ||
- OplogApplication::inRecovering(oplogApplicationMode))) {
- if (inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx, redact(entry.toBSONForLogging()), redact(status));
- }
+ oplogApplicationMode == OplogApplication::Mode::kRecovering)) {
continue;
}
@@ -344,14 +337,8 @@ Status OplogApplierUtils::applyOplogBatchCommon(
} catch (const DBException& e) {
// SERVER-24927 If we have a NamespaceNotFound exception, then this document will be
// dropped before initial sync or recovery ends anyways and we should ignore it.
- // Server will crash on oplog application failure during recovery from stable checkpoint
- // in the test environment.
if (e.code() == ErrorCodes::NamespaceNotFound && entry.isCrudOpType() &&
allowNamespaceNotFoundErrorsOnCrudOps) {
- if (inStableRecovery) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx, redact(entry.toBSONForLogging()), redact(e));
- }
continue;
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 0bf719ddcf1..a6df2f77d29 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -521,15 +521,13 @@ bool ReplicationCoordinatorImpl::_startLoadLocalConfig(
LOGV2(4280504, "Cleaning up any partially applied oplog batches & reading last op from oplog");
// Read the last op from the oplog after cleaning up any partially applied batches.
- auto stableTimestamp =
- _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, boost::none);
+ const auto stableTimestamp = boost::none;
+ _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp);
LOGV2(4280505,
"Creating any necessary TenantMigrationAccessBlockers for unfinished migrations");
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx);
LOGV2(4280506, "Reconstructing prepared transactions");
- reconstructPreparedTransactions(opCtx,
- stableTimestamp ? OplogApplication::Mode::kStableRecovering
- : OplogApplication::Mode::kUnstableRecovering);
+ reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
const auto lastOpTimeAndWallTimeResult = _externalState->loadLastOpTimeAndWallTime(opCtx);
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index dc69371f557..7bce580fd7a 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -329,7 +329,7 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC
// Initialize the cached pointer to the oplog collection.
acquireOplogCollectionForLogging(opCtx);
- boost::optional<Timestamp> stableTimestamp = boost::none;
+
if (recoveryTS || startupRecoveryForRestore) {
if (startupRecoveryForRestore && !recoveryTS) {
LOGV2_WARNING(5576601,
@@ -340,7 +340,8 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC
// We pass in "none" for the stable timestamp so that recoverFromOplog asks storage
// for the recoveryTimestamp just like on replica set recovery.
- stableTimestamp = recoverFromOplog(opCtx, boost::none);
+ const auto stableTimestamp = boost::none;
+ recoverFromOplog(opCtx, stableTimestamp);
} else {
if (gTakeUnstableCheckpointOnShutdown) {
// Ensure 'recoverFromOplogAsStandalone' with 'takeUnstableCheckpointOnShutdown'
@@ -358,9 +359,7 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC
}
}
- reconstructPreparedTransactions(opCtx,
- stableTimestamp ? OplogApplication::Mode::kStableRecovering
- : OplogApplication::Mode::kUnstableRecovering);
+ reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
// Two-phase index builds are built in the background, which may still be in-progress after
// recovering from the oplog. To prevent crashing the server, skip enabling read-only mode.
@@ -430,14 +429,14 @@ void ReplicationRecoveryImpl::recoverFromOplogUpTo(OperationContext* opCtx, Time
invariant(appliedUpTo <= endPoint);
}
- reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kStableRecovering);
+ reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
}
-boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog(
- OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) try {
+void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx,
+ boost::optional<Timestamp> stableTimestamp) try {
if (_consistencyMarkers->getInitialSyncFlag(opCtx)) {
LOGV2(21542, "No recovery needed. Initial sync flag set");
- return stableTimestamp; // Initial Sync will take over so no cleanup is needed.
+ return; // Initial Sync will take over so no cleanup is needed.
}
const auto serviceCtx = getGlobalServiceContext();
@@ -479,7 +478,7 @@ boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog(
// Oplog is empty. There are no oplog entries to apply, so we exit recovery and go into
// initial sync.
LOGV2(21543, "No oplog entries to apply for recovery. Oplog is empty");
- return stableTimestamp;
+ return;
}
fassert(40290, topOfOplogSW);
const auto topOfOplog = topOfOplogSW.getValue();
@@ -493,7 +492,6 @@ boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog(
_recoverFromUnstableCheckpoint(
opCtx, _consistencyMarkers->getAppliedThrough(opCtx), topOfOplog);
}
- return stableTimestamp;
} catch (...) {
LOGV2_FATAL_CONTINUE(21570,
"Caught exception during replication recovery: {error}",
@@ -684,10 +682,6 @@ Timestamp ReplicationRecoveryImpl::_applyOplogOperations(OperationContext* opCtx
RecoveryOplogApplierStats stats;
- auto oplogApplicationMode = (recoveryMode == RecoveryMode::kStartupFromStableTimestamp ||
- recoveryMode == RecoveryMode::kRollbackFromStableTimestamp)
- ? OplogApplication::Mode::kStableRecovering
- : OplogApplication::Mode::kUnstableRecovering;
auto writerPool = makeReplWriterPool();
auto* replCoord = ReplicationCoordinator::get(opCtx);
OplogApplierImpl oplogApplier(nullptr,
@@ -696,7 +690,7 @@ Timestamp ReplicationRecoveryImpl::_applyOplogOperations(OperationContext* opCtx
replCoord,
_consistencyMarkers,
_storageInterface,
- OplogApplier::Options(oplogApplicationMode),
+ OplogApplier::Options(OplogApplication::Mode::kRecovering),
writerPool.get());
OplogApplier::BatchLimits batchLimits;
diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h
index 9bcec9982fc..614d7fba865 100644
--- a/src/mongo/db/repl/replication_recovery.h
+++ b/src/mongo/db/repl/replication_recovery.h
@@ -52,12 +52,9 @@ public:
/**
* Recovers the data on disk from the oplog. If the provided stable timestamp is not "none",
* this function assumes the data reflects that timestamp.
- * Returns the provided stable timestamp. If the provided stable timestamp is "none", this
- * function might try to ask storage for the last stable timestamp if it exists before doing
- * recovery which will be returned after performing successful recovery.
*/
- virtual boost::optional<Timestamp> recoverFromOplog(
- OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) = 0;
+ virtual void recoverFromOplog(OperationContext* opCtx,
+ boost::optional<Timestamp> stableTimestamp) = 0;
/**
* Recovers the data on disk from the oplog and puts the node in readOnly mode. If
@@ -80,8 +77,8 @@ public:
ReplicationRecoveryImpl(StorageInterface* storageInterface,
ReplicationConsistencyMarkers* consistencyMarkers);
- boost::optional<Timestamp> recoverFromOplog(
- OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override;
+ void recoverFromOplog(OperationContext* opCtx,
+ boost::optional<Timestamp> stableTimestamp) override;
void recoverFromOplogAsStandalone(OperationContext* opCtx) override;
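
recoverFromOplog() returns void again, so the stable timestamp is input-only. A hedged call-site sketch mirroring the replication_coordinator_impl.cpp hunk above; 'replicationProcess' stands in for whatever owns the recovery instance:

    // Passing "none" makes recovery ask storage for the recoveryTimestamp
    // itself; nothing is reported back to the caller anymore.
    const boost::optional<Timestamp> stableTimestamp = boost::none;
    replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp);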
diff --git a/src/mongo/db/repl/replication_recovery_mock.h b/src/mongo/db/repl/replication_recovery_mock.h
index 39665085e57..ec546d594c7 100644
--- a/src/mongo/db/repl/replication_recovery_mock.h
+++ b/src/mongo/db/repl/replication_recovery_mock.h
@@ -42,10 +42,8 @@ class ReplicationRecoveryMock : public ReplicationRecovery {
public:
ReplicationRecoveryMock() = default;
- boost::optional<Timestamp> recoverFromOplog(
- OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override {
- return stableTimestamp;
- }
+ void recoverFromOplog(OperationContext* opCtx,
+ boost::optional<Timestamp> stableTimestamp) override {}
void recoverFromOplogAsStandalone(OperationContext* opCtx) override {}
diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp
index 0d352265387..e3818a07c71 100644
--- a/src/mongo/db/repl/replication_recovery_test.cpp
+++ b/src/mongo/db/repl/replication_recovery_test.cpp
@@ -712,7 +712,7 @@ TEST_F(ReplicationRecoveryTest,
testRecoveryToStableAppliesDocumentsWithNoAppliedThrough(false);
}
-TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) {
+TEST_F(ReplicationRecoveryTest, RecoveryIgnoresDroppedCollections) {
ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
auto opCtx = getOperationContext();
@@ -724,7 +724,7 @@ TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) {
ASSERT_FALSE(autoColl.getCollection());
}
- // Not setting a stable timestamp in order to perform unstable recovery,
+ getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2));
recovery.recoverFromOplog(opCtx, boost::none);
_assertDocsInOplog(opCtx, {1, 2, 3, 4, 5});
@@ -736,24 +736,6 @@ TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) {
ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1));
}
-DEATH_TEST_REGEX_F(ReplicationRecoveryTest,
- StableRecoveryCrashOnDroppedCollectionsInTests,
- "Fatal assertion.*5415000") {
- ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
- auto opCtx = getOperationContext();
-
- _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5});
-
- ASSERT_OK(getStorageInterface()->dropCollection(opCtx, testNs));
- {
- AutoGetCollectionForReadCommand autoColl(opCtx, testNs);
- ASSERT_FALSE(autoColl.getCollection());
- }
-
- getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2));
- recovery.recoverFromOplog(opCtx, boost::none);
-}
-
TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehindAfterTruncation) {
ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
auto opCtx = getOperationContext();
@@ -1251,7 +1233,7 @@ TEST_F(ReplicationRecoveryTest,
ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(3, 0), 1));
}
-TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpTo) {
+TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToBeforeEndOfOplog) {
ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
auto opCtx = getOperationContext();
@@ -1262,16 +1244,8 @@ TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpTo) {
recovery.recoverFromOplogUpTo(opCtx, Timestamp(5, 5));
_assertDocsInTestCollection(opCtx, {3, 4, 5});
ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1));
-}
-
-TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToBeforeEndOfOplog) {
- ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
- auto opCtx = getOperationContext();
-
- _setUpOplog(opCtx, getStorageInterface(), {2, 3, 4, 5, 6, 7, 8, 9, 10});
- getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2));
- // Recovers operations with timestamps: 3, 4, 5, 6, 7, 8, 9.
+ // Recovers operations with timestamps: 6, 7, 8, 9.
recovery.recoverFromOplogUpTo(opCtx, Timestamp(9, 9));
_assertDocsInTestCollection(opCtx, {3, 4, 5, 6, 7, 8, 9});
ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(9, 9), 1));
@@ -1341,6 +1315,9 @@ TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToDoesNotExceedEndPoint) {
_setUpOplog(opCtx, getStorageInterface(), {2, 5, 10});
getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2));
+ recovery.recoverFromOplogUpTo(opCtx, Timestamp(9, 9));
+ ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1));
+
recovery.recoverFromOplogUpTo(opCtx, Timestamp(15, 15));
ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(10, 10), 1));
}
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index 2ba2cff09e3..7c37be215ee 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -648,7 +648,7 @@ void RollbackImpl::_runPhaseFromAbortToReconstructPreparedTxns(
// transactions were aborted (i.e. the in-memory counts were rolled-back) before computing
// collection counts, reconstruct the prepared transactions now, adding on any additional counts
// to the now corrected record store.
- reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kStableRecovering);
+ reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
}
void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) {
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index c0baecbb166..3f96511361b 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -998,7 +998,7 @@ void rollbackDropIndexes(OperationContext* opCtx,
"uuid"_attr = uuid,
"indexName"_attr = indexName);
- createIndexForApplyOps(opCtx, indexSpec, *nss, OplogApplication::Mode::kStableRecovering);
+ createIndexForApplyOps(opCtx, indexSpec, *nss, OplogApplication::Mode::kRecovering);
LOGV2_DEBUG(21676,
1,
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index 9abfcc0c270..fc035a5ff78 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -85,26 +85,9 @@ Status _applyOperationsForTransaction(OperationContext* opCtx,
}
} catch (const DBException& ex) {
// Ignore NamespaceNotFound errors if we are in initial sync or recovering mode.
- // During recovery we reconsutuct prepared transactions at the end after applying all
- // the oplogs, so 'NamespaceNotFound' error shouldn't be hit whether it is a stable or
- // unstable recovery. However we have some scenarios when this error should be skipped:
- // 1- This code path can be called while applying commit oplog during unstable recovery
- // when 'startupRecoveryForRestore' is set.
- // 2- During selective backup:
- // - During restore when 'recoverFromOplogAsStandalone' is set which is usually be
- // done in a stable recovery mode.
- // - After the restore finished as the standalone node started with the flag
- // 'takeUnstableCheckpointOnShutdown' so after restarting the node as a replica
- // set member it will go through unstable recovery.
const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound &&
(oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync ||
- repl::OplogApplication::inRecovering(oplogApplicationMode));
-
- if (ex.code() == ErrorCodes::NamespaceNotFound &&
- oplogApplicationMode == repl::OplogApplication::Mode::kStableRecovering) {
- repl::OplogApplication::checkOnOplogFailureForRecovery(
- opCtx, redact(op.toBSONForLogging()), redact(ex));
- }
+ oplogApplicationMode == repl::OplogApplication::Mode::kRecovering);
if (!ignoreException) {
LOGV2_DEBUG(
@@ -144,7 +127,7 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx,
repl::OplogApplication::Mode mode,
Timestamp commitTimestamp,
Timestamp durableTimestamp) {
- invariant(repl::OplogApplication::inRecovering(mode));
+ invariant(mode == repl::OplogApplication::Mode::kRecovering);
auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {});
@@ -203,8 +186,7 @@ Status applyCommitTransaction(OperationContext* opCtx,
invariant(commitCommand.getCommitTimestamp());
switch (mode) {
- case repl::OplogApplication::Mode::kUnstableRecovering:
- case repl::OplogApplication::Mode::kStableRecovering: {
+ case repl::OplogApplication::Mode::kRecovering: {
return _applyTransactionFromOplogChain(opCtx,
entry,
mode,
@@ -248,8 +230,7 @@ Status applyAbortTransaction(OperationContext* opCtx,
const OplogEntry& entry,
repl::OplogApplication::Mode mode) {
switch (mode) {
- case repl::OplogApplication::Mode::kUnstableRecovering:
- case repl::OplogApplication::Mode::kStableRecovering: {
+ case repl::OplogApplication::Mode::kRecovering: {
// We don't put transactions into the prepare state until the end of recovery,
// so there is no transaction to abort.
return Status::OK();
@@ -398,7 +379,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
// The prepare time of the transaction is set explicitly below.
auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {});
- if (repl::OplogApplication::inRecovering(mode) ||
+ if (mode == repl::OplogApplication::Mode::kRecovering ||
mode == repl::OplogApplication::Mode::kInitialSync) {
// We might replay a prepared transaction behind oldest timestamp. Note that since this is
// scoped to the storage transaction, and readTransactionOperationsFromOplogChain implicitly
@@ -468,7 +449,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
// Set this in case the application of any ops need to use the prepare timestamp of this
// transaction. It should be cleared automatically when the transaction finishes.
- if (repl::OplogApplication::inRecovering(mode) ||
+ if (mode == repl::OplogApplication::Mode::kRecovering ||
mode == repl::OplogApplication::Mode::kInitialSync) {
txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime());
}
@@ -542,8 +523,7 @@ Status applyPrepareTransaction(OperationContext* opCtx,
const OplogEntry& entry,
repl::OplogApplication::Mode mode) {
switch (mode) {
- case repl::OplogApplication::Mode::kUnstableRecovering:
- case repl::OplogApplication::Mode::kStableRecovering: {
+ case repl::OplogApplication::Mode::kRecovering: {
if (!serverGlobalParams.enableMajorityReadConcern) {
LOGV2_ERROR(
21850,
diff --git a/src/mongo/dbtests/repltests.cpp b/src/mongo/dbtests/repltests.cpp
index 9c66ede578c..c1594a0a009 100644
--- a/src/mongo/dbtests/repltests.cpp
+++ b/src/mongo/dbtests/repltests.cpp
@@ -365,12 +365,12 @@ protected:
virtual void reset() const = 0;
};
-// Some operations are only idempotent when in RECOVERING from unstable checkpoint, not in
-// SECONDARY. This includes duplicate inserts and deletes.
+// Some operations are only idempotent when in RECOVERING, not in SECONDARY. This includes
+// duplicate inserts and deletes.
class Recovering : public Base {
protected:
virtual OplogApplication::Mode getOplogApplicationMode() {
- return OplogApplication::Mode::kUnstableRecovering;
+ return OplogApplication::Mode::kRecovering;
}
};
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index f2cee34a605..0c0ffd7ad1e 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -1415,7 +1415,7 @@ public:
_coordinatorMock,
_consistencyMarkers,
storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering),
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
writerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
@@ -1517,7 +1517,7 @@ public:
_coordinatorMock,
_consistencyMarkers,
storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering),
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
writerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));