diff options
author | Moustafa Maher Khalil <m.maher@mongodb.com> | 2023-04-11 22:34:04 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-12 00:03:23 +0000 |
commit | e19b996a2d05a3a1a8f361b73f5d50fa36a451b3 (patch) | |
tree | 55550ee3c1591c18ef0b55dcbf6029a663b86cbd | |
parent | 18e5e638baa2d289d42a4bb61ad06c696271ad3a (diff) | |
download | mongo-e19b996a2d05a3a1a8f361b73f5d50fa36a451b3.tar.gz |
Revert "SERVER-54150 Recovery from a stable checkpoint should fassert on oplog application failures"
This reverts commit 347f059c439dfafe9e8a34365b4c5e7a17c22acf.
-rw-r--r-- | jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js | 114 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 72 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.h | 20 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_applier.h | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_applier_impl_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_applier_utils.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.h | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_mock.h | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_test.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_rollback.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/transaction_oplog_application.cpp | 34 | ||||
-rw-r--r-- | src/mongo/dbtests/repltests.cpp | 6 | ||||
-rw-r--r-- | src/mongo/dbtests/storage_timestamp_tests.cpp | 4 |
16 files changed, 55 insertions, 316 deletions
diff --git a/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js b/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js deleted file mode 100644 index 07a68e02b7f..00000000000 --- a/jstests/noPassthrough/oplog_application_while_recovering_must_succeed.js +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Tests that server fasserts on oplog applications failures in recovering mode. - * This behavior is expected only in tests as we ignore these errors in production but output a - * warning message. - * @tags: [ - * requires_persistence, - * requires_replication, - * ] - */ -(function() { -"use strict"; - -// This test appends 'insert' and 'delete' oplogs directly to the oplog system collection in -// standalone mode, When restarting as a replica set member we assume the collections will play -// forward to the appropriate count. But because we added a new oplog entry that's going to turn -// into an insert, we play forward to have more documents than expected (or the reverse observation -// for adding a delete oplog entry). -TestData.skipEnforceFastCountOnValidate = true; - -const ops = ["Update", "Delete", "Insert"]; -const oplogApplicationResults = ["Success", "CrudError", "NamespaceNotFound"]; - -function runTest(op, result) { - jsTestLog(`Testing "${op}" oplog application during recovery that finishes with "${result}".`); - const rst = new ReplSetTest({nodes: 1}); - rst.startSet(); - rst.initiateWithHighElectionTimeout(); - let conn = rst.getPrimary(); - - // Construct a valid oplog entry. - function constructOplogEntry(oplog) { - const lastOplogEntry = oplog.find().sort({ts: -1}).limit(1).toArray()[0]; - const testCollOplogEntry = - oplog.find({op: "i", ns: "test.coll"}).sort({ts: -1}).limit(1).toArray()[0]; - const highestTS = lastOplogEntry.ts; - const oplogToInsertTS = Timestamp(highestTS.getTime(), highestTS.getInc() + 1); - delete testCollOplogEntry.o2; - if (op === "Delete") { - return Object.extend(testCollOplogEntry, - {op: "d", ns: "test.coll", o: {_id: 0}, ts: oplogToInsertTS}); - } else if (op === "Update") { - return Object.extend(testCollOplogEntry, { - op: "u", - ns: "test.coll", - o: {$v: 2, diff: {u: {a: 1}}}, - o2: {_id: 0}, - ts: oplogToInsertTS - }); - } else if (op === "Insert") { - return Object.extend( - testCollOplogEntry, - {op: "i", ns: "test.coll", o: {_id: 1, a: 1}, ts: oplogToInsertTS}); - } - } - - // Do an initial insert. - assert.commandWorked( - conn.getDB("test").coll.insert({_id: 0, a: 0}, {writeConcern: {w: "majority"}})); - if (result == "CrudError") { - // For 'update' and 'delete' oplog to not find the document. - assert.commandWorked( - conn.getDB("test").coll.deleteOne({_id: 0}, {writeConcern: {w: "majority"}})); - // For 'insert' to fail with duplicate key error. - assert.commandWorked( - conn.getDB("test").coll.insert({_id: 1}, {writeConcern: {w: "majority"}})); - } else if (result == "NamespaceNotFound") { - conn.getDB("test").coll.drop(); - } - - jsTestLog("Restart the node in standalone mode to append the new oplog."); - rst.stop(0, undefined /*signal*/, undefined /*opts*/, {forRestart: true}); - conn = rst.start(0, {noReplSet: true, noCleanData: true}); - let oplog = conn.getDB("local").oplog.rs; - - // Construct a valid oplog entry using the highest timestamp in the oplog. The highest timestamp - // may differ from the one above due to concurrent internal writes when the node was a primary. - let oplogToInsert = constructOplogEntry(oplog); - jsTestLog(`Inserting oplog entry: ${tojson(oplogToInsert)}`); - assert.commandWorked(oplog.insert(oplogToInsert)); - - jsTestLog("Restart the node with replication enabled to let the added oplog entry" + - " get applied as part of replication recovery."); - rst.stop(0, undefined /*signal*/, undefined /*opts*/, {forRestart: true}); - if (result == "Success") { - jsTestLog("The added oplog is applied successfully during replication recovery."); - rst.start(0, {noCleanData: true}); - conn = rst.getPrimary(); - if (op === "Update") { - assert.eq({_id: 0, a: 1}, conn.getDB("test").coll.findOne()); - } else if (op === "Delete") { - assert.eq(null, conn.getDB("test").coll.findOne()); - } else if (op === "Insert") { - assert.eq({_id: 0, a: 0}, conn.getDB("test").coll.findOne({_id: 0})); - assert.eq({_id: 1, a: 1}, conn.getDB("test").coll.findOne({_id: 1})); - } - } else { - jsTestLog( - "Server should crash while applying the added oplog during replication recovery."); - const node = rst.start(0, {noCleanData: true, waitForConnect: false}); - const exitCode = waitProgram(node.pid); - assert.eq(exitCode, MongoRunner.EXIT_ABORT); - assert.soon( - function() { - return rawMongoProgramOutput().search(/Fatal assertion.*5415000/) >= 0; - }, - "Node should have fasserted upon encountering a fatal error during startup", - ReplSetTest.kDefaultTimeoutMS); - } - - rst.stopSet(); -} - -ops.forEach(op => oplogApplicationResults.forEach(result => runTest(op, result))); -}()); diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index edcf21cebca..e2bb12b3c61 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -110,7 +110,6 @@ using std::string; using std::stringstream; using std::unique_ptr; using std::vector; -using namespace std::string_literals; using IndexVersion = IndexDescriptor::IndexVersion; @@ -1099,8 +1098,6 @@ const StringMap<ApplyOpMetadata> kOpsMap = { constexpr StringData OplogApplication::kInitialSyncOplogApplicationMode; constexpr StringData OplogApplication::kRecoveringOplogApplicationMode; -constexpr StringData OplogApplication::kStableRecoveringOplogApplicationMode; -constexpr StringData OplogApplication::kUnstableRecoveringOplogApplicationMode; constexpr StringData OplogApplication::kSecondaryOplogApplicationMode; constexpr StringData OplogApplication::kApplyOpsCmdOplogApplicationMode; @@ -1108,10 +1105,8 @@ StringData OplogApplication::modeToString(OplogApplication::Mode mode) { switch (mode) { case OplogApplication::Mode::kInitialSync: return OplogApplication::kInitialSyncOplogApplicationMode; - case OplogApplication::Mode::kUnstableRecovering: - return OplogApplication::kUnstableRecoveringOplogApplicationMode; - case OplogApplication::Mode::kStableRecovering: - return OplogApplication::kStableRecoveringOplogApplicationMode; + case OplogApplication::Mode::kRecovering: + return OplogApplication::kRecoveringOplogApplicationMode; case OplogApplication::Mode::kSecondary: return OplogApplication::kSecondaryOplogApplicationMode; case OplogApplication::Mode::kApplyOpsCmd: @@ -1124,9 +1119,7 @@ StatusWith<OplogApplication::Mode> OplogApplication::parseMode(const std::string if (mode == OplogApplication::kInitialSyncOplogApplicationMode) { return OplogApplication::Mode::kInitialSync; } else if (mode == OplogApplication::kRecoveringOplogApplicationMode) { - // This only being used in applyOps command which is controlled by the client, so it should - // be unstable. - return OplogApplication::Mode::kUnstableRecovering; + return OplogApplication::Mode::kRecovering; } else if (mode == OplogApplication::kSecondaryOplogApplicationMode) { return OplogApplication::Mode::kSecondary; } else if (mode == OplogApplication::kApplyOpsCmdOplogApplicationMode) { @@ -1138,33 +1131,6 @@ StatusWith<OplogApplication::Mode> OplogApplication::parseMode(const std::string MONGO_UNREACHABLE; } -void OplogApplication::checkOnOplogFailureForRecovery(OperationContext* opCtx, - const mongo::BSONObj& oplogEntry, - const std::string& errorMsg) { - const bool isReplicaSet = - repl::ReplicationCoordinator::get(opCtx->getServiceContext())->getReplicationMode() == - repl::ReplicationCoordinator::modeReplSet; - // Relax the constraints of oplog application if the node is not a replica set member. - if (!isReplicaSet) { - return; - } - - // Only fassert in test environment. - if (getTestCommandsEnabled()) { - LOGV2_FATAL(5415000, - "Error applying operation while recovering from stable " - "checkpoint. This can lead to data corruption.", - "oplogEntry"_attr = oplogEntry, - "error"_attr = errorMsg); - } else { - LOGV2_WARNING(5415001, - "Error applying operation while recovering from stable " - "checkpoint. This can lead to data corruption.", - "oplogEntry"_attr = oplogEntry, - "error"_attr = errorMsg); - } -} - // @return failure status if an update should have happened and the document DNE. // See replset initial sync code. Status applyOperation_inlock(OperationContext* opCtx, @@ -1201,21 +1167,11 @@ Status applyOperation_inlock(OperationContext* opCtx, return Status::OK(); } - const bool inStableRecovery = mode == OplogApplication::Mode::kStableRecovering; NamespaceString requestNss; CollectionPtr collection = nullptr; if (auto uuid = op.getUuid()) { auto catalog = CollectionCatalog::get(opCtx); collection = catalog->lookupCollectionByUUID(opCtx, uuid.get()); - if (!collection && inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, - redact(opOrGroupedInserts.toBSON()), - str::stream() - << "(NamespaceNotFound): Failed to apply operation due to missing collection (" - << uuid.value() << ")"); - } - uassert(ErrorCodes::NamespaceNotFound, str::stream() << "Failed to apply operation due to missing collection (" << uuid.get() << "): " << redact(opOrGroupedInserts.toBSON()), @@ -1283,7 +1239,7 @@ Status applyOperation_inlock(OperationContext* opCtx, case ReplicationCoordinator::modeNone: { // Only assign timestamps on standalones during replication recovery when // started with the 'recoverFromOplogAsStandalone' flag. - return OplogApplication::inRecovering(mode); + return mode == OplogApplication::Mode::kRecovering; } } } @@ -1432,9 +1388,6 @@ Status applyOperation_inlock(OperationContext* opCtx, if (oplogApplicationEnforcesSteadyStateConstraints) { return status; } - } else if (inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, redact(op.toBSONForLogging()), redact(status)); } // Continue to the next block to retry the operation as an upsert. needToDoUpsert = true; @@ -1672,10 +1625,6 @@ Status applyOperation_inlock(OperationContext* opCtx, }); if (!status.isOK()) { - if (inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, redact(op.toBSONForLogging()), redact(status)); - } return status; } @@ -1743,17 +1692,6 @@ Status applyOperation_inlock(OperationContext* opCtx, &upsertConfigImage); } - if (result.nDeleted == 0 && inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, - redact(op.toBSONForLogging()), - !collection ? str::stream() - << "(NamespaceNotFound): Failed to apply operation due " - "to missing collection (" - << requestNss << ")" - : "Applied a delete which did not delete anything."s); - } - if (result.nDeleted == 0 && mode == OplogApplication::Mode::kSecondary) { LOGV2_WARNING(2170002, "Applied a delete which did not delete anything in steady state " @@ -1900,7 +1838,7 @@ Status applyCommand_inlock(OperationContext* opCtx, case ReplicationCoordinator::modeNone: { // Only assign timestamps on standalones during replication recovery when // started with 'recoverFromOplogAsStandalone'. - return OplogApplication::inRecovering(mode); + return mode == OplogApplication::Mode::kRecovering; } } MONGO_UNREACHABLE; diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h index 635f3a3a116..e975ee42095 100644 --- a/src/mongo/db/repl/oplog.h +++ b/src/mongo/db/repl/oplog.h @@ -171,10 +171,7 @@ using IncrementOpsAppliedStatsFn = std::function<void()>; class OplogApplication { public: static constexpr StringData kInitialSyncOplogApplicationMode = "InitialSync"_sd; - // This only being used in 'applyOps' command when sent by client. static constexpr StringData kRecoveringOplogApplicationMode = "Recovering"_sd; - static constexpr StringData kStableRecoveringOplogApplicationMode = "StableRecovering"_sd; - static constexpr StringData kUnstableRecoveringOplogApplicationMode = "UnstableRecovering"_sd; static constexpr StringData kSecondaryOplogApplicationMode = "Secondary"_sd; static constexpr StringData kApplyOpsCmdOplogApplicationMode = "ApplyOps"_sd; @@ -183,12 +180,9 @@ public: kInitialSync, // Used when we are applying oplog operations to recover the database state following an - // clean/unclean shutdown, or when we are recovering from the oplog after we rollback to a + // unclean shutdown, or when we are recovering from the oplog after we rollback to a // checkpoint. - // If recovering from a unstable stable checkpoint. - kUnstableRecovering, - // If recovering from a stable checkpoint.~ - kStableRecovering, + kRecovering, // Used when a secondary node is applying oplog operations from the primary during steady // state replication. @@ -199,19 +193,9 @@ public: kApplyOpsCmd }; - static bool inRecovering(Mode mode) { - return mode == Mode::kUnstableRecovering || mode == Mode::kStableRecovering; - } - static StringData modeToString(Mode mode); static StatusWith<Mode> parseMode(const std::string& mode); - - // Server will crash on oplog application failure during recovery from stable checkpoint in the - // test environment. - static void checkOnOplogFailureForRecovery(OperationContext* opCtx, - const mongo::BSONObj& oplogEntry, - const std::string& errorMsg); }; inline std::ostream& operator<<(std::ostream& s, OplogApplication::Mode mode) { diff --git a/src/mongo/db/repl/oplog_applier.h b/src/mongo/db/repl/oplog_applier.h index 252070077a2..b582b282efd 100644 --- a/src/mongo/db/repl/oplog_applier.h +++ b/src/mongo/db/repl/oplog_applier.h @@ -67,10 +67,10 @@ public: Options() = delete; explicit Options(OplogApplication::Mode inputMode) : mode(inputMode), - allowNamespaceNotFoundErrorsOnCrudOps(inputMode == - OplogApplication::Mode::kInitialSync || - OplogApplication::inRecovering(inputMode)), - skipWritesToOplog(OplogApplication::inRecovering(inputMode)) {} + allowNamespaceNotFoundErrorsOnCrudOps( + inputMode == OplogApplication::Mode::kInitialSync || + inputMode == OplogApplication::Mode::kRecovering), + skipWritesToOplog(inputMode == OplogApplication::Mode::kRecovering) {} // Used to determine which operations should be applied. Only initial sync will set this to // be something other than the null optime. diff --git a/src/mongo/db/repl/oplog_applier_impl_test.cpp b/src/mongo/db/repl/oplog_applier_impl_test.cpp index 97e09475c24..5d517b148ad 100644 --- a/src/mongo/db/repl/oplog_applier_impl_test.cpp +++ b/src/mongo/db/repl/oplog_applier_impl_test.cpp @@ -767,7 +767,7 @@ TEST_F(MultiOplogEntryOplogApplierImplTest, MultiApplyUnpreparedTransactionAllAt ReplicationCoordinator::get(_opCtx.get()), getConsistencyMarkers(), getStorageInterface(), - repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering), + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), _writerPool.get()); // Apply both inserts and the commit in a single batch. We expect no oplog entries to @@ -1249,7 +1249,7 @@ TEST_F(MultiOplogEntryPreparedTransactionTest, MultiApplyPreparedTransactionReco ReplicationCoordinator::get(_opCtx.get()), getConsistencyMarkers(), getStorageInterface(), - repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering), + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), _writerPool.get()); // Apply a batch with the insert operations. This should have no effect, because this is @@ -1483,7 +1483,7 @@ TEST_F(MultiOplogEntryPreparedTransactionTest, ReplicationCoordinator::get(_opCtx.get()), getConsistencyMarkers(), getStorageInterface(), - repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering), + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), _writerPool.get()); const auto expectedStartOpTime = _singlePrepareApplyOp->getOpTime(); diff --git a/src/mongo/db/repl/oplog_applier_utils.cpp b/src/mongo/db/repl/oplog_applier_utils.cpp index 300b4f0308e..7484b500754 100644 --- a/src/mongo/db/repl/oplog_applier_utils.cpp +++ b/src/mongo/db/repl/oplog_applier_utils.cpp @@ -302,7 +302,6 @@ Status OplogApplierUtils::applyOplogBatchCommon( InsertGroup insertGroup( ops, opCtx, oplogApplicationMode, isDataConsistent, applyOplogEntryOrGroupedInserts); - const bool inStableRecovery = oplogApplicationMode == OplogApplication::Mode::kStableRecovering; for (auto it = ops->cbegin(); it != ops->cend(); ++it) { const OplogEntry& entry = **it; @@ -322,15 +321,9 @@ Status OplogApplierUtils::applyOplogBatchCommon( if (!status.isOK()) { // Tried to apply an update operation but the document is missing, there must be // a delete operation for the document later in the oplog. - // Server will crash on oplog application failure during recovery from stable - // checkpoint in the test environment. if (status == ErrorCodes::UpdateOperationFailed && (oplogApplicationMode == OplogApplication::Mode::kInitialSync || - OplogApplication::inRecovering(oplogApplicationMode))) { - if (inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, redact(entry.toBSONForLogging()), redact(status)); - } + oplogApplicationMode == OplogApplication::Mode::kRecovering)) { continue; } @@ -344,14 +337,8 @@ Status OplogApplierUtils::applyOplogBatchCommon( } catch (const DBException& e) { // SERVER-24927 If we have a NamespaceNotFound exception, then this document will be // dropped before initial sync or recovery ends anyways and we should ignore it. - // Server will crash on oplog application failure during recovery from stable checkpoint - // in the test environment. if (e.code() == ErrorCodes::NamespaceNotFound && entry.isCrudOpType() && allowNamespaceNotFoundErrorsOnCrudOps) { - if (inStableRecovery) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, redact(entry.toBSONForLogging()), redact(e)); - } continue; } diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 0bf719ddcf1..a6df2f77d29 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -521,15 +521,13 @@ bool ReplicationCoordinatorImpl::_startLoadLocalConfig( LOGV2(4280504, "Cleaning up any partially applied oplog batches & reading last op from oplog"); // Read the last op from the oplog after cleaning up any partially applied batches. - auto stableTimestamp = - _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, boost::none); + const auto stableTimestamp = boost::none; + _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp); LOGV2(4280505, "Creating any necessary TenantMigrationAccessBlockers for unfinished migrations"); tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx); LOGV2(4280506, "Reconstructing prepared transactions"); - reconstructPreparedTransactions(opCtx, - stableTimestamp ? OplogApplication::Mode::kStableRecovering - : OplogApplication::Mode::kUnstableRecovering); + reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); const auto lastOpTimeAndWallTimeResult = _externalState->loadLastOpTimeAndWallTime(opCtx); diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index dc69371f557..7bce580fd7a 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -329,7 +329,7 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC // Initialize the cached pointer to the oplog collection. acquireOplogCollectionForLogging(opCtx); - boost::optional<Timestamp> stableTimestamp = boost::none; + if (recoveryTS || startupRecoveryForRestore) { if (startupRecoveryForRestore && !recoveryTS) { LOGV2_WARNING(5576601, @@ -340,7 +340,8 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC // We pass in "none" for the stable timestamp so that recoverFromOplog asks storage // for the recoveryTimestamp just like on replica set recovery. - stableTimestamp = recoverFromOplog(opCtx, boost::none); + const auto stableTimestamp = boost::none; + recoverFromOplog(opCtx, stableTimestamp); } else { if (gTakeUnstableCheckpointOnShutdown) { // Ensure 'recoverFromOplogAsStandalone' with 'takeUnstableCheckpointOnShutdown' @@ -358,9 +359,7 @@ void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opC } } - reconstructPreparedTransactions(opCtx, - stableTimestamp ? OplogApplication::Mode::kStableRecovering - : OplogApplication::Mode::kUnstableRecovering); + reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); // Two-phase index builds are built in the background, which may still be in-progress after // recovering from the oplog. To prevent crashing the server, skip enabling read-only mode. @@ -430,14 +429,14 @@ void ReplicationRecoveryImpl::recoverFromOplogUpTo(OperationContext* opCtx, Time invariant(appliedUpTo <= endPoint); } - reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kStableRecovering); + reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); } -boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog( - OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) try { +void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx, + boost::optional<Timestamp> stableTimestamp) try { if (_consistencyMarkers->getInitialSyncFlag(opCtx)) { LOGV2(21542, "No recovery needed. Initial sync flag set"); - return stableTimestamp; // Initial Sync will take over so no cleanup is needed. + return; // Initial Sync will take over so no cleanup is needed. } const auto serviceCtx = getGlobalServiceContext(); @@ -479,7 +478,7 @@ boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog( // Oplog is empty. There are no oplog entries to apply, so we exit recovery and go into // initial sync. LOGV2(21543, "No oplog entries to apply for recovery. Oplog is empty"); - return stableTimestamp; + return; } fassert(40290, topOfOplogSW); const auto topOfOplog = topOfOplogSW.getValue(); @@ -493,7 +492,6 @@ boost::optional<Timestamp> ReplicationRecoveryImpl::recoverFromOplog( _recoverFromUnstableCheckpoint( opCtx, _consistencyMarkers->getAppliedThrough(opCtx), topOfOplog); } - return stableTimestamp; } catch (...) { LOGV2_FATAL_CONTINUE(21570, "Caught exception during replication recovery: {error}", @@ -684,10 +682,6 @@ Timestamp ReplicationRecoveryImpl::_applyOplogOperations(OperationContext* opCtx RecoveryOplogApplierStats stats; - auto oplogApplicationMode = (recoveryMode == RecoveryMode::kStartupFromStableTimestamp || - recoveryMode == RecoveryMode::kRollbackFromStableTimestamp) - ? OplogApplication::Mode::kStableRecovering - : OplogApplication::Mode::kUnstableRecovering; auto writerPool = makeReplWriterPool(); auto* replCoord = ReplicationCoordinator::get(opCtx); OplogApplierImpl oplogApplier(nullptr, @@ -696,7 +690,7 @@ Timestamp ReplicationRecoveryImpl::_applyOplogOperations(OperationContext* opCtx replCoord, _consistencyMarkers, _storageInterface, - OplogApplier::Options(oplogApplicationMode), + OplogApplier::Options(OplogApplication::Mode::kRecovering), writerPool.get()); OplogApplier::BatchLimits batchLimits; diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h index 9bcec9982fc..614d7fba865 100644 --- a/src/mongo/db/repl/replication_recovery.h +++ b/src/mongo/db/repl/replication_recovery.h @@ -52,12 +52,9 @@ public: /** * Recovers the data on disk from the oplog. If the provided stable timestamp is not "none", * this function assumes the data reflects that timestamp. - * Returns the provided stable timestamp. If the provided stable timestamp is "none", this - * function might try to ask storage for the last stable timestamp if it exists before doing - * recovery which will be returned after performing successful recovery. */ - virtual boost::optional<Timestamp> recoverFromOplog( - OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) = 0; + virtual void recoverFromOplog(OperationContext* opCtx, + boost::optional<Timestamp> stableTimestamp) = 0; /** * Recovers the data on disk from the oplog and puts the node in readOnly mode. If @@ -80,8 +77,8 @@ public: ReplicationRecoveryImpl(StorageInterface* storageInterface, ReplicationConsistencyMarkers* consistencyMarkers); - boost::optional<Timestamp> recoverFromOplog( - OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override; + void recoverFromOplog(OperationContext* opCtx, + boost::optional<Timestamp> stableTimestamp) override; void recoverFromOplogAsStandalone(OperationContext* opCtx) override; diff --git a/src/mongo/db/repl/replication_recovery_mock.h b/src/mongo/db/repl/replication_recovery_mock.h index 39665085e57..ec546d594c7 100644 --- a/src/mongo/db/repl/replication_recovery_mock.h +++ b/src/mongo/db/repl/replication_recovery_mock.h @@ -42,10 +42,8 @@ class ReplicationRecoveryMock : public ReplicationRecovery { public: ReplicationRecoveryMock() = default; - boost::optional<Timestamp> recoverFromOplog( - OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override { - return stableTimestamp; - } + void recoverFromOplog(OperationContext* opCtx, + boost::optional<Timestamp> stableTimestamp) override {} void recoverFromOplogAsStandalone(OperationContext* opCtx) override {} diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index 0d352265387..e3818a07c71 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -712,7 +712,7 @@ TEST_F(ReplicationRecoveryTest, testRecoveryToStableAppliesDocumentsWithNoAppliedThrough(false); } -TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) { +TEST_F(ReplicationRecoveryTest, RecoveryIgnoresDroppedCollections) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); @@ -724,7 +724,7 @@ TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) { ASSERT_FALSE(autoColl.getCollection()); } - // Not setting a stable timestamp in order to perform unstable recovery, + getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); recovery.recoverFromOplog(opCtx, boost::none); _assertDocsInOplog(opCtx, {1, 2, 3, 4, 5}); @@ -736,24 +736,6 @@ TEST_F(ReplicationRecoveryTest, UnstableRecoveryIgnoresDroppedCollections) { ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); } -DEATH_TEST_REGEX_F(ReplicationRecoveryTest, - StableRecoveryCrashOnDroppedCollectionsInTests, - "Fatal assertion.*5415000") { - ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); - auto opCtx = getOperationContext(); - - _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5}); - - ASSERT_OK(getStorageInterface()->dropCollection(opCtx, testNs)); - { - AutoGetCollectionForReadCommand autoColl(opCtx, testNs); - ASSERT_FALSE(autoColl.getCollection()); - } - - getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); - recovery.recoverFromOplog(opCtx, boost::none); -} - TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehindAfterTruncation) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); @@ -1251,7 +1233,7 @@ TEST_F(ReplicationRecoveryTest, ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(3, 0), 1)); } -TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpTo) { +TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToBeforeEndOfOplog) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); @@ -1262,16 +1244,8 @@ TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpTo) { recovery.recoverFromOplogUpTo(opCtx, Timestamp(5, 5)); _assertDocsInTestCollection(opCtx, {3, 4, 5}); ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); -} - -TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToBeforeEndOfOplog) { - ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); - auto opCtx = getOperationContext(); - - _setUpOplog(opCtx, getStorageInterface(), {2, 3, 4, 5, 6, 7, 8, 9, 10}); - getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); - // Recovers operations with timestamps: 3, 4, 5, 6, 7, 8, 9. + // Recovers operations with timestamps: 6, 7, 8, 9. recovery.recoverFromOplogUpTo(opCtx, Timestamp(9, 9)); _assertDocsInTestCollection(opCtx, {3, 4, 5, 6, 7, 8, 9}); ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(9, 9), 1)); @@ -1341,6 +1315,9 @@ TEST_F(ReplicationRecoveryTest, RecoverFromOplogUpToDoesNotExceedEndPoint) { _setUpOplog(opCtx, getStorageInterface(), {2, 5, 10}); getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); + recovery.recoverFromOplogUpTo(opCtx, Timestamp(9, 9)); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); + recovery.recoverFromOplogUpTo(opCtx, Timestamp(15, 15)); ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(10, 10), 1)); } diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index 2ba2cff09e3..7c37be215ee 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -648,7 +648,7 @@ void RollbackImpl::_runPhaseFromAbortToReconstructPreparedTxns( // transactions were aborted (i.e. the in-memory counts were rolled-back) before computing // collection counts, reconstruct the prepared transactions now, adding on any additional counts // to the now corrected record store. - reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kStableRecovering); + reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); } void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) { diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index c0baecbb166..3f96511361b 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -998,7 +998,7 @@ void rollbackDropIndexes(OperationContext* opCtx, "uuid"_attr = uuid, "indexName"_attr = indexName); - createIndexForApplyOps(opCtx, indexSpec, *nss, OplogApplication::Mode::kStableRecovering); + createIndexForApplyOps(opCtx, indexSpec, *nss, OplogApplication::Mode::kRecovering); LOGV2_DEBUG(21676, 1, diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 9abfcc0c270..fc035a5ff78 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -85,26 +85,9 @@ Status _applyOperationsForTransaction(OperationContext* opCtx, } } catch (const DBException& ex) { // Ignore NamespaceNotFound errors if we are in initial sync or recovering mode. - // During recovery we reconsutuct prepared transactions at the end after applying all - // the oplogs, so 'NamespaceNotFound' error shouldn't be hit whether it is a stable or - // unstable recovery. However we have some scenarios when this error should be skipped: - // 1- This code path can be called while applying commit oplog during unstable recovery - // when 'startupRecoveryForRestore' is set. - // 2- During selective backup: - // - During restore when 'recoverFromOplogAsStandalone' is set which is usually be - // done in a stable recovery mode. - // - After the restore finished as the standalone node started with the flag - // 'takeUnstableCheckpointOnShutdown' so after restarting the node as a replica - // set member it will go through unstable recovery. const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound && (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync || - repl::OplogApplication::inRecovering(oplogApplicationMode)); - - if (ex.code() == ErrorCodes::NamespaceNotFound && - oplogApplicationMode == repl::OplogApplication::Mode::kStableRecovering) { - repl::OplogApplication::checkOnOplogFailureForRecovery( - opCtx, redact(op.toBSONForLogging()), redact(ex)); - } + oplogApplicationMode == repl::OplogApplication::Mode::kRecovering); if (!ignoreException) { LOGV2_DEBUG( @@ -144,7 +127,7 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx, repl::OplogApplication::Mode mode, Timestamp commitTimestamp, Timestamp durableTimestamp) { - invariant(repl::OplogApplication::inRecovering(mode)); + invariant(mode == repl::OplogApplication::Mode::kRecovering); auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {}); @@ -203,8 +186,7 @@ Status applyCommitTransaction(OperationContext* opCtx, invariant(commitCommand.getCommitTimestamp()); switch (mode) { - case repl::OplogApplication::Mode::kUnstableRecovering: - case repl::OplogApplication::Mode::kStableRecovering: { + case repl::OplogApplication::Mode::kRecovering: { return _applyTransactionFromOplogChain(opCtx, entry, mode, @@ -248,8 +230,7 @@ Status applyAbortTransaction(OperationContext* opCtx, const OplogEntry& entry, repl::OplogApplication::Mode mode) { switch (mode) { - case repl::OplogApplication::Mode::kUnstableRecovering: - case repl::OplogApplication::Mode::kStableRecovering: { + case repl::OplogApplication::Mode::kRecovering: { // We don't put transactions into the prepare state until the end of recovery, // so there is no transaction to abort. return Status::OK(); @@ -398,7 +379,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx, // The prepare time of the transaction is set explicitly below. auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {}); - if (repl::OplogApplication::inRecovering(mode) || + if (mode == repl::OplogApplication::Mode::kRecovering || mode == repl::OplogApplication::Mode::kInitialSync) { // We might replay a prepared transaction behind oldest timestamp. Note that since this is // scoped to the storage transaction, and readTransactionOperationsFromOplogChain implicitly @@ -468,7 +449,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx, // Set this in case the application of any ops need to use the prepare timestamp of this // transaction. It should be cleared automatically when the transaction finishes. - if (repl::OplogApplication::inRecovering(mode) || + if (mode == repl::OplogApplication::Mode::kRecovering || mode == repl::OplogApplication::Mode::kInitialSync) { txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime()); } @@ -542,8 +523,7 @@ Status applyPrepareTransaction(OperationContext* opCtx, const OplogEntry& entry, repl::OplogApplication::Mode mode) { switch (mode) { - case repl::OplogApplication::Mode::kUnstableRecovering: - case repl::OplogApplication::Mode::kStableRecovering: { + case repl::OplogApplication::Mode::kRecovering: { if (!serverGlobalParams.enableMajorityReadConcern) { LOGV2_ERROR( 21850, diff --git a/src/mongo/dbtests/repltests.cpp b/src/mongo/dbtests/repltests.cpp index 9c66ede578c..c1594a0a009 100644 --- a/src/mongo/dbtests/repltests.cpp +++ b/src/mongo/dbtests/repltests.cpp @@ -365,12 +365,12 @@ protected: virtual void reset() const = 0; }; -// Some operations are only idempotent when in RECOVERING from unstable checkpoint, not in -// SECONDARY. This includes duplicate inserts and deletes. +// Some operations are only idempotent when in RECOVERING, not in SECONDARY. This includes +// duplicate inserts and deletes. class Recovering : public Base { protected: virtual OplogApplication::Mode getOplogApplicationMode() { - return OplogApplication::Mode::kUnstableRecovering; + return OplogApplication::Mode::kRecovering; } }; diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index f2cee34a605..0c0ffd7ad1e 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -1415,7 +1415,7 @@ public: _coordinatorMock, _consistencyMarkers, storageInterface, - repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering), + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), writerPool.get()); uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); @@ -1517,7 +1517,7 @@ public: _coordinatorMock, _consistencyMarkers, storageInterface, - repl::OplogApplier::Options(repl::OplogApplication::Mode::kStableRecovering), + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), writerPool.get()); uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); |