diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2021-05-28 05:04:48 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-08 22:19:36 +0000 |
commit | 8f2183cc0f064b477e90c903701815369a867807 (patch) | |
tree | ce7a02d9747263b55c7efc2bb7f1316fb1f58ca0 | |
parent | 5dbe78f5ecf516be8996d74604479595657b406f (diff) | |
download | mongo-8f2183cc0f064b477e90c903701815369a867807.tar.gz |
SERVER-54666 Use earlier oplog entry if recovery timestamp cannot be found in oplog
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 48 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.h | 12 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_test.cpp | 18 |
3 files changed, 73 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index 2bc00bfefe2..f1fe31c6de3 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -384,6 +384,8 @@ void ReplicationRecoveryImpl::recoverFromOplogUpTo(OperationContext* opCtx, Time fassert(31436, "No recovery timestamp, cannot recover from the oplog"); } + startPoint = _adjustStartPointIfNecessary(opCtx, startPoint.get()); + invariant(!endPoint.isNull()); if (*startPoint == endPoint) { @@ -511,7 +513,8 @@ void ReplicationRecoveryImpl::_recoverFromStableTimestamp(OperationContext* opCt // Allow "oldest" timestamp to move forward freely. _storageInterface->setStableTimestamp(opCtx->getServiceContext(), Timestamp::min()); } - _applyToEndOfOplog(opCtx, stableTimestamp, topOfOplog.getTimestamp(), recoveryMode); + auto startPoint = _adjustStartPointIfNecessary(opCtx, stableTimestamp); + _applyToEndOfOplog(opCtx, startPoint, topOfOplog.getTimestamp(), recoveryMode); if (recoveryMode == RecoveryMode::kStartupFromStableTimestamp && startupRecoveryForRestore) { _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), topOfOplog.getTimestamp()); @@ -844,5 +847,48 @@ void ReplicationRecoveryImpl::_truncateOplogIfNeededAndThenClearOplogTruncateAft JournalFlusher::get(opCtx)->waitForJournalFlush(); } +Timestamp ReplicationRecoveryImpl::_adjustStartPointIfNecessary(OperationContext* opCtx, + Timestamp startPoint) { + // Set up read on oplog collection. 
+ AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead); + const auto& oplogCollection = oplogRead.getCollection(); + if (!oplogCollection) { + LOGV2_FATAL_NOTRACE( + 5466600, + "Cannot find oplog collection for recovery oplog application start point", + "oplogNss"_attr = NamespaceString::kRsOplogNamespace); + } + + boost::optional<BSONObj> adjustmentOplogEntryBSON = + _storageInterface->findOplogEntryLessThanOrEqualToTimestamp( + opCtx, oplogCollection, startPoint); + + if (!adjustmentOplogEntryBSON) { + LOGV2_FATAL_NOTRACE( + 5466601, + "Could not find LTE oplog entry for oplog application start point for recovery", + "startPoint"_attr = startPoint); + } + + auto adjustmentOpTime = + fassert(5466602, OpTime::parseFromOplogEntry(adjustmentOplogEntryBSON.get())); + auto adjustmentTimestamp = adjustmentOpTime.getTimestamp(); + + if (startPoint != adjustmentTimestamp) { + LOGV2(5466603, + "Start point for recovery oplog application not found in oplog. Adjusting start " + "point to earlier entry", + "oldStartPoint"_attr = startPoint, + "newStartPoint"_attr = adjustmentTimestamp); + invariant(adjustmentTimestamp < startPoint); + return adjustmentTimestamp; + } + + LOGV2(5466604, + "Start point for recovery oplog application exists in oplog. No adjustment necessary", + "startPoint"_attr = startPoint); + return startPoint; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h index 9439655e512..614d7fba865 100644 --- a/src/mongo/db/repl/replication_recovery.h +++ b/src/mongo/db/repl/replication_recovery.h @@ -166,6 +166,18 @@ private: void _truncateOplogIfNeededAndThenClearOplogTruncateAfterPoint( OperationContext* opCtx, boost::optional<Timestamp>* stableTimestamp); + /** + * Checks if the proposed oplog application start point (which is typically derived from the + * stable timestamp) exists in the oplog. If it does, this returns that same start point + * unchanged. 
If that point is not in the oplog, this function returns an entry before + * that start point. + * It is safe to do so as we make sure that we always keep an oplog entry that is less than + * or equal to the stable timestamp so such a correction always pushes the start point back and + * never forward. Applying entries from an earlier point is permissible due to oplog entry + * idempotency (and also due to the order being preserved.) + */ + Timestamp _adjustStartPointIfNecessary(OperationContext* opCtx, Timestamp startPoint); + StorageInterface* _storageInterface; ReplicationConsistencyMarkers* _consistencyMarkers; }; diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index cf0e7f023c1..a3dece9f441 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -785,11 +785,12 @@ TEST_F(ReplicationRecoveryTest, DEATH_TEST_REGEX_F(ReplicationRecoveryTest, AppliedThroughBehindOplogFasserts, - "Fatal assertion.*40292") { + "Fatal assertion.*5466601") { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(1, 1), 1)); + getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(1, 1)); _setUpOplog(opCtx, getStorageInterface(), {3, 4, 5}); recovery.recoverFromOplog(opCtx, boost::none); @@ -807,13 +808,12 @@ DEATH_TEST_REGEX_F(ReplicationRecoveryTest, recovery.recoverFromOplog(opCtx, boost::none); } -DEATH_TEST_REGEX_F(ReplicationRecoveryTest, - AppliedThroughNotInOplogCausesFassert, - "Fatal assertion.*40292") { +TEST_F(ReplicationRecoveryTest, AppliedThroughNotInOplog) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1)); + 
getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(3, 3)); _setUpOplog(opCtx, getStorageInterface(), {1, 2, 4, 5}); recovery.recoverFromOplog(opCtx, boost::none); @@ -1597,4 +1597,14 @@ DEATH_TEST_REGEX_F( recovery.recoverFromOplogAsStandalone(opCtx); } +TEST_F(ReplicationRecoveryTest, RecoverStartFromClosestLTEEntryIfRecoveryTsNotInOplog) { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + auto recoveryTs = Timestamp(4, 4); + getStorageInterfaceRecovery()->setRecoveryTimestamp(recoveryTs); + _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 5, 6, 7}); + recovery.recoverFromOplog(opCtx, recoveryTs); +} + } // namespace |