summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2021-05-28 05:04:48 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-08 22:19:36 +0000
commit: 8f2183cc0f064b477e90c903701815369a867807 (patch)
tree: ce7a02d9747263b55c7efc2bb7f1316fb1f58ca0
parent: 5dbe78f5ecf516be8996d74604479595657b406f (diff)
download: mongo-8f2183cc0f064b477e90c903701815369a867807.tar.gz
SERVER-54666 Use earlier oplog entry if recovery timestamp cannot be found in oplog
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 48
-rw-r--r-- src/mongo/db/repl/replication_recovery.h 12
-rw-r--r-- src/mongo/db/repl/replication_recovery_test.cpp 18
3 files changed, 73 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index 2bc00bfefe2..f1fe31c6de3 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -384,6 +384,8 @@ void ReplicationRecoveryImpl::recoverFromOplogUpTo(OperationContext* opCtx, Time
fassert(31436, "No recovery timestamp, cannot recover from the oplog");
}
+ startPoint = _adjustStartPointIfNecessary(opCtx, startPoint.get());
+
invariant(!endPoint.isNull());
if (*startPoint == endPoint) {
@@ -511,7 +513,8 @@ void ReplicationRecoveryImpl::_recoverFromStableTimestamp(OperationContext* opCt
// Allow "oldest" timestamp to move forward freely.
_storageInterface->setStableTimestamp(opCtx->getServiceContext(), Timestamp::min());
}
- _applyToEndOfOplog(opCtx, stableTimestamp, topOfOplog.getTimestamp(), recoveryMode);
+ auto startPoint = _adjustStartPointIfNecessary(opCtx, stableTimestamp);
+ _applyToEndOfOplog(opCtx, startPoint, topOfOplog.getTimestamp(), recoveryMode);
if (recoveryMode == RecoveryMode::kStartupFromStableTimestamp && startupRecoveryForRestore) {
_storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(),
topOfOplog.getTimestamp());
@@ -844,5 +847,48 @@ void ReplicationRecoveryImpl::_truncateOplogIfNeededAndThenClearOplogTruncateAft
JournalFlusher::get(opCtx)->waitForJournalFlush();
}
+Timestamp ReplicationRecoveryImpl::_adjustStartPointIfNecessary(OperationContext* opCtx,
+ Timestamp startPoint) {
+ // Set up read on oplog collection.
+ AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead);
+ const auto& oplogCollection = oplogRead.getCollection();
+ if (!oplogCollection) {
+ LOGV2_FATAL_NOTRACE(
+ 5466600,
+ "Cannot find oplog collection for recovery oplog application start point",
+ "oplogNss"_attr = NamespaceString::kRsOplogNamespace);
+ }
+
+ boost::optional<BSONObj> adjustmentOplogEntryBSON =
+ _storageInterface->findOplogEntryLessThanOrEqualToTimestamp(
+ opCtx, oplogCollection, startPoint);
+
+ if (!adjustmentOplogEntryBSON) {
+ LOGV2_FATAL_NOTRACE(
+ 5466601,
+ "Could not find LTE oplog entry for oplog application start point for recovery",
+ "startPoint"_attr = startPoint);
+ }
+
+ auto adjustmentOpTime =
+ fassert(5466602, OpTime::parseFromOplogEntry(adjustmentOplogEntryBSON.get()));
+ auto adjustmentTimestamp = adjustmentOpTime.getTimestamp();
+
+ if (startPoint != adjustmentTimestamp) {
+ LOGV2(5466603,
+ "Start point for recovery oplog application not found in oplog. Adjusting start "
+ "point to earlier entry",
+ "oldStartPoint"_attr = startPoint,
+ "newStartPoint"_attr = adjustmentTimestamp);
+ invariant(adjustmentTimestamp < startPoint);
+ return adjustmentTimestamp;
+ }
+
+ LOGV2(5466604,
+ "Start point for recovery oplog application exists in oplog. No adjustment necessary",
+ "startPoint"_attr = startPoint);
+ return startPoint;
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h
index 9439655e512..614d7fba865 100644
--- a/src/mongo/db/repl/replication_recovery.h
+++ b/src/mongo/db/repl/replication_recovery.h
@@ -166,6 +166,18 @@ private:
void _truncateOplogIfNeededAndThenClearOplogTruncateAfterPoint(
OperationContext* opCtx, boost::optional<Timestamp>* stableTimestamp);
+ /**
+ * Checks if the proposed oplog application start point (which is typically derived from the
+ * stable timestamp) exists in the oplog. If it does, this returns that same start point
+ * unchanged. If that point is not in the oplog, this function returns an entry before
+ * that start point.
+ * It is safe to do so, as we make sure that we always keep an oplog entry that is less than
+ * or equal to the stable timestamp so such a correction always pushes the start point back and
+ * never forward. Applying entries from an earlier point is permissible due to oplog entry
+ * idempotency (and also due to the order being preserved.)
+ */
+ Timestamp _adjustStartPointIfNecessary(OperationContext* opCtx, Timestamp startPoint);
+
StorageInterface* _storageInterface;
ReplicationConsistencyMarkers* _consistencyMarkers;
};
diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp
index cf0e7f023c1..a3dece9f441 100644
--- a/src/mongo/db/repl/replication_recovery_test.cpp
+++ b/src/mongo/db/repl/replication_recovery_test.cpp
@@ -785,11 +785,12 @@ TEST_F(ReplicationRecoveryTest,
DEATH_TEST_REGEX_F(ReplicationRecoveryTest,
AppliedThroughBehindOplogFasserts,
- "Fatal assertion.*40292") {
+ "Fatal assertion.*5466601") {
ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
auto opCtx = getOperationContext();
getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(1, 1), 1));
+ getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(1, 1));
_setUpOplog(opCtx, getStorageInterface(), {3, 4, 5});
recovery.recoverFromOplog(opCtx, boost::none);
@@ -807,13 +808,12 @@ DEATH_TEST_REGEX_F(ReplicationRecoveryTest,
recovery.recoverFromOplog(opCtx, boost::none);
}
-DEATH_TEST_REGEX_F(ReplicationRecoveryTest,
- AppliedThroughNotInOplogCausesFassert,
- "Fatal assertion.*40292") {
+TEST_F(ReplicationRecoveryTest, AppliedThroughNotInOplog) {
ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
auto opCtx = getOperationContext();
getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1));
+ getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(3, 3));
_setUpOplog(opCtx, getStorageInterface(), {1, 2, 4, 5});
recovery.recoverFromOplog(opCtx, boost::none);
@@ -1597,4 +1597,14 @@ DEATH_TEST_REGEX_F(
recovery.recoverFromOplogAsStandalone(opCtx);
}
+TEST_F(ReplicationRecoveryTest, RecoverStartFromClosestLTEEntryIfRecoveryTsNotInOplog) {
+ ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers());
+ auto opCtx = getOperationContext();
+
+ auto recoveryTs = Timestamp(4, 4);
+ getStorageInterfaceRecovery()->setRecoveryTimestamp(recoveryTs);
+ _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 5, 6, 7});
+ recovery.recoverFromOplog(opCtx, recoveryTs);
+}
+
} // namespace