diff options
author | Judah Schvimer <judah@mongodb.com> | 2017-08-18 11:13:38 -0400 |
---|---|---|
committer | Judah Schvimer <judah@mongodb.com> | 2017-08-18 11:13:38 -0400 |
commit | 58649cdfda534881c1cbfb5a5cdbbaddf523a3e0 (patch) | |
tree | a27cdd4338fb3b6f5e3c880aa8566ae5401675b4 | |
parent | d0f77885b187132be69d31fd9e79060bc7422e63 (diff) | |
download | mongo-58649cdfda534881c1cbfb5a5cdbbaddf523a3e0.tar.gz |
SERVER-29894 Start oplog application during recovery at checkpoint timestamp
-rw-r--r-- | jstests/replsets/oplog_replay_on_startup.js | 120 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 126 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_test.cpp | 97 |
4 files changed, 149 insertions, 198 deletions
diff --git a/jstests/replsets/oplog_replay_on_startup.js b/jstests/replsets/oplog_replay_on_startup.js index 8684436fee7..bfe48252c18 100644 --- a/jstests/replsets/oplog_replay_on_startup.js +++ b/jstests/replsets/oplog_replay_on_startup.js @@ -80,7 +80,7 @@ }, // minvalid: - t: term, + t: NumberLong(term), ts: ts(minValid), }; @@ -251,114 +251,6 @@ }); // - // 3.2 -> 3.4 upgrade cases - // - - runTest({ - oplogEntries: [1, 2, 3], - collectionContents: [1, 2, 3], - deletePoint: null, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5], - collectionContents: [1, 2, 3], - deletePoint: null, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3, 4, 5], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5], - collectionContents: [1, 2, 3, 4, 5], - deletePoint: null, - begin: null, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3, 4, 5], - }); - - // - // 3.4 -> 3.2 -> 3.4 downgrade/reupgrade cases - // - - runTest({ - oplogEntries: [1, 2, 3], - collectionContents: [1, 2, 3], - deletePoint: 4, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5], - collectionContents: [1, 2, 3], - deletePoint: 4, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3, /*4,*/ 5, 6], - collectionContents: [1, 2, 3], - deletePoint: 4, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3], - collectionContents: [1, 2, 3], - deletePoint: 2, - begin: null, - minValid: 3, - expectedState: 'SECONDARY', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3], - collectionContents: [1, 2, 3], - deletePoint: 2, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5], - collectionContents: [1, 2, 3], - deletePoint: 2, - begin: 3, - minValid: 6, - expectedState: 'RECOVERING', - expectedApplied: [1, 2, 3, 4, 5], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5, 6], - collectionContents: [1, 2, 3], - deletePoint: 2, - begin: 3, - minValid: 6, - expectedState: 'SECONDARY', - expectedApplied: [1, 2, 3, 4, 5, 6], - }); - - // // These states should be impossible to get into. // @@ -391,16 +283,6 @@ runTest({ oplogEntries: [1, 2, 3, 4, 5, 6], - collectionContents: [1, 2, 3], - deletePoint: 2, - begin: 3, - minValid: 3, - expectedState: 'SECONDARY', - expectedApplied: [1, 2, 3, 4, 5, 6], - }); - - runTest({ - oplogEntries: [1, 2, 3, 4, 5, 6], collectionContents: [1, 2, 3, 4, 5], deletePoint: null, begin: 5, diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index 7a7597642eb..bab0894f929 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -38,11 +38,53 @@ #include "mongo/db/repl/storage_interface.h" #include "mongo/db/repl/sync_tail.h" #include "mongo/util/log.h" -#include "mongo/util/scopeguard.h" namespace mongo { namespace repl { +namespace { + +/** + * Returns the timestamp at which we should start oplog application. Returns boost::none if + * there are no oplog entries to apply. + */ +boost::optional<Timestamp> _getOplogApplicationStartPoint(Timestamp checkpointTimestamp, + OpTime appliedThrough) { + if (!checkpointTimestamp.isNull() && !appliedThrough.isNull()) { + // In versions that support "recover to stable timestamp" you should never see a + // non-null appliedThrough in a checkpoint, since we never take checkpoints in the middle + // of a secondary batch application, and a node that does not support "recover to stable + // timestamp" should never see a non-null checkpointTimestamp. + severe() << "checkpointTimestamp (" << checkpointTimestamp.toBSON() + << ") and appliedThrough (" << appliedThrough << ") cannot both be non-null."; + fassertFailedNoTrace(40603); + + } else if (!checkpointTimestamp.isNull()) { + // If appliedThrough is null and the checkpointTimestamp is not null, then we recovered + // to a checkpoint and should use that checkpoint timestamp as the oplog application + // start point. + log() << "Starting recovery oplog application at the checkpointTimestamp: " + << checkpointTimestamp.toBSON(); + return checkpointTimestamp; + + } else if (!appliedThrough.isNull()) { + // If the checkpointTimestamp is null and the appliedThrough is not null, then we did not + // recover to a checkpoint and we should use the appliedThrough as the oplog application + // start point. + log() << "Starting recovery oplog application at the appliedThrough: " << appliedThrough; + return appliedThrough.getTimestamp(); + + } else { + log() << "No oplog entries to apply for recovery. appliedThrough and " + "checkpointTimestamp are both null."; + // No follow-up work to do. + return boost::none; + } + MONGO_UNREACHABLE; +} + +} // namespace + ReplicationRecoveryImpl::ReplicationRecoveryImpl(StorageInterface* storageInterface, ReplicationConsistencyMarkers* consistencyMarkers) : _storageInterface(storageInterface), _consistencyMarkers(consistencyMarkers) {} @@ -56,22 +98,14 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try { const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx); const auto appliedThrough = _consistencyMarkers->getAppliedThrough(opCtx); - const bool needToDeleteEndOfOplog = !truncateAfterPoint.isNull() && - // This version should never have a non-null truncateAfterPoint with a null appliedThrough. - // This scenario means that we downgraded after unclean shutdown, then the downgraded node - // deleted the ragged end of our oplog, then did a clean shutdown. - !appliedThrough.isNull() && - // Similarly we should never have an appliedThrough higher than the truncateAfterPoint. This - // means that the downgraded node deleted our ragged end then applied ahead of our - // truncateAfterPoint and then had an unclean shutdown before upgrading. We are ok with - // applying these ops because older versions wrote to the oplog from a single thread so we - // know they are in order. - !(appliedThrough.getTimestamp() >= truncateAfterPoint); - if (needToDeleteEndOfOplog) { + if (!truncateAfterPoint.isNull()) { log() << "Removing unapplied entries starting at: " << truncateAfterPoint.toBSON(); _truncateOplogTo(opCtx, truncateAfterPoint); } - _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {}); // clear the truncateAfterPoint + + // Clear the truncateAfterPoint so that we don't truncate the next batch of oplog entries + // erroneously. + _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {}); // TODO (SERVER-30556): Delete this line since the old oplog delete from point cannot exist. _consistencyMarkers->removeOldOplogDeleteFromPointField(opCtx); @@ -90,47 +124,44 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try { // there. If there is nothing in the oplog, then we do not set the initial data timestamp. auto checkpointTimestamp = _consistencyMarkers->getCheckpointTimestamp(opCtx); if (!checkpointTimestamp.isNull()) { + // If we have a checkpoint timestamp, we set the initial data timestamp now so that // the operations we apply below can be given the proper timestamps. _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), SnapshotName(checkpointTimestamp)); } - // If we don't have a checkpoint timestamp, then we are either not running a storage engine - // that supports 'recover to stable timestamp' or we just upgraded from 3.4. In both cases, the - // data on disk is not consistent until we have applied all oplog entries to the end of the - // oplog, since we do not know which ones actually got applied before shutdown. As a result, - // we do not set the initial data timestamp until after we have applied to the end of the - // oplog. - ON_BLOCK_EXIT([&] { - if (checkpointTimestamp.isNull() && topOfOplog) { - _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), - SnapshotName(topOfOplog->getTimestamp())); - } - }); - + // Oplog is empty. There are no oplog entries to apply, so we exit recovery. If there was a + // checkpointTimestamp then we already set the initial data timestamp. Otherwise, there is + // nothing to set it to. if (!topOfOplog) { - invariant(appliedThrough.isNull()); log() << "No oplog entries to apply for recovery. Oplog is empty."; return; } - // If appliedThrough is null, that means we are consistent at the top of the oplog. - if (appliedThrough.isNull()) { - log() << "No oplog entries to apply for recovery. appliedThrough is null."; - // No follow-up work to do. - return; + if (auto startPoint = _getOplogApplicationStartPoint(checkpointTimestamp, appliedThrough)) { + _applyToEndOfOplog(opCtx, startPoint.get(), topOfOplog->getTimestamp()); + } + + // If we don't have a checkpoint timestamp, then we are either not running a storage engine + // that supports "recover to stable timestamp" or we just upgraded from a version that didn't. + // In both cases, the data on disk is not consistent until we have applied all oplog entries to + // the end of the oplog, since we do not know which ones actually got applied before shutdown. + // As a result, we do not set the initial data timestamp until after we have applied to the end + // of the oplog. + if (checkpointTimestamp.isNull()) { + _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(), + SnapshotName(topOfOplog->getTimestamp())); } - _applyToEndOfOplog(opCtx, appliedThrough, topOfOplog.get()); } catch (...) { severe() << "Caught exception during replication recovery: " << exceptionToStatus(); std::terminate(); } void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx, - OpTime oplogApplicationStartPoint, - OpTime topOfOplog) { + Timestamp oplogApplicationStartPoint, + Timestamp topOfOplog) { invariant(!oplogApplicationStartPoint.isNull()); invariant(!topOfOplog.isNull()); @@ -141,17 +172,17 @@ void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx, << "No oplog entries to apply for recovery. appliedThrough is at the top of the oplog."; return; // We've applied all the valid oplog we have. } else if (oplogApplicationStartPoint > topOfOplog) { - severe() << "Applied op " << oplogApplicationStartPoint << " not found. Top of oplog is " - << topOfOplog << '.'; + severe() << "Applied op " << oplogApplicationStartPoint.toBSON() + << " not found. Top of oplog is " << topOfOplog.toBSON() << '.'; fassertFailedNoTrace(40313); } - log() << "Replaying stored operations from " << oplogApplicationStartPoint << " (exclusive) to " - << topOfOplog << " (inclusive)."; + log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON() + << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive)."; DBDirectClient db(opCtx); auto cursor = db.query(NamespaceString::kRsOplogNamespace.ns(), - QUERY("ts" << BSON("$gte" << oplogApplicationStartPoint.getTimestamp())), + QUERY("ts" << BSON("$gte" << oplogApplicationStartPoint)), /*batchSize*/ 0, /*skip*/ 0, /*projection*/ nullptr, @@ -163,15 +194,16 @@ void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx, // This should really be impossible because we check above that the top of the oplog is // strictly > appliedThrough. If this fails it represents a serious bug in either the // storage engine or query's implementation of OplogReplay. - severe() << "Couldn't find any entries in the oplog >= " << oplogApplicationStartPoint - << " which should be impossible."; + severe() << "Couldn't find any entries in the oplog >= " + << oplogApplicationStartPoint.toBSON() << " which should be impossible."; fassertFailedNoTrace(40293); } - auto firstOpTimeFound = fassertStatusOK(40291, OpTime::parseFromOplogEntry(cursor->nextSafe())); - if (firstOpTimeFound != oplogApplicationStartPoint) { - severe() << "Oplog entry at " << oplogApplicationStartPoint - << " is missing; actual entry found is " << firstOpTimeFound; + auto firstTimestampFound = + fassertStatusOK(40291, OpTime::parseFromOplogEntry(cursor->nextSafe())).getTimestamp(); + if (firstTimestampFound != oplogApplicationStartPoint) { + severe() << "Oplog entry at " << oplogApplicationStartPoint.toBSON() + << " is missing; actual entry found is " << firstTimestampFound.toBSON(); fassertFailedNoTrace(40292); } diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h index 994baefe301..6748d695fa5 100644 --- a/src/mongo/db/repl/replication_recovery.h +++ b/src/mongo/db/repl/replication_recovery.h @@ -70,8 +70,8 @@ private: * (inclusive). This fasserts if oplogApplicationStartPoint is not in the oplog. */ void _applyToEndOfOplog(OperationContext* opCtx, - OpTime oplogApplicationStartPoint, - OpTime topOfOplog); + Timestamp oplogApplicationStartPoint, + Timestamp topOfOplog); /** * Gets the last applied OpTime from the end of the oplog. Returns CollectionIsEmpty if there is diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index 79300eebf16..5a63cd5cc4a 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -54,9 +54,12 @@ const NamespaceString testNs("a.a"); class StorageInterfaceRecovery : public StorageInterfaceImpl { public: + using OnSetInitialDataTimestampFn = stdx::function<void()>; + void setInitialDataTimestamp(ServiceContext* serviceCtx, SnapshotName snapshotName) override { stdx::lock_guard<stdx::mutex> lock(_mutex); _initialDataTimestamp = snapshotName; + _onSetInitialDataTimestampFn(); } SnapshotName getInitialDataTimestamp() const { @@ -64,9 +67,15 @@ public: return _initialDataTimestamp; } + void setOnSetInitialDataTimestampFn(OnSetInitialDataTimestampFn onSetInitialDataTimestampFn) { + stdx::lock_guard<stdx::mutex> lock(_mutex); + _onSetInitialDataTimestampFn = onSetInitialDataTimestampFn; + } + private: mutable stdx::mutex _mutex; SnapshotName _initialDataTimestamp = SnapshotName::min(); + OnSetInitialDataTimestampFn _onSetInitialDataTimestampFn = []() {}; }; class ReplicationRecoveryTest : public ServiceContextMongoDTest { @@ -224,21 +233,6 @@ TEST_F(ReplicationRecoveryTest, RecoveryWithEmptyOplogSucceeds) { } DEATH_TEST_F(ReplicationRecoveryTest, - RecoveryWithEmptyOplogAndNonNullAppliedThroughInvariants, - "Invariant failure appliedThrough.isNull()") { - ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); - auto opCtx = getOperationContext(); - - _setUpOplog(opCtx, getStorageInterface(), {}); - - getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1)); - recovery.recoverFromOplog(opCtx); - - _assertDocsInOplog(opCtx, {}); - _assertDocsInTestCollection(opCtx, {}); -} - -DEATH_TEST_F(ReplicationRecoveryTest, TruncateFassertsWithoutOplogCollection, "Fatal assertion 34418 NamespaceNotFound: Can't find local.oplog.rs") { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); @@ -301,29 +295,46 @@ TEST_F(ReplicationRecoveryTest, RecoverySkipsEverythingIfInitialSyncFlagIsSet) { ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), SnapshotName::min()); } -TEST_F(ReplicationRecoveryTest, RecoveryResetsOplogTruncateAfterPointWhenAppliedThroughIsNull) { +TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehind) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); - getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, Timestamp(4, 4)); - getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime()); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1)); _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5}); recovery.recoverFromOplog(opCtx); _assertDocsInOplog(opCtx, {1, 2, 3, 4, 5}); - _assertDocsInTestCollection(opCtx, {}); + _assertDocsInTestCollection(opCtx, {4, 5}); ASSERT_EQ(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx), Timestamp()); - ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime()); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), SnapshotName(Timestamp(5, 5))); } -TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehind) { +TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehindAfterTruncation) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); - getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1)); + getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, Timestamp(4, 4)); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(1, 1), 1)); + _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5}); + + recovery.recoverFromOplog(opCtx); + + _assertDocsInOplog(opCtx, {1, 2, 3}); + _assertDocsInTestCollection(opCtx, {2, 3}); + ASSERT_EQ(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx), Timestamp()); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(3, 3), 1)); + ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), + SnapshotName(Timestamp(3, 3))); +} + +TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenCheckpointTimestampIsBehind) { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + getConsistencyMarkers()->writeCheckpointTimestamp(opCtx, Timestamp(3, 3)); _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5}); recovery.recoverFromOplog(opCtx); @@ -333,15 +344,16 @@ TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehi ASSERT_EQ(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx), Timestamp()); ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), - SnapshotName(Timestamp(5, 5))); + SnapshotName(Timestamp(3, 3))); } -TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehindAfterTruncation) { +TEST_F(ReplicationRecoveryTest, + RecoveryAppliesDocumentsWhenCheckpointTimestampIsBehindAfterTruncation) { ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, Timestamp(4, 4)); - getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(1, 1), 1)); + getConsistencyMarkers()->writeCheckpointTimestamp(opCtx, Timestamp(1, 1)); _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5}); recovery.recoverFromOplog(opCtx); @@ -351,7 +363,7 @@ TEST_F(ReplicationRecoveryTest, RecoveryAppliesDocumentsWhenAppliedThroughIsBehi ASSERT_EQ(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx), Timestamp()); ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(3, 3), 1)); ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), - SnapshotName(Timestamp(3, 3))); + SnapshotName(Timestamp(1, 1))); } DEATH_TEST_F(ReplicationRecoveryTest, AppliedThroughBehindOplogFasserts, "Fatal Assertion 40292") { @@ -392,15 +404,19 @@ TEST_F(ReplicationRecoveryTest, RecoverySetsInitialDataTimestampToCheckpointTime ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); + // Assert that we set the initial data timestamp before we apply operations. + getStorageInterfaceRecovery()->setOnSetInitialDataTimestampFn( + [&]() { ASSERT(getConsistencyMarkers()->getAppliedThrough(opCtx).isNull()); }); + getConsistencyMarkers()->writeCheckpointTimestamp(opCtx, Timestamp(4, 4)); _setUpOplog(opCtx, getStorageInterface(), {1, 2, 3, 4, 5, 6}); recovery.recoverFromOplog(opCtx); _assertDocsInOplog(opCtx, {1, 2, 3, 4, 5, 6}); - _assertDocsInTestCollection(opCtx, {}); + _assertDocsInTestCollection(opCtx, {5, 6}); ASSERT(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull()); - ASSERT(getConsistencyMarkers()->getAppliedThrough(opCtx).isNull()); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(6, 6), 6)); ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), SnapshotName(Timestamp(4, 4))); } @@ -428,14 +444,20 @@ TEST_F(ReplicationRecoveryTest, ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); auto opCtx = getOperationContext(); + // Assert that we set the initial data timestamp after we apply operations. + getStorageInterfaceRecovery()->setOnSetInitialDataTimestampFn([&]() { + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(6, 6), 6)); + }); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(5, 5), 5)); + _setUpOplog(opCtx, getStorageInterface(), {5, 6}); recovery.recoverFromOplog(opCtx); _assertDocsInOplog(opCtx, {5, 6}); - _assertDocsInTestCollection(opCtx, {}); + _assertDocsInTestCollection(opCtx, {6}); ASSERT(getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull()); - ASSERT(getConsistencyMarkers()->getAppliedThrough(opCtx).isNull()); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(6, 6), 6)); ASSERT(getConsistencyMarkers()->getCheckpointTimestamp(opCtx).isNull()); ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), SnapshotName(Timestamp(6, 6))); @@ -458,4 +480,19 @@ TEST_F(ReplicationRecoveryTest, ASSERT_EQ(getStorageInterfaceRecovery()->getInitialDataTimestamp(), SnapshotName::min()); } +DEATH_TEST_F(ReplicationRecoveryTest, + RecoveryFassertsWithNonNullCheckpointTimestampAndAppliedThrough, + "Fatal Assertion 40603") { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {5}); + + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(3, 3), 1)); + getConsistencyMarkers()->writeCheckpointTimestamp(opCtx, Timestamp(4, 4)); + + recovery.recoverFromOplog(opCtx); +} + + } // namespace |