author    Vesselina Ratcheva <vesselina.ratcheva@10gen.com>  2019-04-29 18:06:07 -0400
committer Vesselina Ratcheva <vesselina.ratcheva@10gen.com>  2019-05-09 12:42:37 -0400
commit    29b29b2af6883b99f58c7a90a95f57221874214f (patch)
tree      9447cc1cf2d2d6529b31bf74bfd0f2239f3da1fe
parent    bf47260ea0cbc58d3744d8964b2eb036b9a1a19e (diff)
SERVER-40614 Make rollback errors fatal between aborting and reconstructing prepared transactions
 src/mongo/db/repl/rollback_impl.cpp      | 249
 src/mongo/db/repl/rollback_impl.h        |  20
 src/mongo/db/repl/rollback_impl_test.cpp | 141
 3 files changed, 198 insertions(+), 212 deletions(-)
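The diff below moves the rollback steps between aborting and reconstructing prepared transactions into a single `_runRollbackCriticalSection()` helper, and makes any failure in that window fatal: the caller calls `fassertFailedWithStatus(31049, ...)` on a non-OK status, and a function-level `catch (...)` calls `std::terminate()` on any escaping exception. The standalone sketch below illustrates only that error-handling shape; `Status`, `fatalAssert`, and `runCriticalSection` are illustrative stand-ins, not the MongoDB implementation.

```cpp
// Minimal, self-contained sketch of the error-handling shape this commit introduces.
// Status, fatalAssert, and runCriticalSection are stand-ins for MongoDB's Status,
// fassertFailedWithStatus, and RollbackImpl::_runRollbackCriticalSection.
#include <cstdlib>
#include <exception>
#include <iostream>
#include <string>

struct Status {
    bool ok;
    std::string reason;
};

// Stand-in for fassertFailedWithStatus: log the failure and abort the process.
[[noreturn]] void fatalAssert(int code, const Status& status) {
    std::cerr << "Fatal assertion " << code << ": " << status.reason << '\n';
    std::abort();
}

// Stand-in for the critical section. The function-level 'noexcept ... try' block
// guarantees that an exception cannot unwind out of this window; it terminates
// the process instead.
Status runCriticalSection() noexcept try {
    // ... abort prepared transactions, recover to stable, reconstruct ...
    return Status{true, ""};
} catch (...) {
    std::cerr << "Caught exception during critical section\n";
    std::terminate();
}

int main() {
    // The caller no longer returns early on error: a non-OK status is fatal,
    // mirroring fassertFailedWithStatus(31049, ...) in runRollback().
    Status status = runCriticalSection();
    if (!status.ok) {
        fatalAssert(31049, status);
    }
    std::cout << "critical section completed\n";
    return 0;
}
```

The combination of the fatal assert in the caller and the terminating catch-all means rollback can no longer exit cleanly part-way through this window, which is the invariant the commit message describes.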
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index 1b6fd2482fb..647988123a1 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -226,111 +226,22 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) {
         return status;
     }
     _rollbackStats.rollbackId = _replicationProcess->getRollbackID();
+    _listener->onRollbackIDIncremented();
 
-    // Before computing record store counts, abort all active transactions. This ensures that the
-    // count adjustments are based on correct values where no prepared transactions are active and
-    // all in-memory counts have been rolled-back.
-    // Before calling recoverToStableTimestamp, we must abort the storage transaction of any
-    // prepared transaction. This will require us to scan all sessions and call
-    // abortPreparedTransactionForRollback() on any txnParticipant with a prepared transaction.
-    killSessionsAbortAllPreparedTransactions(opCtx);
-
-    // Ask the record store for the pre-rollback counts of any collections whose counts will change
-    // and create a map with the adjusted counts for post-rollback. While finding the common
-    // point, we keep track of how much each collection's count will change during the rollback.
-    // Note: these numbers are relative to the common point, not the stable timestamp, and thus
-    // must be set after recovering from the oplog.
-    // TODO (SERVER-40614): This error should be fatal.
-    status = _findRecordStoreCounts(opCtx);
+    // Execute the critical section in rollback. It is illegal to exit rollback cleanly between
+    // aborting prepared transactions and reconstructing them. During this window, no interruptions
+    // are allowed and all errors should be made fatal.
+    status = _runRollbackCriticalSection(opCtx, commonPoint);
     if (!status.isOK()) {
-        return status;
+        fassertFailedWithStatus(31049, status.withContext("Error in rollback critical section"));
     }
+    _listener->onPreparedTransactionsReconstructed();
 
-    if (shouldCreateDataFiles()) {
-        // Write a rollback file for each namespace that has documents that would be deleted by
-        // rollback. We need to do this after aborting prepared transactions. Otherwise, we risk
-        // unecessary prepare conflicts when trying to read documents that were modified by those
-        // prepared transactions, which we know we will abort anyway.
-        // TODO (SERVER-40614): This error should be fatal.
-        status = _writeRollbackFiles(opCtx);
-        if (!status.isOK()) {
-            return status;
-        }
-    } else {
-        log() << "Not writing rollback files. 'createRollbackDataFiles' set to false.";
-    }
-
-    // If there were rolled back operations on any session, invalidate all sessions.
-    // We invalidate sessions before we recover so that we avoid invalidating sessions that had
-    // just recovered prepared transactions.
-    if (_observerInfo.rollbackSessionIds.size() > 0) {
-        MongoDSessionCatalog::invalidateSessions(opCtx, boost::none);
-    }
-
-    // Recover to the stable timestamp.
-    auto stableTimestampSW = _recoverToStableTimestamp(opCtx);
-    // TODO (SERVER-40614): This error should be fatal.
-    if (!stableTimestampSW.isOK()) {
-        return stableTimestampSW.getStatus();
-    }
-    _rollbackStats.stableTimestamp = stableTimestampSW.getValue();
-    _listener->onRecoverToStableTimestamp(stableTimestampSW.getValue());
-
-    // Log the total number of insert and update operations that have been rolled back as a result
-    // of recovering to the stable timestamp.
-    log() << "Rollback reverted " << _observerInfo.rollbackCommandCounts[kInsertCmdName]
-          << " insert operations, " << _observerInfo.rollbackCommandCounts[kUpdateCmdName]
-          << " update operations and " << _observerInfo.rollbackCommandCounts[kDeleteCmdName]
-          << " delete operations.";
-
-    // During replication recovery, we truncate all oplog entries with timestamps greater than or
-    // equal to the oplog truncate after point. As a result, we must find the oplog entry after
-    // the common point so we do not truncate the common point itself. If we entered rollback,
-    // we are guaranteed to have at least one oplog entry after the common point.
-    Timestamp truncatePoint = _findTruncateTimestamp(opCtx, commonPointSW.getValue());
-
-    // We cannot have an interrupt point between setting the oplog truncation point and fixing the
-    // record store counts or else a clean shutdown could produce incorrect counts. We explicitly
-    // check for shutdown here to safely maximize interruptibility.
-    // TODO (SERVER-40614): This interrupt point should be removed.
+    // We can now accept interruptions again.
     if (_isInShutdown()) {
         return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
     }
 
-    // Persist the truncate point to the 'oplogTruncateAfterPoint' document. We save this value so
-    // that the replication recovery logic knows where to truncate the oplog. We save this value
-    // durably to match the behavior during startup recovery. This must occur after we successfully
-    // recover to a stable timestamp. If recovering to a stable timestamp fails and we still
-    // truncate the oplog then the oplog will not match the data files. If we crash at any earlier
-    // point, we will recover, find a new sync source, and restart roll back (if necessary on the
-    // new sync source). This is safe because a crash before this point would recover to a stable
-    // checkpoint anyways at or earlier than the stable timestamp.
-    //
-    // Note that storage engine timestamp recovery only restores the database *data* to a stable
-    // timestamp, but does not revert the oplog, which must be done as part of the rollback process.
-    _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, truncatePoint);
-    _rollbackStats.truncateTimestamp = truncatePoint;
-    _listener->onSetOplogTruncateAfterPoint(truncatePoint);
-
-    // Align the drop pending reaper state with what's on disk. Oplog recovery depends on those
-    // being consistent.
-    _resetDropPendingState(opCtx);
-
-    // Run the recovery process.
-    _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx,
-                                                                    stableTimestampSW.getValue());
-    _listener->onRecoverFromOplog();
-
-    // Sets the correct post-rollback counts on any collections whose counts changed during the
-    // rollback.
-    _correctRecordStoreCounts(opCtx);
-
-    // Reconstruct prepared transactions after counts have been adjusted. Since prepared
-    // transactions were aborted (i.e. the in-memory counts were rolled-back) before computing
-    // collection counts, reconstruct the prepared transactions now, adding on any additional counts
-    // to the now corrected record store.
-    reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
-
     // At this point, the last applied and durable optimes on this node still point to ops on
     // the divergent branch of history. We therefore update the last optimes to the top of the
     // oplog, which should now be at the common point.
@@ -497,6 +408,110 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog
     return namespaces;
 }
 
+Status RollbackImpl::_runRollbackCriticalSection(
+    OperationContext* opCtx,
+    RollBackLocalOperations::RollbackCommonPoint commonPoint) noexcept try {
+    // Before computing record store counts, abort all active transactions. This ensures that
+    // the count adjustments are based on correct values where no prepared transactions are
+    // active and all in-memory counts have been rolled-back.
+    // Before calling recoverToStableTimestamp, we must abort the storage transaction of any
+    // prepared transaction. This will require us to scan all sessions and call
+    // abortPreparedTransactionForRollback() on any txnParticipant with a prepared transaction.
+    killSessionsAbortAllPreparedTransactions(opCtx);
+
+    // Ask the record store for the pre-rollback counts of any collections whose counts will
+    // change and create a map with the adjusted counts for post-rollback. While finding the
+    // common point, we keep track of how much each collection's count will change during the
+    // rollback. Note: these numbers are relative to the common point, not the stable timestamp,
+    // and thus must be set after recovering from the oplog.
+    auto status = _findRecordStoreCounts(opCtx);
+    if (!status.isOK()) {
+        return status.withContext("Error while finding record store counts");
+    }
+
+    if (shouldCreateDataFiles()) {
+        // Write a rollback file for each namespace that has documents that would be deleted by
+        // rollback. We need to do this after aborting prepared transactions. Otherwise, we risk
+        // unecessary prepare conflicts when trying to read documents that were modified by
+        // those prepared transactions, which we know we will abort anyway.
+        status = _writeRollbackFiles(opCtx);
+        if (!status.isOK()) {
+            return status.withContext("Error while writing out rollback files");
+        }
+    } else {
+        log() << "Not writing rollback files. 'createRollbackDataFiles' set to false.";
+    }
+
+    // If there were rolled back operations on any session, invalidate all sessions.
+    // We invalidate sessions before we recover so that we avoid invalidating sessions that had
+    // just recovered prepared transactions.
+    if (_observerInfo.rollbackSessionIds.size() > 0) {
+        MongoDSessionCatalog::invalidateSessions(opCtx, boost::none);
+    }
+
+    // Recover to the stable timestamp.
+    auto stableTimestampSW = _recoverToStableTimestamp(opCtx);
+    if (!stableTimestampSW.isOK()) {
+        auto status = stableTimestampSW.getStatus();
+        return status.withContext("Error while recovering to stable timestamp");
+    }
+    _rollbackStats.stableTimestamp = stableTimestampSW.getValue();
+    _listener->onRecoverToStableTimestamp(stableTimestampSW.getValue());
+
+    // Log the total number of insert and update operations that have been rolled back as a
+    // result of recovering to the stable timestamp.
+    log() << "Rollback reverted " << _observerInfo.rollbackCommandCounts[kInsertCmdName]
+          << " insert operations, " << _observerInfo.rollbackCommandCounts[kUpdateCmdName]
+          << " update operations and " << _observerInfo.rollbackCommandCounts[kDeleteCmdName]
+          << " delete operations.";
+
+    // During replication recovery, we truncate all oplog entries with timestamps greater than
+    // or equal to the oplog truncate after point. As a result, we must find the oplog entry
+    // after the common point so we do not truncate the common point itself. If we entered
+    // rollback, we are guaranteed to have at least one oplog entry after the common point.
+    Timestamp truncatePoint = _findTruncateTimestamp(opCtx, commonPoint);
+
+    // Persist the truncate point to the 'oplogTruncateAfterPoint' document. We save this value so
+    // that the replication recovery logic knows where to truncate the oplog. We save this value
+    // durably to match the behavior during startup recovery. This must occur after we successfully
+    // recover to a stable timestamp. If recovering to a stable timestamp fails and we still
+    // truncate the oplog then the oplog will not match the data files. If we crash at any earlier
+    // point, we will recover, find a new sync source, and restart roll back (if necessary on the
+    // new sync source). This is safe because a crash before this point would recover to a stable
+    // checkpoint anyways at or earlier than the stable timestamp.
+    //
+    // Note that storage engine timestamp recovery only restores the database *data* to a stable
+    // timestamp, but does not revert the oplog, which must be done as part of the rollback process.
+    _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, truncatePoint);
+    _rollbackStats.truncateTimestamp = truncatePoint;
+    _listener->onSetOplogTruncateAfterPoint(truncatePoint);
+
+    // Align the drop pending reaper state with what's on disk. Oplog recovery depends on those
+    // being consistent.
+    _resetDropPendingState(opCtx);
+
+    // Run the recovery process.
+    _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx,
+                                                                    stableTimestampSW.getValue());
+    _listener->onRecoverFromOplog();
+
+    // Sets the correct post-rollback counts on any collections whose counts changed during the
+    // rollback.
+    _correctRecordStoreCounts(opCtx);
+
+    // Reconstruct prepared transactions after counts have been adjusted. Since prepared
+    // transactions were aborted (i.e. the in-memory counts were rolled-back) before computing
+    // collection counts, reconstruct the prepared transactions now, adding on any additional counts
+    // to the now corrected record store.
+    reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
+
+    return Status::OK();
+} catch (...) {
+    // Any exceptions here should be made fatal.
+    severe() << "Caught exception during critical section in rollback: " << exceptionToStatus();
+    std::terminate();
+}
+
 void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) {
     // This function explicitly does not check for shutdown since a clean shutdown post oplog
     // truncation is not allowed to occur until the record store counts are corrected.
@@ -572,10 +587,6 @@ void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) {
 }
 
 Status RollbackImpl::_findRecordStoreCounts(OperationContext* opCtx) {
-    // TODO (SERVER-40614): This interrupt point should be removed.
-    if (_isInShutdown()) {
-        return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
-    }
     const auto& catalog = CollectionCatalog::get(opCtx);
     auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
 
@@ -1026,20 +1037,9 @@ Status RollbackImpl::_writeRollbackFiles(OperationContext* opCtx) {
                   str::stream() << "The collection with UUID " << uuid
                                 << " is unexpectedly missing in the CollectionCatalog");
 
-        if (_isInShutdown()) {
-            log() << "Rollback shutting down; not writing rollback file for namespace " << nss->ns()
-                  << " with uuid " << uuid;
-            continue;
-        }
-
         _writeRollbackFileForNamespace(opCtx, uuid, *nss, entry.second);
     }
 
-    // TODO (SERVER-40614): This interrupt point should be removed.
-    if (_isInShutdown()) {
-        return {ErrorCodes::ShutdownInProgress, "rollback shutting down"};
-    }
-
     return Status::OK();
 }
 
@@ -1088,27 +1088,10 @@ void RollbackImpl::_writeRollbackFileForNamespace(OperationContext* opCtx,
 }
 
 StatusWith<Timestamp> RollbackImpl::_recoverToStableTimestamp(OperationContext* opCtx) {
-    // TODO (SERVER-40614): This interrupt point should be removed.
-    if (_isInShutdown()) {
-        return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
-    }
-    // Recover to the stable timestamp while holding the global exclusive lock.
-    {
-        Lock::GlobalWrite globalWrite(opCtx);
-        try {
-            auto stableTimestampSW = _storageInterface->recoverToStableTimestamp(opCtx);
-            if (!stableTimestampSW.isOK()) {
-                severe() << "RecoverToStableTimestamp failed. "
-                         << causedBy(stableTimestampSW.getStatus());
-                // TODO (SERVER-40614): fassert here instead of depending on the caller to do it
-                return {ErrorCodes::UnrecoverableRollbackError,
-                        "Recover to stable timestamp failed."};
-            }
-            return stableTimestampSW;
-        } catch (...) {
-            return exceptionToStatus();
-        }
-    }
+    // Recover to the stable timestamp while holding the global exclusive lock. This may throw,
+    // which the caller must handle.
+    Lock::GlobalWrite globalWrite(opCtx);
+    return _storageInterface->recoverToStableTimestamp(opCtx);
 }
 
 Status RollbackImpl::_triggerOpObserver(OperationContext* opCtx) {
diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h
index e219d2b1fd1..ba9cbd95fa7 100644
--- a/src/mongo/db/repl/rollback_impl.h
+++ b/src/mongo/db/repl/rollback_impl.h
@@ -187,6 +187,11 @@ public:
     virtual void onCommonPointFound(Timestamp commonPoint) noexcept {}
 
     /**
+     * Function called after we have incremented the rollback ID.
+     */
+    virtual void onRollbackIDIncremented() noexcept {}
+
+    /**
      * Function called after a rollback file has been written for each namespace with inserts or
      * updates that are being rolled back.
      */
@@ -194,8 +199,9 @@ public:
 
     /**
      * Function called after we recover to the stable timestamp.
+     * NOTE: This may throw, for testing purposes.
      */
-    virtual void onRecoverToStableTimestamp(Timestamp stableTimestamp) noexcept {}
+    virtual void onRecoverToStableTimestamp(Timestamp stableTimestamp) {}
 
     /**
      * Function called after we set the oplog truncate after point.
@@ -208,6 +214,11 @@ public:
     virtual void onRecoverFromOplog() noexcept {}
 
     /**
+     * Function called after we reconstruct prepared transactions.
+     */
+    virtual void onPreparedTransactionsReconstructed() noexcept {}
+
+    /**
      * Function called after we have triggered the 'onRollback' OpObserver method.
      */
     virtual void onRollbackOpObserver(const OpObserver::RollbackObserverInfo& rbInfo) noexcept {
@@ -375,6 +386,13 @@ private:
     Status _findRecordStoreCounts(OperationContext* opCtx);
 
     /**
+     * Executes the critical section in rollback, defined as the window between aborting and
+     * reconstructing prepared transactions.
+     */
+    Status _runRollbackCriticalSection(
+        OperationContext* opCtx, RollBackLocalOperations::RollbackCommonPoint commonPoint) noexcept;
+
+    /**
      * Sets the record store counts to be the values stored in _newCounts.
      */
     void _correctRecordStoreCounts(OperationContext* opCtx);
 
diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp
index 328b1f1572d..7df54ceea37 100644
--- a/src/mongo/db/repl/rollback_impl_test.cpp
+++ b/src/mongo/db/repl/rollback_impl_test.cpp
@@ -258,6 +258,14 @@ protected:
     bool _recoveredFromOplog = false;
     stdx::function<void()> _onRecoverFromOplogFn = [this]() { _recoveredFromOplog = true; };
 
+    bool _incrementedRollbackID = false;
+    stdx::function<void()> _onRollbackIDIncrementedFn = [this]() { _incrementedRollbackID = true; };
+
+    bool _reconstructedPreparedTransactions = false;
+    stdx::function<void()> _onPreparedTransactionsReconstructedFn = [this]() {
+        _reconstructedPreparedTransactions = true;
+    };
+
     Timestamp _commonPointFound;
     stdx::function<void(Timestamp commonPoint)> _onCommonPointFoundFn =
         [this](Timestamp commonPoint) { _commonPointFound = commonPoint; };
@@ -318,11 +326,15 @@ public:
         _test->_onCommonPointFoundFn(commonPoint);
     }
 
+    void onRollbackIDIncremented() noexcept override {
+        _test->_onRollbackIDIncrementedFn();
+    }
+
     void onRollbackFileWrittenForNamespace(UUID uuid, NamespaceString nss) noexcept final {
         _test->_onRollbackFileWrittenForNamespaceFn(std::move(uuid), std::move(nss));
     }
 
-    void onRecoverToStableTimestamp(Timestamp stableTimestamp) noexcept override {
+    void onRecoverToStableTimestamp(Timestamp stableTimestamp) override {
         _test->_onRecoverToStableTimestampFn(stableTimestamp);
     }
 
@@ -334,6 +346,10 @@ public:
         _test->_onRecoverFromOplogFn();
     }
 
+    void onPreparedTransactionsReconstructed() noexcept override {
+        _test->_onPreparedTransactionsReconstructedFn();
+    }
+
     void onRollbackOpObserver(const OpObserver::RollbackObserverInfo& rbInfo) noexcept override {
         _test->_onRollbackOpObserverFn(rbInfo);
     }
@@ -584,7 +600,9 @@ TEST_F(RollbackImplTest, RollbackCallsRecoverToStableTimestamp) {
     ASSERT_EQUALS(stableTimestamp, _stableTimestamp);
 }
 
-TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfRecoverToStableTimestampFails) {
+DEATH_TEST_F(RollbackImplTest,
+             RollbackFassertsIfRecoverToStableTimestampFails,
+             "Fatal assertion 31049") {
     auto op = makeOpAndRecordId(1);
     _remoteOplog->setOperations({op});
     ASSERT_OK(_insertOplogEntry(op.first));
@@ -609,24 +627,8 @@ TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfRecoverToStableTimestampFails
     ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
     ASSERT_EQUALS(Timestamp(), _stableTimestamp);
 
-    // Run rollback.
-    auto rollbackStatus = _rollback->runRollback(_opCtx.get());
-
-    // Make sure rollback failed with an UnrecoverableRollbackError, and didn't execute the
-    // recover to timestamp logic.
-    ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, rollbackStatus.code());
-    ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
-    ASSERT_EQUALS(Timestamp(), _stableTimestamp);
-
-    // Make sure we transitioned back to SECONDARY state.
-    ASSERT_EQUALS(_coordinator->getMemberState(), MemberState::RS_SECONDARY);
-
-    // Don't set the truncate after point if we fail early.
-    _assertDocsInOplog(_opCtx.get(), {1, 2});
-    truncateAfterPoint =
-        _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
-    ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
-    ASSERT_EQUALS(_truncatePoint, Timestamp());
+    // Run rollback. It should fassert.
+    _rollback->runRollback(_opCtx.get()).ignore();
 }
 
 TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfIncrementRollbackIDFails) {
@@ -676,41 +678,61 @@ TEST_F(RollbackImplTest, RollbackCallsRecoverFromOplog) {
     ASSERT(_recoveredFromOplog);
 }
 
-TEST_F(RollbackImplTest, RollbackSkipsRecoverFromOplogWhenShutdownDuringRTT) {
+TEST_F(RollbackImplTest,
+       RollbackCannotBeShutDownBetweenAbortingAndReconstructingPreparedTransactions) {
     auto op = makeOpAndRecordId(1);
     _remoteOplog->setOperations({op});
     ASSERT_OK(_insertOplogEntry(op.first));
     ASSERT_OK(_insertOplogEntry(makeOp(2)));
 
     _assertDocsInOplog(_opCtx.get(), {1, 2});
-    auto truncateAfterPoint =
-        _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
-    ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
-    _onRecoverToStableTimestampFn = [this](Timestamp stableTimestamp) {
-        _recoveredToStableTimestamp = true;
-        _stableTimestamp = stableTimestamp;
+    _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
+
+    // Called before aborting prepared transactions. We request the shutdown here.
+    _onRollbackIDIncrementedFn = [this]() {
+        _incrementedRollbackID = true;
         _rollback->shutdown();
     };
 
-    // Run rollback.
-    auto status = _rollback->runRollback(_opCtx.get());
+    // Called after reconstructing prepared transactions.
+    _onPreparedTransactionsReconstructedFn = [this]() {
+        ASSERT(_incrementedRollbackID);
+        _reconstructedPreparedTransactions = true;
+    };
 
-    // Make sure shutdown occurred before oplog recovery.
-    ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, _rollback->runRollback(_opCtx.get()));
-    ASSERT(_recoveredToStableTimestamp);
-    ASSERT_FALSE(_recoveredFromOplog);
-    ASSERT_FALSE(_coordinator->lastOpTimesWereReset());
+    // Shutting down is still allowed but it must occur after that window.
+    ASSERT_EQ(ErrorCodes::ShutdownInProgress, _rollback->runRollback(_opCtx.get()));
+    ASSERT(_incrementedRollbackID);
+    ASSERT(_reconstructedPreparedTransactions);
+}
 
-    // Make sure we transitioned back to SECONDARY state.
-    ASSERT_EQUALS(_coordinator->getMemberState(), MemberState::RS_SECONDARY);
-    ASSERT(_stableTimestamp.isNull());
+DEATH_TEST_F(RollbackImplTest,
+             RollbackUassertsAreFatalBetweenAbortingAndReconstructingPreparedTransactions,
+             "Caught exception during critical section in rollback") {
+    auto op = makeOpAndRecordId(1);
+    _remoteOplog->setOperations({op});
+    ASSERT_OK(_insertOplogEntry(op.first));
+    ASSERT_OK(_insertOplogEntry(makeOp(2)));
 
     _assertDocsInOplog(_opCtx.get(), {1, 2});
-    truncateAfterPoint =
-        _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
-    ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
-    ASSERT_EQUALS(_truncatePoint, Timestamp());
+
+    _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
+
+    // Called before aborting prepared transactions.
+    _onRollbackIDIncrementedFn = [this]() { _incrementedRollbackID = true; };
+
+    // Called during the critical section.
+    _onRecoverToStableTimestampFn = [this](Timestamp stableTimestamp) {
+        _recoveredToStableTimestamp = true;
+        uasserted(ErrorCodes::UnknownError, "error for test");
+    };
+
+    // Called after reconstructing prepared transactions. We should not be getting here.
+    _onPreparedTransactionsReconstructedFn = [this]() { ASSERT(false); };
+
+    // We expect to crash when we hit the exception.
+    _rollback->runRollback(_opCtx.get()).ignore();
 }
 
 TEST_F(RollbackImplTest,
@@ -1141,43 +1163,6 @@ TEST_F(RollbackImplTest, RollbackProperlySavesFilesWhenCreateCollAndInsertsAreRo
                       SimpleBSONObjComparator::kInstance.makeEqualTo()));
 }
 
-TEST_F(RollbackImplTest, RollbackStopsWritingRollbackFilesWhenShutdownIsInProgress) {
-    const auto commonOp = makeOpAndRecordId(1);
-    _remoteOplog->setOperations({commonOp});
-    ASSERT_OK(_insertOplogEntry(commonOp.first));
-    _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
-
-    const auto nss1 = NamespaceString("db.people");
-    const auto uuid1 = UUID::gen();
-    const auto coll1 = _initializeCollection(_opCtx.get(), uuid1, nss1);
-    const auto obj1 = BSON("_id" << 0 << "name"
-                                 << "kyle");
-    _insertDocAndGenerateOplogEntry(obj1, uuid1, nss1);
-
-    const auto nss2 = NamespaceString("db.persons");
-    const auto uuid2 = UUID::gen();
-    const auto coll2 = _initializeCollection(_opCtx.get(), uuid2, nss2);
-    const auto obj2 = BSON("_id" << 0 << "name"
-                                 << "jungsoo");
-    _insertDocAndGenerateOplogEntry(obj2, uuid2, nss2);
-
-    // Register a listener that sends rollback into shutdown.
-    std::vector<UUID> collsWithSuccessfullyWrittenDataFiles;
-    _onRollbackFileWrittenForNamespaceFn =
-        [this, &collsWithSuccessfullyWrittenDataFiles](UUID uuid, NamespaceString nss) {
-            collsWithSuccessfullyWrittenDataFiles.emplace_back(std::move(uuid));
-            _rollback->shutdown();
-        };
-
-    ASSERT_EQ(_rollback->runRollback(_opCtx.get()), ErrorCodes::ShutdownInProgress);
-
-    ASSERT_EQ(collsWithSuccessfullyWrittenDataFiles.size(), 1UL);
-    const auto& uuid = collsWithSuccessfullyWrittenDataFiles.front();
-    ASSERT(uuid == uuid1 || uuid == uuid2) << "wrote out a data file for unknown uuid " << uuid
-                                           << "; expected it to be either " << uuid1 << " or "
-                                           << uuid2;
-}
-
 DEATH_TEST_F(RollbackImplTest,
              InvariantFailureIfNamespaceIsMissingWhenWritingRollbackFiles,
              "unexpectedly missing in the CollectionCatalog") {
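The new tests above rely on MongoDB's `DEATH_TEST_F` harness to assert that rollback now crashes the process rather than returning a bad status. As a rough, self-contained illustration of the same idea using plain googletest (an assumption for the sketch only; this is not the MongoDB test framework, and the crashing function below is a stand-in for the real rollback code):

```cpp
// Sketch of verifying fatal behavior with a death test. Requires googletest.
#include <cstdlib>
#include <iostream>
#include <gtest/gtest.h>

// Stand-in for a rollback critical section that hits an error and fasserts.
void criticalSectionThatFails() {
    std::cerr << "Fatal assertion 31049: Error in rollback critical section\n";
    std::abort();
}

TEST(RollbackCriticalSectionDeathTest, FailureIsFatal) {
    // The test passes only if the statement terminates the process and the
    // stderr output matches, mirroring DEATH_TEST_F(..., "Fatal assertion 31049").
    EXPECT_DEATH(criticalSectionThatFails(), "Fatal assertion 31049");
}

int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}
```

The death-test pattern is what lets the suite keep coverage of the error paths even though those paths no longer return a `Status` to assert on.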