summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2019-04-29 18:06:07 -0400
committer Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2019-05-09 12:42:37 -0400
commit 29b29b2af6883b99f58c7a90a95f57221874214f (patch)
tree 9447cc1cf2d2d6529b31bf74bfd0f2239f3da1fe
parent bf47260ea0cbc58d3744d8964b2eb036b9a1a19e (diff)
download mongo-29b29b2af6883b99f58c7a90a95f57221874214f.tar.gz
SERVER-40614 Make rollback errors fatal between aborting and reconstructing prepared transactions
-rw-r--r-- src/mongo/db/repl/rollback_impl.cpp 249
-rw-r--r-- src/mongo/db/repl/rollback_impl.h 20
-rw-r--r-- src/mongo/db/repl/rollback_impl_test.cpp 141
3 files changed, 198 insertions, 212 deletions
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index 1b6fd2482fb..647988123a1 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -226,111 +226,22 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) {
return status;
}
_rollbackStats.rollbackId = _replicationProcess->getRollbackID();
+ _listener->onRollbackIDIncremented();
- // Before computing record store counts, abort all active transactions. This ensures that the
- // count adjustments are based on correct values where no prepared transactions are active and
- // all in-memory counts have been rolled-back.
- // Before calling recoverToStableTimestamp, we must abort the storage transaction of any
- // prepared transaction. This will require us to scan all sessions and call
- // abortPreparedTransactionForRollback() on any txnParticipant with a prepared transaction.
- killSessionsAbortAllPreparedTransactions(opCtx);
-
- // Ask the record store for the pre-rollback counts of any collections whose counts will change
- // and create a map with the adjusted counts for post-rollback. While finding the common
- // point, we keep track of how much each collection's count will change during the rollback.
- // Note: these numbers are relative to the common point, not the stable timestamp, and thus
- // must be set after recovering from the oplog.
- // TODO (SERVER-40614): This error should be fatal.
- status = _findRecordStoreCounts(opCtx);
+ // Execute the critical section in rollback. It is illegal to exit rollback cleanly between
+ // aborting prepared transactions and reconstructing them. During this window, no interruptions
+ // are allowed and all errors should be made fatal.
+ status = _runRollbackCriticalSection(opCtx, commonPoint);
if (!status.isOK()) {
- return status;
+ fassertFailedWithStatus(31049, status.withContext("Error in rollback critical section"));
}
+ _listener->onPreparedTransactionsReconstructed();
- if (shouldCreateDataFiles()) {
- // Write a rollback file for each namespace that has documents that would be deleted by
- // rollback. We need to do this after aborting prepared transactions. Otherwise, we risk
- // unecessary prepare conflicts when trying to read documents that were modified by those
- // prepared transactions, which we know we will abort anyway.
- // TODO (SERVER-40614): This error should be fatal.
- status = _writeRollbackFiles(opCtx);
- if (!status.isOK()) {
- return status;
- }
- } else {
- log() << "Not writing rollback files. 'createRollbackDataFiles' set to false.";
- }
-
- // If there were rolled back operations on any session, invalidate all sessions.
- // We invalidate sessions before we recover so that we avoid invalidating sessions that had
- // just recovered prepared transactions.
- if (_observerInfo.rollbackSessionIds.size() > 0) {
- MongoDSessionCatalog::invalidateSessions(opCtx, boost::none);
- }
-
- // Recover to the stable timestamp.
- auto stableTimestampSW = _recoverToStableTimestamp(opCtx);
- // TODO (SERVER-40614): This error should be fatal.
- if (!stableTimestampSW.isOK()) {
- return stableTimestampSW.getStatus();
- }
- _rollbackStats.stableTimestamp = stableTimestampSW.getValue();
- _listener->onRecoverToStableTimestamp(stableTimestampSW.getValue());
-
- // Log the total number of insert and update operations that have been rolled back as a result
- // of recovering to the stable timestamp.
- log() << "Rollback reverted " << _observerInfo.rollbackCommandCounts[kInsertCmdName]
- << " insert operations, " << _observerInfo.rollbackCommandCounts[kUpdateCmdName]
- << " update operations and " << _observerInfo.rollbackCommandCounts[kDeleteCmdName]
- << " delete operations.";
-
- // During replication recovery, we truncate all oplog entries with timestamps greater than or
- // equal to the oplog truncate after point. As a result, we must find the oplog entry after
- // the common point so we do not truncate the common point itself. If we entered rollback,
- // we are guaranteed to have at least one oplog entry after the common point.
- Timestamp truncatePoint = _findTruncateTimestamp(opCtx, commonPointSW.getValue());
-
- // We cannot have an interrupt point between setting the oplog truncation point and fixing the
- // record store counts or else a clean shutdown could produce incorrect counts. We explicitly
- // check for shutdown here to safely maximize interruptibility.
- // TODO (SERVER-40614): This interrupt point should be removed.
+ // We can now accept interruptions again.
if (_isInShutdown()) {
return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
}
- // Persist the truncate point to the 'oplogTruncateAfterPoint' document. We save this value so
- // that the replication recovery logic knows where to truncate the oplog. We save this value
- // durably to match the behavior during startup recovery. This must occur after we successfully
- // recover to a stable timestamp. If recovering to a stable timestamp fails and we still
- // truncate the oplog then the oplog will not match the data files. If we crash at any earlier
- // point, we will recover, find a new sync source, and restart roll back (if necessary on the
- // new sync source). This is safe because a crash before this point would recover to a stable
- // checkpoint anyways at or earlier than the stable timestamp.
- //
- // Note that storage engine timestamp recovery only restores the database *data* to a stable
- // timestamp, but does not revert the oplog, which must be done as part of the rollback process.
- _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, truncatePoint);
- _rollbackStats.truncateTimestamp = truncatePoint;
- _listener->onSetOplogTruncateAfterPoint(truncatePoint);
-
- // Align the drop pending reaper state with what's on disk. Oplog recovery depends on those
- // being consistent.
- _resetDropPendingState(opCtx);
-
- // Run the recovery process.
- _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx,
- stableTimestampSW.getValue());
- _listener->onRecoverFromOplog();
-
- // Sets the correct post-rollback counts on any collections whose counts changed during the
- // rollback.
- _correctRecordStoreCounts(opCtx);
-
- // Reconstruct prepared transactions after counts have been adjusted. Since prepared
- // transactions were aborted (i.e. the in-memory counts were rolled-back) before computing
- // collection counts, reconstruct the prepared transactions now, adding on any additional counts
- // to the now corrected record store.
- reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
-
// At this point, the last applied and durable optimes on this node still point to ops on
// the divergent branch of history. We therefore update the last optimes to the top of the
// oplog, which should now be at the common point.
@@ -497,6 +408,110 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog
return namespaces;
}
+Status RollbackImpl::_runRollbackCriticalSection(
+ OperationContext* opCtx,
+ RollBackLocalOperations::RollbackCommonPoint commonPoint) noexcept try {
+ // Before computing record store counts, abort all active transactions. This ensures that
+ // the count adjustments are based on correct values where no prepared transactions are
+ // active and all in-memory counts have been rolled-back.
+ // Before calling recoverToStableTimestamp, we must abort the storage transaction of any
+ // prepared transaction. This will require us to scan all sessions and call
+ // abortPreparedTransactionForRollback() on any txnParticipant with a prepared transaction.
+ killSessionsAbortAllPreparedTransactions(opCtx);
+
+ // Ask the record store for the pre-rollback counts of any collections whose counts will
+ // change and create a map with the adjusted counts for post-rollback. While finding the
+ // common point, we keep track of how much each collection's count will change during the
+ // rollback. Note: these numbers are relative to the common point, not the stable timestamp,
+ // and thus must be set after recovering from the oplog.
+ auto status = _findRecordStoreCounts(opCtx);
+ if (!status.isOK()) {
+ return status.withContext("Error while finding record store counts");
+ }
+
+ if (shouldCreateDataFiles()) {
+ // Write a rollback file for each namespace that has documents that would be deleted by
+ // rollback. We need to do this after aborting prepared transactions. Otherwise, we risk
+ // unnecessary prepare conflicts when trying to read documents that were modified by
+ // those prepared transactions, which we know we will abort anyway.
+ status = _writeRollbackFiles(opCtx);
+ if (!status.isOK()) {
+ return status.withContext("Error while writing out rollback files");
+ }
+ } else {
+ log() << "Not writing rollback files. 'createRollbackDataFiles' set to false.";
+ }
+
+ // If there were rolled back operations on any session, invalidate all sessions.
+ // We invalidate sessions before we recover so that we avoid invalidating sessions that had
+ // just recovered prepared transactions.
+ if (_observerInfo.rollbackSessionIds.size() > 0) {
+ MongoDSessionCatalog::invalidateSessions(opCtx, boost::none);
+ }
+
+ // Recover to the stable timestamp.
+ auto stableTimestampSW = _recoverToStableTimestamp(opCtx);
+ if (!stableTimestampSW.isOK()) {
+ auto status = stableTimestampSW.getStatus();
+ return status.withContext("Error while recovering to stable timestamp");
+ }
+ _rollbackStats.stableTimestamp = stableTimestampSW.getValue();
+ _listener->onRecoverToStableTimestamp(stableTimestampSW.getValue());
+
+ // Log the total number of insert, update and delete operations that have been rolled back
+ // as a result of recovering to the stable timestamp.
+ log() << "Rollback reverted " << _observerInfo.rollbackCommandCounts[kInsertCmdName]
+ << " insert operations, " << _observerInfo.rollbackCommandCounts[kUpdateCmdName]
+ << " update operations and " << _observerInfo.rollbackCommandCounts[kDeleteCmdName]
+ << " delete operations.";
+
+ // During replication recovery, we truncate all oplog entries with timestamps greater than
+ // or equal to the oplog truncate after point. As a result, we must find the oplog entry
+ // after the common point so we do not truncate the common point itself. If we entered
+ // rollback, we are guaranteed to have at least one oplog entry after the common point.
+ Timestamp truncatePoint = _findTruncateTimestamp(opCtx, commonPoint);
+
+ // Persist the truncate point to the 'oplogTruncateAfterPoint' document. We save this value so
+ // that the replication recovery logic knows where to truncate the oplog. We save this value
+ // durably to match the behavior during startup recovery. This must occur after we successfully
+ // recover to a stable timestamp. If recovering to a stable timestamp fails and we still
+ // truncate the oplog then the oplog will not match the data files. If we crash at any earlier
+ // point, we will recover, find a new sync source, and restart roll back (if necessary on the
+ // new sync source). This is safe because a crash before this point would recover to a stable
+ // checkpoint anyways at or earlier than the stable timestamp.
+ //
+ // Note that storage engine timestamp recovery only restores the database *data* to a stable
+ // timestamp, but does not revert the oplog, which must be done as part of the rollback process.
+ _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, truncatePoint);
+ _rollbackStats.truncateTimestamp = truncatePoint;
+ _listener->onSetOplogTruncateAfterPoint(truncatePoint);
+
+ // Align the drop pending reaper state with what's on disk. Oplog recovery depends on those
+ // being consistent.
+ _resetDropPendingState(opCtx);
+
+ // Run the recovery process.
+ _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx,
+ stableTimestampSW.getValue());
+ _listener->onRecoverFromOplog();
+
+ // Sets the correct post-rollback counts on any collections whose counts changed during the
+ // rollback.
+ _correctRecordStoreCounts(opCtx);
+
+ // Reconstruct prepared transactions after counts have been adjusted. Since prepared
+ // transactions were aborted (i.e. the in-memory counts were rolled-back) before computing
+ // collection counts, reconstruct the prepared transactions now, adding on any additional counts
+ // to the now corrected record store.
+ reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
+
+ return Status::OK();
+} catch (...) {
+ // Any exception that escapes the critical section is fatal: log it, then terminate.
+ severe() << "Caught exception during critical section in rollback: " << exceptionToStatus();
+ std::terminate();
+}
+
void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) {
// This function explicitly does not check for shutdown since a clean shutdown post oplog
// truncation is not allowed to occur until the record store counts are corrected.
@@ -572,10 +587,6 @@ void RollbackImpl::_correctRecordStoreCounts(OperationContext* opCtx) {
}
Status RollbackImpl::_findRecordStoreCounts(OperationContext* opCtx) {
- // TODO (SERVER-40614): This interrupt point should be removed.
- if (_isInShutdown()) {
- return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
- }
const auto& catalog = CollectionCatalog::get(opCtx);
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
@@ -1026,20 +1037,9 @@ Status RollbackImpl::_writeRollbackFiles(OperationContext* opCtx) {
str::stream() << "The collection with UUID " << uuid
<< " is unexpectedly missing in the CollectionCatalog");
- if (_isInShutdown()) {
- log() << "Rollback shutting down; not writing rollback file for namespace " << nss->ns()
- << " with uuid " << uuid;
- continue;
- }
-
_writeRollbackFileForNamespace(opCtx, uuid, *nss, entry.second);
}
- // TODO (SERVER-40614): This interrupt point should be removed.
- if (_isInShutdown()) {
- return {ErrorCodes::ShutdownInProgress, "rollback shutting down"};
- }
-
return Status::OK();
}
@@ -1088,27 +1088,10 @@ void RollbackImpl::_writeRollbackFileForNamespace(OperationContext* opCtx,
}
StatusWith<Timestamp> RollbackImpl::_recoverToStableTimestamp(OperationContext* opCtx) {
- // TODO (SERVER-40614): This interrupt point should be removed.
- if (_isInShutdown()) {
- return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down");
- }
- // Recover to the stable timestamp while holding the global exclusive lock.
- {
- Lock::GlobalWrite globalWrite(opCtx);
- try {
- auto stableTimestampSW = _storageInterface->recoverToStableTimestamp(opCtx);
- if (!stableTimestampSW.isOK()) {
- severe() << "RecoverToStableTimestamp failed. "
- << causedBy(stableTimestampSW.getStatus());
- // TODO (SERVER-40614): fassert here instead of depending on the caller to do it
- return {ErrorCodes::UnrecoverableRollbackError,
- "Recover to stable timestamp failed."};
- }
- return stableTimestampSW;
- } catch (...) {
- return exceptionToStatus();
- }
- }
+ // Recover to the stable timestamp while holding the global exclusive lock. This may throw,
+ // which the caller must handle.
+ Lock::GlobalWrite globalWrite(opCtx);
+ return _storageInterface->recoverToStableTimestamp(opCtx);
}
Status RollbackImpl::_triggerOpObserver(OperationContext* opCtx) {
diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h
index e219d2b1fd1..ba9cbd95fa7 100644
--- a/src/mongo/db/repl/rollback_impl.h
+++ b/src/mongo/db/repl/rollback_impl.h
@@ -187,6 +187,11 @@ public:
virtual void onCommonPointFound(Timestamp commonPoint) noexcept {}
/**
+ * Function called after we have incremented the rollback ID.
+ */
+ virtual void onRollbackIDIncremented() noexcept {}
+
+ /**
* Function called after a rollback file has been written for each namespace with inserts or
* updates that are being rolled back.
*/
@@ -194,8 +199,9 @@ public:
/**
* Function called after we recover to the stable timestamp.
+ * NOTE: This may throw, for testing purposes.
*/
- virtual void onRecoverToStableTimestamp(Timestamp stableTimestamp) noexcept {}
+ virtual void onRecoverToStableTimestamp(Timestamp stableTimestamp) {}
/**
* Function called after we set the oplog truncate after point.
@@ -208,6 +214,11 @@ public:
virtual void onRecoverFromOplog() noexcept {}
/**
+ * Function called after we reconstruct prepared transactions.
+ */
+ virtual void onPreparedTransactionsReconstructed() noexcept {}
+
+ /**
* Function called after we have triggered the 'onRollback' OpObserver method.
*/
virtual void onRollbackOpObserver(const OpObserver::RollbackObserverInfo& rbInfo) noexcept {
@@ -375,6 +386,13 @@ private:
Status _findRecordStoreCounts(OperationContext* opCtx);
/**
+ * Executes the critical section in rollback, defined as the window between aborting and
+ * reconstructing prepared transactions.
+ */
+ Status _runRollbackCriticalSection(
+ OperationContext* opCtx, RollBackLocalOperations::RollbackCommonPoint commonPoint) noexcept;
+
+ /**
* Sets the record store counts to be the values stored in _newCounts.
*/
void _correctRecordStoreCounts(OperationContext* opCtx);
diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp
index 328b1f1572d..7df54ceea37 100644
--- a/src/mongo/db/repl/rollback_impl_test.cpp
+++ b/src/mongo/db/repl/rollback_impl_test.cpp
@@ -258,6 +258,14 @@ protected:
bool _recoveredFromOplog = false;
stdx::function<void()> _onRecoverFromOplogFn = [this]() { _recoveredFromOplog = true; };
+ bool _incrementedRollbackID = false;
+ stdx::function<void()> _onRollbackIDIncrementedFn = [this]() { _incrementedRollbackID = true; };
+
+ bool _reconstructedPreparedTransactions = false;
+ stdx::function<void()> _onPreparedTransactionsReconstructedFn = [this]() {
+ _reconstructedPreparedTransactions = true;
+ };
+
Timestamp _commonPointFound;
stdx::function<void(Timestamp commonPoint)> _onCommonPointFoundFn =
[this](Timestamp commonPoint) { _commonPointFound = commonPoint; };
@@ -318,11 +326,15 @@ public:
_test->_onCommonPointFoundFn(commonPoint);
}
+ void onRollbackIDIncremented() noexcept override {
+ _test->_onRollbackIDIncrementedFn();
+ }
+
void onRollbackFileWrittenForNamespace(UUID uuid, NamespaceString nss) noexcept final {
_test->_onRollbackFileWrittenForNamespaceFn(std::move(uuid), std::move(nss));
}
- void onRecoverToStableTimestamp(Timestamp stableTimestamp) noexcept override {
+ void onRecoverToStableTimestamp(Timestamp stableTimestamp) override {
_test->_onRecoverToStableTimestampFn(stableTimestamp);
}
@@ -334,6 +346,10 @@ public:
_test->_onRecoverFromOplogFn();
}
+ void onPreparedTransactionsReconstructed() noexcept override {
+ _test->_onPreparedTransactionsReconstructedFn();
+ }
+
void onRollbackOpObserver(const OpObserver::RollbackObserverInfo& rbInfo) noexcept override {
_test->_onRollbackOpObserverFn(rbInfo);
}
@@ -584,7 +600,9 @@ TEST_F(RollbackImplTest, RollbackCallsRecoverToStableTimestamp) {
ASSERT_EQUALS(stableTimestamp, _stableTimestamp);
}
-TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfRecoverToStableTimestampFails) {
+DEATH_TEST_F(RollbackImplTest,
+ RollbackFassertsIfRecoverToStableTimestampFails,
+ "Fatal assertion 31049") {
auto op = makeOpAndRecordId(1);
_remoteOplog->setOperations({op});
ASSERT_OK(_insertOplogEntry(op.first));
@@ -609,24 +627,8 @@ TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfRecoverToStableTimestampFails
ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
ASSERT_EQUALS(Timestamp(), _stableTimestamp);
- // Run rollback.
- auto rollbackStatus = _rollback->runRollback(_opCtx.get());
-
- // Make sure rollback failed with an UnrecoverableRollbackError, and didn't execute the
- // recover to timestamp logic.
- ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, rollbackStatus.code());
- ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
- ASSERT_EQUALS(Timestamp(), _stableTimestamp);
-
- // Make sure we transitioned back to SECONDARY state.
- ASSERT_EQUALS(_coordinator->getMemberState(), MemberState::RS_SECONDARY);
-
- // Don't set the truncate after point if we fail early.
- _assertDocsInOplog(_opCtx.get(), {1, 2});
- truncateAfterPoint =
- _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
- ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
- ASSERT_EQUALS(_truncatePoint, Timestamp());
+ // Run rollback. It should fassert.
+ _rollback->runRollback(_opCtx.get()).ignore();
}
TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfIncrementRollbackIDFails) {
@@ -676,41 +678,61 @@ TEST_F(RollbackImplTest, RollbackCallsRecoverFromOplog) {
ASSERT(_recoveredFromOplog);
}
-TEST_F(RollbackImplTest, RollbackSkipsRecoverFromOplogWhenShutdownDuringRTT) {
+TEST_F(RollbackImplTest,
+ RollbackCannotBeShutDownBetweenAbortingAndReconstructingPreparedTransactions) {
auto op = makeOpAndRecordId(1);
_remoteOplog->setOperations({op});
ASSERT_OK(_insertOplogEntry(op.first));
ASSERT_OK(_insertOplogEntry(makeOp(2)));
_assertDocsInOplog(_opCtx.get(), {1, 2});
- auto truncateAfterPoint =
- _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
- ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
- _onRecoverToStableTimestampFn = [this](Timestamp stableTimestamp) {
- _recoveredToStableTimestamp = true;
- _stableTimestamp = stableTimestamp;
+ _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
+
+ // Called before aborting prepared transactions. We request the shutdown here.
+ _onRollbackIDIncrementedFn = [this]() {
+ _incrementedRollbackID = true;
_rollback->shutdown();
};
- // Run rollback.
- auto status = _rollback->runRollback(_opCtx.get());
+ // Called after reconstructing prepared transactions.
+ _onPreparedTransactionsReconstructedFn = [this]() {
+ ASSERT(_incrementedRollbackID);
+ _reconstructedPreparedTransactions = true;
+ };
- // Make sure shutdown occurred before oplog recovery.
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, _rollback->runRollback(_opCtx.get()));
- ASSERT(_recoveredToStableTimestamp);
- ASSERT_FALSE(_recoveredFromOplog);
- ASSERT_FALSE(_coordinator->lastOpTimesWereReset());
+ // Shutting down is still allowed but it must occur after that window.
+ ASSERT_EQ(ErrorCodes::ShutdownInProgress, _rollback->runRollback(_opCtx.get()));
+ ASSERT(_incrementedRollbackID);
+ ASSERT(_reconstructedPreparedTransactions);
+}
- // Make sure we transitioned back to SECONDARY state.
- ASSERT_EQUALS(_coordinator->getMemberState(), MemberState::RS_SECONDARY);
- ASSERT(_stableTimestamp.isNull());
+DEATH_TEST_F(RollbackImplTest,
+ RollbackUassertsAreFatalBetweenAbortingAndReconstructingPreparedTransactions,
+ "Caught exception during critical section in rollback") {
+ auto op = makeOpAndRecordId(1);
+ _remoteOplog->setOperations({op});
+ ASSERT_OK(_insertOplogEntry(op.first));
+ ASSERT_OK(_insertOplogEntry(makeOp(2)));
_assertDocsInOplog(_opCtx.get(), {1, 2});
- truncateAfterPoint =
- _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(_opCtx.get());
- ASSERT_EQUALS(Timestamp(), truncateAfterPoint);
- ASSERT_EQUALS(_truncatePoint, Timestamp());
+
+ _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
+
+ // Called before aborting prepared transactions.
+ _onRollbackIDIncrementedFn = [this]() { _incrementedRollbackID = true; };
+
+ // Called during the critical section.
+ _onRecoverToStableTimestampFn = [this](Timestamp stableTimestamp) {
+ _recoveredToStableTimestamp = true;
+ uasserted(ErrorCodes::UnknownError, "error for test");
+ };
+
+ // Called after reconstructing prepared transactions. We should not be getting here.
+ _onPreparedTransactionsReconstructedFn = [this]() { ASSERT(false); };
+
+ // We expect to crash when we hit the exception.
+ _rollback->runRollback(_opCtx.get()).ignore();
}
TEST_F(RollbackImplTest,
@@ -1141,43 +1163,6 @@ TEST_F(RollbackImplTest, RollbackProperlySavesFilesWhenCreateCollAndInsertsAreRo
SimpleBSONObjComparator::kInstance.makeEqualTo()));
}
-TEST_F(RollbackImplTest, RollbackStopsWritingRollbackFilesWhenShutdownIsInProgress) {
- const auto commonOp = makeOpAndRecordId(1);
- _remoteOplog->setOperations({commonOp});
- ASSERT_OK(_insertOplogEntry(commonOp.first));
- _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1));
-
- const auto nss1 = NamespaceString("db.people");
- const auto uuid1 = UUID::gen();
- const auto coll1 = _initializeCollection(_opCtx.get(), uuid1, nss1);
- const auto obj1 = BSON("_id" << 0 << "name"
- << "kyle");
- _insertDocAndGenerateOplogEntry(obj1, uuid1, nss1);
-
- const auto nss2 = NamespaceString("db.persons");
- const auto uuid2 = UUID::gen();
- const auto coll2 = _initializeCollection(_opCtx.get(), uuid2, nss2);
- const auto obj2 = BSON("_id" << 0 << "name"
- << "jungsoo");
- _insertDocAndGenerateOplogEntry(obj2, uuid2, nss2);
-
- // Register a listener that sends rollback into shutdown.
- std::vector<UUID> collsWithSuccessfullyWrittenDataFiles;
- _onRollbackFileWrittenForNamespaceFn =
- [this, &collsWithSuccessfullyWrittenDataFiles](UUID uuid, NamespaceString nss) {
- collsWithSuccessfullyWrittenDataFiles.emplace_back(std::move(uuid));
- _rollback->shutdown();
- };
-
- ASSERT_EQ(_rollback->runRollback(_opCtx.get()), ErrorCodes::ShutdownInProgress);
-
- ASSERT_EQ(collsWithSuccessfullyWrittenDataFiles.size(), 1UL);
- const auto& uuid = collsWithSuccessfullyWrittenDataFiles.front();
- ASSERT(uuid == uuid1 || uuid == uuid2) << "wrote out a data file for unknown uuid " << uuid
- << "; expected it to be either " << uuid1 << " or "
- << uuid2;
-}
-
DEATH_TEST_F(RollbackImplTest,
InvariantFailureIfNamespaceIsMissingWhenWritingRollbackFiles,
"unexpectedly missing in the CollectionCatalog") {