summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorJason Chan <jason.chan@mongodb.com>2021-04-28 19:53:03 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-04 22:29:28 +0000
commit320e2d28396c250dfd69640dcf865dff50ea0b55 (patch)
treee5ddaba95d3c5ee27ee386137a74435b17637993 /src/mongo
parent4aa27885874b90e098c1225fccb10f4daa3b3d38 (diff)
downloadmongo-320e2d28396c250dfd69640dcf865dff50ea0b55.tar.gz
SERVER-55305 Add new step to replication rollback to restore the txns table to be consistent with the stableTimestamp
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/repl/replication_recovery.cpp4
-rw-r--r--src/mongo/db/repl/rollback_impl.cpp83
-rw-r--r--src/mongo/db/repl/rollback_impl.h11
-rw-r--r--src/mongo/db/repl/rollback_impl_test.cpp330
4 files changed, 428 insertions, 0 deletions
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index c9d4aa3c381..2bc00bfefe2 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -60,6 +60,8 @@
namespace mongo {
namespace repl {
+MONGO_FAIL_POINT_DEFINE(hangAfterOplogTruncationInRollback);
+
namespace {
const auto kRecoveryBatchLogLevel = logv2::LogSeverity::Debug(2);
@@ -449,6 +451,8 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx,
// This may take an IS lock on the oplog collection.
_truncateOplogIfNeededAndThenClearOplogTruncateAfterPoint(opCtx, &stableTimestamp);
+ hangAfterOplogTruncationInRollback.pauseWhileSet();
+
auto topOfOplogSW = _getTopOfOplog(opCtx);
if (topOfOplogSW.getStatus() == ErrorCodes::CollectionIsEmpty ||
topOfOplogSW.getStatus() == ErrorCodes::NamespaceNotFound) {
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index 1ce1bb85b8e..14be427c59c 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -43,7 +43,10 @@
#include "mongo/db/commands.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/dbhelpers.h"
#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/kill_sessions_local.h"
#include "mongo/db/logical_time_validator.h"
@@ -457,6 +460,80 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog
return namespaces;
}
+void RollbackImpl::_restoreTxnsTableEntryFromRetryableWrites(OperationContext* opCtx,
+ Timestamp stableTimestamp) {
+ auto client = std::make_unique<DBDirectClient>(opCtx);
+ // Query for retryable writes oplog entries with a non-null 'prevWriteOpTime' value
+ // less than or equal to the 'stableTimestamp'. This query intends to include no-op
+ // retryable writes oplog entries that have been applied through a migration process.
+ const auto filter = BSON("op" << BSON("$in" << BSON_ARRAY("i"
+ << "u"
+ << "d")));
+ // We use the 'fromMigrate' field to differentiate migrated retryable writes entries from
+ // transactions entries.
+ const auto filterFromMigration = BSON("op"
+ << "n"
+ << "fromMigrate" << true);
+ auto cursor = client->query(
+ NamespaceString::kRsOplogNamespace,
+ QUERY("ts" << BSON("$gt" << stableTimestamp) << "txnNumber" << BSON("$exists" << true)
+ << "stmtId" << BSON("$exists" << true) << "prevOpTime.ts"
+ << BSON("$gte" << Timestamp(1, 0) << "$lte" << stableTimestamp) << "$or"
+ << BSON_ARRAY(filter << filterFromMigration)));
+ while (cursor->more()) {
+ auto doc = cursor->next();
+ auto swEntry = OplogEntry::parse(doc);
+ fassert(5530502, swEntry.isOK());
+ auto entry = swEntry.getValue();
+ auto prevWriteOpTime = *entry.getPrevWriteOpTimeInTransaction();
+ OperationSessionInfo opSessionInfo = entry.getOperationSessionInfo();
+ const auto sessionId = *opSessionInfo.getSessionId();
+ const auto txnNumber = *opSessionInfo.getTxnNumber();
+ const auto wallClockTime = entry.getWallClockTime();
+
+ invariant(!prevWriteOpTime.isNull() && prevWriteOpTime.getTimestamp() <= stableTimestamp);
+ // This is a retryable writes oplog entry with a non-null 'prevWriteOpTime' value that
+ // is less than or equal to the 'stableTimestamp'.
+ LOGV2(5530501,
+ "Restoring sessions entry to be consistent with 'stableTimestamp'",
+ "stableTimestamp"_attr = stableTimestamp,
+ "sessionId"_attr = sessionId,
+ "txnNumber"_attr = txnNumber,
+ "lastWriteOpTime"_attr = prevWriteOpTime);
+ SessionTxnRecord sessionTxnRecord;
+ sessionTxnRecord.setSessionId(sessionId);
+ sessionTxnRecord.setTxnNum(txnNumber);
+ try {
+ TransactionHistoryIterator iter(prevWriteOpTime);
+ auto nextOplogEntry = iter.next(opCtx);
+ sessionTxnRecord.setLastWriteOpTime(nextOplogEntry.getOpTime());
+ sessionTxnRecord.setLastWriteDate(nextOplogEntry.getWallClockTime());
+ } catch (ExceptionFor<ErrorCodes::IncompleteTransactionHistory>&) {
+ // It's possible that the next entry in the oplog chain has been truncated due to
+ // oplog cap maintenance.
+ sessionTxnRecord.setLastWriteOpTime(prevWriteOpTime);
+ sessionTxnRecord.setLastWriteDate(wallClockTime);
+ }
+ const auto nss = NamespaceString::kSessionTransactionsTableNamespace;
+ writeConflictRetry(opCtx, "updateSessionTransactionsTableInRollback", nss.ns(), [&] {
+ AutoGetCollection collection(opCtx, nss, MODE_IX);
+ auto filter = BSON(SessionTxnRecord::kSessionIdFieldName << sessionId.toBSON());
+ UnreplicatedWritesBlock uwb(opCtx);
+ // Perform an untimestamped write so that it will not be rolled back on recovering
+ // to the 'stableTimestamp' if we were to crash. This is safe because this update is
+ // meant to be consistent with the 'stableTimestamp' and not the common point.
+ Helpers::upsert(
+ opCtx, nss.ns(), filter, sessionTxnRecord.toBSON(), /*fromMigrate=*/false);
+ });
+ }
+ // Take a stable checkpoint so that writes to the 'config.transactions' table are
+ // persisted to disk before truncating the oplog. If we were to take an unstable checkpoint, we
+ // would have to update replication metadata like 'minValid.appliedThrough' to be consistent
+ // with the oplog.
+ opCtx->recoveryUnit()->waitUntilUnjournaledWritesDurable(opCtx,
+ /*stableCheckpoint=*/true);
+}
+
void RollbackImpl::_runPhaseFromAbortToReconstructPreparedTxns(
OperationContext* opCtx, RollBackLocalOperations::RollbackCommonPoint commonPoint) noexcept {
// Stop and wait for all background index builds to complete before starting the rollback
@@ -514,6 +591,12 @@ void RollbackImpl::_runPhaseFromAbortToReconstructPreparedTxns(
"update"_attr = _observerInfo.rollbackCommandCounts[kUpdateCmdName],
"delete"_attr = _observerInfo.rollbackCommandCounts[kDeleteCmdName]);
+ // Retryable writes create derived updates to the transactions table which can be coalesced into
+ // one operation, so certain session operations history may be lost after restoring to the
+ // 'stableTimestamp'. We must scan the oplog and restore the transactions table entries to
+ // detail the last executed writes.
+ _restoreTxnsTableEntryFromRetryableWrites(opCtx, stableTimestamp);
+
// During replication recovery, we truncate all oplog entries with timestamps greater than the
// oplog truncate after point. If we entered rollback, we are guaranteed to have at least one
// oplog entry after the common point.
diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h
index 4d95e5e7e4e..9fe3206f4f7 100644
--- a/src/mongo/db/repl/rollback_impl.h
+++ b/src/mongo/db/repl/rollback_impl.h
@@ -367,6 +367,17 @@ private:
void _stopAndWaitForIndexBuilds(OperationContext* opCtx);
/**
+ * Performs a forward scan of the oplog starting at 'stableTimestamp', exclusive. For every
+ * retryable write oplog entry that has a 'prevOpTime' <= 'stableTimestamp', update the
+ * transactions table with the appropriate information to detail the last executed operation. We
+ * do this because derived updates to the transactions table can be coalesced into one
+ * operation, and so certain session entry updates may not exist when restoring to the
+ * 'stableTimestamp'.
+ */
+ void _restoreTxnsTableEntryFromRetryableWrites(OperationContext* opCtx,
+ Timestamp stableTimestamp);
+
+ /**
* Recovers to the stable timestamp while holding the global exclusive lock.
* Returns the stable timestamp that the storage engine recovered to.
*/
diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp
index 767c0e81703..f03faaa390b 100644
--- a/src/mongo/db/repl/rollback_impl_test.cpp
+++ b/src/mongo/db/repl/rollback_impl_test.cpp
@@ -410,6 +410,60 @@ OplogInterfaceMock::Operation makeOpAndRecordId(int count) {
}
/**
+ * Helper to create a noop entry that represents a migrated retryable write or transaction oplog
+ * entry.
+ */
+BSONObj makeMigratedNoop(OpTime opTime,
+ boost::optional<BSONObj> o2,
+ LogicalSessionId lsid,
+ int txnNum,
+ OpTime prevOpTime,
+ boost::optional<int> stmtId,
+ int wallClockMillis,
+ bool isRetryableWrite) {
+ repl::MutableOplogEntry op;
+ op.setOpType(repl::OpTypeEnum::kNoop);
+ op.setNss(nss);
+ op.setObject(BSONObj());
+ op.setOpTime(opTime);
+ if (isRetryableWrite) {
+ op.setFromMigrate(true);
+ }
+ op.setObject2(o2);
+ if (stmtId) {
+ op.setStatementIds({*stmtId});
+ }
+ OperationSessionInfo sessionInfo;
+ sessionInfo.setSessionId(lsid);
+ sessionInfo.setTxnNumber(txnNum);
+ op.setOperationSessionInfo(sessionInfo);
+ op.setPrevWriteOpTimeInTransaction(prevOpTime);
+ op.setWallClockTime(Date_t::fromMillisSinceEpoch(wallClockMillis));
+ return op.toBSON();
+}
+
+/**
+ * Helper to create a transaction command oplog entry.
+ */
+BSONObj makeTransactionOplogEntry(OpTime opTime,
+ LogicalSessionId lsid,
+ int txnNum,
+ OpTime prevOpTime) {
+ repl::MutableOplogEntry op;
+ op.setOpType(repl::OpTypeEnum::kCommand);
+ op.setNss(nss);
+ op.setObject(BSON("applyOps" << BSONArray()));
+ op.setOpTime(opTime);
+ OperationSessionInfo sessionInfo;
+ sessionInfo.setSessionId(lsid);
+ sessionInfo.setTxnNumber(txnNum);
+ op.setOperationSessionInfo(sessionInfo);
+ op.setPrevWriteOpTimeInTransaction(prevOpTime);
+ op.setWallClockTime(Date_t());
+ return op.toBSON();
+}
+
+/**
* Asserts that the documents in the oplog have the given timestamps.
*/
void _assertDocsInOplog(OperationContext* opCtx, std::vector<int> timestamps) {
@@ -1524,6 +1578,282 @@ TEST_F(RollbackImplTest, RollbackFixesCountForUnpreparedTransactionApplyOpsChain
ASSERT_EQ(_storageInterface->getFinalCollectionCount(collId), 1);
}
+TEST_F(RollbackImplTest, RollbackRestoresTxnTableEntryToBeConsistentWithStableTimestamp) {
+ const auto collUuid = UUID::gen();
+ const auto nss = NamespaceString::kSessionTransactionsTableNamespace;
+ _initializeCollection(_opCtx.get(), collUuid, nss);
+ LogicalSessionFromClient fromClient{};
+ fromClient.setId(UUID::gen());
+ LogicalSessionId lsid = makeLogicalSessionId(fromClient, _opCtx.get());
+ LogicalSessionFromClient fromClient2{};
+ fromClient2.setId(UUID::gen());
+ LogicalSessionId lsid2 = makeLogicalSessionId(fromClient2, _opCtx.get());
+ LogicalSessionFromClient fromClient3{};
+ fromClient3.setId(UUID::gen());
+ LogicalSessionId lsid3 = makeLogicalSessionId(fromClient3, _opCtx.get());
+
+ auto commonOpTime = OpTime(Timestamp(4, 4), 4);
+ auto commonPoint = makeOpAndRecordId(commonOpTime);
+ _remoteOplog->setOperations({commonPoint});
+ _storageInterface->setStableTimestamp(nullptr, commonOpTime.getTimestamp());
+
+ auto insertObj1 = BSON("_id" << 1);
+ auto insertObj2 = BSON("_id" << 2);
+ const auto txnNumOne = 1LL;
+ // Create retryable write oplog entry before 'stableTimestamp'.
+ auto opBeforeStableTs = makeInsertOplogEntry(1, insertObj1, nss.ns(), collUuid);
+ const auto prevOpTime = OpTime(opBeforeStableTs["ts"].timestamp(), 1);
+ BSONObjBuilder opBeforeStableTsBuilder(opBeforeStableTs);
+ opBeforeStableTsBuilder.append("lsid", lsid.toBSON());
+ opBeforeStableTsBuilder.append("txnNumber", txnNumOne);
+ opBeforeStableTsBuilder.append("prevOpTime", OpTime().toBSON());
+ opBeforeStableTsBuilder.append("stmtId", 1);
+ BSONObj oplogEntryBeforeStableTs = opBeforeStableTsBuilder.done();
+
+ const auto txnNumTwo = 2LL;
+ // Create no-op retryable write entry before 'stableTimestamp'.
+ auto noopPrevOpTime = OpTime(Timestamp(2, 2), 2);
+ auto noopEntryBeforeStableTs = makeMigratedNoop(noopPrevOpTime,
+ oplogEntryBeforeStableTs,
+ lsid2,
+ txnNumTwo,
+ OpTime(),
+ 1 /* stmtId */,
+ 2 /* wallClockMillis */,
+ true /* isRetryableWrite */);
+
+ // Create transactions entry after 'stableTimestamp'. Transactions entries are of 'command' op
+ // type.
+ auto txnOpTime = OpTime(Timestamp(3, 3), 3);
+ auto txnEntryBeforeStableTs = makeTransactionOplogEntry(txnOpTime, lsid3, 3, OpTime());
+
+ // Create retryable write oplog entry after 'stableTimestamp'.
+ auto firstOpAfterStableTs = makeInsertOplogEntry(5, insertObj2, nss.ns(), collUuid);
+ BSONObjBuilder opAfterStableTsBuilder(firstOpAfterStableTs);
+ opAfterStableTsBuilder.append("lsid", lsid.toBSON());
+ opAfterStableTsBuilder.append("txnNumber", txnNumOne);
+ // 'prevOpTime' points to 'oplogEntryBeforeStableTs'.
+ opAfterStableTsBuilder.append("prevOpTime", prevOpTime.toBSON());
+ opAfterStableTsBuilder.append("stmtId", 2);
+ BSONObj firstOplogEntryAfterStableTs = opAfterStableTsBuilder.done();
+
+ // Create no-op retryable write entry after 'stableTimestamp'.
+ auto noopEntryAfterStableTs = makeMigratedNoop(OpTime(Timestamp(6, 6), 6),
+ firstOplogEntryAfterStableTs,
+ lsid2,
+ txnNumTwo,
+ noopPrevOpTime,
+ 2 /* stmtId */,
+ 5 /* wallClockMillis */,
+ true /* isRetryableWrite */);
+
+ // Create transactions entry after 'stableTimestamp'. Transactions entries are of 'command' op
+ // type.
+ auto txnEntryAfterStableTs =
+ makeTransactionOplogEntry(OpTime(Timestamp(7, 7), 7), lsid3, 3, txnOpTime);
+
+ ASSERT_OK(_insertOplogEntry(oplogEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(noopEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(txnEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(commonPoint.first));
+ ASSERT_OK(_insertOplogEntry(firstOplogEntryAfterStableTs));
+ ASSERT_OK(_insertOplogEntry(noopEntryAfterStableTs));
+ ASSERT_OK(_insertOplogEntry(txnEntryAfterStableTs));
+
+ auto status = _storageInterface->findSingleton(_opCtx.get(), nss);
+ // The 'config.transactions' table is currently empty.
+ ASSERT_NOT_OK(status);
+
+ // Doing a rollback should upsert two entries into the 'config.transactions' table.
+ ASSERT_OK(_rollback->runRollback(_opCtx.get()));
+ auto swDoc = _storageInterface->findById(
+ _opCtx.get(), nss, firstOplogEntryAfterStableTs.getField("lsid"));
+ ASSERT_OK(swDoc);
+ auto sessionsEntryBson = swDoc.getValue();
+ // New sessions entry should match the session information retrieved from the retryable writes
+ // oplog entry from before the 'stableTimestamp'.
+ ASSERT_EQUALS(sessionsEntryBson["txnNum"].numberInt(),
+ oplogEntryBeforeStableTs["txnNumber"].numberInt());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteOpTime"].timestamp(),
+ oplogEntryBeforeStableTs["prevOpTime"].timestamp());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteDate"].date(),
+ oplogEntryBeforeStableTs["wall"].date());
+
+ swDoc =
+ _storageInterface->findById(_opCtx.get(), nss, noopEntryBeforeStableTs.getField("lsid"));
+ ASSERT_OK(swDoc);
+ sessionsEntryBson = swDoc.getValue();
+ // New sessions entry should match the session information retrieved from the noop retryable
+ // writes oplog entry from before the 'stableTimestamp'.
+ ASSERT_EQUALS(sessionsEntryBson["txnNum"].numberInt(),
+ noopEntryBeforeStableTs["txnNumber"].numberInt());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteOpTime"].timestamp(),
+ noopEntryBeforeStableTs["prevOpTime"].timestamp());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteDate"].date(),
+ noopEntryBeforeStableTs["wall"].date());
+
+ // 'lsid3' does not get restored because it is not a retryable writes entry.
+ status = _storageInterface->findById(_opCtx.get(), nss, txnEntryAfterStableTs.getField("lsid"));
+ ASSERT_NOT_OK(status);
+}
+
+TEST_F(
+ RollbackImplTest,
+ RollbackRestoresTxnTableEntryToBeConsistentWithStableTimestampWithMissingPrevWriteOplogEntry) {
+ const auto collUuid = UUID::gen();
+ const auto nss = NamespaceString::kSessionTransactionsTableNamespace;
+ _initializeCollection(_opCtx.get(), collUuid, nss);
+ LogicalSessionFromClient fromClient{};
+ fromClient.setId(UUID::gen());
+ LogicalSessionId lsid = makeLogicalSessionId(fromClient, _opCtx.get());
+ LogicalSessionFromClient fromClient2{};
+ fromClient2.setId(UUID::gen());
+ LogicalSessionId lsid2 = makeLogicalSessionId(fromClient2, _opCtx.get());
+
+ auto commonOpTime = OpTime(Timestamp(2, 2), 1);
+ auto commonPoint = makeOpAndRecordId(OpTime(Timestamp(2, 2), 1));
+ _remoteOplog->setOperations({commonPoint});
+ _storageInterface->setStableTimestamp(nullptr, commonOpTime.getTimestamp());
+
+ // Create oplog entry after 'stableTimestamp'.
+ auto insertObj = BSON("_id" << 1);
+ auto firstOpAfterStableTs = makeInsertOplogEntry(3, insertObj, nss.ns(), collUuid);
+ BSONObjBuilder builder(firstOpAfterStableTs);
+ builder.append("lsid", lsid.toBSON());
+ builder.append("txnNumber", 2LL);
+ // 'prevOpTime' points to an opTime not found in the oplog. This can happen in practice
+ // due to the 'OplogCapMaintainerThread' truncating entries from before the 'stableTimestamp'.
+ builder.append("prevOpTime", OpTime(Timestamp(1, 0), 1).toBSON());
+ builder.append("stmtId", 2);
+ BSONObj firstOplogEntryAfterStableTs = builder.done();
+
+ // Create no-op entry after 'stableTimestamp'.
+ // 'prevOpTime' point sto an opTime not found in the oplog.
+ auto noopEntryAfterStableTs = makeMigratedNoop(OpTime(Timestamp(5, 5), 5),
+ firstOplogEntryAfterStableTs,
+ lsid2,
+ 3 /* txnNum */,
+ OpTime(Timestamp(2, 1), 1),
+ 2 /* stmtId */,
+ 5 /* wallClockMillis */,
+ true /*isRetryableWrite */);
+
+ ASSERT_OK(_insertOplogEntry(commonPoint.first));
+ ASSERT_OK(_insertOplogEntry(firstOplogEntryAfterStableTs));
+ ASSERT_OK(_insertOplogEntry(noopEntryAfterStableTs));
+
+ auto status = _storageInterface->findSingleton(_opCtx.get(), nss);
+ // The 'config.transactions' table is currently empty.
+ ASSERT_NOT_OK(status);
+
+ // Doing a rollback should upsert two entries into the 'config.transactions' table.
+ ASSERT_OK(_rollback->runRollback(_opCtx.get()));
+ auto swDoc = _storageInterface->findById(
+ _opCtx.get(), nss, firstOplogEntryAfterStableTs.getField("lsid"));
+ ASSERT_OK(swDoc);
+ auto sessionsEntryBson = swDoc.getValue();
+ // New sessions entry should match the session information retrieved from the retryable writes
+ // oplog entry from after the 'stableTimestamp'.
+ ASSERT_EQUALS(sessionsEntryBson["txnNum"].numberInt(),
+ firstOplogEntryAfterStableTs["txnNumber"].numberInt());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteOpTime"].timestamp(),
+ firstOplogEntryAfterStableTs["prevOpTime"].timestamp());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteDate"].date(),
+ firstOplogEntryAfterStableTs["wall"].date());
+
+ swDoc = _storageInterface->findById(_opCtx.get(), nss, noopEntryAfterStableTs.getField("lsid"));
+ ASSERT_OK(swDoc);
+ sessionsEntryBson = swDoc.getValue();
+ // New sessions entry should match the session information retrieved from the retryable writes
+ // no-op oplog entry from after the 'stableTimestamp'.
+ ASSERT_EQUALS(sessionsEntryBson["txnNum"].numberInt(),
+ noopEntryAfterStableTs["txnNumber"].numberInt());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteOpTime"].timestamp(),
+ noopEntryAfterStableTs["prevOpTime"].timestamp());
+ ASSERT_EQUALS(sessionsEntryBson["lastWriteDate"].date(), noopEntryAfterStableTs["wall"].date());
+}
+
+TEST_F(RollbackImplTest, RollbackDoesNotRestoreTxnsTableWhenNoRetryableWritesEntriesAfterStableTs) {
+ const auto collUuid = UUID::gen();
+ const auto nss = NamespaceString::kSessionTransactionsTableNamespace;
+ _initializeCollection(_opCtx.get(), collUuid, nss);
+ LogicalSessionFromClient fromClient{};
+ fromClient.setId(UUID::gen());
+ LogicalSessionId lsid = makeLogicalSessionId(fromClient, _opCtx.get());
+ LogicalSessionFromClient fromClient2{};
+ fromClient2.setId(UUID::gen());
+ LogicalSessionId lsid2 = makeLogicalSessionId(fromClient2, _opCtx.get());
+
+ auto commonOpTime = OpTime(Timestamp(4, 4), 4);
+ auto commonPoint = makeOpAndRecordId(commonOpTime);
+ _remoteOplog->setOperations({commonPoint});
+ _storageInterface->setStableTimestamp(nullptr, commonOpTime.getTimestamp());
+
+ auto insertObj1 = BSON("_id" << 1);
+ auto insertObj2 = BSON("_id" << 2);
+ const auto txnNumOne = 1LL;
+ // Create oplog entry before 'stableTimestamp'.
+ auto opBeforeStableTs = makeInsertOplogEntry(1, insertObj1, nss.ns(), collUuid);
+ BSONObjBuilder opBeforeStableTsBuilder(opBeforeStableTs);
+ opBeforeStableTsBuilder.append("lsid", lsid.toBSON());
+ opBeforeStableTsBuilder.append("txnNumber", txnNumOne);
+ opBeforeStableTsBuilder.append("prevOpTime", OpTime().toBSON());
+ opBeforeStableTsBuilder.append("stmtId", 1);
+ BSONObj oplogEntryBeforeStableTs = opBeforeStableTsBuilder.done();
+
+ const auto txnNumTwo = 2LL;
+ // Create no-op entry before 'stableTimestamp'.
+ auto noopEntryBeforeStableTs = makeMigratedNoop(OpTime(Timestamp(2, 2), 2),
+ oplogEntryBeforeStableTs,
+ lsid2,
+ txnNumTwo,
+ OpTime(),
+ 1 /* stmtId*/,
+ 2 /* wallClockMillis */,
+ true /* isRetryableWrite */);
+
+ // Create migrated no-op transactions entry before 'stableTimestamp'.
+ auto txnOpTime = OpTime(Timestamp(3, 3), 3);
+ auto txnEntryBeforeStableTs = makeMigratedNoop(txnOpTime,
+ BSONObj(),
+ lsid,
+ txnNumTwo,
+ OpTime(),
+ boost::none /* stmtId */,
+ 4 /* wallClockMillis */,
+ false /* isRetryableWrite */);
+
+ // Create regular non-retryable write oplog entry after 'stableTimestamp'.
+ auto insertEntry = makeInsertOplogEntry(7, BSON("_id" << 3), nss.ns(), collUuid);
+
+ // Create migrated no-op transactions entry after 'stableTimestamp'.
+ auto txnEntryAfterStableTs = makeMigratedNoop(txnOpTime,
+ BSONObj(),
+ lsid,
+ txnNumTwo,
+ txnOpTime,
+ boost::none /* stmtId */,
+ 10 /* wallClockMillis */,
+ false /* isRetryableWrite */);
+
+ ASSERT_OK(_insertOplogEntry(oplogEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(noopEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(txnEntryBeforeStableTs));
+ ASSERT_OK(_insertOplogEntry(commonPoint.first));
+ ASSERT_OK(_insertOplogEntry(insertEntry));
+ ASSERT_OK(_insertOplogEntry(txnEntryAfterStableTs));
+
+ auto status = _storageInterface->findSingleton(_opCtx.get(), nss);
+ // The 'config.transactions' table is currently empty.
+ ASSERT_NOT_OK(status);
+
+ // Doing a rollback should not restore any entries into the 'config.transactions' table.
+ ASSERT_OK(_rollback->runRollback(_opCtx.get()));
+ status = _storageInterface->findSingleton(_opCtx.get(), nss);
+ // No upserts were made to the 'config.transactions' table.
+ ASSERT_NOT_OK(status);
+}
+
/**
* Fixture to help test that rollback records the correct information in its RollbackObserverInfo
* struct.