From 9354aa6ccf769929ed3e7bd09f0dd95f8ab3a2a2 Mon Sep 17 00:00:00 2001 From: Cheahuychou Mao Date: Tue, 16 May 2023 18:42:34 +0000 Subject: SERVER-76807 Avoid adding opTimes for non-retryable internal transactions to the session migration new opTime buffer --- src/mongo/db/op_observer/op_observer.h | 15 ++++++++++++--- src/mongo/db/op_observer/op_observer_impl.cpp | 3 ++- src/mongo/db/op_observer/op_observer_impl.h | 2 ++ src/mongo/db/op_observer/op_observer_noop.h | 1 + src/mongo/db/op_observer/op_observer_registry.h | 3 ++- src/mongo/db/repl/transaction_oplog_application.cpp | 3 ++- src/mongo/db/s/op_observer_sharding_impl.cpp | 3 ++- src/mongo/db/s/op_observer_sharding_impl.h | 1 + src/mongo/db/s/session_catalog_migration_source.cpp | 7 +++++++ src/mongo/db/s/session_catalog_migration_source_test.cpp | 9 +++++++-- 10 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/mongo/db/op_observer/op_observer.h b/src/mongo/db/op_observer/op_observer.h index 2e55e918a33..ed9a4213af6 100644 --- a/src/mongo/db/op_observer/op_observer.h +++ b/src/mongo/db/op_observer/op_observer.h @@ -521,11 +521,20 @@ public: Date_t wallClockTime) = 0; /** - * This is called when a transaction transitions into prepare while it is not primary. Example - * case can include secondary oplog application or when node was restared and tries to - * recover prepared transactions from the oplog. + * This method is called when a transaction transitions into prepare while it is not primary, + * e.g. during secondary oplog application or recoverying prepared transactions from the + * oplog after restart. The method explicitly requires a session id (i.e. does not use the + * session id attached to the opCtx) because transaction oplog application currently applies the + * oplog entries for each prepared transaction in multiple internal sessions acquired from the + * InternalSessionPool. Currently, those internal sessions are completely unrelated to the + * session for the transaction itself. For a non-retryable internal transaction, not using the + * transaction session id in the codepath here can cause the opTime for the transaction to + * show up in the chunk migration opTime buffer although the writes they correspond to are not + * retryable and therefore are discarded anyway. + * */ virtual void onTransactionPrepareNonPrimary(OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& statements, const repl::OpTime& prepareOpTime) = 0; diff --git a/src/mongo/db/op_observer/op_observer_impl.cpp b/src/mongo/db/op_observer/op_observer_impl.cpp index 0b38699f6b7..8139911f37a 100644 --- a/src/mongo/db/op_observer/op_observer_impl.cpp +++ b/src/mongo/db/op_observer/op_observer_impl.cpp @@ -2143,9 +2143,10 @@ void OpObserverImpl::onTransactionPrepare( } void OpObserverImpl::onTransactionPrepareNonPrimary(OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& statements, const repl::OpTime& prepareOpTime) { - shardObserveNonPrimaryTransactionPrepare(opCtx, statements, prepareOpTime); + shardObserveNonPrimaryTransactionPrepare(opCtx, lsid, statements, prepareOpTime); } void OpObserverImpl::onTransactionAbort(OperationContext* opCtx, diff --git a/src/mongo/db/op_observer/op_observer_impl.h b/src/mongo/db/op_observer/op_observer_impl.h index c080a3d6b1e..dbebc4317e2 100644 --- a/src/mongo/db/op_observer/op_observer_impl.h +++ b/src/mongo/db/op_observer/op_observer_impl.h @@ -226,6 +226,7 @@ public: Date_t wallClockTime) final; void onTransactionPrepareNonPrimary(OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& statements, const repl::OpTime& prepareOpTime) final; @@ -266,6 +267,7 @@ private: const repl::OpTime& prepareOrCommitOptime) {} virtual void shardObserveNonPrimaryTransactionPrepare( OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& stmts, const repl::OpTime& prepareOrCommitOptime) {} diff --git a/src/mongo/db/op_observer/op_observer_noop.h b/src/mongo/db/op_observer/op_observer_noop.h index 98a4b081c34..faef9e944aa 100644 --- a/src/mongo/db/op_observer/op_observer_noop.h +++ b/src/mongo/db/op_observer/op_observer_noop.h @@ -244,6 +244,7 @@ public: Date_t wallClockTime) override {} void onTransactionPrepareNonPrimary(OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& statements, const repl::OpTime& prepareOpTime) override {} diff --git a/src/mongo/db/op_observer/op_observer_registry.h b/src/mongo/db/op_observer/op_observer_registry.h index 23eb711883e..8a2dcfb4a2f 100644 --- a/src/mongo/db/op_observer/op_observer_registry.h +++ b/src/mongo/db/op_observer/op_observer_registry.h @@ -443,11 +443,12 @@ public: } void onTransactionPrepareNonPrimary(OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& statements, const repl::OpTime& prepareOpTime) override { ReservedTimes times{opCtx}; for (auto& observer : _observers) { - observer->onTransactionPrepareNonPrimary(opCtx, statements, prepareOpTime); + observer->onTransactionPrepareNonPrimary(opCtx, lsid, statements, prepareOpTime); } } diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 77630c0026f..40e38b05152 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -646,7 +646,8 @@ Status _applyPrepareTransaction(OperationContext* opCtx, auto opObserver = opCtx->getServiceContext()->getOpObserver(); invariant(opObserver); - opObserver->onTransactionPrepareNonPrimary(opCtx, txnOps, prepareOp.getOpTime()); + opObserver->onTransactionPrepareNonPrimary( + opCtx, *prepareOp.getSessionId(), txnOps, prepareOp.getOpTime()); // Prepare transaction success. abortOnError.dismiss(); diff --git a/src/mongo/db/s/op_observer_sharding_impl.cpp b/src/mongo/db/s/op_observer_sharding_impl.cpp index 4c72cd0e30e..493a131a9d2 100644 --- a/src/mongo/db/s/op_observer_sharding_impl.cpp +++ b/src/mongo/db/s/op_observer_sharding_impl.cpp @@ -226,12 +226,13 @@ void OpObserverShardingImpl::shardObserveTransactionPrepareOrUnpreparedCommit( void OpObserverShardingImpl::shardObserveNonPrimaryTransactionPrepare( OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& stmts, const repl::OpTime& prepareOrCommitOptime) { opCtx->recoveryUnit()->registerChange( std::make_unique( - *opCtx->getLogicalSessionId(), stmts, prepareOrCommitOptime)); + lsid, stmts, prepareOrCommitOptime)); } } // namespace mongo diff --git a/src/mongo/db/s/op_observer_sharding_impl.h b/src/mongo/db/s/op_observer_sharding_impl.h index 9d0e1de61d3..1a48946a2f5 100644 --- a/src/mongo/db/s/op_observer_sharding_impl.h +++ b/src/mongo/db/s/op_observer_sharding_impl.h @@ -68,6 +68,7 @@ protected: const repl::OpTime& prepareOrCommitOptime) override; void shardObserveNonPrimaryTransactionPrepare( OperationContext* opCtx, + const LogicalSessionId& lsid, const std::vector& stmts, const repl::OpTime& prepareOrCommitOptime) override; }; diff --git a/src/mongo/db/s/session_catalog_migration_source.cpp b/src/mongo/db/s/session_catalog_migration_source.cpp index f10a0bdfcbc..37a87b5e131 100644 --- a/src/mongo/db/s/session_catalog_migration_source.cpp +++ b/src/mongo/db/s/session_catalog_migration_source.cpp @@ -685,6 +685,13 @@ bool SessionCatalogMigrationSource::_fetchNextNewWriteOplog(OperationContext* op const auto sessionId = *nextNewWriteOplog.getSessionId(); if (isInternalSessionForNonRetryableWrite(sessionId)) { + dassert(0, + str::stream() << "Cannot add op time for a non-retryable " + "internal transaction to the " + "session migration op time queue - " + << "session id:" << sessionId << " oplog entry: " + << redact(nextNewWriteOplog.toBSONForLogging())); + // Transactions inside internal sessions for non-retryable writes are not // retryable so there is no need to transfer their write history to the // recipient. diff --git a/src/mongo/db/s/session_catalog_migration_source_test.cpp b/src/mongo/db/s/session_catalog_migration_source_test.cpp index 3cca3b085b8..fc489def1bb 100644 --- a/src/mongo/db/s/session_catalog_migration_source_test.cpp +++ b/src/mongo/db/s/session_catalog_migration_source_test.cpp @@ -996,8 +996,9 @@ TEST_F(SessionCatalogMigrationSourceTest, ASSERT_EQ(migrationSource.getSessionOplogEntriesSkippedSoFarLowerBound(), 0); } -TEST_F(SessionCatalogMigrationSourceTest, - DiscardOplogEntriesForNewCommittedInternalTransactionForNonRetryableWrite) { +DEATH_TEST_F(SessionCatalogMigrationSourceTest, + DiscardOplogEntriesForNewCommittedInternalTransactionForNonRetryableWrite, + "invariant") { SessionCatalogMigrationSource migrationSource(opCtx(), kNs, kChunkRange, kShardKey); migrationSource.init(opCtx(), kMigrationLsid); ASSERT_FALSE(migrationSource.fetchNextOplog(opCtx())); @@ -1021,6 +1022,10 @@ TEST_F(SessionCatalogMigrationSourceTest, ASSERT_TRUE(migrationSource.hasMoreOplog()); ASSERT_FALSE(migrationSource.fetchNextOplog(opCtx())); ASSERT_EQ(migrationSource.getSessionOplogEntriesToBeMigratedSoFar(), 0); + + // notifyNewWriteOpTime() uses dassert, so it will only invariant in debug mode. Deliberately + // crash here in non-debug mode to make the test work in both modes. + invariant(kDebugBuild); } TEST_F(SessionCatalogMigrationSourceTest, -- cgit v1.2.1