diff options
author | Daniel Gómez Ferro <daniel.gomezferro@mongodb.com> | 2022-04-07 07:16:46 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-14 15:10:56 +0000 |
commit | 8051a9e387702e20256b2f53e3c27af6ef67d820 (patch) | |
tree | 2a4303e304a0c89723ba7de8185700a7585c0c72 | |
parent | 8c96607bfbb82f2cf161177b4546f0ebeb07042a (diff) | |
download | mongo-8051a9e387702e20256b2f53e3c27af6ef67d820.tar.gz |
SERVER-64983 Abort transaction after releasing Client lock
-rw-r--r-- | src/mongo/db/transaction_participant.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.h | 6 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant_test.cpp | 28 |
3 files changed, 60 insertions, 19 deletions
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index ae2a12fe2e7..e7a49152983 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -2113,11 +2113,12 @@ void TransactionParticipant::Participant::_abortTransactionOnSession(OperationCo const auto nextState = o().txnState.isPrepared() ? TransactionState::kAbortedWithPrepare : TransactionState::kAbortedWithoutPrepare; - stdx::lock_guard<Client> lk(*opCtx->getClient()); + stdx::unique_lock<Client> lk(*opCtx->getClient()); if (o().txnResourceStash && opCtx->recoveryUnit()->getNoEvictionAfterRollback()) { o(lk).txnResourceStash->setNoEvictionAfterRollback(); } - _resetTransactionState(lk, nextState); + _resetTransactionStateAndUnlock(&lk, nextState); + _resetRetryableWriteState(); } @@ -2633,18 +2634,18 @@ void TransactionParticipant::Participant::_setNewTxnNumberAndRetryCounter( _abortTransactionOnSession(opCtx); } - stdx::lock_guard<Client> lk(*opCtx->getClient()); + stdx::unique_lock<Client> lk(*opCtx->getClient()); o(lk).activeTxnNumberAndRetryCounter = txnNumberAndRetryCounter; o(lk).lastWriteOpTime = repl::OpTime(); // Reset the retryable writes state _resetRetryableWriteState(); - // Reset the transactional state - _resetTransactionState(lk, TransactionState::kNone); - // Reset the transactions metrics o(lk).transactionMetricsObserver.resetSingleTransactionStats(txnNumberAndRetryCounter); + + // Reset the transactional state + _resetTransactionStateAndUnlock(&lk, TransactionState::kNone); } void RetryableWriteTransactionParticipantCatalog::addParticipant( @@ -2907,32 +2908,42 @@ void TransactionParticipant::Participant::_resetRetryableWriteState() { p().hasIncompleteHistory = false; } -void TransactionParticipant::Participant::_resetTransactionState( - WithLock wl, TransactionState::StateFlag state) { +void TransactionParticipant::Participant::_resetTransactionStateAndUnlock( + stdx::unique_lock<Client>* lk, TransactionState::StateFlag state) { + invariant(lk && lk->owns_lock()); + // If we are transitioning to kNone, we are either starting a new transaction or aborting a // prepared transaction for rollback. In the latter case, we will need to relax the // invariant that prevents transitioning from kPrepared to kNone. if (o().txnState.isPrepared() && state == TransactionState::kNone) { - o(wl).txnState.transitionTo( + o(*lk).txnState.transitionTo( state, TransactionState::TransitionValidation::kRelaxTransitionValidation); } else { - o(wl).txnState.transitionTo(state); + o(*lk).txnState.transitionTo(state); } p().transactionOperationBytes = 0; p().transactionOperations.clear(); p().transactionStmtIds.clear(); - o(wl).prepareOpTime = repl::OpTime(); - o(wl).recoveryPrepareOpTime = repl::OpTime(); + o(*lk).prepareOpTime = repl::OpTime(); + o(*lk).recoveryPrepareOpTime = repl::OpTime(); p().autoCommit = boost::none; p().needToWriteAbortEntry = false; - // Release any locks held by this participant and abort the storage transaction. - o(wl).txnResourceStash = boost::none; + // Swap out txnResourceStash while holding the Client lock, then release any locks held by this + // participant and abort the storage transaction after releasing the lock. The transaction + // rollback can block indefinitely if the storage engine recruits it for eviction. In that case + // we should not be holding the Client lock, as that would block tasks like the periodic + // transaction killer from making progress. + using std::swap; + boost::optional<TxnResources> temporary; + swap(o(*lk).txnResourceStash, temporary); + lk->unlock(); + temporary = boost::none; } void TransactionParticipant::Participant::invalidate(OperationContext* opCtx) { - stdx::lock_guard<Client> lg(*opCtx->getClient()); + stdx::unique_lock<Client> lk(*opCtx->getClient()); uassert(ErrorCodes::PreparedTransactionInProgress, "Cannot invalidate prepared transaction", @@ -2940,7 +2951,7 @@ void TransactionParticipant::Participant::invalidate(OperationContext* opCtx) { // Invalidate the session and clear both the retryable writes and transactional states on // this participant. - _invalidate(lg); + _invalidate(lk); _resetRetryableWriteState(); // Get the RetryableWriteTransactionParticipantCatalog without checking the opCtx has checked @@ -2954,7 +2965,7 @@ void TransactionParticipant::Participant::invalidate(OperationContext* opCtx) { retryableWriteTxnParticipantCatalog.invalidate(); } - _resetTransactionState(lg, TransactionState::kNone); + _resetTransactionStateAndUnlock(&lk, TransactionState::kNone); } boost::optional<repl::OplogEntry> TransactionParticipant::Participant::checkStatementExecuted( diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h index fdaacdddcff..d4773c87c82 100644 --- a/src/mongo/db/transaction_participant.h +++ b/src/mongo/db/transaction_participant.h @@ -944,8 +944,10 @@ public: void _resetRetryableWriteState(); // Helper that resets the transactional state. This is used when aborting a transaction, - // invalidating a transaction, or starting a new transaction. - void _resetTransactionState(WithLock wl, TransactionState::StateFlag state); + // invalidating a transaction, or starting a new transaction. It releases the Client lock + // before releasing this participant's locks and aborting its storage transaction. + void _resetTransactionStateAndUnlock(stdx::unique_lock<Client>* lk, + TransactionState::StateFlag state); /* Releases the resources held in *o().txnResources to the operation context. * o().txnResources must be engaged prior to calling this. diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 6d2df9f6ec5..9cf71f2f847 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -875,6 +875,34 @@ TEST_F(TxnParticipantTest, KillOpBeforeAbortingPreparedTransaction) { runFunctionFromDifferentOpCtx(commitPreparedFunc); ASSERT_TRUE(txnParticipant.transactionIsCommitted()); } +TEST_F(TxnParticipantTest, StashedRollbackDoesntHoldClientLock) { + auto sessionCheckout = checkOutSession(); + auto txnParticipant = TransactionParticipant::get(opCtx()); + txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction"); + + txnParticipant.prepareTransaction(opCtx(), {}); + + unittest::Barrier startedRollback(2); + unittest::Barrier finishRollback(2); + + // Rollback changes are executed in reverse order. + opCtx()->recoveryUnit()->onRollback([&] { finishRollback.countDownAndWait(); }); + opCtx()->recoveryUnit()->onRollback([&] { startedRollback.countDownAndWait(); }); + + auto future = stdx::async(stdx::launch::async, [&] { + startedRollback.countDownAndWait(); + + // Verify we can take the Client lock during the rollback of the stashed transaction. + stdx::lock_guard<Client> lk(*opCtx()->getClient()); + + finishRollback.countDownAndWait(); + }); + + txnParticipant.stashTransactionResources(opCtx()); + txnParticipant.abortTransaction(opCtx()); + + future.get(); +} TEST_F(TxnParticipantTest, ThrowDuringOnTransactionPrepareAbortsTransaction) { auto sessionCheckout = checkOutSession(); |