diff options
author | Suganthi Mani <suganthi.mani@mongodb.com> | 2019-07-24 14:11:58 -0400 |
---|---|---|
committer | Suganthi Mani <suganthi.mani@mongodb.com> | 2019-07-25 00:09:17 -0400 |
commit | 7be60542714d7cd17a6ecf5e43a47bb7aef9a0d5 (patch) | |
tree | 30149a739a068ab07d90022b0f6712bd91a0970d | |
parent | ada10b0aca7e7dc977ae706e5ea054d39341c9a8 (diff) | |
download | mongo-7be60542714d7cd17a6ecf5e43a47bb7aef9a0d5.tar.gz |
SERVER-41881 Unstashing the transaction lock resources should ignore the saved value of maxLockTimeout and explicitly set the maxLockTimeout based on node's state.
SERVER-41883 Replication state transition reacquires locks and tickets of a prepared transaction with no lock timeout.
SERVER-41556 Handles failure to reacquire locks and ticket when unstashing transactions.
(cherry picked from commit 2ff54098b19ebc2b4bbf5516de6e6befb46f9fe7)
-rw-r--r-- | jstests/replsets/transactions_reaped_with_tickets_exhausted.js | 5 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.cpp | 78 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.h | 26 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant_test.cpp | 45 |
4 files changed, 138 insertions, 16 deletions
diff --git a/jstests/replsets/transactions_reaped_with_tickets_exhausted.js b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js index 7c5963c7bb6..905890af13d 100644 --- a/jstests/replsets/transactions_reaped_with_tickets_exhausted.js +++ b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js @@ -86,8 +86,9 @@ } // Transaction should already be aborted. - assert.commandFailedWithCode(session.abortTransaction_forTesting(), - ErrorCodes.NoSuchTransaction); + let res = assert.commandFailedWithCode(session.abortTransaction_forTesting(), + ErrorCodes.NoSuchTransaction); + assert(res.errmsg.match(/Transaction .* has been aborted/), res.errmsg); session.endSession(); rst.stopSet(); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index a79577f40f0..9cd25e613e7 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -83,6 +83,8 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeReleasingTransactionOplogHole); MONGO_FAIL_POINT_DEFINE(skipCommitTxnCheckPrepareMajorityCommitted); +MONGO_FAIL_POINT_DEFINE(restoreLocksFail); + const auto getTransactionParticipant = Session::declareDecoration<TransactionParticipant>(); // The command names that are allowed in a prepared transaction. @@ -701,7 +703,7 @@ TransactionParticipant::TxnResources::TxnResources(WithLock wl, } // On secondaries, max lock timeout must not be set. - invariant(stashStyle != StashStyle::kSecondary || !opCtx->lockState()->hasMaxLockTimeout()); + invariant(!(stashStyle == StashStyle::kSecondary && opCtx->lockState()->hasMaxLockTimeout())); _recoveryUnit = opCtx->releaseRecoveryUnit(); opCtx->setRecoveryUnit(std::unique_ptr<RecoveryUnit>( @@ -727,19 +729,42 @@ TransactionParticipant::TxnResources::~TxnResources() { void TransactionParticipant::TxnResources::release(OperationContext* opCtx) { // Perform operations that can fail the release before marking the TxnResources as released. + auto onError = makeGuard([&] { + // Release any locks acquired as part of lock restoration. + if (_lockSnapshot) { + // WUOW should be released before unlocking. + Locker::WUOWLockSnapshot dummyWUOWLockInfo; + _locker->releaseWriteUnitOfWork(&dummyWUOWLockInfo); + + Locker::LockSnapshot dummyLockInfo; + _locker->saveLockStateAndUnlock(&dummyLockInfo); + } + // Release the ticket if acquired. + // restoreWriteUnitOfWorkAndLock() can reacquire the ticket as well. + if (_locker->getClientState() != Locker::ClientState::kInactive) { + _locker->releaseTicket(); + } + }); // Restore locks if they are yielded. if (_lockSnapshot) { invariant(!_locker->isLocked()); // opCtx is passed in to enable the restoration to be interrupted. _locker->restoreWriteUnitOfWorkAndLock(opCtx, *_lockSnapshot); - _lockSnapshot.reset(nullptr); } _locker->reacquireTicket(opCtx); + if (MONGO_FAIL_POINT(restoreLocksFail)) { + uasserted(ErrorCodes::LockTimeout, str::stream() << "Lock restore failed due to failpoint"); + } + invariant(!_released); _released = true; + // Successfully reacquired the locks and tickets. + onError.dismiss(); + _lockSnapshot.reset(nullptr); + // It is necessary to lock the client to change the Locker on the OperationContext. stdx::lock_guard<Client> lk(*opCtx->getClient()); invariant(opCtx->lockState()->getClientState() == Locker::ClientState::kInactive); @@ -844,7 +869,7 @@ void TransactionParticipant::Participant::resetRetryableWriteState(OperationCont } void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx( - OperationContext* opCtx) { + OperationContext* opCtx, MaxLockTimeout maxLockTimeout) { // Transaction resources already exist for this transaction. Transfer them from the // stash to the operation context. // @@ -852,14 +877,41 @@ void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx( // must hold the Client clock to mutate txnResourceStash, we jump through some hoops here to // move the TxnResources in txnResourceStash into a local variable that can be manipulated // without holding the Client lock. - [&]() noexcept { + auto tempTxnResourceStash = [&]() noexcept { using std::swap; boost::optional<TxnResources> trs; stdx::lock_guard<Client> lk(*opCtx->getClient()); swap(trs, o(lk).txnResourceStash); - return std::move(*trs); + return trs; } - ().release(opCtx); + (); + + auto releaseOnError = makeGuard([&] { + // Restore the lock resources back to transaction participant. + using std::swap; + stdx::lock_guard<Client> lk(*opCtx->getClient()); + swap(o(lk).txnResourceStash, tempTxnResourceStash); + }); + + invariant(tempTxnResourceStash); + auto stashLocker = tempTxnResourceStash->locker(); + invariant(stashLocker); + + if (maxLockTimeout == MaxLockTimeout::kNotAllowed) { + stashLocker->unsetMaxLockTimeout(); + } else { + // If maxTransactionLockRequestTimeoutMillis is set, then we will ensure no + // future lock request waits longer than maxTransactionLockRequestTimeoutMillis + // to acquire a lock. This is to avoid deadlocks and minimize non-transaction + // operation performance degradations. + auto maxTransactionLockMillis = gMaxTransactionLockRequestTimeoutMillis.load(); + if (maxTransactionLockMillis >= 0) { + stashLocker->setMaxLockTimeout(Milliseconds(maxTransactionLockMillis)); + } + } + + tempTxnResourceStash->release(opCtx); + releaseOnError.dismiss(); } void TransactionParticipant::Participant::unstashTransactionResources(OperationContext* opCtx, @@ -875,7 +927,14 @@ void TransactionParticipant::Participant::unstashTransactionResources(OperationC _checkIsCommandValidWithTxnState(*opCtx->getTxnNumber(), cmdName); if (o().txnResourceStash) { - _releaseTransactionResourcesToOpCtx(opCtx); + MaxLockTimeout maxLockTimeout; + // Max lock timeout must not be set on secondaries, since secondary oplog application cannot + // fail. And, primaries should respect the transaction lock timeout, since it can prevent + // the transaction from making progress. + maxLockTimeout = + opCtx->writesAreReplicated() ? MaxLockTimeout::kAllowed : MaxLockTimeout::kNotAllowed; + + _releaseTransactionResourcesToOpCtx(opCtx, maxLockTimeout); stdx::lock_guard<Client> lg(*opCtx->getClient()); o(lg).transactionMetricsObserver.onUnstash(ServerTransactionsMetrics::get(opCtx), opCtx->getServiceContext()->getTickSource()); @@ -948,12 +1007,13 @@ void TransactionParticipant::Participant::refreshLocksForPreparedTransaction( invariant(!opCtx->lockState()->isRSTLLocked()); invariant(!opCtx->lockState()->isLocked()); - // The node must have txn resource. invariant(o().txnResourceStash); invariant(o().txnState.isPrepared()); - _releaseTransactionResourcesToOpCtx(opCtx); + // Lock and Ticket reacquisition of a prepared transaction should not fail for + // state transitions (step up/step down). + _releaseTransactionResourcesToOpCtx(opCtx, MaxLockTimeout::kNotAllowed); // Snapshot transactions don't conflict with PBWM lock on both primary and secondary. invariant(!opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h index 2ad8de03f48..f32b75608e1 100644 --- a/src/mongo/db/transaction_participant.h +++ b/src/mongo/db/transaction_participant.h @@ -202,9 +202,9 @@ public: TxnResources& operator=(TxnResources&&) = default; /** - * Returns a const pointer to the stashed lock state, or nullptr if no stashed locks exist. + * Returns a pointer to the stashed lock state, or nullptr if no stashed locks exist. */ - const Locker* locker() const { + Locker* locker() const { return _locker.get(); } @@ -355,6 +355,9 @@ public: */ class Participant : public Observer { public: + // Indicates whether the future lock requests should have timeouts. + enum class MaxLockTimeout { kNotAllowed, kAllowed }; + explicit Participant(OperationContext* opCtx); explicit Participant(const SessionToKill& session); @@ -768,9 +771,22 @@ public: // invalidating a transaction, or starting a new transaction. void _resetTransactionState(WithLock wl, TransactionState::StateFlag state); - // Releases the resources held in *o().txnResources to the operation context. - // o().txnResources must be engaged prior to calling this. - void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx); + /* Releases the resources held in *o().txnResources to the operation context. + * o().txnResources must be engaged prior to calling this. + * + * maxLockTimeout will determine whether future lock requests should have lock timeouts. + * - MaxLockTimeout::kNotAllowed will clear the lock timeout. + * - MaxLockTimeout::kAllowed will set the timeout as + * MaxTransactionLockRequestTimeoutMillis. + * + * ------------------------------------------------------------------ + * | | PRIMARY | SECONDARY | STATE TRANSITION | + * |----------------|------------|---------------|------------------| + * |maxLockTimeout | kAllowed | kNotAllowed | kNotAllowed | + * ------------------------------------------------------------------ + */ + void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx, + MaxLockTimeout maxLockTimeout); TransactionParticipant::PrivateState& p() { return _tp->_p; diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 03951d7c2ae..f4764ad0e8c 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -770,6 +770,51 @@ TEST_F(TxnParticipantTest, ThrowDuringOnTransactionPrepareAbortsTransaction) { ASSERT(txnParticipant.transactionIsAborted()); } +TEST_F(TxnParticipantTest, UnstashFailsShouldLeaveTxnResourceStashUnchanged) { + auto sessionCheckout = checkOutSession(); + auto txnParticipant = TransactionParticipant::get(opCtx()); + + txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction"); + ASSERT_TRUE(opCtx()->lockState()->isLocked()); + + // Simulate the locking of an insert. + { + Lock::DBLock dbLock(opCtx(), "test", MODE_IX); + Lock::CollectionLock collLock(opCtx(), NamespaceString("test.foo"), MODE_IX); + } + + auto prepareTimestamp = txnParticipant.prepareTransaction(opCtx(), {}); + + // Simulate a secondary style lock stashing such that the locks are yielded. + { + repl::UnreplicatedWritesBlock uwb(opCtx()); + opCtx()->lockState()->unsetMaxLockTimeout(); + txnParticipant.stashTransactionResources(opCtx()); + } + ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked()); + + // Enable fail point. + getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::alwaysOn); + + ASSERT_THROWS_CODE(txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction"), + AssertionException, + ErrorCodes::LockTimeout); + + // Above unstash attempt fail should leave the txnResourceStash unchanged. + ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked()); + + // Disable fail point. + getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::off); + + // Should be successfully able to perform lock restore. + txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction"); + ASSERT_TRUE(opCtx()->lockState()->isLocked()); + + // Commit the transaction to release the locks. + txnParticipant.commitPreparedTransaction(opCtx(), prepareTimestamp, boost::none); + ASSERT_TRUE(txnParticipant.transactionIsCommitted()); +} + TEST_F(TxnParticipantTest, StepDownAfterPrepareDoesNotBlock) { auto sessionCheckout = checkOutSession(); auto txnParticipant = TransactionParticipant::get(opCtx()); |