summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorSuganthi Mani <suganthi.mani@mongodb.com>2019-07-24 14:11:58 -0400
committerSuganthi Mani <suganthi.mani@mongodb.com>2019-07-24 23:57:48 -0400
commit2ff54098b19ebc2b4bbf5516de6e6befb46f9fe7 (patch)
tree200a13c7e8f0cb5ea5bd6f35093689c150220bc4 /src/mongo/db
parentf5f62f877b62523205d02f810f351b06cf1e58c4 (diff)
downloadmongo-2ff54098b19ebc2b4bbf5516de6e6befb46f9fe7.tar.gz
SERVER-41881 Unstashing the transaction lock resources should ignore the saved value of maxLockTimeout and explicitly set the maxLockTimeout based on node's state.
SERVER-41883 Replication state transition reacquires locks and tickets of a prepared transaction with no lock timeout. SERVER-41556 Handles failure to reacquire locks and ticket when unstashing transactions.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/transaction_participant.cpp78
-rw-r--r--src/mongo/db/transaction_participant.h26
-rw-r--r--src/mongo/db/transaction_participant_test.cpp45
3 files changed, 135 insertions, 14 deletions
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 72a33c2657a..6af9c7f4bca 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -83,6 +83,8 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeReleasingTransactionOplogHole);
MONGO_FAIL_POINT_DEFINE(skipCommitTxnCheckPrepareMajorityCommitted);
+MONGO_FAIL_POINT_DEFINE(restoreLocksFail);
+
const auto getTransactionParticipant = Session::declareDecoration<TransactionParticipant>();
// The command names that are allowed in a prepared transaction.
@@ -701,7 +703,7 @@ TransactionParticipant::TxnResources::TxnResources(WithLock wl,
}
// On secondaries, max lock timeout must not be set.
- invariant(stashStyle != StashStyle::kSecondary || !opCtx->lockState()->hasMaxLockTimeout());
+ invariant(!(stashStyle == StashStyle::kSecondary && opCtx->lockState()->hasMaxLockTimeout()));
_recoveryUnit = opCtx->releaseRecoveryUnit();
opCtx->setRecoveryUnit(std::unique_ptr<RecoveryUnit>(
@@ -727,19 +729,42 @@ TransactionParticipant::TxnResources::~TxnResources() {
void TransactionParticipant::TxnResources::release(OperationContext* opCtx) {
// Perform operations that can fail the release before marking the TxnResources as released.
+ auto onError = makeGuard([&] {
+ // Release any locks acquired as part of lock restoration.
+ if (_lockSnapshot) {
+ // WUOW should be released before unlocking.
+ Locker::WUOWLockSnapshot dummyWUOWLockInfo;
+ _locker->releaseWriteUnitOfWork(&dummyWUOWLockInfo);
+
+ Locker::LockSnapshot dummyLockInfo;
+ _locker->saveLockStateAndUnlock(&dummyLockInfo);
+ }
+ // Release the ticket if acquired.
+ // restoreWriteUnitOfWorkAndLock() can reacquire the ticket as well.
+ if (_locker->getClientState() != Locker::ClientState::kInactive) {
+ _locker->releaseTicket();
+ }
+ });
// Restore locks if they are yielded.
if (_lockSnapshot) {
invariant(!_locker->isLocked());
// opCtx is passed in to enable the restoration to be interrupted.
_locker->restoreWriteUnitOfWorkAndLock(opCtx, *_lockSnapshot);
- _lockSnapshot.reset(nullptr);
}
_locker->reacquireTicket(opCtx);
+ if (MONGO_FAIL_POINT(restoreLocksFail)) {
+ uasserted(ErrorCodes::LockTimeout, str::stream() << "Lock restore failed due to failpoint");
+ }
+
invariant(!_released);
_released = true;
+ // Successfully reacquired the locks and tickets.
+ onError.dismiss();
+ _lockSnapshot.reset(nullptr);
+
// It is necessary to lock the client to change the Locker on the OperationContext.
stdx::lock_guard<Client> lk(*opCtx->getClient());
invariant(opCtx->lockState()->getClientState() == Locker::ClientState::kInactive);
@@ -844,7 +869,7 @@ void TransactionParticipant::Participant::resetRetryableWriteState(OperationCont
}
void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx(
- OperationContext* opCtx) {
+ OperationContext* opCtx, MaxLockTimeout maxLockTimeout) {
// Transaction resources already exist for this transaction. Transfer them from the
// stash to the operation context.
//
@@ -852,14 +877,41 @@ void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx(
// must hold the Client clock to mutate txnResourceStash, we jump through some hoops here to
// move the TxnResources in txnResourceStash into a local variable that can be manipulated
// without holding the Client lock.
- [&]() noexcept {
+ auto tempTxnResourceStash = [&]() noexcept {
using std::swap;
boost::optional<TxnResources> trs;
stdx::lock_guard<Client> lk(*opCtx->getClient());
swap(trs, o(lk).txnResourceStash);
- return std::move(*trs);
+ return trs;
}
- ().release(opCtx);
+ ();
+
+ auto releaseOnError = makeGuard([&] {
+ // Restore the lock resources back to transaction participant.
+ using std::swap;
+ stdx::lock_guard<Client> lk(*opCtx->getClient());
+ swap(o(lk).txnResourceStash, tempTxnResourceStash);
+ });
+
+ invariant(tempTxnResourceStash);
+ auto stashLocker = tempTxnResourceStash->locker();
+ invariant(stashLocker);
+
+ if (maxLockTimeout == MaxLockTimeout::kNotAllowed) {
+ stashLocker->unsetMaxLockTimeout();
+ } else {
+ // If maxTransactionLockRequestTimeoutMillis is set, then we will ensure no
+ // future lock request waits longer than maxTransactionLockRequestTimeoutMillis
+ // to acquire a lock. This is to avoid deadlocks and minimize non-transaction
+ // operation performance degradations.
+ auto maxTransactionLockMillis = gMaxTransactionLockRequestTimeoutMillis.load();
+ if (maxTransactionLockMillis >= 0) {
+ stashLocker->setMaxLockTimeout(Milliseconds(maxTransactionLockMillis));
+ }
+ }
+
+ tempTxnResourceStash->release(opCtx);
+ releaseOnError.dismiss();
}
void TransactionParticipant::Participant::unstashTransactionResources(OperationContext* opCtx,
@@ -875,7 +927,14 @@ void TransactionParticipant::Participant::unstashTransactionResources(OperationC
_checkIsCommandValidWithTxnState(*opCtx->getTxnNumber(), cmdName);
if (o().txnResourceStash) {
- _releaseTransactionResourcesToOpCtx(opCtx);
+ MaxLockTimeout maxLockTimeout;
+ // Max lock timeout must not be set on secondaries, since secondary oplog application cannot
+ // fail. And, primaries should respect the transaction lock timeout, since it can prevent
+ // the transaction from making progress.
+ maxLockTimeout =
+ opCtx->writesAreReplicated() ? MaxLockTimeout::kAllowed : MaxLockTimeout::kNotAllowed;
+
+ _releaseTransactionResourcesToOpCtx(opCtx, maxLockTimeout);
stdx::lock_guard<Client> lg(*opCtx->getClient());
o(lg).transactionMetricsObserver.onUnstash(ServerTransactionsMetrics::get(opCtx),
opCtx->getServiceContext()->getTickSource());
@@ -948,12 +1007,13 @@ void TransactionParticipant::Participant::refreshLocksForPreparedTransaction(
invariant(!opCtx->lockState()->isRSTLLocked());
invariant(!opCtx->lockState()->isLocked());
-
// The node must have txn resource.
invariant(o().txnResourceStash);
invariant(o().txnState.isPrepared());
- _releaseTransactionResourcesToOpCtx(opCtx);
+ // Lock and Ticket reacquisition of a prepared transaction should not fail for
+ // state transitions (step up/step down).
+ _releaseTransactionResourcesToOpCtx(opCtx, MaxLockTimeout::kNotAllowed);
// Snapshot transactions don't conflict with PBWM lock on both primary and secondary.
invariant(!opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h
index d6947e36b6b..e907a61d077 100644
--- a/src/mongo/db/transaction_participant.h
+++ b/src/mongo/db/transaction_participant.h
@@ -202,9 +202,9 @@ public:
TxnResources& operator=(TxnResources&&) = default;
/**
- * Returns a const pointer to the stashed lock state, or nullptr if no stashed locks exist.
+ * Returns a pointer to the stashed lock state, or nullptr if no stashed locks exist.
*/
- const Locker* locker() const {
+ Locker* locker() const {
return _locker.get();
}
@@ -357,6 +357,9 @@ public:
*/
class Participant : public Observer {
public:
+ // Indicates whether the future lock requests should have timeouts.
+ enum class MaxLockTimeout { kNotAllowed, kAllowed };
+
explicit Participant(OperationContext* opCtx);
explicit Participant(const SessionToKill& session);
@@ -770,9 +773,22 @@ public:
// invalidating a transaction, or starting a new transaction.
void _resetTransactionState(WithLock wl, TransactionState::StateFlag state);
- // Releases the resources held in *o().txnResources to the operation context.
- // o().txnResources must be engaged prior to calling this.
- void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx);
+ /* Releases the resources held in *o().txnResources to the operation context.
+ * o().txnResources must be engaged prior to calling this.
+ *
+ * maxLockTimeout will determine whether future lock requests should have lock timeouts.
+ * - MaxLockTimeout::kNotAllowed will clear the lock timeout.
+ * - MaxLockTimeout::kAllowed will set the timeout as
+ * MaxTransactionLockRequestTimeoutMillis.
+ *
+ * ------------------------------------------------------------------
+ * | | PRIMARY | SECONDARY | STATE TRANSITION |
+ * |----------------|------------|---------------|------------------|
+ * |maxLockTimeout | kAllowed | kNotAllowed | kNotAllowed |
+ * ------------------------------------------------------------------
+ */
+ void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx,
+ MaxLockTimeout maxLockTimeout);
TransactionParticipant::PrivateState& p() {
return _tp->_p;
diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp
index b2b92ec6892..646431735da 100644
--- a/src/mongo/db/transaction_participant_test.cpp
+++ b/src/mongo/db/transaction_participant_test.cpp
@@ -769,6 +769,51 @@ TEST_F(TxnParticipantTest, ThrowDuringOnTransactionPrepareAbortsTransaction) {
ASSERT(txnParticipant.transactionIsAborted());
}
+TEST_F(TxnParticipantTest, UnstashFailsShouldLeaveTxnResourceStashUnchanged) {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+
+ txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction");
+ ASSERT_TRUE(opCtx()->lockState()->isLocked());
+
+ // Simulate the locking of an insert.
+ {
+ Lock::DBLock dbLock(opCtx(), "test", MODE_IX);
+ Lock::CollectionLock collLock(opCtx(), NamespaceString("test.foo"), MODE_IX);
+ }
+
+ auto prepareTimestamp = txnParticipant.prepareTransaction(opCtx(), {});
+
+ // Simulate a secondary style lock stashing such that the locks are yielded.
+ {
+ repl::UnreplicatedWritesBlock uwb(opCtx());
+ opCtx()->lockState()->unsetMaxLockTimeout();
+ txnParticipant.stashTransactionResources(opCtx());
+ }
+ ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked());
+
+ // Enable fail point.
+ getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::alwaysOn);
+
+ ASSERT_THROWS_CODE(txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction"),
+ AssertionException,
+ ErrorCodes::LockTimeout);
+
+ // Above unstash attempt fail should leave the txnResourceStash unchanged.
+ ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked());
+
+ // Disable fail point.
+ getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::off);
+
+ // Should be successfully able to perform lock restore.
+ txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction");
+ ASSERT_TRUE(opCtx()->lockState()->isLocked());
+
+ // Commit the transaction to release the locks.
+ txnParticipant.commitPreparedTransaction(opCtx(), prepareTimestamp, boost::none);
+ ASSERT_TRUE(txnParticipant.transactionIsCommitted());
+}
+
TEST_F(TxnParticipantTest, StepDownAfterPrepareDoesNotBlock) {
auto sessionCheckout = checkOutSession();
auto txnParticipant = TransactionParticipant::get(opCtx());