author     Matthew Russotto <matthew.russotto@10gen.com>  2018-08-07 13:08:06 -0400
committer  Matthew Russotto <matthew.russotto@10gen.com>  2018-08-24 11:57:13 -0400
commit     ecf9c1bd4cc1b1ed8a1606cfc5c0480eaf7f7020
tree       65a53a806d752c03153a47cc9a0b9449c5f5e116
parent     8d657bc4b13632cfd224ebb5bbe973e80557868e
SERVER-35770 Running a multi-statement transaction when all WiredTiger write tickets are exhausted may lead to deadlock
(cherry picked from commit 210bb5d91cb3c77bb3ed169114f8b85cd1062fb3)
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml     1
-rw-r--r--  jstests/replsets/transactions_reaped_with_tickets_exhausted.js  95
-rw-r--r--  src/mongo/db/concurrency/lock_state.cpp                         22
-rw-r--r--  src/mongo/db/session.cpp                                         6
-rw-r--r--  src/mongo/db/session.h                                           4
5 files changed, 125 insertions, 3 deletions
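The deadlock tracked by SERVER-35770, as exercised by the new jstest below: an open multi-statement transaction holds locks on a collection, a batch of drop commands then consumes every WiredTiger write ticket while queued behind those locks, and before this change the transaction's next statement (and the machinery that aborts expired transactions) could wait for a ticket with no deadline, so neither side made progress. The following mongo-shell lines are a hedged diagnostic sketch, not part of the patch; the serverStatus field layout (wiredTiger.concurrentTransactions.write) is an assumption about this branch.

// Hedged sketch (not from this commit): report WiredTiger write-ticket usage while
// reproducing the scenario; assumes serverStatus() exposes concurrentTransactions here.
const write = db.serverStatus().wiredTiger.concurrentTransactions.write;
print(`write tickets: ${write.out} of ${write.totalTickets} in use, ${write.available} available`);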
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml b/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml
index 86c8721ad02..79cbd56a8a8 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml
@@ -232,6 +232,7 @@ selector:
   - jstests/replsets/bulk_api_wc.js
   - jstests/replsets/dbcheck.js
   - jstests/replsets/batch_write_command_wc.js
+  - jstests/replsets/transactions_reaped_with_tickets_exhausted.js
 executor:
   config:
diff --git a/jstests/replsets/transactions_reaped_with_tickets_exhausted.js b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js
new file mode 100644
index 00000000000..a6250c13560
--- /dev/null
+++ b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js
@@ -0,0 +1,95 @@
+/**
+ * Test ensures that exhausting the number of write tickets in the system does not prevent
+ * transactions from being reaped by the expired transaction reaper.
+ *
+ * @tags: [uses_transactions]
+ */
+(function() {
+    "use strict";
+
+    load("jstests/libs/parallelTester.js");  // for ScopedThread
+
+    // We set the number of write tickets to be a small value in order to avoid needing to spawn a
+    // large number of threads to exhaust all of the available ones.
+    const kNumWriteTickets = 5;
+
+    const rst = new ReplSetTest({
+        nodes: 1,
+        nodeOptions: {
+            setParameter: {
+                wiredTigerConcurrentWriteTransactions: kNumWriteTickets,
+
+                // Setting a transaction lifetime of 5 seconds works fine locally because the
+                // threads which attempt to run the drop command are spawned quickly enough. This
+                // might not be the case for Evergreen hosts and may need to be tuned accordingly.
+                transactionLifetimeLimitSeconds: 5,
+            }
+        }
+    });
+    rst.startSet();
+    rst.initiate();
+
+    const primary = rst.getPrimary();
+    const db = primary.getDB("test");
+
+    const session = primary.startSession({causalConsistency: false});
+    const sessionDb = session.getDatabase("test");
+
+    assert.commandWorked(db.runCommand({create: "mycoll"}));
+
+    session.startTransaction();
+    assert.commandWorked(sessionDb.mycoll.insert({}));
+
+    const threads = [];
+
+    for (let i = 0; i < kNumWriteTickets; ++i) {
+        const thread = new ScopedThread(function(host) {
+            try {
+                const conn = new Mongo(host);
+                const db = conn.getDB("test");
+
+                // Dropping a collection requires a database X lock and therefore blocks behind the
+                // transaction committing or aborting.
+                db.mycoll.drop();
+
+                return {ok: 1};
+            } catch (e) {
+                return {ok: 0, error: e.toString(), stack: e.stack};
+            }
+        }, primary.host);
+
+        threads.push(thread);
+        thread.start();
+    }
+
+    // We wait until all of the drop commands are waiting for a lock to know that we've exhausted
+    // all of the available write tickets.
+    assert.soon(
+        () => {
+            const ops = db.currentOp({"command.drop": "mycoll", waitingForLock: true});
+            return ops.inprog.length === kNumWriteTickets;
+        },
+        () => {
+            return `Didn't find ${kNumWriteTickets} drop commands running: ` +
+                tojson(db.currentOp());
+        });
+
+    // Attempting to perform another operation inside of the transaction will block and should
+    // eventually cause it to be aborted.
+    assert.commandFailedWithCode(sessionDb.mycoll.insert({}), ErrorCodes.LockTimeout);
+
+    for (let thread of threads) {
+        thread.join();
+    }
+
+    for (let thread of threads) {
+        assert.commandWorked(thread.returnData());
+    }
+
+    // Transaction should already be aborted.
+    assert.commandFailedWithCode(session.abortTransaction_forTesting(),
+                                 ErrorCodes.NoSuchTransaction);
+
+    session.endSession();
+    rst.stopSet();
+})();
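The test pins both tunables at startup through nodeOptions. For reproducing the scenario interactively, the same parameters can usually be adjusted on a running mongod; the lines below are a hedged sketch, not part of the patch, and assume both parameters are runtime-settable via setParameter on this branch.

// Hedged sketch (not from this commit): apply the same values the test passes via
// nodeOptions, but at runtime on an already started mongod.
assert.commandWorked(db.adminCommand({setParameter: 1, wiredTigerConcurrentWriteTransactions: 5}));
assert.commandWorked(db.adminCommand({setParameter: 1, transactionLifetimeLimitSeconds: 5}));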
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 33e3d9b1a3e..d3b95d7b589 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -319,7 +319,25 @@ LockResult LockerImpl<IsForMMAPV1>::lockGlobal(OperationContext* opCtx, LockMode
 template <bool IsForMMAPV1>
 void LockerImpl<IsForMMAPV1>::reacquireTicket(OperationContext* opCtx) {
     invariant(_modeForTicket != MODE_NONE);
+    auto clientState = _clientState.load();
+    const bool reader = isSharedLockMode(_modeForTicket);
+
+    // Ensure that either we don't have a ticket, or the current ticket mode matches the lock mode.
+    invariant(clientState == kInactive || (clientState == kActiveReader && reader) ||
+              (clientState == kActiveWriter && !reader));
+
+    // If we already have a ticket, there's nothing to do.
+    if (clientState != kInactive)
+        return;
+
     auto acquireTicketResult = _acquireTicket(opCtx, _modeForTicket, Date_t::max());
+    uassert(ErrorCodes::LockTimeout,
+            str::stream() << "Unable to acquire ticket with mode '" << _modeForTicket
+                          << "' within a max lock request timeout of '"
+                          << _maxLockTimeout.get()
+                          << "' milliseconds.",
+            acquireTicketResult == LOCK_OK || !_maxLockTimeout);
+    // If no deadline is specified we should always get a ticket.
     invariant(acquireTicketResult == LOCK_OK);
 }
@@ -332,6 +350,10 @@ LockResult LockerImpl<IsForMMAPV1>::_acquireTicket(OperationContext* opCtx,
     if (holder) {
         _clientState.store(reader ? kQueuedReader : kQueuedWriter);
+        if (_maxLockTimeout && !_uninterruptibleLocksRequested) {
+            deadline = std::min(deadline, Date_t::now() + _maxLockTimeout.get());
+        }
+
         // If the ticket wait is interrupted, restore the state of the client.
         auto restoreStateOnErrorGuard = MakeGuard([&] { _clientState.store(kInactive); });
         if (deadline == Date_t::max()) {
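Taken together, reacquireTicket() now returns immediately when the locker already holds a ticket and otherwise bounds the wait by _maxLockTimeout, so a stalled reacquisition surfaces as a LockTimeout error instead of an indefinite block (the error the new jstest expects from the insert it issues after the tickets are exhausted). The lines below are a hedged shell-level sketch, not part of the patch; that _maxLockTimeout is driven by the maxTransactionLockRequestTimeoutMillis server parameter is an assumption about this branch.

// Hedged sketch (not from this commit): inspect, and optionally raise, the per-transaction
// lock acquisition timeout assumed to feed the locker's _maxLockTimeout.
printjson(db.adminCommand({getParameter: 1, maxTransactionLockRequestTimeoutMillis: 1}));
assert.commandWorked(db.adminCommand({setParameter: 1, maxTransactionLockRequestTimeoutMillis: 20}));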
diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp
index 47bde4c01f3..cf814e8d6d9 100644
--- a/src/mongo/db/session.cpp
+++ b/src/mongo/db/session.cpp
@@ -654,13 +654,15 @@ void Session::_checkTxnValid(WithLock, TxnNumber txnNumber) const {
             txnNumber >= _activeTxnNumber);
 }
-Session::TxnResources::TxnResources(OperationContext* opCtx) {
+Session::TxnResources::TxnResources(OperationContext* opCtx, bool keepTicket) {
     stdx::lock_guard<Client> lk(*opCtx->getClient());
     _ruState = opCtx->getWriteUnitOfWork()->release();
     opCtx->setWriteUnitOfWork(nullptr);
     _locker = opCtx->swapLockState(stdx::make_unique<DefaultLockerImpl>());
-    _locker->releaseTicket();
+    if (!keepTicket) {
+        _locker->releaseTicket();
+    }
     _locker->unsetThreadId();
     // This thread must still respect the transaction lock timeout, since it can prevent the
diff --git a/src/mongo/db/session.h b/src/mongo/db/session.h
index bfa7487d33f..5ca119f1094 100644
--- a/src/mongo/db/session.h
+++ b/src/mongo/db/session.h
@@ -72,8 +72,10 @@ public:
         /**
          * Stashes transaction state from 'opCtx' in the newly constructed TxnResources.
          * This ephemerally takes the Client lock associated with the opCtx.
+         * keepTicket: If true, do not release locker's throttling ticket.
+         *             Use only for short-term stashing.
          */
-        TxnResources(OperationContext* opCtx);
+        TxnResources(OperationContext* opCtx, bool keepTicket = false);
         ~TxnResources();
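The keepTicket flag added here lets callers that stash transaction resources only briefly hold on to the locker's throttling ticket instead of releasing and re-acquiring it, which matters once every write ticket is taken and re-acquisition could block. The lines below are a hedged follow-on check for the jstest above, not part of the patch: once the expired transaction has been reaped, the queued drop commands should drain and the collection should disappear.

// Hedged sketch (not from this commit): after the reaper aborts the expired transaction,
// the blocked drops acquire their locks and the collection goes away.
assert.soon(() => db.getCollectionNames().indexOf("mycoll") === -1,
            "expected mycoll to be dropped once the expired transaction was reaped");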