diff options
author | Matthew Russotto <matthew.russotto@10gen.com> | 2018-08-07 13:08:06 -0400 |
---|---|---|
committer | Matthew Russotto <matthew.russotto@10gen.com> | 2018-08-24 11:57:13 -0400 |
commit | ecf9c1bd4cc1b1ed8a1606cfc5c0480eaf7f7020 (patch) | |
tree | 65a53a806d752c03153a47cc9a0b9449c5f5e116 | |
parent | 8d657bc4b13632cfd224ebb5bbe973e80557868e (diff) | |
download | mongo-ecf9c1bd4cc1b1ed8a1606cfc5c0480eaf7f7020.tar.gz |
SERVER-35770 Running a multi-statement transaction when all WiredTiger write tickets are exhausted may lead to deadlock
(cherry picked from commit 210bb5d91cb3c77bb3ed169114f8b85cd1062fb3)
-rw-r--r-- | buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml | 1 | ||||
-rw-r--r-- | jstests/replsets/transactions_reaped_with_tickets_exhausted.js | 95 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/session.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/session.h | 4 |
5 files changed, 125 insertions, 3 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml b/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml index 86c8721ad02..79cbd56a8a8 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_auth_misc.yml @@ -232,6 +232,7 @@ selector: - jstests/replsets/bulk_api_wc.js - jstests/replsets/dbcheck.js - jstests/replsets/batch_write_command_wc.js + - jstests/replsets/transactions_reaped_with_tickets_exhausted.js executor: config: diff --git a/jstests/replsets/transactions_reaped_with_tickets_exhausted.js b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js new file mode 100644 index 00000000000..a6250c13560 --- /dev/null +++ b/jstests/replsets/transactions_reaped_with_tickets_exhausted.js @@ -0,0 +1,95 @@ +/** + * Test ensures that exhausting the number of write tickets in the system does not prevent + * transactions from being reaped by the expired transaction reaper. + * + * @tags: [uses_transactions] + */ +(function() { + "use strict"; + + load("jstests/libs/parallelTester.js"); // for ScopedThread + + // We set the number of write tickets to be a small value in order to avoid needing to spawn a + // large number of threads to exhaust all of the available ones. + const kNumWriteTickets = 5; + + const rst = new ReplSetTest({ + nodes: 1, + nodeOptions: { + setParameter: { + wiredTigerConcurrentWriteTransactions: kNumWriteTickets, + + // Setting a transaction lifetime of 5 seconds works fine locally because the + // threads which attempt to run the drop command are spawned quickly enough. This + // might not be the case for Evergreen hosts and may need to be tuned accordingly. + transactionLifetimeLimitSeconds: 5, + } + } + }); + rst.startSet(); + rst.initiate(); + + const primary = rst.getPrimary(); + const db = primary.getDB("test"); + + const session = primary.startSession({causalConsistency: false}); + const sessionDb = session.getDatabase("test"); + + assert.commandWorked(db.runCommand({create: "mycoll"})); + + session.startTransaction(); + assert.commandWorked(sessionDb.mycoll.insert({})); + + const threads = []; + + for (let i = 0; i < kNumWriteTickets; ++i) { + const thread = new ScopedThread(function(host) { + try { + const conn = new Mongo(host); + const db = conn.getDB("test"); + + // Dropping a collection requires a database X lock and therefore blocks behind the + // transaction committing or aborting. + db.mycoll.drop(); + + return {ok: 1}; + } catch (e) { + return {ok: 0, error: e.toString(), stack: e.stack}; + } + }, primary.host); + + threads.push(thread); + thread.start(); + } + + // We wait until all of the drop commands are waiting for a lock to know that we've exhausted + // all of the available write tickets. + assert.soon( + () => { + const ops = db.currentOp({"command.drop": "mycoll", waitingForLock: true}); + return ops.inprog.length === kNumWriteTickets; + }, + () => { + return `Didn't find ${kNumWriteTickets} drop commands running: ` + + tojson(db.currentOp()); + }); + + // Attempting to perform another operation inside of the transaction will block and should + // eventually cause it to be aborted. + assert.commandFailedWithCode(sessionDb.mycoll.insert({}), ErrorCodes.LockTimeout); + + for (let thread of threads) { + thread.join(); + } + + for (let thread of threads) { + assert.commandWorked(thread.returnData()); + } + + // Transaction should already be aborted. + assert.commandFailedWithCode(session.abortTransaction_forTesting(), + ErrorCodes.NoSuchTransaction); + + session.endSession(); + rst.stopSet(); +})(); diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index 33e3d9b1a3e..d3b95d7b589 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ -319,7 +319,25 @@ LockResult LockerImpl<IsForMMAPV1>::lockGlobal(OperationContext* opCtx, LockMode template <bool IsForMMAPV1> void LockerImpl<IsForMMAPV1>::reacquireTicket(OperationContext* opCtx) { invariant(_modeForTicket != MODE_NONE); + auto clientState = _clientState.load(); + const bool reader = isSharedLockMode(_modeForTicket); + + // Ensure that either we don't have a ticket, or the current ticket mode matches the lock mode. + invariant(clientState == kInactive || (clientState == kActiveReader && reader) || + (clientState == kActiveWriter && !reader)); + + // If we already have a ticket, there's nothing to do. + if (clientState != kInactive) + return; + auto acquireTicketResult = _acquireTicket(opCtx, _modeForTicket, Date_t::max()); + uassert(ErrorCodes::LockTimeout, + str::stream() << "Unable to acquire ticket with mode '" << _modeForTicket + << "' within a max lock request timeout of '" + << _maxLockTimeout.get() + << "' milliseconds.", + acquireTicketResult == LOCK_OK || !_maxLockTimeout); + // If no deadline is specified we should always get a ticket. invariant(acquireTicketResult == LOCK_OK); } @@ -332,6 +350,10 @@ LockResult LockerImpl<IsForMMAPV1>::_acquireTicket(OperationContext* opCtx, if (holder) { _clientState.store(reader ? kQueuedReader : kQueuedWriter); + if (_maxLockTimeout && !_uninterruptibleLocksRequested) { + deadline = std::min(deadline, Date_t::now() + _maxLockTimeout.get()); + } + // If the ticket wait is interrupted, restore the state of the client. auto restoreStateOnErrorGuard = MakeGuard([&] { _clientState.store(kInactive); }); if (deadline == Date_t::max()) { diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp index 47bde4c01f3..cf814e8d6d9 100644 --- a/src/mongo/db/session.cpp +++ b/src/mongo/db/session.cpp @@ -654,13 +654,15 @@ void Session::_checkTxnValid(WithLock, TxnNumber txnNumber) const { txnNumber >= _activeTxnNumber); } -Session::TxnResources::TxnResources(OperationContext* opCtx) { +Session::TxnResources::TxnResources(OperationContext* opCtx, bool keepTicket) { stdx::lock_guard<Client> lk(*opCtx->getClient()); _ruState = opCtx->getWriteUnitOfWork()->release(); opCtx->setWriteUnitOfWork(nullptr); _locker = opCtx->swapLockState(stdx::make_unique<DefaultLockerImpl>()); - _locker->releaseTicket(); + if (!keepTicket) { + _locker->releaseTicket(); + } _locker->unsetThreadId(); // This thread must still respect the transaction lock timeout, since it can prevent the diff --git a/src/mongo/db/session.h b/src/mongo/db/session.h index bfa7487d33f..5ca119f1094 100644 --- a/src/mongo/db/session.h +++ b/src/mongo/db/session.h @@ -72,8 +72,10 @@ public: /** * Stashes transaction state from 'opCtx' in the newly constructed TxnResources. * This ephemerally takes the Client lock associated with the opCtx. + * keepTicket: If true, do not release locker's throttling ticket. + * Use only for short-term stashing. */ - TxnResources(OperationContext* opCtx); + TxnResources(OperationContext* opCtx, bool keepTicket = false); ~TxnResources(); |