author | Matt Kneiser <matt.kneiser@mongodb.com> | 2023-03-29 00:06:27 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-03-29 09:44:49 +0000
commit | b2e62a90e39c2ede741c5c92e78f85d192bb9d68 (patch)
tree | 620333a1ec78232b46376c63e5f66a9dc7eba3fe
parent | 31e02552b7f98a6363bf8a46a6f9e525bdbfe7ae (diff)
download | mongo-b2e62a90e39c2ede741c5c92e78f85d192bb9d68.tar.gz
SERVER-75205 Fix deadlock involving FCV lock
With minor jstest amendments.
(cherry picked from commit e74f9c2fdf73ad707419fa4a8ae57aec70423ca6)
-rw-r--r-- | jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js | 220 |
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 14 |
2 files changed, 232 insertions, 2 deletions
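The deadlock class fixed here is inconsistent lock ordering during yield and restore: the canonical hierarchy takes the FCV lock before the RSTL and the global lock, but restoreLockState() did not special-case the FCV lock, so a yielded reader could re-acquire it *after* the global lock and wait forever against a thread taking the locks in canonical order. A minimal standalone C++ sketch of that hazard, with illustrative names only (not MongoDB code); note that running it can genuinely hang, which is the point:

```cpp
#include <mutex>
#include <thread>

std::mutex fcvMutex;     // stand-in for resourceIdFeatureCompatibilityVersion
std::mutex globalMutex;  // stand-in for resourceIdGlobal

// Honors the canonical order: FCV before Global.
void canonicalThread() {
    std::lock_guard<std::mutex> fcv(fcvMutex);
    std::lock_guard<std::mutex> global(globalMutex);
}

// Models the buggy restore path: Global first, then FCV. If this thread
// holds globalMutex while canonicalThread() holds fcvMutex, both block forever.
void outOfOrderRestore() {
    std::lock_guard<std::mutex> global(globalMutex);
    std::lock_guard<std::mutex> fcv(fcvMutex);
}

int main() {
    std::thread a(canonicalThread);
    std::thread b(outOfOrderRestore);
    a.join();
    b.join();
}
```

The patch below removes the hazard by restoring the FCV lock in its canonical slot, and adds an invariant so any future out-of-order snapshot fails fast instead of deadlocking.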
diff --git a/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js b/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js
new file mode 100644
index 00000000000..e2a76dfe3e9
--- /dev/null
+++ b/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js
@@ -0,0 +1,220 @@
+/**
+ * Tests exhaustion of read tickets, trying to force a deadlock reproduction
+ * after yield restores lock state. See SERVER-75205 for more information.
+ *
+ * Deadlock:
+ * The deadlock in question involves readers acquiring the RSTL lock, which no longer
+ * happens in 5.0+ due to lock-free reads. Therefore, this test doesn't exercise the
+ * deadlock behavior on 5.0+. 4.4 is the primary target of this test, but there isn't
+ * much test coverage of ticket exhaustion, so this test may provide other benefits on
+ * 5.0+.
+ *
+ * Parallel Shell Coordination:
+ * The 'jsTestName().timing_coordination' collection is used to coordinate timing
+ * between the main thread of the test and all the readers via writes to specific
+ * documents. One side will wait until the document appears.
+ *
+ * Two types of Parallel Shells:
+ * There are two sets of readers - queued and new - meant to saturate the ticketing
+ * system before and after the sleep operation that holds the global X lock. This
+ * dual system of readers ensures that enqueued/blocked readers as well as newly
+ * arriving reads are serviced without deadlocking.
+ *     queuedLongReadsFunc - Issues long read commands until told to stop.
+ *     newLongReadsFunc - When told to begin, issues long read commands until told
+ *                        to stop.
+ *
+ * Test Steps:
+ * 0) Start ReplSet with special params:
+ *     - lower read ticket concurrency
+ *     - increase yielding
+ * 1) Insert 1000 documents.
+ * 2) Kick off parallel readers that perform long collection scans, subject to yields.
+ * 3) Sleep with global X Lock (including RSTL), thus queuing up reads.
+ * 4) Signal new readers that will be received after the global lock is released.
+ * 5) Initiate step down while queue is working its way down to ensure there is a mix of
+ *    enqueued readers from the global X lock and new readers initiated afterwards.
+ *    <<Should have deadlocked by now for this scenario>>
+ * 6) Stop Readers.
+ *
+ * @tags: [
+ *   multiversion_incompatible,
+ *   requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/parallel_shell_helpers.js");
+
+const kNumReadTickets = 5;
+const replTest = new ReplSetTest({
+    name: jsTestName(),
+    nodes: 1,
+    nodeOptions: {
+        setParameter: {
+            // This test seeks the minimum amount of concurrency to force ticket exhaustion.
+            wiredTigerConcurrentReadTransactions: kNumReadTickets,
+            // Make yielding more common.
+            internalQueryExecYieldPeriodMS: 1,
+            internalQueryExecYieldIterations: 1
+        }
+    }
+});
+
+const dbName = jsTestName();
+const collName = "testcoll";
+let nQueuedReaders = 20;
+let nNewReaders = 10;
+TestData.dbName = dbName;
+TestData.collName = collName;
+
+// Issues long read commands until told to stop.
+// Should be run in a parallel shell via startParallelShell() with a unique id.
+function queuedLongReadsFunc(id) {
+    jsTestLog("Starting Queued Reader [" + id + "]");
+
+    try {
+        for (let i = 0;
+             db.getSiblingDB(TestData.dbName).timing_coordination.findOne({_id: "stop reading"}) ==
+             null;
+             i++) {
+            jsTestLog("queuedLongReadsFunc on " + TestData.dbName + "." + TestData.collName +
+                      " read (" + i + ") beg. Reader id:" + id);
+            try {
+                db.getSiblingDB(TestData.dbName)[TestData.collName].aggregate([{"$count": "x"}]);
+            } catch (e) {
+                jsTestLog("ignoring failed read, possibly due to stepdown", e);
+            }
+            jsTestLog("queuedLongReadsFunc read (" + i + ") end. Reader id:" + id);
+        }
+    } catch (e) {
+        jsTestLog("Exiting reader [" + id + "] early due to:" + e);
+    }
+    jsTestLog("Queued Reader complete [" + id + "]");
+}
+
+// When told to begin, issues long read commands until told to stop.
+// Should be run in a parallel shell via startParallelShell() with a unique id.
+function newLongReadsFunc(id) {
+    jsTestLog("Starting New Reader [" + id + "]");
+
+    // Coordinate all readers to begin at the same time.
+    assert.soon(() => db.getSiblingDB(TestData.dbName).timing_coordination.findOne({
+        _id: "begin new readers"
+    }) !== null,
+                "Expected main test thread to insert a document.");
+
+    try {
+        for (let i = 0;
+             db.getSiblingDB(TestData.dbName).timing_coordination.findOne({_id: "stop reading"}) ==
+             null;
+             i++) {
+            jsTestLog("newLongReadsFunc on " + TestData.dbName + "." + TestData.collName +
+                      " read (" + i + ") beg. Reader id:" + id);
+            try {
+                db.getSiblingDB(TestData.dbName)[TestData.collName].aggregate([{"$count": "x"}]);
+            } catch (e) {
+                jsTestLog("ignoring failed read, possibly due to stepdown", e);
+            }
+            jsTestLog("newLongReadsFunc read (" + i + ") end. Reader id:" + id);
+        }
+    } catch (e) {
+        jsTestLog("Exiting reader [" + id + "] early due to:" + e);
+    }
+    jsTestLog("New Reader complete [" + id + "]");
+}
+
+function runStepDown() {
+    jsTestLog("Making primary step down.");
+    let stats = db.runCommand({serverStatus: 1});
+    jsTestLog(stats.locks);
+    jsTestLog(stats.wiredTiger.concurrentTransactions);
+    const stepDownSecs = 5;
+    assert.commandWorked(primaryAdmin.runCommand({"replSetStepDown": stepDownSecs, "force": true}));
+
+    // Wait until the primary transitioned to SECONDARY state.
+    replTest.waitForState(primary, ReplSetTest.State.SECONDARY);
+
+    // Enforce the replSetStepDown timer.
+    sleep(stepDownSecs * 1000);
+
+    replTest.waitForState(primary, ReplSetTest.State.PRIMARY);
+    replTest.getPrimary();
+}
+
+/****************************************************/
+
+// 0) Start ReplSet with special params:
+//     - lower read ticket concurrency
+//     - increase yielding
+replTest.startSet();
+replTest.initiate();
+let primary = replTest.getPrimary();
+let db = primary.getDB(dbName);
+let primaryAdmin = primary.getDB("admin");
+let primaryColl = db[collName];
+let queuedReaders = [];
+let newReaders = [];
+
+// 1) Insert 1000 documents.
+jsTestLog("Fill collection [" + dbName + "." + collName + "] with 1000 docs");
+for (let i = 0; i < 1000; i++) {
+    assert.commandWorked(primaryColl.insert({"x": i}));
+}
+jsTestLog("1000 inserts done");
+
+// 2) Kick off parallel readers that perform long collection scans, subject to yields.
+for (let i = 0; i < nQueuedReaders; i++) {
+    queuedReaders.push(startParallelShell(funWithArgs(queuedLongReadsFunc, i), primary.port));
+    jsTestLog("queued reader " + queuedReaders.length + " initiated");
+}
+
+for (let i = 0; i < nNewReaders; i++) {
+    newReaders.push(startParallelShell(funWithArgs(newLongReadsFunc, i), primary.port));
+    jsTestLog("new reader " + newReaders.length + " initiated");
+}
+
+// 3) Sleep with global X Lock (including RSTL), thus queuing up reads.
+let ns = dbName + "." + collName;
+jsTestLog("Sleeping with Global X Lock on " + ns);
+db.adminCommand({
+    sleep: 1,
+    secs: 5,
+    lock: "w",  // MODE_X lock.
+    $comment: "Global lock sleep"
+});
+jsTestLog("Done sleeping with Global X Lock on " + ns);
+
+// 4) Signal new readers that will be received after the global lock is released.
+assert.commandWorked(
+    db.getSiblingDB(dbName).timing_coordination.insertOne({_id: "begin new readers"}));
+
+// 5) Initiate step down while queue is working its way down to ensure there is a mix of
+//    enqueued readers from the global X lock and new readers initiated afterwards.
+assert.soon(
+    () => db.getSiblingDB("admin")
+              .aggregate([{$currentOp: {}}, {$match: {"command.aggregate": TestData.collName}}])
+              .toArray()
+              .length >= kNumReadTickets,
+    "Expected more readers than read tickets.");
+runStepDown();
+
+// 6) Stop Readers.
+jsTestLog("Stopping Readers");
+assert.commandWorked(db.getSiblingDB(dbName).timing_coordination.insertOne({_id: "stop reading"}));
+
+for (let i = 0; i < queuedReaders.length; i++) {
+    const awaitQueuedReader = queuedReaders[i];
+    awaitQueuedReader();
+    jsTestLog("queued reader " + i + " done");
+}
+for (let i = 0; i < newReaders.length; i++) {
+    const awaitNewReader = newReaders[i];
+    awaitNewReader();
+    jsTestLog("new reader " + i + " done");
+}
+queuedReaders = [];
+newReaders = [];
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index f0e3caf4ce4..d6d72997727 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -822,13 +822,21 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna
     }
 
     std::vector<OneLock>::const_iterator it = state.locks.begin();
-    // If we locked the PBWM, it must be locked before the resourceIdGlobal and
-    // resourceIdReplicationStateTransitionLock resources.
+    // If we locked the PBWM, it must be locked before the
+    // resourceIdFeatureCompatibilityVersion, resourceIdReplicationStateTransitionLock, and
+    // resourceIdGlobal resources.
     if (it != state.locks.end() && it->resourceId == resourceIdParallelBatchWriterMode) {
         lock(opCtx, it->resourceId, it->mode);
         it++;
     }
 
+    // If we locked the FCV lock, it must be locked before the
+    // resourceIdReplicationStateTransitionLock and resourceIdGlobal resources.
+    if (it != state.locks.end() && it->resourceId == resourceIdFeatureCompatibilityVersion) {
+        lock(opCtx, it->resourceId, it->mode);
+        it++;
+    }
+
     // If we locked the RSTL, it must be locked before the resourceIdGlobal resource.
     if (it != state.locks.end() && it->resourceId == resourceIdReplicationStateTransitionLock) {
         lock(opCtx, it->resourceId, it->mode);
@@ -837,6 +845,8 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna
 
     lockGlobal(opCtx, state.globalMode);
     for (; it != state.locks.end(); it++) {
+        // Ensures we don't acquire locks out of order which can lead to deadlock.
+        invariant(it->resourceId.getType() != ResourceType::RESOURCE_GLOBAL);
         lock(opCtx, it->resourceId, it->mode);
     }
     invariant(_modeForTicket != MODE_NONE);
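For readers who want the shape of the patched restore logic in isolation, here is a hedged, self-contained sketch of the same idea: the hierarchy resources are peeled off the front of the saved snapshot in canonical order (PBWM, then FCV, then RSTL) before the global lock and the remaining resources are re-acquired, with an assertion catching any global-scope resource that appears later in the list. `Resource`, `acquire`, and `restore` are illustrative stand-ins, not MongoDB's API:

```cpp
#include <cassert>
#include <initializer_list>
#include <iostream>
#include <vector>

// Illustrative resource model; not MongoDB's types.
enum class Resource { PBWM, FCV, RSTL, Global, Collection };

bool isGlobalScope(Resource r) {
    return r == Resource::PBWM || r == Resource::FCV || r == Resource::RSTL ||
        r == Resource::Global;
}

void acquire(Resource r) {
    std::cout << "acquired resource #" << static_cast<int>(r) << '\n';
}

// Mirrors the patched restoreLockState(): hierarchy resources first, in
// canonical order, then the global lock, then everything else.
void restore(const std::vector<Resource>& saved) {
    auto it = saved.begin();
    for (Resource special : {Resource::PBWM, Resource::FCV, Resource::RSTL}) {
        if (it != saved.end() && *it == special) {
            acquire(*it);
            ++it;
        }
    }
    acquire(Resource::Global);
    for (; it != saved.end(); ++it) {
        // Fail fast on out-of-order snapshots instead of deadlocking later.
        assert(!isGlobalScope(*it));
        acquire(*it);
    }
}

int main() {
    restore({Resource::FCV, Resource::RSTL, Resource::Collection});
}
```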