author     Matt Kneiser <matt.kneiser@mongodb.com>    2023-03-29 00:06:27 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2023-03-29 09:44:49 +0000
commit     b2e62a90e39c2ede741c5c92e78f85d192bb9d68 (patch)
tree       620333a1ec78232b46376c63e5f66a9dc7eba3fe
parent     31e02552b7f98a6363bf8a46a6f9e525bdbfe7ae (diff)
download   mongo-b2e62a90e39c2ede741c5c92e78f85d192bb9d68.tar.gz
SERVER-75205 Fix deadlock involving FCV lock
With minor jstest amendments.

(cherry picked from commit e74f9c2fdf73ad707419fa4a8ae57aec70423ca6)
 jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js | 220 ++++++++++++++++++++++
 src/mongo/db/concurrency/lock_state.cpp                       |  14 +-
 2 files changed, 232 insertions(+), 2 deletions(-)
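
The new test drives mongod into read ticket exhaustion and logs ticket usage through
serverStatus (see runStepDown in the test below). As a minimal sketch of that inspection,
runnable in the mongo shell against any node (the concurrentTransactions document is the
same one the test logs; the read/write breakdown is standard serverStatus output on these
versions):

    // Snapshot WiredTiger ticket usage; the "read" subdocument reports how many
    // concurrent read tickets are checked out ("out") vs. still "available".
    const stats = db.runCommand({serverStatus: 1});
    printjson(stats.wiredTiger.concurrentTransactions.read);
    // Read ticket exhaustion corresponds to available == 0, with any further
    // readers queued until a ticket is released.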
diff --git a/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js b/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js
new file mode 100644
index 00000000000..e2a76dfe3e9
--- /dev/null
+++ b/jstests/noPassthrough/read_ticket_exhaustion_with_stepdown.js
@@ -0,0 +1,220 @@
+/**
+ * Tests exhaustion of read tickets, trying to force a deadlock reproduction
+ * after yield restores lock state. See SERVER-75205 for more information.
+ *
+ * Deadlock:
+ * The deadlock in question involves readers acquiring the RSTL lock, which no longer
+ * happens in 5.0+ due to lock-free reads. Therefore, this test doesn't exercise the
+ * deadlock behavior on 5.0+. 4.4 is the primary target of this test, but there isn't
+ * much test coverage of ticket exhaustion, so this test may provide other benefits on
+ * 5.0+.
+ *
+ * Parallel Shell Coordination:
+ * The 'jsTestName().timing_coordination' collection coordinates timing between
+ * the main thread of the test and all of the readers: one side inserts a
+ * marker document and the other side waits until that document appears (see
+ * the sketch after this comment block).
+ *
+ * Two types of Parallel Shells:
+ * There are two sets of readers - queued and new - meant to saturate the ticketing
+ * system before and after the sleep operation that holds the global X lock. This
+ * dual system of readers ensures that enqueued/blocked readers as well as newly
+ * arriving reads are serviced without deadlocking.
+ * queuedLongReadsFunc - Issues long read commands until told to stop.
+ * newLongReadsFunc - When told to begin, issues long read commands until told
+ * to stop.
+ *
+ * Test Steps:
+ * 0) Start ReplSet with special params:
+ * - lower read ticket concurrency
+ * - increase yielding
+ * 1) Insert 1000 documents.
+ * 2) Kick off parallel readers that perform long collection scans, subject to yields.
+ * 3) Sleep with global X Lock (including RSTL), thus queuing up reads.
+ * 4) Signal new readers that will be received after the global lock is released.
+ * 5) Initiate step down while queue is working its way down to ensure there is a mix of
+ * enqueued readers from the global X lock and new readers initiated afterwards.
+ * <<Should have deadlocked by now for this scenario>>
+ * 6) Stop Readers.
+ *
+ * @tags: [
+ * multiversion_incompatible,
+ * requires_replication,
+ * ]
+ */
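+
+// A minimal illustration (comments only, not executed) of the coordination
+// pattern described in the header above: the signaling side inserts a marker
+// document, and each waiting side polls until that document is visible.
+//
+//   // Signal:
+//   db.getSiblingDB(TestData.dbName).timing_coordination.insertOne({_id: "go"});
+//   // Wait:
+//   assert.soon(() => db.getSiblingDB(TestData.dbName)
+//                         .timing_coordination.findOne({_id: "go"}) !== null);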
+(function() {
+"use strict";
+
+load("jstests/libs/parallel_shell_helpers.js");
+
+const kNumReadTickets = 5;
+const replTest = new ReplSetTest({
+ name: jsTestName(),
+ nodes: 1,
+ nodeOptions: {
+ setParameter: {
+ // This test seeks the minimum amount of concurrency to force ticket exhaustion.
+ wiredTigerConcurrentReadTransactions: kNumReadTickets,
+ // Make yielding more common.
+ internalQueryExecYieldPeriodMS: 1,
+ internalQueryExecYieldIterations: 1
+ }
+ }
+});
+
+const dbName = jsTestName();
+const collName = "testcoll";
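+// Both reader pools are sized well above kNumReadTickets so the ticketing
+// system stays saturated before and after the global X lock sleep below.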
+let nQueuedReaders = 20;
+let nNewReaders = 10;
+TestData.dbName = dbName;
+TestData.collName = collName;
+
+// Issues long read commands until told to stop.
+// Should be run in a parallel shell via startParallelShell() with a unique id.
+function queuedLongReadsFunc(id) {
+ jsTestLog("Starting Queued Reader [" + id + "]");
+
+ try {
+ for (let i = 0;
+ db.getSiblingDB(TestData.dbName).timing_coordination.findOne({_id: "stop reading"}) ==
+ null;
+ i++) {
+ jsTestLog("queuedLongReadsFunc on " + TestData.dbName + "." + TestData.collName +
+ " read (" + i + ") beg. Reader id:" + id);
+ try {
+ db.getSiblingDB(TestData.dbName)[TestData.collName].aggregate([{"$count": "x"}]);
+ } catch (e) {
+                jsTestLog("ignoring failed read, possibly due to stepdown: " + e);
+ }
+ jsTestLog("queuedLongReadsFunc read (" + i + ") end. Reader id:" + id);
+ }
+ } catch (e) {
+ jsTestLog("Exiting reader [" + id + "] early due to:" + e);
+ }
+ jsTestLog("Queued Reader complete [" + id + "]");
+}
+
+// When told to begin, issues long read commands until told to stop.
+// Should be run in a parallel shell via startParallelShell() with a unique id.
+function newLongReadsFunc(id) {
+ jsTestLog("Starting New Reader [" + id + "]");
+
+ // Coordinate all readers to begin at the same time.
+ assert.soon(() => db.getSiblingDB(TestData.dbName).timing_coordination.findOne({
+ _id: "begin new readers"
+ }) !== null,
+ "Expected main test thread to insert a document.");
+
+ try {
+ for (let i = 0;
+ db.getSiblingDB(TestData.dbName).timing_coordination.findOne({_id: "stop reading"}) ==
+ null;
+ i++) {
+ jsTestLog("newLongReadsFunc on " + TestData.dbName + "." + TestData.collName +
+ " read (" + i + ") beg. Reader id:" + id);
+ try {
+ db.getSiblingDB(TestData.dbName)[TestData.collName].aggregate([{"$count": "x"}]);
+ } catch (e) {
+                jsTestLog("ignoring failed read, possibly due to stepdown: " + e);
+ }
+ jsTestLog("newLongReadsFunc read (" + i + ") end. Reader id:" + id);
+ }
+ } catch (e) {
+ jsTestLog("Exiting reader [" + id + "] early due to:" + e);
+ }
+ jsTestLog("New Reader complete [" + id + "]");
+}
+
+function runStepDown() {
+ jsTestLog("Making primary step down.");
+ let stats = db.runCommand({serverStatus: 1});
+ jsTestLog(stats.locks);
+ jsTestLog(stats.wiredTiger.concurrentTransactions);
+ const stepDownSecs = 5;
+ assert.commandWorked(primaryAdmin.runCommand({"replSetStepDown": stepDownSecs, "force": true}));
+
+ // Wait until the primary transitioned to SECONDARY state.
+ replTest.waitForState(primary, ReplSetTest.State.SECONDARY);
+
+ // Enforce the replSetStepDown timer.
+ sleep(stepDownSecs * 1000);
+
+ replTest.waitForState(primary, ReplSetTest.State.PRIMARY);
+ replTest.getPrimary();
+}
+
+/****************************************************/
+
+// 0) Start ReplSet with special params:
+// - lower read ticket concurrency
+// - increase yielding
+replTest.startSet();
+replTest.initiate();
+let primary = replTest.getPrimary();
+let db = primary.getDB(dbName);
+let primaryAdmin = primary.getDB("admin");
+let primaryColl = db[collName];
+let queuedReaders = [];
+let newReaders = [];
+
+// 1) Insert 1000 documents.
+jsTestLog("Fill collection [" + dbName + "." + collName + "] with 1000 docs");
+for (let i = 0; i < 1000; i++) {
+ assert.commandWorked(primaryColl.insert({"x": i}));
+}
+jsTestLog("1000 inserts done");
+
+// 2) Kick off parallel readers that perform long collection scans, subject to yields.
+for (let i = 0; i < nQueuedReaders; i++) {
+ queuedReaders.push(startParallelShell(funWithArgs(queuedLongReadsFunc, i), primary.port));
+ jsTestLog("queued reader " + queuedReaders.length + " initiated");
+}
+
+for (let i = 0; i < nNewReaders; i++) {
+ newReaders.push(startParallelShell(funWithArgs(newLongReadsFunc, i), primary.port));
+ jsTestLog("new reader " + newReaders.length + " initiated");
+}
+
+// 3) Sleep with global X Lock (including RSTL), thus queuing up reads.
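+// While the sleep command below holds the global X lock, every reader blocks
+// behind it, building up the backlog of enqueued readers that step 5 relies on.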
+let ns = dbName + "." + collName;
+jsTestLog("Sleeping with Global X Lock on " + ns);
+db.adminCommand({
+ sleep: 1,
+ secs: 5,
+ lock: "w", // MODE_X lock.
+ $comment: "Global lock sleep"
+});
+jsTestLog("Done sleeping with Global X Lock on " + ns);
+
+// 4) Signal new readers that will be received after the global lock is released.
+assert.commandWorked(
+ db.getSiblingDB(dbName).timing_coordination.insertOne({_id: "begin new readers"}));
+
+// 5) Initiate step down while queue is working its way down to ensure there is a mix of
+// enqueued readers from the global X lock and new readers initiated afterwards.
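+// Proceed only once at least as many reads are in flight as there are read
+// tickets, i.e. once the ticket pool is saturated.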
+assert.soon(
+ () => db.getSiblingDB("admin")
+ .aggregate([{$currentOp: {}}, {$match: {"command.aggregate": TestData.collName}}])
+ .toArray()
+ .length >= kNumReadTickets,
+ "Expected more readers than read tickets.");
+runStepDown();
+
+// 6) Stop Readers.
+jsTestLog("Stopping Readers");
+assert.commandWorked(db.getSiblingDB(dbName).timing_coordination.insertOne({_id: "stop reading"}));
+
+for (let i = 0; i < queuedReaders.length; i++) {
+ const awaitQueuedReader = queuedReaders[i];
+ awaitQueuedReader();
+ jsTestLog("queued reader " + i + " done");
+}
+for (let i = 0; i < newReaders.length; i++) {
+ const awaitNewReader = newReaders[i];
+ awaitNewReader();
+ jsTestLog("new reader " + i + " done");
+}
+queuedReaders = [];
+newReaders = [];
+
+replTest.stopSet();
+})();
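
For readers unfamiliar with the parallel-shell helpers the test leans on: funWithArgs
(from jstests/libs/parallel_shell_helpers.js) serializes a function and its arguments
into a snippet that startParallelShell runs in a separate shell process, returning a
handle that joins the shell. A minimal sketch, assuming an existing connection `conn`
to a running mongod (`conn` is a placeholder, not part of the patch):

    load("jstests/libs/parallel_shell_helpers.js");
    // Run the function in a separate shell process against conn's port.
    const joinShell = startParallelShell(funWithArgs(function(id) {
        jsTestLog("hello from parallel shell " + id);
    }, 42), conn.port);
    joinShell();  // Blocks until the parallel shell exits with code 0.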
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index f0e3caf4ce4..d6d72997727 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -822,13 +822,21 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna
}
std::vector<OneLock>::const_iterator it = state.locks.begin();
- // If we locked the PBWM, it must be locked before the resourceIdGlobal and
- // resourceIdReplicationStateTransitionLock resources.
+ // If we locked the PBWM, it must be locked before the
+ // resourceIdFeatureCompatibilityVersion, resourceIdReplicationStateTransitionLock, and
+ // resourceIdGlobal resources.
if (it != state.locks.end() && it->resourceId == resourceIdParallelBatchWriterMode) {
lock(opCtx, it->resourceId, it->mode);
it++;
}
+ // If we locked the FCV lock, it must be locked before the
+ // resourceIdReplicationStateTransitionLock and resourceIdGlobal resources.
+ if (it != state.locks.end() && it->resourceId == resourceIdFeatureCompatibilityVersion) {
+ lock(opCtx, it->resourceId, it->mode);
+ it++;
+ }
+
// If we locked the RSTL, it must be locked before the resourceIdGlobal resource.
if (it != state.locks.end() && it->resourceId == resourceIdReplicationStateTransitionLock) {
lock(opCtx, it->resourceId, it->mode);
@@ -837,6 +845,8 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna
lockGlobal(opCtx, state.globalMode);
for (; it != state.locks.end(); it++) {
+ // Ensures we don't acquire locks out of order which can lead to deadlock.
+ invariant(it->resourceId.getType() != ResourceType::RESOURCE_GLOBAL);
lock(opCtx, it->resourceId, it->mode);
}
invariant(_modeForTicket != MODE_NONE);
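
Taken together with the pre-existing PBWM and RSTL special cases, the restore path now
re-establishes the acquisition order spelled out in the comments above:

    resourceIdParallelBatchWriterMode (PBWM)
        -> resourceIdFeatureCompatibilityVersion (FCV)
        -> resourceIdReplicationStateTransitionLock (RSTL)
        -> resourceIdGlobal

and the new invariant catches any remaining RESOURCE_GLOBAL-type resource that would
otherwise be reacquired out of order by the generic loop.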