summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jordi Olivares Provencio <jordi.olivares-provencio@mongodb.com> 2022-11-03 16:39:30 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-03 17:08:12 +0000
commit: 9709ef3d8cbb715196c5609339b06793a4551886 (patch)
tree: c4c65e074c715259ca945aaad9c9c884f86ba0ba
parent: f71c00d795f7744de3a899a76d6cbbc61fe01a25 (diff)
download: mongo-9709ef3d8cbb715196c5609339b06793a4551886.tar.gz
SERVER-60016 serverStatus should not block on the RSTL lock with the WT storage engine
-rw-r--r-- jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js | 72
-rw-r--r-- src/mongo/db/commands/sleep_command.cpp | 22
-rw-r--r-- src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp | 24
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp | 8
4 files changed, 104 insertions, 22 deletions
diff --git a/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js
new file mode 100644
index 00000000000..40eb22187fb
--- /dev/null
+++ b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js
@@ -0,0 +1,72 @@
+/**
+ * Tests that serverStatus is not blocked by an exclusive RSTL lock. Only enforcing on WT.
+ *
+ * @tags: [
+ * # Certain serverStatus sections might pivot to taking the RSTL lock if an action is unsupported
+ * # by a non-WT storage engine.
+ * requires_wiredtiger,
+ * # Replication requires journaling support so this tag also implies exclusion from --nojournal
+ * # test configurations.
+ * requires_sharding,
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/parallel_shell_helpers.js"); // startParallelShell
+load("jstests/libs/wait_for_command.js"); // waitForCommand
+
+// Use a sharding environment in order to exercise the sharding specific serverStatus sections.
+const st = new ShardingTest(
+ {mongos: 1, config: 1, shards: 1, rs: {nodes: 1, setParameter: {watchdogPeriodSeconds: 60}}});
+const testDB = st.rs0.getPrimary().getDB("test");  // all commands below target the shard's primary
+
+jsTestLog("Starting the sleep command in a parallel thread to take the RSTL MODE_X lock");
+let rstlXLockSleepJoin = startParallelShell(() => {
+ jsTestLog("Parallel Shell: about to start sleep command");
+ assert.commandFailedWithCode(db.adminCommand({
+ sleep: 1,
+ secs: 60 * 60,
+ // RSTL MODE_X lock.
+ lockTarget: "RSTL",
+ $comment: "RSTL lock sleep"
+ }),
+ ErrorCodes.Interrupted);
+}, testDB.getMongo().port);
+
+jsTestLog("Waiting for the sleep command to start and fetch the opID");
+const sleepCmdOpID =
+ waitForCommand("RSTL lock", op => (op["command"]["$comment"] == "RSTL lock sleep"), testDB);  // matched via the $comment set on the sleep command
+
+jsTestLog("Wait for the sleep command to log that the RSTL MODE_X lock was acquired");
+checkLog.containsJson(testDB, 6001600);  // 6001600 is the log id the sleep command emits once it holds the RSTL
+
+try {
+ jsTestLog("Running serverStatus concurrently with the RSTL X lock held by the sleep cmd");
+ const serverStatusResult = assert.commandWorked(testDB.adminCommand({
+ serverStatus: 1,  // the sections listed below are requested explicitly
+ repl: 1,
+ mirroredReads: 1,
+ advisoryHostFQDNs: 1,
+ defaultRWConcern: 1,
+ heapProfile: 1,
+ http_client: 1,
+ latchAnalysis: 1,
+ opWriteConcernCounters: 1,
+ oplog: 1,
+ resourceConsumption: 1,
+ sharding: 1,
+ tenantMigrationAccessBlocker: 1,
+ watchdog: 1,
+ maxTimeMS: 20 * 1000  // far shorter than the one-hour sleep; presumably a serverStatus blocked on the RSTL times out and fails commandWorked
+ }));
+ jsTestLog("ServerStatus results: " + tojson(serverStatusResult));
+} finally {
+ jsTestLog("Ensure the sleep cmd releases the lock so that the server can shutdown");
+ assert.commandWorked(testDB.killOp(sleepCmdOpID)); // kill the sleep cmd
+ rstlXLockSleepJoin(); // wait for the thread running the sleep cmd to finish
+}
+
+st.stop();
+})();
diff --git a/src/mongo/db/commands/sleep_command.cpp b/src/mongo/db/commands/sleep_command.cpp
index 625dab44b37..8475eaa75d6 100644
--- a/src/mongo/db/commands/sleep_command.cpp
+++ b/src/mongo/db/commands/sleep_command.cpp
@@ -78,6 +78,9 @@ public:
const StringData& ns) {
if (ns.empty()) {
Lock::GlobalLock lk(opCtx, mode, Date_t::max(), Lock::InterruptBehavior::kThrow);
+ LOGV2(6001601,
+ "Global lock acquired by sleep command.",
+ "lockMode"_attr = modeName(mode));
opCtx->sleepFor(Milliseconds(millis));
return;
}
@@ -94,6 +97,9 @@ public:
Lock::DBLock dbLock(opCtx, nss.db(), dbMode, Date_t::max());
if (nsIsDbOnly(ns)) {
+ LOGV2(6001602,
+ "Database lock acquired by sleep command.",
+ "lockMode"_attr = modeName(dbMode));
opCtx->sleepFor(Milliseconds(millis));
return;
}
@@ -103,6 +109,9 @@ public:
"lockTarget is not a valid namespace",
NamespaceString::validCollectionComponent(ns));
Lock::CollectionLock collLock(opCtx, nss, mode, Date_t::max());
+ LOGV2(6001603,
+ "Collection lock acquired by sleep command.",
+ "lockMode"_attr = modeName(mode));
opCtx->sleepFor(Milliseconds(millis));
}
@@ -113,6 +122,14 @@ public:
pbwm.unlock();
}
+ void _sleepInRSTL(mongo::OperationContext* opCtx, long long millis) {  // Sleeps for `millis` ms while holding the RSTL resource in MODE_X.
+ Lock::ResourceLock rstl(opCtx->lockState(), resourceIdReplicationStateTransitionLock);
+ rstl.lock(nullptr, MODE_X);  // NOTE(review): nullptr is presumably the opCtx argument, making this wait uninterruptible -- confirm.
+ LOGV2(6001600, "RSTL MODE_X lock acquired by sleep command.");  // Emitted only after lock() returns; the jstest waits on this id.
+ opCtx->sleepFor(Milliseconds(millis));
+ rstl.unlock();  // NOTE(review): not reached if sleepFor() throws on interruption; presumably ~ResourceLock releases -- confirm.
+ }
+
CmdSleep() : BasicCommand("sleep") {}
bool run(OperationContext* opCtx,
const std::string& ns,
@@ -171,6 +188,11 @@ public:
continue;
}
+ if (lockTarget == "RSTL") {
+ _sleepInRSTL(opCtx, msRemaining.count());
+ continue;
+ }
+
if (!cmdObj["lock"]) {
// The caller may specify either 'w' as true or false to take a global X lock or
// global S lock, respectively.
diff --git a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp
index 58788e9439a..94cf1387ea3 100644
--- a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp
+++ b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp
@@ -58,26 +58,10 @@ public:
return builder.obj();
}
- Lock::GlobalLock lk(
- opCtx, LockMode::MODE_IS, Date_t::now(), Lock::InterruptBehavior::kLeaveUnlocked);
- if (!lk.isLocked()) {
- LOGV2_DEBUG(4822100, 2, "Failed to retrieve oplogTruncation statistics");
- return BSONObj();
- }
-
- AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead);
- const auto& oplog = oplogRead.getCollection();
- if (oplog) {
- const auto localDb =
- DatabaseHolder::get(opCtx)->getDb(opCtx, NamespaceString::kLocalDb);
- invariant(localDb);
- AutoStatsTracker statsTracker(
- opCtx,
- NamespaceString::kRsOplogNamespace,
- Top::LockType::ReadLocked,
- AutoStatsTracker::LogMode::kUpdateTop,
- CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(NamespaceString::kLocalDb));
- oplog->getRecordStore()->getOplogTruncateStats(builder);
+ auto oplogCollection = CollectionCatalog::get(opCtx)->lookupCollectionByNamespaceForRead(
+ opCtx, NamespaceString::kRsOplogNamespace);
+ if (oplogCollection) {
+ oplogCollection->getRecordStore()->getOplogTruncateStats(builder);
}
return builder.obj();
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
index b05d7d0d552..89ca98b23fd 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
@@ -57,8 +57,12 @@ bool WiredTigerServerStatusSection::includeByDefault() const {
BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* opCtx,
const BSONElement& configElement) const {
- Lock::GlobalLock lk(
- opCtx, LockMode::MODE_IS, Date_t::now(), Lock::InterruptBehavior::kLeaveUnlocked);
+ Lock::GlobalLock lk(opCtx,
+ LockMode::MODE_IS,
+ Date_t::now(),
+ Lock::InterruptBehavior::kLeaveUnlocked,
+ // Replication state change does not affect the following operation.
+ true /* skipRSTLLock */);
if (!lk.isLocked()) {
LOGV2_DEBUG(3088800, 2, "Failed to retrieve wiredTiger statistics");
return BSONObj();