diff options
author | Jordi Olivares Provencio <jordi.olivares-provencio@mongodb.com> | 2022-11-03 16:39:30 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-03 17:08:12 +0000 |
commit | 9709ef3d8cbb715196c5609339b06793a4551886 (patch) | |
tree | c4c65e074c715259ca945aaad9c9c884f86ba0ba | |
parent | f71c00d795f7744de3a899a76d6cbbc61fe01a25 (diff) | |
download | mongo-9709ef3d8cbb715196c5609339b06793a4551886.tar.gz |
SERVER-60016 serverStatus should not block on the RSTL lock with the WT storage engine
4 files changed, 104 insertions, 22 deletions
diff --git a/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js new file mode 100644 index 00000000000..40eb22187fb --- /dev/null +++ b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js @@ -0,0 +1,72 @@ +/** + * Tests that serverStatus is not blocked by an exclusive RSTL lock. Only enforcing on WT. + * + * @tags: [ + * # Certain serverStatus sections might pivot to taking the RSTL lock if an action is unsupported + * # by a non-WT storage engine. + * requires_wiredtiger, + * # Replication requires journaling support so this tag also implies exclusion from --nojournal + * # test configurations. + * requires_sharding, + * requires_replication, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/parallel_shell_helpers.js"); // startParallelShell +load("jstests/libs/wait_for_command.js"); // waitForCommand + +// Use a sharding environment in order to exercise the sharding specific serverStatus sections. +const st = new ShardingTest( + {mongos: 1, config: 1, shards: 1, rs: {nodes: 1, setParameter: {watchdogPeriodSeconds: 60}}}); +const testDB = st.rs0.getPrimary().getDB("test"); + +jsTestLog("Starting the sleep command in a parallel thread to take the RSTL MODE_X lock"); +let rstlXLockSleepJoin = startParallelShell(() => { + jsTestLog("Parallel Shell: about to start sleep command"); + assert.commandFailedWithCode(db.adminCommand({ + sleep: 1, + secs: 60 * 60, + // RSTL MODE_X lock. + lockTarget: "RSTL", + $comment: "RSTL lock sleep" + }), + ErrorCodes.Interrupted); +}, testDB.getMongo().port); + +jsTestLog("Waiting for the sleep command to start and fetch the opID"); +const sleepCmdOpID = + waitForCommand("RSTL lock", op => (op["command"]["$comment"] == "RSTL lock sleep"), testDB); + +jsTestLog("Wait for the sleep command to log that the RSTL MODE_X lock was acquired"); +checkLog.containsJson(testDB, 6001600); + +try { + jsTestLog("Running serverStatus concurrently with the RSTL X lock held by the sleep cmd"); + const serverStatusResult = assert.commandWorked(testDB.adminCommand({ + serverStatus: 1, + repl: 1, + mirroredReads: 1, + advisoryHostFQDNs: 1, + defaultRWConcern: 1, + heapProfile: 1, + http_client: 1, + latchAnalysis: 1, + opWriteConcernCounters: 1, + oplog: 1, + resourceConsumption: 1, + sharding: 1, + tenantMigrationAccessBlocker: 1, + watchdog: 1, + maxTimeMS: 20 * 1000 + })); + jsTestLog("ServerStatus results: " + tojson(serverStatusResult)); +} finally { + jsTestLog("Ensure the sleep cmd releases the lock so that the server can shutdown"); + assert.commandWorked(testDB.killOp(sleepCmdOpID)); // kill the sleep cmd + rstlXLockSleepJoin(); // wait for the thread running the sleep cmd to finish +} + +st.stop(); +})(); diff --git a/src/mongo/db/commands/sleep_command.cpp b/src/mongo/db/commands/sleep_command.cpp index 625dab44b37..8475eaa75d6 100644 --- a/src/mongo/db/commands/sleep_command.cpp +++ b/src/mongo/db/commands/sleep_command.cpp @@ -78,6 +78,9 @@ public: const StringData& ns) { if (ns.empty()) { Lock::GlobalLock lk(opCtx, mode, Date_t::max(), Lock::InterruptBehavior::kThrow); + LOGV2(6001601, + "Global lock acquired by sleep command.", + "lockMode"_attr = modeName(mode)); opCtx->sleepFor(Milliseconds(millis)); return; } @@ -94,6 +97,9 @@ public: Lock::DBLock dbLock(opCtx, nss.db(), dbMode, Date_t::max()); if (nsIsDbOnly(ns)) { + LOGV2(6001602, + "Database lock acquired by sleep command.", + "lockMode"_attr = modeName(dbMode)); opCtx->sleepFor(Milliseconds(millis)); return; } @@ -103,6 +109,9 @@ public: "lockTarget is not a valid namespace", NamespaceString::validCollectionComponent(ns)); Lock::CollectionLock collLock(opCtx, nss, mode, Date_t::max()); + LOGV2(6001603, + "Collection lock acquired by sleep command.", + "lockMode"_attr = modeName(mode)); opCtx->sleepFor(Milliseconds(millis)); } @@ -113,6 +122,14 @@ public: pbwm.unlock(); } + void _sleepInRSTL(mongo::OperationContext* opCtx, long long millis) { + Lock::ResourceLock rstl(opCtx->lockState(), resourceIdReplicationStateTransitionLock); + rstl.lock(nullptr, MODE_X); + LOGV2(6001600, "RSTL MODE_X lock acquired by sleep command."); + opCtx->sleepFor(Milliseconds(millis)); + rstl.unlock(); + } + CmdSleep() : BasicCommand("sleep") {} bool run(OperationContext* opCtx, const std::string& ns, @@ -171,6 +188,11 @@ public: continue; } + if (lockTarget == "RSTL") { + _sleepInRSTL(opCtx, msRemaining.count()); + continue; + } + if (!cmdObj["lock"]) { // The caller may specify either 'w' as true or false to take a global X lock or // global S lock, respectively. diff --git a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp index 58788e9439a..94cf1387ea3 100644 --- a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp +++ b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp @@ -58,26 +58,10 @@ public: return builder.obj(); } - Lock::GlobalLock lk( - opCtx, LockMode::MODE_IS, Date_t::now(), Lock::InterruptBehavior::kLeaveUnlocked); - if (!lk.isLocked()) { - LOGV2_DEBUG(4822100, 2, "Failed to retrieve oplogTruncation statistics"); - return BSONObj(); - } - - AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead); - const auto& oplog = oplogRead.getCollection(); - if (oplog) { - const auto localDb = - DatabaseHolder::get(opCtx)->getDb(opCtx, NamespaceString::kLocalDb); - invariant(localDb); - AutoStatsTracker statsTracker( - opCtx, - NamespaceString::kRsOplogNamespace, - Top::LockType::ReadLocked, - AutoStatsTracker::LogMode::kUpdateTop, - CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(NamespaceString::kLocalDb)); - oplog->getRecordStore()->getOplogTruncateStats(builder); + auto oplogCollection = CollectionCatalog::get(opCtx)->lookupCollectionByNamespaceForRead( + opCtx, NamespaceString::kRsOplogNamespace); + if (oplogCollection) { + oplogCollection->getRecordStore()->getOplogTruncateStats(builder); } return builder.obj(); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp index b05d7d0d552..89ca98b23fd 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp @@ -57,8 +57,12 @@ bool WiredTigerServerStatusSection::includeByDefault() const { BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* opCtx, const BSONElement& configElement) const { - Lock::GlobalLock lk( - opCtx, LockMode::MODE_IS, Date_t::now(), Lock::InterruptBehavior::kLeaveUnlocked); + Lock::GlobalLock lk(opCtx, + LockMode::MODE_IS, + Date_t::now(), + Lock::InterruptBehavior::kLeaveUnlocked, + // Replication state change does not affect the following operation. + true /* skipRSTLLock */); if (!lk.isLocked()) { LOGV2_DEBUG(3088800, 2, "Failed to retrieve wiredTiger statistics"); return BSONObj(); |