summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Caimano <ben.caimano@mongodb.com>2019-09-17 13:14:26 +0000
committerevergreen <evergreen@mongodb.com>2019-09-17 13:14:26 +0000
commit149c8f62af00f9dead5d4046368b8a2c3974cfbe (patch)
tree8e6abd648d927172d23fd1eb017ad6dc3ecc6a2a
parent3897a749daab4f9e600af14205eba59775f9b278 (diff)
downloadmongo-149c8f62af00f9dead5d4046368b8a2c3974cfbe.tar.gz
SERVER-42595 Refactor failpoint in curop to fix jstest
-rw-r--r--jstests/core/currentop_waiting_for_latch.js90
-rw-r--r--jstests/libs/parallelTester.js3
-rw-r--r--src/mongo/db/curop.cpp21
-rw-r--r--src/mongo/db/pipeline/mongo_process_common.cpp8
-rw-r--r--src/mongo/platform/mutex.cpp10
-rw-r--r--src/mongo/platform/mutex.h8
-rw-r--r--src/mongo/util/diagnostic_info.cpp104
-rw-r--r--src/mongo/util/diagnostic_info.h23
8 files changed, 191 insertions, 76 deletions
diff --git a/jstests/core/currentop_waiting_for_latch.js b/jstests/core/currentop_waiting_for_latch.js
index 5f23b23c8e2..90b7e0fd478 100644
--- a/jstests/core/currentop_waiting_for_latch.js
+++ b/jstests/core/currentop_waiting_for_latch.js
@@ -10,35 +10,69 @@
const adminDB = db.getSiblingDB("admin");
const getCurrentOp = function() {
+ jsTestLog("Getting $currentOp");
+ let result =
+ adminDB
+ .aggregate(
+ [
+ {
+ $currentOp:
+ {allUsers: true, idleConnections: true, localOps: true, backtrace: true}
+ },
+ ],
+ {readConcern: {level: "local"}})
+ .toArray();
+ assert(result);
+ return result;
+};
+
+const blockedOpClientName = "DiagnosticCaptureTest";
+
+const getClientName = function() {
let myUri = adminDB.runCommand({whatsmyuri: 1}).you;
- return adminDB
- .aggregate(
- [
- {$currentOp: {localOps: true, allUsers: false, backtrace: true}},
- {$match: {client: myUri}}
- ],
- {readConcern: {level: "local"}})
- .toArray()[0];
+ return adminDB.aggregate([{$currentOp: {localOps: true}}, {$match: {client: myUri}}])
+ .toArray()[0]
+ .desc;
};
-assert.commandWorked(db.adminCommand(
- {"configureFailPoint": 'keepDiagnosticCaptureOnFailedLock', "mode": 'alwaysOn'}));
-var result = getCurrentOp();
-
-assert(result.hasOwnProperty("waitingForLatch"));
-assert(result["waitingForLatch"].hasOwnProperty("timestamp"));
-assert(result["waitingForLatch"].hasOwnProperty("captureName"));
-assert(result["waitingForLatch"].hasOwnProperty("backtrace"));
-result["waitingForLatch"]["backtrace"].forEach(function(obj) {
- assert(obj.hasOwnProperty("addr"));
- assert(typeof obj["addr"] === "string");
- assert(obj.hasOwnProperty("path"));
- assert(typeof obj["path"] === "string");
-});
-
-assert.commandWorked(
- db.adminCommand({"configureFailPoint": 'keepDiagnosticCaptureOnFailedLock', "mode": 'off'}));
-result = getCurrentOp();
-
-assert(!result.hasOwnProperty("waitingForLatch"));
+let clientName = getClientName();
+
+try {
+ assert.commandWorked(db.adminCommand({
+ "configureFailPoint": 'currentOpSpawnsThreadWaitingForLatch',
+ "mode": 'alwaysOn',
+ "data": {
+ 'clientName': clientName,
+ },
+ }));
+
+ let result = null;
+ getCurrentOp().forEach(function(op) {
+ jsTestLog(tojson(op));
+ if (op["desc"] == blockedOpClientName) {
+ result = op;
+ }
+ });
+ assert(result);
+ assert(result.hasOwnProperty("waitingForLatch"));
+ assert(result["waitingForLatch"].hasOwnProperty("timestamp"));
+ assert(result["waitingForLatch"].hasOwnProperty("captureName"));
+ assert(result["waitingForLatch"].hasOwnProperty("backtrace"));
+ result["waitingForLatch"]["backtrace"].forEach(function(frame) {
+ assert(frame.hasOwnProperty("addr"));
+ assert(typeof frame["addr"] === "string");
+ assert(frame.hasOwnProperty("path"));
+ assert(typeof frame["path"] === "string");
+ });
+} finally {
+ assert.commandWorked(db.adminCommand(
+ {"configureFailPoint": 'currentOpSpawnsThreadWaitingForLatch', "mode": 'off'}));
+
+ getCurrentOp().forEach(function(op) {
+ jsTestLog(tojson(op));
+ if (op["desc"] == blockedOpClientName) {
+ assert(!op.hasOwnProperty("waitingForLatch"));
+ }
+ });
+}
})();
diff --git a/jstests/libs/parallelTester.js b/jstests/libs/parallelTester.js
index 41ea256713e..489f8846637 100644
--- a/jstests/libs/parallelTester.js
+++ b/jstests/libs/parallelTester.js
@@ -204,9 +204,6 @@ if (typeof _threadInject != "undefined") {
// Destroys and recreates the catalog, which will interfere with other tests.
"restart_catalog.js",
- // Concurrently using keepDiagnosticCaptureOnFailedLock fails, will be fixed soon.
- "currentop_waiting_for_latch.js",
-
// This test works close to the BSON document limit for entries in the durable catalog,
// so running it in parallel with other tests will cause failures.
"long_collection_names.js",
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 6fcb94b2938..cef1fcd7449 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -48,7 +48,6 @@
#include "mongo/db/prepare_conflict_tracker.h"
#include "mongo/db/query/getmore_request.h"
#include "mongo/db/query/plan_summary_stats.h"
-#include "mongo/platform/mutex.h"
#include "mongo/rpc/metadata/client_metadata.h"
#include "mongo/rpc/metadata/client_metadata_ismaster.h"
#include "mongo/rpc/metadata/impersonated_user_metadata.h"
@@ -234,28 +233,12 @@ CurOp* CurOp::get(const OperationContext& opCtx) {
return _curopStack(opCtx).top();
}
-namespace {
-
-struct {
- Mutex mutex = Mutex("TestMutex"_sd, Seconds(1));
- stdx::unique_lock<Mutex> lock = stdx::unique_lock<Mutex>(mutex, stdx::defer_lock);
-} gHangLock;
-
-} // namespace
void CurOp::reportCurrentOpForClient(OperationContext* opCtx,
Client* client,
bool truncateOps,
bool backtraceMode,
BSONObjBuilder* infoBuilder) {
invariant(client);
- if (MONGO_unlikely(keepDiagnosticCaptureOnFailedLock.shouldFail())) {
- gHangLock.lock.lock();
- try {
- stdx::lock_guard testLock(gHangLock.mutex);
- } catch (const DBException& e) {
- log() << "Successfully caught " << e;
- }
- }
OperationContext* clientOpCtx = client->getOperationContext();
@@ -336,10 +319,6 @@ void CurOp::reportCurrentOpForClient(OperationContext* opCtx,
}
}
}
-
- if (MONGO_unlikely(keepDiagnosticCaptureOnFailedLock.shouldFail())) {
- gHangLock.lock.unlock();
- }
}
void CurOp::setGenericCursor_inlock(GenericCursor gc) {
diff --git a/src/mongo/db/pipeline/mongo_process_common.cpp b/src/mongo/db/pipeline/mongo_process_common.cpp
index 1c3a9166cfa..3d20d646258 100644
--- a/src/mongo/db/pipeline/mongo_process_common.cpp
+++ b/src/mongo/db/pipeline/mongo_process_common.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
+
#include "mongo/platform/basic.h"
#include "mongo/db/pipeline/mongo_process_common.h"
@@ -39,8 +41,12 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/service_context.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/platform/mutex.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/grid.h"
+#include "mongo/util/diagnostic_info.h"
+#include "mongo/util/log.h"
#include "mongo/util/net/socket_utils.h"
namespace mongo {
@@ -58,6 +64,8 @@ std::vector<BSONObj> MongoProcessCommon::getCurrentOps(
std::vector<BSONObj> ops;
+ auto blockedOpGuard = DiagnosticInfo::maybeMakeBlockedOpForTest(opCtx->getClient());
+
for (ServiceContext::LockedClientsCursor cursor(opCtx->getClient()->getServiceContext());
Client* client = cursor.next();) {
invariant(client);
diff --git a/src/mongo/platform/mutex.cpp b/src/mongo/platform/mutex.cpp
index 47dd212c92c..49ececfd25e 100644
--- a/src/mongo/platform/mutex.cpp
+++ b/src/mongo/platform/mutex.cpp
@@ -43,17 +43,11 @@ void Mutex::lock() {
if (gLockActions) {
gLockActions->onContendedLock(_name);
}
- hasLock = _mutex.try_lock_for(_lockTimeout.toSystemDuration() -
- kContendedLockTimeout.toSystemDuration());
- if (gLockActions && !hasLock) {
- gLockActions->onFailedLock();
- }
- uassert(
- ErrorCodes::InternalError, "Unable to take latch, wait time exceeds set timeout", hasLock);
+ _mutex.lock();
}
void Mutex::unlock() {
if (gLockActions) {
- gLockActions->onUnlock();
+ gLockActions->onUnlock(_name);
}
_mutex.unlock();
}
diff --git a/src/mongo/platform/mutex.h b/src/mongo/platform/mutex.h
index 4ba2a7b7737..ded61a90783 100644
--- a/src/mongo/platform/mutex.h
+++ b/src/mongo/platform/mutex.h
@@ -42,20 +42,18 @@ class LockActions {
public:
virtual ~LockActions() = default;
virtual void onContendedLock(const StringData& name) = 0;
- virtual void onUnlock() = 0;
- virtual void onFailedLock() = 0;
+ virtual void onUnlock(const StringData& name) = 0;
};
class Mutex {
public:
static constexpr auto kAnonymousMutexStr = "AnonymousMutex"_sd;
+ static constexpr Milliseconds kContendedLockTimeout = Milliseconds(100);
Mutex() : Mutex(kAnonymousMutexStr) {}
// Note that StringData is a view type, thus the underlying string for _name must outlive any
// given Mutex
explicit Mutex(const StringData& name) : _name(name) {}
- explicit Mutex(const StringData& name, Seconds lockTimeout)
- : _name(name), _lockTimeout(lockTimeout) {}
void lock();
void unlock();
@@ -68,8 +66,6 @@ public:
private:
const StringData _name;
- const Seconds _lockTimeout = Seconds(60);
- static constexpr Milliseconds kContendedLockTimeout = Milliseconds(100);
stdx::timed_mutex _mutex;
};
diff --git a/src/mongo/util/diagnostic_info.cpp b/src/mongo/util/diagnostic_info.cpp
index a05dcb8a3f6..9681414e797 100644
--- a/src/mongo/util/diagnostic_info.cpp
+++ b/src/mongo/util/diagnostic_info.cpp
@@ -54,9 +54,79 @@ using namespace fmt::literals;
namespace mongo {
// Maximum number of stack frames to appear in a backtrace.
const unsigned int kMaxBackTraceFrames = 100;
-MONGO_FAIL_POINT_DEFINE(keepDiagnosticCaptureOnFailedLock);
namespace {
+MONGO_FAIL_POINT_DEFINE(currentOpSpawnsThreadWaitingForLatch);
+
+constexpr auto kBlockedOpMutexName = "BlockedOpForTest"_sd;
+
+class BlockedOp {
+public:
+ void start(ServiceContext* serviceContext);
+ void join();
+ void setIsContended(bool value);
+
+private:
+ Mutex _testMutex{kBlockedOpMutexName};
+
+ stdx::condition_variable _cv;
+ stdx::mutex _m;
+
+ struct State {
+ bool isContended = false;
+ boost::optional<stdx::thread> thread{boost::none};
+ };
+ State _state;
+} gBlockedOp;
+
+// This function causes us to make an additional thread with a self-contended lock so that
+// $currentOp can observe its DiagnosticInfo. Note that we track each thread that called us so that
+// we can join the thread when they are gone.
+void BlockedOp::start(ServiceContext* serviceContext) {
+ stdx::unique_lock<stdx::mutex> lk(_m);
+
+ invariant(!_state.thread);
+
+ _testMutex.lock();
+ _state.thread = stdx::thread([this, serviceContext]() mutable {
+ ThreadClient tc("DiagnosticCaptureTest", serviceContext);
+
+ log() << "Entered currentOpSpawnsThreadWaitingForLatch thread";
+
+ stdx::lock_guard testLock(_testMutex);
+
+ log() << "Joining currentOpSpawnsThreadWaitingForLatch thread";
+ });
+
+ _cv.wait(lk, [this] { return _state.isContended; });
+ log() << "Started thread for currentOpSpawnsThreadWaitingForLatch";
+}
+
+// This function unlocks testMutex and joins if there are no more callers of BlockedOp::start()
+// remaining
+void BlockedOp::join() {
+ auto thread = [&] {
+ stdx::lock_guard<stdx::mutex> lk(_m);
+
+ invariant(_state.thread);
+
+ _testMutex.unlock();
+
+ _state.isContended = false;
+ _cv.notify_one();
+
+ return std::exchange(_state.thread, boost::none);
+ }();
+ thread->join();
+}
+
+void BlockedOp::setIsContended(bool value) {
+ log() << "Setting isContended to " << (value ? "true" : "false");
+ stdx::lock_guard lk(_m);
+ _state.isContended = value;
+ _cv.notify_one();
+}
+
const auto gDiagnosticHandle = Client::declareDecoration<DiagnosticInfo::Diagnostic>();
MONGO_INITIALIZER(LockActions)(InitializerContext* context) {
@@ -68,15 +138,15 @@ MONGO_INITIALIZER(LockActions)(InitializerContext* context) {
Client::getCurrent(),
std::make_shared<DiagnosticInfo>(takeDiagnosticInfo(name)));
}
+
+ if (currentOpSpawnsThreadWaitingForLatch.shouldFail() &&
+ (name == kBlockedOpMutexName)) {
+ gBlockedOp.setIsContended(true);
+ }
}
- void onUnlock() override {
+ void onUnlock(const StringData&) override {
DiagnosticInfo::Diagnostic::clearDiagnostic();
}
- void onFailedLock() override {
- if (!MONGO_unlikely(keepDiagnosticCaptureOnFailedLock.shouldFail())) {
- DiagnosticInfo::Diagnostic::clearDiagnostic();
- }
- }
};
std::unique_ptr<LockActions> mutexPointer = std::make_unique<LockActionsSubclass>();
@@ -275,4 +345,24 @@ DiagnosticInfo takeDiagnosticInfo(const StringData& captureName) {
captureName,
getBacktraceAddresses());
}
+
+DiagnosticInfo::BlockedOpGuard::~BlockedOpGuard() {
+ gBlockedOp.join();
+}
+
+auto DiagnosticInfo::maybeMakeBlockedOpForTest(Client* client) -> std::unique_ptr<BlockedOpGuard> {
+ std::unique_ptr<BlockedOpGuard> guard;
+ currentOpSpawnsThreadWaitingForLatch.executeIf(
+ [&](const BSONObj&) {
+ gBlockedOp.start(client->getServiceContext());
+ guard = std::make_unique<BlockedOpGuard>();
+ },
+ [&](const BSONObj& data) {
+ return data.hasField("clientName") &&
+ (data.getStringField("clientName") == client->desc());
+ });
+
+ return guard;
+}
+
} // namespace mongo
diff --git a/src/mongo/util/diagnostic_info.h b/src/mongo/util/diagnostic_info.h
index 0a4958f35de..a2c3339d3a5 100644
--- a/src/mongo/util/diagnostic_info.h
+++ b/src/mongo/util/diagnostic_info.h
@@ -30,19 +30,29 @@
#pragma once
#include "mongo/base/string_data.h"
-#include "mongo/db/service_context.h"
-#include "mongo/stdx/mutex.h"
+#include "mongo/db/client.h"
+#include "mongo/platform/condition_variable.h"
+#include "mongo/platform/mutex.h"
#include "mongo/util/fail_point_service.h"
#include "mongo/util/time_support.h"
namespace mongo {
-extern FailPoint keepDiagnosticCaptureOnFailedLock;
/**
* DiagnosticInfo keeps track of diagnostic information such as a developer provided
* name, the time when a lock was first acquired, and a partial caller call stack.
*/
class DiagnosticInfo {
public:
+ /**
+ * A simple RAII guard to attempt to join a blocked op once it is no longer needed
+ *
+ * This type is used in tests in conjunction with maybeMakeBlockedOpForTest
+ */
+ class BlockedOpGuard {
+ public:
+ ~BlockedOpGuard();
+ };
+
struct Diagnostic {
static std::shared_ptr<DiagnosticInfo> get(Client*);
static void set(Client*, std::shared_ptr<DiagnosticInfo>);
@@ -92,6 +102,12 @@ public:
std::string toString() const;
friend DiagnosticInfo takeDiagnosticInfo(const StringData& captureName);
+ /**
+ * This function checks the FailPoint currentOpSpawnsThreadWaitingForLatch and potentially
+ * launches a blocked operation to populate waitingForLatch for $currentOp.
+ */
+ static std::unique_ptr<BlockedOpGuard> maybeMakeBlockedOpForTest(Client* client);
+
private:
friend bool operator==(const DiagnosticInfo& info1, const DiagnosticInfo& info2);
friend bool operator!=(const DiagnosticInfo& info1, const DiagnosticInfo& info2) {
@@ -124,4 +140,5 @@ inline std::ostream& operator<<(std::ostream& s, const DiagnosticInfo& info) {
* Captures the diagnostic information based on the caller's context.
*/
DiagnosticInfo takeDiagnosticInfo(const StringData& captureName);
+
} // namespace mongo