summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBlake Oler <blake.oler@mongodb.com>2022-11-28 16:16:52 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-28 18:09:26 +0000
commitf0b32c045f0f7309b26dedd4813e5f0a0658dca6 (patch)
treefa23a86eb867d516f7f79926c6aa53ee92359460
parent73b75eee65455053e5662ce528c1a1e5a119c147 (diff)
downloadmongo-f0b32c045f0f7309b26dedd4813e5f0a0658dca6.tar.gz
SERVER-71029 Update slow SessionWorkflow log criteria
-rw-r--r--jstests/noPassthrough/slow_session_workflow_log.js59
-rw-r--r--src/mongo/embedded/service_entry_point_embedded.cpp4
-rw-r--r--src/mongo/embedded/service_entry_point_embedded.h1
-rw-r--r--src/mongo/transport/service_entry_point.h8
-rw-r--r--src/mongo/transport/service_entry_point_impl.cpp4
-rw-r--r--src/mongo/transport/service_entry_point_impl.h10
-rw-r--r--src/mongo/transport/session_workflow.cpp62
-rw-r--r--src/mongo/transport/transport_layer_asio_test.cpp8
-rw-r--r--src/mongo/util/net/ssl_manager_test.cpp4
9 files changed, 96 insertions, 64 deletions
diff --git a/jstests/noPassthrough/slow_session_workflow_log.js b/jstests/noPassthrough/slow_session_workflow_log.js
index ddaf41ae5a1..aa4c131ffd2 100644
--- a/jstests/noPassthrough/slow_session_workflow_log.js
+++ b/jstests/noPassthrough/slow_session_workflow_log.js
@@ -12,14 +12,7 @@ load("jstests/libs/fail_point_util.js");
load("jstests/libs/log.js"); // For findMatchingLogLine
const expectedLogId = 6983000;
-const sleepMillisInQueryFilter = 200;
-const sleepMillisBetweenQueries = 100;
-
-// This specific test doesn't care about precision in the times reported by the server. Since the
-// server may be doing some extra work before listening for another message, and since the server
-// and this test may vary in the precision by which they convert ticks to milliseconds, we allow
-// some error when comparing local times to server times.
-const errorAllowance = 0.9;
+const sleepMillisInSendResponse = 200;
const expectedFields = [
"totalMillis",
@@ -51,27 +44,18 @@ function runTest(conn) {
assert.commandWorked(assert.commandWorked(coll.insert({_id: 1})));
// In order to find the new log lines, a baseline needs to be established.
- let slowSessionWorkflowCount = getSlowLogCount(conn);
-
- // Do a query that we would expect to be fast. Expect no new slow SessionWorkflows are logged.
- let count = coll.find({}).toArray();
- assert.eq(count.length, 1, "expected 1 document");
- let prevSlowSessionWorkflowCount = slowSessionWorkflowCount;
- slowSessionWorkflowCount = getSlowLogCount(conn);
- assert.eq(slowSessionWorkflowCount,
- prevSlowSessionWorkflowCount,
- "There should not be a slow SessionWorkflow log at this point but one was found.");
+ const prevSlowSessionWorkflowCount = getSlowLogCount(conn);
// Wait, then do a query beyond the 100ms threshold. Make sure the slow loop log line exists.
- sleep(sleepMillisBetweenQueries);
- coll.find({$where: 'function() { sleep(' + sleepMillisInQueryFilter + '); return true; }'})
- .toArray();
+ const fp = configureFailPoint(
+ conn, "sessionWorkflowDelaySendMessage", {millis: sleepMillisInSendResponse});
+ coll.find().toArray();
+ fp.off();
let logAndCount = getSlowLogAndCount(conn);
- prevSlowSessionWorkflowCount = slowSessionWorkflowCount;
- slowSessionWorkflowCount = logAndCount.count;
- assert.eq(slowSessionWorkflowCount,
- prevSlowSessionWorkflowCount + 1,
- "Expected to find a slow SessionWorkflow log.");
+ const slowSessionWorkflowCount = logAndCount.count;
+ assert.gt(slowSessionWorkflowCount,
+ prevSlowSessionWorkflowCount,
+ "Expected to find at least one slow SessionWorkflow log.");
// Do some sanity checks over the actual contents of the log.
const slowLoopObj = JSON.parse(logAndCount.log);
@@ -81,26 +65,11 @@ function runTest(conn) {
assert(expectedField in elapsedObj,
"Expected to find field but couldn't: " + expectedField);
});
- let totalElapsed = elapsedObj.totalMillis;
- let activeElapsed = elapsedObj.activeMillis;
- let sourceWorkElapsed = elapsedObj.receiveWorkMillis;
- let processWorkElapsed = elapsedObj.processWorkMillis;
- assert.gte(
- sourceWorkElapsed,
- sleepMillisBetweenQueries * errorAllowance,
- "The time reported sourcing a message didn't include the time sleeping between queries.");
- assert.gte(processWorkElapsed,
- sleepMillisInQueryFilter,
- "The time reported processing work didn't include the sleep in the find filter.");
+ const sendResponseElapsed = elapsedObj.sendResponseMillis;
- // When comparing server time to another server time, there is no reason to expect error.
- assert.gte(activeElapsed,
- processWorkElapsed,
- "The time reported as active time didn't include the time processing work.");
- assert.gte(
- totalElapsed,
- sourceWorkElapsed + activeElapsed,
- "The total time reported didn't include the sum of active time and message sourcing time.");
+ assert.gte(sendResponseElapsed,
+ sleepMillisInSendResponse,
+ "The time reported sending a response didn't include the sleep in the failpoint.");
}
// Test standalone.
diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp
index c1f3ad842f1..2dfb0ba9f2a 100644
--- a/src/mongo/embedded/service_entry_point_embedded.cpp
+++ b/src/mongo/embedded/service_entry_point_embedded.cpp
@@ -187,4 +187,8 @@ size_t ServiceEntryPointEmbedded::numOpenSessions() const {
UASSERT_NOT_IMPLEMENTED;
}
+logv2::LogSeverity ServiceEntryPointEmbedded::slowSessionWorkflowLogSeverity() {
+ UASSERT_NOT_IMPLEMENTED;
+}
+
} // namespace mongo
diff --git a/src/mongo/embedded/service_entry_point_embedded.h b/src/mongo/embedded/service_entry_point_embedded.h
index bfa06dd672c..e4aeca09c5e 100644
--- a/src/mongo/embedded/service_entry_point_embedded.h
+++ b/src/mongo/embedded/service_entry_point_embedded.h
@@ -51,6 +51,7 @@ public:
bool shutdown(Milliseconds timeout) override;
void appendStats(BSONObjBuilder* bob) const override;
size_t numOpenSessions() const override;
+ logv2::LogSeverity slowSessionWorkflowLogSeverity() override;
private:
class Hooks;
diff --git a/src/mongo/transport/service_entry_point.h b/src/mongo/transport/service_entry_point.h
index 8852f869a41..dc008cac720 100644
--- a/src/mongo/transport/service_entry_point.h
+++ b/src/mongo/transport/service_entry_point.h
@@ -34,6 +34,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/client.h"
#include "mongo/db/dbmessage.h"
+#include "mongo/logv2/log_severity.h"
#include "mongo/transport/session.h"
#include "mongo/util/future.h"
@@ -91,6 +92,13 @@ public:
}
/**
+ * Returns the current severity to log slow SessionWorkflow lifecycles. Returns a suppressed
+ * severity every X seconds (defined in SEP::impl) to prevent logs from overwhelming the rest
+ * of the system.
+ */
+ virtual logv2::LogSeverity slowSessionWorkflowLogSeverity() = 0;
+
+ /**
* Processes a request and fills out a DbResponse.
*/
virtual Future<DbResponse> handleRequest(OperationContext* opCtx,
diff --git a/src/mongo/transport/service_entry_point_impl.cpp b/src/mongo/transport/service_entry_point_impl.cpp
index bffdb51d86e..0d98e57d7c9 100644
--- a/src/mongo/transport/service_entry_point_impl.cpp
+++ b/src/mongo/transport/service_entry_point_impl.cpp
@@ -389,6 +389,10 @@ size_t ServiceEntryPointImpl::maxOpenSessions() const {
return _maxSessions;
}
+logv2::LogSeverity ServiceEntryPointImpl::slowSessionWorkflowLogSeverity() {
+ return _slowSessionWorkflowLogSuppressor();
+}
+
bool ServiceEntryPointImpl::shutdownAndWait(Milliseconds timeout) {
auto deadline = _svcCtx->getPreciseClockSource()->now() + timeout;
diff --git a/src/mongo/transport/service_entry_point_impl.h b/src/mongo/transport/service_entry_point_impl.h
index 683ad266fa3..405af8c1bc0 100644
--- a/src/mongo/transport/service_entry_point_impl.h
+++ b/src/mongo/transport/service_entry_point_impl.h
@@ -36,6 +36,7 @@
#include "mongo/base/status.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/logv2/log_severity_suppressor.h"
#include "mongo/stdx/variant.h"
#include "mongo/transport/service_entry_point.h"
#include "mongo/transport/session.h"
@@ -54,6 +55,8 @@ class ServiceContext;
*/
class ServiceEntryPointImpl : public ServiceEntryPoint {
public:
+ static constexpr Seconds kSlowSessionWorkflowLogSuppresionPeriod{5};
+
explicit ServiceEntryPointImpl(ServiceContext* svcCtx);
~ServiceEntryPointImpl();
@@ -76,6 +79,8 @@ public:
size_t maxOpenSessions() const final;
+ logv2::LogSeverity slowSessionWorkflowLogSeverity() final;
+
void onClientDisconnect(Client* client) final;
/** `onClientDisconnect` calls this before doing anything else. */
@@ -95,6 +100,11 @@ private:
size_t _rejectedSessions;
std::unique_ptr<Sessions> _sessions;
+
+ logv2::SeveritySuppressor _slowSessionWorkflowLogSuppressor{
+ kSlowSessionWorkflowLogSuppresionPeriod,
+ logv2::LogSeverity::Info(),
+ logv2::LogSeverity::Debug(2)};
};
/*
diff --git a/src/mongo/transport/session_workflow.cpp b/src/mongo/transport/session_workflow.cpp
index 41d40b03dad..080d1565912 100644
--- a/src/mongo/transport/session_workflow.cpp
+++ b/src/mongo/transport/session_workflow.cpp
@@ -83,6 +83,7 @@ namespace metrics_detail {
X(receivedWork) \
X(processedWork) \
X(sentResponse) \
+ X(yielded) \
X(done) \
/**/
@@ -97,13 +98,15 @@ namespace metrics_detail {
* | [receivedWork]
* | | [processedWork]
* | | | [sentResponse]
- * | | | | [done]
- * |<------------->| total
- * | |<--------->| active
- * |<->| | | | receivedWork
- * | |<->| | | processWork
- * | | |<->| | sendResponse
- * | | | |<->| finalize
+ * | | | | [yielded]
+ * | | | | | [done]
+ * |<----------------->| total
+ * | |<------------->| active
+ * |<->| | | | | receivedWork
+ * | |<->| | | | processWork
+ * | | |<->| | | sendResponse
+ * | | | |<->| | yield
+ * | | | | |<->| finalize
*/
#define EXPAND_INTERVAL_IDS(X) \
X(total, started, done) \
@@ -111,7 +114,8 @@ namespace metrics_detail {
X(receiveWork, started, receivedWork) \
X(processWork, receivedWork, processedWork) \
X(sendResponse, processedWork, sentResponse) \
- X(finalize, sentResponse, done) \
+ X(yield, sentResponse, yielded) \
+ X(finalize, yielded, done) \
/**/
#define X_ID(id, ...) id,
@@ -165,6 +169,8 @@ struct SplitTimerPolicy {
static constexpr size_t numTimeSplitIds = enumExtent<TimeSplitIdType>;
static constexpr size_t numIntervalIds = enumExtent<IntervalIdType>;
+ explicit SplitTimerPolicy(ServiceEntryPoint* sep) : _sep(sep) {}
+
template <typename E>
static constexpr size_t toIdx(E e) {
return static_cast<size_t>(e);
@@ -192,23 +198,35 @@ struct SplitTimerPolicy {
void onFinish(SplitTimer<SplitTimerPolicy>* splitTimer) {
splitTimer->notify(TimeSplitIdType::done);
- auto t = splitTimer->getSplitInterval(IntervalIdType::active);
+ auto t = splitTimer->getSplitInterval(IntervalIdType::sendResponse);
if (MONGO_likely(!t || *t < Milliseconds{serverGlobalParams.slowMS.load()}))
return;
BSONObjBuilder bob;
splitTimer->appendIntervals(bob);
- LOGV2(6983000, "Slow SessionWorkflow loop", "elapsed"_attr = bob.obj());
+
+ logv2::LogSeverity severity = sessionWorkflowDelaySendMessage.shouldFail()
+ ? logv2::LogSeverity::Info()
+ : _sep->slowSessionWorkflowLogSeverity();
+
+ LOGV2_DEBUG(6983000,
+ severity.toInt(),
+ "Slow network response send time",
+ "elapsed"_attr = bob.obj());
}
Timer makeTimer() {
return Timer{};
}
+
+ ServiceEntryPoint* _sep;
};
class SessionWorkflowMetrics {
public:
+ explicit SessionWorkflowMetrics(ServiceEntryPoint* sep) : _sep(sep) {}
+
void start() {
- _t.emplace();
+ _t.emplace(SplitTimerPolicy{_sep});
}
void received() {
_t->notify(TimeSplitId::receivedWork);
@@ -222,11 +240,15 @@ public:
duration_cast<Milliseconds>(*_t->getSplitInterval(IntervalId::processWork)),
duration_cast<Milliseconds>(*_t->getSplitInterval(IntervalId::sendResponse)));
}
+ void yielded() {
+ _t->notify(TimeSplitId::yielded);
+ }
void finish() {
_t.reset();
}
private:
+ ServiceEntryPoint* _sep;
boost::optional<SplitTimer<SplitTimerPolicy>> _t;
};
} // namespace metrics_detail
@@ -425,7 +447,7 @@ private:
SessionWorkflow* const _workflow;
ServiceContext* const _serviceContext;
- ServiceEntryPoint* const _sep;
+ ServiceEntryPoint* _sep;
RunnerAndSource _taskRunner;
AtomicWord<bool> _isTerminated{false};
@@ -434,7 +456,7 @@ private:
std::unique_ptr<WorkItem> _work;
std::unique_ptr<WorkItem> _nextWork; /**< created by exhaust responses */
- metrics_detail::SessionWorkflowMetrics _metrics;
+ metrics_detail::SessionWorkflowMetrics _metrics{_sep};
};
class SessionWorkflow::Impl::WorkItem {
@@ -583,12 +605,6 @@ void SessionWorkflow::Impl::sendMessage() {
"connectionId"_attr = session()->id());
uassertStatusOK(status);
}
-
- // Performance testing showed a significant benefit from yielding here.
- // TODO SERVER-57531: Once we enable the use of a fixed-size thread pool
- // for handling client connection handshaking, we should only yield here if
- // we're on a dedicated thread.
- executor()->yieldIfAppropriate();
}
Future<void> SessionWorkflow::Impl::processMessage() {
@@ -707,6 +723,14 @@ void SessionWorkflow::Impl::startNewLoop(const Status& executorStatus) {
if (_work->hasOut()) {
sendMessage();
_metrics.sent(*session());
+
+ // Performance testing showed a significant benefit from yielding here.
+ // TODO SERVER-57531: Once we enable the use of a fixed-size thread pool
+ // for handling client connection handshaking, we should only yield here if
+ // we're on a dedicated thread.
+ executor()->yieldIfAppropriate();
+
+ _metrics.yielded();
}
})
.getAsync([this, anchor = shared_from_this()](Status status) {
diff --git a/src/mongo/transport/transport_layer_asio_test.cpp b/src/mongo/transport/transport_layer_asio_test.cpp
index 11a128e35ec..2b30b0431f1 100644
--- a/src/mongo/transport/transport_layer_asio_test.cpp
+++ b/src/mongo/transport/transport_layer_asio_test.cpp
@@ -269,6 +269,10 @@ public:
return _sessions->size();
}
+ logv2::LogSeverity slowSessionWorkflowLogSeverity() override {
+ MONGO_UNIMPLEMENTED;
+ }
+
void setOnStartSession(std::function<void(SessionThread&)> cb) {
_onStartSession = std::move(cb);
}
@@ -867,6 +871,10 @@ class BatonASIOLinuxTest : public LockerNoopServiceContextTest {
MONGO_UNREACHABLE;
}
+ logv2::LogSeverity slowSessionWorkflowLogSeverity() override {
+ MONGO_UNIMPLEMENTED;
+ }
+
private:
Promise<transport::SessionHandle> _promise;
};
diff --git a/src/mongo/util/net/ssl_manager_test.cpp b/src/mongo/util/net/ssl_manager_test.cpp
index a33d4571b14..c44fbfd4d2a 100644
--- a/src/mongo/util/net/ssl_manager_test.cpp
+++ b/src/mongo/util/net/ssl_manager_test.cpp
@@ -94,6 +94,10 @@ public:
MONGO_UNREACHABLE;
}
+ logv2::LogSeverity slowSessionWorkflowLogSeverity() override {
+ MONGO_UNIMPLEMENTED;
+ }
+
void setTransportLayer(transport::TransportLayer* tl) {
_transport = tl;
}