diff options
author | Blake Oler <blake.oler@mongodb.com> | 2022-11-28 16:16:52 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-28 18:09:26 +0000 |
commit | f0b32c045f0f7309b26dedd4813e5f0a0658dca6 (patch) | |
tree | fa23a86eb867d516f7f79926c6aa53ee92359460 | |
parent | 73b75eee65455053e5662ce528c1a1e5a119c147 (diff) | |
download | mongo-f0b32c045f0f7309b26dedd4813e5f0a0658dca6.tar.gz |
SERVER-71029 Update slow SessionWorkflow log criteria
-rw-r--r-- | jstests/noPassthrough/slow_session_workflow_log.js | 59 | ||||
-rw-r--r-- | src/mongo/embedded/service_entry_point_embedded.cpp | 4 | ||||
-rw-r--r-- | src/mongo/embedded/service_entry_point_embedded.h | 1 | ||||
-rw-r--r-- | src/mongo/transport/service_entry_point.h | 8 | ||||
-rw-r--r-- | src/mongo/transport/service_entry_point_impl.cpp | 4 | ||||
-rw-r--r-- | src/mongo/transport/service_entry_point_impl.h | 10 | ||||
-rw-r--r-- | src/mongo/transport/session_workflow.cpp | 62 | ||||
-rw-r--r-- | src/mongo/transport/transport_layer_asio_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/util/net/ssl_manager_test.cpp | 4 |
9 files changed, 96 insertions, 64 deletions
diff --git a/jstests/noPassthrough/slow_session_workflow_log.js b/jstests/noPassthrough/slow_session_workflow_log.js index ddaf41ae5a1..aa4c131ffd2 100644 --- a/jstests/noPassthrough/slow_session_workflow_log.js +++ b/jstests/noPassthrough/slow_session_workflow_log.js @@ -12,14 +12,7 @@ load("jstests/libs/fail_point_util.js"); load("jstests/libs/log.js"); // For findMatchingLogLine const expectedLogId = 6983000; -const sleepMillisInQueryFilter = 200; -const sleepMillisBetweenQueries = 100; - -// This specific test doesn't care about precision in the times reported by the server. Since the -// server may be doing some extra work before listening for another message, and since the server -// and this test may vary in the precision by which they convert ticks to milliseconds, we allow -// some error when comparing local times to server times. -const errorAllowance = 0.9; +const sleepMillisInSendResponse = 200; const expectedFields = [ "totalMillis", @@ -51,27 +44,18 @@ function runTest(conn) { assert.commandWorked(assert.commandWorked(coll.insert({_id: 1}))); // In order to find the new log lines, a baseline needs to be established. - let slowSessionWorkflowCount = getSlowLogCount(conn); - - // Do a query that we would expect to be fast. Expect no new slow SessionWorkflows are logged. - let count = coll.find({}).toArray(); - assert.eq(count.length, 1, "expected 1 document"); - let prevSlowSessionWorkflowCount = slowSessionWorkflowCount; - slowSessionWorkflowCount = getSlowLogCount(conn); - assert.eq(slowSessionWorkflowCount, - prevSlowSessionWorkflowCount, - "There should not be a slow SessionWorkflow log at this point but one was found."); + const prevSlowSessionWorkflowCount = getSlowLogCount(conn); // Wait, then do a query beyond the 100ms threshold. Make sure the slow loop log line exists. - sleep(sleepMillisBetweenQueries); - coll.find({$where: 'function() { sleep(' + sleepMillisInQueryFilter + '); return true; }'}) - .toArray(); + const fp = configureFailPoint( + conn, "sessionWorkflowDelaySendMessage", {millis: sleepMillisInSendResponse}); + coll.find().toArray(); + fp.off(); let logAndCount = getSlowLogAndCount(conn); - prevSlowSessionWorkflowCount = slowSessionWorkflowCount; - slowSessionWorkflowCount = logAndCount.count; - assert.eq(slowSessionWorkflowCount, - prevSlowSessionWorkflowCount + 1, - "Expected to find a slow SessionWorkflow log."); + const slowSessionWorkflowCount = logAndCount.count; + assert.gt(slowSessionWorkflowCount, + prevSlowSessionWorkflowCount, + "Expected to find at least one slow SessionWorkflow log."); // Do some sanity checks over the actual contents of the log. const slowLoopObj = JSON.parse(logAndCount.log); @@ -81,26 +65,11 @@ function runTest(conn) { assert(expectedField in elapsedObj, "Expected to find field but couldn't: " + expectedField); }); - let totalElapsed = elapsedObj.totalMillis; - let activeElapsed = elapsedObj.activeMillis; - let sourceWorkElapsed = elapsedObj.receiveWorkMillis; - let processWorkElapsed = elapsedObj.processWorkMillis; - assert.gte( - sourceWorkElapsed, - sleepMillisBetweenQueries * errorAllowance, - "The time reported sourcing a message didn't include the time sleeping between queries."); - assert.gte(processWorkElapsed, - sleepMillisInQueryFilter, - "The time reported processing work didn't include the sleep in the find filter."); + const sendResponseElapsed = elapsedObj.sendResponseMillis; - // When comparing server time to another server time, there is no reason to expect error. - assert.gte(activeElapsed, - processWorkElapsed, - "The time reported as active time didn't include the time processing work."); - assert.gte( - totalElapsed, - sourceWorkElapsed + activeElapsed, - "The total time reported didn't include the sum of active time and message sourcing time."); + assert.gte(sendResponseElapsed, + sleepMillisInSendResponse, + "The time reported sending a response didn't include the sleep in the failpoint."); } // Test standalone. diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp index c1f3ad842f1..2dfb0ba9f2a 100644 --- a/src/mongo/embedded/service_entry_point_embedded.cpp +++ b/src/mongo/embedded/service_entry_point_embedded.cpp @@ -187,4 +187,8 @@ size_t ServiceEntryPointEmbedded::numOpenSessions() const { UASSERT_NOT_IMPLEMENTED; } +logv2::LogSeverity ServiceEntryPointEmbedded::slowSessionWorkflowLogSeverity() { + UASSERT_NOT_IMPLEMENTED; +} + } // namespace mongo diff --git a/src/mongo/embedded/service_entry_point_embedded.h b/src/mongo/embedded/service_entry_point_embedded.h index bfa06dd672c..e4aeca09c5e 100644 --- a/src/mongo/embedded/service_entry_point_embedded.h +++ b/src/mongo/embedded/service_entry_point_embedded.h @@ -51,6 +51,7 @@ public: bool shutdown(Milliseconds timeout) override; void appendStats(BSONObjBuilder* bob) const override; size_t numOpenSessions() const override; + logv2::LogSeverity slowSessionWorkflowLogSeverity() override; private: class Hooks; diff --git a/src/mongo/transport/service_entry_point.h b/src/mongo/transport/service_entry_point.h index 8852f869a41..dc008cac720 100644 --- a/src/mongo/transport/service_entry_point.h +++ b/src/mongo/transport/service_entry_point.h @@ -34,6 +34,7 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/client.h" #include "mongo/db/dbmessage.h" +#include "mongo/logv2/log_severity.h" #include "mongo/transport/session.h" #include "mongo/util/future.h" @@ -91,6 +92,13 @@ public: } /** + * Returns the current severity to log slow SessionWorkflow lifecycles. Returns a suppressed + * severity every X seconds (defined in SEP::impl) to prevent logs from overwhelming the rest + * of the system. + */ + virtual logv2::LogSeverity slowSessionWorkflowLogSeverity() = 0; + + /** * Processes a request and fills out a DbResponse. */ virtual Future<DbResponse> handleRequest(OperationContext* opCtx, diff --git a/src/mongo/transport/service_entry_point_impl.cpp b/src/mongo/transport/service_entry_point_impl.cpp index bffdb51d86e..0d98e57d7c9 100644 --- a/src/mongo/transport/service_entry_point_impl.cpp +++ b/src/mongo/transport/service_entry_point_impl.cpp @@ -389,6 +389,10 @@ size_t ServiceEntryPointImpl::maxOpenSessions() const { return _maxSessions; } +logv2::LogSeverity ServiceEntryPointImpl::slowSessionWorkflowLogSeverity() { + return _slowSessionWorkflowLogSuppressor(); +} + bool ServiceEntryPointImpl::shutdownAndWait(Milliseconds timeout) { auto deadline = _svcCtx->getPreciseClockSource()->now() + timeout; diff --git a/src/mongo/transport/service_entry_point_impl.h b/src/mongo/transport/service_entry_point_impl.h index 683ad266fa3..405af8c1bc0 100644 --- a/src/mongo/transport/service_entry_point_impl.h +++ b/src/mongo/transport/service_entry_point_impl.h @@ -36,6 +36,7 @@ #include "mongo/base/status.h" #include "mongo/bson/bsonobjbuilder.h" +#include "mongo/logv2/log_severity_suppressor.h" #include "mongo/stdx/variant.h" #include "mongo/transport/service_entry_point.h" #include "mongo/transport/session.h" @@ -54,6 +55,8 @@ class ServiceContext; */ class ServiceEntryPointImpl : public ServiceEntryPoint { public: + static constexpr Seconds kSlowSessionWorkflowLogSuppresionPeriod{5}; + explicit ServiceEntryPointImpl(ServiceContext* svcCtx); ~ServiceEntryPointImpl(); @@ -76,6 +79,8 @@ public: size_t maxOpenSessions() const final; + logv2::LogSeverity slowSessionWorkflowLogSeverity() final; + void onClientDisconnect(Client* client) final; /** `onClientDisconnect` calls this before doing anything else. */ @@ -95,6 +100,11 @@ private: size_t _rejectedSessions; std::unique_ptr<Sessions> _sessions; + + logv2::SeveritySuppressor _slowSessionWorkflowLogSuppressor{ + kSlowSessionWorkflowLogSuppresionPeriod, + logv2::LogSeverity::Info(), + logv2::LogSeverity::Debug(2)}; }; /* diff --git a/src/mongo/transport/session_workflow.cpp b/src/mongo/transport/session_workflow.cpp index 41d40b03dad..080d1565912 100644 --- a/src/mongo/transport/session_workflow.cpp +++ b/src/mongo/transport/session_workflow.cpp @@ -83,6 +83,7 @@ namespace metrics_detail { X(receivedWork) \ X(processedWork) \ X(sentResponse) \ + X(yielded) \ X(done) \ /**/ @@ -97,13 +98,15 @@ namespace metrics_detail { * | [receivedWork] * | | [processedWork] * | | | [sentResponse] - * | | | | [done] - * |<------------->| total - * | |<--------->| active - * |<->| | | | receivedWork - * | |<->| | | processWork - * | | |<->| | sendResponse - * | | | |<->| finalize + * | | | | [yielded] + * | | | | | [done] + * |<----------------->| total + * | |<------------->| active + * |<->| | | | | receivedWork + * | |<->| | | | processWork + * | | |<->| | | sendResponse + * | | | |<->| | yield + * | | | | |<->| finalize */ #define EXPAND_INTERVAL_IDS(X) \ X(total, started, done) \ @@ -111,7 +114,8 @@ namespace metrics_detail { X(receiveWork, started, receivedWork) \ X(processWork, receivedWork, processedWork) \ X(sendResponse, processedWork, sentResponse) \ - X(finalize, sentResponse, done) \ + X(yield, sentResponse, yielded) \ + X(finalize, yielded, done) \ /**/ #define X_ID(id, ...) id, @@ -165,6 +169,8 @@ struct SplitTimerPolicy { static constexpr size_t numTimeSplitIds = enumExtent<TimeSplitIdType>; static constexpr size_t numIntervalIds = enumExtent<IntervalIdType>; + explicit SplitTimerPolicy(ServiceEntryPoint* sep) : _sep(sep) {} + template <typename E> static constexpr size_t toIdx(E e) { return static_cast<size_t>(e); @@ -192,23 +198,35 @@ struct SplitTimerPolicy { void onFinish(SplitTimer<SplitTimerPolicy>* splitTimer) { splitTimer->notify(TimeSplitIdType::done); - auto t = splitTimer->getSplitInterval(IntervalIdType::active); + auto t = splitTimer->getSplitInterval(IntervalIdType::sendResponse); if (MONGO_likely(!t || *t < Milliseconds{serverGlobalParams.slowMS.load()})) return; BSONObjBuilder bob; splitTimer->appendIntervals(bob); - LOGV2(6983000, "Slow SessionWorkflow loop", "elapsed"_attr = bob.obj()); + + logv2::LogSeverity severity = sessionWorkflowDelaySendMessage.shouldFail() + ? logv2::LogSeverity::Info() + : _sep->slowSessionWorkflowLogSeverity(); + + LOGV2_DEBUG(6983000, + severity.toInt(), + "Slow network response send time", + "elapsed"_attr = bob.obj()); } Timer makeTimer() { return Timer{}; } + + ServiceEntryPoint* _sep; }; class SessionWorkflowMetrics { public: + explicit SessionWorkflowMetrics(ServiceEntryPoint* sep) : _sep(sep) {} + void start() { - _t.emplace(); + _t.emplace(SplitTimerPolicy{_sep}); } void received() { _t->notify(TimeSplitId::receivedWork); @@ -222,11 +240,15 @@ public: duration_cast<Milliseconds>(*_t->getSplitInterval(IntervalId::processWork)), duration_cast<Milliseconds>(*_t->getSplitInterval(IntervalId::sendResponse))); } + void yielded() { + _t->notify(TimeSplitId::yielded); + } void finish() { _t.reset(); } private: + ServiceEntryPoint* _sep; boost::optional<SplitTimer<SplitTimerPolicy>> _t; }; } // namespace metrics_detail @@ -425,7 +447,7 @@ private: SessionWorkflow* const _workflow; ServiceContext* const _serviceContext; - ServiceEntryPoint* const _sep; + ServiceEntryPoint* _sep; RunnerAndSource _taskRunner; AtomicWord<bool> _isTerminated{false}; @@ -434,7 +456,7 @@ private: std::unique_ptr<WorkItem> _work; std::unique_ptr<WorkItem> _nextWork; /**< created by exhaust responses */ - metrics_detail::SessionWorkflowMetrics _metrics; + metrics_detail::SessionWorkflowMetrics _metrics{_sep}; }; class SessionWorkflow::Impl::WorkItem { @@ -583,12 +605,6 @@ void SessionWorkflow::Impl::sendMessage() { "connectionId"_attr = session()->id()); uassertStatusOK(status); } - - // Performance testing showed a significant benefit from yielding here. - // TODO SERVER-57531: Once we enable the use of a fixed-size thread pool - // for handling client connection handshaking, we should only yield here if - // we're on a dedicated thread. - executor()->yieldIfAppropriate(); } Future<void> SessionWorkflow::Impl::processMessage() { @@ -707,6 +723,14 @@ void SessionWorkflow::Impl::startNewLoop(const Status& executorStatus) { if (_work->hasOut()) { sendMessage(); _metrics.sent(*session()); + + // Performance testing showed a significant benefit from yielding here. + // TODO SERVER-57531: Once we enable the use of a fixed-size thread pool + // for handling client connection handshaking, we should only yield here if + // we're on a dedicated thread. + executor()->yieldIfAppropriate(); + + _metrics.yielded(); } }) .getAsync([this, anchor = shared_from_this()](Status status) { diff --git a/src/mongo/transport/transport_layer_asio_test.cpp b/src/mongo/transport/transport_layer_asio_test.cpp index 11a128e35ec..2b30b0431f1 100644 --- a/src/mongo/transport/transport_layer_asio_test.cpp +++ b/src/mongo/transport/transport_layer_asio_test.cpp @@ -269,6 +269,10 @@ public: return _sessions->size(); } + logv2::LogSeverity slowSessionWorkflowLogSeverity() override { + MONGO_UNIMPLEMENTED; + } + void setOnStartSession(std::function<void(SessionThread&)> cb) { _onStartSession = std::move(cb); } @@ -867,6 +871,10 @@ class BatonASIOLinuxTest : public LockerNoopServiceContextTest { MONGO_UNREACHABLE; } + logv2::LogSeverity slowSessionWorkflowLogSeverity() override { + MONGO_UNIMPLEMENTED; + } + private: Promise<transport::SessionHandle> _promise; }; diff --git a/src/mongo/util/net/ssl_manager_test.cpp b/src/mongo/util/net/ssl_manager_test.cpp index a33d4571b14..c44fbfd4d2a 100644 --- a/src/mongo/util/net/ssl_manager_test.cpp +++ b/src/mongo/util/net/ssl_manager_test.cpp @@ -94,6 +94,10 @@ public: MONGO_UNREACHABLE; } + logv2::LogSeverity slowSessionWorkflowLogSeverity() override { + MONGO_UNIMPLEMENTED; + } + void setTransportLayer(transport::TransportLayer* tl) { _transport = tl; } |