summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAmirsaman Memaripour <amirsaman.memaripour@mongodb.com>2020-04-29 21:03:20 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-14 14:36:40 +0000
commitb77252a641838f43e3c09796a09be22ab8f44e1c (patch)
tree158d46c351017adb5ce393759fa90af4e02473de /src
parentc9a0717687ed1dc70efac8dbaff06b14551ee6f3 (diff)
downloadmongo-b77252a641838f43e3c09796a09be22ab8f44e1c.tar.gz
SERVER-47802 Destroy opCtx after responding to clients
Postpone destruction of opCtx until after responding to clients to reduce the cost of destroying opCtx on the critical execution path.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/base/error_codes.yml3
-rw-r--r--src/mongo/db/operation_context_test.cpp83
-rw-r--r--src/mongo/db/service_context.cpp57
-rw-r--r--src/mongo/db/service_context.h21
-rw-r--r--src/mongo/transport/service_state_machine.cpp21
-rw-r--r--src/mongo/transport/service_state_machine.h4
6 files changed, 173 insertions, 16 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 07f245a848e..3df82a37c51 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -360,8 +360,9 @@ error_codes:
- {code: 309,name: ExhaustCommandFinished}
- {code: 310,name: PeriodicJobIsStopped,categories: [CancelationError]}
- # The code below is for internal use only and must never be returned in a network response.
+ # The codes below are for internal use only and must never be returned in a network response.
- {code: 311,name: TransactionCoordinatorCanceled}
+ - {code: 312,name: OperationIsKilledAndDelisted,categories: [CancelationError]}
# Error codes 4000-8999 are reserved.
diff --git a/src/mongo/db/operation_context_test.cpp b/src/mongo/db/operation_context_test.cpp
index 27a8cd8bfd9..15ae8c19f28 100644
--- a/src/mongo/db/operation_context_test.cpp
+++ b/src/mongo/db/operation_context_test.cpp
@@ -27,19 +27,25 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
#include "mongo/platform/basic.h"
#include <boost/optional.hpp>
#include <memory>
#include "mongo/db/client.h"
+#include "mongo/db/curop.h"
#include "mongo/db/json.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/operation_context_group.h"
#include "mongo/db/service_context.h"
+#include "mongo/logv2/log.h"
#include "mongo/stdx/future.h"
#include "mongo/stdx/thread.h"
+#include "mongo/transport/session.h"
+#include "mongo/transport/transport_layer_mock.h"
#include "mongo/unittest/barrier.h"
#include "mongo/unittest/death_test.h"
#include "mongo/unittest/unittest.h"
@@ -1010,6 +1016,83 @@ TEST(OperationContextTest, TestIsWaitingForConditionOrInterrupt) {
ASSERT_FALSE(optCtx->isWaitingForConditionOrInterrupt());
}
+TEST(OperationContextTest, TestActiveClientOperationsForClientsWithoutSession) {
+ auto serviceCtx = ServiceContext::make();
+ auto client = serviceCtx->makeClient("OperationContextTest");
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+ {
+ auto opCtx = client->makeOperationContext();
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+ }
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+}
+
+TEST(OperationContextTest, TestActiveClientOperations) {
+ transport::TransportLayerMock transportLayer;
+ transport::SessionHandle session = transportLayer.createSession();
+
+ auto serviceCtx = ServiceContext::make();
+ auto client = serviceCtx->makeClient("OperationContextTest", session);
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+
+ {
+ auto optCtx = client->makeOperationContext();
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 1);
+ }
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+
+ {
+ auto optCtx = client->makeOperationContext();
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 1);
+ serviceCtx->killAndDelistOperation(optCtx.get());
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+ }
+ ASSERT_EQ(serviceCtx->getActiveClientOperations(), 0);
+}
+
+TEST(OperationContextTest, CurrentOpExcludesKilledOperations) {
+ auto serviceCtx = ServiceContext::make();
+ auto client = serviceCtx->makeClient("MainClient");
+ auto opCtx = client->makeOperationContext();
+
+ for (auto truncateOps : {true, false}) {
+ for (auto backtraceMode : {true, false}) {
+ BSONObjBuilder bobNoOpCtx, bobKilledOpCtx;
+ // We use a separate client thread to generate CurrentOp reports in presence and absence
+ // of an `opCtx`. This is because `CurOp::reportCurrentOpForClient()` accepts an `opCtx`
+ // as input and requires it to be present throughout its execution.
+ stdx::thread thread([&]() mutable {
+ auto threadClient = serviceCtx->makeClient("ThreadClient");
+
+ // Generate report in absence of any opCtx
+ CurOp::reportCurrentOpForClient(
+ opCtx.get(), threadClient.get(), truncateOps, backtraceMode, &bobNoOpCtx);
+
+ auto threadOpCtx = threadClient->makeOperationContext();
+ serviceCtx->killAndDelistOperation(threadOpCtx.get());
+
+ // Generate report in presence of a killed opCtx
+ CurOp::reportCurrentOpForClient(
+ opCtx.get(), threadClient.get(), truncateOps, backtraceMode, &bobKilledOpCtx);
+ });
+
+ thread.join();
+ auto objNoOpCtx = bobNoOpCtx.obj();
+ auto objKilledOpCtx = bobKilledOpCtx.obj();
+
+ LOGV2_DEBUG(4780201, 1, "With no opCtx", "object"_attr = objNoOpCtx);
+ LOGV2_DEBUG(4780202, 1, "With killed opCtx", "object"_attr = objKilledOpCtx);
+
+ ASSERT_EQ(objNoOpCtx.nFields(), objKilledOpCtx.nFields());
+
+ auto compareBSONObjs = [](BSONObj& a, BSONObj& b) -> bool {
+ return (a == b).type == BSONObj::DeferredComparison::Type::kEQ;
+ };
+ ASSERT(compareBSONObjs(objNoOpCtx, objKilledOpCtx));
+ }
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/service_context.cpp b/src/mongo/db/service_context.cpp
index a0df164a749..58f440af121 100644
--- a/src/mongo/db/service_context.cpp
+++ b/src/mongo/db/service_context.cpp
@@ -282,20 +282,12 @@ ServiceContext::UniqueOperationContext ServiceContext::makeOperationContext(Clie
void ServiceContext::OperationContextDeleter::operator()(OperationContext* opCtx) const {
auto client = opCtx->getClient();
- if (client->session()) {
- _numCurrentOps.subtractAndFetch(1);
- }
- auto service = client->getServiceContext();
+ invariant(client);
- {
- stdx::lock_guard lk(service->_mutex);
- service->_clientByOperationId.erase(opCtx->getOpID());
- }
+ auto service = client->getServiceContext();
+ invariant(service);
- {
- stdx::lock_guard<Client> lk(*client);
- client->resetOperationContext();
- }
+ service->_delistOperation(opCtx);
opCtx->getBaton()->detach();
onDestroy(opCtx, service->_clientObservers);
@@ -369,6 +361,47 @@ void ServiceContext::killOperation(WithLock, OperationContext* opCtx, ErrorCodes
}
}
+void ServiceContext::_delistOperation(OperationContext* opCtx) noexcept {
+ // Removing `opCtx` from `_clientByOperationId` must always precede removing the `opCtx` from
+ // its client to prevent situations that another thread could use the service context to get a
+ // hold of an `opCtx` that has been removed from its client.
+ {
+ stdx::lock_guard lk(_mutex);
+ if (_clientByOperationId.erase(opCtx->getOpID()) != 1) {
+ // Another thread has already delisted this `opCtx`.
+ return;
+ }
+ }
+
+ auto client = opCtx->getClient();
+ stdx::lock_guard clientLock(*client);
+ // Reaching here implies this call was able to remove the `opCtx` from ServiceContext.
+
+ // Assigning a new opCtx to the client must never precede the destruction of any existing opCtx
+ // that references the client.
+ invariant(client->getOperationContext() == opCtx);
+ client->resetOperationContext();
+
+ if (client->session()) {
+ _numCurrentOps.subtractAndFetch(1);
+ }
+}
+
+void ServiceContext::killAndDelistOperation(OperationContext* opCtx,
+ ErrorCodes::Error killCode) noexcept {
+
+ auto client = opCtx->getClient();
+ invariant(client);
+
+ auto service = client->getServiceContext();
+ invariant(service == this);
+
+ _delistOperation(opCtx);
+
+ stdx::lock_guard clientLock(*client);
+ killOperation(clientLock, opCtx, killCode);
+}
+
void ServiceContext::unsetKillAllOperations() {
_globalKill.store(false);
}
diff --git a/src/mongo/db/service_context.h b/src/mongo/db/service_context.h
index a1847c7dd84..49cb7a602ba 100644
--- a/src/mongo/db/service_context.h
+++ b/src/mongo/db/service_context.h
@@ -431,6 +431,19 @@ public:
ErrorCodes::Error killCode = ErrorCodes::Interrupted);
/**
+ * Kills the operation "opCtx" with the code "killCode", if opCtx has not already been killed,
+ * and delists the operation by removing it from "_clientByOperationId" and its client. Both
+ * "opCtx->getClient()->getServiceContext()" and "this" must point to the same instance of
+ * service context. Also, "opCtx" should never be deleted before this method returns. Finally,
+ * the thread invoking this method must not hold (own) the client and the service context locks.
+ * It is highly recommended to use "ErrorCodes::OperationIsKilledAndDelisted" as the error code
+ * to facilitate debugging.
+ */
+ void killAndDelistOperation(
+ OperationContext* opCtx,
+ ErrorCodes::Error killError = ErrorCodes::OperationIsKilledAndDelisted) noexcept;
+
+ /**
* Registers a listener to be notified each time an op is killed.
*
* listener does not become owned by the environment. As there is currently no way to
@@ -608,6 +621,14 @@ private:
std::unique_ptr<ClientObserver> _observer;
};
+ /**
+ * Removes the operation from its client and the `_clientByOperationId` of its service context.
+ * It will acquire both client and service context locks, and should only be used internally by
+ * other ServiceContext methods. To ensure delisted operations are shortly deleted, this method
+ * should only be called after killing an operation or in its destructor.
+ */
+ void _delistOperation(OperationContext* opCtx) noexcept;
+
Mutex _mutex = MONGO_MAKE_LATCH(/*HierarchicalAcquisitionLevel(2), */ "ServiceContext::_mutex");
/**
diff --git a/src/mongo/transport/service_state_machine.cpp b/src/mongo/transport/service_state_machine.cpp
index d66547faad5..3f1ad8a05bd 100644
--- a/src/mongo/transport/service_state_machine.cpp
+++ b/src/mongo/transport/service_state_machine.cpp
@@ -474,9 +474,12 @@ void ServiceStateMachine::_processMessage(ThreadGuard guard) {
// database work for this request.
DbResponse dbresponse = _sep->handleRequest(opCtx.get(), _inMessage);
- // opCtx must be destroyed here so that the operation cannot show
- // up in currentOp results after the response reaches the client
- opCtx.reset();
+ // opCtx must be killed and delisted here so that the operation cannot show up in currentOp
+ // results after the response reaches the client. The destruction is postponed for later to
+ // mitigate its performance impact on the critical path of execution.
+ _serviceContext->killAndDelistOperation(opCtx.get(), ErrorCodes::OperationIsKilledAndDelisted);
+ invariant(!_killedOpCtx);
+ _killedOpCtx = std::move(opCtx);
// Format our response, if we have one
Message& toSink = dbresponse.response;
@@ -542,6 +545,12 @@ void ServiceStateMachine::_runNextInGuard(ThreadGuard guard) {
_state.store(curState);
}
+ // Destroy the opCtx (already killed) here, to potentially use the delay between clients'
+ // requests to hide the destruction cost.
+ if (MONGO_likely(_killedOpCtx)) {
+ _killedOpCtx.reset();
+ }
+
// Make sure the current Client got set correctly
dassert(Client::getCurrent() == _dbClientPtr);
try {
@@ -676,6 +685,12 @@ void ServiceStateMachine::_cleanupExhaustResources() noexcept try {
}
void ServiceStateMachine::_cleanupSession(ThreadGuard guard) {
+ // Ensure the delayed destruction of opCtx always happens before doing the cleanup.
+ if (MONGO_likely(_killedOpCtx)) {
+ _killedOpCtx.reset();
+ }
+ invariant(!_killedOpCtx);
+
_cleanupExhaustResources();
_state.store(State::Ended);
diff --git a/src/mongo/transport/service_state_machine.h b/src/mongo/transport/service_state_machine.h
index 46a9bc42f6e..148d4b90737 100644
--- a/src/mongo/transport/service_state_machine.h
+++ b/src/mongo/transport/service_state_machine.h
@@ -244,6 +244,10 @@ private:
boost::optional<MessageCompressorId> _compressorId;
Message _inMessage;
+ // Allows delegating destruction of opCtx to another function to potentially remove its cost
+ // from the critical path. This is currently only used in `_processMessage()`.
+ ServiceContext::UniqueOperationContext _killedOpCtx;
+
AtomicWord<Ownership> _owned{Ownership::kUnowned};
#if MONGO_CONFIG_DEBUG_BUILD
AtomicWord<stdx::thread::id> _owningThread;