From af1a9dc12adcfa83cc19571cb3faba26eeddac92 Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Tue, 28 Dec 2021 22:41:48 +0000 Subject: SERVER-45953 Exempt internal replication oplog readers from acquiring read tickets in order to avoid deadlocks (cherry picked from commit d1cb83d9199c1a25158d74e47b3aa88b5c33fe8b) --- src/mongo/db/commands/find_cmd.cpp | 17 ++++++++++++++--- src/mongo/db/commands/getmore_cmd.cpp | 13 ++++++++++++- src/mongo/db/query/query_request.cpp | 2 +- src/mongo/db/query/query_request.h | 2 +- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 4ce8c38e5f2..f59df4f4d8b 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -232,8 +232,8 @@ public: // Parse the command BSON to a QueryRequest. const bool isExplain = false; // Pass parseNs to makeFromFindCommand in case cmdObj does not have a UUID. - auto qrStatus = QueryRequest::makeFromFindCommand( - NamespaceString(parseNs(dbname, cmdObj)), cmdObj, isExplain); + const NamespaceString parsedNss(parseNs(dbname, cmdObj)); + auto qrStatus = QueryRequest::makeFromFindCommand(parsedNss, cmdObj, isExplain); uassertStatusOK(qrStatus.getStatus()); auto replCoord = repl::ReplicationCoordinator::get(opCtx); @@ -245,12 +245,23 @@ public: !(session->inActiveOrKilledMultiDocumentTransaction() && qr->isTailable())); // Validate term before acquiring locks, if provided. - if (auto term = qr->getReplicationTerm()) { + auto term = qr->getReplicationTerm(); + if (term) { Status status = replCoord->updateTerm(opCtx, *term); // Note: updateTerm returns ok if term stayed the same. uassertStatusOK(status); } + // The presence of a term in the request indicates that this is an internal replication + // oplog read request. + if (term && parsedNss == NamespaceString::kRsOplogNamespace) { + // We do not want to take tickets for internal (replication) oplog reads. Stalling + // on ticket acquisition can cause complicated deadlocks. Primaries may depend on + // data reaching secondaries in order to proceed; and secondaries may get stalled + // replicating because of an inability to acquire a read ticket. + opCtx->lockState()->skipAcquireTicket(); + } + // Acquire locks. If the query is on a view, we release our locks and convert the query // request into an aggregation command. boost::optional ctx; diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 89d351baf86..886a6138b06 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -223,12 +223,23 @@ public: auto curOp = CurOp::get(opCtx); curOp->debug().cursorid = request.cursorid; - // Validate term before acquiring locks, if provided. if (request.term) { + // Validate term before acquiring locks, if provided. auto replCoord = repl::ReplicationCoordinator::get(opCtx); Status status = replCoord->updateTerm(opCtx, *request.term); // Note: updateTerm returns ok if term stayed the same. uassertStatusOK(status); + + // If this is an oplog request, then this is a getMore for replication oplog + // fetching. The term field is only allowed for internal clients (see + // checkAuthForGetMore). + if (request.nss == NamespaceString::kRsOplogNamespace) { + // We do not want to take tickets for internal (replication) oplog reads. + // Stalling on ticket acquisition can cause complicated deadlocks. Primaries may + // depend on data reaching secondaries in order to proceed; and secondaries may + // get stalled replicating because of an inability to acquire a read ticket. + opCtx->lockState()->skipAcquireTicket(); + } } // Cursors come in one of two flavors: diff --git a/src/mongo/db/query/query_request.cpp b/src/mongo/db/query/query_request.cpp index 0dd44bc7f3f..a2e098f11df 100644 --- a/src/mongo/db/query/query_request.cpp +++ b/src/mongo/db/query/query_request.cpp @@ -401,7 +401,7 @@ StatusWith> QueryRequest::parseFromFindCommand(unique_p return std::move(qr); } -StatusWith> QueryRequest::makeFromFindCommand(NamespaceString nss, +StatusWith> QueryRequest::makeFromFindCommand(const NamespaceString& nss, const BSONObj& cmdObj, bool isExplain) { BSONElement first = cmdObj.firstElement(); diff --git a/src/mongo/db/query/query_request.h b/src/mongo/db/query/query_request.h index 6ac6de5ce9b..88792140aca 100644 --- a/src/mongo/db/query/query_request.h +++ b/src/mongo/db/query/query_request.h @@ -74,7 +74,7 @@ public: * Returns a heap allocated QueryRequest on success or an error if 'cmdObj' is not well * formed. */ - static StatusWith> makeFromFindCommand(NamespaceString nss, + static StatusWith> makeFromFindCommand(const NamespaceString& nss, const BSONObj& cmdObj, bool isExplain); -- cgit v1.2.1