diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2020-06-02 15:12:40 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-02 18:42:16 +0000 |
commit | 1151e6fec016a7abed435b8a9c81cc9a00aea980 (patch) | |
tree | 1231c81f2eb3254b0fda72bc629f0272e64e6ba9 | |
parent | 63a1e9a49bc4e9bb447a2ad2802022c5d3c17227 (diff) | |
download | mongo-1151e6fec016a7abed435b8a9c81cc9a00aea980.tar.gz |
SERVER-45953 Exempt internal replication oplog readers from acquiring read tickets in order to avoid deadlocks
(cherry picked from commit d1cb83d9199c1a25158d74e47b3aa88b5c33fe8b)
Conflicts:
src/mongo/db/commands/getmore_cmd.cpp
-rw-r--r-- | src/mongo/db/commands/find_cmd.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 13 |
2 files changed, 24 insertions, 2 deletions
```diff
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index e7313d834f9..50384636a63 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -316,11 +316,22 @@ public:
                     !qr->getReadAtClusterTime() || storageEngine->supportsDocLocking());

             // Validate term before acquiring locks, if provided.
-            if (auto term = qr->getReplicationTerm()) {
+            auto term = qr->getReplicationTerm();
+            if (term) {
                 // Note: updateTerm returns ok if term stayed the same.
                 uassertStatusOK(replCoord->updateTerm(opCtx, *term));
             }

+            // The presence of a term in the request indicates that this is an internal replication
+            // oplog read request.
+            if (term && parsedNss == NamespaceString::kRsOplogNamespace) {
+                // We do not want to take tickets for internal (replication) oplog reads. Stalling
+                // on ticket acquisition can cause complicated deadlocks. Primaries may depend on
+                // data reaching secondaries in order to proceed; and secondaries may get stalled
+                // replicating because of an inability to acquire a read ticket.
+                opCtx->lockState()->skipAcquireTicket();
+            }
+
             // We call RecoveryUnit::setTimestampReadSource() before acquiring a lock on the
             // collection via AutoGetCollectionForRead in order to ensure the comparison to the
             // collection's minimum visible snapshot is accurate.
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index af923252d7a..450fc19c4bd 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -622,11 +622,22 @@ public:
             auto curOp = CurOp::get(opCtx);
             curOp->debug().cursorid = _request.cursorid;

-            // Validate term before acquiring locks, if provided.
             if (_request.term) {
+                // Validate term before acquiring locks.
                 auto replCoord = repl::ReplicationCoordinator::get(opCtx);
                 // Note: updateTerm returns ok if term stayed the same.
                 uassertStatusOK(replCoord->updateTerm(opCtx, *_request.term));
+
+                // If this is an oplog request, then this is a getMore for replication oplog
+                // fetching. The term field is only allowed for internal clients (see
+                // checkAuthForGetMore).
+                if (_request.nss == NamespaceString::kRsOplogNamespace) {
+                    // We do not want to take tickets for internal (replication) oplog reads.
+                    // Stalling on ticket acquisition can cause complicated deadlocks. Primaries may
+                    // depend on data reaching secondaries in order to proceed; and secondaries may
+                    // get stalled replicating because of an inability to acquire a read ticket.
+                    opCtx->lockState()->skipAcquireTicket();
+                }
             }

             auto cursorManager = CursorManager::get(opCtx);
```