summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2018-04-30 12:54:34 -0400
committerCharlie Swanson <charlie.swanson@mongodb.com>2018-05-15 09:22:58 -0400
commitfa9278c892a6833694983191a5726b20f6a187a0 (patch)
tree3ef2ffc47c32a962d04507c127d2487485317041 /src/mongo/db
parenta9ba9539c5042911f030a1d4b3e4ce8ff0a449fc (diff)
downloadmongo-fa9278c892a6833694983191a5726b20f6a187a0.tar.gz
SERVER-33959 Avoid deadlock during global cursor registration
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/clientcursor.cpp3
-rw-r--r--src/mongo/db/commands/list_collections.cpp142
-rw-r--r--src/mongo/db/commands/list_indexes.cpp140
-rw-r--r--src/mongo/db/cursor_manager.cpp103
-rw-r--r--src/mongo/db/cursor_manager.h7
5 files changed, 217 insertions, 178 deletions
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index f960afc60d7..200a128b664 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -204,7 +204,8 @@ void ClientCursorPin::release() {
deleteUnderlying();
} else {
// Unpin the cursor under the collection cursor manager lock.
- _cursor->_cursorManager->unpin(_opCtx, _cursor);
+ _cursor->_cursorManager->unpin(
+ _opCtx, std::unique_ptr<ClientCursor, ClientCursor::Deleter>(_cursor));
cursorStatsOpenPinned.decrement();
}
diff --git a/src/mongo/db/commands/list_collections.cpp b/src/mongo/db/commands/list_collections.cpp
index dce39421301..b59d8ff6669 100644
--- a/src/mongo/db/commands/list_collections.cpp
+++ b/src/mongo/db/commands/list_collections.cpp
@@ -277,89 +277,91 @@ public:
jsobj, "includePendingDrops", false, &includePendingDrops);
uassertStatusOK(status);
- AutoGetDb autoDb(opCtx, dbname, MODE_IS);
-
- Database* db = autoDb.getDb();
-
- auto ws = make_unique<WorkingSet>();
- auto root = make_unique<QueuedDataStage>(opCtx, ws.get());
-
- if (db) {
- if (auto collNames = _getExactNameMatches(matcher.get())) {
- for (auto&& collName : *collNames) {
- auto nss = NamespaceString(db->name(), collName);
- Collection* collection = db->getCollection(opCtx, nss);
- BSONObj collBson =
- buildCollectionBson(opCtx, collection, includePendingDrops, nameOnly);
- if (!collBson.isEmpty()) {
- _addWorkingSetMember(opCtx, collBson, matcher.get(), ws.get(), root.get());
+ const NamespaceString cursorNss = NamespaceString::makeListCollectionsNSS(dbname);
+ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec;
+ BSONArrayBuilder firstBatch;
+ {
+ AutoGetDb autoDb(opCtx, dbname, MODE_IS);
+ Database* db = autoDb.getDb();
+
+ auto ws = make_unique<WorkingSet>();
+ auto root = make_unique<QueuedDataStage>(opCtx, ws.get());
+
+ if (db) {
+ if (auto collNames = _getExactNameMatches(matcher.get())) {
+ for (auto&& collName : *collNames) {
+ auto nss = NamespaceString(db->name(), collName);
+ Collection* collection = db->getCollection(opCtx, nss);
+ BSONObj collBson =
+ buildCollectionBson(opCtx, collection, includePendingDrops, nameOnly);
+ if (!collBson.isEmpty()) {
+ _addWorkingSetMember(
+ opCtx, collBson, matcher.get(), ws.get(), root.get());
+ }
}
- }
- } else {
- for (auto&& collection : *db) {
- BSONObj collBson =
- buildCollectionBson(opCtx, collection, includePendingDrops, nameOnly);
- if (!collBson.isEmpty()) {
- _addWorkingSetMember(opCtx, collBson, matcher.get(), ws.get(), root.get());
+ } else {
+ for (auto&& collection : *db) {
+ BSONObj collBson =
+ buildCollectionBson(opCtx, collection, includePendingDrops, nameOnly);
+ if (!collBson.isEmpty()) {
+ _addWorkingSetMember(
+ opCtx, collBson, matcher.get(), ws.get(), root.get());
+ }
}
}
- }
- // Skipping views is only necessary for internal cloning operations.
- bool skipViews = filterElt.type() == mongo::Object &&
- SimpleBSONObjComparator::kInstance.evaluate(
- filterElt.Obj() == ListCollectionsFilter::makeTypeCollectionFilter());
- if (!skipViews) {
- db->getViewCatalog()->iterate(opCtx, [&](const ViewDefinition& view) {
- BSONObj viewBson = buildViewBson(view, nameOnly);
- if (!viewBson.isEmpty()) {
- _addWorkingSetMember(opCtx, viewBson, matcher.get(), ws.get(), root.get());
- }
- });
+ // Skipping views is only necessary for internal cloning operations.
+ bool skipViews = filterElt.type() == mongo::Object &&
+ SimpleBSONObjComparator::kInstance.evaluate(
+ filterElt.Obj() == ListCollectionsFilter::makeTypeCollectionFilter());
+ if (!skipViews) {
+ db->getViewCatalog()->iterate(opCtx, [&](const ViewDefinition& view) {
+ BSONObj viewBson = buildViewBson(view, nameOnly);
+ if (!viewBson.isEmpty()) {
+ _addWorkingSetMember(
+ opCtx, viewBson, matcher.get(), ws.get(), root.get());
+ }
+ });
+ }
}
- }
- const NamespaceString cursorNss = NamespaceString::makeListCollectionsNSS(dbname);
+ exec = uassertStatusOK(PlanExecutor::make(
+ opCtx, std::move(ws), std::move(root), cursorNss, PlanExecutor::NO_YIELD));
- auto statusWithPlanExecutor = PlanExecutor::make(
- opCtx, std::move(ws), std::move(root), cursorNss, PlanExecutor::NO_YIELD);
- uassertStatusOK(statusWithPlanExecutor.getStatus());
- auto exec = std::move(statusWithPlanExecutor.getValue());
+ for (long long objCount = 0; objCount < batchSize; objCount++) {
+ BSONObj next;
+ PlanExecutor::ExecState state = exec->getNext(&next, NULL);
+ if (state == PlanExecutor::IS_EOF) {
+ break;
+ }
+ invariant(state == PlanExecutor::ADVANCED);
- BSONArrayBuilder firstBatch;
+ // If we can't fit this result inside the current batch, then we stash it for later.
+ if (!FindCommon::haveSpaceForNext(next, objCount, firstBatch.len())) {
+ exec->enqueue(next);
+ break;
+ }
- for (long long objCount = 0; objCount < batchSize; objCount++) {
- BSONObj next;
- PlanExecutor::ExecState state = exec->getNext(&next, NULL);
- if (state == PlanExecutor::IS_EOF) {
- break;
+ firstBatch.append(next);
}
- invariant(state == PlanExecutor::ADVANCED);
-
- // If we can't fit this result inside the current batch, then we stash it for later.
- if (!FindCommon::haveSpaceForNext(next, objCount, firstBatch.len())) {
- exec->enqueue(next);
- break;
+ if (exec->isEOF()) {
+ appendCursorResponseObject(0LL, cursorNss.ns(), firstBatch.arr(), &result);
+ return true;
}
-
- firstBatch.append(next);
- }
-
- CursorId cursorId = 0LL;
- if (!exec->isEOF()) {
exec->saveState();
exec->detachFromOperationContext();
- auto pinnedCursor = CursorManager::getGlobalCursorManager()->registerCursor(
- opCtx,
- {std::move(exec),
- cursorNss,
- AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
- opCtx->recoveryUnit()->getReadConcernLevel(),
- jsobj});
- cursorId = pinnedCursor.getCursor()->cursorid();
- }
-
- appendCursorResponseObject(cursorId, cursorNss.ns(), firstBatch.arr(), &result);
+ } // Drop db lock. Global cursor registration must be done without holding any locks.
+
+ auto pinnedCursor = CursorManager::getGlobalCursorManager()->registerCursor(
+ opCtx,
+ {std::move(exec),
+ cursorNss,
+ AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
+ opCtx->recoveryUnit()->getReadConcernLevel(),
+ jsobj});
+
+ appendCursorResponseObject(
+ pinnedCursor.getCursor()->cursorid(), cursorNss.ns(), firstBatch.arr(), &result);
return true;
}
diff --git a/src/mongo/db/commands/list_indexes.cpp b/src/mongo/db/commands/list_indexes.cpp
index dce38f51fa5..a3565df8e9e 100644
--- a/src/mongo/db/commands/list_indexes.cpp
+++ b/src/mongo/db/commands/list_indexes.cpp
@@ -125,83 +125,89 @@ public:
uassertStatusOK(
CursorRequest::parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize));
- AutoGetCollectionForReadCommand ctx(opCtx, CommandHelpers::parseNsOrUUID(dbname, cmdObj));
- Collection* collection = ctx.getCollection();
- uassert(ErrorCodes::NamespaceNotFound,
- str::stream() << "ns does not exist: " << ctx.getNss().ns(),
- collection);
-
- const CollectionCatalogEntry* cce = collection->getCatalogEntry();
- invariant(cce);
-
- const auto nss = ctx.getNss();
-
- vector<string> indexNames;
- writeConflictRetry(opCtx, "listIndexes", nss.ns(), [&indexNames, &cce, &opCtx] {
- indexNames.clear();
- cce->getReadyIndexes(opCtx, &indexNames);
- });
-
- auto ws = make_unique<WorkingSet>();
- auto root = make_unique<QueuedDataStage>(opCtx, ws.get());
-
- for (size_t i = 0; i < indexNames.size(); i++) {
- BSONObj indexSpec =
- writeConflictRetry(opCtx, "listIndexes", nss.ns(), [&cce, &opCtx, &indexNames, i] {
- return cce->getIndexSpec(opCtx, indexNames[i]);
- });
-
- WorkingSetID id = ws->allocate();
- WorkingSetMember* member = ws->get(id);
- member->keyData.clear();
- member->recordId = RecordId();
- member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
- member->transitionToOwnedObj();
- root->pushBack(id);
- }
+ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec;
+ NamespaceString cursorNss;
+ BSONArrayBuilder firstBatch;
+ {
+ AutoGetCollectionForReadCommand ctx(opCtx,
+ CommandHelpers::parseNsOrUUID(dbname, cmdObj));
+ Collection* collection = ctx.getCollection();
+ uassert(ErrorCodes::NamespaceNotFound,
+ str::stream() << "ns does not exist: " << ctx.getNss().ns(),
+ collection);
+
+ const CollectionCatalogEntry* cce = collection->getCatalogEntry();
+ invariant(cce);
+
+ const auto nss = ctx.getNss();
+
+ vector<string> indexNames;
+ writeConflictRetry(opCtx, "listIndexes", nss.ns(), [&indexNames, &cce, &opCtx] {
+ indexNames.clear();
+ cce->getReadyIndexes(opCtx, &indexNames);
+ });
+
+ auto ws = make_unique<WorkingSet>();
+ auto root = make_unique<QueuedDataStage>(opCtx, ws.get());
+
+ for (size_t i = 0; i < indexNames.size(); i++) {
+ BSONObj indexSpec = writeConflictRetry(
+ opCtx, "listIndexes", nss.ns(), [&cce, &opCtx, &indexNames, i] {
+ return cce->getIndexSpec(opCtx, indexNames[i]);
+ });
+
+ WorkingSetID id = ws->allocate();
+ WorkingSetMember* member = ws->get(id);
+ member->keyData.clear();
+ member->recordId = RecordId();
+ member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
+ member->transitionToOwnedObj();
+ root->pushBack(id);
+ }
- const NamespaceString cursorNss = NamespaceString::makeListIndexesNSS(dbname, nss.coll());
- invariant(nss == cursorNss.getTargetNSForListIndexes());
+ cursorNss = NamespaceString::makeListIndexesNSS(dbname, nss.coll());
+ invariant(nss == cursorNss.getTargetNSForListIndexes());
- auto statusWithPlanExecutor = PlanExecutor::make(
- opCtx, std::move(ws), std::move(root), cursorNss, PlanExecutor::NO_YIELD);
- uassertStatusOK(statusWithPlanExecutor.getStatus());
- auto exec = std::move(statusWithPlanExecutor.getValue());
+ exec = uassertStatusOK(PlanExecutor::make(
+ opCtx, std::move(ws), std::move(root), cursorNss, PlanExecutor::NO_YIELD));
- BSONArrayBuilder firstBatch;
+ for (long long objCount = 0; objCount < batchSize; objCount++) {
+ BSONObj next;
+ PlanExecutor::ExecState state = exec->getNext(&next, NULL);
+ if (state == PlanExecutor::IS_EOF) {
+ break;
+ }
+ invariant(state == PlanExecutor::ADVANCED);
- for (long long objCount = 0; objCount < batchSize; objCount++) {
- BSONObj next;
- PlanExecutor::ExecState state = exec->getNext(&next, NULL);
- if (state == PlanExecutor::IS_EOF) {
- break;
- }
- invariant(state == PlanExecutor::ADVANCED);
+ // If we can't fit this result inside the current batch, then we stash it for later.
+ if (!FindCommon::haveSpaceForNext(next, objCount, firstBatch.len())) {
+ exec->enqueue(next);
+ break;
+ }
- // If we can't fit this result inside the current batch, then we stash it for later.
- if (!FindCommon::haveSpaceForNext(next, objCount, firstBatch.len())) {
- exec->enqueue(next);
- break;
+ firstBatch.append(next);
}
- firstBatch.append(next);
- }
+ if (exec->isEOF()) {
+ appendCursorResponseObject(0LL, cursorNss.ns(), firstBatch.arr(), &result);
+ return true;
+ }
- CursorId cursorId = 0LL;
- if (!exec->isEOF()) {
exec->saveState();
exec->detachFromOperationContext();
- auto pinnedCursor = CursorManager::getGlobalCursorManager()->registerCursor(
- opCtx,
- {std::move(exec),
- cursorNss,
- AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
- opCtx->recoveryUnit()->getReadConcernLevel(),
- cmdObj});
- cursorId = pinnedCursor.getCursor()->cursorid();
- }
-
- appendCursorResponseObject(cursorId, cursorNss.ns(), firstBatch.arr(), &result);
+ } // Drop collection lock. Global cursor registration must be done without holding any
+ // locks.
+
+ const auto pinnedCursor = CursorManager::getGlobalCursorManager()->registerCursor(
+ opCtx,
+ {std::move(exec),
+ cursorNss,
+ AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
+ opCtx->recoveryUnit()->getReadConcernLevel(),
+ cmdObj});
+
+ appendCursorResponseObject(
+ pinnedCursor.getCursor()->cursorid(), cursorNss.ns(), firstBatch.arr(), &result);
return true;
}
diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp
index d6c00d996d2..d58e25bbbc7 100644
--- a/src/mongo/db/cursor_manager.cpp
+++ b/src/mongo/db/cursor_manager.cpp
@@ -573,32 +573,40 @@ void CursorManager::invalidateAll(OperationContext* opCtx,
// Mark all cursors as killed, but keep around those we can in order to provide a useful error
// message to the user when they attempt to use it next time.
- auto allCurrentPartitions = _cursorMap->lockAllPartitions();
- for (auto&& partition : allCurrentPartitions) {
- for (auto it = partition.begin(); it != partition.end();) {
- auto* cursor = it->second;
- cursor->markAsKilled({ErrorCodes::QueryPlanKilled, reason});
-
- // If there's an operation actively using the cursor, then that operation is now
- // responsible for cleaning it up. Otherwise we can immediately dispose of it.
- if (cursor->_operationUsingCursor) {
- it = partition.erase(it);
- removeTransactionCursorReference(cursor);
- continue;
- }
+ std::vector<std::unique_ptr<ClientCursor, ClientCursor::Deleter>> toDisposeWithoutMutex;
+ {
+ auto allCurrentPartitions = _cursorMap->lockAllPartitions();
+ for (auto&& partition : allCurrentPartitions) {
+ for (auto it = partition.begin(); it != partition.end();) {
+ auto* cursor = it->second;
+ cursor->markAsKilled({ErrorCodes::QueryPlanKilled, reason});
+
+ // If there's an operation actively using the cursor, then that operation is now
+ // responsible for cleaning it up. Otherwise we can immediately dispose of it.
+ if (cursor->_operationUsingCursor) {
+ it = partition.erase(it);
+ removeTransactionCursorReference(cursor);
+ continue;
+ }
- if (!collectionGoingAway) {
- // We keep around unpinned cursors so that future attempts to use the cursor will
- // result in a useful error message.
- ++it;
- } else {
- removeTransactionCursorReference(cursor);
- cursor->dispose(opCtx);
- delete cursor;
- it = partition.erase(it);
+ if (!collectionGoingAway) {
+ // We keep around unpinned cursors so that future attempts to use the cursor
+ // will result in a useful error message.
+ ++it;
+ } else {
+ removeTransactionCursorReference(cursor);
+ toDisposeWithoutMutex.emplace_back(cursor);
+ it = partition.erase(it);
+ }
}
}
}
+
+ // Dispose of the cursors we can now delete. This might involve lock acquisitions for safe
+ // cleanup, so avoid doing it while holding mutexes.
+ for (auto&& cursor : toDisposeWithoutMutex) {
+ cursor->dispose(opCtx);
+ }
}
void CursorManager::invalidateDocument(OperationContext* opCtx,
@@ -636,17 +644,15 @@ bool CursorManager::cursorShouldTimeout_inlock(const ClientCursor* cursor, Date_
}
std::size_t CursorManager::timeoutCursors(OperationContext* opCtx, Date_t now) {
- std::vector<std::unique_ptr<ClientCursor, ClientCursor::Deleter>> toDelete;
+ std::vector<std::unique_ptr<ClientCursor, ClientCursor::Deleter>> toDisposeWithoutMutex;
for (size_t partitionId = 0; partitionId < kNumPartitions; ++partitionId) {
auto lockedPartition = _cursorMap->lockOnePartitionById(partitionId);
for (auto it = lockedPartition->begin(); it != lockedPartition->end();) {
auto* cursor = it->second;
if (cursorShouldTimeout_inlock(cursor, now)) {
- // Dispose of the cursor and remove it from the partition.
removeTransactionCursorReference(cursor);
- cursor->dispose(opCtx);
- toDelete.push_back(std::unique_ptr<ClientCursor, ClientCursor::Deleter>{cursor});
+ toDisposeWithoutMutex.emplace_back(cursor);
it = lockedPartition->erase(it);
} else {
++it;
@@ -654,7 +660,11 @@ std::size_t CursorManager::timeoutCursors(OperationContext* opCtx, Date_t now) {
}
}
- return toDelete.size();
+ // Be careful not to dispose of cursors while holding the partition lock.
+ for (auto&& cursor : toDisposeWithoutMutex) {
+ cursor->dispose(opCtx);
+ }
+ return toDisposeWithoutMutex.size();
}
namespace {
@@ -691,10 +701,9 @@ StatusWith<ClientCursorPin> CursorManager::pinCursor(OperationContext* opCtx,
if (cursor->getExecutor()->isMarkedAsKilled()) {
// This cursor was killed while it was idle.
Status error = cursor->getExecutor()->getKillStatus();
- lockedPartition->erase(cursor->cursorid());
- removeTransactionCursorReference(cursor);
- cursor->dispose(opCtx);
- delete cursor;
+ deregisterAndDestroyCursor(std::move(lockedPartition),
+ opCtx,
+ std::unique_ptr<ClientCursor, ClientCursor::Deleter>(cursor));
return error;
}
@@ -716,7 +725,8 @@ StatusWith<ClientCursorPin> CursorManager::pinCursor(OperationContext* opCtx,
return ClientCursorPin(opCtx, cursor);
}
-void CursorManager::unpin(OperationContext* opCtx, ClientCursor* cursor) {
+void CursorManager::unpin(OperationContext* opCtx,
+ std::unique_ptr<ClientCursor, ClientCursor::Deleter> cursor) {
// Avoid computing the current time within the critical section.
auto now = opCtx->getServiceContext()->getPreciseClockSource()->now();
@@ -737,13 +747,14 @@ void CursorManager::unpin(OperationContext* opCtx, ClientCursor* cursor) {
if (interruptStatus == ErrorCodes::Interrupted || interruptStatus == ErrorCodes::CursorKilled) {
LOG(0) << "removing cursor " << cursor->cursorid()
<< " after completing batch: " << interruptStatus;
- partition->erase(cursor->cursorid());
- removeTransactionCursorReference(cursor);
- cursor->dispose(opCtx);
- delete cursor;
+ return deregisterAndDestroyCursor(std::move(partition), opCtx, std::move(cursor));
} else if (!interruptStatus.isOK()) {
cursor->markAsKilled(interruptStatus);
}
+
+ // The cursor will stay around in '_cursorMap', so release the unique pointer to avoid deleting
+ // it.
+ cursor.release();
}
void CursorManager::getCursorIds(std::set<CursorId>* openCursors) const {
@@ -863,6 +874,22 @@ void CursorManager::deregisterCursor(ClientCursor* cursor) {
removeTransactionCursorReference(cursor);
}
+void CursorManager::deregisterAndDestroyCursor(
+ Partitioned<stdx::unordered_map<CursorId, ClientCursor*>, kNumPartitions>::OnePartition&& lk,
+ OperationContext* opCtx,
+ std::unique_ptr<ClientCursor, ClientCursor::Deleter> cursor) {
+ {
+ auto lockWithRestrictedScope = std::move(lk);
+ lockWithRestrictedScope->erase(cursor->cursorid());
+ removeTransactionCursorReference(cursor.get());
+ }
+ // Dispose of the cursor without holding any cursor manager mutexes. Disposal of a cursor can
+ // require taking lock manager locks, which we want to avoid while holding a mutex. If we did
+ // so, any caller of a CursorManager method which already held a lock manager lock could induce
+ // a deadlock when trying to acquire a CursorManager lock.
+ cursor->dispose(opCtx);
+}
+
Status CursorManager::killCursor(OperationContext* opCtx,
CursorId id,
bool shouldAudit,
@@ -911,9 +938,7 @@ Status CursorManager::killCursor(OperationContext* opCtx,
audit::logKillCursorsAuthzCheck(opCtx->getClient(), _nss, id, ErrorCodes::OK);
}
- lockedPartition->erase(ownedCursor->cursorid());
- cursor->_cursorManager->removeTransactionCursorReference(cursor);
- ownedCursor->dispose(opCtx);
+ deregisterAndDestroyCursor(std::move(lockedPartition), opCtx, std::move(ownedCursor));
return Status::OK();
}
diff --git a/src/mongo/db/cursor_manager.h b/src/mongo/db/cursor_manager.h
index 2104a558bae..6817455ec0d 100644
--- a/src/mongo/db/cursor_manager.h
+++ b/src/mongo/db/cursor_manager.h
@@ -292,8 +292,13 @@ private:
OperationContext* opCtx, std::unique_ptr<ClientCursor, ClientCursor::Deleter> clientCursor);
void deregisterCursor(ClientCursor* cursor);
+ void deregisterAndDestroyCursor(
+ Partitioned<stdx::unordered_map<CursorId, ClientCursor*>, kNumPartitions>::OnePartition&&,
+ OperationContext* opCtx,
+ std::unique_ptr<ClientCursor, ClientCursor::Deleter> cursor);
- void unpin(OperationContext* opCtx, ClientCursor* cursor);
+ void unpin(OperationContext* opCtx,
+ std::unique_ptr<ClientCursor, ClientCursor::Deleter> cursor);
bool cursorShouldTimeout_inlock(const ClientCursor* cursor, Date_t now);