diff options
Diffstat (limited to 'src/mongo/db/query/find.cpp')
-rw-r--r-- | src/mongo/db/query/find.cpp | 131 |
1 files changed, 54 insertions, 77 deletions
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index 44f69823096..62a08389e3f 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -55,6 +55,7 @@ #include "mongo/db/server_options.h" #include "mongo/db/server_parameters.h" #include "mongo/db/service_context.h" +#include "mongo/db/stats/top.h" #include "mongo/db/storage/storage_options.h" #include "mongo/s/chunk_version.h" #include "mongo/s/stale_exception.h" @@ -62,10 +63,10 @@ #include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" +#include "mongo/util/scopeguard.h" namespace mongo { -using std::endl; using std::unique_ptr; using stdx::make_unique; @@ -238,47 +239,45 @@ Message getMore(OperationContext* opCtx, const NamespaceString nss(ns); - // Depending on the type of cursor being operated on, we hold locks for the whole getMore, - // or none of the getMore, or part of the getMore. The three cases in detail: + // Cursors come in one of two flavors: + // - Cursors owned by the collection cursor manager, such as those generated via the find + // command. For these cursors, we hold the appropriate collection lock for the duration of + // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp + // object appropriately and record execution time via Top upon completion. + // - Cursors owned by the global cursor manager, such as those generated via the aggregate + // command. These cursors either hold no collection state or manage their collection state + // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to + // update the CurOp object appropriately and record execution time via Top upon + // completion. // - // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore. - // 2) Cursor owned by global cursor manager: we don't lock anything. These cursors don't own - // any collection state. These cursors are generated either by the listCollections or - // listIndexes commands, as these special cursor-generating commands operate over catalog - // data rather than targeting the data within a collection. - // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and - // "unpinCollLock". This is because agg cursors handle locking internally (hence the - // release), but the pin and unpin of the cursor must occur under the collection lock. - // We don't use our AutoGetCollectionForRead "ctx" to relock, because - // AutoGetCollectionForRead checks the sharding version (and we want the relock for the - // unpin to succeed even if the sharding version has changed). - // - // Note that we declare our locks before our ClientCursorPin, in order to ensure that the - // pin's destructor is called before the lock destructors (so that the unpin occurs under - // the lock). - unique_ptr<AutoGetCollectionForReadCommand> ctx; - unique_ptr<Lock::DBLock> unpinDBLock; - unique_ptr<Lock::CollectionLock> unpinCollLock; - + // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate + // 'cursorManager'. + boost::optional<AutoGetCollectionForReadCommand> readLock; + boost::optional<AutoStatsTracker> statsTracker; CursorManager* cursorManager; - if (nss.isListIndexesCursorNS() || nss.isListCollectionsCursorNS()) { - // List collections and list indexes are special cursor-generating commands whose - // cursors are managed globally, as they operate over catalog data rather than targeting - // the data within a collection. + + if (CursorManager::isGloballyManagedCursor(cursorid)) { cursorManager = CursorManager::getGlobalCursorManager(); - } else { - ctx = stdx::make_unique<AutoGetCollectionOrViewForReadCommand>(opCtx, nss); - auto viewCtx = static_cast<AutoGetCollectionOrViewForReadCommand*>(ctx.get()); - if (viewCtx->getView()) { - uasserted( + + if (boost::optional<NamespaceString> nssForCurOp = nss.isGloballyManagedNamespace() + ? nss.getTargetNSForGloballyManagedNamespace() + : nss) { + AutoGetDb autoDb(opCtx, nssForCurOp->db(), MODE_IS); + const auto profilingLevel = autoDb.getDb() + ? boost::optional<int>{autoDb.getDb()->getProfilingLevel()} + : boost::none; + statsTracker.emplace(opCtx, *nssForCurOp, Top::LockType::NotLocked, profilingLevel); + uassert( ErrorCodes::CommandNotSupportedOnView, - str::stream() << "Namespace " << nss.ns() + str::stream() << "Namespace " << nssForCurOp->ns() << " is a view. OP_GET_MORE operations are not supported on views. " << "Only clients which support the getMore command can be used to " - "query views."); + "query views.", + !autoDb.getDb()->getViewCatalog()->lookup(opCtx, nssForCurOp->ns())); } - - Collection* collection = ctx->getCollection(); + } else { + readLock.emplace(opCtx, nss); + Collection* collection = readLock->getCollection(); uassert(17356, "collection dropped between getMore calls", collection); cursorManager = collection->getCursorManager(); } @@ -323,8 +322,8 @@ Message getMore(OperationContext* opCtx, str::stream() << "Requested getMore on namespace " << ns << ", but cursor " << cursorid << " belongs to namespace " - << cc->ns(), - ns == cc->ns()); + << cc->nss().ns(), + nss == cc->nss()); *isCursorAuthorized = true; if (cc->isReadCommitted()) @@ -345,11 +344,6 @@ Message getMore(OperationContext* opCtx, cc->updateSlaveLocation(opCtx); - if (cc->isAggCursor()) { - // Agg cursors handle their own locking internally. - ctx.reset(); // unlocks - } - // If we're replaying the oplog, we save the last time that we read. Timestamp slaveReadTill; @@ -359,12 +353,12 @@ Message getMore(OperationContext* opCtx, uint64_t notifierVersion = 0; std::shared_ptr<CappedInsertNotifier> notifier; if (isCursorAwaitData(cc)) { - invariant(ctx->getCollection()->isCapped()); + invariant(readLock->getCollection()->isCapped()); // Retrieve the notifier which we will wait on until new data arrives. We make sure // to do this in the lock because once we drop the lock it is possible for the // collection to become invalid. The notifier itself will outlive the collection if // the collection is dropped, as we keep a shared_ptr to it. - notifier = ctx->getCollection()->getCappedInsertNotifier(); + notifier = readLock->getCollection()->getCappedInsertNotifier(); // Must get the version before we call generateBatch in case a write comes in after // that call and before we call wait on the notifier. @@ -384,7 +378,7 @@ Message getMore(OperationContext* opCtx, // and currentOp. Upconvert _query to resemble a getMore command, and set the original // command or upconverted legacy query in the originatingCommand field. curOp.setQuery_inlock(upconvertGetMoreEntry(nss, cursorid, ntoreturn)); - curOp.setOriginatingCommand_inlock(cc->getQuery()); + curOp.setOriginatingCommand_inlock(cc->getOriginatingCommandObj()); } PlanExecutor::ExecState state; @@ -402,7 +396,7 @@ Message getMore(OperationContext* opCtx, if (isCursorAwaitData(cc) && state == PlanExecutor::IS_EOF && numResults == 0) { // Save the PlanExecutor and drop our locks. exec->saveState(); - ctx.reset(); + readLock.reset(); // Block waiting for data for up to 1 second. Seconds timeout(1); @@ -414,7 +408,7 @@ Message getMore(OperationContext* opCtx, curOp.setExpectedLatencyMs(durationCount<Milliseconds>(timeout)); // Reacquiring locks. - ctx = make_unique<AutoGetCollectionForReadCommand>(opCtx, nss); + readLock.emplace(opCtx, nss); exec->restoreState(); // We woke up because either the timed_wait expired, or there was more data. Either @@ -428,44 +422,28 @@ Message getMore(OperationContext* opCtx, postExecutionStats.totalDocsExamined -= preExecutionStats.totalDocsExamined; curOp.debug().setPlanSummaryMetrics(postExecutionStats); - // We do not report 'execStats' for aggregation, both in the original request and - // subsequent getMore. The reason for this is that aggregation's source PlanExecutor - // could be destroyed before we know whether we need execStats and we do not want to - // generate for all operations due to cost. - if (!cc->isAggCursor() && curOp.shouldDBProfile()) { + // We do not report 'execStats' for aggregation or other globally managed cursors, both in + // the original request and subsequent getMore. It would be useful to have this information + // for an aggregation, but the source PlanExecutor could be destroyed before we know whether + // we need execStats and we do not want to generate for all operations due to cost. + if (!CursorManager::isGloballyManagedCursor(cursorid) && curOp.shouldDBProfile()) { BSONObjBuilder execStatsBob; Explain::getWinningPlanStats(exec, &execStatsBob); curOp.debug().execStats = execStatsBob.obj(); } - // We have to do this before re-acquiring locks in the agg case because - // shouldSaveCursorGetMore() can make a network call for agg cursors. - // - // TODO: Getting rid of PlanExecutor::isEOF() in favor of PlanExecutor::IS_EOF would mean - // that this network operation is no longer necessary. - const bool shouldSaveCursor = shouldSaveCursorGetMore(state, exec, isCursorTailable(cc)); - - // In order to deregister a cursor, we need to be holding the DB + collection lock and - // if the cursor is aggregation, we release these locks. - if (cc->isAggCursor()) { - invariant(NULL == ctx.get()); - unpinDBLock = make_unique<Lock::DBLock>(opCtx, nss.db(), MODE_IS); - unpinCollLock = - make_unique<Lock::CollectionLock>(opCtx->lockState(), nss.ns(), MODE_IS); - } - // Our two possible ClientCursorPin cleanup paths are: // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin. - // 2) If the cursor is going to be saved, we simply let the pin go out of scope. In - // this case, the pin's destructor will be invoked, which will call release() on the - // pin. Because our ClientCursorPin is declared after our lock is declared, this - // will happen under the lock. - if (!shouldSaveCursor) { + // 2) If the cursor is going to be saved, we simply let the pin go out of scope. In this + // case, the pin's destructor will be invoked, which will call release() on the pin. + // Because our ClientCursorPin is declared after our lock is declared, this will happen + // under the lock if any locking was necessary. + if (!shouldSaveCursorGetMore(state, exec, isCursorTailable(cc))) { ccPin.getValue().deleteUnderlying(); // cc is now invalid, as is the executor cursorid = 0; - cc = NULL; + cc = nullptr; curOp.debug().cursorExhausted = true; LOG(5) << "getMore NOT saving client cursor, ended with state " @@ -673,10 +651,9 @@ std::string runQuery(OperationContext* opCtx, // Allocate a new ClientCursor and register it with the cursor manager. ClientCursorPin pinnedCursor = collection->getCursorManager()->registerCursor( - {exec.release(), - nss.ns(), + {std::move(exec), + nss, opCtx->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), - qr.getOptions(), upconvertQueryEntry(q.query, qr.nss(), q.ntoreturn, q.ntoskip)}); ccId = pinnedCursor.getCursor()->cursorid(); |