diff options
author | David Storch <david.storch@10gen.com> | 2015-03-14 18:49:43 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2015-03-18 10:01:44 -0400 |
commit | 4049c8328c98d8eb2b84fffca43ff4904e936909 (patch) | |
tree | f6819274232596c3bb2d3df0fb5b89fcde344c71 /src | |
parent | 465ca1774ddb7078566cb2edf0408f5881d6aae3 (diff) | |
download | mongo-4049c8328c98d8eb2b84fffca43ff4904e936909.tar.gz |
SERVER-17282 implement FindCmd::run()
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/commands/find_cmd.cpp | 374 | ||||
-rw-r--r-- | src/mongo/db/commands/find_cmd.h | 88 | ||||
-rw-r--r-- | src/mongo/db/query/find.cpp | 195 | ||||
-rw-r--r-- | src/mongo/db/query/find.h | 40 |
4 files changed, 443 insertions, 254 deletions
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 36c62862650..277ea0fec87 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -26,121 +26,323 @@ * it in the license file. */ -#include "mongo/platform/basic.h" +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery -#include "mongo/db/commands/find_cmd.h" +#include "mongo/platform/basic.h" -#include <boost/scoped_ptr.hpp> +#include <memory> #include "mongo/db/auth/authorization_session.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database.h" #include "mongo/db/client.h" +#include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" +#include "mongo/db/exec/working_set_common.h" +#include "mongo/db/global_environment_experiment.h" #include "mongo/db/query/explain.h" -#include "mongo/db/query/get_executor.h" #include "mongo/db/query/find.h" +#include "mongo/db/query/get_executor.h" +#include "mongo/db/stats/counters.h" #include "mongo/s/d_state.h" +#include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" namespace mongo { - using boost::scoped_ptr; - using std::auto_ptr; - using std::string; + /** + * A command for running .find() queries. + */ + class FindCmd : public Command { + public: + FindCmd() : Command("find") { } + + virtual bool isWriteCommandForConfigServer() const { return false; } + + virtual bool slaveOk() const { return false; } + + virtual bool slaveOverrideOk() const { return true; } + + virtual bool maintenanceOk() const { return false; } - static FindCmd findCmd; + virtual bool adminOnly() const { return false; } - Status FindCmd::checkAuthForCommand(ClientBasic* client, + virtual void help(std::stringstream& help) const { + help << "query for documents"; + } + + /** + * A find command does not increment the command counter, but rather increments the + * query counter. + */ + bool shouldAffectCommandCounter() const { return false; } + + virtual Status checkAuthForCommand(ClientBasic* client, const std::string& dbname, const BSONObj& cmdObj) { - AuthorizationSession* authzSession = client->getAuthorizationSession(); - ResourcePattern pattern = parseResourcePattern(dbname, cmdObj); + AuthorizationSession* authzSession = client->getAuthorizationSession(); + ResourcePattern pattern = parseResourcePattern(dbname, cmdObj); - if (authzSession->isAuthorizedForActionsOnResource(pattern, ActionType::find)) { - return Status::OK(); - } + if (authzSession->isAuthorizedForActionsOnResource(pattern, ActionType::find)) { + return Status::OK(); + } - return Status(ErrorCodes::Unauthorized, "unauthorized"); - } - - Status FindCmd::explain(OperationContext* txn, - const std::string& dbname, - const BSONObj& cmdObj, - ExplainCommon::Verbosity verbosity, - BSONObjBuilder* out) const { - const string fullns = parseNs(dbname, cmdObj); - - // Parse the command BSON to a LiteParsedQuery. - LiteParsedQuery* rawLpq; - bool isExplain = true; - Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq); - if (!lpqStatus.isOK()) { - return lpqStatus; - } - auto_ptr<LiteParsedQuery> lpq(rawLpq); - - const NamespaceString nss(fullns); - - // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. - // This requires a lock on the collection in case we're parsing $where: where-specific - // parsing code assumes we have a lock and creates execution machinery that requires it. - CanonicalQuery* rawCq; - WhereCallbackReal whereCallback(txn, nss.db()); - Status canonStatus = CanonicalQuery::canonicalize(lpq.release(), &rawCq, whereCallback); - if (!canonStatus.isOK()) { - return canonStatus; + return Status(ErrorCodes::Unauthorized, "unauthorized"); } - auto_ptr<CanonicalQuery> cq(rawCq); - - AutoGetCollectionForRead ctx(txn, nss); - // The collection may be NULL. If so, getExecutor() should handle it by returning - // an execution tree with an EOFStage. - Collection* collection = ctx.getCollection(); - - // We have a parsed query. Time to get the execution plan for it. - PlanExecutor* rawExec; - Status execStatus = Status::OK(); - if (cq->getParsed().isOplogReplay()) { - execStatus = getOplogStartHack(txn, collection, cq.release(), &rawExec); - } - else { - size_t options = QueryPlannerParams::DEFAULT; - // TODO: The version attached to the TLS cannot be relied upon, the shard - // version should be passed as part of the command parameter. - if (shardingState.needCollectionMetadata(cq->getParsed().ns())) { - options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; + + virtual Status explain(OperationContext* txn, + const std::string& dbname, + const BSONObj& cmdObj, + ExplainCommon::Verbosity verbosity, + BSONObjBuilder* out) const { + const std::string fullns = parseNs(dbname, cmdObj); + const NamespaceString nss(fullns); + + // Parse the command BSON to a LiteParsedQuery. + std::unique_ptr<LiteParsedQuery> lpq; + { + LiteParsedQuery* rawLpq; + const bool isExplain = true; + Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq); + if (!lpqStatus.isOK()) { + return lpqStatus; + } + lpq.reset(rawLpq); } - execStatus = getExecutor(txn, - collection, - cq.release(), - PlanExecutor::YIELD_AUTO, - &rawExec, - options); - } + // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. + std::unique_ptr<CanonicalQuery> cq; + { + CanonicalQuery* rawCq; + WhereCallbackReal whereCallback(txn, nss.db()); + Status canonStatus = CanonicalQuery::canonicalize(lpq.release(), + &rawCq, + whereCallback); + if (!canonStatus.isOK()) { + return canonStatus; + } + cq.reset(rawCq); + } - if (!execStatus.isOK()) { - return execStatus; - } + AutoGetCollectionForRead ctx(txn, nss); + // The collection may be NULL. If so, getExecutor() should handle it by returning + // an execution tree with an EOFStage. + Collection* collection = ctx.getCollection(); - scoped_ptr<PlanExecutor> exec(rawExec); + // We have a parsed query. Time to get the execution plan for it. + std::unique_ptr<PlanExecutor> exec; + { + PlanExecutor* rawExec; + Status execStatus = Status::OK(); + if (cq->getParsed().isOplogReplay()) { + execStatus = getOplogStartHack(txn, collection, cq.release(), &rawExec); + } + else { + size_t options = QueryPlannerParams::DEFAULT; + // TODO: The version attached to the TLS cannot be relied upon, the shard + // version should be passed as part of the command parameter. + if (shardingState.needCollectionMetadata(cq->getParsed().ns())) { + options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; + } - // Got the execution tree. Explain it. - Explain::explainStages(exec.get(), verbosity, out); - return Status::OK(); - } + execStatus = getExecutor(txn, + collection, + cq.release(), + PlanExecutor::YIELD_AUTO, + &rawExec, + options); + } - bool FindCmd::run(OperationContext* txn, - const string& dbname, + if (!execStatus.isOK()) { + return execStatus; + } + + exec.reset(rawExec); + } + + // Got the execution tree. Explain it. + Explain::explainStages(exec.get(), verbosity, out); + return Status::OK(); + } + + /** + * Runs a query using the following steps: + * 1) Parsing. + * 2) Acquire locks. + * 3) Plan query, obtaining an executor that can run it. + * 4) Setup a cursor for the query, which may be used on subsequent getMores. + * 5) Generate the first batch. + * 6) Save state for getMore. + * 7) Generate response to send to the client. + */ + virtual bool run(OperationContext* txn, + const std::string& dbname, BSONObj& cmdObj, int options, - string& errmsg, + std::string& errmsg, BSONObjBuilder& result, bool fromRepl) { - // Currently only explains of finds run through the find command. Queries that are not - // explained use the legacy OP_QUERY path. - // TODO: check the comment above regarding shard versioning. - errmsg = "find command not yet implemented"; - return false; - } + const std::string fullns = parseNs(dbname, cmdObj); + const NamespaceString nss(fullns); + + // Although it is a command, a find command gets counted as a query. + globalOpCounters.gotQuery(); + + // 1a) Parse the command BSON to a LiteParsedQuery. + std::unique_ptr<LiteParsedQuery> lpq; + { + LiteParsedQuery* rawLpq; + const bool isExplain = false; + Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq); + if (!lpqStatus.isOK()) { + return appendCommandStatus(result, lpqStatus); + } + lpq.reset(rawLpq); + } + + // Fill out curop information. + beginQueryOp(nss, cmdObj, lpq->getNumToReturn(), lpq->getSkip(), txn->getCurOp()); + + // 1b) Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. + std::unique_ptr<CanonicalQuery> cq; + { + CanonicalQuery* rawCq; + WhereCallbackReal whereCallback(txn, nss.db()); + Status canonStatus = CanonicalQuery::canonicalize(lpq.release(), + &rawCq, + whereCallback); + if (!canonStatus.isOK()) { + return appendCommandStatus(result, canonStatus); + } + cq.reset(rawCq); + } + + // 2) Acquire locks. + AutoGetCollectionForRead ctx(txn, nss); + Collection* collection = ctx.getCollection(); + + // 3) Get the execution plan for the query. + // + // TODO: Do we need to handle oplog replay here? + std::unique_ptr<PlanExecutor> execHolder; + { + PlanExecutor* rawExec; + size_t options = QueryPlannerParams::DEFAULT; + // TODO: The version attached to the TLS cannot be relied upon, the shard + // version should be passed as part of the command parameter. + if (shardingState.needCollectionMetadata(cq->getParsed().ns())) { + options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; + } + + Status execStatus = getExecutor(txn, + collection, + cq.release(), + PlanExecutor::YIELD_AUTO, + &rawExec, + options); + if (!execStatus.isOK()) { + return appendCommandStatus(result, execStatus); + } + + execHolder.reset(rawExec); + } + + if (!collection) { + // No collection. Just fill out curop indicating that there were zero results and + // there is no ClientCursor id, and then return. + const int numResults = 0; + const CursorId cursorId = 0; + endQueryOp(ctx, execHolder.get(), numResults, cursorId, txn->getCurOp()); + Command::appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); + return true; + } + + const LiteParsedQuery& pq = execHolder->getCanonicalQuery()->getParsed(); + + // 4) If possible, register the execution plan inside a ClientCursor, and pin that + // cursor. In this case, ownership of the PlanExecutor is transferred to the + // ClientCursor, and 'exec' becomes null. + // + // First unregister the PlanExecutor so it can be re-registered with ClientCursor. + execHolder->deregisterExec(); + + // Create a ClientCursor containing this plan executor. We don't have to worry + // about leaking it as it's inserted into a global map by its ctor. + ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), + execHolder.release(), + nss.ns(), + pq.getOptions(), + pq.getFilter()); + CursorId cursorId = cursor->cursorid(); + ClientCursorPin ccPin(collection->getCursorManager(), cursorId); + + // On early return, get rid of the the cursor. + ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin); + + invariant(!execHolder); + PlanExecutor* exec = cursor->getExecutor(); + + // 5) Stream query results, adding them to a BSONArray as we go. + // + // TODO: Handle result sets larger than 16MB. + BSONArrayBuilder firstBatch; + BSONObj obj; + PlanExecutor::ExecState state; + int numResults = 0; + while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { + // Add result to output buffer. + firstBatch.append(obj); + numResults++; + + if (enoughForFirstBatch(pq, numResults, firstBatch.len())) { + break; + } + } + + // Throw an assertion if query execution fails for any reason. + if (PlanExecutor::FAILURE == state) { + const std::unique_ptr<PlanStageStats> stats(exec->getStats()); + error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); + return appendCommandStatus(result, + Status(ErrorCodes::OperationFailed, + str::stream() << "Executor error: " + << WorkingSetCommon::toStatusString(obj))); + } + + // 6) Set up the cursor for getMore. + if (shouldSaveCursor(txn, collection, state, exec)) { + // State will be restored on getMore. + exec->saveState(); + + // TODO: Do we also need to set collection metadata here? + cursor->setLeftoverMaxTimeMicros(txn->getCurOp()->getRemainingMaxTimeMicros()); + cursor->setPos(numResults); + + // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their + // next getMore. + if (!(pq.isTailable() && state == PlanExecutor::IS_EOF)) { + // We stash away the RecoveryUnit in the ClientCursor. It's used for + // subsequent getMore requests. The calling OpCtx gets a fresh RecoveryUnit. + txn->recoveryUnit()->commitAndRestart(); + cursor->setOwnedRecoveryUnit(txn->releaseRecoveryUnit()); + StorageEngine* engine = getGlobalEnvironment()->getGlobalStorageEngine(); + txn->setRecoveryUnit(engine->newRecoveryUnit()); + } + } + else { + cursorId = 0; + } + + // Fill out curop based on the results. + endQueryOp(ctx, exec, numResults, cursorId, txn->getCurOp()); + + // 7) Generate the response object to send to the client. + Command::appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result); + if (cursorId) { + cursorFreer.Dismiss(); + } + return true; + } + + } findCmd; } // namespace mongo diff --git a/src/mongo/db/commands/find_cmd.h b/src/mongo/db/commands/find_cmd.h deleted file mode 100644 index e472ebc1271..00000000000 --- a/src/mongo/db/commands/find_cmd.h +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Copyright (C) 2014 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/catalog/collection.h" -#include "mongo/db/commands.h" -#include "mongo/db/query/lite_parsed_query.h" - -namespace mongo { - - /** - * The find command will be the main entry point for running queries once runQuery() - * is deprecated. - * - * Currently, only explains run through the FindCmd, and regular queries use the old code - * path. - */ - class FindCmd : public Command { - public: - FindCmd() : Command("find") { } - - virtual bool isWriteCommandForConfigServer() const { return false; } - - virtual bool slaveOk() const { return false; } - - virtual bool slaveOverrideOk() const { return true; } - - virtual bool maintenanceOk() const { return false; } - - virtual bool adminOnly() const { return false; } - - virtual void help( std::stringstream& help ) const { - help << "query for documents"; - } - - /** - * In order to run the find command, you must be authorized for the "find" action - * type on the collection. - */ - virtual Status checkAuthForCommand(ClientBasic* client, - const std::string& dbname, - const BSONObj& cmdObj); - - virtual Status explain(OperationContext* txn, - const std::string& dbname, - const BSONObj& cmdObj, - ExplainCommon::Verbosity verbosity, - BSONObjBuilder* out) const; - - /** - * TODO: This needs to be implemented. Currently it does nothing. - */ - virtual bool run(OperationContext* txn, - const std::string& dbname, - BSONObj& cmdObj, int options, - std::string& errmsg, - BSONObjBuilder& result, - bool fromRepl); - - }; - -} // namespace mongo diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index 4b179c19661..f64f32d22af 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -77,22 +77,6 @@ namespace { return !pq.getHint().isEmpty() || !pq.getMin().isEmpty() || !pq.getMax().isEmpty(); } - /** - * Quote: - * if ntoreturn is zero, we return up to 101 objects. on the subsequent getmore, there - * is only a size limit. The idea is that on a find() where one doesn't use much results, - * we don't return much, but once getmore kicks in, we start pushing significant quantities. - * - * The n limit (vs. size) is important when someone fetches only one small field from big - * objects, which causes massive scanning server-side. - */ - bool enoughForFirstBatch(const mongo::LiteParsedQuery& pq, int n, int len) { - if (0 == pq.getNumToReturn()) { - return (len > 1024 * 1024) || n >= 101; - } - return n >= pq.getNumToReturn() || len > mongo::MaxBytesToReturnToClientAtOnce; - } - bool enough(const mongo::LiteParsedQuery& pq, int n) { if (0 == pq.getNumToReturn()) { return false; } return n >= pq.getNumToReturn(); @@ -123,6 +107,115 @@ namespace mongo { // Failpoint for checking whether we've received a getmore. MONGO_FP_DECLARE(failReceivedGetmore); + /** + * If ntoreturn is zero, we stop generating additional results as soon as we have either 101 + * documents or at least 1MB of data. On subsequent getmores, there is no limit on the number + * of results; we will stop as soon as we have at least 4 MB of data. The idea is that on a + * find() where one doesn't use much results, we don't return much, but once getmore kicks in, + * we start pushing significant quantities. + * + * If ntoreturn is non-zero, the we stop building the first batch once we either have ntoreturn + * results, or when the result set exceeds 4 MB. + */ + bool enoughForFirstBatch(const LiteParsedQuery& pq, int numDocs, int bytesBuffered) { + if (0 == pq.getNumToReturn()) { + return (bytesBuffered > 1024 * 1024) || numDocs >= 101; + } + return numDocs >= pq.getNumToReturn() || bytesBuffered > MaxBytesToReturnToClientAtOnce; + } + + bool shouldSaveCursor(OperationContext* txn, + const Collection* collection, + PlanExecutor::ExecState finalState, + PlanExecutor* exec) { + if (PlanExecutor::FAILURE == finalState || PlanExecutor::DEAD == finalState) { + return false; + } + + const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); + if (!pq.wantMore() && !pq.isTailable()) { + return false; + } + + if (pq.getNumToReturn() == 1) { + return false; + } + + // We keep a tailable cursor around unless the collection we're tailing has no + // records. + // + // SERVER-13955: we should be able to create a tailable cursor that waits on + // an empty collection. Right now we do not keep a cursor if the collection + // has zero records. + if (pq.isTailable()) { + return collection && collection->numRecords(txn) != 0U; + } + + return !exec->isEOF(); + } + + void beginQueryOp(const NamespaceString& nss, + const BSONObj& queryObj, + int ntoreturn, + int ntoskip, + CurOp* curop) { + curop->debug().ns = nss.ns(); + curop->debug().query = queryObj; + curop->debug().ntoreturn = ntoreturn; + curop->debug().ntoskip = ntoskip; + curop->setQuery(queryObj); + } + + void endQueryOp(const AutoGetCollectionForRead& ctx, + PlanExecutor* exec, + int numResults, + CursorId cursorId, + CurOp* curop) { + invariant(exec); + invariant(curop); + + // Fill out basic curop query exec properties. + curop->debug().nreturned = numResults; + curop->debug().cursorid = (0 == cursorId ? -1 : cursorId); + + // Fill out curop based on explain summary statistics. + PlanSummaryStats summaryStats; + Explain::getSummaryStats(exec, &summaryStats); + curop->debug().scanAndOrder = summaryStats.hasSortStage; + curop->debug().nscanned = summaryStats.totalKeysExamined; + curop->debug().nscannedObjects = summaryStats.totalDocsExamined; + curop->debug().idhack = summaryStats.isIdhack; + + const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() : + serverGlobalParams.defaultProfile; + const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery; + const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1); + + // Set debug information for consumption by the profiler and slow query log. + if (dbProfilingLevel > 0 + || curop->elapsedMillis() > serverGlobalParams.slowMS + || logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) { + // Generate plan summary string. + curop->debug().planSummary = Explain::getPlanSummary(exec); + } + + // Set debug information for consumption by the profiler only. + if (dbProfilingLevel > 0) { + // Get BSON stats. + scoped_ptr<PlanStageStats> execStats(exec->getStats()); + BSONObjBuilder statsBob; + Explain::statsToBSON(*execStats, &statsBob); + curop->debug().execStats.set(statsBob.obj()); + + // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj. + if (curop->debug().execStats.tooBig() && !curop->debug().planSummary.empty()) { + BSONObjBuilder bob; + bob.append("summary", curop->debug().planSummary.toString()); + curop->debug().execStats.set(bob.done()); + } + } + } + // TODO: Move this and the other command stuff in runQuery outta here and up a level. static bool runCommands(OperationContext* txn, const char *ns, @@ -556,8 +649,6 @@ namespace mongo { } } - // cout << "diskloc is " << startLoc.toString() << endl; - // Build our collection scan... CollectionScanParams params; params.collection = collection; @@ -582,10 +673,7 @@ namespace mongo { uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid()); // Set curop information. - curop.debug().ns = nss.ns(); - curop.debug().ntoreturn = q.ntoreturn; - curop.debug().query = q.query; - curop.setQuery(q.query); + beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop); // If the query is really a command, run it. if (nss.isCommand()) { @@ -642,12 +730,7 @@ namespace mongo { // Parse, canonicalize, plan, transcribe, and get a plan executor. PlanExecutor* rawExec = NULL; - ScopedTransaction scopedXact(txn, MODE_IS); AutoGetCollectionForRead ctx(txn, nss); - - const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() : - serverGlobalParams.defaultProfile; - Collection* collection = ctx.getCollection(); // We'll now try to get the query executor that will execute this query for us. There @@ -754,9 +837,6 @@ namespace mongo { // If we're replaying the oplog, we save the last time that we read. OpTime slaveReadTill; - // Do we save the PlanExecutor in a ClientCursor for getMore calls later? - bool saveClientCursor = false; - BSONObj obj; PlanExecutor::ExecState state; // uint64_t numMisplacedDocs = 0; @@ -784,11 +864,6 @@ namespace mongo { << " numToReturn=" << pq.getNumToReturn() << " numResults=" << numResults << endl; - // If only one result requested assume it's a findOne() and don't save the cursor. - if (pq.wantMore() && 1 != pq.getNumToReturn()) { - LOG(5) << " executor EOF=" << exec->isEOF() << endl; - saveClientCursor = !exec->isEOF(); - } break; } } @@ -809,19 +884,6 @@ namespace mongo { uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj)); } - // Why save a dead executor? - if (PlanExecutor::DEAD == state) { - saveClientCursor = false; - } - else if (pq.isTailable()) { - // If we're tailing a capped collection, we don't bother saving the cursor if the - // collection is empty. Otherwise, the semantics of the tailable cursor is that the - // client will keep trying to read from it. So we'll keep it around. - if (collection && collection->numRecords(txn) != 0 && pq.getNumToReturn() != 1) { - saveClientCursor = true; - } - } - // TODO(greg): This will go away soon. if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // if the version changed during the query we might be missing some data and its safe to @@ -831,39 +893,12 @@ namespace mongo { shardingState.getVersion(nss.ns())); } - const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery; - const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1); - - PlanSummaryStats summaryStats; - Explain::getSummaryStats(exec.get(), &summaryStats); - - curop.debug().ntoskip = pq.getSkip(); - curop.debug().nreturned = numResults; - curop.debug().scanAndOrder = summaryStats.hasSortStage; - curop.debug().nscanned = summaryStats.totalKeysExamined; - curop.debug().nscannedObjects = summaryStats.totalDocsExamined; - curop.debug().idhack = summaryStats.isIdhack; - - // Set debug information for consumption by the profiler. - if (dbProfilingLevel > 0 || - curop.elapsedMillis() > serverGlobalParams.slowMS || - logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) { - // Get BSON stats. - scoped_ptr<PlanStageStats> execStats(exec->getStats()); - BSONObjBuilder statsBob; - Explain::statsToBSON(*execStats, &statsBob); - curop.debug().execStats.set(statsBob.obj()); - - // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj. - if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) { - BSONObjBuilder bob; - bob.append("summary", curop.debug().planSummary.toString()); - curop.debug().execStats.set(bob.done()); - } - } - + // Fill out curop based on query results. If we have a cursorid, we will fill out curop with + // this cursorid later. long long ccId = 0; - if (saveClientCursor) { + endQueryOp(ctx, exec.get(), numResults, ccId, &curop); + + if (shouldSaveCursor(txn, collection, state, exec.get())) { // We won't use the executor until it's getMore'd. exec->saveState(); diff --git a/src/mongo/db/query/find.h b/src/mongo/db/query/find.h index b2bd7881ec3..c8511556928 100644 --- a/src/mongo/db/query/find.h +++ b/src/mongo/db/query/find.h @@ -41,6 +41,46 @@ namespace mongo { class OperationContext; /** + * Returns true if enough results have been prepared to stop adding more to the first batch. + * + * Should be called *after* adding to the result set rather than before. + */ + bool enoughForFirstBatch(const LiteParsedQuery& pq, int numDocs, int bytesBuffered); + + /** + * Returns true if we should keep a cursor around because we're expecting to return more query + * results. + * + * If false, the caller should close the cursor and indicate this to the client by sending back + * a cursor ID of 0. + */ + bool shouldSaveCursor(OperationContext* txn, + const Collection* collection, + PlanExecutor::ExecState finalState, + PlanExecutor* exec); + + /** + * Fills out CurOp with information about this query. + */ + void beginQueryOp(const NamespaceString& nss, + const BSONObj& queryObj, + int ntoreturn, + int ntoskip, + CurOp* curop); + + /** + * Fills out CurOp with information regarding this query's execution. + * + * Uses explain functionality to extract stats from 'exec'. 'ctx' is used to conditionalize + * whether or not we do expensive stats gathering based on the database profiling level. + */ + void endQueryOp(const AutoGetCollectionForRead& ctx, + PlanExecutor* exec, + int numResults, + CursorId cursorId, + CurOp* curop); + + /** * Constructs a PlanExecutor for a query with the oplogReplay option set to true, * for the query 'cq' over the collection 'collection'. The PlanExecutor will * wrap a singleton OplogStart stage. |