SERVER-17282 implement FindCmd::run()

author: David Storch <david.storch@10gen.com> 2015-03-14 18:49:43 -0400
committer: David Storch <david.storch@10gen.com> 2015-03-18 10:01:44 -0400
commit: 4049c8328c98d8eb2b84fffca43ff4904e936909 (patch)
tree: f6819274232596c3bb2d3df0fb5b89fcde344c71 /src
parent: 465ca1774ddb7078566cb2edf0408f5881d6aae3 (diff)
download: mongo-4049c8328c98d8eb2b84fffca43ff4904e936909.tar.gz
4 files changed, 443 insertions, 254 deletions
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index 36c62862650..277ea0fec87 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -26,121 +26,323 @@
  *    it in the license file.
  */
 
-#include "mongo/platform/basic.h"
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
 
-#include "mongo/db/commands/find_cmd.h"
+#include "mongo/platform/basic.h"
 
-#include <boost/scoped_ptr.hpp>
+#include <memory>
 
 #include "mongo/db/auth/authorization_session.h"
 #include "mongo/db/catalog/collection.h"
 #include "mongo/db/catalog/database.h"
 #include "mongo/db/client.h"
+#include "mongo/db/clientcursor.h"
 #include "mongo/db/commands.h"
+#include "mongo/db/exec/working_set_common.h"
+#include "mongo/db/global_environment_experiment.h"
 #include "mongo/db/query/explain.h"
-#include "mongo/db/query/get_executor.h"
 #include "mongo/db/query/find.h"
+#include "mongo/db/query/get_executor.h"
+#include "mongo/db/stats/counters.h"
 #include "mongo/s/d_state.h"
+#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
 
 namespace mongo {
 
-    using boost::scoped_ptr;
-    using std::auto_ptr;
-    using std::string;
+    /**
+     * A command for running .find() queries.
+     */
+    class FindCmd : public Command {
+    public:
+        FindCmd() : Command("find") { }
+
+        virtual bool isWriteCommandForConfigServer() const { return false; }
+
+        virtual bool slaveOk() const { return false; }
+
+        virtual bool slaveOverrideOk() const { return true; }
+
+        virtual bool maintenanceOk() const { return false; }
 
-    static FindCmd findCmd;
+        virtual bool adminOnly() const { return false; }
 
-    Status FindCmd::checkAuthForCommand(ClientBasic* client,
+        virtual void help(std::stringstream& help) const {
+            help << "query for documents";
+        }
+
+        /**
+         * A find command does not increment the command counter, but rather increments the
+         * query counter.
+         */
+        bool shouldAffectCommandCounter() const { return false; }
+
+        virtual Status checkAuthForCommand(ClientBasic* client,
                                            const std::string& dbname,
                                            const BSONObj& cmdObj) {
-        AuthorizationSession* authzSession = client->getAuthorizationSession();
-        ResourcePattern pattern = parseResourcePattern(dbname, cmdObj);
+            AuthorizationSession* authzSession = client->getAuthorizationSession();
+            ResourcePattern pattern = parseResourcePattern(dbname, cmdObj);
 
-        if (authzSession->isAuthorizedForActionsOnResource(pattern, ActionType::find)) {
-            return Status::OK();
-        }
+            if (authzSession->isAuthorizedForActionsOnResource(pattern, ActionType::find)) {
+                return Status::OK();
+            }
 
-        return Status(ErrorCodes::Unauthorized, "unauthorized");
-    }
-
-    Status FindCmd::explain(OperationContext* txn,
-                             const std::string& dbname,
-                             const BSONObj& cmdObj,
-                             ExplainCommon::Verbosity verbosity,
-                             BSONObjBuilder* out) const {
-        const string fullns = parseNs(dbname, cmdObj);
-
-        // Parse the command BSON to a LiteParsedQuery.
-        LiteParsedQuery* rawLpq;
-        bool isExplain = true;
-        Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq);
-        if (!lpqStatus.isOK()) {
-            return lpqStatus;
-        }
-        auto_ptr<LiteParsedQuery> lpq(rawLpq);
-
-        const NamespaceString nss(fullns);
-
-        // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
-        // This requires a lock on the collection in case we're parsing $where: where-specific
-        // parsing code assumes we have a lock and creates execution machinery that requires it.
-        CanonicalQuery* rawCq;
-        WhereCallbackReal whereCallback(txn, nss.db());
-        Status canonStatus = CanonicalQuery::canonicalize(lpq.release(), &rawCq, whereCallback);
-        if (!canonStatus.isOK()) {
-            return canonStatus;
+            return Status(ErrorCodes::Unauthorized, "unauthorized");
         }
-        auto_ptr<CanonicalQuery> cq(rawCq);
-
-        AutoGetCollectionForRead ctx(txn, nss);
-        // The collection may be NULL. If so, getExecutor() should handle it by returning
-        // an execution tree with an EOFStage.
-        Collection* collection = ctx.getCollection();
-
-        // We have a parsed query. Time to get the execution plan for it.
-        PlanExecutor* rawExec;
-        Status execStatus = Status::OK();
-        if (cq->getParsed().isOplogReplay()) {
-            execStatus = getOplogStartHack(txn, collection, cq.release(), &rawExec);
-        }
-        else {
-            size_t options = QueryPlannerParams::DEFAULT;
-            // TODO: The version attached to the TLS cannot be relied upon, the shard
-            // version should be passed as part of the command parameter.
-            if (shardingState.needCollectionMetadata(cq->getParsed().ns())) {
-                options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
+
+        virtual Status explain(OperationContext* txn,
+                               const std::string& dbname,
+                               const BSONObj& cmdObj,
+                               ExplainCommon::Verbosity verbosity,
+                               BSONObjBuilder* out) const {
+            const std::string fullns = parseNs(dbname, cmdObj);
+            const NamespaceString nss(fullns);
+
+            // Parse the command BSON to a LiteParsedQuery.
+            std::unique_ptr<LiteParsedQuery> lpq;
+            {
+                LiteParsedQuery* rawLpq;
+                const bool isExplain = true;
+                Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq);
+                if (!lpqStatus.isOK()) {
+                    return lpqStatus;
+                }
+                lpq.reset(rawLpq);
             }
 
-            execStatus = getExecutor(txn,
-                                     collection,
-                                     cq.release(),
-                                     PlanExecutor::YIELD_AUTO,
-                                     &rawExec,
-                                     options);
-        }
+            // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
+            std::unique_ptr<CanonicalQuery> cq;
+            {
+                CanonicalQuery* rawCq;
+                WhereCallbackReal whereCallback(txn, nss.db());
+                Status canonStatus = CanonicalQuery::canonicalize(lpq.release(),
+                                                                  &rawCq,
+                                                                  whereCallback);
+                if (!canonStatus.isOK()) {
+                    return canonStatus;
+                }
+                cq.reset(rawCq);
+            }
 
-        if (!execStatus.isOK()) {
-            return execStatus;
-        }
+            AutoGetCollectionForRead ctx(txn, nss);
+            // The collection may be NULL. If so, getExecutor() should handle it by returning
+            // an execution tree with an EOFStage.
+            Collection* collection = ctx.getCollection();
 
-        scoped_ptr<PlanExecutor> exec(rawExec);
+            // We have a parsed query. Time to get the execution plan for it.
+            std::unique_ptr<PlanExecutor> exec;
+            {
+                PlanExecutor* rawExec;
+                Status execStatus = Status::OK();
+                if (cq->getParsed().isOplogReplay()) {
+                    execStatus = getOplogStartHack(txn, collection, cq.release(), &rawExec);
+                }
+                else {
+                    size_t options = QueryPlannerParams::DEFAULT;
+                    // TODO: The version attached to the TLS cannot be relied upon, the shard
+                    // version should be passed as part of the command parameter.
+                    if (shardingState.needCollectionMetadata(cq->getParsed().ns())) {
+                        options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
+                    }
 
-        // Got the execution tree. Explain it.
-        Explain::explainStages(exec.get(), verbosity, out);
-        return Status::OK();
-    }
+                    execStatus = getExecutor(txn,
+                                             collection,
+                                             cq.release(),
+                                             PlanExecutor::YIELD_AUTO,
+                                             &rawExec,
+                                             options);
+                }
 
-    bool FindCmd::run(OperationContext* txn,
-                         const string& dbname,
+                if (!execStatus.isOK()) {
+                    return execStatus;
+                }
+
+                exec.reset(rawExec);
+            }
+
+            // Got the execution tree. Explain it.
+            Explain::explainStages(exec.get(), verbosity, out);
+            return Status::OK();
+        }
+
+        /**
+         * Runs a query using the following steps:
+         *   1) Parsing.
+         *   2) Acquire locks.
+         *   3) Plan query, obtaining an executor that can run it.
+         *   4) Setup a cursor for the query, which may be used on subsequent getMores.
+         *   5) Generate the first batch.
+         *   6) Save state for getMore.
+         *   7) Generate response to send to the client.
+         */
+        virtual bool run(OperationContext* txn,
+                         const std::string& dbname,
                          BSONObj& cmdObj, int options,
-                         string& errmsg,
+                         std::string& errmsg,
                          BSONObjBuilder& result,
                          bool fromRepl) {
-        // Currently only explains of finds run through the find command. Queries that are not
-        // explained use the legacy OP_QUERY path.
-        // TODO: check the comment above regarding shard versioning.
-        errmsg = "find command not yet implemented";
-        return false;
-    }
+            const std::string fullns = parseNs(dbname, cmdObj);
+            const NamespaceString nss(fullns);
+
+            // Although it is a command, a find command gets counted as a query.
+            globalOpCounters.gotQuery();
+
+            // 1a) Parse the command BSON to a LiteParsedQuery.
+            std::unique_ptr<LiteParsedQuery> lpq;
+            {
+                LiteParsedQuery* rawLpq;
+                const bool isExplain = false;
+                Status lpqStatus = LiteParsedQuery::make(fullns, cmdObj, isExplain, &rawLpq);
+                if (!lpqStatus.isOK()) {
+                    return appendCommandStatus(result, lpqStatus);
+                }
+                lpq.reset(rawLpq);
+            }
+
+            // Fill out curop information.
+            beginQueryOp(nss, cmdObj, lpq->getNumToReturn(), lpq->getSkip(), txn->getCurOp());
+
+            // 1b) Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
+            std::unique_ptr<CanonicalQuery> cq;
+            {
+                CanonicalQuery* rawCq;
+                WhereCallbackReal whereCallback(txn, nss.db());
+                Status canonStatus = CanonicalQuery::canonicalize(lpq.release(),
+                                                                  &rawCq,
+                                                                  whereCallback);
+                if (!canonStatus.isOK()) {
+                    return appendCommandStatus(result, canonStatus);
+                }
+                cq.reset(rawCq);
+            }
+
+            // 2) Acquire locks.
+            AutoGetCollectionForRead ctx(txn, nss);
+            Collection* collection = ctx.getCollection();
+
+            // 3) Get the execution plan for the query.
+            //
+            // TODO: Do we need to handle oplog replay here?
+            std::unique_ptr<PlanExecutor> execHolder;
+            {
+                PlanExecutor* rawExec;
+                size_t options = QueryPlannerParams::DEFAULT;
+                // TODO: The version attached to the TLS cannot be relied upon, the shard
+                // version should be passed as part of the command parameter.
+                if (shardingState.needCollectionMetadata(cq->getParsed().ns())) {
+                    options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
+                }
+
+                Status execStatus = getExecutor(txn,
+                                                collection,
+                                                cq.release(),
+                                                PlanExecutor::YIELD_AUTO,
+                                                &rawExec,
+                                                options);
+                if (!execStatus.isOK()) {
+                    return appendCommandStatus(result, execStatus);
+                }
+
+                execHolder.reset(rawExec);
+            }
+
+            if (!collection) {
+                // No collection. Just fill out curop indicating that there were zero results and
+                // there is no ClientCursor id, and then return.
+                const int numResults = 0;
+                const CursorId cursorId = 0;
+                endQueryOp(ctx, execHolder.get(), numResults, cursorId, txn->getCurOp());
+                Command::appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
+                return true;
+            }
+
+            const LiteParsedQuery& pq = execHolder->getCanonicalQuery()->getParsed();
+
+            // 4) If possible, register the execution plan inside a ClientCursor, and pin that
+            // cursor. In this case, ownership of the PlanExecutor is transferred to the
+            // ClientCursor, and 'exec' becomes null.
+            //
+            // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
+            execHolder->deregisterExec();
+
+            // Create a ClientCursor containing this plan executor. We don't have to worry
+            // about leaking it as it's inserted into a global map by its ctor.
+            ClientCursor* cursor = new ClientCursor(collection->getCursorManager(),
+                                                    execHolder.release(),
+                                                    nss.ns(),
+                                                    pq.getOptions(),
+                                                    pq.getFilter());
+            CursorId cursorId = cursor->cursorid();
+            ClientCursorPin ccPin(collection->getCursorManager(), cursorId);
+
+            // On early return, get rid of the the cursor.
+            ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin);
+
+            invariant(!execHolder);
+            PlanExecutor* exec = cursor->getExecutor();
+
+            // 5) Stream query results, adding them to a BSONArray as we go.
+            //
+            // TODO: Handle result sets larger than 16MB.
+            BSONArrayBuilder firstBatch;
+            BSONObj obj;
+            PlanExecutor::ExecState state;
+            int numResults = 0;
+            while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+                // Add result to output buffer.
+                firstBatch.append(obj);
+                numResults++;
+
+                if (enoughForFirstBatch(pq, numResults, firstBatch.len())) {
+                    break;
+                }
+            }
+
+            // Throw an assertion if query execution fails for any reason.
+            if (PlanExecutor::FAILURE == state) {
+                const std::unique_ptr<PlanStageStats> stats(exec->getStats());
+                error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats);
+                return appendCommandStatus(result,
+                                           Status(ErrorCodes::OperationFailed,
+                                                  str::stream() << "Executor error: "
+                                                  << WorkingSetCommon::toStatusString(obj)));
+            }
+
+            // 6) Set up the cursor for getMore.
+            if (shouldSaveCursor(txn, collection, state, exec)) {
+                // State will be restored on getMore.
+                exec->saveState();
+
+                // TODO: Do we also need to set collection metadata here?
+                cursor->setLeftoverMaxTimeMicros(txn->getCurOp()->getRemainingMaxTimeMicros());
+                cursor->setPos(numResults);
+
+                // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their
+                // next getMore.
+                if (!(pq.isTailable() && state == PlanExecutor::IS_EOF)) {
+                    // We stash away the RecoveryUnit in the ClientCursor. It's used for
+                    // subsequent getMore requests. The calling OpCtx gets a fresh RecoveryUnit.
+                    txn->recoveryUnit()->commitAndRestart();
+                    cursor->setOwnedRecoveryUnit(txn->releaseRecoveryUnit());
+                    StorageEngine* engine = getGlobalEnvironment()->getGlobalStorageEngine();
+                    txn->setRecoveryUnit(engine->newRecoveryUnit());
+                }
+            }
+            else {
+                cursorId = 0;
+            }
+
+            // Fill out curop based on the results.
+            endQueryOp(ctx, exec, numResults, cursorId, txn->getCurOp());
+
+            // 7) Generate the response object to send to the client.
+            Command::appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result);
+            if (cursorId) {
+                cursorFreer.Dismiss();
+            }
+            return true;
+        }
+
+    } findCmd;
 
 } // namespace mongo
diff --git a/src/mongo/db/commands/find_cmd.h b/src/mongo/db/commands/find_cmd.h
deleted file mode 100644
index e472ebc1271..00000000000
--- a/src/mongo/db/commands/find_cmd.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- *    Copyright (C) 2014 MongoDB Inc.
- *
- *    This program is free software: you can redistribute it and/or  modify
- *    it under the terms of the GNU Affero General Public License, version 3,
- *    as published by the Free Software Foundation.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU Affero General Public License for more details.
- *
- *    You should have received a copy of the GNU Affero General Public License
- *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the GNU Affero General Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/query/lite_parsed_query.h"
-
-namespace mongo {
-
-    /**
-     * The find command will be the main entry point for running queries once runQuery()
-     * is deprecated.
-     *
-     * Currently, only explains run through the FindCmd, and regular queries use the old code
-     * path.
-     */
-    class FindCmd : public Command {
-    public:
-        FindCmd() : Command("find") { }
-
-        virtual bool isWriteCommandForConfigServer() const { return false; }
-
-        virtual bool slaveOk() const { return false; }
-
-        virtual bool slaveOverrideOk() const { return true; }
-
-        virtual bool maintenanceOk() const { return false; }
-
-        virtual bool adminOnly() const { return false; }
-
-        virtual void help( std::stringstream& help ) const {
-            help << "query for documents";
-        }
-
-        /**
-         * In order to run the find command, you must be authorized for the "find" action
-         * type on the collection.
-         */
-        virtual Status checkAuthForCommand(ClientBasic* client,
-                                           const std::string& dbname,
-                                           const BSONObj& cmdObj);
-
-        virtual Status explain(OperationContext* txn,
-                               const std::string& dbname,
-                               const BSONObj& cmdObj,
-                               ExplainCommon::Verbosity verbosity,
-                               BSONObjBuilder* out) const;
-
-        /**
-         * TODO: This needs to be implemented. Currently it does nothing.
-         */
-        virtual bool run(OperationContext* txn,
-                         const std::string& dbname,
-                         BSONObj& cmdObj, int options,
-                         std::string& errmsg,
-                         BSONObjBuilder& result,
-                         bool fromRepl);
-
-    };
-
-}  // namespace mongo
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 4b179c19661..f64f32d22af 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -77,22 +77,6 @@ namespace {
         return !pq.getHint().isEmpty() || !pq.getMin().isEmpty() || !pq.getMax().isEmpty();
     }
 
-    /**
-     * Quote:
-     * if ntoreturn is zero, we return up to 101 objects.  on the subsequent getmore, there
-     * is only a size limit.  The idea is that on a find() where one doesn't use much results,
-     * we don't return much, but once getmore kicks in, we start pushing significant quantities.
-     *
-     * The n limit (vs. size) is important when someone fetches only one small field from big
-     * objects, which causes massive scanning server-side.
-     */
-    bool enoughForFirstBatch(const mongo::LiteParsedQuery& pq, int n, int len) {
-        if (0 == pq.getNumToReturn()) {
-            return (len > 1024 * 1024) || n >= 101;
-        }
-        return n >= pq.getNumToReturn() || len > mongo::MaxBytesToReturnToClientAtOnce;
-    }
-
     bool enough(const mongo::LiteParsedQuery& pq, int n) {
         if (0 == pq.getNumToReturn()) { return false; }
         return n >= pq.getNumToReturn();
@@ -123,6 +107,115 @@ namespace mongo {
     // Failpoint for checking whether we've received a getmore.
     MONGO_FP_DECLARE(failReceivedGetmore);
 
+    /**
+     * If ntoreturn is zero, we stop generating additional results as soon as we have either 101
+     * documents or at least 1MB of data. On subsequent getmores, there is no limit on the number
+     * of results; we will stop as soon as we have at least 4 MB of data.  The idea is that on a
+     * find() where one doesn't use much results, we don't return much, but once getmore kicks in,
+     * we start pushing significant quantities.
+     *
+     * If ntoreturn is non-zero, the we stop building the first batch once we either have ntoreturn
+     * results, or when the result set exceeds 4 MB.
+     */
+    bool enoughForFirstBatch(const LiteParsedQuery& pq, int numDocs, int bytesBuffered) {
+        if (0 == pq.getNumToReturn()) {
+            return (bytesBuffered > 1024 * 1024) || numDocs >= 101;
+        }
+        return numDocs >= pq.getNumToReturn() || bytesBuffered > MaxBytesToReturnToClientAtOnce;
+    }
+
+    bool shouldSaveCursor(OperationContext* txn,
+                          const Collection* collection,
+                          PlanExecutor::ExecState finalState,
+                          PlanExecutor* exec) {
+        if (PlanExecutor::FAILURE == finalState || PlanExecutor::DEAD == finalState) {
+            return false;
+        }
+
+        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();
+        if (!pq.wantMore() && !pq.isTailable()) {
+            return false;
+        }
+
+        if (pq.getNumToReturn() == 1) {
+            return false;
+        }
+
+        // We keep a tailable cursor around unless the collection we're tailing has no
+        // records.
+        //
+        // SERVER-13955: we should be able to create a tailable cursor that waits on
+        // an empty collection. Right now we do not keep a cursor if the collection
+        // has zero records.
+        if (pq.isTailable()) {
+            return collection && collection->numRecords(txn) != 0U;
+        }
+
+        return !exec->isEOF();
+    }
+
+    void beginQueryOp(const NamespaceString& nss,
+                      const BSONObj& queryObj,
+                      int ntoreturn,
+                      int ntoskip,
+                      CurOp* curop) {
+        curop->debug().ns = nss.ns();
+        curop->debug().query = queryObj;
+        curop->debug().ntoreturn = ntoreturn;
+        curop->debug().ntoskip = ntoskip;
+        curop->setQuery(queryObj);
+    }
+
+    void endQueryOp(const AutoGetCollectionForRead& ctx,
+                    PlanExecutor* exec,
+                    int numResults,
+                    CursorId cursorId,
+                    CurOp* curop) {
+        invariant(exec);
+        invariant(curop);
+
+        // Fill out basic curop query exec properties.
+        curop->debug().nreturned = numResults;
+        curop->debug().cursorid = (0 == cursorId ? -1 : cursorId);
+
+        // Fill out curop based on explain summary statistics.
+        PlanSummaryStats summaryStats;
+        Explain::getSummaryStats(exec, &summaryStats);
+        curop->debug().scanAndOrder = summaryStats.hasSortStage;
+        curop->debug().nscanned = summaryStats.totalKeysExamined;
+        curop->debug().nscannedObjects = summaryStats.totalDocsExamined;
+        curop->debug().idhack = summaryStats.isIdhack;
+
+        const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() :
+                                                             serverGlobalParams.defaultProfile;
+        const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery;
+        const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1);
+
+        // Set debug information for consumption by the profiler and slow query log.
+        if (dbProfilingLevel > 0
+                || curop->elapsedMillis() > serverGlobalParams.slowMS
+                || logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) {
+            // Generate plan summary string.
+            curop->debug().planSummary = Explain::getPlanSummary(exec);
+        }
+
+        // Set debug information for consumption by the profiler only.
+        if (dbProfilingLevel > 0) {
+            // Get BSON stats.
+            scoped_ptr<PlanStageStats> execStats(exec->getStats());
+            BSONObjBuilder statsBob;
+            Explain::statsToBSON(*execStats, &statsBob);
+            curop->debug().execStats.set(statsBob.obj());
+
+            // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj.
+            if (curop->debug().execStats.tooBig() && !curop->debug().planSummary.empty()) {
+                BSONObjBuilder bob;
+                bob.append("summary", curop->debug().planSummary.toString());
+                curop->debug().execStats.set(bob.done());
+            }
+        }
+    }
+
     // TODO: Move this and the other command stuff in runQuery outta here and up a level.
     static bool runCommands(OperationContext* txn,
                             const char *ns,
@@ -556,8 +649,6 @@ namespace mongo {
             }
         }
 
-        // cout << "diskloc is " << startLoc.toString() << endl;
-
         // Build our collection scan...
         CollectionScanParams params;
         params.collection = collection;
@@ -582,10 +673,7 @@ namespace mongo {
         uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid());
 
         // Set curop information.
-        curop.debug().ns = nss.ns();
-        curop.debug().ntoreturn = q.ntoreturn;
-        curop.debug().query = q.query;
-        curop.setQuery(q.query);
+        beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop);
 
         // If the query is really a command, run it.
         if (nss.isCommand()) {
@@ -642,12 +730,7 @@ namespace mongo {
         // Parse, canonicalize, plan, transcribe, and get a plan executor.
         PlanExecutor* rawExec = NULL;
 
-        ScopedTransaction scopedXact(txn, MODE_IS);
         AutoGetCollectionForRead ctx(txn, nss);
-
-        const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() :
-                                                             serverGlobalParams.defaultProfile;
-
         Collection* collection = ctx.getCollection();
 
         // We'll now try to get the query executor that will execute this query for us. There
@@ -754,9 +837,6 @@ namespace mongo {
         // If we're replaying the oplog, we save the last time that we read.
         OpTime slaveReadTill;
 
-        // Do we save the PlanExecutor in a ClientCursor for getMore calls later?
-        bool saveClientCursor = false;
-
         BSONObj obj;
         PlanExecutor::ExecState state;
         // uint64_t numMisplacedDocs = 0;
@@ -784,11 +864,6 @@ namespace mongo {
                        << " numToReturn=" << pq.getNumToReturn()
                        << " numResults=" << numResults
                        << endl;
-                // If only one result requested assume it's a findOne() and don't save the cursor.
-                if (pq.wantMore() && 1 != pq.getNumToReturn()) {
-                    LOG(5) << " executor EOF=" << exec->isEOF() << endl;
-                    saveClientCursor = !exec->isEOF();
-                }
                 break;
             }
         }
@@ -809,19 +884,6 @@ namespace mongo {
             uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
         }
 
-        // Why save a dead executor?
-        if (PlanExecutor::DEAD == state) {
-            saveClientCursor = false;
-        }
-        else if (pq.isTailable()) {
-            // If we're tailing a capped collection, we don't bother saving the cursor if the
-            // collection is empty. Otherwise, the semantics of the tailable cursor is that the
-            // client will keep trying to read from it. So we'll keep it around.
-            if (collection && collection->numRecords(txn) != 0 && pq.getNumToReturn() != 1) {
-                saveClientCursor = true;
-            }
-        }
-
         // TODO(greg): This will go away soon.
         if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
             // if the version changed during the query we might be missing some data and its safe to
@@ -831,39 +893,12 @@ namespace mongo {
                                            shardingState.getVersion(nss.ns()));
         }
 
-        const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery;
-        const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1);
-
-        PlanSummaryStats summaryStats;
-        Explain::getSummaryStats(exec.get(), &summaryStats);
-
-        curop.debug().ntoskip = pq.getSkip();
-        curop.debug().nreturned = numResults;
-        curop.debug().scanAndOrder = summaryStats.hasSortStage;
-        curop.debug().nscanned = summaryStats.totalKeysExamined;
-        curop.debug().nscannedObjects = summaryStats.totalDocsExamined;
-        curop.debug().idhack = summaryStats.isIdhack;
-
-        // Set debug information for consumption by the profiler.
-        if (dbProfilingLevel > 0 ||
-            curop.elapsedMillis() > serverGlobalParams.slowMS ||
-            logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) {
-            // Get BSON stats.
-            scoped_ptr<PlanStageStats> execStats(exec->getStats());
-            BSONObjBuilder statsBob;
-            Explain::statsToBSON(*execStats, &statsBob);
-            curop.debug().execStats.set(statsBob.obj());
-
-            // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj.
-            if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) {
-                BSONObjBuilder bob;
-                bob.append("summary", curop.debug().planSummary.toString());
-                curop.debug().execStats.set(bob.done());
-            }
-        }
-
+        // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
+        // this cursorid later.
         long long ccId = 0;
-        if (saveClientCursor) {
+        endQueryOp(ctx, exec.get(), numResults, ccId, &curop);
+
+        if (shouldSaveCursor(txn, collection, state, exec.get())) {
             // We won't use the executor until it's getMore'd.
             exec->saveState();
 
diff --git a/src/mongo/db/query/find.h b/src/mongo/db/query/find.h
index b2bd7881ec3..c8511556928 100644
--- a/src/mongo/db/query/find.h
+++ b/src/mongo/db/query/find.h
@@ -41,6 +41,46 @@ namespace mongo {
     class OperationContext;
 
     /**
+     * Returns true if enough results have been prepared to stop adding more to the first batch.
+     *
+     * Should be called *after* adding to the result set rather than before.
+     */
+    bool enoughForFirstBatch(const LiteParsedQuery& pq, int numDocs, int bytesBuffered);
+
+    /**
+     * Returns true if we should keep a cursor around because we're expecting to return more query
+     * results.
+     *
+     * If false, the caller should close the cursor and indicate this to the client by sending back
+     * a cursor ID of 0.
+     */
+    bool shouldSaveCursor(OperationContext* txn,
+                          const Collection* collection,
+                          PlanExecutor::ExecState finalState,
+                          PlanExecutor* exec);
+
+    /**
+     * Fills out CurOp with information about this query.
+     */
+    void beginQueryOp(const NamespaceString& nss,
+                      const BSONObj& queryObj,
+                      int ntoreturn,
+                      int ntoskip,
+                      CurOp* curop);
+
+    /**
+     * Fills out CurOp with information regarding this query's execution.
+     *
+     * Uses explain functionality to extract stats from 'exec'. 'ctx' is used to conditionalize
+     * whether or not we do expensive stats gathering based on the database profiling level.
+     */
+    void endQueryOp(const AutoGetCollectionForRead& ctx,
+                    PlanExecutor* exec,
+                    int numResults,
+                    CursorId cursorId,
+                    CurOp* curop);
+
+    /**
      * Constructs a PlanExecutor for a query with the oplogReplay option set to true,
      * for the query 'cq' over the collection 'collection'. The PlanExecutor will
      * wrap a singleton OplogStart stage.
author	David Storch <david.storch@10gen.com>	2015-03-14 18:49:43 -0400
committer	David Storch <david.storch@10gen.com>	2015-03-18 10:01:44 -0400
commit	4049c8328c98d8eb2b84fffca43ff4904e936909 (patch)
tree	f6819274232596c3bb2d3df0fb5b89fcde344c71 /src
parent	465ca1774ddb7078566cb2edf0408f5881d6aae3 (diff)
download	mongo-4049c8328c98d8eb2b84fffca43ff4904e936909.tar.gz