summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2014-06-11 14:24:20 -0400
committerDavid Storch <david.storch@10gen.com>2014-06-27 09:56:04 -0400
commit1cc6be662ccd83b0341ef1f31f8f4ad30dc69451 (patch)
tree2cdd5f7e473d77685f6f7ca1c6735f75991089a7
parenta67bddc57c9b8a8d9d13ac30ebe74f9914b0c6d0 (diff)
downloadmongo-1cc6be662ccd83b0341ef1f31f8f4ad30dc69451.tar.gz
SERVER-14097 SERVER-14098 execution-level explain for .find() and .count()
The explain implementation for .find() and .count() is feature complete. To use the .find() implementation, set the enableNewExplain setParameter to true. Count operations are explained through the new explain command, e.g. db.runCommand({explain: {count: "coll", query: {foo: "bar"}}}).
-rw-r--r--src/mongo/SConscript3
-rw-r--r--src/mongo/db/commands.h21
-rw-r--r--src/mongo/db/commands/count.cpp291
-rw-r--r--src/mongo/db/commands/count.h110
-rw-r--r--src/mongo/db/commands/explain_cmd.cpp111
-rw-r--r--src/mongo/db/commands/explain_cmd.h85
-rw-r--r--src/mongo/db/dbcommands.cpp67
-rw-r--r--src/mongo/db/exec/2dnear.cpp9
-rw-r--r--src/mongo/db/exec/2dnear.h4
-rw-r--r--src/mongo/db/exec/SConscript3
-rw-r--r--src/mongo/db/exec/and_hash.cpp7
-rw-r--r--src/mongo/db/exec/and_hash.h4
-rw-r--r--src/mongo/db/exec/and_sorted.cpp7
-rw-r--r--src/mongo/db/exec/and_sorted.h4
-rw-r--r--src/mongo/db/exec/cached_plan.cpp14
-rw-r--r--src/mongo/db/exec/cached_plan.h6
-rw-r--r--src/mongo/db/exec/collection_scan.cpp9
-rw-r--r--src/mongo/db/exec/collection_scan.h4
-rw-r--r--src/mongo/db/exec/count.cpp40
-rw-r--r--src/mongo/db/exec/count.h5
-rw-r--r--src/mongo/db/exec/distinct_scan.cpp8
-rw-r--r--src/mongo/db/exec/distinct_scan.h4
-rw-r--r--src/mongo/db/exec/eof.cpp75
-rw-r--r--src/mongo/db/exec/eof.h (renamed from src/mongo/db/ops/count.h)49
-rw-r--r--src/mongo/db/exec/fetch.cpp11
-rw-r--r--src/mongo/db/exec/fetch.h4
-rw-r--r--src/mongo/db/exec/idhack.cpp148
-rw-r--r--src/mongo/db/exec/idhack.h94
-rw-r--r--src/mongo/db/exec/index_scan.cpp8
-rw-r--r--src/mongo/db/exec/index_scan.h4
-rw-r--r--src/mongo/db/exec/keep_mutations.cpp9
-rw-r--r--src/mongo/db/exec/keep_mutations.h4
-rw-r--r--src/mongo/db/exec/limit.cpp9
-rw-r--r--src/mongo/db/exec/limit.h4
-rw-r--r--src/mongo/db/exec/merge_sort.cpp7
-rw-r--r--src/mongo/db/exec/merge_sort.h4
-rw-r--r--src/mongo/db/exec/mock_stage.cpp5
-rw-r--r--src/mongo/db/exec/mock_stage.h5
-rw-r--r--src/mongo/db/exec/multi_plan.cpp47
-rw-r--r--src/mongo/db/exec/multi_plan.h19
-rw-r--r--src/mongo/db/exec/oplogstart.cpp5
-rw-r--r--src/mongo/db/exec/oplogstart.h4
-rw-r--r--src/mongo/db/exec/or.cpp7
-rw-r--r--src/mongo/db/exec/or.h4
-rw-r--r--src/mongo/db/exec/plan_stage.h15
-rw-r--r--src/mongo/db/exec/plan_stats.h79
-rw-r--r--src/mongo/db/exec/projection.cpp20
-rw-r--r--src/mongo/db/exec/projection.h4
-rw-r--r--src/mongo/db/exec/s2near.cpp8
-rw-r--r--src/mongo/db/exec/s2near.h4
-rw-r--r--src/mongo/db/exec/shard_filter.cpp9
-rw-r--r--src/mongo/db/exec/shard_filter.h4
-rw-r--r--src/mongo/db/exec/skip.cpp9
-rw-r--r--src/mongo/db/exec/skip.h4
-rw-r--r--src/mongo/db/exec/sort.cpp9
-rw-r--r--src/mongo/db/exec/sort.h4
-rw-r--r--src/mongo/db/exec/subplan.cpp504
-rw-r--r--src/mongo/db/exec/subplan.h138
-rw-r--r--src/mongo/db/exec/text.cpp8
-rw-r--r--src/mongo/db/exec/text.h4
-rw-r--r--src/mongo/db/instance.cpp2
-rw-r--r--src/mongo/db/ops/count.cpp155
-rw-r--r--src/mongo/db/query/SConscript1
-rw-r--r--src/mongo/db/query/explain.cpp396
-rw-r--r--src/mongo/db/query/explain.h80
-rw-r--r--src/mongo/db/query/get_executor.cpp811
-rw-r--r--src/mongo/db/query/get_executor.h127
-rw-r--r--src/mongo/db/query/get_runner.cpp158
-rw-r--r--src/mongo/db/query/get_runner.h22
-rw-r--r--src/mongo/db/query/get_runner_test.cpp1
-rw-r--r--src/mongo/db/query/idhack_runner.cpp9
-rw-r--r--src/mongo/db/query/idhack_runner.h5
-rw-r--r--src/mongo/db/query/index_entry.h1
-rw-r--r--src/mongo/db/query/new_find.cpp16
-rw-r--r--src/mongo/db/query/plan_executor.cpp39
-rw-r--r--src/mongo/db/query/plan_executor.h29
-rw-r--r--src/mongo/db/query/stage_types.h10
-rw-r--r--src/mongo/dbtests/counttests.cpp2
-rw-r--r--src/mongo/dbtests/plan_ranking.cpp2
79 files changed, 3337 insertions, 694 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index d1a1b855bd2..db5584ea011 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -636,7 +636,6 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/commands/geonear.cpp",
"db/geo/haystack.cpp",
"db/geo/s2common.cpp",
- "db/ops/count.cpp",
"db/ops/delete.cpp",
"db/ops/delete_executor.cpp",
"db/ops/insert.cpp",
@@ -658,6 +657,7 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/commands/copydb.cpp",
"db/commands/copydb_getnonce.cpp",
"db/commands/compact.cpp",
+ "db/commands/count.cpp",
"db/commands/auth_schema_upgrade_d.cpp",
"db/commands/create_indexes.cpp",
"db/commands/dbhash.cpp",
@@ -665,6 +665,7 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/commands/cleanup_orphaned_cmd.cpp",
"db/commands/collection_to_capped.cpp",
"db/commands/drop_indexes.cpp",
+ "db/commands/explain_cmd.cpp",
"db/commands/fsync.cpp",
"db/commands/get_last_error.cpp",
"db/commands/write_commands/write_commands.cpp",
diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h
index 485deeb61e4..e8c1a91d5b5 100644
--- a/src/mongo/db/commands.h
+++ b/src/mongo/db/commands.h
@@ -37,6 +37,7 @@
#include "mongo/db/auth/resource_pattern.h"
#include "mongo/db/client_basic.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/explain.h"
namespace mongo {
@@ -138,6 +139,26 @@ namespace mutablebson {
virtual void help( std::stringstream& help ) const;
/**
+ * Commands which can be explained override this method. Any operation which has a query
+ * part and executes as a tree of execution stages can be explained. A command should
+ * implement explain by:
+ *
+ * 1) Calling its custom parse function in order to parse the command. The output of
+ * this function should be a CanonicalQuery (representing the query part of the
+ * operation), and a PlanExecutor which wraps the tree of execution stages.
+ *
+ * 2) Calling Explain::explainStages(...) on the PlanExecutor. This is the function
+ * which knows how to convert an execution stage tree into explain output.
+ */
+ virtual Status explain(OperationContext* txn,
+ const std::string& dbname,
+ const BSONObj& cmdObj,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* out) const {
+ return Status(ErrorCodes::IllegalOperation, "Cannot explain cmd: " + name);
+ }
+
+ /**
* Checks if the given client is authorized to run this command on database "dbname"
* with the invocation described by "cmdObj".
*/
diff --git a/src/mongo/db/commands/count.cpp b/src/mongo/db/commands/count.cpp
new file mode 100644
index 00000000000..48ebb4422e1
--- /dev/null
+++ b/src/mongo/db/commands/count.cpp
@@ -0,0 +1,291 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands/count.h"
+
+#include "mongo/db/catalog/database.h"
+#include "mongo/db/client.h"
+#include "mongo/db/curop.h"
+#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/get_runner.h"
+#include "mongo/db/query/type_explain.h"
+
+namespace mongo {
+
+ static CmdCount cmdCount;
+
+ static long long applySkipLimit(long long num, const BSONObj& cmd) {
+ BSONElement s = cmd["skip"];
+ BSONElement l = cmd["limit"];
+
+ if (s.isNumber()) {
+ num = num - s.numberLong();
+ if (num < 0) {
+ num = 0;
+ }
+ }
+
+ if (l.isNumber()) {
+ long long limit = l.numberLong();
+ if (limit < 0) {
+ limit = -limit;
+ }
+
+ // 0 means no limit.
+ if (limit < num && limit != 0) {
+ num = limit;
+ }
+ }
+
+ return num;
+ }
+
+ long long runCount(OperationContext* txn,
+ const string& ns,
+ const BSONObj &cmd,
+ string &err,
+ int &errCode) {
+ // Lock 'ns'.
+ Client::Context cx(ns);
+ Collection* collection = cx.db()->getCollection(txn, ns);
+
+ if (NULL == collection) {
+ err = "ns missing";
+ return -1;
+ }
+
+ BSONObj query = cmd.getObjectField("query");
+ const std::string hint = cmd.getStringField("hint");
+ const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint);
+
+ // count of all objects
+ if (query.isEmpty()) {
+ return applySkipLimit(collection->numRecords(), cmd);
+ }
+
+ Runner* rawRunner;
+ long long skip = cmd["skip"].numberLong();
+ long long limit = cmd["limit"].numberLong();
+
+ if (limit < 0) {
+ limit = -limit;
+ }
+
+ uassertStatusOK(getRunnerCount(collection, query, hintObj, &rawRunner));
+ auto_ptr<Runner> runner(rawRunner);
+
+ // Store the plan summary string in CurOp.
+ Client& client = cc();
+ CurOp* currentOp = client.curop();
+ if (NULL != currentOp) {
+ PlanInfo* rawInfo;
+ Status s = runner->getInfo(NULL, &rawInfo);
+ if (s.isOK()) {
+ scoped_ptr<PlanInfo> planInfo(rawInfo);
+ currentOp->debug().planSummary = planInfo->planSummary.c_str();
+ }
+ }
+
+ try {
+ const ScopedRunnerRegistration safety(runner.get());
+
+ long long count = 0;
+ Runner::RunnerState state;
+ while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, NULL))) {
+ if (skip > 0) {
+ --skip;
+ }
+ else {
+ ++count;
+ // Fast-path. There's no point in iterating all over the runner if limit
+ // is set.
+ if (count >= limit && limit != 0) {
+ break;
+ }
+ }
+ }
+
+ // Emulate old behavior and return the count even if the runner was killed. This
+ // happens when the underlying collection is dropped.
+ return count;
+ }
+ catch (const DBException &e) {
+ err = e.toString();
+ errCode = e.getCode();
+ }
+ catch (const std::exception &e) {
+ err = e.what();
+ errCode = 0;
+ }
+
+ // Historically we have returned zero in many count assertion cases - see SERVER-2291.
+ log() << "Count with ns: " << ns << " and query: " << query
+ << " failed with exception: " << err << " code: " << errCode
+ << endl;
+
+ return -2;
+ }
+
+ Status CmdCount::explain(OperationContext* txn,
+ const std::string& dbname,
+ const BSONObj& cmdObj,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* out) const {
+
+ // Acquire the DB read lock and get the collection.
+ const string ns = parseNs(dbname, cmdObj);
+ Client::ReadContext ctx(txn, ns);
+ Collection* collection = ctx.ctx().db()->getCollection( txn, ns );
+
+ // Handle special case of an empty query. When there is no query, we don't construct
+ // an actual execution tree. Since each collection tracks the number of documents it
+ // contains, count ops with no query just ask the collection for the total number of
+ // documents (and then apply the skip/limit to this number if necessary).
+ BSONObj query = cmdObj.getObjectField("query");
+ if (query.isEmpty()) {
+ Explain::explainCountEmptyQuery(out);
+ return Status::OK();
+ }
+
+ // Get an executor for the command and use it to generate the explain output.
+ CanonicalQuery* rawCq;
+ PlanExecutor* rawExec;
+ Status execStatus = parseCountToExecutor(cmdObj, dbname, ns, collection,
+ &rawCq, &rawExec);
+ if (!execStatus.isOK()) {
+ return execStatus;
+ }
+
+ scoped_ptr<CanonicalQuery> cq(rawCq);
+ scoped_ptr<PlanExecutor> exec(rawExec);
+
+ return Explain::explainStages(exec.get(), cq.get(), verbosity, out);
+ }
+
+ Status CmdCount::parseCountToExecutor(const BSONObj& cmdObj,
+ const std::string& dbname,
+ const std::string& ns,
+ Collection* collection,
+ CanonicalQuery** queryOut,
+ PlanExecutor** execOut) const {
+
+ long long skip = 0;
+ if (cmdObj["skip"].isNumber()) {
+ skip = cmdObj["skip"].numberLong();
+ if (skip < 0) {
+ return Status(ErrorCodes::BadValue, "skip value is negative in count query");
+ }
+ }
+ else if (cmdObj["skip"].ok()) {
+ return Status(ErrorCodes::BadValue, "skip value is not a valid number");
+ }
+
+ BSONObj query = cmdObj.getObjectField("query");
+ invariant(!query.isEmpty());
+
+ const std::string hint = cmdObj.getStringField("hint");
+ const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint);
+
+ StringData dbnameData(dbname);
+ const WhereCallbackReal whereCallback(dbnameData);
+
+ CanonicalQuery* cq;
+ uassertStatusOK(CanonicalQuery::canonicalize(ns,
+ query,
+ BSONObj(),
+ BSONObj(),
+ 0,
+ 0,
+ hintObj,
+ &cq,
+ whereCallback));
+
+ auto_ptr<CanonicalQuery> autoCq(cq);
+
+ Status execStat = getExecutor(collection, cq, execOut,
+ QueryPlannerParams::PRIVATE_IS_COUNT);
+ if (!execStat.isOK()) {
+ return execStat;
+ }
+
+ *queryOut = autoCq.release();
+ return Status::OK();
+ }
+
+ bool CmdCount::run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int, string& errmsg,
+ BSONObjBuilder& result, bool) {
+
+ long long skip = 0;
+ if ( cmdObj["skip"].isNumber() ) {
+ skip = cmdObj["skip"].numberLong();
+ if ( skip < 0 ) {
+ errmsg = "skip value is negative in count query";
+ return false;
+ }
+ }
+ else if ( cmdObj["skip"].ok() ) {
+ errmsg = "skip value is not a valid number";
+ return false;
+ }
+
+ const string ns = parseNs(dbname, cmdObj);
+
+ // This acquires the DB read lock
+ //
+ Client::ReadContext ctx(txn, ns);
+
+ string err;
+ int errCode;
+ long long n = runCount(txn, ns, cmdObj, err, errCode);
+
+ long long retVal = n;
+ bool ok = true;
+ if ( n == -1 ) {
+ retVal = 0;
+ result.appendBool( "missing" , true );
+ }
+ else if ( n < 0 ) {
+ retVal = 0;
+ ok = false;
+ if ( !err.empty() ) {
+ errmsg = err;
+ result.append("code", errCode);
+ return false;
+ }
+ }
+
+ result.append("n", static_cast<double>(retVal));
+ return ok;
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/count.h b/src/mongo/db/commands/count.h
new file mode 100644
index 00000000000..eeaaac204f1
--- /dev/null
+++ b/src/mongo/db/commands/count.h
@@ -0,0 +1,110 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/db/repl/repl_settings.h"
+
+namespace mongo {
+
+ class OperationContext;
+
+ /**
+ * 'ns' is the namespace we're counting on.
+ *
+ * { count: "collectionname"[, query: <query>] }
+ *
+ * @return -1 if the namespace does not exist, -2 on execution errors, otherwise the
+ * match count.
+ */
+ long long runCount(OperationContext* txn,
+ const std::string& ns,
+ const BSONObj& cmd,
+ std::string& err,
+ int& errCode);
+
+ /* select count(*) */
+ class CmdCount : public Command {
+ public:
+ virtual bool isWriteCommandForConfigServer() const { return false; }
+ CmdCount() : Command("count") { }
+ virtual bool slaveOk() const {
+ // ok on --slave setups
+ return repl::replSettings.slave == repl::SimpleSlave;
+ }
+ virtual bool slaveOverrideOk() const { return true; }
+ virtual bool maintenanceOk() const { return false; }
+ virtual bool adminOnly() const { return false; }
+ virtual void help( stringstream& help ) const { help << "count objects in collection"; }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::find);
+ out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
+ }
+
+ virtual Status explain(OperationContext* txn,
+ const std::string& dbname,
+ const BSONObj& cmdObj,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* out) const;
+
+ virtual bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int, string& errmsg,
+ BSONObjBuilder& result, bool);
+
+ private:
+ /**
+ * Converts the command object 'cmdObj' for a count into a PlanExecutor capable
+ * of running the count and a CanonicalQuery.
+ *
+ * If successful, returns the executor through 'execOut' and the canonicalized
+ * query through 'queryOut'. The caller must delete both 'queryOut' and 'execOut'.
+ *
+ * On failure, returns a non-OK status, and the caller should not delete either
+ * 'queryOut' or 'execOut'.
+ *
+ * TODO: the regular run() command for count should call this instead of getting
+ * a runner.
+ */
+ Status parseCountToExecutor(const BSONObj& cmdObj,
+ const std::string& dbname,
+ const std::string& ns,
+ Collection* collection,
+ CanonicalQuery** queryOut,
+ PlanExecutor** execOut) const;
+
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/explain_cmd.cpp b/src/mongo/db/commands/explain_cmd.cpp
new file mode 100644
index 00000000000..c2b468a3865
--- /dev/null
+++ b/src/mongo/db/commands/explain_cmd.cpp
@@ -0,0 +1,111 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands/explain_cmd.h"
+
+#include "mongo/db/catalog/database.h"
+#include "mongo/db/client.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/query/explain.h"
+#include "mongo/db/query/get_runner.h"
+#include "mongo/util/mongoutils/str.h"
+
+namespace mongo {
+
+ Status CmdExplain::checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ if (Object != cmdObj.firstElement().type()) {
+ return Status(ErrorCodes::BadValue, "explain command requires a nested object");
+ }
+
+ BSONObj explainObj = cmdObj.firstElement().Obj();
+
+ Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName());
+ if (NULL == commToExplain) {
+ mongoutils::str::stream ss;
+ ss << "unknown command: " << explainObj.firstElementFieldName();
+ return Status(ErrorCodes::CommandNotFound, ss);
+ }
+
+ return commToExplain->checkAuthForCommand(client, dbname, explainObj);
+ }
+
+ bool CmdExplain::run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj, int options,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ // Get the verbosity.
+ Explain::Verbosity verbosity = Explain::QUERY_PLANNER;
+ if (!cmdObj["verbosity"].eoo()) {
+ const char* verbStr = cmdObj["verbosity"].valuestrsafe();
+ if (mongoutils::str::equals(verbStr, "executionStats")) {
+ verbosity = Explain::EXEC_STATS;
+ }
+ else if (mongoutils::str::equals(verbStr, "allPlansExecution")) {
+ verbosity = Explain::EXEC_ALL_PLANS;
+ }
+ else if (!mongoutils::str::equals(verbStr, "queryPlanner")) {
+ errmsg = "verbosity string must be one of "
+ "{'queryPlanner', 'executionStats', 'allPlansExecution'}";
+ return false;
+ }
+ }
+
+ if (Object != cmdObj.firstElement().type()) {
+ errmsg = "explain command requires a nested object";
+ return false;
+ }
+
+ // This is the nested command which we are explaining.
+ BSONObj explainObj = cmdObj.firstElement().Obj();
+
+ const string ns = parseNs(dbname, explainObj);
+
+ Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName());
+ if (NULL == commToExplain) {
+ mongoutils::str::stream ss;
+ ss << "unknown command: " << explainObj.firstElementFieldName();
+ Status explainStatus(ErrorCodes::CommandNotFound, ss);
+ return appendCommandStatus(result, explainStatus);
+ }
+
+ // Actually call the nested command's explain(...) method.
+ Status explainStatus = commToExplain->explain(txn, dbname, explainObj, verbosity, &result);
+ if (!explainStatus.isOK()) {
+ return appendCommandStatus(result, explainStatus);
+ }
+
+ return true;
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/explain_cmd.h b/src/mongo/db/commands/explain_cmd.h
new file mode 100644
index 00000000000..638c736f4f2
--- /dev/null
+++ b/src/mongo/db/commands/explain_cmd.h
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/commands.h"
+
+namespace mongo {
+
+ /**
+ * The explain command is used to generate explain output for any read or write
+ * operation which has a query component (e.g. find, count, update, remove, distinct, etc.).
+ *
+ * The explain command takes as its argument a nested object which specifies the command to
+ * explain, and a verbosity indicator. For example:
+ *
+ * {explain: {count: "coll", query: {foo: "bar"}}, verbosity: "executionStats"}
+ *
+ * This command acts like a dispatcher: it just retrieves a pointer to the nested command and
+ * invokes its explain() implementation.
+ */
+ class CmdExplain : public Command {
+ public:
+ CmdExplain() : Command("explain") { }
+
+ virtual bool isWriteCommandForConfigServer() const { return false; }
+
+ // TODO: make slave ok true, test explains on secondaries.
+ virtual bool slaveOk() const {
+ return false;
+ }
+
+ virtual bool maintenanceOk() const { return false; }
+
+ virtual bool adminOnly() const { return false; }
+
+ virtual void help( stringstream& help ) const {
+ help << "explain database reads and writes";
+ }
+
+ /**
+ * You are authorized to run an explain if you are authorized to run
+ * the command that you are explaining. The auth check is performed recursively
+ * on the nested command.
+ */
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj);
+
+ virtual bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj, int options,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl);
+
+ } cmdExplain;
+
+} // namespace mongo
diff --git a/src/mongo/db/dbcommands.cpp b/src/mongo/db/dbcommands.cpp
index 8d833cb12b6..4d7f8000f15 100644
--- a/src/mongo/db/dbcommands.cpp
+++ b/src/mongo/db/dbcommands.cpp
@@ -60,7 +60,6 @@
#include "mongo/db/json.h"
#include "mongo/db/lasterror.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/db/ops/count.h"
#include "mongo/db/ops/insert.h"
#include "mongo/db/query/get_runner.h"
#include "mongo/db/query/internal_plans.h"
@@ -468,72 +467,6 @@ namespace mongo {
}
} cmdDrop;
- /* select count(*) */
- class CmdCount : public Command {
- public:
- virtual bool isWriteCommandForConfigServer() const { return false; }
- CmdCount() : Command("count") { }
- virtual bool slaveOk() const {
- // ok on --slave setups
- return repl::replSettings.slave == repl::SimpleSlave;
- }
- virtual bool slaveOverrideOk() const { return true; }
- virtual bool maintenanceOk() const { return false; }
- virtual bool adminOnly() const { return false; }
- virtual void help( stringstream& help ) const { help << "count objects in collection"; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::find);
- out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
- }
-
- virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
- long long skip = 0;
- if ( cmdObj["skip"].isNumber() ) {
- skip = cmdObj["skip"].numberLong();
- if ( skip < 0 ) {
- errmsg = "skip value is negative in count query";
- return false;
- }
- }
- else if ( cmdObj["skip"].ok() ) {
- errmsg = "skip value is not a valid number";
- return false;
- }
-
- const string ns = parseNs(dbname, cmdObj);
-
- // This acquires the DB read lock
- //
- Client::ReadContext ctx(txn, ns);
-
- string err;
- int errCode;
- long long n = runCount(txn, ns, cmdObj, err, errCode);
-
- long long retVal = n;
- bool ok = true;
- if ( n == -1 ) {
- retVal = 0;
- result.appendBool( "missing" , true );
- }
- else if ( n < 0 ) {
- retVal = 0;
- ok = false;
- if ( !err.empty() ) {
- errmsg = err;
- result.append("code", errCode);
- return false;
- }
- }
-
- result.append("n", static_cast<double>(retVal));
- return ok;
- }
- } cmdCount;
-
/* create collection */
class CmdCreate : public Command {
public:
diff --git a/src/mongo/db/exec/2dnear.cpp b/src/mongo/db/exec/2dnear.cpp
index 236d062858f..d1f276c233a 100644
--- a/src/mongo/db/exec/2dnear.cpp
+++ b/src/mongo/db/exec/2dnear.cpp
@@ -55,6 +55,10 @@ namespace mongo {
PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (!_initted) {
_initted = true;
@@ -151,6 +155,11 @@ namespace mongo {
_invalidationMap.erase(range.first, range.second);
}
+ vector<PlanStage*> TwoDNear::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* TwoDNear::getStats() {
_commonStats.isEOF = isEOF();
_specificStats.keyPattern = _params.indexKeyPattern;
diff --git a/src/mongo/db/exec/2dnear.h b/src/mongo/db/exec/2dnear.h
index 9ac7eae3a7a..265f3ff6402 100644
--- a/src/mongo/db/exec/2dnear.h
+++ b/src/mongo/db/exec/2dnear.h
@@ -74,6 +74,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_GEO_NEAR_2D; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index 3c85bfb8602..96054d3f97c 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -43,7 +43,9 @@ env.Library(
"collection_scan.cpp",
"count.cpp",
"distinct_scan.cpp",
+ "eof.cpp",
"fetch.cpp",
+ "idhack.cpp",
"index_scan.cpp",
"keep_mutations.cpp",
"limit.cpp",
@@ -58,6 +60,7 @@ env.Library(
"skip.cpp",
"sort.cpp",
"stagedebug_cmd.cpp",
+ "subplan.cpp",
"text.cpp",
"working_set_common.cpp",
],
diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp
index 7569ff36703..ff7cae7da48 100644
--- a/src/mongo/db/exec/and_hash.cpp
+++ b/src/mongo/db/exec/and_hash.cpp
@@ -107,6 +107,9 @@ namespace mongo {
PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
// Fast-path for one of our children being EOF immediately. We work each child a few times.
@@ -500,6 +503,10 @@ namespace mongo {
}
}
+ vector<PlanStage*> AndHashStage::getChildren() const {
+ return _children;
+ }
+
PlanStageStats* AndHashStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h
index d72edc19fa4..df353b1cec5 100644
--- a/src/mongo/db/exec/and_hash.h
+++ b/src/mongo/db/exec/and_hash.h
@@ -79,6 +79,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_AND_HASH; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp
index 71979872a4a..30ac9402a2e 100644
--- a/src/mongo/db/exec/and_sorted.cpp
+++ b/src/mongo/db/exec/and_sorted.cpp
@@ -61,6 +61,9 @@ namespace mongo {
PlanStage::StageState AndSortedStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
if (0 == _specificStats.failedAnd.size()) {
@@ -297,6 +300,10 @@ namespace mongo {
}
}
+ vector<PlanStage*> AndSortedStage::getChildren() const {
+ return _children;
+ }
+
PlanStageStats* AndSortedStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/and_sorted.h b/src/mongo/db/exec/and_sorted.h
index afcf4de5c49..1d74a4bde60 100644
--- a/src/mongo/db/exec/and_sorted.h
+++ b/src/mongo/db/exec/and_sorted.h
@@ -65,6 +65,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_AND_SORTED; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp
index 874e52fd526..31450fecc89 100644
--- a/src/mongo/db/exec/cached_plan.cpp
+++ b/src/mongo/db/exec/cached_plan.cpp
@@ -69,6 +69,9 @@ namespace mongo {
PlanStage::StageState CachedPlanStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
StageState childStatus = getActiveChild()->work(out);
@@ -123,6 +126,17 @@ namespace mongo {
++_commonStats.invalidates;
}
+ vector<PlanStage*> CachedPlanStage::getChildren() const {
+ vector<PlanStage*> children;
+ if (_usingBackupChild) {
+ children.push_back(_backupChildPlan.get());
+ }
+ else {
+ children.push_back(_mainChildPlan.get());
+ }
+ return children;
+ }
+
PlanStageStats* CachedPlanStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/cached_plan.h b/src/mongo/db/exec/cached_plan.h
index 37962fbd456..9c17239deb7 100644
--- a/src/mongo/db/exec/cached_plan.h
+++ b/src/mongo/db/exec/cached_plan.h
@@ -60,6 +60,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_CACHED_PLAN; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
@@ -72,7 +76,7 @@ namespace mongo {
const Collection* _collection;
// not owned
- CanonicalQuery* _canonicalQuery;
+ CanonicalQuery* _canonicalQuery;
// owned by us
boost::scoped_ptr<PlanStage> _mainChildPlan;
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index 47541cf7239..195ab21e551 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -53,6 +53,10 @@ namespace mongo {
PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (_nsDropped) { return PlanStage::DEAD; }
// Do some init if we haven't already.
@@ -154,6 +158,11 @@ namespace mongo {
}
}
+ vector<PlanStage*> CollectionScan::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* CollectionScan::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h
index a6cd87dc68d..f752c69e4c3 100644
--- a/src/mongo/db/exec/collection_scan.h
+++ b/src/mongo/db/exec/collection_scan.h
@@ -57,6 +57,10 @@ namespace mongo {
virtual void prepareToYield();
virtual void recoverFromYield();
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_COLLSCAN; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/count.cpp b/src/mongo/db/exec/count.cpp
index 56e8607a611..eeb860e9e92 100644
--- a/src/mongo/db/exec/count.cpp
+++ b/src/mongo/db/exec/count.cpp
@@ -44,7 +44,10 @@ namespace mongo {
_params(params),
_hitEnd(false),
_shouldDedup(params.descriptor->isMultikey()),
- _commonStats(kStageType) { }
+ _commonStats(kStageType) {
+ _specificStats.keyPattern = _params.descriptor->keyPattern();
+ _specificStats.isMultiKey = _params.descriptor->isMultikey();
+ }
void Count::initIndexCursor() {
CursorOptions cursorOptions;
@@ -62,6 +65,8 @@ namespace mongo {
// that points at the end.
_btreeCursor->seek(_params.startKey, !_params.startKeyInclusive);
+ ++_specificStats.keysExamined;
+
// Create the cursor that points at our end position.
IndexCursor* endCursor;
verify(_iam->newCursor(cursorOptions, &endCursor).isOK());
@@ -73,6 +78,8 @@ namespace mongo {
// If the end key is inclusive we want to point *past* it since that's the end.
_endCursor->seek(_params.endKey, _params.endKeyInclusive);
+ ++_specificStats.keysExamined;
+
// See if we've hit the end already.
checkEnd();
}
@@ -91,10 +98,16 @@ namespace mongo {
}
PlanStage::StageState Count::work(WorkingSetID* out) {
+ ++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (NULL == _btreeCursor.get()) {
// First call to work(). Perform cursor init.
initIndexCursor();
checkEnd();
+ ++_commonStats.needTime;
return PlanStage::NEED_TIME;
}
@@ -104,8 +117,11 @@ namespace mongo {
_btreeCursor->next();
checkEnd();
+ ++_specificStats.keysExamined;
+
if (_shouldDedup) {
if (_returned.end() != _returned.find(loc)) {
+ ++_commonStats.needTime;
return PlanStage::NEED_TIME;
}
else {
@@ -114,6 +130,7 @@ namespace mongo {
}
*out = WorkingSet::INVALID_ID;
+ ++_commonStats.advanced;
return PlanStage::ADVANCED;
}
@@ -127,6 +144,7 @@ namespace mongo {
}
void Count::prepareToYield() {
+ ++_commonStats.yields;
if (_hitEnd || (NULL == _btreeCursor.get())) { return; }
_btreeCursor->savePosition();
@@ -134,6 +152,7 @@ namespace mongo {
}
void Count::recoverFromYield() {
+ ++_commonStats.unyields;
if (_hitEnd || (NULL == _btreeCursor.get())) { return; }
if (!_btreeCursor->restorePosition().isOK()) {
@@ -178,6 +197,8 @@ namespace mongo {
}
void Count::invalidate(const DiskLoc& dl, InvalidationType type) {
+ ++_commonStats.invalidates;
+
// The only state we're responsible for holding is what DiskLocs to drop. If a document
// mutates the underlying index cursor will deal with it.
if (INVALIDATION_MUTATION == type) {
@@ -192,11 +213,20 @@ namespace mongo {
}
}
+ vector<PlanStage*> Count::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* Count::getStats() {
- // We don't collect stats since this stage is only used by the count command.
- // If count ever collects stats we must implement this.
- invariant(0);
- return NULL;
+ _commonStats.isEOF = isEOF();
+ auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_COUNT));
+
+ CountStats* countStats = new CountStats(_specificStats);
+ countStats->keyPattern = _specificStats.keyPattern.getOwned();
+ ret->specific.reset(countStats);
+
+ return ret.release();
}
} // namespace mongo
diff --git a/src/mongo/db/exec/count.h b/src/mongo/db/exec/count.h
index 51054dfe4dd..a3a82ab0032 100644
--- a/src/mongo/db/exec/count.h
+++ b/src/mongo/db/exec/count.h
@@ -76,6 +76,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_COUNT; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
@@ -114,6 +118,7 @@ namespace mongo {
bool _shouldDedup;
CommonStats _commonStats;
+ CountStats _specificStats;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp
index 6db9a8b3f31..3cbd9e84d08 100644
--- a/src/mongo/db/exec/distinct_scan.cpp
+++ b/src/mongo/db/exec/distinct_scan.cpp
@@ -91,6 +91,9 @@ namespace mongo {
PlanStage::StageState DistinctScan::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (NULL == _btreeCursor.get()) {
// First call to work(). Perform cursor init.
initIndexCursor();
@@ -213,6 +216,11 @@ namespace mongo {
}
}
+ vector<PlanStage*> DistinctScan::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* DistinctScan::getStats() {
_commonStats.isEOF = isEOF();
auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_DISTINCT));
diff --git a/src/mongo/db/exec/distinct_scan.h b/src/mongo/db/exec/distinct_scan.h
index c3b1e110dd8..9e088da5068 100644
--- a/src/mongo/db/exec/distinct_scan.h
+++ b/src/mongo/db/exec/distinct_scan.h
@@ -88,6 +88,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_DISTINCT; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/eof.cpp b/src/mongo/db/exec/eof.cpp
new file mode 100644
index 00000000000..e953f60406c
--- /dev/null
+++ b/src/mongo/db/exec/eof.cpp
@@ -0,0 +1,75 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/exec/eof.h"
+
+namespace mongo {
+
+ // static
+ const char* EOFStage::kStageType = "EOF";
+
+ EOFStage::EOFStage() : _commonStats(kStageType) { }
+
+ EOFStage::~EOFStage() { }
+
+ bool EOFStage::isEOF() {
+ return true;
+ }
+
+ PlanStage::StageState EOFStage::work(WorkingSetID* out) {
+ ++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+ return PlanStage::IS_EOF;
+ }
+
+ void EOFStage::prepareToYield() {
+ ++_commonStats.yields;
+ }
+
+ void EOFStage::recoverFromYield() {
+ ++_commonStats.unyields;
+ }
+
+ void EOFStage::invalidate(const DiskLoc& dl, InvalidationType type) {
+ ++_commonStats.invalidates;
+ }
+
+ vector<PlanStage*> EOFStage::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
+ PlanStageStats* EOFStage::getStats() {
+ _commonStats.isEOF = isEOF();
+ return new PlanStageStats(_commonStats, STAGE_EOF);
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/ops/count.h b/src/mongo/db/exec/eof.h
index 8040efb6028..cfcddc15b87 100644
--- a/src/mongo/db/ops/count.h
+++ b/src/mongo/db/exec/eof.h
@@ -1,7 +1,5 @@
-// count.h
-
/**
- * Copyright (C) 2013 MongoDB Inc.
+ * Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
@@ -28,24 +26,39 @@
* it in the license file.
*/
-#include "mongo/db/jsobj.h"
+#pragma once
-namespace mongo {
+#include "mongo/db/diskloc.h"
+#include "mongo/db/exec/plan_stage.h"
- class OperationContext;
+namespace mongo {
/**
- * 'ns' is the namespace we're counting on.
- *
- * { count: "collectionname"[, query: <query>] }
- *
- * @return -1 on ns does not exist error and other errors, 0 on other errors, otherwise the
- * match count.
+ * This stage just returns EOF immediately.
*/
- long long runCount(OperationContext* txn,
- const std::string& ns,
- const BSONObj& cmd,
- std::string& err,
- int& errCode);
+ class EOFStage : public PlanStage {
+ public:
+ EOFStage();
+
+ virtual ~EOFStage();
+
+ virtual bool isEOF();
+ virtual StageState work(WorkingSetID* out);
+
+ virtual void prepareToYield();
+ virtual void recoverFromYield();
+ virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_EOF; }
+
+ PlanStageStats* getStats();
+
+ static const char* kStageType;
+
+ private:
+ CommonStats _commonStats;
+ };
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/exec/fetch.cpp b/src/mongo/db/exec/fetch.cpp
index 8718786f0c8..fc0e039482e 100644
--- a/src/mongo/db/exec/fetch.cpp
+++ b/src/mongo/db/exec/fetch.cpp
@@ -58,6 +58,9 @@ namespace mongo {
PlanStage::StageState FetchStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
// If we're here, we're not waiting for a DiskLoc to be fetched. Get another to-be-fetched
@@ -83,6 +86,8 @@ namespace mongo {
member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
}
+ ++_specificStats.docsExamined;
+
return returnIfMatches(member, id, out);
}
else if (PlanStage::FAILURE == status) {
@@ -143,6 +148,12 @@ namespace mongo {
}
}
+ vector<PlanStage*> FetchStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* FetchStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/fetch.h b/src/mongo/db/exec/fetch.h
index 870e67a9148..f99fba7b1b5 100644
--- a/src/mongo/db/exec/fetch.h
+++ b/src/mongo/db/exec/fetch.h
@@ -59,6 +59,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_FETCH; }
+
PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp
new file mode 100644
index 00000000000..d27f7a34b5b
--- /dev/null
+++ b/src/mongo/db/exec/idhack.cpp
@@ -0,0 +1,148 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/exec/idhack.h"
+
+#include "mongo/client/dbclientinterface.h"
+#include "mongo/db/exec/projection.h"
+#include "mongo/db/index/btree_access_method.h"
+#include "mongo/s/d_logic.h"
+
+namespace mongo {
+
+ // static
+ const char* IDHackStage::kStageType = "IDHACK";
+
+ IDHackStage::IDHackStage(const Collection* collection, CanonicalQuery* query, WorkingSet* ws)
+ : _collection(collection),
+ _workingSet(ws),
+ _key(query->getQueryObj()["_id"].wrap()),
+ _query(query),
+ _killed(false),
+ _done(false),
+ _commonStats(kStageType) { }
+
+ IDHackStage::IDHackStage(Collection* collection, const BSONObj& key, WorkingSet* ws)
+ : _collection(collection),
+ _workingSet(ws),
+ _key(key),
+ _query(NULL),
+ _killed(false),
+ _done(false),
+ _commonStats(kStageType) { }
+
+ IDHackStage::~IDHackStage() { }
+
+ bool IDHackStage::isEOF() {
+ return _killed || _done;
+ }
+
+ PlanStage::StageState IDHackStage::work(WorkingSetID* out) {
+ ++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
+ if (_killed) { return PlanStage::DEAD; }
+ if (_done) { return PlanStage::IS_EOF; }
+
+ // Use the index catalog to get the id index.
+ const IndexCatalog* catalog = _collection->getIndexCatalog();
+
+ // Find the index we use.
+ IndexDescriptor* idDesc = catalog->findIdIndex();
+ if (NULL == idDesc) {
+ _done = true;
+ return PlanStage::IS_EOF;
+ }
+
+ // This may not always be valid. See SERVER-12397.
+ const BtreeBasedAccessMethod* accessMethod =
+ static_cast<const BtreeBasedAccessMethod*>(catalog->getIndex(idDesc));
+
+ // Look up the key by going directly to the Btree.
+ DiskLoc loc = accessMethod->findSingle( _key );
+
+ // Key not found.
+ if (loc.isNull()) {
+ _done = true;
+ return PlanStage::IS_EOF;
+ }
+
+ ++_specificStats.keysExamined;
+ ++_specificStats.docsExamined;
+
+ // Fill out the WSM.
+ WorkingSetID id = _workingSet->allocate();
+ WorkingSetMember* member = _workingSet->get(id);
+ member->loc = loc;
+ member->obj = _collection->docFor(loc);
+ member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
+
+ _done = true;
+ ++_commonStats.advanced;
+ *out = id;
+ return PlanStage::ADVANCED;
+ }
+
+ void IDHackStage::prepareToYield() {
+ ++_commonStats.yields;
+ }
+
+ void IDHackStage::recoverFromYield() {
+ ++_commonStats.unyields;
+ }
+
+ void IDHackStage::invalidate(const DiskLoc& dl, InvalidationType type) {
+ ++_commonStats.invalidates;
+ }
+
+ // static
+ bool IDHackStage::supportsQuery(const CanonicalQuery& query) {
+ return !query.getParsed().showDiskLoc()
+ && query.getParsed().getHint().isEmpty()
+ && 0 == query.getParsed().getSkip()
+ && CanonicalQuery::isSimpleIdQuery(query.getParsed().getFilter())
+ && !query.getParsed().hasOption(QueryOption_CursorTailable);
+ }
+
+ vector<PlanStage*> IDHackStage::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
+ PlanStageStats* IDHackStage::getStats() {
+ _commonStats.isEOF = isEOF();
+ auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_IDHACK));
+ ret->specific.reset(new IDHackStats(_specificStats));
+ return ret.release();
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/idhack.h b/src/mongo/db/exec/idhack.h
new file mode 100644
index 00000000000..3f266638d9e
--- /dev/null
+++ b/src/mongo/db/exec/idhack.h
@@ -0,0 +1,94 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/diskloc.h"
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/exec/plan_stage.h"
+#include "mongo/db/query/canonical_query.h"
+
+namespace mongo {
+
+ /**
+ * A standalone stage implementing the fast path for key-value retrievals
+ * via the _id index.
+ */
+ class IDHackStage : public PlanStage {
+ public:
+ /** Takes ownership of all the arguments except 'collection'. */
+ IDHackStage(const Collection* collection, CanonicalQuery* query, WorkingSet* ws);
+
+ IDHackStage(Collection* collection, const BSONObj& key, WorkingSet* ws);
+
+ virtual ~IDHackStage();
+
+ virtual bool isEOF();
+ virtual StageState work(WorkingSetID* out);
+
+ virtual void prepareToYield();
+ virtual void recoverFromYield();
+ virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+
+ /**
+ * ID Hack has a very strict criteria for the queries it supports.
+ */
+ static bool supportsQuery(const CanonicalQuery& query);
+
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_IDHACK; }
+
+ PlanStageStats* getStats();
+
+ static const char* kStageType;
+
+ private:
+ // Not owned here.
+ const Collection* _collection;
+
+ // The WorkingSet we annotate with results. Not owned by us.
+ WorkingSet* _workingSet;
+
+ // The value to match against the _id field.
+ BSONObj _key;
+
+ // Not owned by us.
+ CanonicalQuery* _query;
+
+ // Did someone call kill() on us?
+ bool _killed;
+
+ // Have we returned our one document?
+ bool _done;
+
+ CommonStats _commonStats;
+ IDHackStats _specificStats;
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index bbdb836d54b..1982f134b8d 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -131,6 +131,9 @@ namespace mongo {
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (NULL == _indexCursor.get()) {
// First call to work(). Perform possibly heavy init.
initIndexScan();
@@ -339,6 +342,11 @@ namespace mongo {
}
}
+ vector<PlanStage*> IndexScan::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* IndexScan::getStats() {
// WARNING: this could be called even if the collection was dropped. Do not access any
// catalog information here.
diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h
index 7ad2ef316b2..6b993f387ad 100644
--- a/src/mongo/db/exec/index_scan.h
+++ b/src/mongo/db/exec/index_scan.h
@@ -91,6 +91,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_IXSCAN; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/keep_mutations.cpp b/src/mongo/db/exec/keep_mutations.cpp
index b52b7c0540b..586e9ab3bb5 100644
--- a/src/mongo/db/exec/keep_mutations.cpp
+++ b/src/mongo/db/exec/keep_mutations.cpp
@@ -53,6 +53,9 @@ namespace mongo {
PlanStage::StageState KeepMutationsStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
// If we've returned as many results as we're limited to, isEOF will be true.
if (isEOF()) { return PlanStage::IS_EOF; }
@@ -115,6 +118,12 @@ namespace mongo {
_child->invalidate(dl, type);
}
+ vector<PlanStage*> KeepMutationsStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* KeepMutationsStage::getStats() {
_commonStats.isEOF = isEOF();
auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_KEEP_MUTATIONS));
diff --git a/src/mongo/db/exec/keep_mutations.h b/src/mongo/db/exec/keep_mutations.h
index 02bc8da319a..6b10063512c 100644
--- a/src/mongo/db/exec/keep_mutations.h
+++ b/src/mongo/db/exec/keep_mutations.h
@@ -55,6 +55,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_KEEP_MUTATIONS; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/limit.cpp b/src/mongo/db/exec/limit.cpp
index 21f6ade2107..50fd0826eb8 100644
--- a/src/mongo/db/exec/limit.cpp
+++ b/src/mongo/db/exec/limit.cpp
@@ -45,6 +45,9 @@ namespace mongo {
PlanStage::StageState LimitStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (0 == _numToReturn) {
// We've returned as many results as we're limited to.
return PlanStage::IS_EOF;
@@ -95,6 +98,12 @@ namespace mongo {
_child->invalidate(dl, type);
}
+ vector<PlanStage*> LimitStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* LimitStage::getStats() {
_commonStats.isEOF = isEOF();
_specificStats.limit = _numToReturn;
diff --git a/src/mongo/db/exec/limit.h b/src/mongo/db/exec/limit.h
index 5b5293823c2..05866c0c5b3 100644
--- a/src/mongo/db/exec/limit.h
+++ b/src/mongo/db/exec/limit.h
@@ -53,6 +53,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_LIMIT; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/merge_sort.cpp b/src/mongo/db/exec/merge_sort.cpp
index e35d0570423..6716a4b1c56 100644
--- a/src/mongo/db/exec/merge_sort.cpp
+++ b/src/mongo/db/exec/merge_sort.cpp
@@ -67,6 +67,9 @@ namespace mongo {
PlanStage::StageState MergeSortStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
if (!_noResultToMerge.empty()) {
@@ -246,6 +249,10 @@ namespace mongo {
return false;
}
+ vector<PlanStage*> MergeSortStage::getChildren() const {
+ return _children;
+ }
+
PlanStageStats* MergeSortStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/merge_sort.h b/src/mongo/db/exec/merge_sort.h
index 3eda2add8ff..1b493d15913 100644
--- a/src/mongo/db/exec/merge_sort.h
+++ b/src/mongo/db/exec/merge_sort.h
@@ -69,6 +69,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_SORT_MERGE; }
+
PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/mock_stage.cpp b/src/mongo/db/exec/mock_stage.cpp
index 1fbc01b06ae..e316fed458e 100644
--- a/src/mongo/db/exec/mock_stage.cpp
+++ b/src/mongo/db/exec/mock_stage.cpp
@@ -64,4 +64,9 @@ namespace mongo {
_members.push(id);
}
+ vector<PlanStage*> MockStage::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
} // namespace mongo
diff --git a/src/mongo/db/exec/mock_stage.h b/src/mongo/db/exec/mock_stage.h
index 9476038a16a..bdb2d667615 100644
--- a/src/mongo/db/exec/mock_stage.h
+++ b/src/mongo/db/exec/mock_stage.h
@@ -61,6 +61,11 @@ namespace mongo {
virtual void prepareToYield() { }
virtual void recoverFromYield() { }
virtual void invalidate(const DiskLoc& dl, InvalidationType type) { }
+
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_MOCK; }
+
virtual PlanStageStats* getStats() { return NULL; }
/**
diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp
index 2268c8ed58b..70ed5b52b58 100644
--- a/src/mongo/db/exec/multi_plan.cpp
+++ b/src/mongo/db/exec/multi_plan.cpp
@@ -106,6 +106,9 @@ namespace mongo {
}
PlanStage::StageState MultiPlanStage::work(WorkingSetID* out) {
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (_failure) {
*out = _statusMemberId;
return PlanStage::FAILURE;
@@ -253,7 +256,7 @@ namespace mongo {
}
}
- vector<PlanStageStats*>* MultiPlanStage::generateCandidateStats() {
+ vector<PlanStageStats*> MultiPlanStage::generateCandidateStats() {
for (size_t ix = 0; ix < _candidates.size(); ix++) {
if (ix == (size_t)_bestPlanIdx) { continue; }
if (ix == (size_t)_backupPlanIdx) { continue; }
@@ -267,7 +270,7 @@ namespace mongo {
}
}
- return &_candidateStats;
+ return _candidateStats;
}
void MultiPlanStage::clearCandidates() {
@@ -330,31 +333,6 @@ namespace mongo {
return !doneWorking;
}
- Status MultiPlanStage::executeWinningPlan() {
- invariant(_bestPlanIdx != kNoSuchPlan);
- PlanStage* winner = _candidates[_bestPlanIdx].root;
- WorkingSet* ws = _candidates[_bestPlanIdx].ws;
-
- bool doneWorking = false;
-
- while (!doneWorking) {
- WorkingSetID id = WorkingSet::INVALID_ID;
- PlanStage::StageState state = winner->work(&id);
-
- if (PlanStage::IS_EOF == state || PlanStage::DEAD == state) {
- doneWorking = true;
- }
- else if (PlanStage::FAILURE == state) {
- // Propogate error.
- BSONObj errObj;
- WorkingSetCommon::getStatusMemberObject(*ws, id, &errObj);
- return Status(ErrorCodes::BadValue, WorkingSetCommon::toStatusString(errObj));
- }
- }
-
- return Status::OK();
- }
-
Status MultiPlanStage::executeAllPlans() {
// Boolean vector keeping track of which plans are done.
vector<bool> planDone(_candidates.size(), false);
@@ -499,6 +477,21 @@ namespace mongo {
}
}
+ vector<PlanStage*> MultiPlanStage::getChildren() const {
+ vector<PlanStage*> children;
+
+ if (bestPlanChosen()) {
+ children.push_back(_candidates[_bestPlanIdx].root);
+ }
+ else {
+ for (size_t i = 0; i < _candidates.size(); i++) {
+ children.push_back(_candidates[i].root);
+ }
+ }
+
+ return children;
+ }
+
PlanStageStats* MultiPlanStage::getStats() {
if (bestPlanChosen()) {
return _candidates[_bestPlanIdx].root->getStats();
diff --git a/src/mongo/db/exec/multi_plan.h b/src/mongo/db/exec/multi_plan.h
index fe25dcddfde..dadc38a833a 100644
--- a/src/mongo/db/exec/multi_plan.h
+++ b/src/mongo/db/exec/multi_plan.h
@@ -68,6 +68,10 @@ namespace mongo {
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_MULTI_PLAN; }
+
virtual PlanStageStats* getStats();
/** Takes ownership of QuerySolution and PlanStage. not of WorkingSet */
@@ -95,29 +99,20 @@ namespace mongo {
*/
bool hasBackupPlan() const;
- /**
- * Gathers execution stats for all losing plans.
- */
- vector<PlanStageStats*>* generateCandidateStats();
-
//
// Used by explain.
//
/**
- * Runs the winning plan into it hits EOF or returns DEAD, throwing out the results.
- * Execution stats are gathered in the process.
- *
- * You can call this after calling pickBestPlan(...). It expects that a winning plan
- * has already been selected.
+ * Gathers execution stats for all losing plans.
*/
- Status executeWinningPlan();
+ vector<PlanStageStats*> generateCandidateStats();
/**
* Runs the candidate plans until each has either hit EOF or returned DEAD. Results
* from the plans are thrown out, but execution stats are gathered.
*
- * You can call this after calling pickBestPlan(...). It expects that a winning plan
+ * You should call this after calling pickBestPlan(...). It expects that a winning plan
* has already been selected.
*/
Status executeAllPlans();
diff --git a/src/mongo/db/exec/oplogstart.cpp b/src/mongo/db/exec/oplogstart.cpp
index d2bedf62976..f63f95b27b9 100644
--- a/src/mongo/db/exec/oplogstart.cpp
+++ b/src/mongo/db/exec/oplogstart.cpp
@@ -175,6 +175,11 @@ namespace mongo {
}
}
+ vector<PlanStage*> OplogStart::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
int OplogStart::_backwardsScanTime = 5;
} // namespace mongo
diff --git a/src/mongo/db/exec/oplogstart.h b/src/mongo/db/exec/oplogstart.h
index c0f74573c48..e10721a9064 100644
--- a/src/mongo/db/exec/oplogstart.h
+++ b/src/mongo/db/exec/oplogstart.h
@@ -70,9 +70,13 @@ namespace mongo {
virtual void prepareToYield();
virtual void recoverFromYield();
+ virtual std::vector<PlanStage*> getChildren() const;
+
// PS. don't call this.
virtual PlanStageStats* getStats() { return NULL; }
+ virtual StageType stageType() const { return STAGE_OPLOG_START; }
+
// For testing only.
void setBackwardsScanTime(int newTime) { _backwardsScanTime = newTime; }
bool isExtentHopping() { return _extentHopping; }
diff --git a/src/mongo/db/exec/or.cpp b/src/mongo/db/exec/or.cpp
index 9266c6fec10..e9832567e40 100644
--- a/src/mongo/db/exec/or.cpp
+++ b/src/mongo/db/exec/or.cpp
@@ -52,6 +52,9 @@ namespace mongo {
PlanStage::StageState OrStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
if (0 == _specificStats.matchTested.size()) {
@@ -168,6 +171,10 @@ namespace mongo {
}
}
+ vector<PlanStage*> OrStage::getChildren() const {
+ return _children;
+ }
+
PlanStageStats* OrStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/or.h b/src/mongo/db/exec/or.h
index 3e51bbcd94e..69aec74e22d 100644
--- a/src/mongo/db/exec/or.h
+++ b/src/mongo/db/exec/or.h
@@ -58,6 +58,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_OR; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h
index a4f53d8ca9e..ac638fd2f25 100644
--- a/src/mongo/db/exec/plan_stage.h
+++ b/src/mongo/db/exec/plan_stage.h
@@ -41,11 +41,11 @@ namespace mongo {
* A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is
* the smallest piece of machinery used in executing a compiled query. Stages either access
* data (from a collection or an index) to create a stream of results, or transform a stream of
- * results (e.g. AND, OR, SORT) to create a stream of results.
+ * results (e.g. AND, OR, SORT) to create a stream of results.
*
* Stages have zero or more input streams but only one output stream. Data-accessing stages are
* leaves and data-transforming stages have children. Stages can be connected together to form
- * a tree which is then executed (see plan_runner.h) to solve a query.
+ * a tree which is then executed (see plan_runner.h) to solve a query.
*
* A stage's input and output are each typed. Only stages with compatible types can be
* connected.
@@ -197,6 +197,17 @@ namespace mongo {
virtual void invalidate(const DiskLoc& dl, InvalidationType type) = 0;
/**
+ * Retrieve a list of this stage's children. This stage keeps ownership of
+ * its children.
+ */
+ virtual std::vector<PlanStage*> getChildren() const = 0;
+
+ /**
+ * What type of stage is this?
+ */
+ virtual StageType stageType() const = 0;
+
+ /**
* Returns a tree of stats. See plan_stats.h for the details of this structure. If the
* stage has any children it must propagate the request for stats to them.
*
diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h
index dab069ed6c2..55b8eba51e6 100644
--- a/src/mongo/db/exec/plan_stats.h
+++ b/src/mongo/db/exec/plan_stats.h
@@ -38,6 +38,7 @@
#include "mongo/db/geo/hash.h"
#include "mongo/db/query/stage_types.h"
#include "mongo/platform/cstdint.h"
+#include "mongo/util/time_support.h"
namespace mongo {
@@ -63,6 +64,7 @@ namespace mongo {
invalidates(0),
advanced(0),
needTime(0),
+ executionTimeMillis(0),
isEOF(false) { }
// String giving the type of the stage. Not owned.
const char* stageTypeStr;
@@ -81,6 +83,9 @@ namespace mongo {
// is no filter affixed, then 'filter' should be an empty BSONObj.
BSONObj filter;
+ // Time elapsed while working inside this stage.
+ long long executionTimeMillis;
+
// TODO: have some way of tracking WSM sizes (or really any series of #s). We can measure
// the size of our inputs and the size of our outputs. We can do a lot with the WS here.
@@ -93,6 +98,34 @@ namespace mongo {
CommonStats();
};
+ /**
+ * This class increments a counter by the time elapsed since its construction when
+ * it goes out of scope.
+ */
+ class ScopedTimer {
+ public:
+ ScopedTimer(long long* counter) : _counter(counter) {
+ _start = curTimeMillis64();
+ }
+
+ ~ScopedTimer() {
+ long long elapsed = curTimeMillis64() - _start;
+ *_counter += elapsed;
+ }
+
+ private:
+ // Default constructor disallowed.
+ ScopedTimer();
+
+ MONGO_DISALLOW_COPYING(ScopedTimer);
+
+ // Reference to the counter that we are incrementing with the elapsed time.
+ long long* _counter;
+
+ // Time at which the timer was constructed.
+ long long _start;
+ };
+
// The universal container for a stage's stats.
struct PlanStageStats {
PlanStageStats(const CommonStats& c, StageType t) : stageType(t), common(c) { }
@@ -210,6 +243,27 @@ namespace mongo {
size_t docsTested;
};
+ struct CountStats : public SpecificStats {
+ CountStats() : isMultiKey(false),
+ keysExamined(0) { }
+
+ virtual ~CountStats() { }
+
+ virtual SpecificStats* clone() const {
+ CountStats* specific = new CountStats(*this);
+ // BSON objects have to be explicitly copied.
+ specific->keyPattern = keyPattern.getOwned();
+ return specific;
+ }
+
+ BSONObj keyPattern;
+
+ bool isMultiKey;
+
+ size_t keysExamined;
+
+ };
+
struct DistinctScanStats : public SpecificStats {
DistinctScanStats() : keysExamined(0) { }
@@ -224,7 +278,8 @@ namespace mongo {
struct FetchStats : public SpecificStats {
FetchStats() : alreadyHasObj(0),
forcedFetches(0),
- matchTested(0) { }
+ matchTested(0),
+ docsExamined(0) { }
virtual ~FetchStats() { }
@@ -245,6 +300,28 @@ namespace mongo {
// We know how many passed (it's the # of advanced) and therefore how many failed.
size_t matchTested;
+
+ // The total number of full documents touched by the fetch stage.
+ size_t docsExamined;
+ };
+
+ struct IDHackStats : public SpecificStats {
+ IDHackStats() : keysExamined(0),
+ docsExamined(0) { }
+
+ virtual ~IDHackStats() { }
+
+ virtual SpecificStats* clone() const {
+ IDHackStats* specific = new IDHackStats(*this);
+ return specific;
+ }
+
+ // Number of entries retrieved from the index while executing the idhack.
+ size_t keysExamined;
+
+ // Number of documents retrieved from the collection while executing the idhack.
+ size_t docsExamined;
+
};
struct IndexScanStats : public SpecificStats {
diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp
index 60c0aa221f3..c4c8388462f 100644
--- a/src/mongo/db/exec/projection.cpp
+++ b/src/mongo/db/exec/projection.cpp
@@ -50,6 +50,8 @@ namespace mongo {
_commonStats(kStageType),
_projImpl(params.projImpl) {
+ _projObj = params.projObj;
+
if (ProjectionStageParams::NO_FAST_PATH == _projImpl) {
_exec.reset(new ProjectionExec(params.projObj,
params.fullExpression,
@@ -59,8 +61,6 @@ namespace mongo {
// We shouldn't need the full expression if we're fast-pathing.
invariant(NULL == params.fullExpression);
- _projObj = params.projObj;
-
// Sanity-check the input.
invariant(_projObj.isOwned());
invariant(!_projObj.isEmpty());
@@ -197,6 +197,9 @@ namespace mongo {
PlanStage::StageState ProjectionStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
WorkingSetID id = WorkingSet::INVALID_ID;
StageState status = _child->work(&id);
@@ -247,11 +250,20 @@ namespace mongo {
_child->invalidate(dl, type);
}
+ vector<PlanStage*> ProjectionStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* ProjectionStage::getStats() {
_commonStats.isEOF = isEOF();
- _specificStats.projObj = _projObj;
auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_PROJECTION));
- ret->specific.reset(new ProjectionStats(_specificStats));
+
+ ProjectionStats* projStats = new ProjectionStats(_specificStats);
+ projStats->projObj = _projObj;
+ ret->specific.reset(projStats);
+
ret->children.push_back(_child->getStats());
return ret.release();
}
diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h
index 4b66e0dc8af..af586156f77 100644
--- a/src/mongo/db/exec/projection.h
+++ b/src/mongo/db/exec/projection.h
@@ -87,6 +87,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_PROJECTION; }
+
PlanStageStats* getStats();
typedef unordered_set<StringData, StringData::Hasher> FieldSet;
diff --git a/src/mongo/db/exec/s2near.cpp b/src/mongo/db/exec/s2near.cpp
index faaeddaca22..c7d37111840 100644
--- a/src/mongo/db/exec/s2near.cpp
+++ b/src/mongo/db/exec/s2near.cpp
@@ -119,6 +119,9 @@ namespace mongo {
}
PlanStage::StageState S2NearStage::work(WorkingSetID* out) {
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (!_initted) { init(); }
if (_failed) {
@@ -426,6 +429,11 @@ namespace mongo {
return _innerRadius >= _maxDistance;
}
+ vector<PlanStage*> S2NearStage::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* S2NearStage::getStats() {
// TODO: must agg stats across child ixscan/fetches.
// TODO: we can do better than this, need own common stats.
diff --git a/src/mongo/db/exec/s2near.h b/src/mongo/db/exec/s2near.h
index 5286d4b4302..f273b8bf992 100644
--- a/src/mongo/db/exec/s2near.h
+++ b/src/mongo/db/exec/s2near.h
@@ -74,6 +74,10 @@ namespace mongo {
void recoverFromYield();
void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_GEO_NEAR_2DSPHERE; }
+
PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/shard_filter.cpp b/src/mongo/db/exec/shard_filter.cpp
index 784afb0fad3..b425c0a6138 100644
--- a/src/mongo/db/exec/shard_filter.cpp
+++ b/src/mongo/db/exec/shard_filter.cpp
@@ -47,6 +47,9 @@ namespace mongo {
PlanStage::StageState ShardFilterStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
// If we've returned as many results as we're limited to, isEOF will be true.
if (isEOF()) { return PlanStage::IS_EOF; }
@@ -96,6 +99,12 @@ namespace mongo {
_child->invalidate(dl, type);
}
+ vector<PlanStage*> ShardFilterStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* ShardFilterStage::getStats() {
_commonStats.isEOF = isEOF();
auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_SHARDING_FILTER));
diff --git a/src/mongo/db/exec/shard_filter.h b/src/mongo/db/exec/shard_filter.h
index ce1a4ac64de..f1ce3fc615e 100644
--- a/src/mongo/db/exec/shard_filter.h
+++ b/src/mongo/db/exec/shard_filter.h
@@ -84,6 +84,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_SHARDING_FILTER; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/skip.cpp b/src/mongo/db/exec/skip.cpp
index 90450169d6d..cf0bc55dc88 100644
--- a/src/mongo/db/exec/skip.cpp
+++ b/src/mongo/db/exec/skip.cpp
@@ -45,6 +45,9 @@ namespace mongo {
PlanStage::StageState SkipStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
WorkingSetID id = WorkingSet::INVALID_ID;
StageState status = _child->work(&id);
@@ -99,6 +102,12 @@ namespace mongo {
_child->invalidate(dl, type);
}
+ vector<PlanStage*> SkipStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* SkipStage::getStats() {
_commonStats.isEOF = isEOF();
_specificStats.skip = _toSkip;
diff --git a/src/mongo/db/exec/skip.h b/src/mongo/db/exec/skip.h
index 48366a71616..1dafa9a305c 100644
--- a/src/mongo/db/exec/skip.h
+++ b/src/mongo/db/exec/skip.h
@@ -52,6 +52,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_SKIP; }
+
virtual PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/sort.cpp b/src/mongo/db/exec/sort.cpp
index b30a198a1c1..7486e415744 100644
--- a/src/mongo/db/exec/sort.cpp
+++ b/src/mongo/db/exec/sort.cpp
@@ -304,6 +304,9 @@ namespace mongo {
PlanStage::StageState SortStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (NULL == _sortKeyGen) {
// This is heavy and should be done as part of work().
_sortKeyGen.reset(new SortStageKeyGenerator(_collection, _pattern, _query));
@@ -449,6 +452,12 @@ namespace mongo {
}
}
+ vector<PlanStage*> SortStage::getChildren() const {
+ vector<PlanStage*> children;
+ children.push_back(_child.get());
+ return children;
+ }
+
PlanStageStats* SortStage::getStats() {
_commonStats.isEOF = isEOF();
_specificStats.memLimit = kMaxBytes;
diff --git a/src/mongo/db/exec/sort.h b/src/mongo/db/exec/sort.h
index ac4ae63b3f1..3a80397ef05 100644
--- a/src/mongo/db/exec/sort.h
+++ b/src/mongo/db/exec/sort.h
@@ -153,6 +153,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_SORT; }
+
PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp
new file mode 100644
index 00000000000..67e631abebd
--- /dev/null
+++ b/src/mongo/db/exec/subplan.cpp
@@ -0,0 +1,504 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/exec/subplan.h"
+
+#include "mongo/client/dbclientinterface.h"
+#include "mongo/db/exec/multi_plan.h"
+#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_access.h"
+#include "mongo/db/query/qlog.h"
+#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/stage_builder.h"
+
+namespace mongo {
+
+ // static
+ const char* SubplanStage::kStageType = "SUBPLAN";
+
+ SubplanStage::SubplanStage(Collection* collection,
+ WorkingSet* ws,
+ const QueryPlannerParams& params,
+ CanonicalQuery* cq)
+ : _state(SubplanStage::PLANNING),
+ _collection(collection),
+ _ws(ws),
+ _plannerParams(params),
+ _query(cq),
+ _killed(false),
+ _child(NULL),
+ _commonStats(kStageType) { }
+
+ SubplanStage::~SubplanStage() {
+ while (!_solutions.empty()) {
+ vector<QuerySolution*> solns = _solutions.front();
+ for (size_t i = 0; i < solns.size(); i++) {
+ delete solns[i];
+ }
+ _solutions.pop();
+ }
+
+ while (!_cqs.empty()) {
+ delete _cqs.front();
+ _cqs.pop();
+ }
+ }
+
+ // static
+ Status SubplanStage::make(Collection* collection,
+ WorkingSet* ws,
+ const QueryPlannerParams& params,
+ CanonicalQuery* cq,
+ SubplanStage** out) {
+ auto_ptr<SubplanStage> autoStage(new SubplanStage(collection, ws, params, cq));
+ Status planningStatus = autoStage->planSubqueries();
+ if (!planningStatus.isOK()) {
+ return planningStatus;
+ }
+
+ *out = autoStage.release();
+ return Status::OK();
+ }
+
+ // static
+ bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) {
+ const LiteParsedQuery& lpq = query.getParsed();
+ const MatchExpression* expr = query.root();
+
+ // Only rooted ORs work with the subplan scheme.
+ if (MatchExpression::OR != expr->matchType()) {
+ return false;
+ }
+
+ // Collection scan
+ // No sort order requested
+ if (lpq.getSort().isEmpty() &&
+ expr->matchType() == MatchExpression::AND && expr->numChildren() == 0) {
+ return false;
+ }
+
+ // Hint provided
+ if (!lpq.getHint().isEmpty()) {
+ return false;
+ }
+
+ // Min provided
+ // Min queries are a special case of hinted queries.
+ if (!lpq.getMin().isEmpty()) {
+ return false;
+ }
+
+ // Max provided
+ // Similar to min, max queries are a special case of hinted queries.
+ if (!lpq.getMax().isEmpty()) {
+ return false;
+ }
+
+ // Tailable cursors won't get cached, just turn into collscans.
+ if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
+ return false;
+ }
+
+ // Snapshot is really a hint.
+ if (query.getParsed().isSnapshot()) {
+ return false;
+ }
+
+ return true;
+ }
+
+ Status SubplanStage::planSubqueries() {
+ MatchExpression* theOr = _query->root();
+
+ for (size_t i = 0; i < _plannerParams.indices.size(); ++i) {
+ const IndexEntry& ie = _plannerParams.indices[i];
+ _indexMap[ie.keyPattern] = i;
+ QLOG() << "Subplanner: index " << i << " is " << ie.toString() << endl;
+ }
+
+ const WhereCallbackReal whereCallback(_collection->ns().db());
+
+ for (size_t i = 0; i < theOr->numChildren(); ++i) {
+ // Turn the i-th child into its own query.
+ MatchExpression* orChild = theOr->getChild(i);
+ CanonicalQuery* orChildCQ;
+ Status childCQStatus = CanonicalQuery::canonicalize(*_query,
+ orChild,
+ &orChildCQ,
+ whereCallback);
+ if (!childCQStatus.isOK()) {
+ mongoutils::str::stream ss;
+ ss << "Subplanner: Can't canonicalize subchild " << orChild->toString()
+ << " " << childCQStatus.reason();
+ return Status(ErrorCodes::BadValue, ss);
+ }
+
+ // Make sure it gets cleaned up.
+ auto_ptr<CanonicalQuery> safeOrChildCQ(orChildCQ);
+
+ // Plan the i-th child.
+ vector<QuerySolution*> solutions;
+
+ // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from
+ // considering any plan that's a collscan.
+ QLOG() << "Subplanner: planning child " << i << " of " << theOr->numChildren();
+ Status status = QueryPlanner::plan(*safeOrChildCQ, _plannerParams, &solutions);
+
+ if (!status.isOK()) {
+ mongoutils::str::stream ss;
+ ss << "Subplanner: Can't plan for subchild " << orChildCQ->toString()
+ << " " << status.reason();
+ return Status(ErrorCodes::BadValue, ss);
+ }
+ QLOG() << "Subplanner: got " << solutions.size() << " solutions";
+
+ if (0 == solutions.size()) {
+ // If one child doesn't have an indexed solution, bail out.
+ mongoutils::str::stream ss;
+ ss << "Subplanner: No solutions for subchild " << orChildCQ->toString();
+ return Status(ErrorCodes::BadValue, ss);
+ }
+
+ // Hang onto the canonicalized subqueries and the corresponding query solutions
+ // so that they can be used in subplan running later on.
+ _cqs.push(safeOrChildCQ.release());
+ _solutions.push(solutions);
+ }
+
+ return Status::OK();
+ }
+
+ bool SubplanStage::runSubplans() {
+ // This is what we annotate with the index selections and then turn into a solution.
+ auto_ptr<OrMatchExpression> theOr(
+ static_cast<OrMatchExpression*>(_query->root()->shallowClone()));
+
+ // This is the skeleton of index selections that is inserted into the cache.
+ auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());
+
+ for (size_t i = 0; i < theOr->numChildren(); ++i) {
+ MatchExpression* orChild = theOr->getChild(i);
+
+ auto_ptr<CanonicalQuery> orChildCQ(_cqs.front());
+ _cqs.pop();
+
+ // 'solutions' is owned by the SubplanStage instance until
+ // it is popped from the queue.
+ vector<QuerySolution*> solutions = _solutions.front();
+ _solutions.pop();
+
+ // We already checked for zero solutions in planSubqueries(...).
+ invariant(!solutions.empty());
+
+ if (1 == solutions.size()) {
+ // There is only one solution. Transfer ownership to an auto_ptr.
+ auto_ptr<QuerySolution> autoSoln(solutions[0]);
+
+ // We want a well-formed *indexed* solution.
+ if (NULL == autoSoln->cacheData.get()) {
+ // For example, we don't cache things for 2d indices.
+ QLOG() << "Subplanner: No cache data for subchild " << orChild->toString();
+ return false;
+ }
+
+ if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) {
+ QLOG() << "Subplanner: No indexed cache data for subchild "
+ << orChild->toString();
+ return false;
+ }
+
+ // Add the index assignments to our original query.
+ Status tagStatus = QueryPlanner::tagAccordingToCache(
+ orChild, autoSoln->cacheData->tree.get(), _indexMap);
+
+ if (!tagStatus.isOK()) {
+ QLOG() << "Subplanner: Failed to extract indices from subchild "
+ << orChild->toString();
+ return false;
+ }
+
+ // Add the child's cache data to the cache data we're creating for the main query.
+ cacheData->children.push_back(autoSoln->cacheData->tree->clone());
+ }
+ else {
+ // N solutions, rank them. Takes ownership of orChildCQ.
+
+ // the working set will be shared by the candidate plans and owned by the runner
+ WorkingSet* sharedWorkingSet = new WorkingSet();
+
+ auto_ptr<MultiPlanStage> multiPlanStage(new MultiPlanStage(_collection,
+ orChildCQ.get()));
+
+ // Dump all the solutions into the MPR.
+ for (size_t ix = 0; ix < solutions.size(); ++ix) {
+ PlanStage* nextPlanRoot;
+ verify(StageBuilder::build(_collection,
+ *solutions[ix],
+ sharedWorkingSet,
+ &nextPlanRoot));
+
+ // Owns first two arguments
+ multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet);
+ }
+
+ multiPlanStage->pickBestPlan();
+ if (!multiPlanStage->bestPlanChosen()) {
+ QLOG() << "Subplanner: Failed to pick best plan for subchild "
+ << orChildCQ->toString();
+ return false;
+ }
+
+ scoped_ptr<PlanExecutor> exec(new PlanExecutor(sharedWorkingSet,
+ multiPlanStage.release(),
+ _collection));
+
+ _child.reset(exec->releaseStages());
+
+ if (_killed) {
+ QLOG() << "Subplanner: Killed while picking best plan for subchild "
+ << orChild->toString();
+ return false;
+ }
+
+ QuerySolution* bestSoln = multiPlanStage->bestSolution();
+
+ if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) {
+ QLOG() << "Subplanner: No indexed cache data for subchild "
+ << orChild->toString();
+ return false;
+ }
+
+ // Add the index assignments to our original query.
+ Status tagStatus = QueryPlanner::tagAccordingToCache(
+ orChild, bestSoln->cacheData->tree.get(), _indexMap);
+
+ if (!tagStatus.isOK()) {
+ QLOG() << "Subplanner: Failed to extract indices from subchild "
+ << orChild->toString();
+ return false;
+ }
+
+ cacheData->children.push_back(bestSoln->cacheData->tree->clone());
+ }
+ }
+
+ // Must do this before using the planner functionality.
+ sortUsingTags(theOr.get());
+
+ // Use the cached index assignments to build solnRoot. Takes ownership of 'theOr'
+ QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
+ *_query, theOr.release(), false, _plannerParams.indices);
+
+ if (NULL == solnRoot) {
+ QLOG() << "Subplanner: Failed to build indexed data path for subplanned query\n";
+ return false;
+ }
+
+ QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString();
+
+ // Takes ownership of 'solnRoot'
+ QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query,
+ _plannerParams,
+ solnRoot);
+
+ if (NULL == soln) {
+ QLOG() << "Subplanner: Failed to analyze subplanned query";
+ return false;
+ }
+
+ // We want our franken-solution to be cached.
+ SolutionCacheData* scd = new SolutionCacheData();
+ scd->tree.reset(cacheData.release());
+ soln->cacheData.reset(scd);
+
+ QLOG() << "Subplanner: Composite solution is " << soln->toString() << endl;
+
+ // We use one of these even if there is one plan. We do this so that the entry is cached
+ // with stats obtained in the same fashion as a competitive ranking would have obtained
+ // them.
+ auto_ptr<MultiPlanStage> multiPlanStage(new MultiPlanStage(_collection, _query));
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root;
+ verify(StageBuilder::build(_collection, *soln, ws, &root));
+ multiPlanStage->addPlan(soln, root, ws); // Takes ownership first two arguments.
+
+ multiPlanStage->pickBestPlan();
+ if (! multiPlanStage->bestPlanChosen()) {
+ QLOG() << "Subplanner: Failed to pick best plan for subchild "
+ << _query->toString();
+ return false;
+ }
+
+ scoped_ptr<PlanExecutor> exec(new PlanExecutor(ws, multiPlanStage.release(), _collection));
+
+ _child.reset(exec->releaseStages());
+
+ return true;
+ }
+
+ bool SubplanStage::isEOF() {
+ if (_killed) {
+ return true;
+ }
+
+ // If we're still planning we're not done yet.
+ if (SubplanStage::PLANNING == _state) {
+ return false;
+ }
+
+ // If we're running we best have a runner.
+ invariant(_child.get());
+ return _child->isEOF();
+ }
+
+ PlanStage::StageState SubplanStage::work(WorkingSetID* out) {
+ ++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
+ if (_killed) {
+ return PlanStage::DEAD;
+ }
+
+ if (isEOF()) { return PlanStage::IS_EOF; }
+
+ if (SubplanStage::PLANNING == _state) {
+ // Try to run as sub-plans.
+ if (runSubplans()) {
+ // If runSubplans returns true we expect something here.
+ invariant(_child.get());
+ }
+ else if (!_killed) {
+ // Couldn't run as subplans so we'll just call normal getExecutor.
+ PlanExecutor* exec;
+ Status status = getExecutorAlwaysPlan(_collection, _query, _plannerParams, &exec);
+
+ if (!status.isOK()) {
+ // We utterly failed.
+ _killed = true;
+
+ // Propagate the error to the user wrapped in a BSONObj
+ WorkingSetID id = _ws->allocate();
+ WorkingSetMember* member = _ws->get(id);
+ member->state = WorkingSetMember::OWNED_OBJ;
+ member->keyData.clear();
+ member->loc = DiskLoc();
+
+ BSONObjBuilder bob;
+ bob.append("ok", status.isOK() ? 1.0 : 0.0);
+ bob.append("code", status.code());
+ bob.append("errmsg", status.reason());
+ member->obj = bob.obj();
+
+ *out = id;
+ return PlanStage::FAILURE;
+ }
+ else {
+ scoped_ptr<PlanExecutor> cleanupExec(exec);
+ _child.reset(exec->releaseStages());
+ }
+ }
+
+ // We can change state when we're either killed or we have an underlying runner.
+ invariant(_killed || NULL != _child.get());
+ _state = SubplanStage::RUNNING;
+ }
+
+ if (_killed) {
+ return PlanStage::DEAD;
+ }
+
+ if (isEOF()) {
+ return PlanStage::IS_EOF;
+ }
+
+ // If we're here we should have planned already.
+ invariant(SubplanStage::RUNNING == _state);
+ invariant(_child.get());
+ return _child->work(out);
+ }
+
+ void SubplanStage::prepareToYield() {
+ ++_commonStats.yields;
+ if (_killed) {
+ return;
+ }
+
+ // We're ranking a sub-plan via an MPR or we're streaming results from this stage. Either
+ // way, pass on the request.
+ if (NULL != _child.get()) {
+ _child->prepareToYield();
+ }
+ }
+
+ void SubplanStage::recoverFromYield() {
+ ++_commonStats.unyields;
+ if (_killed) {
+ return;
+ }
+
+ // We're ranking a sub-plan via an MPR or we're streaming results from this stage. Either
+ // way, pass on the request.
+ if (NULL != _child.get()) {
+ _child->recoverFromYield();
+ }
+ }
+
+ void SubplanStage::invalidate(const DiskLoc& dl, InvalidationType type) {
+ ++_commonStats.invalidates;
+ if (_killed) {
+ return;
+ }
+
+ if (NULL != _child.get()) {
+ _child->invalidate(dl, type);
+ }
+ }
+
+ vector<PlanStage*> SubplanStage::getChildren() const {
+ vector<PlanStage*> children;
+ if (NULL != _child.get()) {
+ children.push_back(_child.get());
+ }
+ return children;
+ }
+
+ PlanStageStats* SubplanStage::getStats() {
+ _commonStats.isEOF = isEOF();
+ auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_SUBPLAN));
+ ret->children.push_back(_child->getStats());
+ return ret.release();
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/subplan.h b/src/mongo/db/exec/subplan.h
new file mode 100644
index 00000000000..1fb6c698523
--- /dev/null
+++ b/src/mongo/db/exec/subplan.h
@@ -0,0 +1,138 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <boost/scoped_ptr.hpp>
+#include <string>
+#include <queue>
+
+#include "mongo/base/status.h"
+#include "mongo/db/diskloc.h"
+#include "mongo/db/exec/plan_stage.h"
+#include "mongo/db/query/query_planner_params.h"
+#include "mongo/db/query/query_solution.h"
+
+namespace mongo {
+
+ /**
+ * The SubplanStage is used for rooted $or queries. It plans each clause of the $or
+ * individually, and then creates an overall query plan based on the winning plan from
+ * each clause.
+ *
+ * Uses the MultiPlanStage in order to rank plans for the individual clauses.
+ */
+ class SubplanStage : public PlanStage {
+ public:
+ /**
+ * Used to create SubplanStage instances. The caller owns the instance
+ * returned through 'out'.
+ *
+ * 'out' is valid only if an OK status is returned.
+ */
+ static Status make(Collection* collection,
+ WorkingSet* ws,
+ const QueryPlannerParams& params,
+ CanonicalQuery* cq,
+ SubplanStage** out);
+
+ virtual ~SubplanStage();
+
+ static bool canUseSubplanning(const CanonicalQuery& query);
+
+ virtual bool isEOF();
+ virtual StageState work(WorkingSetID* out);
+
+ virtual void prepareToYield();
+ virtual void recoverFromYield();
+ virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_SUBPLAN; }
+
+ PlanStageStats* getStats();
+
+ static const char* kStageType;
+
+ /**
+ * Plan each branch of the $or independently, and store the resulting
+ * lists of query solutions in '_solutions'.
+ *
+ * Called from SubplanStage::make so that construction of the subplan stage
+     * fails immediately, rather than returning a plan executor that subsequently
+     * fails through getNext(...).
+ */
+ Status planSubqueries();
+
+ private:
+ SubplanStage(Collection* collection,
+ WorkingSet* ws,
+ const QueryPlannerParams& params,
+ CanonicalQuery* cq);
+
+ bool runSubplans();
+
+ enum SubplanningState {
+ PLANNING,
+ RUNNING,
+ };
+
+ SubplanningState _state;
+
+ Collection* _collection;
+
+ // Not owned here.
+ WorkingSet* _ws;
+
+ QueryPlannerParams _plannerParams;
+
+ // Not owned here.
+ CanonicalQuery* _query;
+
+ bool _killed;
+
+ boost::scoped_ptr<PlanStage> _child;
+
+ // We do the subquery planning up front, and keep the resulting
+ // query solutions here. Lists of query solutions are dequeued
+ // and ownership is transferred to the underlying runners one
+ // at a time.
+ std::queue< std::vector<QuerySolution*> > _solutions;
+
+ // Holds the canonicalized subqueries. Ownership is transferred
+ // to the underlying runners one at a time.
+ std::queue<CanonicalQuery*> _cqs;
+
+ // We need this to extract cache-friendly index data from the index assignments.
+ map<BSONObj, size_t> _indexMap;
+
+ CommonStats _commonStats;
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/text.cpp b/src/mongo/db/exec/text.cpp
index 170a800eed9..50f7e3f99e4 100644
--- a/src/mongo/db/exec/text.cpp
+++ b/src/mongo/db/exec/text.cpp
@@ -63,6 +63,9 @@ namespace mongo {
PlanStage::StageState TextStage::work(WorkingSetID* out) {
++_commonStats.works;
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
if (isEOF()) { return PlanStage::IS_EOF; }
invariant(_internalState != DONE);
@@ -134,6 +137,11 @@ namespace mongo {
}
}
+ vector<PlanStage*> TextStage::getChildren() const {
+ vector<PlanStage*> empty;
+ return empty;
+ }
+
PlanStageStats* TextStage::getStats() {
_commonStats.isEOF = isEOF();
diff --git a/src/mongo/db/exec/text.h b/src/mongo/db/exec/text.h
index 17f28766ae0..851d3cdada7 100644
--- a/src/mongo/db/exec/text.h
+++ b/src/mongo/db/exec/text.h
@@ -104,6 +104,10 @@ namespace mongo {
virtual void recoverFromYield();
virtual void invalidate(const DiskLoc& dl, InvalidationType type);
+ virtual std::vector<PlanStage*> getChildren() const;
+
+ virtual StageType stageType() const { return STAGE_TEXT; }
+
PlanStageStats* getStats();
static const char* kStageType;
diff --git a/src/mongo/db/instance.cpp b/src/mongo/db/instance.cpp
index 3894d07c10d..953a2debe4f 100644
--- a/src/mongo/db/instance.cpp
+++ b/src/mongo/db/instance.cpp
@@ -66,7 +66,7 @@
#include "mongo/db/matcher/matcher.h"
#include "mongo/db/mongod_options.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/db/ops/count.h"
+#include "mongo/db/commands/count.h"
#include "mongo/db/ops/delete_executor.h"
#include "mongo/db/ops/delete_request.h"
#include "mongo/db/ops/insert.h"
diff --git a/src/mongo/db/ops/count.cpp b/src/mongo/db/ops/count.cpp
deleted file mode 100644
index a602a1c4478..00000000000
--- a/src/mongo/db/ops/count.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-// count.cpp
-
-/**
- * Copyright (C) 2013 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/ops/count.h"
-
-#include "mongo/db/client.h"
-#include "mongo/db/clientcursor.h"
-#include "mongo/db/catalog/database.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/query/get_runner.h"
-#include "mongo/db/query/type_explain.h"
-
-namespace mongo {
-
- static long long applySkipLimit(long long num, const BSONObj& cmd) {
- BSONElement s = cmd["skip"];
- BSONElement l = cmd["limit"];
-
- if (s.isNumber()) {
- num = num - s.numberLong();
- if (num < 0) {
- num = 0;
- }
- }
-
- if (l.isNumber()) {
- long long limit = l.numberLong();
- if (limit < 0) {
- limit = -limit;
- }
-
- // 0 means no limit.
- if (limit < num && limit != 0) {
- num = limit;
- }
- }
-
- return num;
- }
-
- long long runCount(OperationContext* txn,
- const string& ns,
- const BSONObj &cmd,
- string &err,
- int &errCode) {
- // Lock 'ns'.
- Client::Context cx(ns);
- Collection* collection = cx.db()->getCollection(txn, ns);
-
- if (NULL == collection) {
- err = "ns missing";
- return -1;
- }
-
- BSONObj query = cmd.getObjectField("query");
- const std::string hint = cmd.getStringField("hint");
- const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint);
-
- // count of all objects
- if (query.isEmpty()) {
- return applySkipLimit(collection->numRecords(), cmd);
- }
-
- Runner* rawRunner;
- long long skip = cmd["skip"].numberLong();
- long long limit = cmd["limit"].numberLong();
-
- if (limit < 0) {
- limit = -limit;
- }
-
- uassertStatusOK(getRunnerCount(collection, query, hintObj, &rawRunner));
- auto_ptr<Runner> runner(rawRunner);
-
- // Store the plan summary string in CurOp.
- Client& client = cc();
- CurOp* currentOp = client.curop();
- if (NULL != currentOp) {
- PlanInfo* rawInfo;
- Status s = runner->getInfo(NULL, &rawInfo);
- if (s.isOK()) {
- scoped_ptr<PlanInfo> planInfo(rawInfo);
- currentOp->debug().planSummary = planInfo->planSummary.c_str();
- }
- }
-
- try {
- const ScopedRunnerRegistration safety(runner.get());
-
- long long count = 0;
- Runner::RunnerState state;
- while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, NULL))) {
- if (skip > 0) {
- --skip;
- }
- else {
- ++count;
- // Fast-path. There's no point in iterating all over the runner if limit
- // is set.
- if (count >= limit && limit != 0) {
- break;
- }
- }
- }
-
- // Emulate old behavior and return the count even if the runner was killed. This
- // happens when the underlying collection is dropped.
- return count;
- }
- catch (const DBException &e) {
- err = e.toString();
- errCode = e.getCode();
- }
- catch (const std::exception &e) {
- err = e.what();
- errCode = 0;
- }
-
- // Historically we have returned zero in many count assertion cases - see SERVER-2291.
- log() << "Count with ns: " << ns << " and query: " << query
- << " failed with exception: " << err << " code: " << errCode
- << endl;
-
- return -2;
- }
-
-} // namespace mongo
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index e48861a11f5..510a86fe1c6 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -34,6 +34,7 @@ env.Library(
"eof_runner.cpp",
"explain.cpp",
"explain_plan.cpp",
+ "get_executor.cpp",
"get_runner.cpp",
"idhack_runner.cpp",
"internal_runner.cpp",
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index 7477c1a0285..6cf96eac112 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -31,7 +31,7 @@
#include "mongo/db/query/explain.h"
#include "mongo/db/exec/multi_plan.h"
-#include "mongo/db/query/get_runner.h"
+#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/stage_builder.h"
@@ -42,6 +42,44 @@
#include "mongo/util/processinfo.h"
#include "mongo/util/version.h"
+namespace {
+
+ using namespace mongo;
+
+ /**
+ * Do a depth-first traversal of the tree rooted at 'root', and flatten the tree nodes
+ * into the list 'flattened'.
+ */
+ void flattenStatsTree(PlanStageStats* root, vector<PlanStageStats*>* flattened) {
+ flattened->push_back(root);
+ for (size_t i = 0; i < root->children.size(); ++i) {
+ flattenStatsTree(root->children[i], flattened);
+ }
+ }
+
+ /**
+ * Get a pointer to the MultiPlanStage inside the stage tree rooted at 'root'.
+ * Returns NULL if there is no MPS.
+ */
+ MultiPlanStage* getMultiPlanStage(PlanStage* root) {
+ if (root->stageType() == STAGE_MULTI_PLAN) {
+ MultiPlanStage* mps = static_cast<MultiPlanStage*>(root);
+ return mps;
+ }
+
+ vector<PlanStage*> children = root->getChildren();
+ for (size_t i = 0; i < children.size(); i++) {
+ MultiPlanStage* mps = getMultiPlanStage(children[i]);
+ if (mps != NULL) {
+ return mps;
+ }
+ }
+
+ return NULL;
+ }
+
+} // namespace
+
namespace mongo {
using mongoutils::str::stream;
@@ -49,49 +87,98 @@ namespace mongo {
MONGO_EXPORT_SERVER_PARAMETER(enableNewExplain, bool, false);
// static
- void Explain::explainTree(const PlanStageStats& stats,
- Explain::Verbosity verbosity,
- BSONObjBuilder* bob) {
+ void Explain::explainStatsTree(const PlanStageStats& stats,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* bob) {
invariant(bob);
// Stage name.
bob->append("stage", stats.common.stageTypeStr);
+ // Display the BSON representation of the filter, if there is one.
if (!stats.common.filter.isEmpty()) {
bob->append("filter", stats.common.filter);
}
+ // Some top-level exec stats get pulled out of the root stage.
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("nReturned", stats.common.advanced);
+ bob->appendNumber("executionTimeMillis", stats.common.executionTimeMillis);
+ }
+
// Stage-specific stats
if (STAGE_IXSCAN == stats.stageType) {
IndexScanStats* spec = static_cast<IndexScanStats*>(stats.specific.get());
+
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("keysExamined", spec->keysExamined);
+ }
+
bob->append("keyPattern", spec->keyPattern);
bob->appendBool("isMultiKey", spec->isMultiKey);
bob->append("indexBounds", spec->indexBounds);
}
+ else if (STAGE_COLLSCAN == stats.stageType) {
+ CollectionScanStats* spec = static_cast<CollectionScanStats*>(stats.specific.get());
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("docsExamined", spec->docsTested);
+ }
+ }
+ else if (STAGE_COUNT == stats.stageType) {
+ CountStats* spec = static_cast<CountStats*>(stats.specific.get());
+
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("keysExamined", spec->keysExamined);
+ }
+
+ bob->append("keyPattern", spec->keyPattern);
+ bob->appendBool("isMultiKey", spec->isMultiKey);
+ }
+ else if (STAGE_FETCH == stats.stageType) {
+ FetchStats* spec = static_cast<FetchStats*>(stats.specific.get());
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("docsExamined", spec->docsExamined);
+ }
+ }
else if (STAGE_GEO_NEAR_2D == stats.stageType) {
TwoDNearStats* spec = static_cast<TwoDNearStats*>(stats.specific.get());
- bob->append("keyPattern", spec->keyPattern);
- // TODO these things are execution stats
- /*bob->appendNumber("keysExamined", spec->nscanned);
- bob->appendNumber("objectsLoaded", spec->objectsLoaded);*/
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("keysExamined", spec->nscanned);
+ bob->appendNumber("docsExamined", spec->objectsLoaded);
+ }
+
+ bob->append("keyPattern", spec->keyPattern);
+ }
+ else if (STAGE_IDHACK == stats.stageType) {
+ IDHackStats* spec = static_cast<IDHackStats*>(stats.specific.get());
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("keysExamined", spec->keysExamined);
+ bob->appendNumber("docsExamined", spec->docsExamined);
+ }
}
else if (STAGE_TEXT == stats.stageType) {
TextStats* spec = static_cast<TextStats*>(stats.specific.get());
+
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("keysExamined", spec->keysExamined);
+ bob->appendNumber("docsExamined", spec->fetches);
+ }
+
bob->append("indexPrefix", spec->indexPrefix);
bob->append("parsedTextQuery", spec->parsedTextQuery);
-
- // TODO these things are execution stats
- /*bob->appendNumber("keysExamined", spec->keysExamined);
- bob->appendNumber("fetches", spec->fetches);*/
}
else if (STAGE_SORT == stats.stageType) {
SortStats* spec = static_cast<SortStats*>(stats.specific.get());
bob->append("sortPattern", spec->sortPattern);
+
+ if (verbosity >= Explain::EXEC_STATS) {
+ bob->appendNumber("memUsage", spec->memUsage);
+ }
+
if (spec->limit > 0) {
bob->appendNumber("limitAmount", spec->limit);
}
- bob->appendNumber("memUsage", spec->memUsage);
}
else if (STAGE_SORT_MERGE == stats.stageType) {
MergeSortStats* spec = static_cast<MergeSortStats*>(stats.specific.get());
@@ -120,18 +207,18 @@ namespace mongo {
// rather than 'inputStages'.
if (1 == stats.children.size()) {
BSONObjBuilder childBob;
- explainTree(*stats.children[0], verbosity, &childBob);
+ explainStatsTree(*stats.children[0], verbosity, &childBob);
bob->append("inputStage", childBob.obj());
return;
}
- // There is more than one child. Recursively explainTree(...) on each
+ // There is more than one child. Recursively explainStatsTree(...) on each
// of them and add them to the 'inputStages' array.
BSONArrayBuilder childrenBob(bob->subarrayStart("inputStages"));
for (size_t i = 0; i < stats.children.size(); ++i) {
BSONObjBuilder childBob(childrenBob.subobjStart());
- explainTree(*stats.children[i], verbosity, &childBob);
+ explainStatsTree(*stats.children[i], verbosity, &childBob);
}
childrenBob.doneFast();
}
@@ -139,7 +226,7 @@ namespace mongo {
// static
void Explain::generatePlannerInfo(CanonicalQuery* query,
PlanStageStats* winnerStats,
- vector<PlanStageStats*>& rejectedStats,
+ const vector<PlanStageStats*>& rejectedStats,
BSONObjBuilder* out) {
BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));;
@@ -150,14 +237,14 @@ namespace mongo {
parsedQueryBob.doneFast();
BSONObjBuilder winningPlanBob(plannerBob.subobjStart("winningPlan"));
- explainTree(*winnerStats, Explain::QUERY_PLANNER, &winningPlanBob);
+ explainStatsTree(*winnerStats, Explain::QUERY_PLANNER, &winningPlanBob);
winningPlanBob.doneFast();
// Genenerate array of rejected plans.
BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans"));
for (size_t i = 0; i < rejectedStats.size(); i++) {
BSONObjBuilder childBob(allPlansBob.subobjStart());
- explainTree(*rejectedStats.at(i), Explain::QUERY_PLANNER, &childBob);
+ explainStatsTree(*rejectedStats[i], Explain::QUERY_PLANNER, &childBob);
}
allPlansBob.doneFast();
@@ -165,6 +252,65 @@ namespace mongo {
}
// static
+ void Explain::generateExecStats(PlanStageStats* stats,
+ BSONObjBuilder* out) {
+
+ out->appendNumber("nReturned", stats->common.advanced);
+ out->appendNumber("executionTimeMillis", stats->common.executionTimeMillis);
+
+ // Flatten the stats tree into a list.
+ vector<PlanStageStats*> statsNodes;
+ flattenStatsTree(stats, &statsNodes);
+
+ // Iterate over all stages in the tree and get the total number of keys/docs examined.
+ // These are just aggregations of information already available in the stats tree.
+ size_t totalKeysExamined = 0;
+ size_t totalDocsExamined = 0;
+ for (size_t i = 0; i < statsNodes.size(); ++i) {
+ if (STAGE_IXSCAN == statsNodes[i]->stageType) {
+ IndexScanStats* spec = static_cast<IndexScanStats*>(statsNodes[i]->specific.get());
+ totalKeysExamined += spec->keysExamined;
+ }
+ else if (STAGE_GEO_NEAR_2D == statsNodes[i]->stageType) {
+ TwoDNearStats* spec = static_cast<TwoDNearStats*>(statsNodes[i]->specific.get());
+ totalKeysExamined += spec->nscanned;
+ totalDocsExamined += spec->objectsLoaded;
+ }
+ else if (STAGE_IDHACK == statsNodes[i]->stageType) {
+ IDHackStats* spec = static_cast<IDHackStats*>(statsNodes[i]->specific.get());
+ totalKeysExamined += spec->keysExamined;
+ totalDocsExamined += spec->docsExamined;
+ }
+ else if (STAGE_TEXT == statsNodes[i]->stageType) {
+ TextStats* spec = static_cast<TextStats*>(statsNodes[i]->specific.get());
+ totalKeysExamined += spec->keysExamined;
+ totalDocsExamined += spec->fetches;
+ }
+ else if (STAGE_FETCH == statsNodes[i]->stageType) {
+ FetchStats* spec = static_cast<FetchStats*>(statsNodes[i]->specific.get());
+ totalDocsExamined += spec->docsExamined;
+ }
+ else if (STAGE_COLLSCAN == statsNodes[i]->stageType) {
+ CollectionScanStats* spec =
+ static_cast<CollectionScanStats*>(statsNodes[i]->specific.get());
+ totalDocsExamined += spec->docsTested;
+ }
+ else if (STAGE_COUNT == statsNodes[i]->stageType) {
+ CountStats* spec = static_cast<CountStats*>(statsNodes[i]->specific.get());
+ totalKeysExamined += spec->keysExamined;
+ }
+ }
+
+ out->appendNumber("totalKeysExamined", totalKeysExamined);
+ out->appendNumber("totalDocsExamined", totalDocsExamined);
+
+ // Add the tree of stages, with individual execution stats for each stage.
+ BSONObjBuilder stagesBob(out->subobjStart("executionStages"));
+ explainStatsTree(*stats, Explain::EXEC_STATS, &stagesBob);
+ stagesBob.doneFast();
+ }
+
+ // static
void Explain::generateServerInfo(BSONObjBuilder* out) {
BSONObjBuilder serverBob(out->subobjStart("serverInfo"));
out->append("host", getHostNameCached());
@@ -183,174 +329,104 @@ namespace mongo {
}
// static
- Status Explain::explainSinglePlan(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- QuerySolution* solution,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out) {
- // Only one possible plan. Build the stages from the solution.
- WorkingSet* ws = new WorkingSet();
- PlanStage* root;
- verify(StageBuilder::build(collection, *solution, ws, &root));
-
- // Wrap the exec stages in a plan executor. Takes ownership of 'ws' and 'root'.
- scoped_ptr<PlanExecutor> exec(new PlanExecutor(ws, root, collection));
-
- // If we need execution stats, then we should run the plan.
- if (verbosity > Explain::QUERY_PLANNER) {
- Runner::RunnerState state;
- BSONObj obj;
- while (Runner::RUNNER_ADVANCED == (state = exec->getNext(&obj, NULL)));
-
- if (Runner::RUNNER_ERROR == state) {
- return Status(ErrorCodes::BadValue,
- "Exec error: " + WorkingSetCommon::toStatusString(obj));
- }
- }
-
- scoped_ptr<PlanStageStats> stats(exec->getStats());
+ void Explain::explainCountEmptyQuery(BSONObjBuilder* out) {
+ BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));
- // Actually generate the explain results.
+ plannerBob.append("plannerVersion", QueryPlanner::kPlannerVersion);
- if (verbosity >= Explain::QUERY_PLANNER) {
- vector<PlanStageStats*> rejected;
- generatePlannerInfo(rawCanonicalQuery, stats.get(), rejected, out);
- generateServerInfo(out);
- }
+ plannerBob.append("winningPlan", "TRIVIAL_COUNT");
- if (verbosity >= Explain::EXEC_STATS) {
- // TODO: generate executionStats section
- }
+ // Empty array of rejected plans.
+ BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans"));
+ allPlansBob.doneFast();
- if (verbosity >= Explain::EXEC_ALL_PLANS) {
- // TODO: generate rejected plans execution stats
- }
+ plannerBob.doneFast();
- return Status::OK();
+ generateServerInfo(out);
}
// static
- Status Explain::explainMultiPlan(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- vector<QuerySolution*>& solutions,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out) {
- scoped_ptr<WorkingSet> sharedWorkingSet(new WorkingSet());
-
- scoped_ptr<MultiPlanStage> multiPlanStage(
- new MultiPlanStage(collection, rawCanonicalQuery));
-
- for (size_t ix = 0; ix < solutions.size(); ++ix) {
- // version of StageBuild::build when WorkingSet is shared
- PlanStage* nextPlanRoot;
- verify(StageBuilder::build(collection, *solutions[ix],
- sharedWorkingSet.get(), &nextPlanRoot));
-
- // Takes ownership of the solution and the root PlanStage, but not the working set.
- multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet.get());
+ Status Explain::explainStages(PlanExecutor* exec,
+ CanonicalQuery* canonicalQuery,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* out) {
+ //
+ // Step 1: run the stages as required by the verbosity level.
+ //
+
+ // Inspect the tree to see if there is a MultiPlanStage.
+ MultiPlanStage* mps = getMultiPlanStage(exec->getStages());
+
+ // The queryPlanner verbosity level requires that we know the winning plan,
+ // if there are multiple. There are multiple candidates iff we have a MultiPlanStage.
+ if (verbosity >= Explain::QUERY_PLANNER && NULL != mps) {
+ mps->pickBestPlan();
}
- // Run the plan / do the plan selection as required by the requested verbosity.
- multiPlanStage->pickBestPlan();
- if (Explain::EXEC_STATS == verbosity) {
- Status execStatus = multiPlanStage->executeWinningPlan();
- if (!execStatus.isOK()) {
- return execStatus;
+ // The executionStats verbosity level requires that we run the winning plan
+        // until it finishes.
+ if (verbosity >= Explain::EXEC_STATS) {
+ Status s = exec->executePlan();
+ if (!s.isOK()) {
+ return s;
}
}
- else if (Explain::EXEC_ALL_PLANS == verbosity) {
- Status execStatus = multiPlanStage->executeAllPlans();
- if (!execStatus.isOK()) {
- return execStatus;
+
+ // The allPlansExecution verbosity level requires that we run all plans to completion,
+ // if there are multiple candidates. If 'mps' is NULL, then there was only one candidate,
+ // and we don't have to worry about gathering stats for rejected plans.
+ if (verbosity == Explain::EXEC_ALL_PLANS && NULL != mps) {
+ Status s = mps->executeAllPlans();
+ if (!s.isOK()) {
+ return s;
}
}
+ //
+ // Step 2: collect plan stats (which also give the structure of the plan tree).
+ //
+
// Get stats for the winning plan.
- scoped_ptr<PlanStageStats> stats(multiPlanStage->getStats());
+ scoped_ptr<PlanStageStats> winningStats(exec->getStats());
- // Actually generate the explain results.
+        // Get stats for the rejected plans, if there were rejected plans.
+ vector<PlanStageStats*> rejectedStats;
+ if (NULL != mps) {
+ rejectedStats = mps->generateCandidateStats();
+ }
+
+ //
+ // Step 3: use the stats trees to produce explain BSON.
+ //
if (verbosity >= Explain::QUERY_PLANNER) {
- vector<PlanStageStats*>* rejected = multiPlanStage->generateCandidateStats();
- generatePlannerInfo(rawCanonicalQuery, stats.get(), *rejected, out);
- generateServerInfo(out);
+ generatePlannerInfo(canonicalQuery, winningStats.get(), rejectedStats, out);
}
if (verbosity >= Explain::EXEC_STATS) {
- // TODO: generate executionStats section
- }
+ BSONObjBuilder execBob(out->subobjStart("executionStats"));
+
+ // Generate exec stats BSON for the winning plan.
+ generateExecStats(winningStats.get(), &execBob);
+
+ // Also generate exec stats for each rejected plan, if the verbosity level
+ // is high enough.
+ if (verbosity >= Explain::EXEC_ALL_PLANS) {
+ BSONArrayBuilder rejectedBob(execBob.subarrayStart("rejectedPlansExecution"));
+ for (size_t i = 0; i < rejectedStats.size(); ++i) {
+ BSONObjBuilder planBob(rejectedBob.subobjStart());
+ generateExecStats(rejectedStats[i], &planBob);
+ planBob.doneFast();
+ }
+ rejectedBob.doneFast();
+ }
- if (verbosity >= Explain::EXEC_ALL_PLANS) {
- // TODO: generate rejected plans execution stats
+ execBob.doneFast();
}
- return Status::OK();
- }
-
- // static
- void Explain::explainEmptyColl(CanonicalQuery* rawCanonicalQuery,
- BSONObjBuilder* out) {
- BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));
-
- plannerBob.append("plannerVersion", QueryPlanner::kPlannerVersion);
-
- BSONObjBuilder parsedQueryBob(plannerBob.subobjStart("parsedQuery"));
- rawCanonicalQuery->root()->toBSON(&parsedQueryBob);
- parsedQueryBob.doneFast();
-
- plannerBob.appendBool("emptyCollection", true);
-
- plannerBob.append("winningPlan", "EOF");
-
- // Empty array of rejected plans.
- BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans"));
- allPlansBob.doneFast();
-
- plannerBob.doneFast();
-
generateServerInfo(out);
- }
-
- // static
- Status Explain::explain(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- size_t plannerOptions,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out) {
- invariant(rawCanonicalQuery);
- auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
-
- if (NULL == collection) {
- explainEmptyColl(rawCanonicalQuery, out);
- return Status::OK();
- }
-
- QueryPlannerParams plannerParams;
- plannerParams.options = plannerOptions;
- fillOutPlannerParams(collection, rawCanonicalQuery, &plannerParams);
- vector<QuerySolution*> solutions;
- Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);
- if (!status.isOK()) {
- return Status(ErrorCodes::BadValue,
- "error processing explain: " + canonicalQuery->toString() +
- " planner returned error: " + status.reason());
- }
-
- // We cannot figure out how to answer the query. Perhaps it requires an index
- // we do not have?
- if (0 == solutions.size()) {
- stream ss;
- ss << "error processing explain: " << canonicalQuery->toString()
- << " No query solutions";
- return Status(ErrorCodes::BadValue, ss);
- }
- else if (1 == solutions.size()) {
- return explainSinglePlan(collection, rawCanonicalQuery, solutions[0], verbosity, out);
- }
- else {
- return explainMultiPlan(collection, rawCanonicalQuery, solutions, verbosity, out);
- }
+ return Status::OK();
}
} // namespace mongo
diff --git a/src/mongo/db/query/explain.h b/src/mongo/db/query/explain.h
index 675109bc294..32d144466f5 100644
--- a/src/mongo/db/query/explain.h
+++ b/src/mongo/db/query/explain.h
@@ -28,8 +28,10 @@
#pragma once
+#include "mongo/db/exec/plan_stage.h"
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
@@ -76,10 +78,17 @@ namespace mongo {
*/
static void generatePlannerInfo(CanonicalQuery* query,
PlanStageStats* winnerStats,
- vector<PlanStageStats*>& rejectedStats,
+ const vector<PlanStageStats*>& rejectedStats,
BSONObjBuilder* out);
/**
+ * Generates the execution stats section for the stats tree 'stats',
+ * adding the resulting BSON to 'out'.
+ */
+ static void generateExecStats(PlanStageStats* stats,
+ BSONObjBuilder* out);
+
+ /**
* Adds the 'serverInfo' explain section to the BSON object being build
* by 'out'.
*/
@@ -92,64 +101,37 @@ namespace mongo {
* Explain info is added to 'bob' according to the verbosity level passed in
* 'verbosity'.
*/
- static void explainTree(const PlanStageStats& stats,
- Explain::Verbosity verbosity,
- BSONObjBuilder* bob);
+ static void explainStatsTree(const PlanStageStats& stats,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* bob);
/**
- * Add explain info to 'out' at verbosity 'verbosity' in the case that there is
- * only one query solution available.
+ * Generate explain info for the execution plan 'exec', adding the results
+ * to the BSONObj being built by 'out'.
*
- * The query 'rawCanonicalQuery' has one viable query solution 'solution' in the
- * collection 'collection'.
+ * The query part of the operation is contained in 'canonicalQuery', but
+ * 'exec' can contain any tree of execution stages. We can explain any
+ * operation that executes as stages by calling into this function.
*
- * May use a PlanExecutor to run the solution in order to produce exec stats.
+     * The explain information is generated according to a level of detail
+ * specified by 'verbosity'.
*/
- static Status explainSinglePlan(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- QuerySolution* solution,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out);
+ static Status explainStages(PlanExecutor* exec,
+ CanonicalQuery* canonicalQuery,
+ Explain::Verbosity verbosity,
+ BSONObjBuilder* out);
- /**
- * Add explain info to 'out' at verbosity 'verbosity' in the case that there are
- * multiple query solutions available.
- *
- * The query 'rawCanonicalQuery' has the corresponding query solutions in 'solutions'.
- *
- * Uses a MultiPlan stage to choose the best plan, and to run the winning plan or the
- * rejected plans as required by the verbosity level.
- */
- static Status explainMultiPlan(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- vector<QuerySolution*>& solutions,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out);
+ //
+ // Helpers for special-case explains.
+ //
/**
- * The format of the explain output is special if the collection is empty.
- *
- * Assuming that the collection is empty, adds the explain info for query
- * 'rawCanonicalQuery' to 'out'.
+ * If you have an empty query with a count, then there are no execution stages.
+ * We just get the number of records and then apply skip/limit. Since there
+ * are no stages, this requires a special explain format.
*/
- static void explainEmptyColl(CanonicalQuery* rawCanonicalQuery,
- BSONObjBuilder* out);
+ static void explainCountEmptyQuery(BSONObjBuilder* out);
- /**
- * Top-level explain entry point for a query. Plans 'rawCanonicalQuery' in collection
- * 'collection' using the planner parameters in 'plannerOptions'.
- *
- * The resulting explain BSON is added to 'out'. The level of detail in the output is
- * controlled by 'verbosity'.
- *
- * If necessary, run the query in order to generate execution stats (but throw out
- * the results of the query).
- */
- static Status explain(Collection* collection,
- CanonicalQuery* rawCanonicalQuery,
- size_t plannerOptions,
- Explain::Verbosity verbosity,
- BSONObjBuilder* out);
};
} // namespace
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
new file mode 100644
index 00000000000..03ee948817f
--- /dev/null
+++ b/src/mongo/db/query/get_executor.cpp
@@ -0,0 +1,811 @@
+/**
+ * Copyright (C) 2013-2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/get_executor.h"
+
+#include <limits>
+
+#include "mongo/base/parse_number.h"
+#include "mongo/client/dbclientinterface.h"
+#include "mongo/db/exec/cached_plan.h"
+#include "mongo/db/exec/eof.h"
+#include "mongo/db/exec/idhack.h"
+#include "mongo/db/exec/multi_plan.h"
+#include "mongo/db/exec/projection.h"
+#include "mongo/db/exec/shard_filter.h"
+#include "mongo/db/exec/subplan.h"
+#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/explain_plan.h"
+#include "mongo/db/query/query_settings.h"
+#include "mongo/db/query/index_bounds_builder.h"
+#include "mongo/db/query/internal_plans.h"
+#include "mongo/db/query/plan_cache.h"
+#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/planner_analysis.h"
+#include "mongo/db/query/planner_access.h"
+#include "mongo/db/query/qlog.h"
+#include "mongo/db/query/query_knobs.h"
+#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/query_planner_common.h"
+#include "mongo/db/query/stage_builder.h"
+#include "mongo/db/index_names.h"
+#include "mongo/db/server_options.h"
+#include "mongo/db/server_parameters.h"
+#include "mongo/s/d_logic.h"
+
+namespace mongo {
+
+ // static
+ void filterAllowedIndexEntries(const AllowedIndices& allowedIndices,
+ std::vector<IndexEntry>* indexEntries) {
+ invariant(indexEntries);
+
+ // Filter index entries
+ // Check BSON objects in AllowedIndices::_indexKeyPatterns against IndexEntry::keyPattern.
+ // Removes IndexEntrys that do not match _indexKeyPatterns.
+ std::vector<IndexEntry> temp;
+ for (std::vector<IndexEntry>::const_iterator i = indexEntries->begin();
+ i != indexEntries->end(); ++i) {
+ const IndexEntry& indexEntry = *i;
+ for (std::vector<BSONObj>::const_iterator j = allowedIndices.indexKeyPatterns.begin();
+ j != allowedIndices.indexKeyPatterns.end(); ++j) {
+ const BSONObj& index = *j;
+ // Copy index entry to temp vector if found in query settings.
+ if (0 == indexEntry.keyPattern.woCompare(index)) {
+ temp.push_back(indexEntry);
+ break;
+ }
+ }
+ }
+
+ // Update results.
+ temp.swap(*indexEntries);
+ }
+
+ namespace {
+ // The body is below in the "count hack" section but getRunner calls it.
+ bool turnIxscanIntoCount(QuerySolution* soln);
+ } // namespace
+
+
+ void fillOutPlannerParams(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ QueryPlannerParams* plannerParams) {
+ // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s)
+ IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
+ while (ii.more()) {
+ const IndexDescriptor* desc = ii.next();
+ plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
+ desc->getAccessMethodName(),
+ desc->isMultikey(),
+ desc->isSparse(),
+ desc->indexName(),
+ desc->infoObj()));
+ }
+
+ // If query supports index filters, filter params.indices by indices in query settings.
+ QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
+ AllowedIndices* allowedIndicesRaw;
+
+ // Filter index catalog if index filters are specified for query.
+ // Also, signal to planner that application hint should be ignored.
+ if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
+ boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
+ filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices);
+ plannerParams->indexFiltersApplied = true;
+ }
+
+ // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN
+ // overrides this behavior by not outputting a collscan even if there are no indexed
+ // solutions.
+ if (storageGlobalParams.noTableScan) {
+ const string& ns = canonicalQuery->ns();
+ // There are certain cases where we ignore this restriction:
+ bool ignore = canonicalQuery->getQueryObj().isEmpty()
+ || (string::npos != ns.find(".system."))
+ || (0 == ns.find("local."));
+ if (!ignore) {
+ plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN;
+ }
+ }
+
+ // If the caller wants a shard filter, make sure we're actually sharded.
+ if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
+ CollectionMetadataPtr collMetadata =
+ shardingState.getCollectionMetadata(canonicalQuery->ns());
+
+ if (collMetadata) {
+ plannerParams->shardKey = collMetadata->getKeyPattern();
+ }
+ else {
+ // If there's no metadata don't bother w/the shard filter since we won't know what
+ // the key pattern is anyway...
+ plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
+ }
+ }
+
+ if (internalQueryPlannerEnableIndexIntersection) {
+ plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION;
+ }
+
+ plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS;
+ plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT;
+ }
+
+ Status getExecutorIDHack(Collection* collection,
+ CanonicalQuery* query,
+ const QueryPlannerParams& plannerParams,
+ PlanExecutor** out) {
+ invariant(collection);
+
+ LOG(2) << "Using idhack: " << query->toStringShort();
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root = new IDHackStage(collection, query, ws);
+
+ // Might have to filter out orphaned docs.
+ if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
+ root = new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()),
+ ws, root);
+ }
+
+ // There might be a projection. The idhack stage will always fetch the full document,
+ // so we don't support covered projections. However, we might use the simple inclusion
+ // fast path.
+ if (NULL != query && NULL != query->getProj()) {
+ ProjectionStageParams params(WhereCallbackReal(collection->ns().db()));
+ params.projObj = query->getProj()->getProjObj();
+
+ // Stuff the right data into the params depending on what proj impl we use.
+ if (query->getProj()->requiresDocument() || query->getProj()->wantIndexKey()) {
+ params.fullExpression = query->root();
+ params.projImpl = ProjectionStageParams::NO_FAST_PATH;
+ }
+ else {
+ params.projImpl = ProjectionStageParams::SIMPLE_DOC;
+ }
+
+ root = new ProjectionStage(params, ws, root);
+ }
+
+ *out = new PlanExecutor(ws, root, collection);
+ return Status::OK();
+ }
+
+ Status getExecutor(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ PlanExecutor** out,
+ size_t plannerOptions) {
+ invariant(canonicalQuery);
+
+ // This can happen as we're called by internal clients as well.
+ if (NULL == collection) {
+ const string& ns = canonicalQuery->ns();
+ LOG(2) << "Collection " << ns << " does not exist."
+ << " Using EOF runner: " << canonicalQuery->toStringShort();
+ EOFStage* eofStage = new EOFStage();
+ WorkingSet* ws = new WorkingSet();
+ *out = new PlanExecutor(ws, eofStage, collection);
+ return Status::OK();
+ }
+
+ // Fill out the planning params. We use these for both cached solutions and non-cached.
+ QueryPlannerParams plannerParams;
+ plannerParams.options = plannerOptions;
+ fillOutPlannerParams(collection, canonicalQuery, &plannerParams);
+
+ // If we have an _id index we can use the idhack runner.
+ if (IDHackStage::supportsQuery(*canonicalQuery) &&
+ collection->getIndexCatalog()->findIdIndex()) {
+ return getExecutorIDHack(collection, canonicalQuery, plannerParams, out);
+ }
+
+ // Tailable: If the query requests tailable the collection must be capped.
+ if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
+ if (!collection->isCapped()) {
+ return Status(ErrorCodes::BadValue,
+ "error processing query: " + canonicalQuery->toString() +
+ " tailable cursor requested on non capped collection");
+ }
+
+ // If a sort is specified it must be equal to expectedSort.
+ const BSONObj expectedSort = BSON("$natural" << 1);
+ const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
+ if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
+ return Status(ErrorCodes::BadValue,
+ "error processing query: " + canonicalQuery->toString() +
+ " invalid sort specified for tailable cursor: "
+ + actualSort.toString());
+ }
+ }
+
+ // Try to look up a cached solution for the query.
+
+ CachedSolution* rawCS;
+ if (PlanCache::shouldCacheQuery(*canonicalQuery) &&
+ collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) {
+ // We have a CachedSolution. Have the planner turn it into a QuerySolution.
+ boost::scoped_ptr<CachedSolution> cs(rawCS);
+ QuerySolution *qs, *backupQs;
+ QuerySolution*& chosenSolution=qs; // either qs or backupQs
+ Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs,
+ &qs, &backupQs);
+
+ if (status.isOK()) {
+ // the working set will be shared by the root and backupRoot plans
+ // and owned by the containing single-solution-runner
+ //
+ WorkingSet* sharedWs = new WorkingSet();
+
+ PlanStage *root, *backupRoot=NULL;
+ verify(StageBuilder::build(collection, *qs, sharedWs, &root));
+ if ((plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT)
+ && turnIxscanIntoCount(qs)) {
+ LOG(2) << "Using fast count: " << canonicalQuery->toStringShort()
+ << ", planSummary: " << getPlanSummary(*qs);
+
+ if (NULL != backupQs) {
+ delete backupQs;
+ }
+ }
+ else if (NULL != backupQs) {
+ verify(StageBuilder::build(collection, *backupQs, sharedWs, &backupRoot));
+ }
+
+ // add a CachedPlanStage on top of the previous root
+ root = new CachedPlanStage(collection, canonicalQuery, root, backupRoot);
+
+ *out = new PlanExecutor(sharedWs, root, chosenSolution, collection);
+ return Status::OK();
+ }
+ }
+
+ if (internalQueryPlanOrChildrenIndependently
+ && SubplanStage::canUseSubplanning(*canonicalQuery)) {
+
+ QLOG() << "Running query as sub-queries: " << canonicalQuery->toStringShort();
+ LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort();
+
+ auto_ptr<WorkingSet> ws(new WorkingSet());
+
+ SubplanStage* subplan;
+ Status runnerStatus = SubplanStage::make(collection, ws.get(), plannerParams,
+ canonicalQuery, &subplan);
+ if (!runnerStatus.isOK()) {
+ return runnerStatus;
+ }
+
+ *out = new PlanExecutor(ws.release(), subplan, collection);
+ return Status::OK();
+ }
+
+ return getExecutorAlwaysPlan(collection, canonicalQuery, plannerParams, out);
+ }
+
+ Status getExecutorAlwaysPlan(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ const QueryPlannerParams& plannerParams,
+ PlanExecutor** execOut) {
+ invariant(collection);
+ invariant(canonicalQuery);
+
+ *execOut = NULL;
+
+ vector<QuerySolution*> solutions;
+ Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);
+ if (!status.isOK()) {
+ return Status(ErrorCodes::BadValue,
+ "error processing query: " + canonicalQuery->toString() +
+ " planner returned error: " + status.reason());
+ }
+
+ // We cannot figure out how to answer the query. Perhaps it requires an index
+ // we do not have?
+ if (0 == solutions.size()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "error processing query: "
+ << canonicalQuery->toString()
+ << " No query solutions");
+ }
+
+ // See if one of our solutions is a fast count hack in disguise.
+ if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+ for (size_t i = 0; i < solutions.size(); ++i) {
+ if (turnIxscanIntoCount(solutions[i])) {
+ // Great, we can use solutions[i]. Clean up the other QuerySolution(s).
+ for (size_t j = 0; j < solutions.size(); ++j) {
+ if (j != i) {
+ delete solutions[j];
+ }
+ }
+
+ LOG(2) << "Using fast count: " << canonicalQuery->toStringShort()
+ << ", planSummary: " << getPlanSummary(*solutions[i]);
+
+ // We're not going to cache anything that's fast count.
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root;
+ verify(StageBuilder::build(collection, *solutions[i], ws, &root));
+
+ *execOut = new PlanExecutor(ws, root, solutions[i], collection);
+ return Status::OK();
+ }
+ }
+ }
+
+ if (1 == solutions.size()) {
+ LOG(2) << "Only one plan is available; it will be run but will not be cached. "
+ << canonicalQuery->toStringShort()
+ << ", planSummary: " << getPlanSummary(*solutions[0]);
+
+ // Only one possible plan. Run it. Build the stages from the solution.
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root;
+ verify(StageBuilder::build(collection, *solutions[0], ws, &root));
+
+ *execOut = new PlanExecutor(ws, root, solutions[0], collection);
+ return Status::OK();
+ }
+ else {
+ // Many solutions. Create a MultiPlanStage to pick the best, update the cache, and so on.
+
+ // The working set will be shared by all candidate plans and owned by the containing runner
+ WorkingSet* sharedWorkingSet = new WorkingSet();
+
+ MultiPlanStage* multiPlanStage = new MultiPlanStage(collection, canonicalQuery);
+
+ for (size_t ix = 0; ix < solutions.size(); ++ix) {
+ if (solutions[ix]->cacheData.get()) {
+ solutions[ix]->cacheData->indexFilterApplied = plannerParams.indexFiltersApplied;
+ }
+
+ // version of StageBuild::build when WorkingSet is shared
+ PlanStage* nextPlanRoot;
+ verify(StageBuilder::build(collection, *solutions[ix],
+ sharedWorkingSet, &nextPlanRoot));
+
+ // Owns none of the arguments
+ multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet);
+ }
+
+ PlanExecutor* exec = new PlanExecutor(sharedWorkingSet, multiPlanStage, collection);
+
+ *execOut = exec;
+ return Status::OK();
+ }
+ }
+
+ //
+ // Count hack
+ //
+
+ namespace {
+
+ /**
+ * Returns 'true' if the provided solution 'soln' can be rewritten to use
+ * a fast counting stage. Mutates the tree in 'soln->root'.
+ *
+ * Otherwise, returns 'false'.
+ */
+ bool turnIxscanIntoCount(QuerySolution* soln) {
+ QuerySolutionNode* root = soln->root.get();
+
+ // Root should be a fetch w/o any filters.
+ if (STAGE_FETCH != root->getType()) {
+ return false;
+ }
+
+ if (NULL != root->filter.get()) {
+ return false;
+ }
+
+ // Child should be an ixscan.
+ if (STAGE_IXSCAN != root->children[0]->getType()) {
+ return false;
+ }
+
+ IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]);
+
+ // No filters allowed and side-stepping isSimpleRange for now. TODO: do we ever see
+ // isSimpleRange here? because we could well use it. I just don't think we ever do see
+ // it.
+
+ if (NULL != isn->filter.get() || isn->bounds.isSimpleRange) {
+ return false;
+ }
+
+ // Make sure the bounds are OK.
+ BSONObj startKey;
+ bool startKeyInclusive;
+ BSONObj endKey;
+ bool endKeyInclusive;
+
+ if (!IndexBoundsBuilder::isSingleInterval( isn->bounds,
+ &startKey,
+ &startKeyInclusive,
+ &endKey,
+ &endKeyInclusive )) {
+ return false;
+ }
+
+ // Make the count node that we replace the fetch + ixscan with.
+ CountNode* cn = new CountNode();
+ cn->indexKeyPattern = isn->indexKeyPattern;
+ cn->startKey = startKey;
+ cn->startKeyInclusive = startKeyInclusive;
+ cn->endKey = endKey;
+ cn->endKeyInclusive = endKeyInclusive;
+ // Takes ownership of 'cn' and deletes the old root.
+ soln->root.reset(cn);
+ return true;
+ }
+
+ /**
+ * Returns true if indices contains an index that can be
+ * used with DistinctNode. Sets indexOut to the array index
+ * of PlannerParams::indices.
+ * Look for the index for the fewest fields.
+ * Criteria for suitable index is that the index cannot be special
+ * (geo, hashed, text, ...).
+ *
+ * Multikey indices are not suitable for DistinctNode when the projection
+ * is on an array element. Arrays are flattened in a multikey index which
+ * makes it impossible for the distinct scan stage (plan stage generated from
+ * DistinctNode) to select the requested element by array index.
+ *
+ * Multikey indices cannot be used for the fast distinct hack if the field is dotted.
+ * Currently the solution generated for the distinct hack includes a projection stage and
+ * the projection stage cannot be covered with a dotted field.
+ */
+ bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices,
+ const std::string& field, size_t* indexOut) {
+ invariant(indexOut);
+ bool isDottedField = str::contains(field, '.');
+ int minFields = std::numeric_limits<int>::max();
+ for (size_t i = 0; i < indices.size(); ++i) {
+ // Skip special indices.
+ if (!IndexNames::findPluginName(indices[i].keyPattern).empty()) {
+ continue;
+ }
+ // Skip multikey indices if we are projecting on a dotted field.
+ if (indices[i].multikey && isDottedField) {
+ continue;
+ }
+ int nFields = indices[i].keyPattern.nFields();
+ // Pick the index with the lowest number of fields.
+ if (nFields < minFields) {
+ minFields = nFields;
+ *indexOut = i;
+ }
+ }
+ return minFields != std::numeric_limits<int>::max();
+ }
+
+ /**
+ * Checks dotted field for a projection and truncates the
+ * field name if we could be projecting on an array element.
+ * Sets 'isIDOut' to true if the projection is on a sub document of _id.
+ * For example, _id.a.2, _id.b.c.
+ */
+ std::string getProjectedDottedField(const std::string& field, bool* isIDOut) {
+ // Check if field contains an array index.
+ std::vector<std::string> res;
+ mongo::splitStringDelim(field, &res, '.');
+
+ // Since we could exit early from the loop,
+ // we should check _id here and set '*isIDOut' accordingly.
+ *isIDOut = ("_id" == res[0]);
+
+ // Skip the first dotted component. If the field starts
+ // with a number, the number cannot be an array index.
+ int arrayIndex = 0;
+ for (size_t i = 1; i < res.size(); ++i) {
+ if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) {
+ // Array indices cannot be negative numbers (this is not $slice).
+ // Negative numbers are allowed as field names.
+ if (arrayIndex >= 0) {
+ // Generate prefix of field up to (but not including) array index.
+ std::vector<std::string> prefixStrings(res);
+ prefixStrings.resize(i);
+ // Reset projectedField. Instead of overwriting, joinStringDelim() appends joined string
+ // to the end of projectedField.
+ std::string projectedField;
+ mongo::joinStringDelim(prefixStrings, &projectedField, '.');
+ return projectedField;
+ }
+ }
+ }
+
+ return field;
+ }
+
+ /**
+ * Creates a projection spec for a distinct command from the requested field.
+ * In most cases, the projection spec will be {_id: 0, key: 1}.
+ * The exceptions are:
+ * 1) When the requested field is '_id', the projection spec will {_id: 1}.
+ * 2) When the requested field could be an array element (eg. a.0),
+ * the projected field will be the prefix of the field up to the array element.
+ * For example, a.b.2 => {_id: 0, 'a.b': 1}
+ * Note that we can't use a $slice projection because the distinct command filters
+ * the results from the runner using the dotted field name. Using $slice will
+ * re-order the documents in the array in the results.
+ */
+ BSONObj getDistinctProjection(const std::string& field) {
+ std::string projectedField(field);
+
+ bool isID = false;
+ if ("_id" == field) {
+ isID = true;
+ }
+ else if (str::contains(field, '.')) {
+ projectedField = getProjectedDottedField(field, &isID);
+ }
+ BSONObjBuilder bob;
+ if (!isID) {
+ bob.append("_id", 0);
+ }
+ bob.append(projectedField, 1);
+ return bob.obj();
+ }
+
+ } // namespace
+
+ Status getExecutorCount(Collection* collection,
+ const BSONObj& query,
+ const BSONObj& hintObj,
+ PlanExecutor** execOut) {
+ invariant(collection);
+
+ const WhereCallbackReal whereCallback(collection->ns().db());
+
+ CanonicalQuery* cq;
+ uassertStatusOK(CanonicalQuery::canonicalize(collection->ns().ns(),
+ query,
+ BSONObj(),
+ BSONObj(),
+ 0,
+ 0,
+ hintObj,
+ &cq,
+ whereCallback));
+
+ scoped_ptr<CanonicalQuery> cleanupCq(cq);
+
+ return getExecutor(collection, cq, execOut, QueryPlannerParams::PRIVATE_IS_COUNT);
+ }
+
+ //
+ // Distinct hack
+ //
+
+ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) {
+ QuerySolutionNode* root = soln->root.get();
+
+ // We're looking for a project on top of an ixscan.
+ if (STAGE_PROJECTION == root->getType() && (STAGE_IXSCAN == root->children[0]->getType())) {
+ IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]);
+
+ // An additional filter must be applied to the data in the key, so we can't just skip
+ // all the keys with a given value; we must examine every one to find the one that (may)
+ // pass the filter.
+ if (NULL != isn->filter.get()) {
+ return false;
+ }
+
+ // We only set this when we have special query modifiers (.max() or .min()) or other
+ // special cases. Don't want to handle the interactions between those and distinct.
+ // Don't think this will ever really be true but if it somehow is, just ignore this
+ // soln.
+ if (isn->bounds.isSimpleRange) {
+ return false;
+ }
+
+ // Make a new DistinctNode. We swap this for the ixscan in the provided solution.
+ DistinctNode* dn = new DistinctNode();
+ dn->indexKeyPattern = isn->indexKeyPattern;
+ dn->direction = isn->direction;
+ dn->bounds = isn->bounds;
+
+ // Figure out which field we're skipping to the next value of. TODO: We currently only
+ // try to distinct-hack when there is an index prefixed by the field we're distinct-ing
+ // over. Consider removing this code if we stick with that policy.
+ dn->fieldNo = 0;
+ BSONObjIterator it(isn->indexKeyPattern);
+ while (it.more()) {
+ if (field == it.next().fieldName()) {
+ break;
+ }
+ dn->fieldNo++;
+ }
+
+ // Delete the old index scan, set the child of project to the fast distinct scan.
+ delete root->children[0];
+ root->children[0] = dn;
+ return true;
+ }
+
+ return false;
+ }
+
+ Status getExecutorDistinct(Collection* collection,
+ const BSONObj& query,
+ const std::string& field,
+ PlanExecutor** out) {
+ // This should'a been checked by the distinct command.
+ invariant(collection);
+
+ // TODO: check for idhack here?
+
+ // When can we do a fast distinct hack?
+ // 1. There is a plan with just one leaf and that leaf is an ixscan.
+ // 2. The ixscan indexes the field we're interested in.
+ // 2a: We are correct if the index contains the field but for now we look for prefix.
+ // 3. The query is covered/no fetch.
+ //
+ // We go through normal planning (with limited parameters) to see if we can produce
+ // a soln with the above properties.
+
+ QueryPlannerParams plannerParams;
+ plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;
+
+ IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
+ while (ii.more()) {
+ const IndexDescriptor* desc = ii.next();
+ // The distinct hack can work if any field is in the index but it's not always clear
+ // if it's a win unless it's the first field.
+ if (desc->keyPattern().firstElement().fieldName() == field) {
+ plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
+ desc->getAccessMethodName(),
+ desc->isMultikey(),
+ desc->isSparse(),
+ desc->indexName(),
+ desc->infoObj()));
+ }
+ }
+
+ const WhereCallbackReal whereCallback(collection->ns().db());
+
+ // If there are no suitable indices for the distinct hack bail out now into regular planning
+ // with no projection.
+ if (plannerParams.indices.empty()) {
+ CanonicalQuery* cq;
+ Status status = CanonicalQuery::canonicalize(
+ collection->ns().ns(), query, &cq, whereCallback);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ scoped_ptr<CanonicalQuery> cleanupCq(cq);
+
+ // Does not take ownership of its args.
+ return getExecutor(collection, cq, out);
+ }
+
+ //
+ // If we're here, we have an index prefixed by the field we're distinct-ing over.
+ //
+
+ // Applying a projection allows the planner to try to give us covered plans that we can turn
+ // into the projection hack. getDistinctProjection deals with .find() projection semantics
+ // (ie _id:1 being implied by default).
+ BSONObj projection = getDistinctProjection(field);
+
+ // Apply a projection of the key. Empty BSONObj() is for the sort.
+ CanonicalQuery* cq;
+ Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
+ query,
+ BSONObj(),
+ projection,
+ &cq,
+ whereCallback);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ scoped_ptr<CanonicalQuery> cleanupCq(cq);
+
+ // If there's no query, we can just distinct-scan one of the indices.
+ // Not every index in plannerParams.indices may be suitable. Refer to
+ // getDistinctNodeIndex().
+ size_t distinctNodeIndex = 0;
+ if (query.isEmpty() &&
+ getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) {
+ DistinctNode* dn = new DistinctNode();
+ dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern;
+ dn->direction = 1;
+ IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
+ dn->fieldNo = 0;
+
+ QueryPlannerParams params;
+
+ // Takes ownership of 'dn'.
+ QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn);
+ invariant(soln);
+
+ LOG(2) << "Using fast distinct: " << cq->toStringShort()
+ << ", planSummary: " << getPlanSummary(*soln);
+
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root;
+ verify(StageBuilder::build(collection, *soln, ws, &root));
+ // Takes ownership of 'ws', 'root', and 'soln'.
+ *out = new PlanExecutor(ws, root, soln, collection);
+ return Status::OK();
+ }
+
+ // See if we can answer the query in a fast-distinct compatible fashion.
+ vector<QuerySolution*> solutions;
+ status = QueryPlanner::plan(*cq, plannerParams, &solutions);
+ if (!status.isOK()) {
+ return getExecutor(collection, cq, out);
+ }
+
+ // We look for a solution that has an ixscan we can turn into a distinctixscan
+ for (size_t i = 0; i < solutions.size(); ++i) {
+ if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
+ // Great, we can use solutions[i]. Clean up the other QuerySolution(s).
+ for (size_t j = 0; j < solutions.size(); ++j) {
+ if (j != i) {
+ delete solutions[j];
+ }
+ }
+
+ LOG(2) << "Using fast distinct: " << cq->toStringShort()
+ << ", planSummary: " << getPlanSummary(*solutions[i]);
+
+ // Build and return the SSR over solutions[i].
+ WorkingSet* ws = new WorkingSet();
+ PlanStage* root;
+ verify(StageBuilder::build(collection, *solutions[i], ws, &root));
+ // Takes ownership of 'ws', 'root', and 'solutions[i]'.
+ *out = new PlanExecutor(ws, root, solutions[i], collection);
+ return Status::OK();
+ }
+ }
+
+ // If we're here, the planner made a soln with the restricted index set but we couldn't
+ // translate any of them into a distinct-compatible soln. So, delete the solutions and just
+ // go through normal planning.
+ for (size_t i = 0; i < solutions.size(); ++i) {
+ delete solutions[i];
+ }
+
+ // We drop the projection from the 'cq'. Unfortunately this is not trivial.
+ status = CanonicalQuery::canonicalize(collection->ns().ns(), query, &cq, whereCallback);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ cleanupCq.reset(cq);
+
+ // Does not take ownership.
+ return getExecutor(collection, cq, out);
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
new file mode 100644
index 00000000000..fc78e302709
--- /dev/null
+++ b/src/mongo/db/query/get_executor.h
@@ -0,0 +1,127 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/query_planner_params.h"
+#include "mongo/db/query/query_settings.h"
+#include "mongo/db/query/query_solution.h"
+
+namespace mongo {
+
+ class Collection;
+
+ /**
+ * Filter indexes retrieved from index catalog by
+ * allowed indices in query settings.
+ * Used by getRunner().
+ * This function is public to facilitate testing.
+ */
+ void filterAllowedIndexEntries(const AllowedIndices& allowedIndices,
+ std::vector<IndexEntry>* indexEntries);
+
+ /**
+ * Fill out the provided 'plannerParams' for the 'canonicalQuery' operating on the collection
+ * 'collection'. Exposed for testing.
+ */
+ void fillOutPlannerParams(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ QueryPlannerParams* plannerParams);
+
+ /**
+ * Get a plan executor for a query. Does not take ownership of canonicalQuery.
+ *
+ * If the query is valid and an executor could be created, returns Status::OK()
+ * and populates *out with the PlanExecutor.
+ *
+ * If the query cannot be executed, returns a Status indicating why. Deletes
+ * rawCanonicalQuery.
+ */
+ Status getExecutor(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ PlanExecutor** out,
+ size_t plannerOptions = 0);
+
+ /**
+ * Get a plan executor for a simple id query. The executor will wrap an execution
+ * tree whose root stage is the idhack stage.
+ *
+ * Does not take ownership of 'query'.
+ */
+ Status getExecutorIDHack(Collection* collection,
+ CanonicalQuery* query,
+ const QueryPlannerParams& plannerParams,
+ PlanExecutor** out);
+
+ /**
+ * If possible, turn the provided QuerySolution into a QuerySolution that uses a DistinctNode
+ * to provide results for the distinct command.
+ *
+ * If the provided solution could be mutated successfully, returns true, otherwise returns
+ * false.
+ */
+ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field);
+
+ /*
+ * Get an executor for a query executing as part of a distinct command.
+ *
+ * Distinct is unique in that it doesn't care about getting all the results; it just wants all
+ * possible values of a certain field. As such, we can skip lots of data in certain cases (see
+ * body of method for detail).
+ */
+ Status getExecutorDistinct(Collection* collection,
+ const BSONObj& query,
+ const std::string& field,
+ PlanExecutor** out);
+
+ /*
+ * Get a PlanExecutor for a query executing as part of a count command.
+ *
+ * Count doesn't care about actually examining its results; it just wants to walk through them.
+ * As such, with certain covered queries, we can skip the overhead of fetching etc. when
+ * executing a count.
+ */
+ Status getExecutorCount(Collection* collection,
+ const BSONObj& query,
+ const BSONObj& hintObj,
+ PlanExecutor** execOut);
+
+ /**
+ * Get a plan executor for a query. Ignores the cache and always plans the full query.
+ *
+ * Does not take ownership of its arguments.
+ *
+ * Returns the resulting executor through 'execOut'. The caller must delete 'execOut',
+ * if an OK status is returned.
+ */
+ Status getExecutorAlwaysPlan(Collection* collection,
+ CanonicalQuery* canonicalQuery,
+ const QueryPlannerParams& plannerParams,
+ PlanExecutor** execOut);
+
+} // namespace mongo
diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp
index b95c431caf0..b6cc5bf0710 100644
--- a/src/mongo/db/query/get_runner.cpp
+++ b/src/mongo/db/query/get_runner.cpp
@@ -26,6 +26,8 @@
* it in the license file.
*/
+// THIS FILE IS DEPRECATED -- replaced by get_executor.cpp
+
#include "mongo/db/query/get_runner.h"
#include <limits>
@@ -33,10 +35,14 @@
#include "mongo/base/parse_number.h"
#include "mongo/client/dbclientinterface.h"
#include "mongo/db/exec/cached_plan.h"
+#include "mongo/db/exec/eof.h"
+#include "mongo/db/exec/idhack.h"
#include "mongo/db/exec/multi_plan.h"
+#include "mongo/db/exec/subplan.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/eof_runner.h"
#include "mongo/db/query/explain_plan.h"
+#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/query_settings.h"
#include "mongo/db/query/idhack_runner.h"
#include "mongo/db/query/index_bounds_builder.h"
@@ -58,33 +64,6 @@
namespace mongo {
- // static
- void filterAllowedIndexEntries(const AllowedIndices& allowedIndices,
- std::vector<IndexEntry>* indexEntries) {
- invariant(indexEntries);
-
- // Filter index entries
- // Check BSON objects in AllowedIndices::_indexKeyPatterns against IndexEntry::keyPattern.
- // Removes IndexEntrys that do not match _indexKeyPatterns.
- std::vector<IndexEntry> temp;
- for (std::vector<IndexEntry>::const_iterator i = indexEntries->begin();
- i != indexEntries->end(); ++i) {
- const IndexEntry& indexEntry = *i;
- for (std::vector<BSONObj>::const_iterator j = allowedIndices.indexKeyPatterns.begin();
- j != allowedIndices.indexKeyPatterns.end(); ++j) {
- const BSONObj& index = *j;
- // Copy index entry to temp vector if found in query settings.
- if (0 == indexEntry.keyPattern.woCompare(index)) {
- temp.push_back(indexEntry);
- break;
- }
- }
- }
-
- // Update results.
- temp.swap(*indexEntries);
- }
-
Status getRunner(Collection* collection,
const std::string& ns,
const BSONObj& unparsedQuery,
@@ -124,70 +103,6 @@ namespace mongo {
} // namespace
- void fillOutPlannerParams(Collection* collection,
- CanonicalQuery* canonicalQuery,
- QueryPlannerParams* plannerParams) {
- // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s)
- IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
- desc->getAccessMethodName(),
- desc->isMultikey(),
- desc->isSparse(),
- desc->indexName(),
- desc->infoObj()));
- }
-
- // If query supports index filters, filter params.indices by indices in query settings.
- QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
- AllowedIndices* allowedIndicesRaw;
-
- // Filter index catalog if index filters are specified for query.
- // Also, signal to planner that application hint should be ignored.
- if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
- boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
- filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices);
- plannerParams->indexFiltersApplied = true;
- }
-
- // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN
- // overrides this behavior by not outputting a collscan even if there are no indexed
- // solutions.
- if (storageGlobalParams.noTableScan) {
- const string& ns = canonicalQuery->ns();
- // There are certain cases where we ignore this restriction:
- bool ignore = canonicalQuery->getQueryObj().isEmpty()
- || (string::npos != ns.find(".system."))
- || (0 == ns.find("local."));
- if (!ignore) {
- plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN;
- }
- }
-
- // If the caller wants a shard filter, make sure we're actually sharded.
- if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
- CollectionMetadataPtr collMetadata =
- shardingState.getCollectionMetadata(canonicalQuery->ns());
-
- if (collMetadata) {
- plannerParams->shardKey = collMetadata->getKeyPattern();
- }
- else {
- // If there's no metadata don't bother w/the shard filter since we won't know what
- // the key pattern is anyway...
- plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
- }
- }
-
- if (internalQueryPlannerEnableIndexIntersection) {
- plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION;
- }
-
- plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS;
- plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT;
- }
-
/**
* For a given query, get a runner.
*/
@@ -209,7 +124,7 @@ namespace mongo {
}
// If we have an _id index we can use the idhack runner.
- if (IDHackRunner::supportsQuery(*canonicalQuery) &&
+ if (IDHackStage::supportsQuery(*canonicalQuery) &&
collection->getIndexCatalog()->findIdIndex()) {
LOG(2) << "Using idhack: " << canonicalQuery->toStringShort();
*out = new IDHackRunner(collection, canonicalQuery.release());
@@ -275,7 +190,7 @@ namespace mongo {
// add a CachedPlanStage on top of the previous root
root = new CachedPlanStage(collection, rawCanonicalQuery, root, backupRoot);
-
+
*out = new SingleSolutionRunner(collection,
canonicalQuery.release(),
chosenSolution, root, sharedWs);
@@ -307,7 +222,6 @@ namespace mongo {
CanonicalQuery* rawCanonicalQuery,
const QueryPlannerParams& plannerParams,
Runner** out) {
-
invariant(collection);
invariant(rawCanonicalQuery);
auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
@@ -613,62 +527,6 @@ namespace mongo {
// Distinct hack
//
- /**
- * If possible, turn the provided QuerySolution into a QuerySolution that uses a DistinctNode
- * to provide results for the distinct command.
- *
- * If the provided solution could be mutated successfully, returns true, otherwise returns
- * false.
- */
- bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) {
- QuerySolutionNode* root = soln->root.get();
-
- // We're looking for a project on top of an ixscan.
- if (STAGE_PROJECTION == root->getType() && (STAGE_IXSCAN == root->children[0]->getType())) {
- IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]);
-
- // An additional filter must be applied to the data in the key, so we can't just skip
- // all the keys with a given value; we must examine every one to find the one that (may)
- // pass the filter.
- if (NULL != isn->filter.get()) {
- return false;
- }
-
- // We only set this when we have special query modifiers (.max() or .min()) or other
- // special cases. Don't want to handle the interactions between those and distinct.
- // Don't think this will ever really be true but if it somehow is, just ignore this
- // soln.
- if (isn->bounds.isSimpleRange) {
- return false;
- }
-
- // Make a new DistinctNode. We swap this for the ixscan in the provided solution.
- DistinctNode* dn = new DistinctNode();
- dn->indexKeyPattern = isn->indexKeyPattern;
- dn->direction = isn->direction;
- dn->bounds = isn->bounds;
-
- // Figure out which field we're skipping to the next value of. TODO: We currently only
- // try to distinct-hack when there is an index prefixed by the field we're distinct-ing
- // over. Consider removing this code if we stick with that policy.
- dn->fieldNo = 0;
- BSONObjIterator it(isn->indexKeyPattern);
- while (it.more()) {
- if (field == it.next().fieldName()) {
- break;
- }
- dn->fieldNo++;
- }
-
- // Delete the old index scan, set the child of project to the fast distinct scan.
- delete root->children[0];
- root->children[0] = dn;
- return true;
- }
-
- return false;
- }
-
Status getRunnerDistinct(Collection* collection,
const BSONObj& query,
const string& field,
diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h
index 9360478e278..eabc78d3bdd 100644
--- a/src/mongo/db/query/get_runner.h
+++ b/src/mongo/db/query/get_runner.h
@@ -26,9 +26,12 @@
* it in the license file.
*/
+// THIS FILE IS DEPRECATED -- replaced by get_executor.h
+
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_settings.h"
+#include "mongo/db/query/query_solution.h"
#include "mongo/db/query/runner.h"
namespace mongo {
@@ -36,23 +39,6 @@ namespace mongo {
class Collection;
/**
- * Filter indexes retrieved from index catalog by
- * allowed indices in query settings.
- * Used by getRunner().
- * This function is public to facilitate testing.
- */
- void filterAllowedIndexEntries(const AllowedIndices& allowedIndices,
- std::vector<IndexEntry>* indexEntries);
-
- /**
- * Fill out the provided 'plannerParams' for the 'canonicalQuery' operating on the collection
- * 'collection'. Exposed for testing.
- */
- void fillOutPlannerParams(Collection* collection,
- CanonicalQuery* canonicalQuery,
- QueryPlannerParams* plannerParams);
-
- /**
* Get a runner for a query. Takes ownership of rawCanonicalQuery.
*
* If the query is valid and a runner could be created, returns Status::OK()
@@ -83,7 +69,6 @@ namespace mongo {
Runner** outRunner,
CanonicalQuery** outCanonicalQuery,
size_t plannerOptions = 0);
-
/*
* Get a runner for a query executing as part of a distinct command.
*
@@ -95,6 +80,7 @@ namespace mongo {
const BSONObj& query,
const std::string& field,
Runner** out);
+
/*
* Get a runner for a query executing as part of a count command.
*
diff --git a/src/mongo/db/query/get_runner_test.cpp b/src/mongo/db/query/get_runner_test.cpp
index ce646770623..15b9a618528 100644
--- a/src/mongo/db/query/get_runner_test.cpp
+++ b/src/mongo/db/query/get_runner_test.cpp
@@ -30,6 +30,7 @@
* This file contains tests for mongo/db/query/get_runner.h
*/
+#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/get_runner.h"
#include "mongo/db/json.h"
diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp
index a8ec508e7e1..b9c0da6f82b 100644
--- a/src/mongo/db/query/idhack_runner.cpp
+++ b/src/mongo/db/query/idhack_runner.cpp
@@ -241,15 +241,6 @@ namespace mongo {
}
// static
- bool IDHackRunner::supportsQuery(const CanonicalQuery& query) {
- return !query.getParsed().showDiskLoc()
- && query.getParsed().getHint().isEmpty()
- && 0 == query.getParsed().getSkip()
- && CanonicalQuery::isSimpleIdQuery(query.getParsed().getFilter())
- && !query.getParsed().hasOption(QueryOption_CursorTailable);
- }
-
- // static
bool IDHackRunner::hasCoveredProjection() const {
// Some update operations use the IDHackRunner without creating a
// canonical query. In this case, _query will be NULL. Just return
diff --git a/src/mongo/db/query/idhack_runner.h b/src/mongo/db/query/idhack_runner.h
index 89a6df33f55..23e2d02691c 100644
--- a/src/mongo/db/query/idhack_runner.h
+++ b/src/mongo/db/query/idhack_runner.h
@@ -76,11 +76,6 @@ namespace mongo {
virtual Status getInfo(TypeExplain** explain,
PlanInfo** planInfo) const;
- /**
- * ID Hack has a very strict criteria for the queries it supports.
- */
- static bool supportsQuery(const CanonicalQuery& query);
-
private:
/**
* ID Hack queries are only covered with the projection {_id: 1}.
diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h
index f69ca28fc5f..5cb25c0aac4 100644
--- a/src/mongo/db/query/index_entry.h
+++ b/src/mongo/db/query/index_entry.h
@@ -32,6 +32,7 @@
#include "mongo/db/index_names.h"
#include "mongo/db/jsobj.h"
+#include "mongo/util/mongoutils/str.h"
namespace mongo {
diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp
index d1c81173d2e..76b1c083752 100644
--- a/src/mongo/db/query/new_find.cpp
+++ b/src/mongo/db/query/new_find.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/keypattern.h"
#include "mongo/db/query/explain.h"
#include "mongo/db/query/find_constants.h"
+#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/get_runner.h"
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/query/qlog.h"
@@ -489,6 +490,8 @@ namespace mongo {
//
// TODO temporary until find() becomes a real command.
if (isExplain && enableNewExplain) {
+ scoped_ptr<CanonicalQuery> safeCq(cq);
+
size_t options = QueryPlannerParams::DEFAULT;
if (shardingState.needCollectionMetadata(pq.ns())) {
options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
@@ -497,11 +500,18 @@ namespace mongo {
BufBuilder bb;
bb.skip(sizeof(QueryResult));
+ PlanExecutor* rawExec;
+ Status execStatus = getExecutor(collection, cq, &rawExec, options);
+ if (!execStatus.isOK()) {
+ uasserted(17510, "Explain error: " + execStatus.reason());
+ }
+
+ scoped_ptr<PlanExecutor> exec(rawExec);
BSONObjBuilder explainBob;
- Status explainStatus = Explain::explain(collection, cq, options,
- Explain::QUERY_PLANNER, &explainBob);
+ Status explainStatus = Explain::explainStages(exec.get(), cq, Explain::EXEC_ALL_PLANS,
+ &explainBob);
if (!explainStatus.isOK()) {
- uasserted(17510, "Explain error: " + explainStatus.reason());
+ uasserted(18521, "Explain error: " + explainStatus.reason());
}
// Add the resulting object to the return buffer.
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index f35017e4c54..235260010a6 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -36,7 +36,19 @@
namespace mongo {
PlanExecutor::PlanExecutor(WorkingSet* ws, PlanStage* rt, const Collection* collection)
- : _collection(collection), _workingSet(ws), _root(rt), _killed(false) {}
+ : _collection(collection),
+ _workingSet(ws),
+ _root(rt),
+ _qs(NULL),
+ _killed(false) { }
+
+ PlanExecutor::PlanExecutor(WorkingSet* ws, PlanStage* rt, QuerySolution* qs,
+ const Collection* collection)
+ : _collection(collection),
+ _workingSet(ws),
+ _root(rt),
+ _qs(qs),
+ _killed(false) { }
PlanExecutor::~PlanExecutor() { }
@@ -143,4 +155,29 @@ namespace mongo {
_killed = true;
}
+ PlanStage* PlanExecutor::releaseStages() {
+ return _root.release();
+ }
+
+ PlanStage* PlanExecutor::getStages() {
+ return _root.get();
+ }
+
+ Status PlanExecutor::executePlan() {
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ PlanStage::StageState code = PlanStage::NEED_TIME;
+ while (PlanStage::NEED_TIME == code || PlanStage::ADVANCED == code) {
+ code = _root->work(&id);
+ }
+
+ if (PlanStage::FAILURE == code) {
+ BSONObj obj;
+ WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &obj);
+ return Status(ErrorCodes::BadValue,
+ "Exec error: " + WorkingSetCommon::toStatusString(obj));
+ }
+
+ return Status::OK();
+ }
+
} // namespace mongo
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index 4c8c6fd97ad..c032ea77e4e 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -32,6 +32,7 @@
#include "mongo/base/status.h"
#include "mongo/db/query/runner.h"
+#include "mongo/db/query/query_solution.h"
namespace mongo {
@@ -52,6 +53,8 @@ namespace mongo {
class PlanExecutor {
public:
PlanExecutor(WorkingSet* ws, PlanStage* rt, const Collection* collection);
+ PlanExecutor(WorkingSet* ws, PlanStage* rt, QuerySolution* qs,
+ const Collection* collection);
~PlanExecutor();
//
@@ -94,13 +97,37 @@ namespace mongo {
*/
void kill();
+ /**
+ * If this stage tree ranked plans using a MultiPlanStage, then returns the winning plan.
+ * Otherwise returns NULL.
+ */
+ QuerySolution* bestSolution();
+
+ /**
+ * Transfer ownership of the stage tree wrapped by this executor to the caller.
+ */
+ PlanStage* releaseStages();
+
+ /**
+ * Get the stage tree wrapped by this executor, without transferring ownership.
+ */
+ PlanStage* getStages();
+
+ /**
+ * Execute the plan to completion, throwing out the results.
+ *
+ * Used by explain.
+ */
+ Status executePlan();
+
private:
// Collection over which this plan executor runs. Used to resolve record ids retrieved by
// the plan stages. The collection must not be destroyed while there are active plans.
const Collection* _collection;
boost::scoped_ptr<WorkingSet> _workingSet;
- boost::scoped_ptr<PlanStage> _root;
+ std::auto_ptr<PlanStage> _root;
+ boost::scoped_ptr<QuerySolution> _qs;
// Did somebody drop an index we care about or the namespace we're looking at? If so,
// we'll be killed.
diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h
index 62729bd7990..d258bd08f2d 100644
--- a/src/mongo/db/query/stage_types.h
+++ b/src/mongo/db/query/stage_types.h
@@ -36,7 +36,7 @@ namespace mongo {
enum StageType {
STAGE_AND_HASH,
STAGE_AND_SORTED,
- STAGE_CACHED_PLAN,
+ STAGE_CACHED_PLAN,
STAGE_COLLSCAN,
// If we're running a .count(), the query is fully covered by one ixscan, and the ixscan is
@@ -48,6 +48,8 @@ namespace mongo {
// stage is an ixscan with some key-skipping behvaior that only distinct uses.
STAGE_DISTINCT,
+ STAGE_EOF,
+
// This is more of an "internal-only" stage where we try to keep docs that were mutated
// during query execution.
STAGE_KEEP_MUTATIONS,
@@ -58,15 +60,19 @@ namespace mongo {
STAGE_GEO_NEAR_2D,
STAGE_GEO_NEAR_2DSPHERE,
+ STAGE_IDHACK,
STAGE_IXSCAN,
STAGE_LIMIT,
- STAGE_MULTI_PLAN,
+ STAGE_MOCK,
+ STAGE_MULTI_PLAN,
+ STAGE_OPLOG_START,
STAGE_OR,
STAGE_PROJECTION,
STAGE_SHARDING_FILTER,
STAGE_SKIP,
STAGE_SORT,
STAGE_SORT_MERGE,
+ STAGE_SUBPLAN,
STAGE_TEXT,
STAGE_UNKNOWN,
};
diff --git a/src/mongo/dbtests/counttests.cpp b/src/mongo/dbtests/counttests.cpp
index 12c5907cb89..8e34074e2aa 100644
--- a/src/mongo/dbtests/counttests.cpp
+++ b/src/mongo/dbtests/counttests.cpp
@@ -32,7 +32,7 @@
#include "mongo/db/db.h"
#include "mongo/db/json.h"
-#include "mongo/db/ops/count.h"
+#include "mongo/db/commands/count.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/operation_context_impl.h"
diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp
index fbf8a507b6c..240371f1482 100644
--- a/src/mongo/dbtests/plan_ranking.cpp
+++ b/src/mongo/dbtests/plan_ranking.cpp
@@ -38,7 +38,7 @@
#include "mongo/db/instance.h"
#include "mongo/db/json.h"
#include "mongo/db/operation_context_impl.h"
-#include "mongo/db/query/get_runner.h"
+#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/qlog.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/query/query_planner.h"