diff options
Diffstat (limited to 'src/mongo/db')
76 files changed, 3333 insertions, 691 deletions
diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h index 485deeb61e4..e8c1a91d5b5 100644 --- a/src/mongo/db/commands.h +++ b/src/mongo/db/commands.h @@ -37,6 +37,7 @@ #include "mongo/db/auth/resource_pattern.h" #include "mongo/db/client_basic.h" #include "mongo/db/jsobj.h" +#include "mongo/db/query/explain.h" namespace mongo { @@ -138,6 +139,26 @@ namespace mutablebson { virtual void help( std::stringstream& help ) const; /** + * Commands which can be explained override this method. Any operation which has a query + * part and executes as a tree of execution stages can be explained. A command should + * implement explain by: + * + * 1) Calling its custom parse function in order to parse the command. The output of + * this function should be a CanonicalQuery (representing the query part of the + * operation), and a PlanExecutor which wraps the tree of execution stages. + * + * 2) Calling Explain::explainStages(...) on the PlanExecutor. This is the function + * which knows how to convert an execution stage tree into explain output. + */ + virtual Status explain(OperationContext* txn, + const std::string& dbname, + const BSONObj& cmdObj, + Explain::Verbosity verbosity, + BSONObjBuilder* out) const { + return Status(ErrorCodes::IllegalOperation, "Cannot explain cmd: " + name); + } + + /** * Checks if the given client is authorized to run this command on database "dbname" * with the invocation described by "cmdObj". */ diff --git a/src/mongo/db/commands/count.cpp b/src/mongo/db/commands/count.cpp new file mode 100644 index 00000000000..48ebb4422e1 --- /dev/null +++ b/src/mongo/db/commands/count.cpp @@ -0,0 +1,291 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/count.h" + +#include "mongo/db/catalog/database.h" +#include "mongo/db/client.h" +#include "mongo/db/curop.h" +#include "mongo/db/query/get_executor.h" +#include "mongo/db/query/get_runner.h" +#include "mongo/db/query/type_explain.h" + +namespace mongo { + + static CmdCount cmdCount; + + static long long applySkipLimit(long long num, const BSONObj& cmd) { + BSONElement s = cmd["skip"]; + BSONElement l = cmd["limit"]; + + if (s.isNumber()) { + num = num - s.numberLong(); + if (num < 0) { + num = 0; + } + } + + if (l.isNumber()) { + long long limit = l.numberLong(); + if (limit < 0) { + limit = -limit; + } + + // 0 means no limit. + if (limit < num && limit != 0) { + num = limit; + } + } + + return num; + } + + long long runCount(OperationContext* txn, + const string& ns, + const BSONObj &cmd, + string &err, + int &errCode) { + // Lock 'ns'. + Client::Context cx(ns); + Collection* collection = cx.db()->getCollection(txn, ns); + + if (NULL == collection) { + err = "ns missing"; + return -1; + } + + BSONObj query = cmd.getObjectField("query"); + const std::string hint = cmd.getStringField("hint"); + const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint); + + // count of all objects + if (query.isEmpty()) { + return applySkipLimit(collection->numRecords(), cmd); + } + + Runner* rawRunner; + long long skip = cmd["skip"].numberLong(); + long long limit = cmd["limit"].numberLong(); + + if (limit < 0) { + limit = -limit; + } + + uassertStatusOK(getRunnerCount(collection, query, hintObj, &rawRunner)); + auto_ptr<Runner> runner(rawRunner); + + // Store the plan summary string in CurOp. + Client& client = cc(); + CurOp* currentOp = client.curop(); + if (NULL != currentOp) { + PlanInfo* rawInfo; + Status s = runner->getInfo(NULL, &rawInfo); + if (s.isOK()) { + scoped_ptr<PlanInfo> planInfo(rawInfo); + currentOp->debug().planSummary = planInfo->planSummary.c_str(); + } + } + + try { + const ScopedRunnerRegistration safety(runner.get()); + + long long count = 0; + Runner::RunnerState state; + while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, NULL))) { + if (skip > 0) { + --skip; + } + else { + ++count; + // Fast-path. There's no point in iterating all over the runner if limit + // is set. + if (count >= limit && limit != 0) { + break; + } + } + } + + // Emulate old behavior and return the count even if the runner was killed. This + // happens when the underlying collection is dropped. + return count; + } + catch (const DBException &e) { + err = e.toString(); + errCode = e.getCode(); + } + catch (const std::exception &e) { + err = e.what(); + errCode = 0; + } + + // Historically we have returned zero in many count assertion cases - see SERVER-2291. + log() << "Count with ns: " << ns << " and query: " << query + << " failed with exception: " << err << " code: " << errCode + << endl; + + return -2; + } + + Status CmdCount::explain(OperationContext* txn, + const std::string& dbname, + const BSONObj& cmdObj, + Explain::Verbosity verbosity, + BSONObjBuilder* out) const { + + // Acquire the DB read lock and get the collection. + const string ns = parseNs(dbname, cmdObj); + Client::ReadContext ctx(txn, ns); + Collection* collection = ctx.ctx().db()->getCollection( txn, ns ); + + // Handle special case of an empty query. When there is no query, we don't construct + // an actual execution tree. Since each collection tracks the number of documents it + // contains, count ops with no query just ask the collection for the total number of + // documents (and then apply the skip/limit to this number if necessary). + BSONObj query = cmdObj.getObjectField("query"); + if (query.isEmpty()) { + Explain::explainCountEmptyQuery(out); + return Status::OK(); + } + + // Get an executor for the command and use it to generate the explain output. + CanonicalQuery* rawCq; + PlanExecutor* rawExec; + Status execStatus = parseCountToExecutor(cmdObj, dbname, ns, collection, + &rawCq, &rawExec); + if (!execStatus.isOK()) { + return execStatus; + } + + scoped_ptr<CanonicalQuery> cq(rawCq); + scoped_ptr<PlanExecutor> exec(rawExec); + + return Explain::explainStages(exec.get(), cq.get(), verbosity, out); + } + + Status CmdCount::parseCountToExecutor(const BSONObj& cmdObj, + const std::string& dbname, + const std::string& ns, + Collection* collection, + CanonicalQuery** queryOut, + PlanExecutor** execOut) const { + + long long skip = 0; + if (cmdObj["skip"].isNumber()) { + skip = cmdObj["skip"].numberLong(); + if (skip < 0) { + return Status(ErrorCodes::BadValue, "skip value is negative in count query"); + } + } + else if (cmdObj["skip"].ok()) { + return Status(ErrorCodes::BadValue, "skip value is not a valid number"); + } + + BSONObj query = cmdObj.getObjectField("query"); + invariant(!query.isEmpty()); + + const std::string hint = cmdObj.getStringField("hint"); + const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint); + + StringData dbnameData(dbname); + const WhereCallbackReal whereCallback(dbnameData); + + CanonicalQuery* cq; + uassertStatusOK(CanonicalQuery::canonicalize(ns, + query, + BSONObj(), + BSONObj(), + 0, + 0, + hintObj, + &cq, + whereCallback)); + + auto_ptr<CanonicalQuery> autoCq(cq); + + Status execStat = getExecutor(collection, cq, execOut, + QueryPlannerParams::PRIVATE_IS_COUNT); + if (!execStat.isOK()) { + return execStat; + } + + *queryOut = autoCq.release(); + return Status::OK(); + } + + bool CmdCount::run(OperationContext* txn, + const string& dbname, + BSONObj& cmdObj, + int, string& errmsg, + BSONObjBuilder& result, bool) { + + long long skip = 0; + if ( cmdObj["skip"].isNumber() ) { + skip = cmdObj["skip"].numberLong(); + if ( skip < 0 ) { + errmsg = "skip value is negative in count query"; + return false; + } + } + else if ( cmdObj["skip"].ok() ) { + errmsg = "skip value is not a valid number"; + return false; + } + + const string ns = parseNs(dbname, cmdObj); + + // This acquires the DB read lock + // + Client::ReadContext ctx(txn, ns); + + string err; + int errCode; + long long n = runCount(txn, ns, cmdObj, err, errCode); + + long long retVal = n; + bool ok = true; + if ( n == -1 ) { + retVal = 0; + result.appendBool( "missing" , true ); + } + else if ( n < 0 ) { + retVal = 0; + ok = false; + if ( !err.empty() ) { + errmsg = err; + result.append("code", errCode); + return false; + } + } + + result.append("n", static_cast<double>(retVal)); + return ok; + } + +} // namespace mongo diff --git a/src/mongo/db/commands/count.h b/src/mongo/db/commands/count.h new file mode 100644 index 00000000000..eeaaac204f1 --- /dev/null +++ b/src/mongo/db/commands/count.h @@ -0,0 +1,110 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/catalog/collection.h" +#include "mongo/db/commands.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/repl/repl_settings.h" + +namespace mongo { + + class OperationContext; + + /** + * 'ns' is the namespace we're counting on. + * + * { count: "collectionname"[, query: <query>] } + * + * @return -1 on ns does not exist error and other errors, 0 on other errors, otherwise the + * match count. + */ + long long runCount(OperationContext* txn, + const std::string& ns, + const BSONObj& cmd, + std::string& err, + int& errCode); + + /* select count(*) */ + class CmdCount : public Command { + public: + virtual bool isWriteCommandForConfigServer() const { return false; } + CmdCount() : Command("count") { } + virtual bool slaveOk() const { + // ok on --slave setups + return repl::replSettings.slave == repl::SimpleSlave; + } + virtual bool slaveOverrideOk() const { return true; } + virtual bool maintenanceOk() const { return false; } + virtual bool adminOnly() const { return false; } + virtual void help( stringstream& help ) const { help << "count objects in collection"; } + virtual void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) { + ActionSet actions; + actions.addAction(ActionType::find); + out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); + } + + virtual Status explain(OperationContext* txn, + const std::string& dbname, + const BSONObj& cmdObj, + Explain::Verbosity verbosity, + BSONObjBuilder* out) const; + + virtual bool run(OperationContext* txn, + const string& dbname, + BSONObj& cmdObj, + int, string& errmsg, + BSONObjBuilder& result, bool); + + private: + /** + * Converts the command object 'cmdObj' for a count into a PlanExecutor capable + * of running the count and a CanonicalQuery. + * + * If successful, returns the executor through 'execOut' and the canonicalized + * query through 'queryOut'. The caller must delete both 'queryOut' and 'execOut'. + * + * On failure, returns a non-OK status, and the caller should not delete either + * 'queryOut' or 'execOut'. + * + * TODO: the regular run() command for count should call this instead of getting + * a runner. + */ + Status parseCountToExecutor(const BSONObj& cmdObj, + const std::string& dbname, + const std::string& ns, + Collection* collection, + CanonicalQuery** queryOut, + PlanExecutor** execOut) const; + + }; + +} // namespace mongo diff --git a/src/mongo/db/commands/explain_cmd.cpp b/src/mongo/db/commands/explain_cmd.cpp new file mode 100644 index 00000000000..c2b468a3865 --- /dev/null +++ b/src/mongo/db/commands/explain_cmd.cpp @@ -0,0 +1,111 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/explain_cmd.h" + +#include "mongo/db/catalog/database.h" +#include "mongo/db/client.h" +#include "mongo/db/commands.h" +#include "mongo/db/query/explain.h" +#include "mongo/db/query/get_runner.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { + + Status CmdExplain::checkAuthForCommand(ClientBasic* client, + const std::string& dbname, + const BSONObj& cmdObj) { + if (Object != cmdObj.firstElement().type()) { + return Status(ErrorCodes::BadValue, "explain command requires a nested object"); + } + + BSONObj explainObj = cmdObj.firstElement().Obj(); + + Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName()); + if (NULL == commToExplain) { + mongoutils::str::stream ss; + ss << "unknown command: " << explainObj.firstElementFieldName(); + return Status(ErrorCodes::CommandNotFound, ss); + } + + return commToExplain->checkAuthForCommand(client, dbname, explainObj); + } + + bool CmdExplain::run(OperationContext* txn, + const string& dbname, + BSONObj& cmdObj, int options, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + // Get the verbosity. + Explain::Verbosity verbosity = Explain::QUERY_PLANNER; + if (!cmdObj["verbosity"].eoo()) { + const char* verbStr = cmdObj["verbosity"].valuestrsafe(); + if (mongoutils::str::equals(verbStr, "executionStats")) { + verbosity = Explain::EXEC_STATS; + } + else if (mongoutils::str::equals(verbStr, "allPlansExecution")) { + verbosity = Explain::EXEC_ALL_PLANS; + } + else if (!mongoutils::str::equals(verbStr, "queryPlanner")) { + errmsg = "verbosity string must be one of " + "{'queryPlanner', 'executionStats', 'allPlansExecution'}"; + return false; + } + } + + if (Object != cmdObj.firstElement().type()) { + errmsg = "explain command requires a nested object"; + return false; + } + + // This is the nested command which we are explaining. + BSONObj explainObj = cmdObj.firstElement().Obj(); + + const string ns = parseNs(dbname, explainObj); + + Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName()); + if (NULL == commToExplain) { + mongoutils::str::stream ss; + ss << "unknown command: " << explainObj.firstElementFieldName(); + Status explainStatus(ErrorCodes::CommandNotFound, ss); + return appendCommandStatus(result, explainStatus); + } + + // Actually call the nested command's explain(...) method. + Status explainStatus = commToExplain->explain(txn, dbname, explainObj, verbosity, &result); + if (!explainStatus.isOK()) { + return appendCommandStatus(result, explainStatus); + } + + return true; + } + +} // namespace mongo diff --git a/src/mongo/db/commands/explain_cmd.h b/src/mongo/db/commands/explain_cmd.h new file mode 100644 index 00000000000..638c736f4f2 --- /dev/null +++ b/src/mongo/db/commands/explain_cmd.h @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/catalog/collection.h" +#include "mongo/db/commands.h" + +namespace mongo { + + /** + * The explain command is used to generate explain output for any read or write + * operation which has a query component (e.g. find, count, update, remove, distinct, etc.). + * + * The explain command takes as its argument a nested object which specifies the command to + * explain, and a verbosity indicator. For example: + * + * {explain: {count: "coll", query: {foo: "bar"}}, verbosity: "executionStats"} + * + * This command like a dispatcher: it just retrieves a pointer to the nested command and + * invokes its explain() implementation. + */ + class CmdExplain : public Command { + public: + CmdExplain() : Command("explain") { } + + virtual bool isWriteCommandForConfigServer() const { return false; } + + // TODO: make slave ok true, test explains on secondaries. + virtual bool slaveOk() const { + return false; + } + + virtual bool maintenanceOk() const { return false; } + + virtual bool adminOnly() const { return false; } + + virtual void help( stringstream& help ) const { + help << "explain database reads and writes"; + } + + /** + * You are authorized to run an explain if you are authorized to run + * the command that you are explaining. The auth check is performed recursively + * on the nested command. + */ + virtual Status checkAuthForCommand(ClientBasic* client, + const std::string& dbname, + const BSONObj& cmdObj); + + virtual bool run(OperationContext* txn, + const string& dbname, + BSONObj& cmdObj, int options, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl); + + } cmdExplain; + +} // namespace mongo diff --git a/src/mongo/db/dbcommands.cpp b/src/mongo/db/dbcommands.cpp index 8d833cb12b6..4d7f8000f15 100644 --- a/src/mongo/db/dbcommands.cpp +++ b/src/mongo/db/dbcommands.cpp @@ -60,7 +60,6 @@ #include "mongo/db/json.h" #include "mongo/db/lasterror.h" #include "mongo/db/namespace_string.h" -#include "mongo/db/ops/count.h" #include "mongo/db/ops/insert.h" #include "mongo/db/query/get_runner.h" #include "mongo/db/query/internal_plans.h" @@ -468,72 +467,6 @@ namespace mongo { } } cmdDrop; - /* select count(*) */ - class CmdCount : public Command { - public: - virtual bool isWriteCommandForConfigServer() const { return false; } - CmdCount() : Command("count") { } - virtual bool slaveOk() const { - // ok on --slave setups - return repl::replSettings.slave == repl::SimpleSlave; - } - virtual bool slaveOverrideOk() const { return true; } - virtual bool maintenanceOk() const { return false; } - virtual bool adminOnly() const { return false; } - virtual void help( stringstream& help ) const { help << "count objects in collection"; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { - ActionSet actions; - actions.addAction(ActionType::find); - out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); - } - - virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { - long long skip = 0; - if ( cmdObj["skip"].isNumber() ) { - skip = cmdObj["skip"].numberLong(); - if ( skip < 0 ) { - errmsg = "skip value is negative in count query"; - return false; - } - } - else if ( cmdObj["skip"].ok() ) { - errmsg = "skip value is not a valid number"; - return false; - } - - const string ns = parseNs(dbname, cmdObj); - - // This acquires the DB read lock - // - Client::ReadContext ctx(txn, ns); - - string err; - int errCode; - long long n = runCount(txn, ns, cmdObj, err, errCode); - - long long retVal = n; - bool ok = true; - if ( n == -1 ) { - retVal = 0; - result.appendBool( "missing" , true ); - } - else if ( n < 0 ) { - retVal = 0; - ok = false; - if ( !err.empty() ) { - errmsg = err; - result.append("code", errCode); - return false; - } - } - - result.append("n", static_cast<double>(retVal)); - return ok; - } - } cmdCount; - /* create collection */ class CmdCreate : public Command { public: diff --git a/src/mongo/db/exec/2dnear.cpp b/src/mongo/db/exec/2dnear.cpp index 236d062858f..d1f276c233a 100644 --- a/src/mongo/db/exec/2dnear.cpp +++ b/src/mongo/db/exec/2dnear.cpp @@ -55,6 +55,10 @@ namespace mongo { PlanStage::StageState TwoDNear::work(WorkingSetID* out) { ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (!_initted) { _initted = true; @@ -151,6 +155,11 @@ namespace mongo { _invalidationMap.erase(range.first, range.second); } + vector<PlanStage*> TwoDNear::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* TwoDNear::getStats() { _commonStats.isEOF = isEOF(); _specificStats.keyPattern = _params.indexKeyPattern; diff --git a/src/mongo/db/exec/2dnear.h b/src/mongo/db/exec/2dnear.h index 9ac7eae3a7a..265f3ff6402 100644 --- a/src/mongo/db/exec/2dnear.h +++ b/src/mongo/db/exec/2dnear.h @@ -74,6 +74,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_GEO_NEAR_2D; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 3c85bfb8602..96054d3f97c 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -43,7 +43,9 @@ env.Library( "collection_scan.cpp", "count.cpp", "distinct_scan.cpp", + "eof.cpp", "fetch.cpp", + "idhack.cpp", "index_scan.cpp", "keep_mutations.cpp", "limit.cpp", @@ -58,6 +60,7 @@ env.Library( "skip.cpp", "sort.cpp", "stagedebug_cmd.cpp", + "subplan.cpp", "text.cpp", "working_set_common.cpp", ], diff --git a/src/mongo/db/exec/and_hash.cpp b/src/mongo/db/exec/and_hash.cpp index 7569ff36703..ff7cae7da48 100644 --- a/src/mongo/db/exec/and_hash.cpp +++ b/src/mongo/db/exec/and_hash.cpp @@ -107,6 +107,9 @@ namespace mongo { PlanStage::StageState AndHashStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } // Fast-path for one of our children being EOF immediately. We work each child a few times. @@ -500,6 +503,10 @@ namespace mongo { } } + vector<PlanStage*> AndHashStage::getChildren() const { + return _children; + } + PlanStageStats* AndHashStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/and_hash.h b/src/mongo/db/exec/and_hash.h index d72edc19fa4..df353b1cec5 100644 --- a/src/mongo/db/exec/and_hash.h +++ b/src/mongo/db/exec/and_hash.h @@ -79,6 +79,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_AND_HASH; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/and_sorted.cpp b/src/mongo/db/exec/and_sorted.cpp index 71979872a4a..30ac9402a2e 100644 --- a/src/mongo/db/exec/and_sorted.cpp +++ b/src/mongo/db/exec/and_sorted.cpp @@ -61,6 +61,9 @@ namespace mongo { PlanStage::StageState AndSortedStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } if (0 == _specificStats.failedAnd.size()) { @@ -297,6 +300,10 @@ namespace mongo { } } + vector<PlanStage*> AndSortedStage::getChildren() const { + return _children; + } + PlanStageStats* AndSortedStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/and_sorted.h b/src/mongo/db/exec/and_sorted.h index afcf4de5c49..1d74a4bde60 100644 --- a/src/mongo/db/exec/and_sorted.h +++ b/src/mongo/db/exec/and_sorted.h @@ -65,6 +65,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_AND_SORTED; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp index 874e52fd526..31450fecc89 100644 --- a/src/mongo/db/exec/cached_plan.cpp +++ b/src/mongo/db/exec/cached_plan.cpp @@ -69,6 +69,9 @@ namespace mongo { PlanStage::StageState CachedPlanStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } StageState childStatus = getActiveChild()->work(out); @@ -123,6 +126,17 @@ namespace mongo { ++_commonStats.invalidates; } + vector<PlanStage*> CachedPlanStage::getChildren() const { + vector<PlanStage*> children; + if (_usingBackupChild) { + children.push_back(_backupChildPlan.get()); + } + else { + children.push_back(_mainChildPlan.get()); + } + return children; + } + PlanStageStats* CachedPlanStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/cached_plan.h b/src/mongo/db/exec/cached_plan.h index 37962fbd456..9c17239deb7 100644 --- a/src/mongo/db/exec/cached_plan.h +++ b/src/mongo/db/exec/cached_plan.h @@ -60,6 +60,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_CACHED_PLAN; } + virtual PlanStageStats* getStats(); static const char* kStageType; @@ -72,7 +76,7 @@ namespace mongo { const Collection* _collection; // not owned - CanonicalQuery* _canonicalQuery; + CanonicalQuery* _canonicalQuery; // owned by us boost::scoped_ptr<PlanStage> _mainChildPlan; diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index 47541cf7239..195ab21e551 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -53,6 +53,10 @@ namespace mongo { PlanStage::StageState CollectionScan::work(WorkingSetID* out) { ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (_nsDropped) { return PlanStage::DEAD; } // Do some init if we haven't already. @@ -154,6 +158,11 @@ namespace mongo { } } + vector<PlanStage*> CollectionScan::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* CollectionScan::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h index a6cd87dc68d..f752c69e4c3 100644 --- a/src/mongo/db/exec/collection_scan.h +++ b/src/mongo/db/exec/collection_scan.h @@ -57,6 +57,10 @@ namespace mongo { virtual void prepareToYield(); virtual void recoverFromYield(); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_COLLSCAN; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/count.cpp b/src/mongo/db/exec/count.cpp index 56e8607a611..eeb860e9e92 100644 --- a/src/mongo/db/exec/count.cpp +++ b/src/mongo/db/exec/count.cpp @@ -44,7 +44,10 @@ namespace mongo { _params(params), _hitEnd(false), _shouldDedup(params.descriptor->isMultikey()), - _commonStats(kStageType) { } + _commonStats(kStageType) { + _specificStats.keyPattern = _params.descriptor->keyPattern(); + _specificStats.isMultiKey = _params.descriptor->isMultikey(); + } void Count::initIndexCursor() { CursorOptions cursorOptions; @@ -62,6 +65,8 @@ namespace mongo { // that points at the end. _btreeCursor->seek(_params.startKey, !_params.startKeyInclusive); + ++_specificStats.keysExamined; + // Create the cursor that points at our end position. IndexCursor* endCursor; verify(_iam->newCursor(cursorOptions, &endCursor).isOK()); @@ -73,6 +78,8 @@ namespace mongo { // If the end key is inclusive we want to point *past* it since that's the end. _endCursor->seek(_params.endKey, _params.endKeyInclusive); + ++_specificStats.keysExamined; + // See if we've hit the end already. checkEnd(); } @@ -91,10 +98,16 @@ namespace mongo { } PlanStage::StageState Count::work(WorkingSetID* out) { + ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (NULL == _btreeCursor.get()) { // First call to work(). Perform cursor init. initIndexCursor(); checkEnd(); + ++_commonStats.needTime; return PlanStage::NEED_TIME; } @@ -104,8 +117,11 @@ namespace mongo { _btreeCursor->next(); checkEnd(); + ++_specificStats.keysExamined; + if (_shouldDedup) { if (_returned.end() != _returned.find(loc)) { + ++_commonStats.needTime; return PlanStage::NEED_TIME; } else { @@ -114,6 +130,7 @@ namespace mongo { } *out = WorkingSet::INVALID_ID; + ++_commonStats.advanced; return PlanStage::ADVANCED; } @@ -127,6 +144,7 @@ namespace mongo { } void Count::prepareToYield() { + ++_commonStats.yields; if (_hitEnd || (NULL == _btreeCursor.get())) { return; } _btreeCursor->savePosition(); @@ -134,6 +152,7 @@ namespace mongo { } void Count::recoverFromYield() { + ++_commonStats.unyields; if (_hitEnd || (NULL == _btreeCursor.get())) { return; } if (!_btreeCursor->restorePosition().isOK()) { @@ -178,6 +197,8 @@ namespace mongo { } void Count::invalidate(const DiskLoc& dl, InvalidationType type) { + ++_commonStats.invalidates; + // The only state we're responsible for holding is what DiskLocs to drop. If a document // mutates the underlying index cursor will deal with it. if (INVALIDATION_MUTATION == type) { @@ -192,11 +213,20 @@ namespace mongo { } } + vector<PlanStage*> Count::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* Count::getStats() { - // We don't collect stats since this stage is only used by the count command. - // If count ever collects stats we must implement this. - invariant(0); - return NULL; + _commonStats.isEOF = isEOF(); + auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_COUNT)); + + CountStats* countStats = new CountStats(_specificStats); + countStats->keyPattern = _specificStats.keyPattern.getOwned(); + ret->specific.reset(countStats); + + return ret.release(); } } // namespace mongo diff --git a/src/mongo/db/exec/count.h b/src/mongo/db/exec/count.h index 51054dfe4dd..a3a82ab0032 100644 --- a/src/mongo/db/exec/count.h +++ b/src/mongo/db/exec/count.h @@ -76,6 +76,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_COUNT; } + virtual PlanStageStats* getStats(); static const char* kStageType; @@ -114,6 +118,7 @@ namespace mongo { bool _shouldDedup; CommonStats _commonStats; + CountStats _specificStats; }; } // namespace mongo diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp index 6db9a8b3f31..3cbd9e84d08 100644 --- a/src/mongo/db/exec/distinct_scan.cpp +++ b/src/mongo/db/exec/distinct_scan.cpp @@ -91,6 +91,9 @@ namespace mongo { PlanStage::StageState DistinctScan::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (NULL == _btreeCursor.get()) { // First call to work(). Perform cursor init. initIndexCursor(); @@ -213,6 +216,11 @@ namespace mongo { } } + vector<PlanStage*> DistinctScan::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* DistinctScan::getStats() { _commonStats.isEOF = isEOF(); auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_DISTINCT)); diff --git a/src/mongo/db/exec/distinct_scan.h b/src/mongo/db/exec/distinct_scan.h index c3b1e110dd8..9e088da5068 100644 --- a/src/mongo/db/exec/distinct_scan.h +++ b/src/mongo/db/exec/distinct_scan.h @@ -88,6 +88,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_DISTINCT; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/eof.cpp b/src/mongo/db/exec/eof.cpp new file mode 100644 index 00000000000..e953f60406c --- /dev/null +++ b/src/mongo/db/exec/eof.cpp @@ -0,0 +1,75 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/eof.h" + +namespace mongo { + + // static + const char* EOFStage::kStageType = "EOF"; + + EOFStage::EOFStage() : _commonStats(kStageType) { } + + EOFStage::~EOFStage() { } + + bool EOFStage::isEOF() { + return true; + } + + PlanStage::StageState EOFStage::work(WorkingSetID* out) { + ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + return PlanStage::IS_EOF; + } + + void EOFStage::prepareToYield() { + ++_commonStats.yields; + } + + void EOFStage::recoverFromYield() { + ++_commonStats.unyields; + } + + void EOFStage::invalidate(const DiskLoc& dl, InvalidationType type) { + ++_commonStats.invalidates; + } + + vector<PlanStage*> EOFStage::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + + PlanStageStats* EOFStage::getStats() { + _commonStats.isEOF = isEOF(); + return new PlanStageStats(_commonStats, STAGE_EOF); + } + +} // namespace mongo diff --git a/src/mongo/db/ops/count.h b/src/mongo/db/exec/eof.h index 8040efb6028..cfcddc15b87 100644 --- a/src/mongo/db/ops/count.h +++ b/src/mongo/db/exec/eof.h @@ -1,7 +1,5 @@ -// count.h - /** - * Copyright (C) 2013 MongoDB Inc. + * Copyright (C) 2013 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, @@ -28,24 +26,39 @@ * it in the license file. */ -#include "mongo/db/jsobj.h" +#pragma once -namespace mongo { +#include "mongo/db/diskloc.h" +#include "mongo/db/exec/plan_stage.h" - class OperationContext; +namespace mongo { /** - * 'ns' is the namespace we're counting on. - * - * { count: "collectionname"[, query: <query>] } - * - * @return -1 on ns does not exist error and other errors, 0 on other errors, otherwise the - * match count. + * This stage just returns EOF immediately. */ - long long runCount(OperationContext* txn, - const std::string& ns, - const BSONObj& cmd, - std::string& err, - int& errCode); + class EOFStage : public PlanStage { + public: + EOFStage(); + + virtual ~EOFStage(); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl, InvalidationType type); + + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_EOF; } + + PlanStageStats* getStats(); + + static const char* kStageType; + + private: + CommonStats _commonStats; + }; -} // namespace mongo +} // namespace mongo diff --git a/src/mongo/db/exec/fetch.cpp b/src/mongo/db/exec/fetch.cpp index 8718786f0c8..fc0e039482e 100644 --- a/src/mongo/db/exec/fetch.cpp +++ b/src/mongo/db/exec/fetch.cpp @@ -58,6 +58,9 @@ namespace mongo { PlanStage::StageState FetchStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } // If we're here, we're not waiting for a DiskLoc to be fetched. Get another to-be-fetched @@ -83,6 +86,8 @@ namespace mongo { member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; } + ++_specificStats.docsExamined; + return returnIfMatches(member, id, out); } else if (PlanStage::FAILURE == status) { @@ -143,6 +148,12 @@ namespace mongo { } } + vector<PlanStage*> FetchStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* FetchStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/fetch.h b/src/mongo/db/exec/fetch.h index 870e67a9148..f99fba7b1b5 100644 --- a/src/mongo/db/exec/fetch.h +++ b/src/mongo/db/exec/fetch.h @@ -59,6 +59,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_FETCH; } + PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp new file mode 100644 index 00000000000..d27f7a34b5b --- /dev/null +++ b/src/mongo/db/exec/idhack.cpp @@ -0,0 +1,148 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/idhack.h" + +#include "mongo/client/dbclientinterface.h" +#include "mongo/db/exec/projection.h" +#include "mongo/db/index/btree_access_method.h" +#include "mongo/s/d_logic.h" + +namespace mongo { + + // static + const char* IDHackStage::kStageType = "IDHACK"; + + IDHackStage::IDHackStage(const Collection* collection, CanonicalQuery* query, WorkingSet* ws) + : _collection(collection), + _workingSet(ws), + _key(query->getQueryObj()["_id"].wrap()), + _query(query), + _killed(false), + _done(false), + _commonStats(kStageType) { } + + IDHackStage::IDHackStage(Collection* collection, const BSONObj& key, WorkingSet* ws) + : _collection(collection), + _workingSet(ws), + _key(key), + _query(NULL), + _killed(false), + _done(false), + _commonStats(kStageType) { } + + IDHackStage::~IDHackStage() { } + + bool IDHackStage::isEOF() { + return _killed || _done; + } + + PlanStage::StageState IDHackStage::work(WorkingSetID* out) { + ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + if (_killed) { return PlanStage::DEAD; } + if (_done) { return PlanStage::IS_EOF; } + + // Use the index catalog to get the id index. + const IndexCatalog* catalog = _collection->getIndexCatalog(); + + // Find the index we use. + IndexDescriptor* idDesc = catalog->findIdIndex(); + if (NULL == idDesc) { + _done = true; + return PlanStage::IS_EOF; + } + + // This may not be valid always. See SERVER-12397. + const BtreeBasedAccessMethod* accessMethod = + static_cast<const BtreeBasedAccessMethod*>(catalog->getIndex(idDesc)); + + // Look up the key by going directly to the Btree. + DiskLoc loc = accessMethod->findSingle( _key ); + + // Key not found. + if (loc.isNull()) { + _done = true; + return PlanStage::IS_EOF; + } + + ++_specificStats.keysExamined; + ++_specificStats.docsExamined; + + // Fill out the WSM. + WorkingSetID id = _workingSet->allocate(); + WorkingSetMember* member = _workingSet->get(id); + member->loc = loc; + member->obj = _collection->docFor(loc); + member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; + + _done = true; + ++_commonStats.advanced; + *out = id; + return PlanStage::ADVANCED; + } + + void IDHackStage::prepareToYield() { + ++_commonStats.yields; + } + + void IDHackStage::recoverFromYield() { + ++_commonStats.unyields; + } + + void IDHackStage::invalidate(const DiskLoc& dl, InvalidationType type) { + ++_commonStats.invalidates; + } + + // static + bool IDHackStage::supportsQuery(const CanonicalQuery& query) { + return !query.getParsed().showDiskLoc() + && query.getParsed().getHint().isEmpty() + && 0 == query.getParsed().getSkip() + && CanonicalQuery::isSimpleIdQuery(query.getParsed().getFilter()) + && !query.getParsed().hasOption(QueryOption_CursorTailable); + } + + vector<PlanStage*> IDHackStage::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + + PlanStageStats* IDHackStage::getStats() { + _commonStats.isEOF = isEOF(); + auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_IDHACK)); + ret->specific.reset(new IDHackStats(_specificStats)); + return ret.release(); + } + +} // namespace mongo diff --git a/src/mongo/db/exec/idhack.h b/src/mongo/db/exec/idhack.h new file mode 100644 index 00000000000..3f266638d9e --- /dev/null +++ b/src/mongo/db/exec/idhack.h @@ -0,0 +1,94 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/diskloc.h" +#include "mongo/db/catalog/collection.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/query/canonical_query.h" + +namespace mongo { + + /** + * A standalone stage implementing the fast path for key-value retrievals + * via the _id index. + */ + class IDHackStage : public PlanStage { + public: + /** Takes ownership of all the arguments -collection. */ + IDHackStage(const Collection* collection, CanonicalQuery* query, WorkingSet* ws); + + IDHackStage(Collection* collection, const BSONObj& key, WorkingSet* ws); + + virtual ~IDHackStage(); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl, InvalidationType type); + + /** + * ID Hack has a very strict criteria for the queries it supports. + */ + static bool supportsQuery(const CanonicalQuery& query); + + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_IDHACK; } + + PlanStageStats* getStats(); + + static const char* kStageType; + + private: + // Not owned here. + const Collection* _collection; + + // The WorkingSet we annotate with results. Not owned by us. + WorkingSet* _workingSet; + + // The value to match against the _id field. + BSONObj _key; + + // Not owned by us. + CanonicalQuery* _query; + + // Did someone call kill() on us? + bool _killed; + + // Have we returned our one document? + bool _done; + + CommonStats _commonStats; + IDHackStats _specificStats; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index bbdb836d54b..1982f134b8d 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -131,6 +131,9 @@ namespace mongo { PlanStage::StageState IndexScan::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (NULL == _indexCursor.get()) { // First call to work(). Perform possibly heavy init. initIndexScan(); @@ -339,6 +342,11 @@ namespace mongo { } } + vector<PlanStage*> IndexScan::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* IndexScan::getStats() { // WARNING: this could be called even if the collection was dropped. Do not access any // catalog information here. diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h index 7ad2ef316b2..6b993f387ad 100644 --- a/src/mongo/db/exec/index_scan.h +++ b/src/mongo/db/exec/index_scan.h @@ -91,6 +91,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_IXSCAN; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/keep_mutations.cpp b/src/mongo/db/exec/keep_mutations.cpp index b52b7c0540b..586e9ab3bb5 100644 --- a/src/mongo/db/exec/keep_mutations.cpp +++ b/src/mongo/db/exec/keep_mutations.cpp @@ -53,6 +53,9 @@ namespace mongo { PlanStage::StageState KeepMutationsStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + // If we've returned as many results as we're limited to, isEOF will be true. if (isEOF()) { return PlanStage::IS_EOF; } @@ -115,6 +118,12 @@ namespace mongo { _child->invalidate(dl, type); } + vector<PlanStage*> KeepMutationsStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* KeepMutationsStage::getStats() { _commonStats.isEOF = isEOF(); auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_KEEP_MUTATIONS)); diff --git a/src/mongo/db/exec/keep_mutations.h b/src/mongo/db/exec/keep_mutations.h index 02bc8da319a..6b10063512c 100644 --- a/src/mongo/db/exec/keep_mutations.h +++ b/src/mongo/db/exec/keep_mutations.h @@ -55,6 +55,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_KEEP_MUTATIONS; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/limit.cpp b/src/mongo/db/exec/limit.cpp index 21f6ade2107..50fd0826eb8 100644 --- a/src/mongo/db/exec/limit.cpp +++ b/src/mongo/db/exec/limit.cpp @@ -45,6 +45,9 @@ namespace mongo { PlanStage::StageState LimitStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (0 == _numToReturn) { // We've returned as many results as we're limited to. return PlanStage::IS_EOF; @@ -95,6 +98,12 @@ namespace mongo { _child->invalidate(dl, type); } + vector<PlanStage*> LimitStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* LimitStage::getStats() { _commonStats.isEOF = isEOF(); _specificStats.limit = _numToReturn; diff --git a/src/mongo/db/exec/limit.h b/src/mongo/db/exec/limit.h index 5b5293823c2..05866c0c5b3 100644 --- a/src/mongo/db/exec/limit.h +++ b/src/mongo/db/exec/limit.h @@ -53,6 +53,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_LIMIT; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/merge_sort.cpp b/src/mongo/db/exec/merge_sort.cpp index e35d0570423..6716a4b1c56 100644 --- a/src/mongo/db/exec/merge_sort.cpp +++ b/src/mongo/db/exec/merge_sort.cpp @@ -67,6 +67,9 @@ namespace mongo { PlanStage::StageState MergeSortStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } if (!_noResultToMerge.empty()) { @@ -246,6 +249,10 @@ namespace mongo { return false; } + vector<PlanStage*> MergeSortStage::getChildren() const { + return _children; + } + PlanStageStats* MergeSortStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/merge_sort.h b/src/mongo/db/exec/merge_sort.h index 3eda2add8ff..1b493d15913 100644 --- a/src/mongo/db/exec/merge_sort.h +++ b/src/mongo/db/exec/merge_sort.h @@ -69,6 +69,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_SORT_MERGE; } + PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/mock_stage.cpp b/src/mongo/db/exec/mock_stage.cpp index 1fbc01b06ae..e316fed458e 100644 --- a/src/mongo/db/exec/mock_stage.cpp +++ b/src/mongo/db/exec/mock_stage.cpp @@ -64,4 +64,9 @@ namespace mongo { _members.push(id); } + vector<PlanStage*> MockStage::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + } // namespace mongo diff --git a/src/mongo/db/exec/mock_stage.h b/src/mongo/db/exec/mock_stage.h index 9476038a16a..bdb2d667615 100644 --- a/src/mongo/db/exec/mock_stage.h +++ b/src/mongo/db/exec/mock_stage.h @@ -61,6 +61,11 @@ namespace mongo { virtual void prepareToYield() { } virtual void recoverFromYield() { } virtual void invalidate(const DiskLoc& dl, InvalidationType type) { } + + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_MOCK; } + virtual PlanStageStats* getStats() { return NULL; } /** diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp index 2268c8ed58b..70ed5b52b58 100644 --- a/src/mongo/db/exec/multi_plan.cpp +++ b/src/mongo/db/exec/multi_plan.cpp @@ -106,6 +106,9 @@ namespace mongo { } PlanStage::StageState MultiPlanStage::work(WorkingSetID* out) { + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (_failure) { *out = _statusMemberId; return PlanStage::FAILURE; @@ -253,7 +256,7 @@ namespace mongo { } } - vector<PlanStageStats*>* MultiPlanStage::generateCandidateStats() { + vector<PlanStageStats*> MultiPlanStage::generateCandidateStats() { for (size_t ix = 0; ix < _candidates.size(); ix++) { if (ix == (size_t)_bestPlanIdx) { continue; } if (ix == (size_t)_backupPlanIdx) { continue; } @@ -267,7 +270,7 @@ namespace mongo { } } - return &_candidateStats; + return _candidateStats; } void MultiPlanStage::clearCandidates() { @@ -330,31 +333,6 @@ namespace mongo { return !doneWorking; } - Status MultiPlanStage::executeWinningPlan() { - invariant(_bestPlanIdx != kNoSuchPlan); - PlanStage* winner = _candidates[_bestPlanIdx].root; - WorkingSet* ws = _candidates[_bestPlanIdx].ws; - - bool doneWorking = false; - - while (!doneWorking) { - WorkingSetID id = WorkingSet::INVALID_ID; - PlanStage::StageState state = winner->work(&id); - - if (PlanStage::IS_EOF == state || PlanStage::DEAD == state) { - doneWorking = true; - } - else if (PlanStage::FAILURE == state) { - // Propogate error. - BSONObj errObj; - WorkingSetCommon::getStatusMemberObject(*ws, id, &errObj); - return Status(ErrorCodes::BadValue, WorkingSetCommon::toStatusString(errObj)); - } - } - - return Status::OK(); - } - Status MultiPlanStage::executeAllPlans() { // Boolean vector keeping track of which plans are done. vector<bool> planDone(_candidates.size(), false); @@ -499,6 +477,21 @@ namespace mongo { } } + vector<PlanStage*> MultiPlanStage::getChildren() const { + vector<PlanStage*> children; + + if (bestPlanChosen()) { + children.push_back(_candidates[_bestPlanIdx].root); + } + else { + for (size_t i = 0; i < _candidates.size(); i++) { + children.push_back(_candidates[i].root); + } + } + + return children; + } + PlanStageStats* MultiPlanStage::getStats() { if (bestPlanChosen()) { return _candidates[_bestPlanIdx].root->getStats(); diff --git a/src/mongo/db/exec/multi_plan.h b/src/mongo/db/exec/multi_plan.h index fe25dcddfde..dadc38a833a 100644 --- a/src/mongo/db/exec/multi_plan.h +++ b/src/mongo/db/exec/multi_plan.h @@ -68,6 +68,10 @@ namespace mongo { virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_MULTI_PLAN; } + virtual PlanStageStats* getStats(); /** Takes ownership of QuerySolution and PlanStage. not of WorkingSet */ @@ -95,29 +99,20 @@ namespace mongo { */ bool hasBackupPlan() const; - /** - * Gathers execution stats for all losing plans. - */ - vector<PlanStageStats*>* generateCandidateStats(); - // // Used by explain. // /** - * Runs the winning plan into it hits EOF or returns DEAD, throwing out the results. - * Execution stats are gathered in the process. - * - * You can call this after calling pickBestPlan(...). It expects that a winning plan - * has already been selected. + * Gathers execution stats for all losing plans. */ - Status executeWinningPlan(); + vector<PlanStageStats*> generateCandidateStats(); /** * Runs the candidate plans until each has either hit EOF or returned DEAD. Results * from the plans are thrown out, but execution stats are gathered. * - * You can call this after calling pickBestPlan(...). It expects that a winning plan + * You should call this after calling pickBestPlan(...). It expects that a winning plan * has already been selected. */ Status executeAllPlans(); diff --git a/src/mongo/db/exec/oplogstart.cpp b/src/mongo/db/exec/oplogstart.cpp index d2bedf62976..f63f95b27b9 100644 --- a/src/mongo/db/exec/oplogstart.cpp +++ b/src/mongo/db/exec/oplogstart.cpp @@ -175,6 +175,11 @@ namespace mongo { } } + vector<PlanStage*> OplogStart::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + int OplogStart::_backwardsScanTime = 5; } // namespace mongo diff --git a/src/mongo/db/exec/oplogstart.h b/src/mongo/db/exec/oplogstart.h index c0f74573c48..e10721a9064 100644 --- a/src/mongo/db/exec/oplogstart.h +++ b/src/mongo/db/exec/oplogstart.h @@ -70,9 +70,13 @@ namespace mongo { virtual void prepareToYield(); virtual void recoverFromYield(); + virtual std::vector<PlanStage*> getChildren() const; + // PS. don't call this. virtual PlanStageStats* getStats() { return NULL; } + virtual StageType stageType() const { return STAGE_OPLOG_START; } + // For testing only. void setBackwardsScanTime(int newTime) { _backwardsScanTime = newTime; } bool isExtentHopping() { return _extentHopping; } diff --git a/src/mongo/db/exec/or.cpp b/src/mongo/db/exec/or.cpp index 9266c6fec10..e9832567e40 100644 --- a/src/mongo/db/exec/or.cpp +++ b/src/mongo/db/exec/or.cpp @@ -52,6 +52,9 @@ namespace mongo { PlanStage::StageState OrStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } if (0 == _specificStats.matchTested.size()) { @@ -168,6 +171,10 @@ namespace mongo { } } + vector<PlanStage*> OrStage::getChildren() const { + return _children; + } + PlanStageStats* OrStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/or.h b/src/mongo/db/exec/or.h index 3e51bbcd94e..69aec74e22d 100644 --- a/src/mongo/db/exec/or.h +++ b/src/mongo/db/exec/or.h @@ -58,6 +58,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_OR; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h index a4f53d8ca9e..ac638fd2f25 100644 --- a/src/mongo/db/exec/plan_stage.h +++ b/src/mongo/db/exec/plan_stage.h @@ -41,11 +41,11 @@ namespace mongo { * A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is * the smallest piece of machinery used in executing a compiled query. Stages either access * data (from a collection or an index) to create a stream of results, or transform a stream of - * results (e.g. AND, OR, SORT) to create a stream of results. + * results (e.g. AND, OR, SORT) to create a stream of results. * * Stages have zero or more input streams but only one output stream. Data-accessing stages are * leaves and data-transforming stages have children. Stages can be connected together to form - * a tree which is then executed (see plan_runner.h) to solve a query. + * a tree which is then executed (see plan_runner.h) to solve a query. * * A stage's input and output are each typed. Only stages with compatible types can be * connected. @@ -197,6 +197,17 @@ namespace mongo { virtual void invalidate(const DiskLoc& dl, InvalidationType type) = 0; /** + * Retrieve a list of this stage's children. This stage keeps ownership of + * its children. + */ + virtual std::vector<PlanStage*> getChildren() const = 0; + + /** + * What type of stage is this? + */ + virtual StageType stageType() const = 0; + + /** * Returns a tree of stats. See plan_stats.h for the details of this structure. If the * stage has any children it must propagate the request for stats to them. * diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index dab069ed6c2..55b8eba51e6 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -38,6 +38,7 @@ #include "mongo/db/geo/hash.h" #include "mongo/db/query/stage_types.h" #include "mongo/platform/cstdint.h" +#include "mongo/util/time_support.h" namespace mongo { @@ -63,6 +64,7 @@ namespace mongo { invalidates(0), advanced(0), needTime(0), + executionTimeMillis(0), isEOF(false) { } // String giving the type of the stage. Not owned. const char* stageTypeStr; @@ -81,6 +83,9 @@ namespace mongo { // is no filter affixed, then 'filter' should be an empty BSONObj. BSONObj filter; + // Time elapsed while working inside this stage. + long long executionTimeMillis; + // TODO: have some way of tracking WSM sizes (or really any series of #s). We can measure // the size of our inputs and the size of our outputs. We can do a lot with the WS here. @@ -93,6 +98,34 @@ namespace mongo { CommonStats(); }; + /** + * This class increments a counter by the time elapsed since its construction when + * it goes out of scope. + */ + class ScopedTimer { + public: + ScopedTimer(long long* counter) : _counter(counter) { + _start = curTimeMillis64(); + } + + ~ScopedTimer() { + long long elapsed = curTimeMillis64() - _start; + *_counter += elapsed; + } + + private: + // Default constructor disallowed. + ScopedTimer(); + + MONGO_DISALLOW_COPYING(ScopedTimer); + + // Reference to the counter that we are incrementing with the elapsed time. + long long* _counter; + + // Time at which the timer was constructed. + long long _start; + }; + // The universal container for a stage's stats. struct PlanStageStats { PlanStageStats(const CommonStats& c, StageType t) : stageType(t), common(c) { } @@ -210,6 +243,27 @@ namespace mongo { size_t docsTested; }; + struct CountStats : public SpecificStats { + CountStats() : isMultiKey(false), + keysExamined(0) { } + + virtual ~CountStats() { } + + virtual SpecificStats* clone() const { + CountStats* specific = new CountStats(*this); + // BSON objects have to be explicitly copied. + specific->keyPattern = keyPattern.getOwned(); + return specific; + } + + BSONObj keyPattern; + + bool isMultiKey; + + size_t keysExamined; + + }; + struct DistinctScanStats : public SpecificStats { DistinctScanStats() : keysExamined(0) { } @@ -224,7 +278,8 @@ namespace mongo { struct FetchStats : public SpecificStats { FetchStats() : alreadyHasObj(0), forcedFetches(0), - matchTested(0) { } + matchTested(0), + docsExamined(0) { } virtual ~FetchStats() { } @@ -245,6 +300,28 @@ namespace mongo { // We know how many passed (it's the # of advanced) and therefore how many failed. size_t matchTested; + + // The total number of full documents touched by the fetch stage. + size_t docsExamined; + }; + + struct IDHackStats : public SpecificStats { + IDHackStats() : keysExamined(0), + docsExamined(0) { } + + virtual ~IDHackStats() { } + + virtual SpecificStats* clone() const { + IDHackStats* specific = new IDHackStats(*this); + return specific; + } + + // Number of entries retrieved from the index while executing the idhack. + size_t keysExamined; + + // Number of documents retrieved from the collection while executing the idhack. + size_t docsExamined; + }; struct IndexScanStats : public SpecificStats { diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp index 60c0aa221f3..c4c8388462f 100644 --- a/src/mongo/db/exec/projection.cpp +++ b/src/mongo/db/exec/projection.cpp @@ -50,6 +50,8 @@ namespace mongo { _commonStats(kStageType), _projImpl(params.projImpl) { + _projObj = params.projObj; + if (ProjectionStageParams::NO_FAST_PATH == _projImpl) { _exec.reset(new ProjectionExec(params.projObj, params.fullExpression, @@ -59,8 +61,6 @@ namespace mongo { // We shouldn't need the full expression if we're fast-pathing. invariant(NULL == params.fullExpression); - _projObj = params.projObj; - // Sanity-check the input. invariant(_projObj.isOwned()); invariant(!_projObj.isEmpty()); @@ -197,6 +197,9 @@ namespace mongo { PlanStage::StageState ProjectionStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + WorkingSetID id = WorkingSet::INVALID_ID; StageState status = _child->work(&id); @@ -247,11 +250,20 @@ namespace mongo { _child->invalidate(dl, type); } + vector<PlanStage*> ProjectionStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* ProjectionStage::getStats() { _commonStats.isEOF = isEOF(); - _specificStats.projObj = _projObj; auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_PROJECTION)); - ret->specific.reset(new ProjectionStats(_specificStats)); + + ProjectionStats* projStats = new ProjectionStats(_specificStats); + projStats->projObj = _projObj; + ret->specific.reset(projStats); + ret->children.push_back(_child->getStats()); return ret.release(); } diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h index 4b66e0dc8af..af586156f77 100644 --- a/src/mongo/db/exec/projection.h +++ b/src/mongo/db/exec/projection.h @@ -87,6 +87,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_PROJECTION; } + PlanStageStats* getStats(); typedef unordered_set<StringData, StringData::Hasher> FieldSet; diff --git a/src/mongo/db/exec/s2near.cpp b/src/mongo/db/exec/s2near.cpp index faaeddaca22..c7d37111840 100644 --- a/src/mongo/db/exec/s2near.cpp +++ b/src/mongo/db/exec/s2near.cpp @@ -119,6 +119,9 @@ namespace mongo { } PlanStage::StageState S2NearStage::work(WorkingSetID* out) { + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (!_initted) { init(); } if (_failed) { @@ -426,6 +429,11 @@ namespace mongo { return _innerRadius >= _maxDistance; } + vector<PlanStage*> S2NearStage::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* S2NearStage::getStats() { // TODO: must agg stats across child ixscan/fetches. // TODO: we can do better than this, need own common stats. diff --git a/src/mongo/db/exec/s2near.h b/src/mongo/db/exec/s2near.h index 5286d4b4302..f273b8bf992 100644 --- a/src/mongo/db/exec/s2near.h +++ b/src/mongo/db/exec/s2near.h @@ -74,6 +74,10 @@ namespace mongo { void recoverFromYield(); void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_GEO_NEAR_2DSPHERE; } + PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/shard_filter.cpp b/src/mongo/db/exec/shard_filter.cpp index 784afb0fad3..b425c0a6138 100644 --- a/src/mongo/db/exec/shard_filter.cpp +++ b/src/mongo/db/exec/shard_filter.cpp @@ -47,6 +47,9 @@ namespace mongo { PlanStage::StageState ShardFilterStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + // If we've returned as many results as we're limited to, isEOF will be true. if (isEOF()) { return PlanStage::IS_EOF; } @@ -96,6 +99,12 @@ namespace mongo { _child->invalidate(dl, type); } + vector<PlanStage*> ShardFilterStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* ShardFilterStage::getStats() { _commonStats.isEOF = isEOF(); auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_SHARDING_FILTER)); diff --git a/src/mongo/db/exec/shard_filter.h b/src/mongo/db/exec/shard_filter.h index ce1a4ac64de..f1ce3fc615e 100644 --- a/src/mongo/db/exec/shard_filter.h +++ b/src/mongo/db/exec/shard_filter.h @@ -84,6 +84,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_SHARDING_FILTER; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/skip.cpp b/src/mongo/db/exec/skip.cpp index 90450169d6d..cf0bc55dc88 100644 --- a/src/mongo/db/exec/skip.cpp +++ b/src/mongo/db/exec/skip.cpp @@ -45,6 +45,9 @@ namespace mongo { PlanStage::StageState SkipStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + WorkingSetID id = WorkingSet::INVALID_ID; StageState status = _child->work(&id); @@ -99,6 +102,12 @@ namespace mongo { _child->invalidate(dl, type); } + vector<PlanStage*> SkipStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* SkipStage::getStats() { _commonStats.isEOF = isEOF(); _specificStats.skip = _toSkip; diff --git a/src/mongo/db/exec/skip.h b/src/mongo/db/exec/skip.h index 48366a71616..1dafa9a305c 100644 --- a/src/mongo/db/exec/skip.h +++ b/src/mongo/db/exec/skip.h @@ -52,6 +52,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_SKIP; } + virtual PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/sort.cpp b/src/mongo/db/exec/sort.cpp index b30a198a1c1..7486e415744 100644 --- a/src/mongo/db/exec/sort.cpp +++ b/src/mongo/db/exec/sort.cpp @@ -304,6 +304,9 @@ namespace mongo { PlanStage::StageState SortStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (NULL == _sortKeyGen) { // This is heavy and should be done as part of work(). _sortKeyGen.reset(new SortStageKeyGenerator(_collection, _pattern, _query)); @@ -449,6 +452,12 @@ namespace mongo { } } + vector<PlanStage*> SortStage::getChildren() const { + vector<PlanStage*> children; + children.push_back(_child.get()); + return children; + } + PlanStageStats* SortStage::getStats() { _commonStats.isEOF = isEOF(); _specificStats.memLimit = kMaxBytes; diff --git a/src/mongo/db/exec/sort.h b/src/mongo/db/exec/sort.h index ac4ae63b3f1..3a80397ef05 100644 --- a/src/mongo/db/exec/sort.h +++ b/src/mongo/db/exec/sort.h @@ -153,6 +153,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_SORT; } + PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp new file mode 100644 index 00000000000..67e631abebd --- /dev/null +++ b/src/mongo/db/exec/subplan.cpp @@ -0,0 +1,504 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/subplan.h" + +#include "mongo/client/dbclientinterface.h" +#include "mongo/db/exec/multi_plan.h" +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/get_executor.h" +#include "mongo/db/query/plan_executor.h" +#include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_access.h" +#include "mongo/db/query/qlog.h" +#include "mongo/db/query/query_planner.h" +#include "mongo/db/query/stage_builder.h" + +namespace mongo { + + // static + const char* SubplanStage::kStageType = "SUBPLAN"; + + SubplanStage::SubplanStage(Collection* collection, + WorkingSet* ws, + const QueryPlannerParams& params, + CanonicalQuery* cq) + : _state(SubplanStage::PLANNING), + _collection(collection), + _ws(ws), + _plannerParams(params), + _query(cq), + _killed(false), + _child(NULL), + _commonStats(kStageType) { } + + SubplanStage::~SubplanStage() { + while (!_solutions.empty()) { + vector<QuerySolution*> solns = _solutions.front(); + for (size_t i = 0; i < solns.size(); i++) { + delete solns[i]; + } + _solutions.pop(); + } + + while (!_cqs.empty()) { + delete _cqs.front(); + _cqs.pop(); + } + } + + // static + Status SubplanStage::make(Collection* collection, + WorkingSet* ws, + const QueryPlannerParams& params, + CanonicalQuery* cq, + SubplanStage** out) { + auto_ptr<SubplanStage> autoStage(new SubplanStage(collection, ws, params, cq)); + Status planningStatus = autoStage->planSubqueries(); + if (!planningStatus.isOK()) { + return planningStatus; + } + + *out = autoStage.release(); + return Status::OK(); + } + + // static + bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { + const LiteParsedQuery& lpq = query.getParsed(); + const MatchExpression* expr = query.root(); + + // Only rooted ORs work with the subplan scheme. + if (MatchExpression::OR != expr->matchType()) { + return false; + } + + // Collection scan + // No sort order requested + if (lpq.getSort().isEmpty() && + expr->matchType() == MatchExpression::AND && expr->numChildren() == 0) { + return false; + } + + // Hint provided + if (!lpq.getHint().isEmpty()) { + return false; + } + + // Min provided + // Min queries are a special case of hinted queries. + if (!lpq.getMin().isEmpty()) { + return false; + } + + // Max provided + // Similar to min, max queries are a special case of hinted queries. + if (!lpq.getMax().isEmpty()) { + return false; + } + + // Tailable cursors won't get cached, just turn into collscans. + if (query.getParsed().hasOption(QueryOption_CursorTailable)) { + return false; + } + + // Snapshot is really a hint. + if (query.getParsed().isSnapshot()) { + return false; + } + + return true; + } + + Status SubplanStage::planSubqueries() { + MatchExpression* theOr = _query->root(); + + for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { + const IndexEntry& ie = _plannerParams.indices[i]; + _indexMap[ie.keyPattern] = i; + QLOG() << "Subplanner: index " << i << " is " << ie.toString() << endl; + } + + const WhereCallbackReal whereCallback(_collection->ns().db()); + + for (size_t i = 0; i < theOr->numChildren(); ++i) { + // Turn the i-th child into its own query. + MatchExpression* orChild = theOr->getChild(i); + CanonicalQuery* orChildCQ; + Status childCQStatus = CanonicalQuery::canonicalize(*_query, + orChild, + &orChildCQ, + whereCallback); + if (!childCQStatus.isOK()) { + mongoutils::str::stream ss; + ss << "Subplanner: Can't canonicalize subchild " << orChild->toString() + << " " << childCQStatus.reason(); + return Status(ErrorCodes::BadValue, ss); + } + + // Make sure it gets cleaned up. + auto_ptr<CanonicalQuery> safeOrChildCQ(orChildCQ); + + // Plan the i-th child. + vector<QuerySolution*> solutions; + + // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from + // considering any plan that's a collscan. + QLOG() << "Subplanner: planning child " << i << " of " << theOr->numChildren(); + Status status = QueryPlanner::plan(*safeOrChildCQ, _plannerParams, &solutions); + + if (!status.isOK()) { + mongoutils::str::stream ss; + ss << "Subplanner: Can't plan for subchild " << orChildCQ->toString() + << " " << status.reason(); + return Status(ErrorCodes::BadValue, ss); + } + QLOG() << "Subplanner: got " << solutions.size() << " solutions"; + + if (0 == solutions.size()) { + // If one child doesn't have an indexed solution, bail out. + mongoutils::str::stream ss; + ss << "Subplanner: No solutions for subchild " << orChildCQ->toString(); + return Status(ErrorCodes::BadValue, ss); + } + + // Hang onto the canonicalized subqueries and the corresponding query solutions + // so that they can be used in subplan running later on. + _cqs.push(safeOrChildCQ.release()); + _solutions.push(solutions); + } + + return Status::OK(); + } + + bool SubplanStage::runSubplans() { + // This is what we annotate with the index selections and then turn into a solution. + auto_ptr<OrMatchExpression> theOr( + static_cast<OrMatchExpression*>(_query->root()->shallowClone())); + + // This is the skeleton of index selections that is inserted into the cache. + auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); + + for (size_t i = 0; i < theOr->numChildren(); ++i) { + MatchExpression* orChild = theOr->getChild(i); + + auto_ptr<CanonicalQuery> orChildCQ(_cqs.front()); + _cqs.pop(); + + // 'solutions' is owned by the SubplanStage instance until + // it is popped from the queue. + vector<QuerySolution*> solutions = _solutions.front(); + _solutions.pop(); + + // We already checked for zero solutions in planSubqueries(...). + invariant(!solutions.empty()); + + if (1 == solutions.size()) { + // There is only one solution. Transfer ownership to an auto_ptr. + auto_ptr<QuerySolution> autoSoln(solutions[0]); + + // We want a well-formed *indexed* solution. + if (NULL == autoSoln->cacheData.get()) { + // For example, we don't cache things for 2d indices. + QLOG() << "Subplanner: No cache data for subchild " << orChild->toString(); + return false; + } + + if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) { + QLOG() << "Subplanner: No indexed cache data for subchild " + << orChild->toString(); + return false; + } + + // Add the index assignments to our original query. + Status tagStatus = QueryPlanner::tagAccordingToCache( + orChild, autoSoln->cacheData->tree.get(), _indexMap); + + if (!tagStatus.isOK()) { + QLOG() << "Subplanner: Failed to extract indices from subchild " + << orChild->toString(); + return false; + } + + // Add the child's cache data to the cache data we're creating for the main query. + cacheData->children.push_back(autoSoln->cacheData->tree->clone()); + } + else { + // N solutions, rank them. Takes ownership of orChildCQ. + + // the working set will be shared by the candidate plans and owned by the runner + WorkingSet* sharedWorkingSet = new WorkingSet(); + + auto_ptr<MultiPlanStage> multiPlanStage(new MultiPlanStage(_collection, + orChildCQ.get())); + + // Dump all the solutions into the MPR. + for (size_t ix = 0; ix < solutions.size(); ++ix) { + PlanStage* nextPlanRoot; + verify(StageBuilder::build(_collection, + *solutions[ix], + sharedWorkingSet, + &nextPlanRoot)); + + // Owns first two arguments + multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet); + } + + multiPlanStage->pickBestPlan(); + if (!multiPlanStage->bestPlanChosen()) { + QLOG() << "Subplanner: Failed to pick best plan for subchild " + << orChildCQ->toString(); + return false; + } + + scoped_ptr<PlanExecutor> exec(new PlanExecutor(sharedWorkingSet, + multiPlanStage.release(), + _collection)); + + _child.reset(exec->releaseStages()); + + if (_killed) { + QLOG() << "Subplanner: Killed while picking best plan for subchild " + << orChild->toString(); + return false; + } + + QuerySolution* bestSoln = multiPlanStage->bestSolution(); + + if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) { + QLOG() << "Subplanner: No indexed cache data for subchild " + << orChild->toString(); + return false; + } + + // Add the index assignments to our original query. + Status tagStatus = QueryPlanner::tagAccordingToCache( + orChild, bestSoln->cacheData->tree.get(), _indexMap); + + if (!tagStatus.isOK()) { + QLOG() << "Subplanner: Failed to extract indices from subchild " + << orChild->toString(); + return false; + } + + cacheData->children.push_back(bestSoln->cacheData->tree->clone()); + } + } + + // Must do this before using the planner functionality. + sortUsingTags(theOr.get()); + + // Use the cached index assignments to build solnRoot. Takes ownership of 'theOr' + QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( + *_query, theOr.release(), false, _plannerParams.indices); + + if (NULL == solnRoot) { + QLOG() << "Subplanner: Failed to build indexed data path for subplanned query\n"; + return false; + } + + QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString(); + + // Takes ownership of 'solnRoot' + QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query, + _plannerParams, + solnRoot); + + if (NULL == soln) { + QLOG() << "Subplanner: Failed to analyze subplanned query"; + return false; + } + + // We want our franken-solution to be cached. + SolutionCacheData* scd = new SolutionCacheData(); + scd->tree.reset(cacheData.release()); + soln->cacheData.reset(scd); + + QLOG() << "Subplanner: Composite solution is " << soln->toString() << endl; + + // We use one of these even if there is one plan. We do this so that the entry is cached + // with stats obtained in the same fashion as a competitive ranking would have obtained + // them. + auto_ptr<MultiPlanStage> multiPlanStage(new MultiPlanStage(_collection, _query)); + WorkingSet* ws = new WorkingSet(); + PlanStage* root; + verify(StageBuilder::build(_collection, *soln, ws, &root)); + multiPlanStage->addPlan(soln, root, ws); // Takes ownership first two arguments. + + multiPlanStage->pickBestPlan(); + if (! multiPlanStage->bestPlanChosen()) { + QLOG() << "Subplanner: Failed to pick best plan for subchild " + << _query->toString(); + return false; + } + + scoped_ptr<PlanExecutor> exec(new PlanExecutor(ws, multiPlanStage.release(), _collection)); + + _child.reset(exec->releaseStages()); + + return true; + } + + bool SubplanStage::isEOF() { + if (_killed) { + return true; + } + + // If we're still planning we're not done yet. + if (SubplanStage::PLANNING == _state) { + return false; + } + + // If we're running we best have a runner. + invariant(_child.get()); + return _child->isEOF(); + } + + PlanStage::StageState SubplanStage::work(WorkingSetID* out) { + ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + if (_killed) { + return PlanStage::DEAD; + } + + if (isEOF()) { return PlanStage::IS_EOF; } + + if (SubplanStage::PLANNING == _state) { + // Try to run as sub-plans. + if (runSubplans()) { + // If runSubplans returns true we expect something here. + invariant(_child.get()); + } + else if (!_killed) { + // Couldn't run as subplans so we'll just call normal getExecutor. + PlanExecutor* exec; + Status status = getExecutorAlwaysPlan(_collection, _query, _plannerParams, &exec); + + if (!status.isOK()) { + // We utterly failed. + _killed = true; + + // Propagate the error to the user wrapped in a BSONObj + WorkingSetID id = _ws->allocate(); + WorkingSetMember* member = _ws->get(id); + member->state = WorkingSetMember::OWNED_OBJ; + member->keyData.clear(); + member->loc = DiskLoc(); + + BSONObjBuilder bob; + bob.append("ok", status.isOK() ? 1.0 : 0.0); + bob.append("code", status.code()); + bob.append("errmsg", status.reason()); + member->obj = bob.obj(); + + *out = id; + return PlanStage::FAILURE; + } + else { + scoped_ptr<PlanExecutor> cleanupExec(exec); + _child.reset(exec->releaseStages()); + } + } + + // We can change state when we're either killed or we have an underlying runner. + invariant(_killed || NULL != _child.get()); + _state = SubplanStage::RUNNING; + } + + if (_killed) { + return PlanStage::DEAD; + } + + if (isEOF()) { + return PlanStage::IS_EOF; + } + + // If we're here we should have planned already. + invariant(SubplanStage::RUNNING == _state); + invariant(_child.get()); + return _child->work(out); + } + + void SubplanStage::prepareToYield() { + ++_commonStats.yields; + if (_killed) { + return; + } + + // We're ranking a sub-plan via an MPR or we're streaming results from this stage. Either + // way, pass on the request. + if (NULL != _child.get()) { + _child->prepareToYield(); + } + } + + void SubplanStage::recoverFromYield() { + ++_commonStats.unyields; + if (_killed) { + return; + } + + // We're ranking a sub-plan via an MPR or we're streaming results from this stage. Either + // way, pass on the request. + if (NULL != _child.get()) { + _child->recoverFromYield(); + } + } + + void SubplanStage::invalidate(const DiskLoc& dl, InvalidationType type) { + ++_commonStats.invalidates; + if (_killed) { + return; + } + + if (NULL != _child.get()) { + _child->invalidate(dl, type); + } + } + + vector<PlanStage*> SubplanStage::getChildren() const { + vector<PlanStage*> children; + if (NULL != _child.get()) { + children.push_back(_child.get()); + } + return children; + } + + PlanStageStats* SubplanStage::getStats() { + _commonStats.isEOF = isEOF(); + auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_SUBPLAN)); + ret->children.push_back(_child->getStats()); + return ret.release(); + } + +} // namespace mongo diff --git a/src/mongo/db/exec/subplan.h b/src/mongo/db/exec/subplan.h new file mode 100644 index 00000000000..1fb6c698523 --- /dev/null +++ b/src/mongo/db/exec/subplan.h @@ -0,0 +1,138 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <boost/scoped_ptr.hpp> +#include <string> +#include <queue> + +#include "mongo/base/status.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/query/query_planner_params.h" +#include "mongo/db/query/query_solution.h" + +namespace mongo { + + /** + * The SubplanStage is used for rooted $or queries. It plans each clause of the $or + * individually, and then creates an overall query plan based on the winning plan from + * each clause. + * + * Uses the MultiPlanStage in order to rank plans for the individual clauses. + */ + class SubplanStage : public PlanStage { + public: + /** + * Used to create SubplanStage instances. The caller owns the instance + * returned through 'out'. + * + * 'out' is valid only if an OK status is returned. + */ + static Status make(Collection* collection, + WorkingSet* ws, + const QueryPlannerParams& params, + CanonicalQuery* cq, + SubplanStage** out); + + virtual ~SubplanStage(); + + static bool canUseSubplanning(const CanonicalQuery& query); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl, InvalidationType type); + + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_SUBPLAN; } + + PlanStageStats* getStats(); + + static const char* kStageType; + + /** + * Plan each branch of the $or independently, and store the resulting + * lists of query solutions in '_solutions'. + * + * Called from SubplanStage::make so that construction of the subplan stage + * fails immediately, rather than returning a plan executor and subsequently + * through getNext(...). + */ + Status planSubqueries(); + + private: + SubplanStage(Collection* collection, + WorkingSet* ws, + const QueryPlannerParams& params, + CanonicalQuery* cq); + + bool runSubplans(); + + enum SubplanningState { + PLANNING, + RUNNING, + }; + + SubplanningState _state; + + Collection* _collection; + + // Not owned here. + WorkingSet* _ws; + + QueryPlannerParams _plannerParams; + + // Not owned here. + CanonicalQuery* _query; + + bool _killed; + + boost::scoped_ptr<PlanStage> _child; + + // We do the subquery planning up front, and keep the resulting + // query solutions here. Lists of query solutions are dequeued + // and ownership is transferred to the underlying runners one + // at a time. + std::queue< std::vector<QuerySolution*> > _solutions; + + // Holds the canonicalized subqueries. Ownership is transferred + // to the underlying runners one at a time. + std::queue<CanonicalQuery*> _cqs; + + // We need this to extract cache-friendly index data from the index assignments. + map<BSONObj, size_t> _indexMap; + + CommonStats _commonStats; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/text.cpp b/src/mongo/db/exec/text.cpp index 170a800eed9..50f7e3f99e4 100644 --- a/src/mongo/db/exec/text.cpp +++ b/src/mongo/db/exec/text.cpp @@ -63,6 +63,9 @@ namespace mongo { PlanStage::StageState TextStage::work(WorkingSetID* out) { ++_commonStats.works; + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + if (isEOF()) { return PlanStage::IS_EOF; } invariant(_internalState != DONE); @@ -134,6 +137,11 @@ namespace mongo { } } + vector<PlanStage*> TextStage::getChildren() const { + vector<PlanStage*> empty; + return empty; + } + PlanStageStats* TextStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/text.h b/src/mongo/db/exec/text.h index 17f28766ae0..851d3cdada7 100644 --- a/src/mongo/db/exec/text.h +++ b/src/mongo/db/exec/text.h @@ -104,6 +104,10 @@ namespace mongo { virtual void recoverFromYield(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); + virtual std::vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const { return STAGE_TEXT; } + PlanStageStats* getStats(); static const char* kStageType; diff --git a/src/mongo/db/instance.cpp b/src/mongo/db/instance.cpp index 3894d07c10d..953a2debe4f 100644 --- a/src/mongo/db/instance.cpp +++ b/src/mongo/db/instance.cpp @@ -66,7 +66,7 @@ #include "mongo/db/matcher/matcher.h" #include "mongo/db/mongod_options.h" #include "mongo/db/namespace_string.h" -#include "mongo/db/ops/count.h" +#include "mongo/db/commands/count.h" #include "mongo/db/ops/delete_executor.h" #include "mongo/db/ops/delete_request.h" #include "mongo/db/ops/insert.h" diff --git a/src/mongo/db/ops/count.cpp b/src/mongo/db/ops/count.cpp deleted file mode 100644 index a602a1c4478..00000000000 --- a/src/mongo/db/ops/count.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// count.cpp - -/** - * Copyright (C) 2013 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/ops/count.h" - -#include "mongo/db/client.h" -#include "mongo/db/clientcursor.h" -#include "mongo/db/catalog/database.h" -#include "mongo/db/catalog/collection.h" -#include "mongo/db/curop.h" -#include "mongo/db/query/get_runner.h" -#include "mongo/db/query/type_explain.h" - -namespace mongo { - - static long long applySkipLimit(long long num, const BSONObj& cmd) { - BSONElement s = cmd["skip"]; - BSONElement l = cmd["limit"]; - - if (s.isNumber()) { - num = num - s.numberLong(); - if (num < 0) { - num = 0; - } - } - - if (l.isNumber()) { - long long limit = l.numberLong(); - if (limit < 0) { - limit = -limit; - } - - // 0 means no limit. - if (limit < num && limit != 0) { - num = limit; - } - } - - return num; - } - - long long runCount(OperationContext* txn, - const string& ns, - const BSONObj &cmd, - string &err, - int &errCode) { - // Lock 'ns'. - Client::Context cx(ns); - Collection* collection = cx.db()->getCollection(txn, ns); - - if (NULL == collection) { - err = "ns missing"; - return -1; - } - - BSONObj query = cmd.getObjectField("query"); - const std::string hint = cmd.getStringField("hint"); - const BSONObj hintObj = hint.empty() ? BSONObj() : BSON("$hint" << hint); - - // count of all objects - if (query.isEmpty()) { - return applySkipLimit(collection->numRecords(), cmd); - } - - Runner* rawRunner; - long long skip = cmd["skip"].numberLong(); - long long limit = cmd["limit"].numberLong(); - - if (limit < 0) { - limit = -limit; - } - - uassertStatusOK(getRunnerCount(collection, query, hintObj, &rawRunner)); - auto_ptr<Runner> runner(rawRunner); - - // Store the plan summary string in CurOp. - Client& client = cc(); - CurOp* currentOp = client.curop(); - if (NULL != currentOp) { - PlanInfo* rawInfo; - Status s = runner->getInfo(NULL, &rawInfo); - if (s.isOK()) { - scoped_ptr<PlanInfo> planInfo(rawInfo); - currentOp->debug().planSummary = planInfo->planSummary.c_str(); - } - } - - try { - const ScopedRunnerRegistration safety(runner.get()); - - long long count = 0; - Runner::RunnerState state; - while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, NULL))) { - if (skip > 0) { - --skip; - } - else { - ++count; - // Fast-path. There's no point in iterating all over the runner if limit - // is set. - if (count >= limit && limit != 0) { - break; - } - } - } - - // Emulate old behavior and return the count even if the runner was killed. This - // happens when the underlying collection is dropped. - return count; - } - catch (const DBException &e) { - err = e.toString(); - errCode = e.getCode(); - } - catch (const std::exception &e) { - err = e.what(); - errCode = 0; - } - - // Historically we have returned zero in many count assertion cases - see SERVER-2291. - log() << "Count with ns: " << ns << " and query: " << query - << " failed with exception: " << err << " code: " << errCode - << endl; - - return -2; - } - -} // namespace mongo diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index e48861a11f5..510a86fe1c6 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -34,6 +34,7 @@ env.Library( "eof_runner.cpp", "explain.cpp", "explain_plan.cpp", + "get_executor.cpp", "get_runner.cpp", "idhack_runner.cpp", "internal_runner.cpp", diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 7477c1a0285..6cf96eac112 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -31,7 +31,7 @@ #include "mongo/db/query/explain.h" #include "mongo/db/exec/multi_plan.h" -#include "mongo/db/query/get_runner.h" +#include "mongo/db/query/get_executor.h" #include "mongo/db/query/plan_executor.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/stage_builder.h" @@ -42,6 +42,44 @@ #include "mongo/util/processinfo.h" #include "mongo/util/version.h" +namespace { + + using namespace mongo; + + /** + * Do a depth-first traversal of the tree rooted at 'root', and flatten the tree nodes + * into the list 'flattened'. + */ + void flattenStatsTree(PlanStageStats* root, vector<PlanStageStats*>* flattened) { + flattened->push_back(root); + for (size_t i = 0; i < root->children.size(); ++i) { + flattenStatsTree(root->children[i], flattened); + } + } + + /** + * Get a pointer to the MultiPlanStage inside the stage tree rooted at 'root'. + * Returns NULL if there is no MPS. + */ + MultiPlanStage* getMultiPlanStage(PlanStage* root) { + if (root->stageType() == STAGE_MULTI_PLAN) { + MultiPlanStage* mps = static_cast<MultiPlanStage*>(root); + return mps; + } + + vector<PlanStage*> children = root->getChildren(); + for (size_t i = 0; i < children.size(); i++) { + MultiPlanStage* mps = getMultiPlanStage(children[i]); + if (mps != NULL) { + return mps; + } + } + + return NULL; + } + +} // namespace + namespace mongo { using mongoutils::str::stream; @@ -49,49 +87,98 @@ namespace mongo { MONGO_EXPORT_SERVER_PARAMETER(enableNewExplain, bool, false); // static - void Explain::explainTree(const PlanStageStats& stats, - Explain::Verbosity verbosity, - BSONObjBuilder* bob) { + void Explain::explainStatsTree(const PlanStageStats& stats, + Explain::Verbosity verbosity, + BSONObjBuilder* bob) { invariant(bob); // Stage name. bob->append("stage", stats.common.stageTypeStr); + // Display the BSON representation of the filter, if there is one. if (!stats.common.filter.isEmpty()) { bob->append("filter", stats.common.filter); } + // Some top-level exec stats get pulled out of the root stage. + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("nReturned", stats.common.advanced); + bob->appendNumber("executionTimeMillis", stats.common.executionTimeMillis); + } + // Stage-specific stats if (STAGE_IXSCAN == stats.stageType) { IndexScanStats* spec = static_cast<IndexScanStats*>(stats.specific.get()); + + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("keysExamined", spec->keysExamined); + } + bob->append("keyPattern", spec->keyPattern); bob->appendBool("isMultiKey", spec->isMultiKey); bob->append("indexBounds", spec->indexBounds); } + else if (STAGE_COLLSCAN == stats.stageType) { + CollectionScanStats* spec = static_cast<CollectionScanStats*>(stats.specific.get()); + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("docsExamined", spec->docsTested); + } + } + else if (STAGE_COUNT == stats.stageType) { + CountStats* spec = static_cast<CountStats*>(stats.specific.get()); + + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("keysExamined", spec->keysExamined); + } + + bob->append("keyPattern", spec->keyPattern); + bob->appendBool("isMultiKey", spec->isMultiKey); + } + else if (STAGE_FETCH == stats.stageType) { + FetchStats* spec = static_cast<FetchStats*>(stats.specific.get()); + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("docsExamined", spec->docsExamined); + } + } else if (STAGE_GEO_NEAR_2D == stats.stageType) { TwoDNearStats* spec = static_cast<TwoDNearStats*>(stats.specific.get()); - bob->append("keyPattern", spec->keyPattern); - // TODO these things are execution stats - /*bob->appendNumber("keysExamined", spec->nscanned); - bob->appendNumber("objectsLoaded", spec->objectsLoaded);*/ + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("keysExamined", spec->nscanned); + bob->appendNumber("docsExamined", spec->objectsLoaded); + } + + bob->append("keyPattern", spec->keyPattern); + } + else if (STAGE_IDHACK == stats.stageType) { + IDHackStats* spec = static_cast<IDHackStats*>(stats.specific.get()); + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("keysExamined", spec->keysExamined); + bob->appendNumber("docsExamined", spec->docsExamined); + } } else if (STAGE_TEXT == stats.stageType) { TextStats* spec = static_cast<TextStats*>(stats.specific.get()); + + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("keysExamined", spec->keysExamined); + bob->appendNumber("docsExamined", spec->fetches); + } + bob->append("indexPrefix", spec->indexPrefix); bob->append("parsedTextQuery", spec->parsedTextQuery); - - // TODO these things are execution stats - /*bob->appendNumber("keysExamined", spec->keysExamined); - bob->appendNumber("fetches", spec->fetches);*/ } else if (STAGE_SORT == stats.stageType) { SortStats* spec = static_cast<SortStats*>(stats.specific.get()); bob->append("sortPattern", spec->sortPattern); + + if (verbosity >= Explain::EXEC_STATS) { + bob->appendNumber("memUsage", spec->memUsage); + } + if (spec->limit > 0) { bob->appendNumber("limitAmount", spec->limit); } - bob->appendNumber("memUsage", spec->memUsage); } else if (STAGE_SORT_MERGE == stats.stageType) { MergeSortStats* spec = static_cast<MergeSortStats*>(stats.specific.get()); @@ -120,18 +207,18 @@ namespace mongo { // rather than 'inputStages'. if (1 == stats.children.size()) { BSONObjBuilder childBob; - explainTree(*stats.children[0], verbosity, &childBob); + explainStatsTree(*stats.children[0], verbosity, &childBob); bob->append("inputStage", childBob.obj()); return; } - // There is more than one child. Recursively explainTree(...) on each + // There is more than one child. Recursively explainStatsTree(...) on each // of them and add them to the 'inputStages' array. BSONArrayBuilder childrenBob(bob->subarrayStart("inputStages")); for (size_t i = 0; i < stats.children.size(); ++i) { BSONObjBuilder childBob(childrenBob.subobjStart()); - explainTree(*stats.children[i], verbosity, &childBob); + explainStatsTree(*stats.children[i], verbosity, &childBob); } childrenBob.doneFast(); } @@ -139,7 +226,7 @@ namespace mongo { // static void Explain::generatePlannerInfo(CanonicalQuery* query, PlanStageStats* winnerStats, - vector<PlanStageStats*>& rejectedStats, + const vector<PlanStageStats*>& rejectedStats, BSONObjBuilder* out) { BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));; @@ -150,14 +237,14 @@ namespace mongo { parsedQueryBob.doneFast(); BSONObjBuilder winningPlanBob(plannerBob.subobjStart("winningPlan")); - explainTree(*winnerStats, Explain::QUERY_PLANNER, &winningPlanBob); + explainStatsTree(*winnerStats, Explain::QUERY_PLANNER, &winningPlanBob); winningPlanBob.doneFast(); // Genenerate array of rejected plans. BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans")); for (size_t i = 0; i < rejectedStats.size(); i++) { BSONObjBuilder childBob(allPlansBob.subobjStart()); - explainTree(*rejectedStats.at(i), Explain::QUERY_PLANNER, &childBob); + explainStatsTree(*rejectedStats[i], Explain::QUERY_PLANNER, &childBob); } allPlansBob.doneFast(); @@ -165,6 +252,65 @@ namespace mongo { } // static + void Explain::generateExecStats(PlanStageStats* stats, + BSONObjBuilder* out) { + + out->appendNumber("nReturned", stats->common.advanced); + out->appendNumber("executionTimeMillis", stats->common.executionTimeMillis); + + // Flatten the stats tree into a list. + vector<PlanStageStats*> statsNodes; + flattenStatsTree(stats, &statsNodes); + + // Iterate over all stages in the tree and get the total number of keys/docs examined. + // These are just aggregations of information already available in the stats tree. + size_t totalKeysExamined = 0; + size_t totalDocsExamined = 0; + for (size_t i = 0; i < statsNodes.size(); ++i) { + if (STAGE_IXSCAN == statsNodes[i]->stageType) { + IndexScanStats* spec = static_cast<IndexScanStats*>(statsNodes[i]->specific.get()); + totalKeysExamined += spec->keysExamined; + } + else if (STAGE_GEO_NEAR_2D == statsNodes[i]->stageType) { + TwoDNearStats* spec = static_cast<TwoDNearStats*>(statsNodes[i]->specific.get()); + totalKeysExamined += spec->nscanned; + totalDocsExamined += spec->objectsLoaded; + } + else if (STAGE_IDHACK == statsNodes[i]->stageType) { + IDHackStats* spec = static_cast<IDHackStats*>(statsNodes[i]->specific.get()); + totalKeysExamined += spec->keysExamined; + totalDocsExamined += spec->docsExamined; + } + else if (STAGE_TEXT == statsNodes[i]->stageType) { + TextStats* spec = static_cast<TextStats*>(statsNodes[i]->specific.get()); + totalKeysExamined += spec->keysExamined; + totalDocsExamined += spec->fetches; + } + else if (STAGE_FETCH == statsNodes[i]->stageType) { + FetchStats* spec = static_cast<FetchStats*>(statsNodes[i]->specific.get()); + totalDocsExamined += spec->docsExamined; + } + else if (STAGE_COLLSCAN == statsNodes[i]->stageType) { + CollectionScanStats* spec = + static_cast<CollectionScanStats*>(statsNodes[i]->specific.get()); + totalDocsExamined += spec->docsTested; + } + else if (STAGE_COUNT == statsNodes[i]->stageType) { + CountStats* spec = static_cast<CountStats*>(statsNodes[i]->specific.get()); + totalKeysExamined += spec->keysExamined; + } + } + + out->appendNumber("totalKeysExamined", totalKeysExamined); + out->appendNumber("totalDocsExamined", totalDocsExamined); + + // Add the tree of stages, with individual execution stats for each stage. + BSONObjBuilder stagesBob(out->subobjStart("executionStages")); + explainStatsTree(*stats, Explain::EXEC_STATS, &stagesBob); + stagesBob.doneFast(); + } + + // static void Explain::generateServerInfo(BSONObjBuilder* out) { BSONObjBuilder serverBob(out->subobjStart("serverInfo")); out->append("host", getHostNameCached()); @@ -183,174 +329,104 @@ namespace mongo { } // static - Status Explain::explainSinglePlan(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - QuerySolution* solution, - Explain::Verbosity verbosity, - BSONObjBuilder* out) { - // Only one possible plan. Build the stages from the solution. - WorkingSet* ws = new WorkingSet(); - PlanStage* root; - verify(StageBuilder::build(collection, *solution, ws, &root)); - - // Wrap the exec stages in a plan executor. Takes ownership of 'ws' and 'root'. - scoped_ptr<PlanExecutor> exec(new PlanExecutor(ws, root, collection)); - - // If we need execution stats, then we should run the plan. - if (verbosity > Explain::QUERY_PLANNER) { - Runner::RunnerState state; - BSONObj obj; - while (Runner::RUNNER_ADVANCED == (state = exec->getNext(&obj, NULL))); - - if (Runner::RUNNER_ERROR == state) { - return Status(ErrorCodes::BadValue, - "Exec error: " + WorkingSetCommon::toStatusString(obj)); - } - } - - scoped_ptr<PlanStageStats> stats(exec->getStats()); + void Explain::explainCountEmptyQuery(BSONObjBuilder* out) { + BSONObjBuilder plannerBob(out->subobjStart("queryPlanner")); - // Actually generate the explain results. + plannerBob.append("plannerVersion", QueryPlanner::kPlannerVersion); - if (verbosity >= Explain::QUERY_PLANNER) { - vector<PlanStageStats*> rejected; - generatePlannerInfo(rawCanonicalQuery, stats.get(), rejected, out); - generateServerInfo(out); - } + plannerBob.append("winningPlan", "TRIVIAL_COUNT"); - if (verbosity >= Explain::EXEC_STATS) { - // TODO: generate executionStats section - } + // Empty array of rejected plans. + BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans")); + allPlansBob.doneFast(); - if (verbosity >= Explain::EXEC_ALL_PLANS) { - // TODO: generate rejected plans execution stats - } + plannerBob.doneFast(); - return Status::OK(); + generateServerInfo(out); } // static - Status Explain::explainMultiPlan(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - vector<QuerySolution*>& solutions, - Explain::Verbosity verbosity, - BSONObjBuilder* out) { - scoped_ptr<WorkingSet> sharedWorkingSet(new WorkingSet()); - - scoped_ptr<MultiPlanStage> multiPlanStage( - new MultiPlanStage(collection, rawCanonicalQuery)); - - for (size_t ix = 0; ix < solutions.size(); ++ix) { - // version of StageBuild::build when WorkingSet is shared - PlanStage* nextPlanRoot; - verify(StageBuilder::build(collection, *solutions[ix], - sharedWorkingSet.get(), &nextPlanRoot)); - - // Takes ownership of the solution and the root PlanStage, but not the working set. - multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet.get()); + Status Explain::explainStages(PlanExecutor* exec, + CanonicalQuery* canonicalQuery, + Explain::Verbosity verbosity, + BSONObjBuilder* out) { + // + // Step 1: run the stages as required by the verbosity level. + // + + // Inspect the tree to see if there is a MultiPlanStage. + MultiPlanStage* mps = getMultiPlanStage(exec->getStages()); + + // The queryPlanner verbosity level requires that we know the winning plan, + // if there are multiple. There are multiple candidates iff we have a MultiPlanStage. + if (verbosity >= Explain::QUERY_PLANNER && NULL != mps) { + mps->pickBestPlan(); } - // Run the plan / do the plan selection as required by the requested verbosity. - multiPlanStage->pickBestPlan(); - if (Explain::EXEC_STATS == verbosity) { - Status execStatus = multiPlanStage->executeWinningPlan(); - if (!execStatus.isOK()) { - return execStatus; + // The executionStats verbosity level requires that we run the winning plan + // until if finishes. + if (verbosity >= Explain::EXEC_STATS) { + Status s = exec->executePlan(); + if (!s.isOK()) { + return s; } } - else if (Explain::EXEC_ALL_PLANS == verbosity) { - Status execStatus = multiPlanStage->executeAllPlans(); - if (!execStatus.isOK()) { - return execStatus; + + // The allPlansExecution verbosity level requires that we run all plans to completion, + // if there are multiple candidates. If 'mps' is NULL, then there was only one candidate, + // and we don't have to worry about gathering stats for rejected plans. + if (verbosity == Explain::EXEC_ALL_PLANS && NULL != mps) { + Status s = mps->executeAllPlans(); + if (!s.isOK()) { + return s; } } + // + // Step 2: collect plan stats (which also give the structure of the plan tree). + // + // Get stats for the winning plan. - scoped_ptr<PlanStageStats> stats(multiPlanStage->getStats()); + scoped_ptr<PlanStageStats> winningStats(exec->getStats()); - // Actually generate the explain results. + // Get stats for the rejected plans, if there were rehected plans. + vector<PlanStageStats*> rejectedStats; + if (NULL != mps) { + rejectedStats = mps->generateCandidateStats(); + } + + // + // Step 3: use the stats trees to produce explain BSON. + // if (verbosity >= Explain::QUERY_PLANNER) { - vector<PlanStageStats*>* rejected = multiPlanStage->generateCandidateStats(); - generatePlannerInfo(rawCanonicalQuery, stats.get(), *rejected, out); - generateServerInfo(out); + generatePlannerInfo(canonicalQuery, winningStats.get(), rejectedStats, out); } if (verbosity >= Explain::EXEC_STATS) { - // TODO: generate executionStats section - } + BSONObjBuilder execBob(out->subobjStart("executionStats")); + + // Generate exec stats BSON for the winning plan. + generateExecStats(winningStats.get(), &execBob); + + // Also generate exec stats for each rejected plan, if the verbosity level + // is high enough. + if (verbosity >= Explain::EXEC_ALL_PLANS) { + BSONArrayBuilder rejectedBob(execBob.subarrayStart("rejectedPlansExecution")); + for (size_t i = 0; i < rejectedStats.size(); ++i) { + BSONObjBuilder planBob(rejectedBob.subobjStart()); + generateExecStats(rejectedStats[i], &planBob); + planBob.doneFast(); + } + rejectedBob.doneFast(); + } - if (verbosity >= Explain::EXEC_ALL_PLANS) { - // TODO: generate rejected plans execution stats + execBob.doneFast(); } - return Status::OK(); - } - - // static - void Explain::explainEmptyColl(CanonicalQuery* rawCanonicalQuery, - BSONObjBuilder* out) { - BSONObjBuilder plannerBob(out->subobjStart("queryPlanner")); - - plannerBob.append("plannerVersion", QueryPlanner::kPlannerVersion); - - BSONObjBuilder parsedQueryBob(plannerBob.subobjStart("parsedQuery")); - rawCanonicalQuery->root()->toBSON(&parsedQueryBob); - parsedQueryBob.doneFast(); - - plannerBob.appendBool("emptyCollection", true); - - plannerBob.append("winningPlan", "EOF"); - - // Empty array of rejected plans. - BSONArrayBuilder allPlansBob(plannerBob.subarrayStart("rejectedPlans")); - allPlansBob.doneFast(); - - plannerBob.doneFast(); - generateServerInfo(out); - } - - // static - Status Explain::explain(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - size_t plannerOptions, - Explain::Verbosity verbosity, - BSONObjBuilder* out) { - invariant(rawCanonicalQuery); - auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); - - if (NULL == collection) { - explainEmptyColl(rawCanonicalQuery, out); - return Status::OK(); - } - - QueryPlannerParams plannerParams; - plannerParams.options = plannerOptions; - fillOutPlannerParams(collection, rawCanonicalQuery, &plannerParams); - vector<QuerySolution*> solutions; - Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions); - if (!status.isOK()) { - return Status(ErrorCodes::BadValue, - "error processing explain: " + canonicalQuery->toString() + - " planner returned error: " + status.reason()); - } - - // We cannot figure out how to answer the query. Perhaps it requires an index - // we do not have? - if (0 == solutions.size()) { - stream ss; - ss << "error processing explain: " << canonicalQuery->toString() - << " No query solutions"; - return Status(ErrorCodes::BadValue, ss); - } - else if (1 == solutions.size()) { - return explainSinglePlan(collection, rawCanonicalQuery, solutions[0], verbosity, out); - } - else { - return explainMultiPlan(collection, rawCanonicalQuery, solutions, verbosity, out); - } + return Status::OK(); } } // namespace mongo diff --git a/src/mongo/db/query/explain.h b/src/mongo/db/query/explain.h index 675109bc294..32d144466f5 100644 --- a/src/mongo/db/query/explain.h +++ b/src/mongo/db/query/explain.h @@ -28,8 +28,10 @@ #pragma once +#include "mongo/db/exec/plan_stage.h" #include "mongo/db/exec/plan_stats.h" #include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/plan_executor.h" #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/query_solution.h" @@ -76,10 +78,17 @@ namespace mongo { */ static void generatePlannerInfo(CanonicalQuery* query, PlanStageStats* winnerStats, - vector<PlanStageStats*>& rejectedStats, + const vector<PlanStageStats*>& rejectedStats, BSONObjBuilder* out); /** + * Generates the execution stats section for the stats tree 'stats', + * adding the resulting BSON to 'out'. + */ + static void generateExecStats(PlanStageStats* stats, + BSONObjBuilder* out); + + /** * Adds the 'serverInfo' explain section to the BSON object being build * by 'out'. */ @@ -92,64 +101,37 @@ namespace mongo { * Explain info is added to 'bob' according to the verbosity level passed in * 'verbosity'. */ - static void explainTree(const PlanStageStats& stats, - Explain::Verbosity verbosity, - BSONObjBuilder* bob); + static void explainStatsTree(const PlanStageStats& stats, + Explain::Verbosity verbosity, + BSONObjBuilder* bob); /** - * Add explain info to 'out' at verbosity 'verbosity' in the case that there is - * only one query solution available. + * Generate explain info for the execution plan 'exec', adding the results + * to the BSONObj being built by 'out'. * - * The query 'rawCanonicalQuery' has one viable query solution 'solution' in the - * collection 'collection'. + * The query part of the operation is contained in 'canonicalQuery', but + * 'exec' can contain any tree of execution stages. We can explain any + * operation that executes as stages by calling into this function. * - * May use a PlanExecutor to run the solution in order to produce exec stats. + * The explain information is generated according with a level of detail + * specified by 'verbosity'. */ - static Status explainSinglePlan(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - QuerySolution* solution, - Explain::Verbosity verbosity, - BSONObjBuilder* out); + static Status explainStages(PlanExecutor* exec, + CanonicalQuery* canonicalQuery, + Explain::Verbosity verbosity, + BSONObjBuilder* out); - /** - * Add explain info to 'out' at verbosity 'verbosity' in the case that there are - * multiple query solutions available. - * - * The query 'rawCanonicalQuery' has the corresponding query solutions in 'solutions'. - * - * Uses a MultiPlan stage to choose the best plan, and to run the winning plan or the - * rejected plans as required by the verbosity level. - */ - static Status explainMultiPlan(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - vector<QuerySolution*>& solutions, - Explain::Verbosity verbosity, - BSONObjBuilder* out); + // + // Helpers for special-case explains. + // /** - * The format of the explain output is special if the collection is empty. - * - * Assuming that the collection is empty, adds the explain info for query - * 'rawCanonicalQuery' to 'out'. + * If you have an empty query with a count, then there are no execution stages. + * We just get the number of records and then apply skip/limit. Since there + * are no stages, this requires a special explain format. */ - static void explainEmptyColl(CanonicalQuery* rawCanonicalQuery, - BSONObjBuilder* out); + static void explainCountEmptyQuery(BSONObjBuilder* out); - /** - * Top-level explain entry point for a query. Plans 'rawCanonicalQuery' in collection - * 'collection' using the planner parameters in 'plannerOptions'. - * - * The resulting explain BSON is added to 'out'. The level of detail in the output is - * controlled by 'verbosity'. - * - * If necessary, run the query in order to generate execution stats (but throw out - * the results of the query). - */ - static Status explain(Collection* collection, - CanonicalQuery* rawCanonicalQuery, - size_t plannerOptions, - Explain::Verbosity verbosity, - BSONObjBuilder* out); }; } // namespace diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp new file mode 100644 index 00000000000..03ee948817f --- /dev/null +++ b/src/mongo/db/query/get_executor.cpp @@ -0,0 +1,811 @@ +/** + * Copyright (C) 2013-2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/get_executor.h" + +#include <limits> + +#include "mongo/base/parse_number.h" +#include "mongo/client/dbclientinterface.h" +#include "mongo/db/exec/cached_plan.h" +#include "mongo/db/exec/eof.h" +#include "mongo/db/exec/idhack.h" +#include "mongo/db/exec/multi_plan.h" +#include "mongo/db/exec/projection.h" +#include "mongo/db/exec/shard_filter.h" +#include "mongo/db/exec/subplan.h" +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/explain_plan.h" +#include "mongo/db/query/query_settings.h" +#include "mongo/db/query/index_bounds_builder.h" +#include "mongo/db/query/internal_plans.h" +#include "mongo/db/query/plan_cache.h" +#include "mongo/db/query/plan_executor.h" +#include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_access.h" +#include "mongo/db/query/qlog.h" +#include "mongo/db/query/query_knobs.h" +#include "mongo/db/query/query_planner.h" +#include "mongo/db/query/query_planner_common.h" +#include "mongo/db/query/stage_builder.h" +#include "mongo/db/index_names.h" +#include "mongo/db/server_options.h" +#include "mongo/db/server_parameters.h" +#include "mongo/s/d_logic.h" + +namespace mongo { + + // static + void filterAllowedIndexEntries(const AllowedIndices& allowedIndices, + std::vector<IndexEntry>* indexEntries) { + invariant(indexEntries); + + // Filter index entries + // Check BSON objects in AllowedIndices::_indexKeyPatterns against IndexEntry::keyPattern. + // Removes IndexEntrys that do not match _indexKeyPatterns. + std::vector<IndexEntry> temp; + for (std::vector<IndexEntry>::const_iterator i = indexEntries->begin(); + i != indexEntries->end(); ++i) { + const IndexEntry& indexEntry = *i; + for (std::vector<BSONObj>::const_iterator j = allowedIndices.indexKeyPatterns.begin(); + j != allowedIndices.indexKeyPatterns.end(); ++j) { + const BSONObj& index = *j; + // Copy index entry to temp vector if found in query settings. + if (0 == indexEntry.keyPattern.woCompare(index)) { + temp.push_back(indexEntry); + break; + } + } + } + + // Update results. + temp.swap(*indexEntries); + } + + namespace { + // The body is below in the "count hack" section but getRunner calls it. + bool turnIxscanIntoCount(QuerySolution* soln); + } // namespace + + + void fillOutPlannerParams(Collection* collection, + CanonicalQuery* canonicalQuery, + QueryPlannerParams* plannerParams) { + // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s) + IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); + while (ii.more()) { + const IndexDescriptor* desc = ii.next(); + plannerParams->indices.push_back(IndexEntry(desc->keyPattern(), + desc->getAccessMethodName(), + desc->isMultikey(), + desc->isSparse(), + desc->indexName(), + desc->infoObj())); + } + + // If query supports index filters, filter params.indices by indices in query settings. + QuerySettings* querySettings = collection->infoCache()->getQuerySettings(); + AllowedIndices* allowedIndicesRaw; + + // Filter index catalog if index filters are specified for query. + // Also, signal to planner that application hint should be ignored. + if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) { + boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw); + filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices); + plannerParams->indexFiltersApplied = true; + } + + // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN + // overrides this behavior by not outputting a collscan even if there are no indexed + // solutions. + if (storageGlobalParams.noTableScan) { + const string& ns = canonicalQuery->ns(); + // There are certain cases where we ignore this restriction: + bool ignore = canonicalQuery->getQueryObj().isEmpty() + || (string::npos != ns.find(".system.")) + || (0 == ns.find("local.")); + if (!ignore) { + plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN; + } + } + + // If the caller wants a shard filter, make sure we're actually sharded. + if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { + CollectionMetadataPtr collMetadata = + shardingState.getCollectionMetadata(canonicalQuery->ns()); + + if (collMetadata) { + plannerParams->shardKey = collMetadata->getKeyPattern(); + } + else { + // If there's no metadata don't bother w/the shard filter since we won't know what + // the key pattern is anyway... + plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; + } + } + + if (internalQueryPlannerEnableIndexIntersection) { + plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION; + } + + plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS; + plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT; + } + + Status getExecutorIDHack(Collection* collection, + CanonicalQuery* query, + const QueryPlannerParams& plannerParams, + PlanExecutor** out) { + invariant(collection); + + LOG(2) << "Using idhack: " << query->toStringShort(); + WorkingSet* ws = new WorkingSet(); + PlanStage* root = new IDHackStage(collection, query, ws); + + // Might have to filter out orphaned docs. + if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { + root = new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()), + ws, root); + } + + // There might be a projection. The idhack stage will always fetch the full document, + // so we don't support covered projections. However, we might use the simple inclusion + // fast path. + if (NULL != query && NULL != query->getProj()) { + ProjectionStageParams params(WhereCallbackReal(collection->ns().db())); + params.projObj = query->getProj()->getProjObj(); + + // Stuff the right data into the params depending on what proj impl we use. + if (query->getProj()->requiresDocument() || query->getProj()->wantIndexKey()) { + params.fullExpression = query->root(); + params.projImpl = ProjectionStageParams::NO_FAST_PATH; + } + else { + params.projImpl = ProjectionStageParams::SIMPLE_DOC; + } + + root = new ProjectionStage(params, ws, root); + } + + *out = new PlanExecutor(ws, root, collection); + return Status::OK(); + } + + Status getExecutor(Collection* collection, + CanonicalQuery* canonicalQuery, + PlanExecutor** out, + size_t plannerOptions) { + invariant(canonicalQuery); + + // This can happen as we're called by internal clients as well. + if (NULL == collection) { + const string& ns = canonicalQuery->ns(); + LOG(2) << "Collection " << ns << " does not exist." + << " Using EOF runner: " << canonicalQuery->toStringShort(); + EOFStage* eofStage = new EOFStage(); + WorkingSet* ws = new WorkingSet(); + *out = new PlanExecutor(ws, eofStage, collection); + return Status::OK(); + } + + // Fill out the planning params. We use these for both cached solutions and non-cached. + QueryPlannerParams plannerParams; + plannerParams.options = plannerOptions; + fillOutPlannerParams(collection, canonicalQuery, &plannerParams); + + // If we have an _id index we can use the idhack runner. + if (IDHackStage::supportsQuery(*canonicalQuery) && + collection->getIndexCatalog()->findIdIndex()) { + return getExecutorIDHack(collection, canonicalQuery, plannerParams, out); + } + + // Tailable: If the query requests tailable the collection must be capped. + if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) { + if (!collection->isCapped()) { + return Status(ErrorCodes::BadValue, + "error processing query: " + canonicalQuery->toString() + + " tailable cursor requested on non capped collection"); + } + + // If a sort is specified it must be equal to expectedSort. + const BSONObj expectedSort = BSON("$natural" << 1); + const BSONObj& actualSort = canonicalQuery->getParsed().getSort(); + if (!actualSort.isEmpty() && !(actualSort == expectedSort)) { + return Status(ErrorCodes::BadValue, + "error processing query: " + canonicalQuery->toString() + + " invalid sort specified for tailable cursor: " + + actualSort.toString()); + } + } + + // Try to look up a cached solution for the query. + + CachedSolution* rawCS; + if (PlanCache::shouldCacheQuery(*canonicalQuery) && + collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) { + // We have a CachedSolution. Have the planner turn it into a QuerySolution. + boost::scoped_ptr<CachedSolution> cs(rawCS); + QuerySolution *qs, *backupQs; + QuerySolution*& chosenSolution=qs; // either qs or backupQs + Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, + &qs, &backupQs); + + if (status.isOK()) { + // the working set will be shared by the root and backupRoot plans + // and owned by the containing single-solution-runner + // + WorkingSet* sharedWs = new WorkingSet(); + + PlanStage *root, *backupRoot=NULL; + verify(StageBuilder::build(collection, *qs, sharedWs, &root)); + if ((plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) + && turnIxscanIntoCount(qs)) { + LOG(2) << "Using fast count: " << canonicalQuery->toStringShort() + << ", planSummary: " << getPlanSummary(*qs); + + if (NULL != backupQs) { + delete backupQs; + } + } + else if (NULL != backupQs) { + verify(StageBuilder::build(collection, *backupQs, sharedWs, &backupRoot)); + } + + // add a CachedPlanStage on top of the previous root + root = new CachedPlanStage(collection, canonicalQuery, root, backupRoot); + + *out = new PlanExecutor(sharedWs, root, chosenSolution, collection); + return Status::OK(); + } + } + + if (internalQueryPlanOrChildrenIndependently + && SubplanStage::canUseSubplanning(*canonicalQuery)) { + + QLOG() << "Running query as sub-queries: " << canonicalQuery->toStringShort(); + LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort(); + + auto_ptr<WorkingSet> ws(new WorkingSet()); + + SubplanStage* subplan; + Status runnerStatus = SubplanStage::make(collection, ws.get(), plannerParams, + canonicalQuery, &subplan); + if (!runnerStatus.isOK()) { + return runnerStatus; + } + + *out = new PlanExecutor(ws.release(), subplan, collection); + return Status::OK(); + } + + return getExecutorAlwaysPlan(collection, canonicalQuery, plannerParams, out); + } + + Status getExecutorAlwaysPlan(Collection* collection, + CanonicalQuery* canonicalQuery, + const QueryPlannerParams& plannerParams, + PlanExecutor** execOut) { + invariant(collection); + invariant(canonicalQuery); + + *execOut = NULL; + + vector<QuerySolution*> solutions; + Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions); + if (!status.isOK()) { + return Status(ErrorCodes::BadValue, + "error processing query: " + canonicalQuery->toString() + + " planner returned error: " + status.reason()); + } + + // We cannot figure out how to answer the query. Perhaps it requires an index + // we do not have? + if (0 == solutions.size()) { + return Status(ErrorCodes::BadValue, + str::stream() + << "error processing query: " + << canonicalQuery->toString() + << " No query solutions"); + } + + // See if one of our solutions is a fast count hack in disguise. + if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { + for (size_t i = 0; i < solutions.size(); ++i) { + if (turnIxscanIntoCount(solutions[i])) { + // Great, we can use solutions[i]. Clean up the other QuerySolution(s). + for (size_t j = 0; j < solutions.size(); ++j) { + if (j != i) { + delete solutions[j]; + } + } + + LOG(2) << "Using fast count: " << canonicalQuery->toStringShort() + << ", planSummary: " << getPlanSummary(*solutions[i]); + + // We're not going to cache anything that's fast count. + WorkingSet* ws = new WorkingSet(); + PlanStage* root; + verify(StageBuilder::build(collection, *solutions[i], ws, &root)); + + *execOut = new PlanExecutor(ws, root, solutions[i], collection); + return Status::OK(); + } + } + } + + if (1 == solutions.size()) { + LOG(2) << "Only one plan is available; it will be run but will not be cached. " + << canonicalQuery->toStringShort() + << ", planSummary: " << getPlanSummary(*solutions[0]); + + // Only one possible plan. Run it. Build the stages from the solution. + WorkingSet* ws = new WorkingSet(); + PlanStage* root; + verify(StageBuilder::build(collection, *solutions[0], ws, &root)); + + *execOut = new PlanExecutor(ws, root, solutions[0], collection); + return Status::OK(); + } + else { + // Many solutions. Create a MultiPlanStage to pick the best, update the cache, and so on. + + // The working set will be shared by all candidate plans and owned by the containing runner + WorkingSet* sharedWorkingSet = new WorkingSet(); + + MultiPlanStage* multiPlanStage = new MultiPlanStage(collection, canonicalQuery); + + for (size_t ix = 0; ix < solutions.size(); ++ix) { + if (solutions[ix]->cacheData.get()) { + solutions[ix]->cacheData->indexFilterApplied = plannerParams.indexFiltersApplied; + } + + // version of StageBuild::build when WorkingSet is shared + PlanStage* nextPlanRoot; + verify(StageBuilder::build(collection, *solutions[ix], + sharedWorkingSet, &nextPlanRoot)); + + // Owns none of the arguments + multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet); + } + + PlanExecutor* exec = new PlanExecutor(sharedWorkingSet, multiPlanStage, collection); + + *execOut = exec; + return Status::OK(); + } + } + + // + // Count hack + // + + namespace { + + /** + * Returns 'true' if the provided solution 'soln' can be rewritten to use + * a fast counting stage. Mutates the tree in 'soln->root'. + * + * Otherwise, returns 'false'. + */ + bool turnIxscanIntoCount(QuerySolution* soln) { + QuerySolutionNode* root = soln->root.get(); + + // Root should be a fetch w/o any filters. + if (STAGE_FETCH != root->getType()) { + return false; + } + + if (NULL != root->filter.get()) { + return false; + } + + // Child should be an ixscan. + if (STAGE_IXSCAN != root->children[0]->getType()) { + return false; + } + + IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]); + + // No filters allowed and side-stepping isSimpleRange for now. TODO: do we ever see + // isSimpleRange here? because we could well use it. I just don't think we ever do see + // it. + + if (NULL != isn->filter.get() || isn->bounds.isSimpleRange) { + return false; + } + + // Make sure the bounds are OK. + BSONObj startKey; + bool startKeyInclusive; + BSONObj endKey; + bool endKeyInclusive; + + if (!IndexBoundsBuilder::isSingleInterval( isn->bounds, + &startKey, + &startKeyInclusive, + &endKey, + &endKeyInclusive )) { + return false; + } + + // Make the count node that we replace the fetch + ixscan with. + CountNode* cn = new CountNode(); + cn->indexKeyPattern = isn->indexKeyPattern; + cn->startKey = startKey; + cn->startKeyInclusive = startKeyInclusive; + cn->endKey = endKey; + cn->endKeyInclusive = endKeyInclusive; + // Takes ownership of 'cn' and deletes the old root. + soln->root.reset(cn); + return true; + } + + /** + * Returns true if indices contains an index that can be + * used with DistinctNode. Sets indexOut to the array index + * of PlannerParams::indices. + * Look for the index for the fewest fields. + * Criteria for suitable index is that the index cannot be special + * (geo, hashed, text, ...). + * + * Multikey indices are not suitable for DistinctNode when the projection + * is on an array element. Arrays are flattened in a multikey index which + * makes it impossible for the distinct scan stage (plan stage generated from + * DistinctNode) to select the requested element by array index. + * + * Multikey indices cannot be used for the fast distinct hack if the field is dotted. + * Currently the solution generated for the distinct hack includes a projection stage and + * the projection stage cannot be covered with a dotted field. + */ + bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices, + const std::string& field, size_t* indexOut) { + invariant(indexOut); + bool isDottedField = str::contains(field, '.'); + int minFields = std::numeric_limits<int>::max(); + for (size_t i = 0; i < indices.size(); ++i) { + // Skip special indices. + if (!IndexNames::findPluginName(indices[i].keyPattern).empty()) { + continue; + } + // Skip multikey indices if we are projecting on a dotted field. + if (indices[i].multikey && isDottedField) { + continue; + } + int nFields = indices[i].keyPattern.nFields(); + // Pick the index with the lowest number of fields. + if (nFields < minFields) { + minFields = nFields; + *indexOut = i; + } + } + return minFields != std::numeric_limits<int>::max(); + } + + /** + * Checks dotted field for a projection and truncates the + * field name if we could be projecting on an array element. + * Sets 'isIDOut' to true if the projection is on a sub document of _id. + * For example, _id.a.2, _id.b.c. + */ + std::string getProjectedDottedField(const std::string& field, bool* isIDOut) { + // Check if field contains an array index. + std::vector<std::string> res; + mongo::splitStringDelim(field, &res, '.'); + + // Since we could exit early from the loop, + // we should check _id here and set '*isIDOut' accordingly. + *isIDOut = ("_id" == res[0]); + + // Skip the first dotted component. If the field starts + // with a number, the number cannot be an array index. + int arrayIndex = 0; + for (size_t i = 1; i < res.size(); ++i) { + if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) { + // Array indices cannot be negative numbers (this is not $slice). + // Negative numbers are allowed as field names. + if (arrayIndex >= 0) { + // Generate prefix of field up to (but not including) array index. + std::vector<std::string> prefixStrings(res); + prefixStrings.resize(i); + // Reset projectedField. Instead of overwriting, joinStringDelim() appends joined string + // to the end of projectedField. + std::string projectedField; + mongo::joinStringDelim(prefixStrings, &projectedField, '.'); + return projectedField; + } + } + } + + return field; + } + + /** + * Creates a projection spec for a distinct command from the requested field. + * In most cases, the projection spec will be {_id: 0, key: 1}. + * The exceptions are: + * 1) When the requested field is '_id', the projection spec will {_id: 1}. + * 2) When the requested field could be an array element (eg. a.0), + * the projected field will be the prefix of the field up to the array element. + * For example, a.b.2 => {_id: 0, 'a.b': 1} + * Note that we can't use a $slice projection because the distinct command filters + * the results from the runner using the dotted field name. Using $slice will + * re-order the documents in the array in the results. + */ + BSONObj getDistinctProjection(const std::string& field) { + std::string projectedField(field); + + bool isID = false; + if ("_id" == field) { + isID = true; + } + else if (str::contains(field, '.')) { + projectedField = getProjectedDottedField(field, &isID); + } + BSONObjBuilder bob; + if (!isID) { + bob.append("_id", 0); + } + bob.append(projectedField, 1); + return bob.obj(); + } + + } // namespace + + Status getExecutorCount(Collection* collection, + const BSONObj& query, + const BSONObj& hintObj, + PlanExecutor** execOut) { + invariant(collection); + + const WhereCallbackReal whereCallback(collection->ns().db()); + + CanonicalQuery* cq; + uassertStatusOK(CanonicalQuery::canonicalize(collection->ns().ns(), + query, + BSONObj(), + BSONObj(), + 0, + 0, + hintObj, + &cq, + whereCallback)); + + scoped_ptr<CanonicalQuery> cleanupCq(cq); + + return getExecutor(collection, cq, execOut, QueryPlannerParams::PRIVATE_IS_COUNT); + } + + // + // Distinct hack + // + + bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) { + QuerySolutionNode* root = soln->root.get(); + + // We're looking for a project on top of an ixscan. + if (STAGE_PROJECTION == root->getType() && (STAGE_IXSCAN == root->children[0]->getType())) { + IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]); + + // An additional filter must be applied to the data in the key, so we can't just skip + // all the keys with a given value; we must examine every one to find the one that (may) + // pass the filter. + if (NULL != isn->filter.get()) { + return false; + } + + // We only set this when we have special query modifiers (.max() or .min()) or other + // special cases. Don't want to handle the interactions between those and distinct. + // Don't think this will ever really be true but if it somehow is, just ignore this + // soln. + if (isn->bounds.isSimpleRange) { + return false; + } + + // Make a new DistinctNode. We swap this for the ixscan in the provided solution. + DistinctNode* dn = new DistinctNode(); + dn->indexKeyPattern = isn->indexKeyPattern; + dn->direction = isn->direction; + dn->bounds = isn->bounds; + + // Figure out which field we're skipping to the next value of. TODO: We currently only + // try to distinct-hack when there is an index prefixed by the field we're distinct-ing + // over. Consider removing this code if we stick with that policy. + dn->fieldNo = 0; + BSONObjIterator it(isn->indexKeyPattern); + while (it.more()) { + if (field == it.next().fieldName()) { + break; + } + dn->fieldNo++; + } + + // Delete the old index scan, set the child of project to the fast distinct scan. + delete root->children[0]; + root->children[0] = dn; + return true; + } + + return false; + } + + Status getExecutorDistinct(Collection* collection, + const BSONObj& query, + const std::string& field, + PlanExecutor** out) { + // This should'a been checked by the distinct command. + invariant(collection); + + // TODO: check for idhack here? + + // When can we do a fast distinct hack? + // 1. There is a plan with just one leaf and that leaf is an ixscan. + // 2. The ixscan indexes the field we're interested in. + // 2a: We are correct if the index contains the field but for now we look for prefix. + // 3. The query is covered/no fetch. + // + // We go through normal planning (with limited parameters) to see if we can produce + // a soln with the above properties. + + QueryPlannerParams plannerParams; + plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN; + + IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); + while (ii.more()) { + const IndexDescriptor* desc = ii.next(); + // The distinct hack can work if any field is in the index but it's not always clear + // if it's a win unless it's the first field. + if (desc->keyPattern().firstElement().fieldName() == field) { + plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), + desc->getAccessMethodName(), + desc->isMultikey(), + desc->isSparse(), + desc->indexName(), + desc->infoObj())); + } + } + + const WhereCallbackReal whereCallback(collection->ns().db()); + + // If there are no suitable indices for the distinct hack bail out now into regular planning + // with no projection. + if (plannerParams.indices.empty()) { + CanonicalQuery* cq; + Status status = CanonicalQuery::canonicalize( + collection->ns().ns(), query, &cq, whereCallback); + if (!status.isOK()) { + return status; + } + + scoped_ptr<CanonicalQuery> cleanupCq(cq); + + // Does not take ownership of its args. + return getExecutor(collection, cq, out); + } + + // + // If we're here, we have an index prefixed by the field we're distinct-ing over. + // + + // Applying a projection allows the planner to try to give us covered plans that we can turn + // into the projection hack. getDistinctProjection deals with .find() projection semantics + // (ie _id:1 being implied by default). + BSONObj projection = getDistinctProjection(field); + + // Apply a projection of the key. Empty BSONObj() is for the sort. + CanonicalQuery* cq; + Status status = CanonicalQuery::canonicalize(collection->ns().ns(), + query, + BSONObj(), + projection, + &cq, + whereCallback); + if (!status.isOK()) { + return status; + } + + scoped_ptr<CanonicalQuery> cleanupCq(cq); + + // If there's no query, we can just distinct-scan one of the indices. + // Not every index in plannerParams.indices may be suitable. Refer to + // getDistinctNodeIndex(). + size_t distinctNodeIndex = 0; + if (query.isEmpty() && + getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) { + DistinctNode* dn = new DistinctNode(); + dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern; + dn->direction = 1; + IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); + dn->fieldNo = 0; + + QueryPlannerParams params; + + // Takes ownership of 'dn'. + QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn); + invariant(soln); + + LOG(2) << "Using fast distinct: " << cq->toStringShort() + << ", planSummary: " << getPlanSummary(*soln); + + WorkingSet* ws = new WorkingSet(); + PlanStage* root; + verify(StageBuilder::build(collection, *soln, ws, &root)); + // Takes ownership of 'ws', 'root', and 'soln'. + *out = new PlanExecutor(ws, root, soln, collection); + return Status::OK(); + } + + // See if we can answer the query in a fast-distinct compatible fashion. + vector<QuerySolution*> solutions; + status = QueryPlanner::plan(*cq, plannerParams, &solutions); + if (!status.isOK()) { + return getExecutor(collection, cq, out); + } + + // We look for a solution that has an ixscan we can turn into a distinctixscan + for (size_t i = 0; i < solutions.size(); ++i) { + if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { + // Great, we can use solutions[i]. Clean up the other QuerySolution(s). + for (size_t j = 0; j < solutions.size(); ++j) { + if (j != i) { + delete solutions[j]; + } + } + + LOG(2) << "Using fast distinct: " << cq->toStringShort() + << ", planSummary: " << getPlanSummary(*solutions[i]); + + // Build and return the SSR over solutions[i]. + WorkingSet* ws = new WorkingSet(); + PlanStage* root; + verify(StageBuilder::build(collection, *solutions[i], ws, &root)); + // Takes ownership of 'ws', 'root', and 'solutions[i]'. + *out = new PlanExecutor(ws, root, solutions[i], collection); + return Status::OK(); + } + } + + // If we're here, the planner made a soln with the restricted index set but we couldn't + // translate any of them into a distinct-compatible soln. So, delete the solutions and just + // go through normal planning. + for (size_t i = 0; i < solutions.size(); ++i) { + delete solutions[i]; + } + + // We drop the projection from the 'cq'. Unfortunately this is not trivial. + status = CanonicalQuery::canonicalize(collection->ns().ns(), query, &cq, whereCallback); + if (!status.isOK()) { + return status; + } + + cleanupCq.reset(cq); + + // Does not take ownership. + return getExecutor(collection, cq, out); + } + +} // namespace mongo diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h new file mode 100644 index 00000000000..fc78e302709 --- /dev/null +++ b/src/mongo/db/query/get_executor.h @@ -0,0 +1,127 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/plan_executor.h" +#include "mongo/db/query/query_planner_params.h" +#include "mongo/db/query/query_settings.h" +#include "mongo/db/query/query_solution.h" + +namespace mongo { + + class Collection; + + /** + * Filter indexes retrieved from index catalog by + * allowed indices in query settings. + * Used by getRunner(). + * This function is public to facilitate testing. + */ + void filterAllowedIndexEntries(const AllowedIndices& allowedIndices, + std::vector<IndexEntry>* indexEntries); + + /** + * Fill out the provided 'plannerParams' for the 'canonicalQuery' operating on the collection + * 'collection'. Exposed for testing. + */ + void fillOutPlannerParams(Collection* collection, + CanonicalQuery* canonicalQuery, + QueryPlannerParams* plannerParams); + + /** + * Get a plan executor for a query. Does not take ownership of canonicalQuery. + * + * If the query is valid and an executor could be created, returns Status::OK() + * and populates *out with the PlanExecutor. + * + * If the query cannot be executed, returns a Status indicating why. Deletes + * rawCanonicalQuery. + */ + Status getExecutor(Collection* collection, + CanonicalQuery* canonicalQuery, + PlanExecutor** out, + size_t plannerOptions = 0); + + /** + * Get a plan executor for a simple id query. The executor will wrap an execution + * tree whose root stage is the idhack stage. + * + * Does not take ownership of 'query'. + */ + Status getExecutorIDHack(Collection* collection, + CanonicalQuery* query, + const QueryPlannerParams& plannerParams, + PlanExecutor** out); + + /** + * If possible, turn the provided QuerySolution into a QuerySolution that uses a DistinctNode + * to provide results for the distinct command. + * + * If the provided solution could be mutated successfully, returns true, otherwise returns + * false. + */ + bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field); + + /* + * Get an executor for a query executing as part of a distinct command. + * + * Distinct is unique in that it doesn't care about getting all the results; it just wants all + * possible values of a certain field. As such, we can skip lots of data in certain cases (see + * body of method for detail). + */ + Status getExecutorDistinct(Collection* collection, + const BSONObj& query, + const std::string& field, + PlanExecutor** out); + + /* + * Get a PlanExecutor for a query executing as part of a count command. + * + * Count doesn't care about actually examining its results; it just wants to walk through them. + * As such, with certain covered queries, we can skip the overhead of fetching etc. when + * executing a count. + */ + Status getExecutorCount(Collection* collection, + const BSONObj& query, + const BSONObj& hintObj, + PlanExecutor** execOut); + + /** + * Get a plan executor for a query. Ignores the cache and always plans the full query. + * + * Does not take ownership of its arguments. + * + * Returns the resulting executor through 'execOut'. The caller must delete 'execOut', + * if an OK status is returned. + */ + Status getExecutorAlwaysPlan(Collection* collection, + CanonicalQuery* canonicalQuery, + const QueryPlannerParams& plannerParams, + PlanExecutor** execOut); + +} // namespace mongo diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp index b95c431caf0..b6cc5bf0710 100644 --- a/src/mongo/db/query/get_runner.cpp +++ b/src/mongo/db/query/get_runner.cpp @@ -26,6 +26,8 @@ * it in the license file. */ +// THIS FILE IS DEPRECATED -- replaced by get_executor.cpp + #include "mongo/db/query/get_runner.h" #include <limits> @@ -33,10 +35,14 @@ #include "mongo/base/parse_number.h" #include "mongo/client/dbclientinterface.h" #include "mongo/db/exec/cached_plan.h" +#include "mongo/db/exec/eof.h" +#include "mongo/db/exec/idhack.h" #include "mongo/db/exec/multi_plan.h" +#include "mongo/db/exec/subplan.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/eof_runner.h" #include "mongo/db/query/explain_plan.h" +#include "mongo/db/query/get_executor.h" #include "mongo/db/query/query_settings.h" #include "mongo/db/query/idhack_runner.h" #include "mongo/db/query/index_bounds_builder.h" @@ -58,33 +64,6 @@ namespace mongo { - // static - void filterAllowedIndexEntries(const AllowedIndices& allowedIndices, - std::vector<IndexEntry>* indexEntries) { - invariant(indexEntries); - - // Filter index entries - // Check BSON objects in AllowedIndices::_indexKeyPatterns against IndexEntry::keyPattern. - // Removes IndexEntrys that do not match _indexKeyPatterns. - std::vector<IndexEntry> temp; - for (std::vector<IndexEntry>::const_iterator i = indexEntries->begin(); - i != indexEntries->end(); ++i) { - const IndexEntry& indexEntry = *i; - for (std::vector<BSONObj>::const_iterator j = allowedIndices.indexKeyPatterns.begin(); - j != allowedIndices.indexKeyPatterns.end(); ++j) { - const BSONObj& index = *j; - // Copy index entry to temp vector if found in query settings. - if (0 == indexEntry.keyPattern.woCompare(index)) { - temp.push_back(indexEntry); - break; - } - } - } - - // Update results. - temp.swap(*indexEntries); - } - Status getRunner(Collection* collection, const std::string& ns, const BSONObj& unparsedQuery, @@ -124,70 +103,6 @@ namespace mongo { } // namespace - void fillOutPlannerParams(Collection* collection, - CanonicalQuery* canonicalQuery, - QueryPlannerParams* plannerParams) { - // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s) - IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); - plannerParams->indices.push_back(IndexEntry(desc->keyPattern(), - desc->getAccessMethodName(), - desc->isMultikey(), - desc->isSparse(), - desc->indexName(), - desc->infoObj())); - } - - // If query supports index filters, filter params.indices by indices in query settings. - QuerySettings* querySettings = collection->infoCache()->getQuerySettings(); - AllowedIndices* allowedIndicesRaw; - - // Filter index catalog if index filters are specified for query. - // Also, signal to planner that application hint should be ignored. - if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) { - boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw); - filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices); - plannerParams->indexFiltersApplied = true; - } - - // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN - // overrides this behavior by not outputting a collscan even if there are no indexed - // solutions. - if (storageGlobalParams.noTableScan) { - const string& ns = canonicalQuery->ns(); - // There are certain cases where we ignore this restriction: - bool ignore = canonicalQuery->getQueryObj().isEmpty() - || (string::npos != ns.find(".system.")) - || (0 == ns.find("local.")); - if (!ignore) { - plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN; - } - } - - // If the caller wants a shard filter, make sure we're actually sharded. - if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { - CollectionMetadataPtr collMetadata = - shardingState.getCollectionMetadata(canonicalQuery->ns()); - - if (collMetadata) { - plannerParams->shardKey = collMetadata->getKeyPattern(); - } - else { - // If there's no metadata don't bother w/the shard filter since we won't know what - // the key pattern is anyway... - plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; - } - } - - if (internalQueryPlannerEnableIndexIntersection) { - plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION; - } - - plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS; - plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT; - } - /** * For a given query, get a runner. */ @@ -209,7 +124,7 @@ namespace mongo { } // If we have an _id index we can use the idhack runner. - if (IDHackRunner::supportsQuery(*canonicalQuery) && + if (IDHackStage::supportsQuery(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) { LOG(2) << "Using idhack: " << canonicalQuery->toStringShort(); *out = new IDHackRunner(collection, canonicalQuery.release()); @@ -275,7 +190,7 @@ namespace mongo { // add a CachedPlanStage on top of the previous root root = new CachedPlanStage(collection, rawCanonicalQuery, root, backupRoot); - + *out = new SingleSolutionRunner(collection, canonicalQuery.release(), chosenSolution, root, sharedWs); @@ -307,7 +222,6 @@ namespace mongo { CanonicalQuery* rawCanonicalQuery, const QueryPlannerParams& plannerParams, Runner** out) { - invariant(collection); invariant(rawCanonicalQuery); auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); @@ -613,62 +527,6 @@ namespace mongo { // Distinct hack // - /** - * If possible, turn the provided QuerySolution into a QuerySolution that uses a DistinctNode - * to provide results for the distinct command. - * - * If the provided solution could be mutated successfully, returns true, otherwise returns - * false. - */ - bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) { - QuerySolutionNode* root = soln->root.get(); - - // We're looking for a project on top of an ixscan. - if (STAGE_PROJECTION == root->getType() && (STAGE_IXSCAN == root->children[0]->getType())) { - IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]); - - // An additional filter must be applied to the data in the key, so we can't just skip - // all the keys with a given value; we must examine every one to find the one that (may) - // pass the filter. - if (NULL != isn->filter.get()) { - return false; - } - - // We only set this when we have special query modifiers (.max() or .min()) or other - // special cases. Don't want to handle the interactions between those and distinct. - // Don't think this will ever really be true but if it somehow is, just ignore this - // soln. - if (isn->bounds.isSimpleRange) { - return false; - } - - // Make a new DistinctNode. We swap this for the ixscan in the provided solution. - DistinctNode* dn = new DistinctNode(); - dn->indexKeyPattern = isn->indexKeyPattern; - dn->direction = isn->direction; - dn->bounds = isn->bounds; - - // Figure out which field we're skipping to the next value of. TODO: We currently only - // try to distinct-hack when there is an index prefixed by the field we're distinct-ing - // over. Consider removing this code if we stick with that policy. - dn->fieldNo = 0; - BSONObjIterator it(isn->indexKeyPattern); - while (it.more()) { - if (field == it.next().fieldName()) { - break; - } - dn->fieldNo++; - } - - // Delete the old index scan, set the child of project to the fast distinct scan. - delete root->children[0]; - root->children[0] = dn; - return true; - } - - return false; - } - Status getRunnerDistinct(Collection* collection, const BSONObj& query, const string& field, diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h index 9360478e278..eabc78d3bdd 100644 --- a/src/mongo/db/query/get_runner.h +++ b/src/mongo/db/query/get_runner.h @@ -26,9 +26,12 @@ * it in the license file. */ +// THIS FILE IS DEPRECATED -- replaced by get_executor.h + #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/query_settings.h" +#include "mongo/db/query/query_solution.h" #include "mongo/db/query/runner.h" namespace mongo { @@ -36,23 +39,6 @@ namespace mongo { class Collection; /** - * Filter indexes retrieved from index catalog by - * allowed indices in query settings. - * Used by getRunner(). - * This function is public to facilitate testing. - */ - void filterAllowedIndexEntries(const AllowedIndices& allowedIndices, - std::vector<IndexEntry>* indexEntries); - - /** - * Fill out the provided 'plannerParams' for the 'canonicalQuery' operating on the collection - * 'collection'. Exposed for testing. - */ - void fillOutPlannerParams(Collection* collection, - CanonicalQuery* canonicalQuery, - QueryPlannerParams* plannerParams); - - /** * Get a runner for a query. Takes ownership of rawCanonicalQuery. * * If the query is valid and a runner could be created, returns Status::OK() @@ -83,7 +69,6 @@ namespace mongo { Runner** outRunner, CanonicalQuery** outCanonicalQuery, size_t plannerOptions = 0); - /* * Get a runner for a query executing as part of a distinct command. * @@ -95,6 +80,7 @@ namespace mongo { const BSONObj& query, const std::string& field, Runner** out); + /* * Get a runner for a query executing as part of a count command. * diff --git a/src/mongo/db/query/get_runner_test.cpp b/src/mongo/db/query/get_runner_test.cpp index ce646770623..15b9a618528 100644 --- a/src/mongo/db/query/get_runner_test.cpp +++ b/src/mongo/db/query/get_runner_test.cpp @@ -30,6 +30,7 @@ * This file contains tests for mongo/db/query/get_runner.h */ +#include "mongo/db/query/get_executor.h" #include "mongo/db/query/get_runner.h" #include "mongo/db/json.h" diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp index a8ec508e7e1..b9c0da6f82b 100644 --- a/src/mongo/db/query/idhack_runner.cpp +++ b/src/mongo/db/query/idhack_runner.cpp @@ -241,15 +241,6 @@ namespace mongo { } // static - bool IDHackRunner::supportsQuery(const CanonicalQuery& query) { - return !query.getParsed().showDiskLoc() - && query.getParsed().getHint().isEmpty() - && 0 == query.getParsed().getSkip() - && CanonicalQuery::isSimpleIdQuery(query.getParsed().getFilter()) - && !query.getParsed().hasOption(QueryOption_CursorTailable); - } - - // static bool IDHackRunner::hasCoveredProjection() const { // Some update operations use the IDHackRunner without creating a // canonical query. In this case, _query will be NULL. Just return diff --git a/src/mongo/db/query/idhack_runner.h b/src/mongo/db/query/idhack_runner.h index 89a6df33f55..23e2d02691c 100644 --- a/src/mongo/db/query/idhack_runner.h +++ b/src/mongo/db/query/idhack_runner.h @@ -76,11 +76,6 @@ namespace mongo { virtual Status getInfo(TypeExplain** explain, PlanInfo** planInfo) const; - /** - * ID Hack has a very strict criteria for the queries it supports. - */ - static bool supportsQuery(const CanonicalQuery& query); - private: /** * ID Hack queries are only covered with the projection {_id: 1}. diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index f69ca28fc5f..5cb25c0aac4 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -32,6 +32,7 @@ #include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" +#include "mongo/util/mongoutils/str.h" namespace mongo { diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp index d1c81173d2e..76b1c083752 100644 --- a/src/mongo/db/query/new_find.cpp +++ b/src/mongo/db/query/new_find.cpp @@ -37,6 +37,7 @@ #include "mongo/db/keypattern.h" #include "mongo/db/query/explain.h" #include "mongo/db/query/find_constants.h" +#include "mongo/db/query/get_executor.h" #include "mongo/db/query/get_runner.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/query/qlog.h" @@ -489,6 +490,8 @@ namespace mongo { // // TODO temporary until find() becomes a real command. if (isExplain && enableNewExplain) { + scoped_ptr<CanonicalQuery> safeCq(cq); + size_t options = QueryPlannerParams::DEFAULT; if (shardingState.needCollectionMetadata(pq.ns())) { options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; @@ -497,11 +500,18 @@ namespace mongo { BufBuilder bb; bb.skip(sizeof(QueryResult)); + PlanExecutor* rawExec; + Status execStatus = getExecutor(collection, cq, &rawExec, options); + if (!execStatus.isOK()) { + uasserted(17510, "Explain error: " + execStatus.reason()); + } + + scoped_ptr<PlanExecutor> exec(rawExec); BSONObjBuilder explainBob; - Status explainStatus = Explain::explain(collection, cq, options, - Explain::QUERY_PLANNER, &explainBob); + Status explainStatus = Explain::explainStages(exec.get(), cq, Explain::EXEC_ALL_PLANS, + &explainBob); if (!explainStatus.isOK()) { - uasserted(17510, "Explain error: " + explainStatus.reason()); + uasserted(18521, "Explain error: " + explainStatus.reason()); } // Add the resulting object to the return buffer. diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index f35017e4c54..235260010a6 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -36,7 +36,19 @@ namespace mongo { PlanExecutor::PlanExecutor(WorkingSet* ws, PlanStage* rt, const Collection* collection) - : _collection(collection), _workingSet(ws), _root(rt), _killed(false) {} + : _collection(collection), + _workingSet(ws), + _root(rt), + _qs(NULL), + _killed(false) { } + + PlanExecutor::PlanExecutor(WorkingSet* ws, PlanStage* rt, QuerySolution* qs, + const Collection* collection) + : _collection(collection), + _workingSet(ws), + _root(rt), + _qs(qs), + _killed(false) { } PlanExecutor::~PlanExecutor() { } @@ -143,4 +155,29 @@ namespace mongo { _killed = true; } + PlanStage* PlanExecutor::releaseStages() { + return _root.release(); + } + + PlanStage* PlanExecutor::getStages() { + return _root.get(); + } + + Status PlanExecutor::executePlan() { + WorkingSetID id = WorkingSet::INVALID_ID; + PlanStage::StageState code = PlanStage::NEED_TIME; + while (PlanStage::NEED_TIME == code || PlanStage::ADVANCED == code) { + code = _root->work(&id); + } + + if (PlanStage::FAILURE == code) { + BSONObj obj; + WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &obj); + return Status(ErrorCodes::BadValue, + "Exec error: " + WorkingSetCommon::toStatusString(obj)); + } + + return Status::OK(); + } + } // namespace mongo diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index 4c8c6fd97ad..c032ea77e4e 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -32,6 +32,7 @@ #include "mongo/base/status.h" #include "mongo/db/query/runner.h" +#include "mongo/db/query/query_solution.h" namespace mongo { @@ -52,6 +53,8 @@ namespace mongo { class PlanExecutor { public: PlanExecutor(WorkingSet* ws, PlanStage* rt, const Collection* collection); + PlanExecutor(WorkingSet* ws, PlanStage* rt, QuerySolution* qs, + const Collection* collection); ~PlanExecutor(); // @@ -94,13 +97,37 @@ namespace mongo { */ void kill(); + /** + * If this stage tree ranked plans using a MultiPlanStage, then returns the winning plan. + * Otherwise returns NULL. + */ + QuerySolution* bestSolution(); + + /** + * Transfer ownership of the stage tree wrapped by this executor to the caller. + */ + PlanStage* releaseStages(); + + /** + * Get the stage tree wrapped by this executor, without transferring ownership. + */ + PlanStage* getStages(); + + /** + * Execute the plan to completion, throwing out the results. + * + * Used by explain. + */ + Status executePlan(); + private: // Collection over which this plan executor runs. Used to resolve record ids retrieved by // the plan stages. The collection must not be destroyed while there are active plans. const Collection* _collection; boost::scoped_ptr<WorkingSet> _workingSet; - boost::scoped_ptr<PlanStage> _root; + std::auto_ptr<PlanStage> _root; + boost::scoped_ptr<QuerySolution> _qs; // Did somebody drop an index we care about or the namespace we're looking at? If so, // we'll be killed. diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 62729bd7990..d258bd08f2d 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -36,7 +36,7 @@ namespace mongo { enum StageType { STAGE_AND_HASH, STAGE_AND_SORTED, - STAGE_CACHED_PLAN, + STAGE_CACHED_PLAN, STAGE_COLLSCAN, // If we're running a .count(), the query is fully covered by one ixscan, and the ixscan is @@ -48,6 +48,8 @@ namespace mongo { // stage is an ixscan with some key-skipping behvaior that only distinct uses. STAGE_DISTINCT, + STAGE_EOF, + // This is more of an "internal-only" stage where we try to keep docs that were mutated // during query execution. STAGE_KEEP_MUTATIONS, @@ -58,15 +60,19 @@ namespace mongo { STAGE_GEO_NEAR_2D, STAGE_GEO_NEAR_2DSPHERE, + STAGE_IDHACK, STAGE_IXSCAN, STAGE_LIMIT, - STAGE_MULTI_PLAN, + STAGE_MOCK, + STAGE_MULTI_PLAN, + STAGE_OPLOG_START, STAGE_OR, STAGE_PROJECTION, STAGE_SHARDING_FILTER, STAGE_SKIP, STAGE_SORT, STAGE_SORT_MERGE, + STAGE_SUBPLAN, STAGE_TEXT, STAGE_UNKNOWN, }; |