diff options
author | David Storch <david.storch@10gen.com> | 2014-10-21 10:24:24 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2014-10-21 10:32:59 -0400 |
commit | 011dde7e6eac3b73cb1d2a7f004feee9bed99c46 (patch) | |
tree | 32b20bc3224627c93e5781ba72abb45a1f825b37 /src/mongo/db/query | |
parent | 0bee61d26e44e26c2678d550990a57ce488f222d (diff) | |
download | mongo-011dde7e6eac3b73cb1d2a7f004feee9bed99c46.tar.gz |
SERVER-15541 SERVER-15652 implement timing-based yielding
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 132 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.h | 9 | ||||
-rw-r--r-- | src/mongo/db/query/internal_plans.h | 32 | ||||
-rw-r--r-- | src/mongo/db/query/new_find.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 191 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.h | 123 | ||||
-rw-r--r-- | src/mongo/db/query/plan_yield_policy.cpp | 69 | ||||
-rw-r--r-- | src/mongo/db/query/plan_yield_policy.h | 29 |
8 files changed, 390 insertions, 225 deletions
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 26db3998308..8c474c0f948 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -301,19 +301,10 @@ namespace mongo { && SubplanStage::canUseSubplanning(*canonicalQuery)) { QLOG() << "Running query as sub-queries: " << canonicalQuery->toStringShort(); + LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort(); - SubplanStage* subPlan; - Status subplanStatus = SubplanStage::make(opCtx, collection, ws, plannerParams, - canonicalQuery, &subPlan); - if (subplanStatus.isOK()) { - LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort(); - *rootOut = subPlan; - return Status::OK(); - } - else { - QLOG() << "Subplanner: " << subplanStatus.reason(); - // Fall back on non-subplan execution. - } + *rootOut = new SubplanStage(opCtx, collection, ws, plannerParams, canonicalQuery); + return Status::OK(); } vector<QuerySolution*> solutions; @@ -388,9 +379,6 @@ namespace mongo { multiPlanStage->addPlan(solutions[ix], nextPlanRoot, ws); } - // Do the plan selection up front. - multiPlanStage->pickBestPlan(); - *rootOut = multiPlanStage; return Status::OK(); } @@ -401,6 +389,7 @@ namespace mongo { Status getExecutor(OperationContext* txn, Collection* collection, CanonicalQuery* rawCanonicalQuery, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out, size_t plannerOptions) { auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); @@ -415,15 +404,15 @@ namespace mongo { invariant(root); // We must have a tree of stages in order to have a valid plan executor, but the query // solution may be null. - *out = new PlanExecutor(txn, ws.release(), root, querySolution, canonicalQuery.release(), - collection); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, querySolution, canonicalQuery.release(), + collection, yieldPolicy, out); } Status getExecutor(OperationContext* txn, Collection* collection, const std::string& ns, const BSONObj& unparsedQuery, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out, size_t plannerOptions) { if (!collection) { @@ -431,8 +420,7 @@ namespace mongo { << " Using EOF stage: " << unparsedQuery.toString(); EOFStage* eofStage = new EOFStage(); WorkingSet* ws = new WorkingSet(); - *out = new PlanExecutor(txn, ws, eofStage, ns); - return Status::OK(); + return PlanExecutor::make(txn, ws, eofStage, ns, yieldPolicy, out); } if (!CanonicalQuery::isSimpleIdQuery(unparsedQuery) || @@ -446,7 +434,7 @@ namespace mongo { return status; // Takes ownership of 'cq'. - return getExecutor(txn, collection, cq, out, plannerOptions); + return getExecutor(txn, collection, cq, yieldPolicy, out, plannerOptions); } LOG(2) << "Using idhack: " << unparsedQuery.toString(); @@ -460,8 +448,7 @@ namespace mongo { root); } - *out = new PlanExecutor(txn, ws, root, collection); - return Status::OK(); + return PlanExecutor::make(txn, ws, root, collection, yieldPolicy, out); } // @@ -475,6 +462,7 @@ namespace mongo { bool shouldCallLogOp, bool fromMigrate, bool isExplain, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out) { auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); auto_ptr<WorkingSet> ws(new WorkingSet()); @@ -495,9 +483,8 @@ namespace mongo { root = new DeleteStage(txn, deleteStageParams, ws.get(), collection, root); // We must have a tree of stages in order to have a valid plan executor, but the query // solution may be null. - *out = new PlanExecutor(txn, ws.release(), root, querySolution, canonicalQuery.release(), - collection); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, querySolution, canonicalQuery.release(), + collection, yieldPolicy, out); } Status getExecutorDelete(OperationContext* txn, @@ -508,6 +495,7 @@ namespace mongo { bool shouldCallLogOp, bool fromMigrate, bool isExplain, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out) { auto_ptr<WorkingSet> ws(new WorkingSet()); DeleteStageParams deleteStageParams; @@ -523,8 +511,7 @@ namespace mongo { << " Using EOF stage: " << unparsedQuery.toString(); DeleteStage* deleteStage = new DeleteStage(txn, deleteStageParams, ws.get(), NULL, new EOFStage()); - *out = new PlanExecutor(txn, ws.release(), deleteStage, ns); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), deleteStage, ns, yieldPolicy, out); } if (CanonicalQuery::isSimpleIdQuery(unparsedQuery) && @@ -535,8 +522,7 @@ namespace mongo { ws.get()); DeleteStage* root = new DeleteStage(txn, deleteStageParams, ws.get(), collection, idHackStage); - *out = new PlanExecutor(txn, ws.release(), root, collection); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, collection, yieldPolicy, out); } const WhereCallbackReal whereCallback(txn, collection->ns().db()); @@ -551,7 +537,7 @@ namespace mongo { // Takes ownership of 'cq'. return getExecutorDelete(txn, collection, cq, isMulti, shouldCallLogOp, - fromMigrate, isExplain, out); + fromMigrate, isExplain, yieldPolicy, out); } // @@ -564,6 +550,7 @@ namespace mongo { const UpdateRequest* request, UpdateDriver* driver, OpDebug* opDebug, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut) { auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); auto_ptr<WorkingSet> ws(new WorkingSet()); @@ -584,13 +571,14 @@ namespace mongo { root = new UpdateStage(updateStageParams, ws.get(), db, root); // We must have a tree of stages in order to have a valid plan executor, but the query // solution may be null. Takes ownership of all args other than 'collection' and 'txn' - *execOut = new PlanExecutor(txn, - ws.release(), - root, - querySolution, - canonicalQuery.release(), - collection); - return Status::OK(); + return PlanExecutor::make(txn, + ws.release(), + root, + querySolution, + canonicalQuery.release(), + collection, + yieldPolicy, + execOut); } Status getExecutorUpdate(OperationContext* txn, @@ -599,6 +587,7 @@ namespace mongo { const UpdateRequest* request, UpdateDriver* driver, OpDebug* opDebug, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut) { auto_ptr<WorkingSet> ws(new WorkingSet()); Collection* collection = db->getCollection(request->getOpCtx(), @@ -614,8 +603,7 @@ namespace mongo { << " Using EOF stage: " << unparsedQuery.toString(); UpdateStage* updateStage = new UpdateStage(updateStageParams, ws.get(), db, new EOFStage()); - *execOut = new PlanExecutor(txn, ws.release(), updateStage, ns); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), updateStage, ns, yieldPolicy, execOut); } if (CanonicalQuery::isSimpleIdQuery(unparsedQuery) && @@ -625,8 +613,7 @@ namespace mongo { PlanStage* idHackStage = new IDHackStage(txn, collection, unparsedQuery["_id"].wrap(), ws.get()); UpdateStage* root = new UpdateStage(updateStageParams, ws.get(), db, idHackStage); - *execOut = new PlanExecutor(txn, ws.release(), root, collection); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, collection, yieldPolicy, execOut); } const WhereCallbackReal whereCallback(txn, collection->ns().db()); @@ -640,7 +627,7 @@ namespace mongo { return status; // Takes ownership of 'cq'. - return getExecutorUpdate(txn, db, cq, request, driver, opDebug, execOut); + return getExecutorUpdate(txn, db, cq, request, driver, opDebug, yieldPolicy, execOut); } // @@ -650,6 +637,7 @@ namespace mongo { Status getExecutorGroup(OperationContext* txn, Collection* collection, const GroupRequest& request, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut) { if (!globalScriptEngine) { return Status(ErrorCodes::BadValue, "server-side JavaScript execution is disabled"); @@ -664,8 +652,7 @@ namespace mongo { // reporting machinery always assumes that the root stage for a group operation is a // GroupStage, so in this case we put a GroupStage on top of an EOFStage. root = new GroupStage(txn, request, ws.get(), new EOFStage()); - *execOut = new PlanExecutor(txn, ws.release(), root, request.ns); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut); } const NamespaceString nss(request.ns); @@ -692,13 +679,14 @@ namespace mongo { root = new GroupStage(txn, request, ws.get(), root); // We must have a tree of stages in order to have a valid plan executor, but the query // solution may be null. Takes ownership of all args other than 'collection'. - *execOut = new PlanExecutor(txn, - ws.release(), - root, - querySolution, - canonicalQuery.release(), - collection); - return Status::OK(); + return PlanExecutor::make(txn, + ws.release(), + root, + querySolution, + canonicalQuery.release(), + collection, + yieldPolicy, + execOut); } // @@ -880,6 +868,7 @@ namespace mongo { Status getExecutorCount(OperationContext* txn, Collection* collection, const CountRequest& request, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut) { auto_ptr<WorkingSet> ws(new WorkingSet()); PlanStage* root; @@ -890,8 +879,7 @@ namespace mongo { // reporting machinery always assumes that the root stage for a count operation is // a CountStage, so in this case we put a CountStage on top of an EOFStage. root = new CountStage(txn, collection, request, ws.get(), new EOFStage()); - *execOut = new PlanExecutor(txn, ws.release(), root, request.ns); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut); } if (request.query.isEmpty()) { @@ -899,8 +887,7 @@ namespace mongo { // for its number of records. This is implemented by the CountStage, and we don't need // to create a child for the count stage in this case. root = new CountStage(txn, collection, request, ws.get(), NULL); - *execOut = new PlanExecutor(txn, ws.release(), root, request.ns); - return Status::OK(); + return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut); } const WhereCallbackReal whereCallback(txn, collection->ns().db()); @@ -937,14 +924,14 @@ namespace mongo { root = new CountStage(txn, collection, request, ws.get(), root); // We must have a tree of stages in order to have a valid plan executor, but the query // solution may be NULL. Takes ownership of all args other than 'collection' and 'txn' - *execOut = new PlanExecutor(txn, - ws.release(), - root, - querySolution, - cq.release(), - collection); - - return Status::OK(); + return PlanExecutor::make(txn, + ws.release(), + root, + querySolution, + cq.release(), + collection, + yieldPolicy, + execOut); } // @@ -1004,6 +991,7 @@ namespace mongo { Collection* collection, const BSONObj& query, const std::string& field, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out) { // This should'a been checked by the distinct command. invariant(collection); @@ -1050,7 +1038,7 @@ namespace mongo { } // Takes ownership of 'cq'. - return getExecutor(txn, collection, cq, out); + return getExecutor(txn, collection, cq, yieldPolicy, out); } // @@ -1102,15 +1090,15 @@ namespace mongo { << ", planSummary: " << Explain::getPlanSummary(root); // Takes ownership of its arguments (except for 'collection'). - *out = new PlanExecutor(txn, ws, root, soln, autoCq.release(), collection); - return Status::OK(); + return PlanExecutor::make(txn, ws, root, soln, autoCq.release(), collection, + yieldPolicy, out); } // See if we can answer the query in a fast-distinct compatible fashion. vector<QuerySolution*> solutions; status = QueryPlanner::plan(*cq, plannerParams, &solutions); if (!status.isOK()) { - return getExecutor(txn, collection, autoCq.release(), out); + return getExecutor(txn, collection, autoCq.release(), yieldPolicy, out); } // We look for a solution that has an ixscan we can turn into a distinctixscan @@ -1131,9 +1119,9 @@ namespace mongo { LOG(2) << "Using fast distinct: " << cq->toStringShort() << ", planSummary: " << Explain::getPlanSummary(root); - // Takes ownership of its arguments (except for 'collection'). - *out = new PlanExecutor(txn, ws, root, solutions[i], autoCq.release(), collection); - return Status::OK(); + // Takes ownership of 'ws', 'root', 'solutions[i]', and 'autoCq'. + return PlanExecutor::make(txn, ws, root, solutions[i], autoCq.release(), + collection, yieldPolicy, out); } } @@ -1153,7 +1141,7 @@ namespace mongo { autoCq.reset(cq); // Takes ownership of 'autoCq'. - return getExecutor(txn, collection, autoCq.release(), out); + return getExecutor(txn, collection, autoCq.release(), yieldPolicy, out); } } // namespace mongo diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h index 60d015f93d0..86e8156dfd6 100644 --- a/src/mongo/db/query/get_executor.h +++ b/src/mongo/db/query/get_executor.h @@ -71,6 +71,7 @@ namespace mongo { Status getExecutor(OperationContext* txn, Collection* collection, CanonicalQuery* rawCanonicalQuery, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out, size_t plannerOptions = 0); @@ -90,6 +91,7 @@ namespace mongo { Collection* collection, const std::string& ns, const BSONObj& unparsedQuery, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out, size_t plannerOptions = 0); @@ -113,6 +115,7 @@ namespace mongo { Collection* collection, const BSONObj& query, const std::string& field, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** out); /* @@ -125,6 +128,7 @@ namespace mongo { Status getExecutorCount(OperationContext* txn, Collection* collection, const CountRequest& request, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); // @@ -149,6 +153,7 @@ namespace mongo { bool shouldCallLogOp, bool fromMigrate, bool isExplain, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); /** @@ -168,6 +173,7 @@ namespace mongo { bool shouldCallLogOp, bool fromMigrate, bool isExplain, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); // @@ -191,6 +197,7 @@ namespace mongo { const UpdateRequest* request, UpdateDriver* driver, OpDebug* opDebug, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); /** @@ -210,6 +217,7 @@ namespace mongo { const UpdateRequest* request, UpdateDriver* driver, OpDebug* opDebug, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); // @@ -230,6 +238,7 @@ namespace mongo { Status getExecutorGroup(OperationContext* txn, Collection* collection, const GroupRequest& request, + PlanExecutor::YieldPolicy yieldPolicy, PlanExecutor** execOut); } // namespace mongo diff --git a/src/mongo/db/query/internal_plans.h b/src/mongo/db/query/internal_plans.h index c16aa251f2d..398c2ccbbb5 100644 --- a/src/mongo/db/query/internal_plans.h +++ b/src/mongo/db/query/internal_plans.h @@ -73,7 +73,16 @@ namespace mongo { if (NULL == collection) { EOFStage* eof = new EOFStage(); - return new PlanExecutor(txn, ws, eof, ns.toString()); + PlanExecutor* exec; + // Takes ownership if 'ws' and 'eof'. + Status execStatus = PlanExecutor::make(txn, + ws, + eof, + ns.toString(), + PlanExecutor::YIELD_MANUAL, + &exec); + invariant(execStatus.isOK()); + return exec; } dassert( ns == collection->ns().ns() ); @@ -90,8 +99,16 @@ namespace mongo { } CollectionScan* cs = new CollectionScan(txn, params, ws, NULL); + PlanExecutor* exec; // Takes ownership of 'ws' and 'cs'. - return new PlanExecutor(txn, ws, cs, collection); + Status execStatus = PlanExecutor::make(txn, + ws, + cs, + collection, + PlanExecutor::YIELD_MANUAL, + &exec); + invariant(execStatus.isOK()); + return exec; } /** @@ -123,7 +140,16 @@ namespace mongo { root = new FetchStage(txn, ws, root, NULL, collection); } - return new PlanExecutor(txn, ws, root, collection); + PlanExecutor* exec; + // Takes ownership of 'ws' and 'root'. + Status execStatus = PlanExecutor::make(txn, + ws, + root, + collection, + PlanExecutor::YIELD_MANUAL, + &exec); + invariant(execStatus.isOK()); + return exec; } }; diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp index e091193dc37..89d724c4c30 100644 --- a/src/mongo/db/query/new_find.cpp +++ b/src/mongo/db/query/new_find.cpp @@ -455,8 +455,12 @@ namespace mongo { WorkingSet* oplogws = new WorkingSet(); OplogStart* stage = new OplogStart(txn, collection, tsExpr, oplogws); + PlanExecutor* rawExec; // Takes ownership of ws and stage. - scoped_ptr<PlanExecutor> exec(new PlanExecutor(txn, oplogws, stage, collection)); + Status execStatus = PlanExecutor::make(txn, oplogws, stage, collection, + PlanExecutor::YIELD_AUTO, &rawExec); + invariant(execStatus.isOK()); + scoped_ptr<PlanExecutor> exec(rawExec); // The stage returns a DiskLoc of where to start. DiskLoc startLoc; @@ -464,7 +468,8 @@ namespace mongo { // This is normal. The start of the oplog is the beginning of the collection. if (PlanExecutor::IS_EOF == state) { - return getExecutor(txn, collection, autoCq.release(), execOut); + return getExecutor(txn, collection, autoCq.release(), PlanExecutor::YIELD_AUTO, + execOut); } // This is not normal. An error was encountered. @@ -485,8 +490,8 @@ namespace mongo { WorkingSet* ws = new WorkingSet(); CollectionScan* cs = new CollectionScan(txn, params, ws, cq->root()); // Takes ownership of 'ws', 'cs', and 'cq'. - *execOut = new PlanExecutor(txn, ws, cs, autoCq.release(), collection); - return Status::OK(); + return PlanExecutor::make(txn, ws, cs, autoCq.release(), collection, + PlanExecutor::YIELD_AUTO, execOut); } std::string newRunQuery(OperationContext* txn, @@ -580,15 +585,7 @@ namespace mongo { // Otherwise we go through the selection of which executor is most suited to the // query + run-time context at hand. Status status = Status::OK(); - if (collection == NULL) { - LOG(2) << "Collection " << ns << " does not exist." - << " Using EOF stage: " << cq->toStringShort(); - EOFStage* eofStage = new EOFStage(); - WorkingSet* ws = new WorkingSet(); - // Takes ownership of 'cq'. - rawExec = new PlanExecutor(txn, ws, eofStage, cq, NULL); - } - else if (pq.getOptions().oplogReplay) { + if (NULL != collection && pq.getOptions().oplogReplay) { // Takes ownership of 'cq'. status = getOplogStartHack(txn, collection, cq, &rawExec); } @@ -598,7 +595,7 @@ namespace mongo { options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; } // Takes ownership of 'cq'. - status = getExecutor(txn, collection, cq, &rawExec, options); + status = getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, options); } if (!status.isOK()) { @@ -609,11 +606,6 @@ namespace mongo { verify(NULL != rawExec); auto_ptr<PlanExecutor> exec(rawExec); - // We want the PlanExecutor to yield automatically, but we handle registration of the - // executor ourselves. We want to temporarily register the executor while we are generating - // this batch of results, and then unregister and re-register with ClientCursor for getmore. - exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); - // If it's actually an explain, do the explain and return rather than falling through // to the normal query execution loop. if (pq.isExplain()) { diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index 601091a567b..cca412cac8a 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -29,9 +29,11 @@ #include "mongo/db/query/plan_executor.h" #include "mongo/db/catalog/collection.h" +#include "mongo/db/exec/multi_plan.h" #include "mongo/db/exec/pipeline_proxy.h" #include "mongo/db/exec/plan_stage.h" #include "mongo/db/exec/plan_stats.h" +#include "mongo/db/exec/subplan.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/query/plan_yield_policy.h" @@ -40,46 +42,93 @@ namespace mongo { - PlanExecutor::PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - const Collection* collection) - : _opCtx(opCtx), - _collection(collection), - _cq(NULL), - _workingSet(ws), - _qs(NULL), - _root(rt), - _killed(false) { - initNs(); + namespace { + + /** + * Retrieves the first stage of a given type from the plan tree, or NULL + * if no such stage is found. + */ + PlanStage* getStageByType(PlanStage* root, StageType type) { + if (root->stageType() == type) { + return root; + } + + vector<PlanStage*> children = root->getChildren(); + for (size_t i = 0; i < children.size(); i++) { + PlanStage* result = getStageByType(children[i], type); + if (result) { + return result; + } + } + + return NULL; + } + } - PlanExecutor::PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - std::string ns) - : _opCtx(opCtx), - _collection(NULL), - _cq(NULL), - _workingSet(ws), - _qs(NULL), - _root(rt), - _ns(ns), - _killed(false) { } + // static + Status PlanExecutor::make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out) { + return PlanExecutor::make(opCtx, ws, rt, NULL, NULL, collection, "", yieldPolicy, out); + } - PlanExecutor::PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - CanonicalQuery* cq, - const Collection* collection) - : _opCtx(opCtx), - _collection(collection), - _cq(cq), - _workingSet(ws), - _qs(NULL), - _root(rt), - _killed(false) { - initNs(); + // static + Status PlanExecutor::make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + const std::string& ns, + YieldPolicy yieldPolicy, + PlanExecutor** out) { + return PlanExecutor::make(opCtx, ws, rt, NULL, NULL, NULL, ns, yieldPolicy, out); + } + + // static + Status PlanExecutor::make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + CanonicalQuery* cq, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out) { + return PlanExecutor::make(opCtx, ws, rt, NULL, cq, collection, "", yieldPolicy, out); + } + + // static + Status PlanExecutor::make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + QuerySolution* qs, + CanonicalQuery* cq, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out) { + return PlanExecutor::make(opCtx, ws, rt, qs, cq, collection, "", yieldPolicy, out); + } + + // static + Status PlanExecutor::make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + QuerySolution* qs, + CanonicalQuery* cq, + const Collection* collection, + const std::string& ns, + YieldPolicy yieldPolicy, + PlanExecutor** out) { + std::auto_ptr<PlanExecutor> exec(new PlanExecutor(opCtx, ws, rt, qs, cq, collection, ns)); + + // Perform plan selection, if necessary. + Status status = exec->pickBestPlan(yieldPolicy); + if (!status.isOK()) { + return status; + } + + *out = exec.release(); + return Status::OK(); } PlanExecutor::PlanExecutor(OperationContext* opCtx, @@ -87,18 +136,22 @@ namespace mongo { PlanStage* rt, QuerySolution* qs, CanonicalQuery* cq, - const Collection* collection) + const Collection* collection, + const std::string& ns) : _opCtx(opCtx), _collection(collection), _cq(cq), _workingSet(ws), _qs(qs), _root(rt), + _ns(ns), _killed(false) { - initNs(); - } + // We may still need to initialize _ns from either _collection or _cq. + if (!_ns.empty()) { + // We already have an _ns set, so there's nothing more to do. + return; + } - void PlanExecutor::initNs() { if (NULL != _collection) { _ns = _collection->ns().ns(); } @@ -108,6 +161,31 @@ namespace mongo { } } + Status PlanExecutor::pickBestPlan(YieldPolicy policy) { + // For YIELD_AUTO, this will both set an auto yield policy on the PlanExecutor and + // register it to receive notifications. + this->setYieldPolicy(policy); + + // First check if we need to do subplanning. + PlanStage* foundStage = getStageByType(_root.get(), STAGE_SUBPLAN); + if (foundStage) { + SubplanStage* subplan = static_cast<SubplanStage*>(foundStage); + return subplan->pickBestPlan(_yieldPolicy.get()); + } + + // If we didn't have to do subplanning, we might still have to do regular + // multi plan selection. + foundStage = getStageByType(_root.get(), STAGE_MULTI_PLAN); + if (foundStage) { + MultiPlanStage* mps = static_cast<MultiPlanStage*>(foundStage); + return mps->pickBestPlan(_yieldPolicy.get()); + } + + // Either we chose a plan, or no plan selection was required. In both cases, + // our work has been successfully completed. + return Status::OK(); + } + PlanExecutor::~PlanExecutor() { } // static @@ -180,6 +258,14 @@ namespace mongo { if (_killed) { return PlanExecutor::DEAD; } for (;;) { + // Yield if it's time to yield. + if (NULL != _yieldPolicy.get() && _yieldPolicy->shouldYield()) { + _yieldPolicy->yield(); + if (_killed) { + return PlanExecutor::DEAD; + } + } + WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState code = _root->work(&id); @@ -282,19 +368,22 @@ namespace mongo { } Status PlanExecutor::executePlan() { - WorkingSetID id = WorkingSet::INVALID_ID; - PlanStage::StageState code = PlanStage::NEED_TIME; - while (PlanStage::NEED_TIME == code || PlanStage::ADVANCED == code) { - code = _root->work(&id); + BSONObj obj; + PlanExecutor::ExecState state = PlanExecutor::ADVANCED; + while (PlanExecutor::ADVANCED == state) { + state = this->getNext(&obj, NULL); } - if (PlanStage::FAILURE == code) { - BSONObj obj; - WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &obj); - return Status(ErrorCodes::BadValue, - "Exec error: " + WorkingSetCommon::toStatusString(obj)); + if (PlanExecutor::DEAD == state) { + return Status(ErrorCodes::OperationFailed, "Exec error: PlanExecutor killed"); + } + else if (PlanExecutor::EXEC_ERROR == state) { + return Status(ErrorCodes::OperationFailed, + str::stream() << "Exec error: " + << WorkingSetCommon::toStatusString(obj)); } + invariant(PlanExecutor::IS_EOF == state); return Status::OK(); } @@ -308,7 +397,7 @@ namespace mongo { } else { invariant(PlanExecutor::YIELD_AUTO == policy); - _yieldPolicy.reset(new PlanYieldPolicy()); + _yieldPolicy.reset(new PlanYieldPolicy(this)); // Runners that yield automatically generally need to be registered so that // after yielding, they receive notifications of events like deletions and diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index db616fc7281..f7da5783174 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -89,27 +89,35 @@ namespace mongo { // // 1. Register your PlanExecutor with ClientCursor. Registered executors are informed // about DiskLoc deletions and namespace invalidation, as well as other important - // events. Do this either by calling registerExec() on the executor. This can be done - // once you get your executor, or could be done per-yield. + // events. Do this by calling registerExec() on the executor. Alternatively, this can + // be done per-yield (as described below). // - // 2. Call exec->saveState() before you yield. + // 2. Construct a PlanYieldPolicy 'policy', passing 'exec' to the constructor. // - // 3. Call Yield::yieldAllLocks(), passing in the executor's OperationContext*. This - // causes the executor to give up its locks and block so that it goes to the back of - // the scheduler's queue. + // 3. Call PlanYieldPolicy::yield() on 'policy'. If your PlanExecutor is not yet + // registered (because you want to register on a per-yield basis), then pass + // 'true' to yield(). // - // 4. Call exec->restoreState() before using the executor again. - // - // 5. The next call to exec->getNext() may return DEAD. - // - // 6. Make sure the executor gets deregistered from ClientCursor. PlanExecutor does - // this in an RAII fashion when it is destroyed, or you can explicity call - // unregisterExec() on the PlanExecutor. + // 4. The call to yield() returns a boolean indicating whether or not 'exec' is + // still alove. If it is false, then 'exec' was killed during the yield and is + // no longer valid. YIELD_MANUAL, }; // - // Constructors / destructor. + // Factory methods. + // + // On success, return a new PlanExecutor, owned by the caller, through 'out'. + // + // Passing YIELD_AUTO to any of these factories will construct a yielding runner which + // may yield in the following circumstances: + // 1) During plan selection inside the call to make(). + // 2) On any call to getNext(). + // 3) While executing the plan inside executePlan(). + // + // The runner will also be automatically registered to receive notifications in the + // case of YIELD_AUTO, so no further calls to registerExec() or setYieldPolicy() are + // necessary. // /** @@ -118,40 +126,48 @@ namespace mongo { * Right now this is only for idhack updates which neither canonicalize * nor go through normal planning. */ - PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - const Collection* collection); + static Status make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out); /** * Used when we have a NULL collection and no canonical query. In this case, * we need to explicitly pass a namespace to the plan executor. */ - PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - std::string ns); + static Status make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + const std::string& ns, + YieldPolicy yieldPolicy, + PlanExecutor** out); /** * Used when there is a canonical query but no query solution (e.g. idhack * queries, queries against a NULL collection, queries using the subplan stage). */ - PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - CanonicalQuery* cq, - const Collection* collection); + static Status make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + CanonicalQuery* cq, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out); /** * The constructor for the normal case, when you have both a canonical query * and a query solution. */ - PlanExecutor(OperationContext* opCtx, - WorkingSet* ws, - PlanStage* rt, - QuerySolution* qs, - CanonicalQuery* cq, - const Collection* collection); + static Status make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + QuerySolution* qs, + CanonicalQuery* cq, + const Collection* collection, + YieldPolicy yieldPolicy, + PlanExecutor** out); ~PlanExecutor(); @@ -228,6 +244,8 @@ namespace mongo { * For read operations, objOut or dlOut are populated with another query result. * * For write operations, the return depends on the particulars of the write stage. + * + * If an AUTO_YIELD policy is set, then this method may yield. */ ExecState getNext(BSONObj* objOut, DiskLoc* dlOut); @@ -242,6 +260,8 @@ namespace mongo { /** * Execute the plan to completion, throwing out the results. Used when you want to work the * underlying tree without getting results back. + * + * If an AUTO_YIELD policy is set on this executor, then this will automatically yield. */ Status executePlan(); @@ -315,9 +335,42 @@ namespace mongo { }; /** - * Initialize the namespace using either the canonical query or the collection. + * New PlanExecutor instances are created with the static make() methods above. + */ + PlanExecutor(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + QuerySolution* qs, + CanonicalQuery* cq, + const Collection* collection, + const std::string& ns); + + /** + * Public factory methods delegate to this private factory to do their work. + */ + static Status make(OperationContext* opCtx, + WorkingSet* ws, + PlanStage* rt, + QuerySolution* qs, + CanonicalQuery* cq, + const Collection* collection, + const std::string& ns, + YieldPolicy yieldPolicy, + PlanExecutor** out); + + /** + * Clients of PlanExecutor expect that on receiving a new instance from one of the make() + * factory methods, plan selection has already been completed. In order to enforce this + * property, this function is called to do plan selection prior to returning the new + * PlanExecutor. + * + * If the tree contains plan selection stages, such as MultiPlanStage or SubplanStage, + * this calls into their underlying plan selection facilities. Otherwise, does nothing. + * + * If an AUTO_YIELD policy is set (and document-level locking is not supported), then + * locks are yielded during plan selection. */ - void initNs(); + Status pickBestPlan(YieldPolicy policy); // The OperationContext that we're executing within. We need this in order to release // locks. diff --git a/src/mongo/db/query/plan_yield_policy.cpp b/src/mongo/db/query/plan_yield_policy.cpp index f8db2c1e98a..689a76973b5 100644 --- a/src/mongo/db/query/plan_yield_policy.cpp +++ b/src/mongo/db/query/plan_yield_policy.cpp @@ -31,61 +31,52 @@ #include "mongo/db/query/plan_yield_policy.h" #include "mongo/db/concurrency/yield.h" +#include "mongo/db/global_environment_experiment.h" namespace mongo { // Yield every 128 cycles or 10ms. These values were inherited from v2.6, which just copied // them from v2.4. - PlanYieldPolicy::PlanYieldPolicy() + PlanYieldPolicy::PlanYieldPolicy(PlanExecutor* exec) : _elapsedTracker(128, 10), - _planYielding(NULL) { } - - PlanYieldPolicy::~PlanYieldPolicy() { - if (NULL != _planYielding) { - // We were destructed mid-yield. Since we're being used to yield a runner, we have - // to deregister the runner. - if (_planYielding->collection()) { - _planYielding->collection()->cursorCache()->deregisterExecutor(_planYielding); - } - } - } - - /** - * Yield the provided runner, registering and deregistering it appropriately. Deal with - * deletion during a yield by setting _runnerYielding to ensure deregistration. - * - * Provided runner MUST be YIELD_MANUAL. - */ - bool PlanYieldPolicy::yieldAndCheckIfOK(PlanExecutor* plan) { - invariant(plan); - invariant(plan->collection()); + _planYielding(exec) { } - // If micros > 0, we should yield. - plan->saveState(); + bool PlanYieldPolicy::shouldYield() { + return _elapsedTracker.intervalHasElapsed(); + } - // If we're destructed during yield this will be used to deregister ourselves. - // This happens when we're not in a ClientCursor and somebody kills all cursors - // on the ns we're operating on. - _planYielding = plan; + bool PlanYieldPolicy::yield(bool registerPlan) { + // This is a no-op if document-level locking is supported. Doc-level locking systems + // should not need to yield. + if (supportsDocLocking()) { + return true; + } - // Register with the thing that may kill() the 'plan'. - plan->collection()->cursorCache()->registerExecutor(plan); + // No need to yield if the collection is NULL. + if (NULL == _planYielding->collection()) { + return true; + } - // Note that this call checks for interrupt, and thus can throw if interrupt flag is set. - Yield::yieldAllLocks(plan->getOpCtx(), 1); + invariant(_planYielding); - // If the plan was killed, runner->collection() will return NULL, and we can't/don't - // deregister it. - if (plan->collection()) { - plan->collection()->cursorCache()->deregisterExecutor(plan); + if (registerPlan) { + _planYielding->registerExec(); } - _planYielding = NULL; + OperationContext* opCtx = _planYielding->getOpCtx(); + invariant(opCtx); + + _planYielding->saveState(); + // Note that this call checks for interrupt, and thus can throw if interrupt flag is set. + Yield::yieldAllLocks(opCtx, 1); _elapsedTracker.resetLastTime(); - return plan->restoreState(plan->getOpCtx()); + if (registerPlan) { + _planYielding->deregisterExec(); + } + + return _planYielding->restoreState(opCtx); } } // namespace mongo - diff --git a/src/mongo/db/query/plan_yield_policy.h b/src/mongo/db/query/plan_yield_policy.h index cb6d1a0a9ec..50aff9656f9 100644 --- a/src/mongo/db/query/plan_yield_policy.h +++ b/src/mongo/db/query/plan_yield_policy.h @@ -29,26 +29,43 @@ #pragma once #include "mongo/db/catalog/collection.h" +#include "mongo/db/query/plan_executor.h" #include "mongo/util/elapsed_tracker.h" namespace mongo { class PlanYieldPolicy { public: - PlanYieldPolicy(); - ~PlanYieldPolicy(); + explicit PlanYieldPolicy(PlanExecutor* exec); + + /** + * Used by AUTO_YIELD plan executors in order to check whether it is time to yield. + * PlanExecutors give up their locks periodically in order to be fair to other + * threads. + */ + bool shouldYield(); /** - * Yield the provided runner, registering and deregistering it appropriately. Deal with - * deletion during a yield by setting _planYielding to ensure deregistration. + * Used to cause a plan executor to give up locks and go to sleep. The PlanExecutor + * must *not* be in saved state. Handles calls to save/restore state internally. * - * Provided plan executor MUST be YIELD_MANUAL. + * By default, assumes that the PlanExecutor is already registered. If 'registerPlan' + * is explicitly set to true, then the executor will get automatically registered and + * deregistered here. + * + * Returns true if the executor was restored successfully and is still alive. Returns false + * if the executor got killed during yield. */ - bool yieldAndCheckIfOK(PlanExecutor* plan); + bool yield(bool registerPlan = false); private: + // Default constructor disallowed in order to ensure initialization of '_planYielding'. + PlanYieldPolicy(); + ElapsedTracker _elapsedTracker; + // The plan executor which this yield policy is responsible for yielding. Must + // not outlive the plan executor. PlanExecutor* _planYielding; }; |