SERVER-15541 SERVER-15652 implement timing-based yielding

author: David Storch <david.storch@10gen.com> 2014-10-21 10:24:24 -0400
committer: David Storch <david.storch@10gen.com> 2014-10-21 10:32:59 -0400
commit: 011dde7e6eac3b73cb1d2a7f004feee9bed99c46 (patch)
tree: 32b20bc3224627c93e5781ba72abb45a1f825b37 /src/mongo/db/query
parent: 0bee61d26e44e26c2678d550990a57ce488f222d (diff)
download: mongo-011dde7e6eac3b73cb1d2a7f004feee9bed99c46.tar.gz
8 files changed, 390 insertions, 225 deletions
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 26db3998308..8c474c0f948 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -301,19 +301,10 @@ namespace mongo {
                 && SubplanStage::canUseSubplanning(*canonicalQuery)) {
 
                 QLOG() << "Running query as sub-queries: " << canonicalQuery->toStringShort();
+                LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort();
 
-                SubplanStage* subPlan;
-                Status subplanStatus = SubplanStage::make(opCtx, collection, ws, plannerParams,
-                                                          canonicalQuery, &subPlan);
-                if (subplanStatus.isOK()) {
-                    LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort();
-                    *rootOut = subPlan;
-                    return Status::OK();
-                }
-                else {
-                    QLOG() << "Subplanner: " << subplanStatus.reason();
-                    // Fall back on non-subplan execution.
-                }
+                *rootOut = new SubplanStage(opCtx, collection, ws, plannerParams, canonicalQuery);
+                return Status::OK();
             }
 
             vector<QuerySolution*> solutions;
@@ -388,9 +379,6 @@ namespace mongo {
                     multiPlanStage->addPlan(solutions[ix], nextPlanRoot, ws);
                 }
 
-                // Do the plan selection up front.
-                multiPlanStage->pickBestPlan();
-
                 *rootOut = multiPlanStage;
                 return Status::OK();
             }
@@ -401,6 +389,7 @@ namespace mongo {
     Status getExecutor(OperationContext* txn,
                        Collection* collection,
                        CanonicalQuery* rawCanonicalQuery,
+                       PlanExecutor::YieldPolicy yieldPolicy,
                        PlanExecutor** out,
                        size_t plannerOptions) {
         auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
@@ -415,15 +404,15 @@ namespace mongo {
         invariant(root);
         // We must have a tree of stages in order to have a valid plan executor, but the query
         // solution may be null.
-        *out = new PlanExecutor(txn, ws.release(), root, querySolution, canonicalQuery.release(),
-                                collection);
-        return Status::OK();
+        return PlanExecutor::make(txn, ws.release(), root, querySolution, canonicalQuery.release(),
+                                  collection, yieldPolicy, out);
     }
 
     Status getExecutor(OperationContext* txn,
                        Collection* collection,
                        const std::string& ns,
                        const BSONObj& unparsedQuery,
+                       PlanExecutor::YieldPolicy yieldPolicy,
                        PlanExecutor** out,
                        size_t plannerOptions) {
         if (!collection) {
@@ -431,8 +420,7 @@ namespace mongo {
                    << " Using EOF stage: " << unparsedQuery.toString();
             EOFStage* eofStage = new EOFStage();
             WorkingSet* ws = new WorkingSet();
-            *out = new PlanExecutor(txn, ws, eofStage, ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws, eofStage, ns, yieldPolicy, out);
         }
 
         if (!CanonicalQuery::isSimpleIdQuery(unparsedQuery) ||
@@ -446,7 +434,7 @@ namespace mongo {
                 return status;
 
             // Takes ownership of 'cq'.
-            return getExecutor(txn, collection, cq, out, plannerOptions);
+            return getExecutor(txn, collection, cq, yieldPolicy, out, plannerOptions);
         }
 
         LOG(2) << "Using idhack: " << unparsedQuery.toString();
@@ -460,8 +448,7 @@ namespace mongo {
                                         root);
         }
 
-        *out = new PlanExecutor(txn, ws, root, collection);
-        return Status::OK();
+        return PlanExecutor::make(txn, ws, root, collection, yieldPolicy, out);
     }
 
     //
@@ -475,6 +462,7 @@ namespace mongo {
                              bool shouldCallLogOp,
                              bool fromMigrate,
                              bool isExplain,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** out) {
         auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
         auto_ptr<WorkingSet> ws(new WorkingSet());
@@ -495,9 +483,8 @@ namespace mongo {
         root = new DeleteStage(txn, deleteStageParams, ws.get(), collection, root);
         // We must have a tree of stages in order to have a valid plan executor, but the query
         // solution may be null.
-        *out = new PlanExecutor(txn, ws.release(), root, querySolution, canonicalQuery.release(),
-                                collection);
-        return Status::OK();
+        return PlanExecutor::make(txn, ws.release(), root, querySolution, canonicalQuery.release(),
+                                  collection, yieldPolicy, out);
     }
 
     Status getExecutorDelete(OperationContext* txn,
@@ -508,6 +495,7 @@ namespace mongo {
                              bool shouldCallLogOp,
                              bool fromMigrate,
                              bool isExplain,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** out) {
         auto_ptr<WorkingSet> ws(new WorkingSet());
         DeleteStageParams deleteStageParams;
@@ -523,8 +511,7 @@ namespace mongo {
                    << " Using EOF stage: " << unparsedQuery.toString();
             DeleteStage* deleteStage = new DeleteStage(txn, deleteStageParams, ws.get(), NULL,
                                                        new EOFStage());
-            *out = new PlanExecutor(txn, ws.release(), deleteStage, ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), deleteStage, ns, yieldPolicy, out);
         }
 
         if (CanonicalQuery::isSimpleIdQuery(unparsedQuery) &&
@@ -535,8 +522,7 @@ namespace mongo {
                                                      ws.get());
             DeleteStage* root = new DeleteStage(txn, deleteStageParams, ws.get(), collection,
                                                 idHackStage);
-            *out = new PlanExecutor(txn, ws.release(), root, collection);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), root, collection, yieldPolicy, out);
         }
 
         const WhereCallbackReal whereCallback(txn, collection->ns().db());
@@ -551,7 +537,7 @@ namespace mongo {
 
         // Takes ownership of 'cq'.
         return getExecutorDelete(txn, collection, cq, isMulti, shouldCallLogOp,
-                                 fromMigrate, isExplain, out);
+                                 fromMigrate, isExplain, yieldPolicy, out);
     }
 
     //
@@ -564,6 +550,7 @@ namespace mongo {
                              const UpdateRequest* request,
                              UpdateDriver* driver,
                              OpDebug* opDebug,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut) {
         auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
         auto_ptr<WorkingSet> ws(new WorkingSet());
@@ -584,13 +571,14 @@ namespace mongo {
         root = new UpdateStage(updateStageParams, ws.get(), db, root);
         // We must have a tree of stages in order to have a valid plan executor, but the query
         // solution may be null. Takes ownership of all args other than 'collection' and 'txn'
-        *execOut = new PlanExecutor(txn,
-                                    ws.release(),
-                                    root,
-                                    querySolution,
-                                    canonicalQuery.release(),
-                                    collection);
-        return Status::OK();
+        return PlanExecutor::make(txn,
+                                  ws.release(),
+                                  root,
+                                  querySolution,
+                                  canonicalQuery.release(),
+                                  collection,
+                                  yieldPolicy,
+                                  execOut);
     }
 
     Status getExecutorUpdate(OperationContext* txn,
@@ -599,6 +587,7 @@ namespace mongo {
                              const UpdateRequest* request,
                              UpdateDriver* driver,
                              OpDebug* opDebug,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut) {
         auto_ptr<WorkingSet> ws(new WorkingSet());
         Collection* collection = db->getCollection(request->getOpCtx(),
@@ -614,8 +603,7 @@ namespace mongo {
                    << " Using EOF stage: " << unparsedQuery.toString();
             UpdateStage* updateStage = new UpdateStage(updateStageParams, ws.get(), db,
                                                        new EOFStage());
-            *execOut = new PlanExecutor(txn, ws.release(), updateStage, ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), updateStage, ns, yieldPolicy, execOut);
         }
 
         if (CanonicalQuery::isSimpleIdQuery(unparsedQuery) &&
@@ -625,8 +613,7 @@ namespace mongo {
             PlanStage* idHackStage = new IDHackStage(txn, collection, unparsedQuery["_id"].wrap(),
                                                      ws.get());
             UpdateStage* root = new UpdateStage(updateStageParams, ws.get(), db, idHackStage);
-            *execOut = new PlanExecutor(txn, ws.release(), root, collection);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), root, collection, yieldPolicy, execOut);
         }
 
         const WhereCallbackReal whereCallback(txn, collection->ns().db());
@@ -640,7 +627,7 @@ namespace mongo {
             return status;
 
         // Takes ownership of 'cq'.
-        return getExecutorUpdate(txn, db, cq, request, driver, opDebug, execOut);
+        return getExecutorUpdate(txn, db, cq, request, driver, opDebug, yieldPolicy, execOut);
     }
 
     //
@@ -650,6 +637,7 @@ namespace mongo {
     Status getExecutorGroup(OperationContext* txn,
                             Collection* collection,
                             const GroupRequest& request,
+                            PlanExecutor::YieldPolicy yieldPolicy,
                             PlanExecutor** execOut) {
         if (!globalScriptEngine) {
             return Status(ErrorCodes::BadValue, "server-side JavaScript execution is disabled");
@@ -664,8 +652,7 @@ namespace mongo {
             // reporting machinery always assumes that the root stage for a group operation is a
             // GroupStage, so in this case we put a GroupStage on top of an EOFStage.
             root = new GroupStage(txn, request, ws.get(), new EOFStage());
-            *execOut = new PlanExecutor(txn, ws.release(), root, request.ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut);
         }
 
         const NamespaceString nss(request.ns);
@@ -692,13 +679,14 @@ namespace mongo {
         root = new GroupStage(txn, request, ws.get(), root);
         // We must have a tree of stages in order to have a valid plan executor, but the query
         // solution may be null. Takes ownership of all args other than 'collection'.
-        *execOut = new PlanExecutor(txn,
-                                    ws.release(),
-                                    root,
-                                    querySolution,
-                                    canonicalQuery.release(),
-                                    collection);
-        return Status::OK();
+        return PlanExecutor::make(txn,
+                                  ws.release(),
+                                  root,
+                                  querySolution,
+                                  canonicalQuery.release(),
+                                  collection,
+                                  yieldPolicy,
+                                  execOut);
     }
 
     //
@@ -880,6 +868,7 @@ namespace mongo {
     Status getExecutorCount(OperationContext* txn,
                             Collection* collection,
                             const CountRequest& request,
+                            PlanExecutor::YieldPolicy yieldPolicy,
                             PlanExecutor** execOut) {
         auto_ptr<WorkingSet> ws(new WorkingSet());
         PlanStage* root;
@@ -890,8 +879,7 @@ namespace mongo {
             // reporting machinery always assumes that the root stage for a count operation is
             // a CountStage, so in this case we put a CountStage on top of an EOFStage.
             root = new CountStage(txn, collection, request, ws.get(), new EOFStage());
-            *execOut = new PlanExecutor(txn, ws.release(), root, request.ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut);
         }
 
         if (request.query.isEmpty()) {
@@ -899,8 +887,7 @@ namespace mongo {
             // for its number of records. This is implemented by the CountStage, and we don't need
             // to create a child for the count stage in this case.
             root = new CountStage(txn, collection, request, ws.get(), NULL);
-            *execOut = new PlanExecutor(txn, ws.release(), root, request.ns);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws.release(), root, request.ns, yieldPolicy, execOut);
         }
 
         const WhereCallbackReal whereCallback(txn, collection->ns().db());
@@ -937,14 +924,14 @@ namespace mongo {
         root = new CountStage(txn, collection, request, ws.get(), root);
         // We must have a tree of stages in order to have a valid plan executor, but the query
         // solution may be NULL. Takes ownership of all args other than 'collection' and 'txn'
-        *execOut = new PlanExecutor(txn,
-                                    ws.release(),
-                                    root,
-                                    querySolution,
-                                    cq.release(),
-                                    collection);
-
-        return Status::OK();
+        return PlanExecutor::make(txn,
+                                  ws.release(),
+                                  root,
+                                  querySolution,
+                                  cq.release(),
+                                  collection,
+                                  yieldPolicy,
+                                  execOut);
     }
 
     //
@@ -1004,6 +991,7 @@ namespace mongo {
                                Collection* collection,
                                const BSONObj& query,
                                const std::string& field,
+                               PlanExecutor::YieldPolicy yieldPolicy,
                                PlanExecutor** out) {
         // This should'a been checked by the distinct command.
         invariant(collection);
@@ -1050,7 +1038,7 @@ namespace mongo {
             }
 
             // Takes ownership of 'cq'.
-            return getExecutor(txn, collection, cq, out);
+            return getExecutor(txn, collection, cq, yieldPolicy, out);
         }
 
         //
@@ -1102,15 +1090,15 @@ namespace mongo {
                    << ", planSummary: " << Explain::getPlanSummary(root);
 
             // Takes ownership of its arguments (except for 'collection').
-            *out = new PlanExecutor(txn, ws, root, soln, autoCq.release(), collection);
-            return Status::OK();
+            return PlanExecutor::make(txn, ws, root, soln, autoCq.release(), collection,
+                                      yieldPolicy, out);
         }
 
         // See if we can answer the query in a fast-distinct compatible fashion.
         vector<QuerySolution*> solutions;
         status = QueryPlanner::plan(*cq, plannerParams, &solutions);
         if (!status.isOK()) {
-            return getExecutor(txn, collection, autoCq.release(), out);
+            return getExecutor(txn, collection, autoCq.release(), yieldPolicy, out);
         }
 
         // We look for a solution that has an ixscan we can turn into a distinctixscan
@@ -1131,9 +1119,9 @@ namespace mongo {
                 LOG(2) << "Using fast distinct: " << cq->toStringShort()
                        << ", planSummary: " << Explain::getPlanSummary(root);
 
-                // Takes ownership of its arguments (except for 'collection').
-                *out = new PlanExecutor(txn, ws, root, solutions[i], autoCq.release(), collection);
-                return Status::OK();
+                // Takes ownership of 'ws', 'root', 'solutions[i]', and 'autoCq'.
+                return PlanExecutor::make(txn, ws, root, solutions[i], autoCq.release(),
+                                          collection, yieldPolicy, out);
             }
         }
 
@@ -1153,7 +1141,7 @@ namespace mongo {
         autoCq.reset(cq);
 
         // Takes ownership of 'autoCq'.
-        return getExecutor(txn, collection, autoCq.release(), out);
+        return getExecutor(txn, collection, autoCq.release(), yieldPolicy, out);
     }
 
 }  // namespace mongo
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 60d015f93d0..86e8156dfd6 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -71,6 +71,7 @@ namespace mongo {
     Status getExecutor(OperationContext* txn,
                        Collection* collection,
                        CanonicalQuery* rawCanonicalQuery,
+                       PlanExecutor::YieldPolicy yieldPolicy,
                        PlanExecutor** out,
                        size_t plannerOptions = 0);
 
@@ -90,6 +91,7 @@ namespace mongo {
                        Collection* collection,
                        const std::string& ns,
                        const BSONObj& unparsedQuery,
+                       PlanExecutor::YieldPolicy yieldPolicy,
                        PlanExecutor** out,
                        size_t plannerOptions = 0);
 
@@ -113,6 +115,7 @@ namespace mongo {
                                Collection* collection,
                                const BSONObj& query,
                                const std::string& field,
+                               PlanExecutor::YieldPolicy yieldPolicy,
                                PlanExecutor** out);
 
     /*
@@ -125,6 +128,7 @@ namespace mongo {
     Status getExecutorCount(OperationContext* txn,
                             Collection* collection,
                             const CountRequest& request,
+                            PlanExecutor::YieldPolicy yieldPolicy,
                             PlanExecutor** execOut);
 
     //
@@ -149,6 +153,7 @@ namespace mongo {
                              bool shouldCallLogOp,
                              bool fromMigrate,
                              bool isExplain,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut);
 
     /**
@@ -168,6 +173,7 @@ namespace mongo {
                              bool shouldCallLogOp,
                              bool fromMigrate,
                              bool isExplain,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut);
 
     //
@@ -191,6 +197,7 @@ namespace mongo {
                              const UpdateRequest* request,
                              UpdateDriver* driver,
                              OpDebug* opDebug,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut);
 
     /**
@@ -210,6 +217,7 @@ namespace mongo {
                              const UpdateRequest* request,
                              UpdateDriver* driver,
                              OpDebug* opDebug,
+                             PlanExecutor::YieldPolicy yieldPolicy,
                              PlanExecutor** execOut);
 
     //
@@ -230,6 +238,7 @@ namespace mongo {
     Status getExecutorGroup(OperationContext* txn,
                             Collection* collection,
                             const GroupRequest& request,
+                            PlanExecutor::YieldPolicy yieldPolicy,
                             PlanExecutor** execOut);
 
 }  // namespace mongo
diff --git a/src/mongo/db/query/internal_plans.h b/src/mongo/db/query/internal_plans.h
index c16aa251f2d..398c2ccbbb5 100644
--- a/src/mongo/db/query/internal_plans.h
+++ b/src/mongo/db/query/internal_plans.h
@@ -73,7 +73,16 @@ namespace mongo {
 
             if (NULL == collection) {
                 EOFStage* eof = new EOFStage();
-                return new PlanExecutor(txn, ws, eof, ns.toString());
+                PlanExecutor* exec;
+                // Takes ownership if 'ws' and 'eof'.
+                Status execStatus =  PlanExecutor::make(txn,
+                                                        ws,
+                                                        eof,
+                                                        ns.toString(),
+                                                        PlanExecutor::YIELD_MANUAL,
+                                                        &exec);
+                invariant(execStatus.isOK());
+                return exec;
             }
 
             dassert( ns == collection->ns().ns() );
@@ -90,8 +99,16 @@ namespace mongo {
             }
 
             CollectionScan* cs = new CollectionScan(txn, params, ws, NULL);
+            PlanExecutor* exec;
             // Takes ownership of 'ws' and 'cs'.
-            return new PlanExecutor(txn, ws, cs, collection);
+            Status execStatus = PlanExecutor::make(txn,
+                                                   ws,
+                                                   cs,
+                                                   collection,
+                                                   PlanExecutor::YIELD_MANUAL,
+                                                   &exec);
+            invariant(execStatus.isOK());
+            return exec;
         }
 
         /**
@@ -123,7 +140,16 @@ namespace mongo {
                 root = new FetchStage(txn, ws, root, NULL, collection);
             }
 
-            return new PlanExecutor(txn, ws, root, collection);
+            PlanExecutor* exec;
+            // Takes ownership of 'ws' and 'root'.
+            Status execStatus = PlanExecutor::make(txn,
+                                                   ws,
+                                                   root,
+                                                   collection,
+                                                   PlanExecutor::YIELD_MANUAL,
+                                                   &exec);
+            invariant(execStatus.isOK());
+            return exec;
         }
     };
 
diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp
index e091193dc37..89d724c4c30 100644
--- a/src/mongo/db/query/new_find.cpp
+++ b/src/mongo/db/query/new_find.cpp
@@ -455,8 +455,12 @@ namespace mongo {
         WorkingSet* oplogws = new WorkingSet();
         OplogStart* stage = new OplogStart(txn, collection, tsExpr, oplogws);
 
+        PlanExecutor* rawExec;
         // Takes ownership of ws and stage.
-        scoped_ptr<PlanExecutor> exec(new PlanExecutor(txn, oplogws, stage, collection));
+        Status execStatus = PlanExecutor::make(txn, oplogws, stage, collection,
+                                               PlanExecutor::YIELD_AUTO, &rawExec);
+        invariant(execStatus.isOK());
+        scoped_ptr<PlanExecutor> exec(rawExec);
 
         // The stage returns a DiskLoc of where to start.
         DiskLoc startLoc;
@@ -464,7 +468,8 @@ namespace mongo {
 
         // This is normal.  The start of the oplog is the beginning of the collection.
         if (PlanExecutor::IS_EOF == state) {
-            return getExecutor(txn, collection, autoCq.release(), execOut);
+            return getExecutor(txn, collection, autoCq.release(), PlanExecutor::YIELD_AUTO,
+                               execOut);
         }
 
         // This is not normal.  An error was encountered.
@@ -485,8 +490,8 @@ namespace mongo {
         WorkingSet* ws = new WorkingSet();
         CollectionScan* cs = new CollectionScan(txn, params, ws, cq->root());
         // Takes ownership of 'ws', 'cs', and 'cq'.
-        *execOut = new PlanExecutor(txn, ws, cs, autoCq.release(), collection);
-        return Status::OK();
+        return PlanExecutor::make(txn, ws, cs, autoCq.release(), collection,
+                                  PlanExecutor::YIELD_AUTO, execOut);
     }
 
     std::string newRunQuery(OperationContext* txn,
@@ -580,15 +585,7 @@ namespace mongo {
         // Otherwise we go through the selection of which executor is most suited to the
         // query + run-time context at hand.
         Status status = Status::OK();
-        if (collection == NULL) {
-            LOG(2) << "Collection " << ns << " does not exist."
-                   << " Using EOF stage: " << cq->toStringShort();
-            EOFStage* eofStage = new EOFStage();
-            WorkingSet* ws = new WorkingSet();
-            // Takes ownership of 'cq'.
-            rawExec = new PlanExecutor(txn, ws, eofStage, cq, NULL);
-        }
-        else if (pq.getOptions().oplogReplay) {
+        if (NULL != collection && pq.getOptions().oplogReplay) {
             // Takes ownership of 'cq'.
             status = getOplogStartHack(txn, collection, cq, &rawExec);
         }
@@ -598,7 +595,7 @@ namespace mongo {
                 options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
             }
             // Takes ownership of 'cq'.
-            status = getExecutor(txn, collection, cq, &rawExec, options);
+            status = getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, options);
         }
 
         if (!status.isOK()) {
@@ -609,11 +606,6 @@ namespace mongo {
         verify(NULL != rawExec);
         auto_ptr<PlanExecutor> exec(rawExec);
 
-        // We want the PlanExecutor to yield automatically, but we handle registration of the
-        // executor ourselves. We want to temporarily register the executor while we are generating
-        // this batch of results, and then unregister and re-register with ClientCursor for getmore.
-        exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
-
         // If it's actually an explain, do the explain and return rather than falling through
         // to the normal query execution loop.
         if (pq.isExplain()) {
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index 601091a567b..cca412cac8a 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -29,9 +29,11 @@
 #include "mongo/db/query/plan_executor.h"
 
 #include "mongo/db/catalog/collection.h"
+#include "mongo/db/exec/multi_plan.h"
 #include "mongo/db/exec/pipeline_proxy.h"
 #include "mongo/db/exec/plan_stage.h"
 #include "mongo/db/exec/plan_stats.h"
+#include "mongo/db/exec/subplan.h"
 #include "mongo/db/exec/working_set.h"
 #include "mongo/db/exec/working_set_common.h"
 #include "mongo/db/query/plan_yield_policy.h"
@@ -40,46 +42,93 @@
 
 namespace mongo {
 
-    PlanExecutor::PlanExecutor(OperationContext* opCtx,
-                               WorkingSet* ws,
-                               PlanStage* rt,
-                               const Collection* collection)
-        : _opCtx(opCtx),
-          _collection(collection),
-          _cq(NULL),
-          _workingSet(ws),
-          _qs(NULL),
-          _root(rt),
-          _killed(false) {
-        initNs();
+    namespace {
+
+        /**
+         * Retrieves the first stage of a given type from the plan tree, or NULL
+         * if no such stage is found.
+         */
+        PlanStage* getStageByType(PlanStage* root, StageType type) {
+            if (root->stageType() == type) {
+                return root;
+            }
+
+            vector<PlanStage*> children = root->getChildren();
+            for (size_t i = 0; i < children.size(); i++) {
+                PlanStage* result = getStageByType(children[i], type);
+                if (result) {
+                    return result;
+                }
+            }
+
+            return NULL;
+        }
+
     }
 
-    PlanExecutor::PlanExecutor(OperationContext* opCtx,
-                               WorkingSet* ws,
-                               PlanStage* rt,
-                               std::string ns)
-        : _opCtx(opCtx),
-          _collection(NULL),
-          _cq(NULL),
-          _workingSet(ws),
-          _qs(NULL),
-          _root(rt),
-          _ns(ns),
-          _killed(false) { }
+    // static
+    Status PlanExecutor::make(OperationContext* opCtx,
+                              WorkingSet* ws,
+                              PlanStage* rt,
+                              const Collection* collection,
+                              YieldPolicy yieldPolicy,
+                              PlanExecutor** out) {
+        return PlanExecutor::make(opCtx, ws, rt, NULL, NULL, collection, "", yieldPolicy, out);
+    }
 
-    PlanExecutor::PlanExecutor(OperationContext* opCtx,
-                               WorkingSet* ws,
-                               PlanStage* rt,
-                               CanonicalQuery* cq,
-                               const Collection* collection)
-        : _opCtx(opCtx),
-          _collection(collection),
-          _cq(cq),
-          _workingSet(ws),
-          _qs(NULL),
-          _root(rt),
-          _killed(false) {
-        initNs();
+    // static
+    Status PlanExecutor::make(OperationContext* opCtx,
+                              WorkingSet* ws,
+                              PlanStage* rt,
+                              const std::string& ns,
+                              YieldPolicy yieldPolicy,
+                              PlanExecutor** out) {
+        return PlanExecutor::make(opCtx, ws, rt, NULL, NULL, NULL, ns, yieldPolicy, out);
+    }
+
+    // static
+    Status PlanExecutor::make(OperationContext* opCtx,
+                              WorkingSet* ws,
+                              PlanStage* rt,
+                              CanonicalQuery* cq,
+                              const Collection* collection,
+                              YieldPolicy yieldPolicy,
+                              PlanExecutor** out) {
+        return PlanExecutor::make(opCtx, ws, rt, NULL, cq, collection, "", yieldPolicy, out);
+    }
+
+    // static
+    Status PlanExecutor::make(OperationContext* opCtx,
+                              WorkingSet* ws,
+                              PlanStage* rt,
+                              QuerySolution* qs,
+                              CanonicalQuery* cq,
+                              const Collection* collection,
+                              YieldPolicy yieldPolicy,
+                              PlanExecutor** out) {
+        return PlanExecutor::make(opCtx, ws, rt, qs, cq, collection, "", yieldPolicy, out);
+    }
+
+    // static
+    Status PlanExecutor::make(OperationContext* opCtx,
+                              WorkingSet* ws,
+                              PlanStage* rt,
+                              QuerySolution* qs,
+                              CanonicalQuery* cq,
+                              const Collection* collection,
+                              const std::string& ns,
+                              YieldPolicy yieldPolicy,
+                              PlanExecutor** out) {
+        std::auto_ptr<PlanExecutor> exec(new PlanExecutor(opCtx, ws, rt, qs, cq, collection, ns));
+
+        // Perform plan selection, if necessary.
+        Status status = exec->pickBestPlan(yieldPolicy);
+        if (!status.isOK()) {
+            return status;
+        }
+
+        *out = exec.release();
+        return Status::OK();
     }
 
     PlanExecutor::PlanExecutor(OperationContext* opCtx,
@@ -87,18 +136,22 @@ namespace mongo {
                                PlanStage* rt,
                                QuerySolution* qs,
                                CanonicalQuery* cq,
-                               const Collection* collection)
+                               const Collection* collection,
+                               const std::string& ns)
         : _opCtx(opCtx),
           _collection(collection),
           _cq(cq),
           _workingSet(ws),
           _qs(qs),
           _root(rt),
+          _ns(ns),
           _killed(false) {
-        initNs();
-    }
+        // We may still need to initialize _ns from either _collection or _cq.
+        if (!_ns.empty()) {
+            // We already have an _ns set, so there's nothing more to do.
+            return;
+        }
 
-    void PlanExecutor::initNs() {
         if (NULL != _collection) {
             _ns = _collection->ns().ns();
         }
@@ -108,6 +161,31 @@ namespace mongo {
         }
     }
 
+    Status PlanExecutor::pickBestPlan(YieldPolicy policy) {
+        // For YIELD_AUTO, this will both set an auto yield policy on the PlanExecutor and
+        // register it to receive notifications.
+        this->setYieldPolicy(policy);
+
+        // First check if we need to do subplanning.
+        PlanStage* foundStage = getStageByType(_root.get(), STAGE_SUBPLAN);
+        if (foundStage) {
+            SubplanStage* subplan = static_cast<SubplanStage*>(foundStage);
+            return subplan->pickBestPlan(_yieldPolicy.get());
+        }
+
+        // If we didn't have to do subplanning, we might still have to do regular
+        // multi plan selection.
+        foundStage = getStageByType(_root.get(), STAGE_MULTI_PLAN);
+        if (foundStage) {
+            MultiPlanStage* mps = static_cast<MultiPlanStage*>(foundStage);
+            return mps->pickBestPlan(_yieldPolicy.get());
+        }
+
+        // Either we chose a plan, or no plan selection was required. In both cases,
+        // our work has been successfully completed.
+        return Status::OK();
+    }
+
     PlanExecutor::~PlanExecutor() { }
 
     // static
@@ -180,6 +258,14 @@ namespace mongo {
         if (_killed) { return PlanExecutor::DEAD; }
 
         for (;;) {
+            // Yield if it's time to yield.
+            if (NULL != _yieldPolicy.get() && _yieldPolicy->shouldYield()) {
+                _yieldPolicy->yield();
+                if (_killed) {
+                    return PlanExecutor::DEAD;
+                }
+            }
+
             WorkingSetID id = WorkingSet::INVALID_ID;
             PlanStage::StageState code = _root->work(&id);
 
@@ -282,19 +368,22 @@ namespace mongo {
     }
 
     Status PlanExecutor::executePlan() {
-        WorkingSetID id = WorkingSet::INVALID_ID;
-        PlanStage::StageState code = PlanStage::NEED_TIME;
-        while (PlanStage::NEED_TIME == code || PlanStage::ADVANCED == code) {
-            code = _root->work(&id);
+        BSONObj obj;
+        PlanExecutor::ExecState state = PlanExecutor::ADVANCED;
+        while (PlanExecutor::ADVANCED == state) {
+            state = this->getNext(&obj, NULL);
         }
 
-        if (PlanStage::FAILURE == code) {
-            BSONObj obj;
-            WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &obj);
-            return Status(ErrorCodes::BadValue,
-                          "Exec error: " + WorkingSetCommon::toStatusString(obj));
+        if (PlanExecutor::DEAD == state) {
+            return Status(ErrorCodes::OperationFailed, "Exec error: PlanExecutor killed");
+        }
+        else if (PlanExecutor::EXEC_ERROR == state) {
+            return Status(ErrorCodes::OperationFailed,
+                          str::stream() << "Exec error: "
+                                        << WorkingSetCommon::toStatusString(obj));
         }
 
+        invariant(PlanExecutor::IS_EOF == state);
         return Status::OK();
     }
 
@@ -308,7 +397,7 @@ namespace mongo {
         }
         else {
             invariant(PlanExecutor::YIELD_AUTO == policy);
-            _yieldPolicy.reset(new PlanYieldPolicy());
+            _yieldPolicy.reset(new PlanYieldPolicy(this));
 
             // Runners that yield automatically generally need to be registered so that
             // after yielding, they receive notifications of events like deletions and
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index db616fc7281..f7da5783174 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -89,27 +89,35 @@ namespace mongo {
             //
             // 1. Register your PlanExecutor with ClientCursor. Registered executors are informed
             // about DiskLoc deletions and namespace invalidation, as well as other important
-            // events. Do this either by  calling registerExec() on the executor. This can be done
-            // once you get your executor, or could be done per-yield.
+            // events. Do this by calling registerExec() on the executor. Alternatively, this can
+            // be done per-yield (as described below).
             //
-            // 2. Call exec->saveState() before you yield.
+            // 2. Construct a PlanYieldPolicy 'policy', passing 'exec' to the constructor.
             //
-            // 3. Call Yield::yieldAllLocks(), passing in the executor's OperationContext*. This
-            // causes the executor to give up its locks and block so that it goes to the back of
-            // the scheduler's queue.
+            // 3. Call PlanYieldPolicy::yield() on 'policy'. If your PlanExecutor is not yet
+            // registered (because you want to register on a per-yield basis), then pass
+            // 'true' to yield().
             //
-            // 4. Call exec->restoreState() before using the executor again.
-            //
-            // 5. The next call to exec->getNext() may return DEAD.
-            //
-            // 6. Make sure the executor gets deregistered from ClientCursor. PlanExecutor does
-            // this in an RAII fashion when it is destroyed, or you can explicity call
-            // unregisterExec() on the PlanExecutor.
+            // 4. The call to yield() returns a boolean indicating whether or not 'exec' is
+            // still alove. If it is false, then 'exec' was killed during the yield and is
+            // no longer valid.
             YIELD_MANUAL,
         };
 
         //
-        // Constructors / destructor.
+        // Factory methods.
+        //
+        // On success, return a new PlanExecutor, owned by the caller, through 'out'.
+        //
+        // Passing YIELD_AUTO to any of these factories will construct a yielding runner which
+        // may yield in the following circumstances:
+        //   1) During plan selection inside the call to make().
+        //   2) On any call to getNext().
+        //   3) While executing the plan inside executePlan().
+        //
+        // The runner will also be automatically registered to receive notifications in the
+        // case of YIELD_AUTO, so no further calls to registerExec() or setYieldPolicy() are
+        // necessary.
         //
 
         /**
@@ -118,40 +126,48 @@ namespace mongo {
          * Right now this is only for idhack updates which neither canonicalize
          * nor go through normal planning.
          */
-        PlanExecutor(OperationContext* opCtx,
-                     WorkingSet* ws,
-                     PlanStage* rt,
-                     const Collection* collection);
+        static Status make(OperationContext* opCtx,
+                           WorkingSet* ws,
+                           PlanStage* rt,
+                           const Collection* collection,
+                           YieldPolicy yieldPolicy,
+                           PlanExecutor** out);
 
         /**
          * Used when we have a NULL collection and no canonical query. In this case,
          * we need to explicitly pass a namespace to the plan executor.
          */
-        PlanExecutor(OperationContext* opCtx,
-                     WorkingSet* ws,
-                     PlanStage* rt,
-                     std::string ns);
+        static Status make(OperationContext* opCtx,
+                           WorkingSet* ws,
+                           PlanStage* rt,
+                           const std::string& ns,
+                           YieldPolicy yieldPolicy,
+                           PlanExecutor** out);
 
         /**
          * Used when there is a canonical query but no query solution (e.g. idhack
          * queries, queries against a NULL collection, queries using the subplan stage).
          */
-        PlanExecutor(OperationContext* opCtx,
-                     WorkingSet* ws,
-                     PlanStage* rt,
-                     CanonicalQuery* cq,
-                     const Collection* collection);
+        static Status make(OperationContext* opCtx,
+                           WorkingSet* ws,
+                           PlanStage* rt,
+                           CanonicalQuery* cq,
+                           const Collection* collection,
+                           YieldPolicy yieldPolicy,
+                           PlanExecutor** out);
 
         /**
          * The constructor for the normal case, when you have both a canonical query
          * and a query solution.
          */
-        PlanExecutor(OperationContext* opCtx,
-                     WorkingSet* ws,
-                     PlanStage* rt,
-                     QuerySolution* qs,
-                     CanonicalQuery* cq,
-                     const Collection* collection);
+        static Status make(OperationContext* opCtx,
+                           WorkingSet* ws,
+                           PlanStage* rt,
+                           QuerySolution* qs,
+                           CanonicalQuery* cq,
+                           const Collection* collection,
+                           YieldPolicy yieldPolicy,
+                           PlanExecutor** out);
 
         ~PlanExecutor();
 
@@ -228,6 +244,8 @@ namespace mongo {
          * For read operations, objOut or dlOut are populated with another query result.
          *
          * For write operations, the return depends on the particulars of the write stage.
+         *
+         * If an AUTO_YIELD policy is set, then this method may yield.
          */
         ExecState getNext(BSONObj* objOut, DiskLoc* dlOut);
 
@@ -242,6 +260,8 @@ namespace mongo {
         /**
          * Execute the plan to completion, throwing out the results.  Used when you want to work the
          * underlying tree without getting results back.
+         *
+         * If an AUTO_YIELD policy is set on this executor, then this will automatically yield.
          */
         Status executePlan();
 
@@ -315,9 +335,42 @@ namespace mongo {
         };
 
         /**
-         * Initialize the namespace using either the canonical query or the collection.
+         * New PlanExecutor instances are created with the static make() methods above.
+         */
+        PlanExecutor(OperationContext* opCtx,
+                     WorkingSet* ws,
+                     PlanStage* rt,
+                     QuerySolution* qs,
+                     CanonicalQuery* cq,
+                     const Collection* collection,
+                     const std::string& ns);
+
+        /**
+         * Public factory methods delegate to this private factory to do their work.
+         */
+        static Status make(OperationContext* opCtx,
+                           WorkingSet* ws,
+                           PlanStage* rt,
+                           QuerySolution* qs,
+                           CanonicalQuery* cq,
+                           const Collection* collection,
+                           const std::string& ns,
+                           YieldPolicy yieldPolicy,
+                           PlanExecutor** out);
+
+        /**
+         * Clients of PlanExecutor expect that on receiving a new instance from one of the make()
+         * factory methods, plan selection has already been completed. In order to enforce this
+         * property, this function is called to do plan selection prior to returning the new
+         * PlanExecutor.
+         *
+         * If the tree contains plan selection stages, such as MultiPlanStage or SubplanStage,
+         * this calls into their underlying plan selection facilities. Otherwise, does nothing.
+         *
+         * If an AUTO_YIELD policy is set (and document-level locking is not supported), then
+         * locks are yielded during plan selection.
          */
-        void initNs();
+        Status pickBestPlan(YieldPolicy policy);
 
         // The OperationContext that we're executing within.  We need this in order to release
         // locks.
diff --git a/src/mongo/db/query/plan_yield_policy.cpp b/src/mongo/db/query/plan_yield_policy.cpp
index f8db2c1e98a..689a76973b5 100644
--- a/src/mongo/db/query/plan_yield_policy.cpp
+++ b/src/mongo/db/query/plan_yield_policy.cpp
@@ -31,61 +31,52 @@
 #include "mongo/db/query/plan_yield_policy.h"
 
 #include "mongo/db/concurrency/yield.h"
+#include "mongo/db/global_environment_experiment.h"
 
 namespace mongo {
 
     // Yield every 128 cycles or 10ms.  These values were inherited from v2.6, which just copied
     // them from v2.4.
-    PlanYieldPolicy::PlanYieldPolicy()
+    PlanYieldPolicy::PlanYieldPolicy(PlanExecutor* exec)
         : _elapsedTracker(128, 10),
-          _planYielding(NULL) { }
-
-    PlanYieldPolicy::~PlanYieldPolicy() {
-        if (NULL != _planYielding) {
-            // We were destructed mid-yield.  Since we're being used to yield a runner, we have
-            // to deregister the runner.
-            if (_planYielding->collection()) {
-                _planYielding->collection()->cursorCache()->deregisterExecutor(_planYielding);
-            }
-        }
-    }
-
-    /**
-     * Yield the provided runner, registering and deregistering it appropriately.  Deal with
-     * deletion during a yield by setting _runnerYielding to ensure deregistration.
-     *
-     * Provided runner MUST be YIELD_MANUAL.
-     */
-    bool PlanYieldPolicy::yieldAndCheckIfOK(PlanExecutor* plan) {
-        invariant(plan);
-        invariant(plan->collection());
+          _planYielding(exec) { }
 
-        // If micros > 0, we should yield.
-        plan->saveState();
+    bool PlanYieldPolicy::shouldYield() {
+        return _elapsedTracker.intervalHasElapsed();
+    }
 
-        // If we're destructed during yield this will be used to deregister ourselves.
-        // This happens when we're not in a ClientCursor and somebody kills all cursors
-        // on the ns we're operating on.
-        _planYielding = plan;
+    bool PlanYieldPolicy::yield(bool registerPlan) {
+        // This is a no-op if document-level locking is supported. Doc-level locking systems
+        // should not need to yield.
+        if (supportsDocLocking()) {
+            return true;
+        }
 
-        // Register with the thing that may kill() the 'plan'.
-        plan->collection()->cursorCache()->registerExecutor(plan);
+        // No need to yield if the collection is NULL.
+        if (NULL == _planYielding->collection()) {
+            return true;
+        }
 
-        // Note that this call checks for interrupt, and thus can throw if interrupt flag is set.
-        Yield::yieldAllLocks(plan->getOpCtx(), 1);
+        invariant(_planYielding);
 
-        // If the plan was killed, runner->collection() will return NULL, and we can't/don't
-        // deregister it.
-        if (plan->collection()) {
-            plan->collection()->cursorCache()->deregisterExecutor(plan);
+        if (registerPlan) {
+            _planYielding->registerExec();
         }
 
-        _planYielding = NULL;
+        OperationContext* opCtx = _planYielding->getOpCtx();
+        invariant(opCtx);
+
+        _planYielding->saveState();
 
+        // Note that this call checks for interrupt, and thus can throw if interrupt flag is set.
+        Yield::yieldAllLocks(opCtx, 1);
         _elapsedTracker.resetLastTime();
 
-        return plan->restoreState(plan->getOpCtx());
+        if (registerPlan) {
+            _planYielding->deregisterExec();
+        }
+
+        return _planYielding->restoreState(opCtx);
     }
 
 } // namespace mongo
-
diff --git a/src/mongo/db/query/plan_yield_policy.h b/src/mongo/db/query/plan_yield_policy.h
index cb6d1a0a9ec..50aff9656f9 100644
--- a/src/mongo/db/query/plan_yield_policy.h
+++ b/src/mongo/db/query/plan_yield_policy.h
@@ -29,26 +29,43 @@
 #pragma once
 
 #include "mongo/db/catalog/collection.h"
+#include "mongo/db/query/plan_executor.h"
 #include "mongo/util/elapsed_tracker.h"
 
 namespace mongo {
 
     class PlanYieldPolicy {
     public:
-        PlanYieldPolicy();
-        ~PlanYieldPolicy();
+        explicit PlanYieldPolicy(PlanExecutor* exec);
+
+        /**
+         * Used by AUTO_YIELD plan executors in order to check whether it is time to yield.
+         * PlanExecutors give up their locks periodically in order to be fair to other
+         * threads.
+         */
+        bool shouldYield();
 
         /**
-         * Yield the provided runner, registering and deregistering it appropriately.  Deal with
-         * deletion during a yield by setting _planYielding to ensure deregistration.
+         * Used to cause a plan executor to give up locks and go to sleep. The PlanExecutor
+         * must *not* be in saved state. Handles calls to save/restore state internally.
          *
-         * Provided plan executor MUST be YIELD_MANUAL.
+         * By default, assumes that the PlanExecutor is already registered. If 'registerPlan'
+         * is explicitly set to true, then the executor will get automatically registered and
+         * deregistered here.
+         *
+         * Returns true if the executor was restored successfully and is still alive. Returns false
+         * if the executor got killed during yield.
          */
-        bool yieldAndCheckIfOK(PlanExecutor* plan);
+        bool yield(bool registerPlan = false);
 
     private:
+        // Default constructor disallowed in order to ensure initialization of '_planYielding'.
+        PlanYieldPolicy();
+
         ElapsedTracker _elapsedTracker;
 
+        // The plan executor which this yield policy is responsible for yielding. Must
+        // not outlive the plan executor.
         PlanExecutor* _planYielding;
     };
author	David Storch <david.storch@10gen.com>	2014-10-21 10:24:24 -0400
committer	David Storch <david.storch@10gen.com>	2014-10-21 10:32:59 -0400
commit	011dde7e6eac3b73cb1d2a7f004feee9bed99c46 (patch)
tree	32b20bc3224627c93e5781ba72abb45a1f825b37 /src/mongo/db/query
parent	0bee61d26e44e26c2678d550990a57ce488f222d (diff)
download	mongo-011dde7e6eac3b73cb1d2a7f004feee9bed99c46.tar.gz