author    | David Storch <david.storch@10gen.com> | 2015-05-22 13:19:19 -0400
committer | David Storch <david.storch@10gen.com> | 2015-06-03 14:53:48 -0400
commit    | f80286e0203fa0aa0873bff2963ce0aa81a51383 (patch)
tree      | dea40d051e88062eb025dca2dc78f00535d9710b /src/mongo
parent    | b677e49bed78c415498102a6d7d1cfbed43e76f7 (diff)
download  | mongo-f80286e0203fa0aa0873bff2963ce0aa81a51383.tar.gz
SERVER-15225 CachedPlanStage can replan poorly performing queries after a trial period
This is a minimal backport of the rewrite of the CachedPlanStage. The functionality is behind
a flag and is turned off by default; it can be enabled with the
internalQueryCacheReplanningEnabled setParameter.
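For context, the knob is an exported server parameter, so it can normally be set either at
mongod startup or at runtime. The invocations below are illustrative of that usage and are
not part of this patch:

    # Assumed usage: enable at startup
    mongod --setParameter internalQueryCacheReplanningEnabled=true

    # Assumed usage: enable at runtime from the mongo shell
    db.adminCommand({ setParameter: 1, internalQueryCacheReplanningEnabled: true })

With replanning enabled, a cached plan gets a trial budget of internalQueryCacheEvictionRatio
(default 10.0) times the number of works recorded when the plan was first cached; exhausting
that budget evicts the cache entry and replans the query from scratch.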
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/cached_plan.cpp | 258
-rw-r--r-- | src/mongo/db/exec/cached_plan.h | 71
-rw-r--r-- | src/mongo/db/exec/multi_plan.cpp | 40
-rw-r--r-- | src/mongo/db/exec/multi_plan.h | 21
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 13
-rw-r--r-- | src/mongo/db/query/plan_cache.cpp | 14
-rw-r--r-- | src/mongo/db/query/plan_cache.h | 12
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 11
-rw-r--r-- | src/mongo/db/query/query_knobs.cpp | 4
-rw-r--r-- | src/mongo/db/query/query_knobs.h | 7
-rw-r--r-- | src/mongo/dbtests/query_stage_cached_plan.cpp | 273
11 files changed, 686 insertions, 38 deletions
diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp index 2bc2e39bd1b..fed8b8de0c0 100644 --- a/src/mongo/db/exec/cached_plan.cpp +++ b/src/mongo/db/exec/cached_plan.cpp @@ -26,18 +26,28 @@ * it in the license file. */ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery + +#include "mongo/platform/basic.h" + #include "mongo/db/exec/cached_plan.h" -#include "mongo/db/exec/scoped_timer.h" -#include "mongo/db/exec/working_set_common.h" -#include "mongo/util/mongoutils/str.h" -// for updateCache #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database.h" #include "mongo/db/client.h" +#include "mongo/db/exec/multi_plan.h" +#include "mongo/db/exec/scoped_timer.h" +#include "mongo/db/exec/working_set_common.h" +#include "mongo/db/query/explain.h" #include "mongo/db/query/plan_cache.h" #include "mongo/db/query/plan_ranker.h" +#include "mongo/db/query/plan_yield_policy.h" #include "mongo/db/query/qlog.h" +#include "mongo/db/query/query_knobs.h" +#include "mongo/db/query/query_planner.h" +#include "mongo/db/query/stage_builder.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/log.h" namespace mongo { @@ -47,14 +57,23 @@ namespace mongo { // static const char* CachedPlanStage::kStageType = "CACHED_PLAN"; - CachedPlanStage::CachedPlanStage(const Collection* collection, + CachedPlanStage::CachedPlanStage(OperationContext* txn, + Collection* collection, + WorkingSet* ws, CanonicalQuery* cq, + const QueryPlannerParams& params, + size_t decisionWorks, PlanStage* mainChild, QuerySolution* mainQs, PlanStage* backupChild, QuerySolution* backupQs) - : _collection(collection), + : _txn(txn), + _collection(collection), + _ws(ws), _canonicalQuery(cq), + _plannerParams(params), + _replanningEnabled(internalQueryCacheReplanningEnabled), + _decisionWorks(decisionWorks), _mainQs(mainQs), _backupQs(backupQs), _mainChildPlan(mainChild), @@ -80,9 +99,206 @@ namespace mongo { return true; } + if (!_results.empty()) { + return false; + } + return getActiveChild()->isEOF(); } + Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) { + // If replanning is disabled, then this is a no-op. + if (!_replanningEnabled) { + return Status::OK(); + } + + // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of + // execution work that happens here, so this is needed for the time accounting to + // make sense. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + // If we work this many times during the trial period, then we will replan the + // query from scratch. + size_t maxWorksBeforeReplan = static_cast<size_t>(internalQueryCacheEvictionRatio + * _decisionWorks); + + // The trial period ends without replanning if the cached plan produces this many results + size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery); + + for (size_t i = 0; i < maxWorksBeforeReplan; ++i) { + // Might need to yield between calls to work due to the timer elapsing. + Status yieldStatus = tryYield(yieldPolicy); + if (!yieldStatus.isOK()) { + return yieldStatus; + } + + WorkingSetID id = WorkingSet::INVALID_ID; + PlanStage::StageState state = _mainChildPlan->work(&id); + + if (PlanStage::ADVANCED == state) { + // Save result for later. + _results.push_back(id); + + if (_results.size() >= numResults) { + // Once a plan returns enough results, stop working. Update cache with stats + // from this run and return. 
+ updateCache(); + return Status::OK(); + } + } + else if (PlanStage::IS_EOF == state) { + // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats + // from this run and return. + updateCache(); + return Status::OK(); + } + else if (PlanStage::NEED_FETCH == state) { + WorkingSetMember* member = _ws->get(id); + invariant(member->hasFetcher()); + // Transfer ownership of the fetcher and yield. + _fetcher.reset(member->releaseFetcher()); + Status fetchYieldStatus = tryYield(yieldPolicy); + if (!fetchYieldStatus.isOK()) { + return fetchYieldStatus; + } + } + else if (PlanStage::FAILURE == state) { + // On failure, fall back to replanning the whole query. We neither evict the + // existing cache entry nor cache the result of replanning. + BSONObj statusObj; + WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj); + + LOG(1) << "Execution of cached plan failed, falling back to replan." + << " query: " + << _canonicalQuery->toStringShort() + << " planSummary: " + << Explain::getPlanSummary(_mainChildPlan.get()) + << " status: " + << statusObj; + + const bool shouldCache = false; + return replan(yieldPolicy, shouldCache); + } + else if (PlanStage::DEAD == state) { + return Status(ErrorCodes::OperationFailed, + "Executor killed during cached plan trial period"); + } + else { + invariant(PlanStage::NEED_TIME == state); + } + } + + // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This + // plan is taking too long, so we replan from scratch. + LOG(1) << "Execution of cached plan required " + << maxWorksBeforeReplan + << " works, but was originally cached with only " + << _decisionWorks + << " works. Evicting cache entry and replanning query: " + << _canonicalQuery->toStringShort() + << " plan summary before replan: " + << Explain::getPlanSummary(_mainChildPlan.get()); + + const bool shouldCache = true; + return replan(yieldPolicy, shouldCache); + } + + Status CachedPlanStage::tryYield(PlanYieldPolicy* yieldPolicy) { + // These are the conditions which cause us to yield during plan selection if we have a + // YIELD_AUTO policy: + // 1) The yield policy's timer elapsed, or + // 2) some stage requested a yield due to a document fetch (NEED_FETCH). + // In both cases, the actual yielding happens here. + if (NULL != yieldPolicy && (yieldPolicy->shouldYield() || NULL != _fetcher.get())) { + // Here's where we yield. + bool alive = yieldPolicy->yield(_fetcher.get()); + + if (!alive) { + return Status(ErrorCodes::OperationFailed, + "PlanExecutor killed during cached plan trial period"); + } + } + + // We're done using the fetcher, so it should be freed. We don't want to + // use the same RecordFetcher twice. + _fetcher.reset(); + + return Status::OK(); + } + + Status CachedPlanStage::replan(PlanYieldPolicy* yieldPolicy, bool shouldCache) { + // We're going to start over with a new plan. No need for only old buffered results. + _results.clear(); + + // Clear out the working set. We'll start with a fresh working set. + _ws->clear(); + + // No need for any existing child stages or QuerySolutions. We will create new ones from + // scratch. + _mainQs.reset(); + _backupQs.reset(); + _mainChildPlan.reset(); + _backupChildPlan.reset(); + + // Remove the current plan cache entry for this shape. The plan cache entry could have + // already been removed by another thread, so our removal won't necessarily succeed. 
+ if (shouldCache) { + PlanCache* cache = _collection->infoCache()->getPlanCache(); + cache->remove(*_canonicalQuery); + } + + // Use the query planning module to plan the whole query. + std::vector<QuerySolution*> rawSolutions; + Status status = QueryPlanner::plan(*_canonicalQuery, _plannerParams, &rawSolutions); + if (!status.isOK()) { + return Status(ErrorCodes::BadValue, + str::stream() + << "error processing query: " << _canonicalQuery->toString() + << " planner returned error: " << status.reason()); + } + + OwnedPointerVector<QuerySolution> solutions(rawSolutions); + + // We cannot figure out how to answer the query. Perhaps it requires an index + // we do not have? + if (0 == solutions.size()) { + return Status(ErrorCodes::BadValue, + str::stream() + << "error processing query: " + << _canonicalQuery->toString() + << " No query solutions"); + } + + if (1 == solutions.size()) { + PlanStage* newRoot; + // Only one possible plan. Build the stages from the solution. + verify(StageBuilder::build(_txn, _collection, *solutions[0], _ws, &newRoot)); + _mainChildPlan.reset(newRoot); + _mainQs.reset(solutions.popAndReleaseBack()); + return Status::OK(); + } + + // Many solutions. Create a MultiPlanStage to pick the best, update the cache, + // and so on. The working set will be shared by all candidate plans. + _mainChildPlan.reset(new MultiPlanStage(_txn, _collection, _canonicalQuery, shouldCache)); + MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(_mainChildPlan.get()); + + for (size_t ix = 0; ix < solutions.size(); ++ix) { + if (solutions[ix]->cacheData.get()) { + solutions[ix]->cacheData->indexFilterApplied = _plannerParams.indexFiltersApplied; + } + + PlanStage* nextPlanRoot; + verify(StageBuilder::build(_txn, _collection, *solutions[ix], _ws, &nextPlanRoot)); + + // Takes ownership of 'solutions[ix]' and 'nextPlanRoot'. + multiPlanStage->addPlan(solutions.releaseAt(ix), nextPlanRoot, _ws); + } + + // Delegate to the MultiPlanStage's plan selection facility. + return multiPlanStage->pickBestPlan(yieldPolicy); + } + PlanStage::StageState CachedPlanStage::work(WorkingSetID* out) { ++_commonStats.works; @@ -94,6 +310,15 @@ namespace mongo { if (isEOF()) { return PlanStage::IS_EOF; } + // First exhaust any results buffered during the trial period. + if (!_results.empty()) { + *out = _results.front(); + _results.pop_front(); + _commonStats.advanced++; + _alreadyProduced = true; + return PlanStage::ADVANCED; + } + StageState childStatus = getActiveChild()->work(out); if (PlanStage::ADVANCED == childStatus) { @@ -107,9 +332,11 @@ namespace mongo { else if (PlanStage::FAILURE == childStatus && !_alreadyProduced && !_usingBackupChild + && !_replanningEnabled && NULL != _backupChildPlan.get()) { // Switch the active child to the backup. Subsequent calls to work() will exercise - // the backup plan. + // the backup plan. We are only willing to switch to the backup plan if replanning is + // disabled. 
_usingBackupChild = true; _commonStats.needTime++; return PlanStage::NEED_TIME; @@ -152,6 +379,20 @@ namespace mongo { _backupChildPlan->invalidate(txn, dl, type); } ++_commonStats.invalidates; + + for (std::list<WorkingSetID>::iterator it = _results.begin(); it != _results.end(); ) { + WorkingSetMember* member = _ws->get(*it); + if (member->hasLoc() && member->loc == dl) { + std::list<WorkingSetID>::iterator next = it; + ++next; + WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection); + _results.erase(it); + it = next; + } + else { + ++it; + } + } } vector<PlanStage*> CachedPlanStage::getChildren() const { @@ -197,7 +438,8 @@ namespace mongo { feedback->score = PlanRanker::scoreTree(feedback->stats.get()); PlanCache* cache = _collection->infoCache()->getPlanCache(); - Status fbs = cache->feedback(*_canonicalQuery, feedback.release()); + const bool allowedToEvict = !_replanningEnabled; + Status fbs = cache->feedback(*_canonicalQuery, feedback.release(), allowedToEvict); if (!fbs.isOK()) { QLOG() << _canonicalQuery->ns() << ": Failed to update cache with feedback: " diff --git a/src/mongo/db/exec/cached_plan.h b/src/mongo/db/exec/cached_plan.h index ceb63e1e190..632476b481d 100644 --- a/src/mongo/db/exec/cached_plan.h +++ b/src/mongo/db/exec/cached_plan.h @@ -28,15 +28,21 @@ #pragma once +#include <boost/scoped_ptr.hpp> +#include <list> + #include "mongo/db/jsobj.h" #include "mongo/db/exec/plan_stage.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/query_solution.h" #include "mongo/db/record_id.h" +#include "mongo/db/storage/record_fetcher.h" namespace mongo { + class PlanYieldPolicy; + /** * This stage outputs its mainChild, and possibly its backup child * and also updates the cache. @@ -49,8 +55,12 @@ namespace mongo { /** * Takes ownership of 'mainChild', 'mainQs', 'backupChild', and 'backupQs'. */ - CachedPlanStage(const Collection* collection, + CachedPlanStage(OperationContext* txn, + Collection* collection, + WorkingSet* ws, CanonicalQuery* cq, + const QueryPlannerParams& params, + size_t decisionWorks, PlanStage* mainChild, QuerySolution* mainQs, PlanStage* backupChild = NULL, @@ -80,16 +90,60 @@ namespace mongo { void kill(); + /** + * Runs the cached plan for a trial period, yielding during the trial period according to + * 'yieldPolicy'. + * + * If the performance is lower than expected, the old plan is evicted and a new plan is + * selected from scratch (again yielding according to 'yieldPolicy'). Otherwise, the cached + * plan is run. + */ + Status pickBestPlan(PlanYieldPolicy* yieldPolicy); + private: PlanStage* getActiveChild() const; void updateCache(); - // not owned - const Collection* _collection; + /** + * May yield during the cached plan stage's trial period or replanning phases. + * + * Returns a non-OK status if the plan was killed during a yield. + */ + Status tryYield(PlanYieldPolicy* yieldPolicy); + + /** + * Uses the QueryPlanner and the MultiPlanStage to re-generate candidate plans for this + * query and select a new winner. + * + * We fallback to a new plan if, based on the number of works during the trial period that + * put the plan in the cache, the performance was worse than anticipated during the trial + * period. + * + * We only write the result of re-planning to the plan cache if 'shouldCache' is true. + */ + Status replan(PlanYieldPolicy* yieldPolicy, bool shouldCache); + + // Not owned here. + OperationContext* _txn; + + // Not owned here. 
+ Collection* _collection; - // not owned + // Not owned here. + WorkingSet* _ws; + + // Not owned here. CanonicalQuery* _canonicalQuery; + QueryPlannerParams _plannerParams; + + // Whether or not the cached plan trial period and replanning is enabled. + const bool _replanningEnabled; + + // The number of work cycles taken to decide on a winning plan when the plan was first + // cached. + size_t _decisionWorks; + // Owned by us. Must be deleted after the corresponding PlanStage trees, as // those trees point into the query solutions. boost::scoped_ptr<QuerySolution> _mainQs; @@ -113,6 +167,15 @@ namespace mongo { // Has this query been killed? bool _killed; + // Any results produced during trial period execution are kept here. + std::list<WorkingSetID> _results; + + // When a stage requests a yield for document fetch, it gives us back a RecordFetcher* + // to use to pull the record into memory. We take ownership of the RecordFetcher here, + // deleting it after we've had a chance to do the fetch. For timing-based yields, we + // just pass a NULL fetcher. + boost::scoped_ptr<RecordFetcher> _fetcher; + // Stats CommonStats _commonStats; CachedPlanStats _specificStats; diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp index d7e9e19c30e..6415d4f91ec 100644 --- a/src/mongo/db/exec/multi_plan.cpp +++ b/src/mongo/db/exec/multi_plan.cpp @@ -61,9 +61,11 @@ namespace mongo { MultiPlanStage::MultiPlanStage(OperationContext* txn, const Collection* collection, - CanonicalQuery* cq) + CanonicalQuery* cq, + bool shouldCache) : _txn(txn), _collection(collection), + _shouldCache(shouldCache), _query(cq), _bestPlanIdx(kNoSuchPlan), _backupPlanIdx(kNoSuchPlan), @@ -183,6 +185,24 @@ namespace mongo { return Status::OK(); } + // static + size_t MultiPlanStage::getTrialPeriodNumToReturn(const CanonicalQuery& query) { + // We treat ntoreturn as though it is a limit during plan ranking. + // This means that ranking might not be great for sort + batchSize. + // But it also means that we don't buffer too much data for sort + limit. + // See SERVER-14174 for details. + size_t numToReturn = query.getParsed().getNumToReturn(); + + // Determine the number of results which we will produce during the plan + // ranking phase before stopping. + size_t numResults = static_cast<size_t>(internalQueryPlanEvaluationMaxResults); + if (numToReturn > 0) { + numResults = std::min(numToReturn, numResults); + } + + return numResults; + } + Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) { // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of // execution work that happens here, so this is needed for the time accounting to @@ -205,14 +225,7 @@ namespace mongo { // This means that ranking might not be great for sort + batchSize. // But it also means that we don't buffer too much data for sort + limit. // See SERVER-14174 for details. - size_t numToReturn = _query->getParsed().getNumToReturn(); - - // Determine the number of results which we will produce during the plan - // ranking phase before stopping. - size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults; - if (numToReturn > 0) { - numResults = std::min(numToReturn, numResults); - } + size_t numResults = getTrialPeriodNumToReturn(*_query); // Work the plans, stopping when a plan hits EOF or returns some // fixed number of results. @@ -311,13 +324,16 @@ namespace mongo { // 1) the query must be of a type that is safe to cache, // 2) two or more plans cannot have tied for the win. 
Caching in the case of ties can // cause successive queries of the same shape to use a bad index. - // 3) Furthermore, the winning plan must have returned at least one result. Plans which + // 3) The caller must have indicated that it is willing to allow a plan to be cached via + // the '_shouldCache' argument to the constructor. + // 4) Furthermore, the winning plan must have returned at least one result. Plans which // return zero results cannot be reliably ranked. Such query shapes are generally // existence type queries, and a winning plan should get cached once the query finds a // result. if (PlanCache::shouldCacheQuery(*_query) - && !ranking->tieForBest - && !alreadyProduced.empty()) { + && !ranking->tieForBest + && !alreadyProduced.empty() + && _shouldCache) { // Create list of candidate solutions for the cache with // the best solution at the front. std::vector<QuerySolution*> solutions; diff --git a/src/mongo/db/exec/multi_plan.h b/src/mongo/db/exec/multi_plan.h index a793ee7127b..2e2be24d455 100644 --- a/src/mongo/db/exec/multi_plan.h +++ b/src/mongo/db/exec/multi_plan.h @@ -52,8 +52,16 @@ namespace mongo { */ class MultiPlanStage : public PlanStage { public: - /** Takes no ownership */ - MultiPlanStage(OperationContext* txn, const Collection* collection, CanonicalQuery* cq); + /** + * Takes no ownership. + * + * If 'shouldCache' is true, writes a cache entry for the winning plan to the plan cache + * when possible. If 'shouldCache' is false, the plan cache will never be written. + */ + MultiPlanStage(OperationContext* txn, + const Collection* collection, + CanonicalQuery* cq, + bool shouldCache = true); virtual ~MultiPlanStage(); @@ -94,6 +102,12 @@ namespace mongo { */ Status pickBestPlan(PlanYieldPolicy* yieldPolicy); + /** + * Returns the max number of documents which we should allow any plan to return during the + * trial period. As soon as any plan hits this number of documents, the trial period ends. + */ + static size_t getTrialPeriodNumToReturn(const CanonicalQuery& query); + /** Return true if a best plan has been chosen */ bool bestPlanChosen() const; @@ -156,6 +170,9 @@ namespace mongo { OperationContext* _txn; const Collection* _collection; + // Whether or not we should try to cache the winning plan in the plan cache. + const bool _shouldCache; + // The query that we're trying to figure out the best solution to. // not owned here CanonicalQuery* _query; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index a3cf395ade3..62971906234 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -311,9 +311,16 @@ namespace mongo { // Add a CachedPlanStage on top of the previous root. Takes ownership of // '*rootOut', 'backupRoot', 'qs', and 'backupQs'. 
- *rootOut = new CachedPlanStage(collection, canonicalQuery, - *rootOut, qs, - backupRoot, backupQs); + *rootOut = new CachedPlanStage(opCtx, + collection, + ws, + canonicalQuery, + plannerParams, + cs->decisionWorks, + *rootOut, + qs, + backupRoot, + backupQs); return Status::OK(); } } diff --git a/src/mongo/db/query/plan_cache.cpp b/src/mongo/db/query/plan_cache.cpp index d1b16dc209c..9ecbf3fcc93 100644 --- a/src/mongo/db/query/plan_cache.cpp +++ b/src/mongo/db/query/plan_cache.cpp @@ -114,7 +114,8 @@ namespace mongo { key(key), query(entry.query.getOwned()), sort(entry.sort.getOwned()), - projection(entry.projection.getOwned()) { + projection(entry.projection.getOwned()), + decisionWorks(entry.decision->stats[0]->common.works) { // CachedSolution should not having any references into // cache entry. All relevant data should be cloned/copied. for (size_t i = 0; i < entry.plannerData.size(); ++i) { @@ -423,7 +424,9 @@ namespace mongo { return false; } - Status PlanCache::feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback) { + Status PlanCache::feedback(const CanonicalQuery& cq, + PlanCacheEntryFeedback* feedback, + bool allowedToEvict) { if (NULL == feedback) { return Status(ErrorCodes::BadValue, "feedback is NULL"); } @@ -439,9 +442,10 @@ namespace mongo { invariant(entry); if (entry->feedback.size() >= size_t(internalQueryCacheFeedbacksStored)) { - // If we have enough feedback, then use it to determine whether - // we should get rid of the cached solution. - if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get())) { + // If we have enough feedback, then use it to determine whether we should get rid of the + // cached solution. We do not use the feedback-based eviction policy if replanning is + // enabled. + if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get()) && allowedToEvict) { LOG(1) << _ns << ": removing plan cache entry " << entry->toString() << " - detected degradation in performance of cached solution."; _cache.remove(ck); diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h index 57f5657b053..28f07b4ea5f 100644 --- a/src/mongo/db/query/plan_cache.h +++ b/src/mongo/db/query/plan_cache.h @@ -196,6 +196,10 @@ namespace mongo { BSONObj query; BSONObj sort; BSONObj projection; + + // The number of work cycles taken to decide on a winning plan when the plan was first + // cached. + size_t decisionWorks; }; /** @@ -334,10 +338,12 @@ namespace mongo { * If the entry corresponding to 'cq' still exists, 'feedback' is added to the run * statistics about the plan. Status::OK() is returned. * - * May cause the cache entry to be removed if it is determined that the cached plan - * is badly performing. + * If 'allowedToEvict' is true, may cause the cache entry to be removed if it is determined + * that the cached plan is badly performing. */ - Status feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback); + Status feedback(const CanonicalQuery& cq, + PlanCacheEntryFeedback* feedback, + bool allowedToEvict); /** * Remove the entry corresponding to 'ck' from the cache. 
Returns Status::OK() if the plan diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index b3e329a354e..ec6ac705f52 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -41,6 +41,7 @@ #include "mongo/db/exec/working_set_common.h" #include "mongo/db/global_environment_experiment.h" #include "mongo/db/query/plan_yield_policy.h" +#include "mongo/db/query/query_knobs.h" #include "mongo/db/storage/record_fetcher.h" #include "mongo/util/stacktrace.h" @@ -183,13 +184,21 @@ namespace mongo { } // If we didn't have to do subplanning, we might still have to do regular - // multi plan selection. + // multi plan selection... foundStage = getStageByType(_root.get(), STAGE_MULTI_PLAN); if (foundStage) { MultiPlanStage* mps = static_cast<MultiPlanStage*>(foundStage); return mps->pickBestPlan(_yieldPolicy.get()); } + // ...or, we might have run a plan from the cache for a trial period, falling back on + // regular planning if the cached plan performs poorly. + foundStage = getStageByType(_root.get(), STAGE_CACHED_PLAN); + if (foundStage) { + CachedPlanStage* cachedPlan = static_cast<CachedPlanStage*>(foundStage); + return cachedPlan->pickBestPlan(_yieldPolicy.get()); + } + // Either we chose a plan, or no plan selection was required. In both cases, // our work has been successfully completed. return Status::OK(); diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp index b269e87f314..71c29358d31 100644 --- a/src/mongo/db/query/query_knobs.cpp +++ b/src/mongo/db/query/query_knobs.cpp @@ -46,6 +46,10 @@ namespace mongo { MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheWriteOpsBetweenFlush, int, 1000); + MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheReplanningEnabled, bool, false); + + MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheEvictionRatio, double, 10.0); + MONGO_EXPORT_SERVER_PARAMETER(internalQueryPlannerMaxIndexedSolutions, int, 64); MONGO_EXPORT_SERVER_PARAMETER(internalQueryEnumerationMaxOrSolutions, int, 10); diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h index 18386867b4b..0f202e823dc 100644 --- a/src/mongo/db/query/query_knobs.h +++ b/src/mongo/db/query/query_knobs.h @@ -72,6 +72,13 @@ namespace mongo { // How many write ops should we allow in a collection before tossing all cache entries? extern int internalQueryCacheWriteOpsBetweenFlush; + // Whether or not CachedPlanStage replanning is enabled. + extern bool internalQueryCacheReplanningEnabled; + + // How many times more works must we perform in order to justify plan cache eviction and + // replanning? + extern double internalQueryCacheEvictionRatio; + // // Planning and enumeration. // diff --git a/src/mongo/dbtests/query_stage_cached_plan.cpp b/src/mongo/dbtests/query_stage_cached_plan.cpp new file mode 100644 index 00000000000..a6c07a65b7a --- /dev/null +++ b/src/mongo/dbtests/query_stage_cached_plan.cpp @@ -0,0 +1,273 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include <boost/scoped_ptr.hpp> +#include <memory> + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/database.h" +#include "mongo/db/catalog/database_holder.h" +#include "mongo/db/client.h" +#include "mongo/db/exec/cached_plan.h" +#include "mongo/db/exec/queued_data_stage.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/json.h" +#include "mongo/db/operation_context_impl.h" +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/get_executor.h" +#include "mongo/db/query/plan_cache.h" +#include "mongo/db/query/plan_yield_policy.h" +#include "mongo/db/query/query_knobs.h" +#include "mongo/db/query/query_planner_params.h" +#include "mongo/dbtests/dbtests.h" +#include "mongo/util/scopeguard.h" + +namespace QueryStageCachedPlan { + + class QueryStageCachedPlanBase { + public: + QueryStageCachedPlanBase() { + // If collection exists already, we need to drop it. + dropCollection(); + + // Add indices. + addIndex(BSON("a" << 1)); + addIndex(BSON("b" << 1)); + + Client::WriteContext ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Add data. + for (int i = 0; i < 10; i++) { + insertDocument(collection, BSON("_id" << i << "a" << i << "b" << 1)); + } + } + + void addIndex(const BSONObj& obj) { + ASSERT_OK(dbtests::createIndex(&_txn, ns(), obj)); + } + + void dropCollection() { + const NamespaceString nsString(ns()); + ScopedTransaction transaction(&_txn, MODE_X); + Lock::DBLock dbLock(_txn.lockState(), nsString.db(), MODE_X); + Database* database = dbHolder().get(&_txn, nsString.db()); + if (!database) { + return; + } + + WriteUnitOfWork wuow(&_txn); + database->dropCollection(&_txn, ns()); + wuow.commit(); + } + + void insertDocument(Collection* collection, BSONObj obj) { + WriteUnitOfWork wuow(&_txn); + + const bool enforceQuota = false; + StatusWith<RecordId> res = collection->insertDocument(&_txn, obj, enforceQuota); + ASSERT(res.isOK()); + + wuow.commit(); + } + + static void resetEvictionEnabled(bool resetTo) { + internalQueryCacheReplanningEnabled = resetTo; + } + + static const char* ns() { + return "unittests.QueryStageCachedPlan"; + } + + protected: + OperationContextImpl _txn; + WorkingSet _ws; + }; + + /** + * Test that on failure, the cached plan stage replans the query but does not create a new cache + * entry. 
+ */ + class QueryStageCachedPlanFailure : public QueryStageCachedPlanBase { + public: + void run() { + bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled; + internalQueryCacheReplanningEnabled = true; + ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled, + oldReplanningFlagValue); + + AutoGetCollectionForRead ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Query can be answered by either index on "a" or index on "b". + CanonicalQuery* rawCq; + ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq)); + boost::scoped_ptr<CanonicalQuery> cq(rawCq); + + // We shouldn't have anything in the plan cache for this shape yet. + PlanCache* cache = collection->infoCache()->getPlanCache(); + ASSERT(cache); + CachedSolution* rawCachedSolution; + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + // Get planner params. + QueryPlannerParams plannerParams; + fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); + + // Queued data stage will return a failure during the cached plan trial period. + std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws)); + mockChild->pushBack(PlanStage::FAILURE); + + // High enough so that we shouldn't trigger a replan based on works. + const size_t decisionWorks = 50; + CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams, + decisionWorks, mockChild.release(), NULL); + + // This should succeed after triggering a replan. + ASSERT_OK(cachedPlanStage.pickBestPlan(NULL)); + + // Make sure that we get 2 legit results back. + size_t numResults = 0; + PlanStage::StageState state = PlanStage::NEED_TIME; + while (state != PlanStage::IS_EOF) { + WorkingSetID id = WorkingSet::INVALID_ID; + state = cachedPlanStage.work(&id); + + ASSERT_NE(state, PlanStage::FAILURE); + ASSERT_NE(state, PlanStage::DEAD); + + if (state == PlanStage::ADVANCED) { + WorkingSetMember* member = _ws.get(id); + ASSERT(cq->root()->matchesBSON(member->obj.value())); + numResults++; + } + } + + ASSERT_EQ(numResults, 2U); + + // Plan cache should still be empty, as we don't write to it when we replan a failed + // query. + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + resetEvictionEnabled(oldReplanningFlagValue); + flagResetter.Dismiss(); + } + }; + + /** + * Test that hitting the cached plan stage trial period's threshold for work cycles causes the + * query to be replanned. Also verify that the replanning results in a new plan cache entry. + */ + class QueryStageCachedPlanHitMaxWorks : public QueryStageCachedPlanBase { + public: + void run() { + bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled; + internalQueryCacheReplanningEnabled = true; + ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled, + oldReplanningFlagValue); + + AutoGetCollectionForRead ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Query can be answered by either index on "a" or index on "b". + CanonicalQuery* rawCq; + ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq)); + boost::scoped_ptr<CanonicalQuery> cq(rawCq); + + // We shouldn't have anything in the plan cache for this shape yet. + PlanCache* cache = collection->infoCache()->getPlanCache(); + ASSERT(cache); + CachedSolution* rawCachedSolution; + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + // Get planner params. 
+ QueryPlannerParams plannerParams; + fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); + + // Set up queued data stage to take a long time before returning EOF. Should be long + // enough to trigger a replan. + const size_t decisionWorks = 10; + const size_t mockWorks = 1U + static_cast<size_t>(internalQueryCacheEvictionRatio + * decisionWorks); + std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws)); + for (size_t i = 0; i < mockWorks; i++) { + mockChild->pushBack(PlanStage::NEED_TIME); + } + + CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams, + decisionWorks, mockChild.release(), NULL); + + // This should succeed after triggering a replan. + ASSERT_OK(cachedPlanStage.pickBestPlan(NULL)); + + // Make sure that we get 2 legit results back. + size_t numResults = 0; + PlanStage::StageState state = PlanStage::NEED_TIME; + while (state != PlanStage::IS_EOF) { + WorkingSetID id = WorkingSet::INVALID_ID; + state = cachedPlanStage.work(&id); + + ASSERT_NE(state, PlanStage::FAILURE); + ASSERT_NE(state, PlanStage::DEAD); + + if (state == PlanStage::ADVANCED) { + WorkingSetMember* member = _ws.get(id); + ASSERT(cq->root()->matchesBSON(member->obj.value())); + numResults++; + } + } + + ASSERT_EQ(numResults, 2U); + + // This time we expect to find something in the plan cache. Replans after hitting the + // works threshold result in a cache entry. + ASSERT_OK(cache->get(*cq, &rawCachedSolution)); + boost::scoped_ptr<CachedSolution> cachedSolution(rawCachedSolution); + + resetEvictionEnabled(oldReplanningFlagValue); + flagResetter.Dismiss(); + } + }; + + class All : public Suite { + public: + All() : Suite("query_stage_cached_plan") {} + + void setupTests() { + add<QueryStageCachedPlanFailure>(); + add<QueryStageCachedPlanHitMaxWorks>(); + } + }; + + SuiteInstance<All> all; + +} // namespace QueryStageCachedPlan |
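As a concrete illustration of the threshold arithmetic that QueryStageCachedPlanHitMaxWorks
exercises, the standalone snippet below is only a sketch; the values mirror the dbtest above
and the query_knobs defaults from this patch:

    // Sketch (not part of the patch): the trial-period budget computed by
    // CachedPlanStage::pickBestPlan(), using the values from the dbtest above.
    #include <cstddef>
    #include <iostream>

    int main() {
        const double evictionRatio = 10.0;     // internalQueryCacheEvictionRatio default
        const std::size_t decisionWorks = 10;  // works recorded when the plan was first cached
        const std::size_t maxWorksBeforeReplan =
            static_cast<std::size_t>(evictionRatio * decisionWorks);  // 100-work trial budget
        const std::size_t mockWorks = 1U + maxWorksBeforeReplan;      // test queues 101 NEED_TIMEs

        // Because the queued child never produces a result within the 100-work budget,
        // pickBestPlan() evicts the cache entry and calls replan(shouldCache = true), so the
        // replanned winner is written back to the cache and the test's final cache->get()
        // succeeds.
        std::cout << "budget=" << maxWorksBeforeReplan
                  << " mockWorks=" << mockWorks << std::endl;
        return 0;
    }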