author     David Storch <david.storch@10gen.com>    2015-05-22 13:19:19 -0400
committer  David Storch <david.storch@10gen.com>    2015-06-03 14:53:48 -0400
commit     f80286e0203fa0aa0873bff2963ce0aa81a51383 (patch)
tree       dea40d051e88062eb025dca2dc78f00535d9710b /src/mongo
parent     b677e49bed78c415498102a6d7d1cfbed43e76f7 (diff)
download   mongo-f80286e0203fa0aa0873bff2963ce0aa81a51383.tar.gz
SERVER-15225 CachedPlanStage can replan poorly performing queries after a trial period
This is a minimal backport of the rewrite to the CachedPlanStage. The functionality is behind a flag and turned off by default. It can be enabled with the internalQueryCacheReplanningEnabled setParameter.
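Editorial note (not part of the commit): a parameter registered through MONGO_EXPORT_SERVER_PARAMETER, as internalQueryCacheReplanningEnabled is in this patch, can normally be set both at mongod startup and at runtime. A minimal sketch of enabling the flag, assuming a 3.0-era mongod and mongo shell:

    // At runtime, from the mongo shell (requires admin privileges):
    db.adminCommand({setParameter: 1, internalQueryCacheReplanningEnabled: true})

    // Optionally tune the replan threshold (defaults to 10.0 in query_knobs.cpp):
    db.adminCommand({setParameter: 1, internalQueryCacheEvictionRatio: 10.0})

    // Or at startup:
    mongod --setParameter internalQueryCacheReplanningEnabled=true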
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/exec/cached_plan.cpp              | 258
-rw-r--r--  src/mongo/db/exec/cached_plan.h                |  71
-rw-r--r--  src/mongo/db/exec/multi_plan.cpp               |  40
-rw-r--r--  src/mongo/db/exec/multi_plan.h                 |  21
-rw-r--r--  src/mongo/db/query/get_executor.cpp            |  13
-rw-r--r--  src/mongo/db/query/plan_cache.cpp              |  14
-rw-r--r--  src/mongo/db/query/plan_cache.h                |  12
-rw-r--r--  src/mongo/db/query/plan_executor.cpp           |  11
-rw-r--r--  src/mongo/db/query/query_knobs.cpp             |   4
-rw-r--r--  src/mongo/db/query/query_knobs.h               |   7
-rw-r--r--  src/mongo/dbtests/query_stage_cached_plan.cpp  | 273
11 files changed, 686 insertions(+), 38 deletions(-)
diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp
index 2bc2e39bd1b..fed8b8de0c0 100644
--- a/src/mongo/db/exec/cached_plan.cpp
+++ b/src/mongo/db/exec/cached_plan.cpp
@@ -26,18 +26,28 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+
+#include "mongo/platform/basic.h"
+
#include "mongo/db/exec/cached_plan.h"
-#include "mongo/db/exec/scoped_timer.h"
-#include "mongo/db/exec/working_set_common.h"
-#include "mongo/util/mongoutils/str.h"
-// for updateCache
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/client.h"
+#include "mongo/db/exec/multi_plan.h"
+#include "mongo/db/exec/scoped_timer.h"
+#include "mongo/db/exec/working_set_common.h"
+#include "mongo/db/query/explain.h"
#include "mongo/db/query/plan_cache.h"
#include "mongo/db/query/plan_ranker.h"
+#include "mongo/db/query/plan_yield_policy.h"
#include "mongo/db/query/qlog.h"
+#include "mongo/db/query/query_knobs.h"
+#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/stage_builder.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/log.h"
namespace mongo {
@@ -47,14 +57,23 @@ namespace mongo {
// static
const char* CachedPlanStage::kStageType = "CACHED_PLAN";
- CachedPlanStage::CachedPlanStage(const Collection* collection,
+ CachedPlanStage::CachedPlanStage(OperationContext* txn,
+ Collection* collection,
+ WorkingSet* ws,
CanonicalQuery* cq,
+ const QueryPlannerParams& params,
+ size_t decisionWorks,
PlanStage* mainChild,
QuerySolution* mainQs,
PlanStage* backupChild,
QuerySolution* backupQs)
- : _collection(collection),
+ : _txn(txn),
+ _collection(collection),
+ _ws(ws),
_canonicalQuery(cq),
+ _plannerParams(params),
+ _replanningEnabled(internalQueryCacheReplanningEnabled),
+ _decisionWorks(decisionWorks),
_mainQs(mainQs),
_backupQs(backupQs),
_mainChildPlan(mainChild),
@@ -80,9 +99,206 @@ namespace mongo {
return true;
}
+ if (!_results.empty()) {
+ return false;
+ }
+
return getActiveChild()->isEOF();
}
+ Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
+ // If replanning is disabled, then this is a no-op.
+ if (!_replanningEnabled) {
+ return Status::OK();
+ }
+
+ // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
+ // execution work that happens here, so this is needed for the time accounting to
+ // make sense.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
+ // If we work this many times during the trial period, then we will replan the
+ // query from scratch.
+ size_t maxWorksBeforeReplan = static_cast<size_t>(internalQueryCacheEvictionRatio
+ * _decisionWorks);
+
+ // The trial period ends without replanning if the cached plan produces this many results.
+ size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery);
+
+ for (size_t i = 0; i < maxWorksBeforeReplan; ++i) {
+ // Might need to yield between calls to work due to the timer elapsing.
+ Status yieldStatus = tryYield(yieldPolicy);
+ if (!yieldStatus.isOK()) {
+ return yieldStatus;
+ }
+
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ PlanStage::StageState state = _mainChildPlan->work(&id);
+
+ if (PlanStage::ADVANCED == state) {
+ // Save result for later.
+ _results.push_back(id);
+
+ if (_results.size() >= numResults) {
+ // Once a plan returns enough results, stop working. Update cache with stats
+ // from this run and return.
+ updateCache();
+ return Status::OK();
+ }
+ }
+ else if (PlanStage::IS_EOF == state) {
+ // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats
+ // from this run and return.
+ updateCache();
+ return Status::OK();
+ }
+ else if (PlanStage::NEED_FETCH == state) {
+ WorkingSetMember* member = _ws->get(id);
+ invariant(member->hasFetcher());
+ // Transfer ownership of the fetcher and yield.
+ _fetcher.reset(member->releaseFetcher());
+ Status fetchYieldStatus = tryYield(yieldPolicy);
+ if (!fetchYieldStatus.isOK()) {
+ return fetchYieldStatus;
+ }
+ }
+ else if (PlanStage::FAILURE == state) {
+ // On failure, fall back to replanning the whole query. We neither evict the
+ // existing cache entry nor cache the result of replanning.
+ BSONObj statusObj;
+ WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);
+
+ LOG(1) << "Execution of cached plan failed, falling back to replan."
+ << " query: "
+ << _canonicalQuery->toStringShort()
+ << " planSummary: "
+ << Explain::getPlanSummary(_mainChildPlan.get())
+ << " status: "
+ << statusObj;
+
+ const bool shouldCache = false;
+ return replan(yieldPolicy, shouldCache);
+ }
+ else if (PlanStage::DEAD == state) {
+ return Status(ErrorCodes::OperationFailed,
+ "Executor killed during cached plan trial period");
+ }
+ else {
+ invariant(PlanStage::NEED_TIME == state);
+ }
+ }
+
+ // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This
+ // plan is taking too long, so we replan from scratch.
+ LOG(1) << "Execution of cached plan required "
+ << maxWorksBeforeReplan
+ << " works, but was originally cached with only "
+ << _decisionWorks
+ << " works. Evicting cache entry and replanning query: "
+ << _canonicalQuery->toStringShort()
+ << " plan summary before replan: "
+ << Explain::getPlanSummary(_mainChildPlan.get());
+
+ const bool shouldCache = true;
+ return replan(yieldPolicy, shouldCache);
+ }
+
+ Status CachedPlanStage::tryYield(PlanYieldPolicy* yieldPolicy) {
+ // These are the conditions which cause us to yield during plan selection if we have a
+ // YIELD_AUTO policy:
+ // 1) The yield policy's timer elapsed, or
+ // 2) some stage requested a yield due to a document fetch (NEED_FETCH).
+ // In both cases, the actual yielding happens here.
+ if (NULL != yieldPolicy && (yieldPolicy->shouldYield() || NULL != _fetcher.get())) {
+ // Here's where we yield.
+ bool alive = yieldPolicy->yield(_fetcher.get());
+
+ if (!alive) {
+ return Status(ErrorCodes::OperationFailed,
+ "PlanExecutor killed during cached plan trial period");
+ }
+ }
+
+ // We're done using the fetcher, so it should be freed. We don't want to
+ // use the same RecordFetcher twice.
+ _fetcher.reset();
+
+ return Status::OK();
+ }
+
+ Status CachedPlanStage::replan(PlanYieldPolicy* yieldPolicy, bool shouldCache) {
+ // We're going to start over with a new plan. No need for any old buffered results.
+ _results.clear();
+
+ // Clear out the working set. We'll start with a fresh working set.
+ _ws->clear();
+
+ // No need for any existing child stages or QuerySolutions. We will create new ones from
+ // scratch.
+ _mainQs.reset();
+ _backupQs.reset();
+ _mainChildPlan.reset();
+ _backupChildPlan.reset();
+
+ // Remove the current plan cache entry for this shape. The plan cache entry could have
+ // already been removed by another thread, so our removal won't necessarily succeed.
+ if (shouldCache) {
+ PlanCache* cache = _collection->infoCache()->getPlanCache();
+ cache->remove(*_canonicalQuery);
+ }
+
+ // Use the query planning module to plan the whole query.
+ std::vector<QuerySolution*> rawSolutions;
+ Status status = QueryPlanner::plan(*_canonicalQuery, _plannerParams, &rawSolutions);
+ if (!status.isOK()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "error processing query: " << _canonicalQuery->toString()
+ << " planner returned error: " << status.reason());
+ }
+
+ OwnedPointerVector<QuerySolution> solutions(rawSolutions);
+
+ // We cannot figure out how to answer the query. Perhaps it requires an index
+ // we do not have?
+ if (0 == solutions.size()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "error processing query: "
+ << _canonicalQuery->toString()
+ << " No query solutions");
+ }
+
+ if (1 == solutions.size()) {
+ PlanStage* newRoot;
+ // Only one possible plan. Build the stages from the solution.
+ verify(StageBuilder::build(_txn, _collection, *solutions[0], _ws, &newRoot));
+ _mainChildPlan.reset(newRoot);
+ _mainQs.reset(solutions.popAndReleaseBack());
+ return Status::OK();
+ }
+
+ // Many solutions. Create a MultiPlanStage to pick the best, update the cache,
+ // and so on. The working set will be shared by all candidate plans.
+ _mainChildPlan.reset(new MultiPlanStage(_txn, _collection, _canonicalQuery, shouldCache));
+ MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(_mainChildPlan.get());
+
+ for (size_t ix = 0; ix < solutions.size(); ++ix) {
+ if (solutions[ix]->cacheData.get()) {
+ solutions[ix]->cacheData->indexFilterApplied = _plannerParams.indexFiltersApplied;
+ }
+
+ PlanStage* nextPlanRoot;
+ verify(StageBuilder::build(_txn, _collection, *solutions[ix], _ws, &nextPlanRoot));
+
+ // Takes ownership of 'solutions[ix]' and 'nextPlanRoot'.
+ multiPlanStage->addPlan(solutions.releaseAt(ix), nextPlanRoot, _ws);
+ }
+
+ // Delegate to the MultiPlanStage's plan selection facility.
+ return multiPlanStage->pickBestPlan(yieldPolicy);
+ }
+
PlanStage::StageState CachedPlanStage::work(WorkingSetID* out) {
++_commonStats.works;
@@ -94,6 +310,15 @@ namespace mongo {
if (isEOF()) { return PlanStage::IS_EOF; }
+ // First exhaust any results buffered during the trial period.
+ if (!_results.empty()) {
+ *out = _results.front();
+ _results.pop_front();
+ _commonStats.advanced++;
+ _alreadyProduced = true;
+ return PlanStage::ADVANCED;
+ }
+
StageState childStatus = getActiveChild()->work(out);
if (PlanStage::ADVANCED == childStatus) {
@@ -107,9 +332,11 @@ namespace mongo {
else if (PlanStage::FAILURE == childStatus
&& !_alreadyProduced
&& !_usingBackupChild
+ && !_replanningEnabled
&& NULL != _backupChildPlan.get()) {
// Switch the active child to the backup. Subsequent calls to work() will exercise
- // the backup plan.
+ // the backup plan. We are only willing to switch to the backup plan if replanning is
+ // disabled.
_usingBackupChild = true;
_commonStats.needTime++;
return PlanStage::NEED_TIME;
@@ -152,6 +379,20 @@ namespace mongo {
_backupChildPlan->invalidate(txn, dl, type);
}
++_commonStats.invalidates;
+
+ for (std::list<WorkingSetID>::iterator it = _results.begin(); it != _results.end(); ) {
+ WorkingSetMember* member = _ws->get(*it);
+ if (member->hasLoc() && member->loc == dl) {
+ std::list<WorkingSetID>::iterator next = it;
+ ++next;
+ WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection);
+ _results.erase(it);
+ it = next;
+ }
+ else {
+ ++it;
+ }
+ }
}
vector<PlanStage*> CachedPlanStage::getChildren() const {
@@ -197,7 +438,8 @@ namespace mongo {
feedback->score = PlanRanker::scoreTree(feedback->stats.get());
PlanCache* cache = _collection->infoCache()->getPlanCache();
- Status fbs = cache->feedback(*_canonicalQuery, feedback.release());
+ const bool allowedToEvict = !_replanningEnabled;
+ Status fbs = cache->feedback(*_canonicalQuery, feedback.release(), allowedToEvict);
if (!fbs.isOK()) {
QLOG() << _canonicalQuery->ns() << ": Failed to update cache with feedback: "
diff --git a/src/mongo/db/exec/cached_plan.h b/src/mongo/db/exec/cached_plan.h
index ceb63e1e190..632476b481d 100644
--- a/src/mongo/db/exec/cached_plan.h
+++ b/src/mongo/db/exec/cached_plan.h
@@ -28,15 +28,21 @@
#pragma once
+#include <boost/scoped_ptr.hpp>
+#include <list>
+
#include "mongo/db/jsobj.h"
#include "mongo/db/exec/plan_stage.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/db/record_id.h"
+#include "mongo/db/storage/record_fetcher.h"
namespace mongo {
+ class PlanYieldPolicy;
+
/**
* This stage outputs its mainChild, and possibly its backup child
* and also updates the cache.
@@ -49,8 +55,12 @@ namespace mongo {
/**
* Takes ownership of 'mainChild', 'mainQs', 'backupChild', and 'backupQs'.
*/
- CachedPlanStage(const Collection* collection,
+ CachedPlanStage(OperationContext* txn,
+ Collection* collection,
+ WorkingSet* ws,
CanonicalQuery* cq,
+ const QueryPlannerParams& params,
+ size_t decisionWorks,
PlanStage* mainChild,
QuerySolution* mainQs,
PlanStage* backupChild = NULL,
@@ -80,16 +90,60 @@ namespace mongo {
void kill();
+ /**
+ * Runs the cached plan for a trial period, yielding during the trial period according to
+ * 'yieldPolicy'.
+ *
+ * If the performance is lower than expected, the old plan is evicted and a new plan is
+ * selected from scratch (again yielding according to 'yieldPolicy'). Otherwise, the cached
+ * plan is run.
+ */
+ Status pickBestPlan(PlanYieldPolicy* yieldPolicy);
+
private:
PlanStage* getActiveChild() const;
void updateCache();
- // not owned
- const Collection* _collection;
+ /**
+ * May yield during the cached plan stage's trial period or replanning phases.
+ *
+ * Returns a non-OK status if the plan was killed during a yield.
+ */
+ Status tryYield(PlanYieldPolicy* yieldPolicy);
+
+ /**
+ * Uses the QueryPlanner and the MultiPlanStage to re-generate candidate plans for this
+ * query and select a new winner.
+ *
+ * We fall back to a new plan if the plan performed worse during the trial period than
+ * anticipated, based on the number of works that originally put the plan in the
+ * cache.
+ *
+ * We only write the result of re-planning to the plan cache if 'shouldCache' is true.
+ */
+ Status replan(PlanYieldPolicy* yieldPolicy, bool shouldCache);
+
+ // Not owned here.
+ OperationContext* _txn;
+
+ // Not owned here.
+ Collection* _collection;
- // not owned
+ // Not owned here.
+ WorkingSet* _ws;
+
+ // Not owned here.
CanonicalQuery* _canonicalQuery;
+ QueryPlannerParams _plannerParams;
+
+ // Whether or not the cached plan trial period and replanning are enabled.
+ const bool _replanningEnabled;
+
+ // The number of work cycles taken to decide on a winning plan when the plan was first
+ // cached.
+ size_t _decisionWorks;
+
// Owned by us. Must be deleted after the corresponding PlanStage trees, as
// those trees point into the query solutions.
boost::scoped_ptr<QuerySolution> _mainQs;
@@ -113,6 +167,15 @@ namespace mongo {
// Has this query been killed?
bool _killed;
+ // Any results produced during trial period execution are kept here.
+ std::list<WorkingSetID> _results;
+
+ // When a stage requests a yield for document fetch, it gives us back a RecordFetcher*
+ // to use to pull the record into memory. We take ownership of the RecordFetcher here,
+ // deleting it after we've had a chance to do the fetch. For timing-based yields, we
+ // just pass a NULL fetcher.
+ boost::scoped_ptr<RecordFetcher> _fetcher;
+
// Stats
CommonStats _commonStats;
CachedPlanStats _specificStats;
diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp
index d7e9e19c30e..6415d4f91ec 100644
--- a/src/mongo/db/exec/multi_plan.cpp
+++ b/src/mongo/db/exec/multi_plan.cpp
@@ -61,9 +61,11 @@ namespace mongo {
MultiPlanStage::MultiPlanStage(OperationContext* txn,
const Collection* collection,
- CanonicalQuery* cq)
+ CanonicalQuery* cq,
+ bool shouldCache)
: _txn(txn),
_collection(collection),
+ _shouldCache(shouldCache),
_query(cq),
_bestPlanIdx(kNoSuchPlan),
_backupPlanIdx(kNoSuchPlan),
@@ -183,6 +185,24 @@ namespace mongo {
return Status::OK();
}
+ // static
+ size_t MultiPlanStage::getTrialPeriodNumToReturn(const CanonicalQuery& query) {
+ // We treat ntoreturn as though it is a limit during plan ranking.
+ // This means that ranking might not be great for sort + batchSize.
+ // But it also means that we don't buffer too much data for sort + limit.
+ // See SERVER-14174 for details.
+ size_t numToReturn = query.getParsed().getNumToReturn();
+
+ // Determine the number of results which we will produce during the plan
+ // ranking phase before stopping.
+ size_t numResults = static_cast<size_t>(internalQueryPlanEvaluationMaxResults);
+ if (numToReturn > 0) {
+ numResults = std::min(numToReturn, numResults);
+ }
+
+ return numResults;
+ }
+
Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
// Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
// execution work that happens here, so this is needed for the time accounting to
@@ -205,14 +225,7 @@ namespace mongo {
// This means that ranking might not be great for sort + batchSize.
// But it also means that we don't buffer too much data for sort + limit.
// See SERVER-14174 for details.
- size_t numToReturn = _query->getParsed().getNumToReturn();
-
- // Determine the number of results which we will produce during the plan
- // ranking phase before stopping.
- size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults;
- if (numToReturn > 0) {
- numResults = std::min(numToReturn, numResults);
- }
+ size_t numResults = getTrialPeriodNumToReturn(*_query);
// Work the plans, stopping when a plan hits EOF or returns some
// fixed number of results.
@@ -311,13 +324,16 @@ namespace mongo {
// 1) the query must be of a type that is safe to cache,
// 2) two or more plans cannot have tied for the win. Caching in the case of ties can
// cause successive queries of the same shape to use a bad index.
- // 3) Furthermore, the winning plan must have returned at least one result. Plans which
+ // 3) The caller must have indicated that it is willing to allow a plan to be cached via
+ // the '_shouldCache' argument to the constructor.
+ // 4) Furthermore, the winning plan must have returned at least one result. Plans which
// return zero results cannot be reliably ranked. Such query shapes are generally
// existence type queries, and a winning plan should get cached once the query finds a
// result.
if (PlanCache::shouldCacheQuery(*_query)
- && !ranking->tieForBest
- && !alreadyProduced.empty()) {
+ && !ranking->tieForBest
+ && !alreadyProduced.empty()
+ && _shouldCache) {
// Create list of candidate solutions for the cache with
// the best solution at the front.
std::vector<QuerySolution*> solutions;
diff --git a/src/mongo/db/exec/multi_plan.h b/src/mongo/db/exec/multi_plan.h
index a793ee7127b..2e2be24d455 100644
--- a/src/mongo/db/exec/multi_plan.h
+++ b/src/mongo/db/exec/multi_plan.h
@@ -52,8 +52,16 @@ namespace mongo {
*/
class MultiPlanStage : public PlanStage {
public:
- /** Takes no ownership */
- MultiPlanStage(OperationContext* txn, const Collection* collection, CanonicalQuery* cq);
+ /**
+ * Takes no ownership.
+ *
+ * If 'shouldCache' is true, writes a cache entry for the winning plan to the plan cache
+ * when possible. If 'shouldCache' is false, the plan cache will never be written.
+ */
+ MultiPlanStage(OperationContext* txn,
+ const Collection* collection,
+ CanonicalQuery* cq,
+ bool shouldCache = true);
virtual ~MultiPlanStage();
@@ -94,6 +102,12 @@ namespace mongo {
*/
Status pickBestPlan(PlanYieldPolicy* yieldPolicy);
+ /**
+ * Returns the max number of documents which we should allow any plan to return during the
+ * trial period. As soon as any plan hits this number of documents, the trial period ends.
+ */
+ static size_t getTrialPeriodNumToReturn(const CanonicalQuery& query);
+
/** Return true if a best plan has been chosen */
bool bestPlanChosen() const;
@@ -156,6 +170,9 @@ namespace mongo {
OperationContext* _txn;
const Collection* _collection;
+ // Whether or not we should try to cache the winning plan in the plan cache.
+ const bool _shouldCache;
+
// The query that we're trying to figure out the best solution to.
// not owned here
CanonicalQuery* _query;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index a3cf395ade3..62971906234 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -311,9 +311,16 @@ namespace mongo {
// Add a CachedPlanStage on top of the previous root. Takes ownership of
// '*rootOut', 'backupRoot', 'qs', and 'backupQs'.
- *rootOut = new CachedPlanStage(collection, canonicalQuery,
- *rootOut, qs,
- backupRoot, backupQs);
+ *rootOut = new CachedPlanStage(opCtx,
+ collection,
+ ws,
+ canonicalQuery,
+ plannerParams,
+ cs->decisionWorks,
+ *rootOut,
+ qs,
+ backupRoot,
+ backupQs);
return Status::OK();
}
}
diff --git a/src/mongo/db/query/plan_cache.cpp b/src/mongo/db/query/plan_cache.cpp
index d1b16dc209c..9ecbf3fcc93 100644
--- a/src/mongo/db/query/plan_cache.cpp
+++ b/src/mongo/db/query/plan_cache.cpp
@@ -114,7 +114,8 @@ namespace mongo {
key(key),
query(entry.query.getOwned()),
sort(entry.sort.getOwned()),
- projection(entry.projection.getOwned()) {
+ projection(entry.projection.getOwned()),
+ decisionWorks(entry.decision->stats[0]->common.works) {
// CachedSolution should not have any references into
// cache entry. All relevant data should be cloned/copied.
for (size_t i = 0; i < entry.plannerData.size(); ++i) {
@@ -423,7 +424,9 @@ namespace mongo {
return false;
}
- Status PlanCache::feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback) {
+ Status PlanCache::feedback(const CanonicalQuery& cq,
+ PlanCacheEntryFeedback* feedback,
+ bool allowedToEvict) {
if (NULL == feedback) {
return Status(ErrorCodes::BadValue, "feedback is NULL");
}
@@ -439,9 +442,10 @@ namespace mongo {
invariant(entry);
if (entry->feedback.size() >= size_t(internalQueryCacheFeedbacksStored)) {
- // If we have enough feedback, then use it to determine whether
- // we should get rid of the cached solution.
- if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get())) {
+ // If we have enough feedback, then use it to determine whether we should get rid of the
+ // cached solution. We do not use the feedback-based eviction policy if replanning is
+ // enabled.
+ if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get()) && allowedToEvict) {
LOG(1) << _ns << ": removing plan cache entry " << entry->toString()
<< " - detected degradation in performance of cached solution.";
_cache.remove(ck);
diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h
index 57f5657b053..28f07b4ea5f 100644
--- a/src/mongo/db/query/plan_cache.h
+++ b/src/mongo/db/query/plan_cache.h
@@ -196,6 +196,10 @@ namespace mongo {
BSONObj query;
BSONObj sort;
BSONObj projection;
+
+ // The number of work cycles taken to decide on a winning plan when the plan was first
+ // cached.
+ size_t decisionWorks;
};
/**
@@ -334,10 +338,12 @@ namespace mongo {
* If the entry corresponding to 'cq' still exists, 'feedback' is added to the run
* statistics about the plan. Status::OK() is returned.
*
- * May cause the cache entry to be removed if it is determined that the cached plan
- * is badly performing.
+ * If 'allowedToEvict' is true, may cause the cache entry to be removed if it is determined
+ * that the cached plan is badly performing.
*/
- Status feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback);
+ Status feedback(const CanonicalQuery& cq,
+ PlanCacheEntryFeedback* feedback,
+ bool allowedToEvict);
/**
* Remove the entry corresponding to 'ck' from the cache. Returns Status::OK() if the plan
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index b3e329a354e..ec6ac705f52 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/global_environment_experiment.h"
#include "mongo/db/query/plan_yield_policy.h"
+#include "mongo/db/query/query_knobs.h"
#include "mongo/db/storage/record_fetcher.h"
#include "mongo/util/stacktrace.h"
@@ -183,13 +184,21 @@ namespace mongo {
}
// If we didn't have to do subplanning, we might still have to do regular
- // multi plan selection.
+ // multi plan selection...
foundStage = getStageByType(_root.get(), STAGE_MULTI_PLAN);
if (foundStage) {
MultiPlanStage* mps = static_cast<MultiPlanStage*>(foundStage);
return mps->pickBestPlan(_yieldPolicy.get());
}
+ // ...or, we might have run a plan from the cache for a trial period, falling back on
+ // regular planning if the cached plan performs poorly.
+ foundStage = getStageByType(_root.get(), STAGE_CACHED_PLAN);
+ if (foundStage) {
+ CachedPlanStage* cachedPlan = static_cast<CachedPlanStage*>(foundStage);
+ return cachedPlan->pickBestPlan(_yieldPolicy.get());
+ }
+
// Either we chose a plan, or no plan selection was required. In both cases,
// our work has been successfully completed.
return Status::OK();
diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp
index b269e87f314..71c29358d31 100644
--- a/src/mongo/db/query/query_knobs.cpp
+++ b/src/mongo/db/query/query_knobs.cpp
@@ -46,6 +46,10 @@ namespace mongo {
MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheWriteOpsBetweenFlush, int, 1000);
+ MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheReplanningEnabled, bool, false);
+
+ MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheEvictionRatio, double, 10.0);
+
MONGO_EXPORT_SERVER_PARAMETER(internalQueryPlannerMaxIndexedSolutions, int, 64);
MONGO_EXPORT_SERVER_PARAMETER(internalQueryEnumerationMaxOrSolutions, int, 10);
diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h
index 18386867b4b..0f202e823dc 100644
--- a/src/mongo/db/query/query_knobs.h
+++ b/src/mongo/db/query/query_knobs.h
@@ -72,6 +72,13 @@ namespace mongo {
// How many write ops should we allow in a collection before tossing all cache entries?
extern int internalQueryCacheWriteOpsBetweenFlush;
+ // Whether or not CachedPlanStage replanning is enabled.
+ extern bool internalQueryCacheReplanningEnabled;
+
+ // How many times more works must we perform in order to justify plan cache eviction and
+ // replanning?
+ extern double internalQueryCacheEvictionRatio;
+
//
// Planning and enumeration.
//
diff --git a/src/mongo/dbtests/query_stage_cached_plan.cpp b/src/mongo/dbtests/query_stage_cached_plan.cpp
new file mode 100644
index 00000000000..a6c07a65b7a
--- /dev/null
+++ b/src/mongo/dbtests/query_stage_cached_plan.cpp
@@ -0,0 +1,273 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include <boost/scoped_ptr.hpp>
+#include <memory>
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/catalog/database.h"
+#include "mongo/db/catalog/database_holder.h"
+#include "mongo/db/client.h"
+#include "mongo/db/exec/cached_plan.h"
+#include "mongo/db/exec/queued_data_stage.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/db/json.h"
+#include "mongo/db/operation_context_impl.h"
+#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/plan_cache.h"
+#include "mongo/db/query/plan_yield_policy.h"
+#include "mongo/db/query/query_knobs.h"
+#include "mongo/db/query/query_planner_params.h"
+#include "mongo/dbtests/dbtests.h"
+#include "mongo/util/scopeguard.h"
+
+namespace QueryStageCachedPlan {
+
+ class QueryStageCachedPlanBase {
+ public:
+ QueryStageCachedPlanBase() {
+ // If collection exists already, we need to drop it.
+ dropCollection();
+
+ // Add indices.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+
+ Client::WriteContext ctx(&_txn, ns());
+ Collection* collection = ctx.getCollection();
+ ASSERT(collection);
+
+ // Add data.
+ for (int i = 0; i < 10; i++) {
+ insertDocument(collection, BSON("_id" << i << "a" << i << "b" << 1));
+ }
+ }
+
+ void addIndex(const BSONObj& obj) {
+ ASSERT_OK(dbtests::createIndex(&_txn, ns(), obj));
+ }
+
+ void dropCollection() {
+ const NamespaceString nsString(ns());
+ ScopedTransaction transaction(&_txn, MODE_X);
+ Lock::DBLock dbLock(_txn.lockState(), nsString.db(), MODE_X);
+ Database* database = dbHolder().get(&_txn, nsString.db());
+ if (!database) {
+ return;
+ }
+
+ WriteUnitOfWork wuow(&_txn);
+ database->dropCollection(&_txn, ns());
+ wuow.commit();
+ }
+
+ void insertDocument(Collection* collection, BSONObj obj) {
+ WriteUnitOfWork wuow(&_txn);
+
+ const bool enforceQuota = false;
+ StatusWith<RecordId> res = collection->insertDocument(&_txn, obj, enforceQuota);
+ ASSERT(res.isOK());
+
+ wuow.commit();
+ }
+
+ static void resetEvictionEnabled(bool resetTo) {
+ internalQueryCacheReplanningEnabled = resetTo;
+ }
+
+ static const char* ns() {
+ return "unittests.QueryStageCachedPlan";
+ }
+
+ protected:
+ OperationContextImpl _txn;
+ WorkingSet _ws;
+ };
+
+ /**
+ * Test that on failure, the cached plan stage replans the query but does not create a new cache
+ * entry.
+ */
+ class QueryStageCachedPlanFailure : public QueryStageCachedPlanBase {
+ public:
+ void run() {
+ bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled;
+ internalQueryCacheReplanningEnabled = true;
+ ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled,
+ oldReplanningFlagValue);
+
+ AutoGetCollectionForRead ctx(&_txn, ns());
+ Collection* collection = ctx.getCollection();
+ ASSERT(collection);
+
+ // Query can be answered by either index on "a" or index on "b".
+ CanonicalQuery* rawCq;
+ ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq));
+ boost::scoped_ptr<CanonicalQuery> cq(rawCq);
+
+ // We shouldn't have anything in the plan cache for this shape yet.
+ PlanCache* cache = collection->infoCache()->getPlanCache();
+ ASSERT(cache);
+ CachedSolution* rawCachedSolution;
+ ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution));
+
+ // Get planner params.
+ QueryPlannerParams plannerParams;
+ fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);
+
+ // Queued data stage will return a failure during the cached plan trial period.
+ std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws));
+ mockChild->pushBack(PlanStage::FAILURE);
+
+ // High enough so that we shouldn't trigger a replan based on works.
+ const size_t decisionWorks = 50;
+ CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams,
+ decisionWorks, mockChild.release(), NULL);
+
+ // This should succeed after triggering a replan.
+ ASSERT_OK(cachedPlanStage.pickBestPlan(NULL));
+
+ // Make sure that we get 2 legit results back.
+ size_t numResults = 0;
+ PlanStage::StageState state = PlanStage::NEED_TIME;
+ while (state != PlanStage::IS_EOF) {
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ state = cachedPlanStage.work(&id);
+
+ ASSERT_NE(state, PlanStage::FAILURE);
+ ASSERT_NE(state, PlanStage::DEAD);
+
+ if (state == PlanStage::ADVANCED) {
+ WorkingSetMember* member = _ws.get(id);
+ ASSERT(cq->root()->matchesBSON(member->obj.value()));
+ numResults++;
+ }
+ }
+
+ ASSERT_EQ(numResults, 2U);
+
+ // Plan cache should still be empty, as we don't write to it when we replan a failed
+ // query.
+ ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution));
+
+ resetEvictionEnabled(oldReplanningFlagValue);
+ flagResetter.Dismiss();
+ }
+ };
+
+ /**
+ * Test that hitting the cached plan stage trial period's threshold for work cycles causes the
+ * query to be replanned. Also verify that the replanning results in a new plan cache entry.
+ */
+ class QueryStageCachedPlanHitMaxWorks : public QueryStageCachedPlanBase {
+ public:
+ void run() {
+ bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled;
+ internalQueryCacheReplanningEnabled = true;
+ ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled,
+ oldReplanningFlagValue);
+
+ AutoGetCollectionForRead ctx(&_txn, ns());
+ Collection* collection = ctx.getCollection();
+ ASSERT(collection);
+
+ // Query can be answered by either index on "a" or index on "b".
+ CanonicalQuery* rawCq;
+ ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq));
+ boost::scoped_ptr<CanonicalQuery> cq(rawCq);
+
+ // We shouldn't have anything in the plan cache for this shape yet.
+ PlanCache* cache = collection->infoCache()->getPlanCache();
+ ASSERT(cache);
+ CachedSolution* rawCachedSolution;
+ ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution));
+
+ // Get planner params.
+ QueryPlannerParams plannerParams;
+ fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);
+
+ // Set up queued data stage to take a long time before returning EOF. Should be long
+ // enough to trigger a replan.
+ const size_t decisionWorks = 10;
+ const size_t mockWorks = 1U + static_cast<size_t>(internalQueryCacheEvictionRatio
+ * decisionWorks);
+ std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws));
+ for (size_t i = 0; i < mockWorks; i++) {
+ mockChild->pushBack(PlanStage::NEED_TIME);
+ }
+
+ CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams,
+ decisionWorks, mockChild.release(), NULL);
+
+ // This should succeed after triggering a replan.
+ ASSERT_OK(cachedPlanStage.pickBestPlan(NULL));
+
+ // Make sure that we get 2 legit results back.
+ size_t numResults = 0;
+ PlanStage::StageState state = PlanStage::NEED_TIME;
+ while (state != PlanStage::IS_EOF) {
+ WorkingSetID id = WorkingSet::INVALID_ID;
+ state = cachedPlanStage.work(&id);
+
+ ASSERT_NE(state, PlanStage::FAILURE);
+ ASSERT_NE(state, PlanStage::DEAD);
+
+ if (state == PlanStage::ADVANCED) {
+ WorkingSetMember* member = _ws.get(id);
+ ASSERT(cq->root()->matchesBSON(member->obj.value()));
+ numResults++;
+ }
+ }
+
+ ASSERT_EQ(numResults, 2U);
+
+ // This time we expect to find something in the plan cache. Replans after hitting the
+ // works threshold result in a cache entry.
+ ASSERT_OK(cache->get(*cq, &rawCachedSolution));
+ boost::scoped_ptr<CachedSolution> cachedSolution(rawCachedSolution);
+
+ resetEvictionEnabled(oldReplanningFlagValue);
+ flagResetter.Dismiss();
+ }
+ };
+
+ class All : public Suite {
+ public:
+ All() : Suite("query_stage_cached_plan") {}
+
+ void setupTests() {
+ add<QueryStageCachedPlanFailure>();
+ add<QueryStageCachedPlanHitMaxWorks>();
+ }
+ };
+
+ SuiteInstance<All> all;
+
+} // namespace QueryStageCachedPlan