author    | David Storch <david.storch@10gen.com> | 2015-05-22 13:19:19 -0400
committer | David Storch <david.storch@10gen.com> | 2015-06-03 14:53:48 -0400
commit    | f80286e0203fa0aa0873bff2963ce0aa81a51383 (patch)
tree      | dea40d051e88062eb025dca2dc78f00535d9710b /src/mongo
parent    | b677e49bed78c415498102a6d7d1cfbed43e76f7 (diff)
download  | mongo-f80286e0203fa0aa0873bff2963ce0aa81a51383.tar.gz
SERVER-15225 CachedPlanStage can replan poorly performing queries after a trial period
This is a minimal backport of the rewrite of the CachedPlanStage. The functionality is behind
a flag and is turned off by default; it can be enabled with the
internalQueryCacheReplanningEnabled setParameter.
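For context, the knob is an exported server parameter, so it can normally be set either at
mongod startup or at runtime. The invocations below are illustrative of that usage and are
not part of this patch:

    # Assumed usage: enable at startup
    mongod --setParameter internalQueryCacheReplanningEnabled=true

    # Assumed usage: enable at runtime from the mongo shell
    db.adminCommand({ setParameter: 1, internalQueryCacheReplanningEnabled: true })

With replanning enabled, a cached plan gets a trial budget of internalQueryCacheEvictionRatio
(default 10.0) times the number of works recorded when the plan was first cached; exhausting
that budget evicts the cache entry and replans the query from scratch.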
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/cached_plan.cpp | 258
-rw-r--r-- | src/mongo/db/exec/cached_plan.h | 71
-rw-r--r-- | src/mongo/db/exec/multi_plan.cpp | 40
-rw-r--r-- | src/mongo/db/exec/multi_plan.h | 21
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 13
-rw-r--r-- | src/mongo/db/query/plan_cache.cpp | 14
-rw-r--r-- | src/mongo/db/query/plan_cache.h | 12
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 11
-rw-r--r-- | src/mongo/db/query/query_knobs.cpp | 4
-rw-r--r-- | src/mongo/db/query/query_knobs.h | 7
-rw-r--r-- | src/mongo/dbtests/query_stage_cached_plan.cpp | 273
11 files changed, 686 insertions, 38 deletions
diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp index 2bc2e39bd1b..fed8b8de0c0 100644 --- a/src/mongo/db/exec/cached_plan.cpp +++ b/src/mongo/db/exec/cached_plan.cpp @@ -26,18 +26,28 @@ * it in the license file. */ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery + +#include "mongo/platform/basic.h" + #include "mongo/db/exec/cached_plan.h" -#include "mongo/db/exec/scoped_timer.h" -#include "mongo/db/exec/working_set_common.h" -#include "mongo/util/mongoutils/str.h" -// for updateCache #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database.h" #include "mongo/db/client.h" +#include "mongo/db/exec/multi_plan.h" +#include "mongo/db/exec/scoped_timer.h" +#include "mongo/db/exec/working_set_common.h" +#include "mongo/db/query/explain.h" #include "mongo/db/query/plan_cache.h" #include "mongo/db/query/plan_ranker.h" +#include "mongo/db/query/plan_yield_policy.h" #include "mongo/db/query/qlog.h" +#include "mongo/db/query/query_knobs.h" +#include "mongo/db/query/query_planner.h" +#include "mongo/db/query/stage_builder.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/log.h" namespace mongo { @@ -47,14 +57,23 @@ namespace mongo { // static const char* CachedPlanStage::kStageType = "CACHED_PLAN"; - CachedPlanStage::CachedPlanStage(const Collection* collection, + CachedPlanStage::CachedPlanStage(OperationContext* txn, + Collection* collection, + WorkingSet* ws, CanonicalQuery* cq, + const QueryPlannerParams& params, + size_t decisionWorks, PlanStage* mainChild, QuerySolution* mainQs, PlanStage* backupChild, QuerySolution* backupQs) - : _collection(collection), + : _txn(txn), + _collection(collection), + _ws(ws), _canonicalQuery(cq), + _plannerParams(params), + _replanningEnabled(internalQueryCacheReplanningEnabled), + _decisionWorks(decisionWorks), _mainQs(mainQs), _backupQs(backupQs), _mainChildPlan(mainChild), @@ -80,9 +99,206 @@ namespace mongo { return true; } + if (!_results.empty()) { + return false; + } + return getActiveChild()->isEOF(); } + Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) { + // If replanning is disabled, then this is a no-op. + if (!_replanningEnabled) { + return Status::OK(); + } + + // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of + // execution work that happens here, so this is needed for the time accounting to + // make sense. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + // If we work this many times during the trial period, then we will replan the + // query from scratch. + size_t maxWorksBeforeReplan = static_cast<size_t>(internalQueryCacheEvictionRatio + * _decisionWorks); + + // The trial period ends without replanning if the cached plan produces this many results + size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery); + + for (size_t i = 0; i < maxWorksBeforeReplan; ++i) { + // Might need to yield between calls to work due to the timer elapsing. + Status yieldStatus = tryYield(yieldPolicy); + if (!yieldStatus.isOK()) { + return yieldStatus; + } + + WorkingSetID id = WorkingSet::INVALID_ID; + PlanStage::StageState state = _mainChildPlan->work(&id); + + if (PlanStage::ADVANCED == state) { + // Save result for later. + _results.push_back(id); + + if (_results.size() >= numResults) { + // Once a plan returns enough results, stop working. Update cache with stats + // from this run and return. 
+ updateCache(); + return Status::OK(); + } + } + else if (PlanStage::IS_EOF == state) { + // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats + // from this run and return. + updateCache(); + return Status::OK(); + } + else if (PlanStage::NEED_FETCH == state) { + WorkingSetMember* member = _ws->get(id); + invariant(member->hasFetcher()); + // Transfer ownership of the fetcher and yield. + _fetcher.reset(member->releaseFetcher()); + Status fetchYieldStatus = tryYield(yieldPolicy); + if (!fetchYieldStatus.isOK()) { + return fetchYieldStatus; + } + } + else if (PlanStage::FAILURE == state) { + // On failure, fall back to replanning the whole query. We neither evict the + // existing cache entry nor cache the result of replanning. + BSONObj statusObj; + WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj); + + LOG(1) << "Execution of cached plan failed, falling back to replan." + << " query: " + << _canonicalQuery->toStringShort() + << " planSummary: " + << Explain::getPlanSummary(_mainChildPlan.get()) + << " status: " + << statusObj; + + const bool shouldCache = false; + return replan(yieldPolicy, shouldCache); + } + else if (PlanStage::DEAD == state) { + return Status(ErrorCodes::OperationFailed, + "Executor killed during cached plan trial period"); + } + else { + invariant(PlanStage::NEED_TIME == state); + } + } + + // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This + // plan is taking too long, so we replan from scratch. + LOG(1) << "Execution of cached plan required " + << maxWorksBeforeReplan + << " works, but was originally cached with only " + << _decisionWorks + << " works. Evicting cache entry and replanning query: " + << _canonicalQuery->toStringShort() + << " plan summary before replan: " + << Explain::getPlanSummary(_mainChildPlan.get()); + + const bool shouldCache = true; + return replan(yieldPolicy, shouldCache); + } + + Status CachedPlanStage::tryYield(PlanYieldPolicy* yieldPolicy) { + // These are the conditions which cause us to yield during plan selection if we have a + // YIELD_AUTO policy: + // 1) The yield policy's timer elapsed, or + // 2) some stage requested a yield due to a document fetch (NEED_FETCH). + // In both cases, the actual yielding happens here. + if (NULL != yieldPolicy && (yieldPolicy->shouldYield() || NULL != _fetcher.get())) { + // Here's where we yield. + bool alive = yieldPolicy->yield(_fetcher.get()); + + if (!alive) { + return Status(ErrorCodes::OperationFailed, + "PlanExecutor killed during cached plan trial period"); + } + } + + // We're done using the fetcher, so it should be freed. We don't want to + // use the same RecordFetcher twice. + _fetcher.reset(); + + return Status::OK(); + } + + Status CachedPlanStage::replan(PlanYieldPolicy* yieldPolicy, bool shouldCache) { + // We're going to start over with a new plan. No need for only old buffered results. + _results.clear(); + + // Clear out the working set. We'll start with a fresh working set. + _ws->clear(); + + // No need for any existing child stages or QuerySolutions. We will create new ones from + // scratch. + _mainQs.reset(); + _backupQs.reset(); + _mainChildPlan.reset(); + _backupChildPlan.reset(); + + // Remove the current plan cache entry for this shape. The plan cache entry could have + // already been removed by another thread, so our removal won't necessarily succeed. 
+ if (shouldCache) { + PlanCache* cache = _collection->infoCache()->getPlanCache(); + cache->remove(*_canonicalQuery); + } + + // Use the query planning module to plan the whole query. + std::vector<QuerySolution*> rawSolutions; + Status status = QueryPlanner::plan(*_canonicalQuery, _plannerParams, &rawSolutions); + if (!status.isOK()) { + return Status(ErrorCodes::BadValue, + str::stream() + << "error processing query: " << _canonicalQuery->toString() + << " planner returned error: " << status.reason()); + } + + OwnedPointerVector<QuerySolution> solutions(rawSolutions); + + // We cannot figure out how to answer the query. Perhaps it requires an index + // we do not have? + if (0 == solutions.size()) { + return Status(ErrorCodes::BadValue, + str::stream() + << "error processing query: " + << _canonicalQuery->toString() + << " No query solutions"); + } + + if (1 == solutions.size()) { + PlanStage* newRoot; + // Only one possible plan. Build the stages from the solution. + verify(StageBuilder::build(_txn, _collection, *solutions[0], _ws, &newRoot)); + _mainChildPlan.reset(newRoot); + _mainQs.reset(solutions.popAndReleaseBack()); + return Status::OK(); + } + + // Many solutions. Create a MultiPlanStage to pick the best, update the cache, + // and so on. The working set will be shared by all candidate plans. + _mainChildPlan.reset(new MultiPlanStage(_txn, _collection, _canonicalQuery, shouldCache)); + MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(_mainChildPlan.get()); + + for (size_t ix = 0; ix < solutions.size(); ++ix) { + if (solutions[ix]->cacheData.get()) { + solutions[ix]->cacheData->indexFilterApplied = _plannerParams.indexFiltersApplied; + } + + PlanStage* nextPlanRoot; + verify(StageBuilder::build(_txn, _collection, *solutions[ix], _ws, &nextPlanRoot)); + + // Takes ownership of 'solutions[ix]' and 'nextPlanRoot'. + multiPlanStage->addPlan(solutions.releaseAt(ix), nextPlanRoot, _ws); + } + + // Delegate to the MultiPlanStage's plan selection facility. + return multiPlanStage->pickBestPlan(yieldPolicy); + } + PlanStage::StageState CachedPlanStage::work(WorkingSetID* out) { ++_commonStats.works; @@ -94,6 +310,15 @@ namespace mongo { if (isEOF()) { return PlanStage::IS_EOF; } + // First exhaust any results buffered during the trial period. + if (!_results.empty()) { + *out = _results.front(); + _results.pop_front(); + _commonStats.advanced++; + _alreadyProduced = true; + return PlanStage::ADVANCED; + } + StageState childStatus = getActiveChild()->work(out); if (PlanStage::ADVANCED == childStatus) { @@ -107,9 +332,11 @@ namespace mongo { else if (PlanStage::FAILURE == childStatus && !_alreadyProduced && !_usingBackupChild + && !_replanningEnabled && NULL != _backupChildPlan.get()) { // Switch the active child to the backup. Subsequent calls to work() will exercise - // the backup plan. + // the backup plan. We are only willing to switch to the backup plan if replanning is + // disabled. 
_usingBackupChild = true; _commonStats.needTime++; return PlanStage::NEED_TIME; @@ -152,6 +379,20 @@ namespace mongo { _backupChildPlan->invalidate(txn, dl, type); } ++_commonStats.invalidates; + + for (std::list<WorkingSetID>::iterator it = _results.begin(); it != _results.end(); ) { + WorkingSetMember* member = _ws->get(*it); + if (member->hasLoc() && member->loc == dl) { + std::list<WorkingSetID>::iterator next = it; + ++next; + WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection); + _results.erase(it); + it = next; + } + else { + ++it; + } + } } vector<PlanStage*> CachedPlanStage::getChildren() const { @@ -197,7 +438,8 @@ namespace mongo { feedback->score = PlanRanker::scoreTree(feedback->stats.get()); PlanCache* cache = _collection->infoCache()->getPlanCache(); - Status fbs = cache->feedback(*_canonicalQuery, feedback.release()); + const bool allowedToEvict = !_replanningEnabled; + Status fbs = cache->feedback(*_canonicalQuery, feedback.release(), allowedToEvict); if (!fbs.isOK()) { QLOG() << _canonicalQuery->ns() << ": Failed to update cache with feedback: " diff --git a/src/mongo/db/exec/cached_plan.h b/src/mongo/db/exec/cached_plan.h index ceb63e1e190..632476b481d 100644 --- a/src/mongo/db/exec/cached_plan.h +++ b/src/mongo/db/exec/cached_plan.h @@ -28,15 +28,21 @@ #pragma once +#include <boost/scoped_ptr.hpp> +#include <list> + #include "mongo/db/jsobj.h" #include "mongo/db/exec/plan_stage.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/query_solution.h" #include "mongo/db/record_id.h" +#include "mongo/db/storage/record_fetcher.h" namespace mongo { + class PlanYieldPolicy; + /** * This stage outputs its mainChild, and possibly its backup child * and also updates the cache. @@ -49,8 +55,12 @@ namespace mongo { /** * Takes ownership of 'mainChild', 'mainQs', 'backupChild', and 'backupQs'. */ - CachedPlanStage(const Collection* collection, + CachedPlanStage(OperationContext* txn, + Collection* collection, + WorkingSet* ws, CanonicalQuery* cq, + const QueryPlannerParams& params, + size_t decisionWorks, PlanStage* mainChild, QuerySolution* mainQs, PlanStage* backupChild = NULL, @@ -80,16 +90,60 @@ namespace mongo { void kill(); + /** + * Runs the cached plan for a trial period, yielding during the trial period according to + * 'yieldPolicy'. + * + * If the performance is lower than expected, the old plan is evicted and a new plan is + * selected from scratch (again yielding according to 'yieldPolicy'). Otherwise, the cached + * plan is run. + */ + Status pickBestPlan(PlanYieldPolicy* yieldPolicy); + private: PlanStage* getActiveChild() const; void updateCache(); - // not owned - const Collection* _collection; + /** + * May yield during the cached plan stage's trial period or replanning phases. + * + * Returns a non-OK status if the plan was killed during a yield. + */ + Status tryYield(PlanYieldPolicy* yieldPolicy); + + /** + * Uses the QueryPlanner and the MultiPlanStage to re-generate candidate plans for this + * query and select a new winner. + * + * We fallback to a new plan if, based on the number of works during the trial period that + * put the plan in the cache, the performance was worse than anticipated during the trial + * period. + * + * We only write the result of re-planning to the plan cache if 'shouldCache' is true. + */ + Status replan(PlanYieldPolicy* yieldPolicy, bool shouldCache); + + // Not owned here. + OperationContext* _txn; + + // Not owned here. 
+ Collection* _collection; - // not owned + // Not owned here. + WorkingSet* _ws; + + // Not owned here. CanonicalQuery* _canonicalQuery; + QueryPlannerParams _plannerParams; + + // Whether or not the cached plan trial period and replanning is enabled. + const bool _replanningEnabled; + + // The number of work cycles taken to decide on a winning plan when the plan was first + // cached. + size_t _decisionWorks; + // Owned by us. Must be deleted after the corresponding PlanStage trees, as // those trees point into the query solutions. boost::scoped_ptr<QuerySolution> _mainQs; @@ -113,6 +167,15 @@ namespace mongo { // Has this query been killed? bool _killed; + // Any results produced during trial period execution are kept here. + std::list<WorkingSetID> _results; + + // When a stage requests a yield for document fetch, it gives us back a RecordFetcher* + // to use to pull the record into memory. We take ownership of the RecordFetcher here, + // deleting it after we've had a chance to do the fetch. For timing-based yields, we + // just pass a NULL fetcher. + boost::scoped_ptr<RecordFetcher> _fetcher; + // Stats CommonStats _commonStats; CachedPlanStats _specificStats; diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp index d7e9e19c30e..6415d4f91ec 100644 --- a/src/mongo/db/exec/multi_plan.cpp +++ b/src/mongo/db/exec/multi_plan.cpp @@ -61,9 +61,11 @@ namespace mongo { MultiPlanStage::MultiPlanStage(OperationContext* txn, const Collection* collection, - CanonicalQuery* cq) + CanonicalQuery* cq, + bool shouldCache) : _txn(txn), _collection(collection), + _shouldCache(shouldCache), _query(cq), _bestPlanIdx(kNoSuchPlan), _backupPlanIdx(kNoSuchPlan), @@ -183,6 +185,24 @@ namespace mongo { return Status::OK(); } + // static + size_t MultiPlanStage::getTrialPeriodNumToReturn(const CanonicalQuery& query) { + // We treat ntoreturn as though it is a limit during plan ranking. + // This means that ranking might not be great for sort + batchSize. + // But it also means that we don't buffer too much data for sort + limit. + // See SERVER-14174 for details. + size_t numToReturn = query.getParsed().getNumToReturn(); + + // Determine the number of results which we will produce during the plan + // ranking phase before stopping. + size_t numResults = static_cast<size_t>(internalQueryPlanEvaluationMaxResults); + if (numToReturn > 0) { + numResults = std::min(numToReturn, numResults); + } + + return numResults; + } + Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) { // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of // execution work that happens here, so this is needed for the time accounting to @@ -205,14 +225,7 @@ namespace mongo { // This means that ranking might not be great for sort + batchSize. // But it also means that we don't buffer too much data for sort + limit. // See SERVER-14174 for details. - size_t numToReturn = _query->getParsed().getNumToReturn(); - - // Determine the number of results which we will produce during the plan - // ranking phase before stopping. - size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults; - if (numToReturn > 0) { - numResults = std::min(numToReturn, numResults); - } + size_t numResults = getTrialPeriodNumToReturn(*_query); // Work the plans, stopping when a plan hits EOF or returns some // fixed number of results. @@ -311,13 +324,16 @@ namespace mongo { // 1) the query must be of a type that is safe to cache, // 2) two or more plans cannot have tied for the win. 
Caching in the case of ties can // cause successive queries of the same shape to use a bad index. - // 3) Furthermore, the winning plan must have returned at least one result. Plans which + // 3) The caller must have indicated that it is willing to allow a plan to be cached via + // the '_shouldCache' argument to the constructor. + // 4) Furthermore, the winning plan must have returned at least one result. Plans which // return zero results cannot be reliably ranked. Such query shapes are generally // existence type queries, and a winning plan should get cached once the query finds a // result. if (PlanCache::shouldCacheQuery(*_query) - && !ranking->tieForBest - && !alreadyProduced.empty()) { + && !ranking->tieForBest + && !alreadyProduced.empty() + && _shouldCache) { // Create list of candidate solutions for the cache with // the best solution at the front. std::vector<QuerySolution*> solutions; diff --git a/src/mongo/db/exec/multi_plan.h b/src/mongo/db/exec/multi_plan.h index a793ee7127b..2e2be24d455 100644 --- a/src/mongo/db/exec/multi_plan.h +++ b/src/mongo/db/exec/multi_plan.h @@ -52,8 +52,16 @@ namespace mongo { */ class MultiPlanStage : public PlanStage { public: - /** Takes no ownership */ - MultiPlanStage(OperationContext* txn, const Collection* collection, CanonicalQuery* cq); + /** + * Takes no ownership. + * + * If 'shouldCache' is true, writes a cache entry for the winning plan to the plan cache + * when possible. If 'shouldCache' is false, the plan cache will never be written. + */ + MultiPlanStage(OperationContext* txn, + const Collection* collection, + CanonicalQuery* cq, + bool shouldCache = true); virtual ~MultiPlanStage(); @@ -94,6 +102,12 @@ namespace mongo { */ Status pickBestPlan(PlanYieldPolicy* yieldPolicy); + /** + * Returns the max number of documents which we should allow any plan to return during the + * trial period. As soon as any plan hits this number of documents, the trial period ends. + */ + static size_t getTrialPeriodNumToReturn(const CanonicalQuery& query); + /** Return true if a best plan has been chosen */ bool bestPlanChosen() const; @@ -156,6 +170,9 @@ namespace mongo { OperationContext* _txn; const Collection* _collection; + // Whether or not we should try to cache the winning plan in the plan cache. + const bool _shouldCache; + // The query that we're trying to figure out the best solution to. // not owned here CanonicalQuery* _query; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index a3cf395ade3..62971906234 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -311,9 +311,16 @@ namespace mongo { // Add a CachedPlanStage on top of the previous root. Takes ownership of // '*rootOut', 'backupRoot', 'qs', and 'backupQs'. 
- *rootOut = new CachedPlanStage(collection, canonicalQuery, - *rootOut, qs, - backupRoot, backupQs); + *rootOut = new CachedPlanStage(opCtx, + collection, + ws, + canonicalQuery, + plannerParams, + cs->decisionWorks, + *rootOut, + qs, + backupRoot, + backupQs); return Status::OK(); } } diff --git a/src/mongo/db/query/plan_cache.cpp b/src/mongo/db/query/plan_cache.cpp index d1b16dc209c..9ecbf3fcc93 100644 --- a/src/mongo/db/query/plan_cache.cpp +++ b/src/mongo/db/query/plan_cache.cpp @@ -114,7 +114,8 @@ namespace mongo { key(key), query(entry.query.getOwned()), sort(entry.sort.getOwned()), - projection(entry.projection.getOwned()) { + projection(entry.projection.getOwned()), + decisionWorks(entry.decision->stats[0]->common.works) { // CachedSolution should not having any references into // cache entry. All relevant data should be cloned/copied. for (size_t i = 0; i < entry.plannerData.size(); ++i) { @@ -423,7 +424,9 @@ namespace mongo { return false; } - Status PlanCache::feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback) { + Status PlanCache::feedback(const CanonicalQuery& cq, + PlanCacheEntryFeedback* feedback, + bool allowedToEvict) { if (NULL == feedback) { return Status(ErrorCodes::BadValue, "feedback is NULL"); } @@ -439,9 +442,10 @@ namespace mongo { invariant(entry); if (entry->feedback.size() >= size_t(internalQueryCacheFeedbacksStored)) { - // If we have enough feedback, then use it to determine whether - // we should get rid of the cached solution. - if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get())) { + // If we have enough feedback, then use it to determine whether we should get rid of the + // cached solution. We do not use the feedback-based eviction policy if replanning is + // enabled. + if (hasCachedPlanPerformanceDegraded(entry, autoFeedback.get()) && allowedToEvict) { LOG(1) << _ns << ": removing plan cache entry " << entry->toString() << " - detected degradation in performance of cached solution."; _cache.remove(ck); diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h index 57f5657b053..28f07b4ea5f 100644 --- a/src/mongo/db/query/plan_cache.h +++ b/src/mongo/db/query/plan_cache.h @@ -196,6 +196,10 @@ namespace mongo { BSONObj query; BSONObj sort; BSONObj projection; + + // The number of work cycles taken to decide on a winning plan when the plan was first + // cached. + size_t decisionWorks; }; /** @@ -334,10 +338,12 @@ namespace mongo { * If the entry corresponding to 'cq' still exists, 'feedback' is added to the run * statistics about the plan. Status::OK() is returned. * - * May cause the cache entry to be removed if it is determined that the cached plan - * is badly performing. + * If 'allowedToEvict' is true, may cause the cache entry to be removed if it is determined + * that the cached plan is badly performing. */ - Status feedback(const CanonicalQuery& cq, PlanCacheEntryFeedback* feedback); + Status feedback(const CanonicalQuery& cq, + PlanCacheEntryFeedback* feedback, + bool allowedToEvict); /** * Remove the entry corresponding to 'ck' from the cache. 
Returns Status::OK() if the plan diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index b3e329a354e..ec6ac705f52 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -41,6 +41,7 @@ #include "mongo/db/exec/working_set_common.h" #include "mongo/db/global_environment_experiment.h" #include "mongo/db/query/plan_yield_policy.h" +#include "mongo/db/query/query_knobs.h" #include "mongo/db/storage/record_fetcher.h" #include "mongo/util/stacktrace.h" @@ -183,13 +184,21 @@ namespace mongo { } // If we didn't have to do subplanning, we might still have to do regular - // multi plan selection. + // multi plan selection... foundStage = getStageByType(_root.get(), STAGE_MULTI_PLAN); if (foundStage) { MultiPlanStage* mps = static_cast<MultiPlanStage*>(foundStage); return mps->pickBestPlan(_yieldPolicy.get()); } + // ...or, we might have run a plan from the cache for a trial period, falling back on + // regular planning if the cached plan performs poorly. + foundStage = getStageByType(_root.get(), STAGE_CACHED_PLAN); + if (foundStage) { + CachedPlanStage* cachedPlan = static_cast<CachedPlanStage*>(foundStage); + return cachedPlan->pickBestPlan(_yieldPolicy.get()); + } + // Either we chose a plan, or no plan selection was required. In both cases, // our work has been successfully completed. return Status::OK(); diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp index b269e87f314..71c29358d31 100644 --- a/src/mongo/db/query/query_knobs.cpp +++ b/src/mongo/db/query/query_knobs.cpp @@ -46,6 +46,10 @@ namespace mongo { MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheWriteOpsBetweenFlush, int, 1000); + MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheReplanningEnabled, bool, false); + + MONGO_EXPORT_SERVER_PARAMETER(internalQueryCacheEvictionRatio, double, 10.0); + MONGO_EXPORT_SERVER_PARAMETER(internalQueryPlannerMaxIndexedSolutions, int, 64); MONGO_EXPORT_SERVER_PARAMETER(internalQueryEnumerationMaxOrSolutions, int, 10); diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h index 18386867b4b..0f202e823dc 100644 --- a/src/mongo/db/query/query_knobs.h +++ b/src/mongo/db/query/query_knobs.h @@ -72,6 +72,13 @@ namespace mongo { // How many write ops should we allow in a collection before tossing all cache entries? extern int internalQueryCacheWriteOpsBetweenFlush; + // Whether or not CachedPlanStage replanning is enabled. + extern bool internalQueryCacheReplanningEnabled; + + // How many times more works must we perform in order to justify plan cache eviction and + // replanning? + extern double internalQueryCacheEvictionRatio; + // // Planning and enumeration. // diff --git a/src/mongo/dbtests/query_stage_cached_plan.cpp b/src/mongo/dbtests/query_stage_cached_plan.cpp new file mode 100644 index 00000000000..a6c07a65b7a --- /dev/null +++ b/src/mongo/dbtests/query_stage_cached_plan.cpp @@ -0,0 +1,273 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include <boost/scoped_ptr.hpp> +#include <memory> + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/database.h" +#include "mongo/db/catalog/database_holder.h" +#include "mongo/db/client.h" +#include "mongo/db/exec/cached_plan.h" +#include "mongo/db/exec/queued_data_stage.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/json.h" +#include "mongo/db/operation_context_impl.h" +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/get_executor.h" +#include "mongo/db/query/plan_cache.h" +#include "mongo/db/query/plan_yield_policy.h" +#include "mongo/db/query/query_knobs.h" +#include "mongo/db/query/query_planner_params.h" +#include "mongo/dbtests/dbtests.h" +#include "mongo/util/scopeguard.h" + +namespace QueryStageCachedPlan { + + class QueryStageCachedPlanBase { + public: + QueryStageCachedPlanBase() { + // If collection exists already, we need to drop it. + dropCollection(); + + // Add indices. + addIndex(BSON("a" << 1)); + addIndex(BSON("b" << 1)); + + Client::WriteContext ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Add data. + for (int i = 0; i < 10; i++) { + insertDocument(collection, BSON("_id" << i << "a" << i << "b" << 1)); + } + } + + void addIndex(const BSONObj& obj) { + ASSERT_OK(dbtests::createIndex(&_txn, ns(), obj)); + } + + void dropCollection() { + const NamespaceString nsString(ns()); + ScopedTransaction transaction(&_txn, MODE_X); + Lock::DBLock dbLock(_txn.lockState(), nsString.db(), MODE_X); + Database* database = dbHolder().get(&_txn, nsString.db()); + if (!database) { + return; + } + + WriteUnitOfWork wuow(&_txn); + database->dropCollection(&_txn, ns()); + wuow.commit(); + } + + void insertDocument(Collection* collection, BSONObj obj) { + WriteUnitOfWork wuow(&_txn); + + const bool enforceQuota = false; + StatusWith<RecordId> res = collection->insertDocument(&_txn, obj, enforceQuota); + ASSERT(res.isOK()); + + wuow.commit(); + } + + static void resetEvictionEnabled(bool resetTo) { + internalQueryCacheReplanningEnabled = resetTo; + } + + static const char* ns() { + return "unittests.QueryStageCachedPlan"; + } + + protected: + OperationContextImpl _txn; + WorkingSet _ws; + }; + + /** + * Test that on failure, the cached plan stage replans the query but does not create a new cache + * entry. 
+ */ + class QueryStageCachedPlanFailure : public QueryStageCachedPlanBase { + public: + void run() { + bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled; + internalQueryCacheReplanningEnabled = true; + ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled, + oldReplanningFlagValue); + + AutoGetCollectionForRead ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Query can be answered by either index on "a" or index on "b". + CanonicalQuery* rawCq; + ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq)); + boost::scoped_ptr<CanonicalQuery> cq(rawCq); + + // We shouldn't have anything in the plan cache for this shape yet. + PlanCache* cache = collection->infoCache()->getPlanCache(); + ASSERT(cache); + CachedSolution* rawCachedSolution; + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + // Get planner params. + QueryPlannerParams plannerParams; + fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); + + // Queued data stage will return a failure during the cached plan trial period. + std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws)); + mockChild->pushBack(PlanStage::FAILURE); + + // High enough so that we shouldn't trigger a replan based on works. + const size_t decisionWorks = 50; + CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams, + decisionWorks, mockChild.release(), NULL); + + // This should succeed after triggering a replan. + ASSERT_OK(cachedPlanStage.pickBestPlan(NULL)); + + // Make sure that we get 2 legit results back. + size_t numResults = 0; + PlanStage::StageState state = PlanStage::NEED_TIME; + while (state != PlanStage::IS_EOF) { + WorkingSetID id = WorkingSet::INVALID_ID; + state = cachedPlanStage.work(&id); + + ASSERT_NE(state, PlanStage::FAILURE); + ASSERT_NE(state, PlanStage::DEAD); + + if (state == PlanStage::ADVANCED) { + WorkingSetMember* member = _ws.get(id); + ASSERT(cq->root()->matchesBSON(member->obj.value())); + numResults++; + } + } + + ASSERT_EQ(numResults, 2U); + + // Plan cache should still be empty, as we don't write to it when we replan a failed + // query. + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + resetEvictionEnabled(oldReplanningFlagValue); + flagResetter.Dismiss(); + } + }; + + /** + * Test that hitting the cached plan stage trial period's threshold for work cycles causes the + * query to be replanned. Also verify that the replanning results in a new plan cache entry. + */ + class QueryStageCachedPlanHitMaxWorks : public QueryStageCachedPlanBase { + public: + void run() { + bool oldReplanningFlagValue = internalQueryCacheReplanningEnabled; + internalQueryCacheReplanningEnabled = true; + ScopeGuard flagResetter = MakeGuard(&QueryStageCachedPlanBase::resetEvictionEnabled, + oldReplanningFlagValue); + + AutoGetCollectionForRead ctx(&_txn, ns()); + Collection* collection = ctx.getCollection(); + ASSERT(collection); + + // Query can be answered by either index on "a" or index on "b". + CanonicalQuery* rawCq; + ASSERT_OK(CanonicalQuery::canonicalize(ns(), fromjson("{a: {$gte: 8}, b: 1}"), &rawCq)); + boost::scoped_ptr<CanonicalQuery> cq(rawCq); + + // We shouldn't have anything in the plan cache for this shape yet. + PlanCache* cache = collection->infoCache()->getPlanCache(); + ASSERT(cache); + CachedSolution* rawCachedSolution; + ASSERT_NOT_OK(cache->get(*cq, &rawCachedSolution)); + + // Get planner params. 
+ QueryPlannerParams plannerParams; + fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); + + // Set up queued data stage to take a long time before returning EOF. Should be long + // enough to trigger a replan. + const size_t decisionWorks = 10; + const size_t mockWorks = 1U + static_cast<size_t>(internalQueryCacheEvictionRatio + * decisionWorks); + std::auto_ptr<QueuedDataStage> mockChild(new QueuedDataStage(&_ws)); + for (size_t i = 0; i < mockWorks; i++) { + mockChild->pushBack(PlanStage::NEED_TIME); + } + + CachedPlanStage cachedPlanStage(&_txn, collection, &_ws, cq.get(), plannerParams, + decisionWorks, mockChild.release(), NULL); + + // This should succeed after triggering a replan. + ASSERT_OK(cachedPlanStage.pickBestPlan(NULL)); + + // Make sure that we get 2 legit results back. + size_t numResults = 0; + PlanStage::StageState state = PlanStage::NEED_TIME; + while (state != PlanStage::IS_EOF) { + WorkingSetID id = WorkingSet::INVALID_ID; + state = cachedPlanStage.work(&id); + + ASSERT_NE(state, PlanStage::FAILURE); + ASSERT_NE(state, PlanStage::DEAD); + + if (state == PlanStage::ADVANCED) { + WorkingSetMember* member = _ws.get(id); + ASSERT(cq->root()->matchesBSON(member->obj.value())); + numResults++; + } + } + + ASSERT_EQ(numResults, 2U); + + // This time we expect to find something in the plan cache. Replans after hitting the + // works threshold result in a cache entry. + ASSERT_OK(cache->get(*cq, &rawCachedSolution)); + boost::scoped_ptr<CachedSolution> cachedSolution(rawCachedSolution); + + resetEvictionEnabled(oldReplanningFlagValue); + flagResetter.Dismiss(); + } + }; + + class All : public Suite { + public: + All() : Suite("query_stage_cached_plan") {} + + void setupTests() { + add<QueryStageCachedPlanFailure>(); + add<QueryStageCachedPlanHitMaxWorks>(); + } + }; + + SuiteInstance<All> all; + +} // namespace QueryStageCachedPlan |
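As a concrete illustration of the threshold arithmetic that QueryStageCachedPlanHitMaxWorks
exercises, the standalone snippet below is only a sketch; the values mirror the dbtest above
and the query_knobs defaults from this patch:

    // Sketch (not part of the patch): the trial-period budget computed by
    // CachedPlanStage::pickBestPlan(), using the values from the dbtest above.
    #include <cstddef>
    #include <iostream>

    int main() {
        const double evictionRatio = 10.0;     // internalQueryCacheEvictionRatio default
        const std::size_t decisionWorks = 10;  // works recorded when the plan was first cached
        const std::size_t maxWorksBeforeReplan =
            static_cast<std::size_t>(evictionRatio * decisionWorks);  // 100-work trial budget
        const std::size_t mockWorks = 1U + maxWorksBeforeReplan;      // test queues 101 NEED_TIMEs

        // Because the queued child never produces a result within the 100-work budget,
        // pickBestPlan() evicts the cache entry and calls replan(shouldCache = true), so the
        // replanned winner is written back to the cache and the test's final cache->get()
        // succeeds.
        std::cout << "budget=" << maxWorksBeforeReplan
                  << " mockWorks=" << mockWorks << std::endl;
        return 0;
    }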