diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2014-03-15 16:55:41 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2014-03-18 17:53:39 -0400 |
commit | 36c6964edd8ac40a335ad24b5d95ec02b603d7dd (patch) | |
tree | 5d6b5013d2a50e9ad7bdd702ff1f765da5d8ab9b | |
parent | 7497c50db07a987530e1f80cf0930b85494ec20d (diff) | |
download | mongo-36c6964edd8ac40a335ad24b5d95ec02b603d7dd.tar.gz |
SERVER-13184 with rooted OR query plan sub-queries independently and combine
-rw-r--r-- | src/mongo/db/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query.cpp | 159 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query.h | 21 | ||||
-rw-r--r-- | src/mongo/db/query/get_runner.cpp | 195 | ||||
-rw-r--r-- | src/mongo/db/query/get_runner.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/multi_plan_runner.cpp | 33 | ||||
-rw-r--r-- | src/mongo/db/query/multi_plan_runner.h | 13 | ||||
-rw-r--r-- | src/mongo/db/query/plan_cache.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.h | 3 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/subplan_runner.cpp | 413 | ||||
-rw-r--r-- | src/mongo/db/query/subplan_runner.h | 104 | ||||
-rw-r--r-- | src/mongo/dbtests/plan_ranking.cpp | 2 |
14 files changed, 830 insertions, 141 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 29f3ade9520..c65f0c46b6d 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -43,6 +43,7 @@ env.Library( "plan_ranker.cpp", "single_solution_runner.cpp", "stage_builder.cpp", + "subplan_runner.cpp", "type_explain.cpp", ], LIBDEPS=[ diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index 39520757b9e..86580396f32 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -305,19 +305,9 @@ namespace { namespace mongo { - // static - Status CanonicalQuery::canonicalize(const QueryMessage& qm, CanonicalQuery** out) { - LiteParsedQuery* lpq; - Status parseStatus = LiteParsedQuery::make(qm, &lpq); - if (!parseStatus.isOK()) { return parseStatus; } - - auto_ptr<CanonicalQuery> cq(new CanonicalQuery()); - Status initStatus = cq->init(lpq); - if (!initStatus.isOK()) { return initStatus; } - - *out = cq.release(); - return Status::OK(); - } + // + // These all punt to the many-argumented canonicalize below. + // // static Status CanonicalQuery::canonicalize(const string& ns, const BSONObj& query, @@ -364,6 +354,65 @@ namespace mongo { out); } + // + // These actually call init() on the CQ. + // + + // static + Status CanonicalQuery::canonicalize(const QueryMessage& qm, CanonicalQuery** out) { + // Make LiteParsedQuery. + LiteParsedQuery* lpq; + Status parseStatus = LiteParsedQuery::make(qm, &lpq); + if (!parseStatus.isOK()) { return parseStatus; } + + // Make MatchExpression. + StatusWithMatchExpression swme = MatchExpressionParser::parse(lpq->getFilter()); + if (!swme.isOK()) { + delete lpq; + return swme.getStatus(); + } + + // Make the CQ we'll hopefully return. + auto_ptr<CanonicalQuery> cq(new CanonicalQuery()); + // Takes ownership of lpq and the MatchExpression* in swme. + Status initStatus = cq->init(lpq, swme.getValue()); + + if (!initStatus.isOK()) { return initStatus; } + *out = cq.release(); + return Status::OK(); + } + + // static + Status CanonicalQuery::canonicalize(const CanonicalQuery& baseQuery, + MatchExpression* root, + CanonicalQuery** out) { + + LiteParsedQuery* lpq; + + // Pass empty sort and projection. + BSONObj emptyObj; + // 0, 0, 0 is 'ntoskip', 'ntoreturn', and 'queryoptions' + // false, false is 'snapshot' and 'explain' + Status parseStatus = LiteParsedQuery::make(baseQuery.ns(), + 0, 0, 0, + baseQuery.getParsed().getFilter(), + baseQuery.getParsed().getProj(), + baseQuery.getParsed().getSort(), + emptyObj, emptyObj, emptyObj, + false, false, &lpq); + if (!parseStatus.isOK()) { + return parseStatus; + } + + // Make the CQ we'll hopefully return. + auto_ptr<CanonicalQuery> cq(new CanonicalQuery()); + Status initStatus = cq->init(lpq, root->shallowClone()); + + if (!initStatus.isOK()) { return initStatus; } + *out = cq.release(); + return Status::OK(); + } + // static Status CanonicalQuery::canonicalize(const string& ns, const BSONObj& query, const BSONObj& sort, const BSONObj& proj, @@ -378,16 +427,56 @@ namespace mongo { BSONObj emptyObj; Status parseStatus = LiteParsedQuery::make(ns, skip, limit, 0, query, proj, sort, hint, minObj, maxObj, snapshot, explain, &lpq); - if (!parseStatus.isOK()) { return parseStatus; } + if (!parseStatus.isOK()) { + return parseStatus; + } + // Build a parse tree from the BSONObj in the parsed query. + StatusWithMatchExpression swme = MatchExpressionParser::parse(lpq->getFilter()); + if (!swme.isOK()) { + delete lpq; + return swme.getStatus(); + } + + // Make the CQ we'll hopefully return. auto_ptr<CanonicalQuery> cq(new CanonicalQuery()); - Status initStatus = cq->init(lpq); - if (!initStatus.isOK()) { return initStatus; } + // Takes ownership of lpq and the MatchExpression* in swme. + Status initStatus = cq->init(lpq, swme.getValue()); + if (!initStatus.isOK()) { return initStatus; } *out = cq.release(); return Status::OK(); } + Status CanonicalQuery::init(LiteParsedQuery* lpq, MatchExpression* root) { + _pq.reset(lpq); + + // Normalize, sort and validate tree. + root = normalizeTree(root); + + sortTree(root); + _root.reset(root); + Status validStatus = isValid(root, *_pq); + if (!validStatus.isOK()) { + return validStatus; + } + + this->generateCacheKey(); + + // Validate the projection if there is one. + if (!_pq->getProj().isEmpty()) { + ParsedProjection* pp; + Status projStatus = ParsedProjection::make(_pq->getProj(), _root.get(), &pp); + if (!projStatus.isOK()) { + return projStatus; + } + _proj.reset(pp); + } + + return Status::OK(); + } + + // static bool CanonicalQuery::isSimpleIdQuery(const BSONObj& query) { bool hasID = false; @@ -610,6 +699,7 @@ namespace mongo { } // static + // XXX TODO: This does not belong here at all. MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) { // Only thing we do is pull an OR up at the root. if (MatchExpression::AND != tree->matchType()) { @@ -657,43 +747,6 @@ namespace mongo { return normalizeTree(orChild); } - Status CanonicalQuery::init(LiteParsedQuery* lpq) { - _pq.reset(lpq); - - // Build a parse tree from the BSONObj in the parsed query. - StatusWithMatchExpression swme = MatchExpressionParser::parse(_pq->getFilter()); - if (!swme.isOK()) { return swme.getStatus(); } - - // Normalize, sort and validate tree. - MatchExpression* root = swme.getValue(); - root = normalizeTree(root); - - // TODO: We don't want to do this all the time. See AndWithOrWithOneIndex in - // query_planner_test.cpp. - // - // root = logicalRewrite(root); - sortTree(root); - Status validStatus = isValid(root, *_pq); - if (!validStatus.isOK()) { - return validStatus; - } - _root.reset(root); - - this->generateCacheKey(); - - // Validate the projection if there is one. - if (!_pq->getProj().isEmpty()) { - ParsedProjection* pp; - Status projStatus = ParsedProjection::make(_pq->getProj(), _root.get(), &pp); - if (!projStatus.isOK()) { - return projStatus; - } - _proj.reset(pp); - } - - return Status::OK(); - } - std::string CanonicalQuery::toString() const { mongoutils::str::stream ss; ss << "ns=" << _pq->ns() << " limit=" << _pq->getNumToReturn() diff --git a/src/mongo/db/query/canonical_query.h b/src/mongo/db/query/canonical_query.h index 339b6bf92dc..a65fc5dbc05 100644 --- a/src/mongo/db/query/canonical_query.h +++ b/src/mongo/db/query/canonical_query.h @@ -42,11 +42,26 @@ namespace mongo { class CanonicalQuery { public: + /** + * Caller owns the pointer in 'out' if any call to canonicalize returns Status::OK(). + */ static Status canonicalize(const QueryMessage& qm, CanonicalQuery** out); /** * For testing or for internal clients to use. */ + + /** + * Used for creating sub-queries from an existing CanonicalQuery. + * + * 'root' must be an expression in baseQuery.root(). + * + * Does not take ownership of 'root'. + */ + static Status canonicalize(const CanonicalQuery& baseQuery, + MatchExpression* root, + CanonicalQuery** out); + static Status canonicalize(const string& ns, const BSONObj& query, CanonicalQuery** out); static Status canonicalize(const string& ns, const BSONObj& query, long long skip, @@ -150,8 +165,10 @@ namespace mongo { */ void generateCacheKey(void); - // Takes ownership of lpq - Status init(LiteParsedQuery* lpq); + /** + * Takes ownership of 'root' and 'lpq'. + */ + Status init(LiteParsedQuery* lpq, MatchExpression* root); scoped_ptr<LiteParsedQuery> _pq; diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp index ea324b4719e..4298b83de12 100644 --- a/src/mongo/db/query/get_runner.cpp +++ b/src/mongo/db/query/get_runner.cpp @@ -43,12 +43,14 @@ #include "mongo/db/query/multi_plan_runner.h" #include "mongo/db/query/plan_cache.h" #include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_access.h" #include "mongo/db/query/qlog.h" #include "mongo/db/query/query_knobs.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/db/query/single_solution_runner.h" #include "mongo/db/query/stage_builder.h" +#include "mongo/db/query/subplan_runner.h" #include "mongo/db/index_names.h" #include "mongo/db/server_options.h" #include "mongo/db/server_parameters.h" @@ -200,6 +202,98 @@ namespace mongo { plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS; } + Status getRunnerFromCache(CanonicalQuery* canonicalQuery, + Collection* collection, + const QueryPlannerParams& plannerParams, + Runner** out) { + // Skip cache look up for non-cacheable queries. + if (!PlanCache::shouldCacheQuery(*canonicalQuery)) { + return Status(ErrorCodes::BadValue, "query is not cacheable"); + } + + CachedSolution* rawCS; + Status cacheLookupStatus = collection->infoCache()->getPlanCache()->get(*canonicalQuery, + &rawCS); + if (!cacheLookupStatus.isOK()) { + return cacheLookupStatus; + } + + // We have a CachedSolution. Have the planner turn it into a QuerySolution. + boost::scoped_ptr<CachedSolution> cs(rawCS); + QuerySolution *qs, *backupQs; + Status status = QueryPlanner::planFromCache(*canonicalQuery, + plannerParams, + *cs, + &qs, + &backupQs); + if (!status.isOK()) { + return status; + } + + // See SERVER-12438. Unfortunately we have to defer to the backup solution + // if both a batch size is set and a sort is requested. + // + // TODO: it would be really nice to delete this block in the future. + if (NULL != backupQs && + 0 < canonicalQuery->getParsed().getNumToReturn() && + !canonicalQuery->getParsed().getSort().isEmpty()) { + delete qs; + + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*backupQs, &root, &ws)); + + // And, run the plan. + *out = new SingleSolutionRunner(collection, + canonicalQuery, + backupQs, root, ws); + return Status::OK(); + } + + // If our cached solution is a hit for a count query, try to turn it into a fast count + // thing. + if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { + if (turnIxscanIntoCount(qs)) { + LOG(2) << "Using fast count: " << canonicalQuery->toStringShort() + << ", planSummary: " << getPlanSummary(*qs); + + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*qs, &root, &ws)); + *out = new SingleSolutionRunner(collection, + canonicalQuery, qs, root, ws); + if (NULL != backupQs) { + delete backupQs; + } + return Status::OK(); + } + } + + // If we're here, we're going to used the cached plan and things are normal. + LOG(2) << "Using cached query plan: " << canonicalQuery->toStringShort() + << ", planSummary: " << getPlanSummary(*qs); + + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*qs, &root, &ws)); + CachedPlanRunner* cpr = new CachedPlanRunner(collection, + canonicalQuery, + qs, + root, + ws); + + // If there's a backup solution, let the CachedPlanRunner know about it. + if (NULL != backupQs) { + WorkingSet* backupWs; + PlanStage* backupRoot; + verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs)); + cpr->setBackupPlan(backupQs, backupRoot, backupWs); + } + + *out = cpr; + return Status::OK(); + } + /** * For a given query, get a runner. The runner could be a SingleSolutionRunner, a * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc. @@ -253,80 +347,38 @@ namespace mongo { plannerParams.options = plannerOptions; fillOutPlannerParams(collection, rawCanonicalQuery, &plannerParams); - // Try to look up a cached solution for the query. - // - // Skip cache look up for non-cacheable queries. - // See PlanCache::shouldCacheQuery() - // - // TODO: Can the cache have negative data about a solution? - CachedSolution* rawCS; - if (PlanCache::shouldCacheQuery(*canonicalQuery) && - collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) { - // We have a CachedSolution. Have the planner turn it into a QuerySolution. - boost::scoped_ptr<CachedSolution> cs(rawCS); - QuerySolution *qs, *backupQs; - Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, - &qs, &backupQs); - - // See SERVER-12438. Unfortunately we have to defer to the backup solution - // if both a batch size is set and a sort is requested. - // - // TODO: it would be really nice to delete this block in the future. - if (status.isOK() && NULL != backupQs && - 0 < canonicalQuery->getParsed().getNumToReturn() && - !canonicalQuery->getParsed().getSort().isEmpty()) { - delete qs; - - WorkingSet* ws; - PlanStage* root; - verify(StageBuilder::build(*backupQs, &root, &ws)); - - // And, run the plan. - *out = new SingleSolutionRunner(collection, - canonicalQuery.release(), - backupQs, root, ws); - return Status::OK(); - } - - if (status.isOK()) { - if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { - if (turnIxscanIntoCount(qs)) { - LOG(2) << "Using fast count: " << canonicalQuery->toStringShort() - << ", planSummary: " << getPlanSummary(*qs); + // See if the cache has what we're looking for. + Status cacheStatus = getRunnerFromCache(canonicalQuery.get(), + collection, + plannerParams, + out); + + // This can be not-OK and we can carry on. It just means the query wasn't cached. + if (cacheStatus.isOK()) { + // We got a cached runner. + canonicalQuery.release(); + return cacheStatus; + } - WorkingSet* ws; - PlanStage* root; - verify(StageBuilder::build(*qs, &root, &ws)); - *out = new SingleSolutionRunner(collection, - canonicalQuery.release(), qs, root, ws); - if (NULL != backupQs) { - delete backupQs; - } - return Status::OK(); - } - } + if (internalQueryPlanOrChildrenIndependently + && SubplanRunner::canUseSubplanRunner(*canonicalQuery)) { - LOG(2) << "Using cached query plan: " << canonicalQuery->toStringShort() - << ", planSummary: " << getPlanSummary(*qs); + LOG(2) << "Running query as sub-queries: " << canonicalQuery->toStringShort(); + *out = new SubplanRunner(collection, plannerParams, canonicalQuery.release()); + return Status::OK(); + } - WorkingSet* ws; - PlanStage* root; - verify(StageBuilder::build(*qs, &root, &ws)); - CachedPlanRunner* cpr = new CachedPlanRunner(collection, - canonicalQuery.release(), qs, - root, ws); + return getRunnerAlwaysPlan(collection, canonicalQuery.release(), plannerParams, out); + } - if (NULL != backupQs) { - WorkingSet* backupWs; - PlanStage* backupRoot; - verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs)); - cpr->setBackupPlan(backupQs, backupRoot, backupWs); - } + Status getRunnerAlwaysPlan(Collection* collection, + CanonicalQuery* rawCanonicalQuery, + const QueryPlannerParams& plannerParams, + Runner** out) { - *out = cpr; - return Status::OK(); - } - } + invariant(collection); + invariant(rawCanonicalQuery); + auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); vector<QuerySolution*> solutions; Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions); @@ -386,7 +438,10 @@ namespace mongo { // And, run the plan. *out = new SingleSolutionRunner(collection, - canonicalQuery.release(),solutions[0], root, ws); + canonicalQuery.release(), + solutions[0], + root, + ws); return Status::OK(); } else { diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h index 187ba543ad9..18f7c7235f1 100644 --- a/src/mongo/db/query/get_runner.h +++ b/src/mongo/db/query/get_runner.h @@ -119,6 +119,14 @@ namespace mongo { Runner** out); /** + * Get a runner for a query. Ignores the cache and always plans the full query. + */ + Status getRunnerAlwaysPlan(Collection* collection, + CanonicalQuery* rawCanonicalQuery, + const QueryPlannerParams& plannerParams, + Runner** out); + + /** * RAII approach to ensuring that runners are deregistered in newRunQuery. * * While retrieving the first batch of results, newRunQuery manually registers the runner with diff --git a/src/mongo/db/query/multi_plan_runner.cpp b/src/mongo/db/query/multi_plan_runner.cpp index fd536a1990f..d99ad25cd85 100644 --- a/src/mongo/db/query/multi_plan_runner.cpp +++ b/src/mongo/db/query/multi_plan_runner.cpp @@ -57,6 +57,7 @@ namespace mongo { _failureCount(0), _policy(Runner::YIELD_MANUAL), _query(query), + _bestChild(numeric_limits<size_t>::max()), _backupSolution(NULL), _backupPlan(NULL) { } @@ -227,6 +228,7 @@ namespace mongo { if (_killed) { return Runner::RUNNER_DEAD; } if (_failure) { return Runner::RUNNER_ERROR; } } + cacheBestPlan(); } // Look for an already produced result that provides the data the caller wants. @@ -338,24 +340,31 @@ namespace mongo { // After picking best plan, ranking will own plan stats from // candidate solutions (winner and losers). - std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision); - size_t bestChild = PlanRanker::pickBestPlan(_candidates, ranking.get()); + _ranking.reset(new PlanRankingDecision()); + _bestChild = PlanRanker::pickBestPlan(_candidates, _ranking.get()); + if (NULL != out) { *out = _bestChild; } + return true; + } + + void MultiPlanRunner::cacheBestPlan() { + // Must call pickBestPlan before. + verify(_bestChild != numeric_limits<size_t>::max()); // Copy candidate order. We will need this to sort candidate stats for explain // after transferring ownership of 'ranking' to plan cache. - std::vector<size_t> candidateOrder = ranking->candidateOrder; + std::vector<size_t> candidateOrder = _ranking->candidateOrder; // Run the best plan. Store it. - _bestPlan.reset(new PlanExecutor(_candidates[bestChild].ws, - _candidates[bestChild].root)); + _bestPlan.reset(new PlanExecutor(_candidates[_bestChild].ws, + _candidates[_bestChild].root)); _bestPlan->setYieldPolicy(_policy); - _alreadyProduced = _candidates[bestChild].results; - _bestSolution.reset(_candidates[bestChild].solution); + _alreadyProduced = _candidates[_bestChild].results; + _bestSolution.reset(_candidates[_bestChild].solution); QLOG() << "Winning solution:\n" << _bestSolution->toString() << endl; LOG(2) << "Winning plan: " << getPlanSummary(*_bestSolution); - size_t backupChild = bestChild; + size_t backupChild = _bestChild; if (_bestSolution->hasBlockingStage && (0 == _alreadyProduced.size())) { QLOG() << "Winner has blocking stage, looking for backup plan.\n"; for (size_t i = 0; i < _candidates.size(); ++i) { @@ -377,7 +386,7 @@ namespace mongo { // 2) the winning plan did not actually produce any results, // without hitting EOF. In this case, we have no information to // suggest that this plan is good. - const PlanStageStats* bestStats = ranking->stats.vector()[0]; + const PlanStageStats* bestStats = _ranking->stats.vector()[0]; if (PlanCache::shouldCacheQuery(*_query) && (!_alreadyProduced.empty() || bestStats->common.isEOF)) { Database* db = cc().database(); @@ -411,7 +420,7 @@ namespace mongo { } if (validSolutions) { - cache->add(*_query, solutions, ranking.release()); + cache->add(*_query, solutions, _ranking.release()); } } @@ -422,7 +431,7 @@ namespace mongo { // index into candidates/ranking size_t i = candidateOrder[orderingIndex]; - if (i == bestChild) { continue; } + if (i == _bestChild) { continue; } if (i == backupChild) { continue; } delete _candidates[i].solution; @@ -441,8 +450,6 @@ namespace mongo { } _candidates.clear(); - if (NULL != out) { *out = bestChild; } - return true; } bool MultiPlanRunner::hasBackupPlan() const { diff --git a/src/mongo/db/query/multi_plan_runner.h b/src/mongo/db/query/multi_plan_runner.h index 9142f990eaa..01dbc3091c3 100644 --- a/src/mongo/db/query/multi_plan_runner.h +++ b/src/mongo/db/query/multi_plan_runner.h @@ -92,6 +92,13 @@ namespace mongo { */ bool hasBackupPlan() const; + /** + * Caching the best plan is (currently implemented as) a destructive act so we separate it + * from ranking so that inspection of the winning solution is possible. Also sets a backup + * plan if a backup plan is needed. Exposed for testing. + */ + void cacheBestPlan(); + virtual void saveState(); virtual bool restoreState(); virtual void invalidate(const DiskLoc& dl, InvalidationType type); @@ -164,6 +171,12 @@ namespace mongo { // The query that we're trying to figure out the best solution to. boost::scoped_ptr<CanonicalQuery> _query; + // What's the ranking? Produced by pickBestPlan, consumed by cacheBestPlan. + auto_ptr<PlanRankingDecision> _ranking; + + // What's the best child? Filled out by pickBestPlan, consumed by cacheBestPlan. + size_t _bestChild; + // // Backup plan for sort // diff --git a/src/mongo/db/query/plan_cache.cpp b/src/mongo/db/query/plan_cache.cpp index f192197e200..3c7144b0b14 100644 --- a/src/mongo/db/query/plan_cache.cpp +++ b/src/mongo/db/query/plan_cache.cpp @@ -33,6 +33,7 @@ #include <memory> #include "boost/thread/locks.hpp" #include "mongo/base/owned_pointer_vector.h" +#include "mongo/client/dbclientinterface.h" // For QueryOption_foobar #include "mongo/db/query/plan_ranker.h" #include "mongo/db/query/query_solution.h" #include "mongo/db/query/qlog.h" @@ -82,6 +83,16 @@ namespace mongo { return false; } + // Tailable cursors won't get cached, just turn into collscans. + if (query.getParsed().hasOption(QueryOption_CursorTailable)) { + return false; + } + + // Snapshot is really a hint. + if (query.getParsed().isSnapshot()) { + return false; + } + return true; } @@ -283,7 +294,8 @@ namespace mongo { PlanCache::~PlanCache() { } - Status PlanCache::add(const CanonicalQuery& query, const std::vector<QuerySolution*>& solns, + Status PlanCache::add(const CanonicalQuery& query, + const std::vector<QuerySolution*>& solns, PlanRankingDecision* why) { invariant(why); diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp index ff71b9a1f00..772fb63a7ed 100644 --- a/src/mongo/db/query/query_knobs.cpp +++ b/src/mongo/db/query/query_knobs.cpp @@ -56,4 +56,6 @@ namespace mongo { MONGO_EXPORT_SERVER_PARAMETER(internalQueryPlannerEnableIndexIntersection, bool, true); + MONGO_EXPORT_SERVER_PARAMETER(internalQueryPlanOrChildrenIndependently, bool, true); + } // namespace mongo diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h index 0b56d38211e..e1271ebcfbb 100644 --- a/src/mongo/db/query/query_knobs.h +++ b/src/mongo/db/query/query_knobs.h @@ -82,4 +82,7 @@ namespace mongo { // How many intersections will the enumerator consider at each AND? extern int internalQueryEnumerationMaxIntersectPerAnd; + // Do we want to plan each child of the OR independently? + extern bool internalQueryPlanOrChildrenIndependently; + } // namespace mongo diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 8f1a950c28c..2cc95c578c1 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -410,8 +410,7 @@ namespace mongo { return Status::OK(); } - // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of - // saying "table scan for two, please." + // The hint can be $natural: 1. If this happens, output a collscan. if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { diff --git a/src/mongo/db/query/subplan_runner.cpp b/src/mongo/db/query/subplan_runner.cpp new file mode 100644 index 00000000000..26fac18dba0 --- /dev/null +++ b/src/mongo/db/query/subplan_runner.cpp @@ -0,0 +1,413 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/subplan_runner.h" + +#include "mongo/client/dbclientinterface.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/get_runner.h" +#include "mongo/db/query/multi_plan_runner.h" +#include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_access.h" +#include "mongo/db/query/qlog.h" +#include "mongo/db/query/query_planner.h" +#include "mongo/db/query/stage_builder.h" +#include "mongo/db/query/type_explain.h" + +namespace mongo { + + // static + bool SubplanRunner::canUseSubplanRunner(const CanonicalQuery& query) { + const LiteParsedQuery& lpq = query.getParsed(); + const MatchExpression* expr = query.root(); + + // Only rooted ORs work with the subplan scheme. + if (MatchExpression::OR != expr->matchType()) { + return false; + } + + // Collection scan + // No sort order requested + if (lpq.getSort().isEmpty() && + expr->matchType() == MatchExpression::AND && expr->numChildren() == 0) { + return false; + } + + // Hint provided + if (!lpq.getHint().isEmpty()) { + return false; + } + + // Min provided + // Min queries are a special case of hinted queries. + if (!lpq.getMin().isEmpty()) { + return false; + } + + // Max provided + // Similar to min, max queries are a special case of hinted queries. + if (!lpq.getMax().isEmpty()) { + return false; + } + + // Tailable cursors won't get cached, just turn into collscans. + if (query.getParsed().hasOption(QueryOption_CursorTailable)) { + return false; + } + + // Snapshot is really a hint. + if (query.getParsed().isSnapshot()) { + return false; + } + + return true; + } + + SubplanRunner::SubplanRunner(Collection* collection, + const QueryPlannerParams& params, + CanonicalQuery* cq) + : _state(SubplanRunner::PLANNING), + _collection(collection), + _plannerParams(params), + _query(cq), + _killed(false), + _policy(Runner::YIELD_MANUAL), + _ns(cq->getParsed().ns()) { } + + SubplanRunner::~SubplanRunner() { } + + Runner::RunnerState SubplanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) { + if (_killed) { + return Runner::RUNNER_DEAD; + } + + if (isEOF()) { return Runner::RUNNER_EOF; } + + if (SubplanRunner::PLANNING == _state) { + // Try to run as sub-plans. + if (runSubplans()) { + // If runSubplans returns true we expect something here. + invariant(_underlyingRunner.get()); + } + else if (!_killed) { + // Couldn't run as subplans so we'll just call normal getRunner. + + Runner* runner; + Status status = getRunnerAlwaysPlan( + _collection, _query.release(), _plannerParams, &runner); + + if (!status.isOK()) { + // We utterly failed. + _killed = true; + + // Propagate the error to the user wrapped in a BSONObj + if (NULL != objOut) { + BSONObjBuilder bob; + bob.append("ok", status.isOK() ? 1.0 : 0.0); + bob.append("code", status.code()); + bob.append("errmsg", status.reason()); + *objOut = bob.obj(); + } + return Runner::RUNNER_ERROR; + } + else { + _underlyingRunner.reset(runner); + _underlyingRunner->setYieldPolicy(_policy); + } + } + + // We can change state when we're either killed or we have an underlying runner. + invariant(_killed || NULL != _underlyingRunner.get()); + _state = SubplanRunner::RUNNING; + } + + if (_killed) { + return Runner::RUNNER_DEAD; + } + + if (isEOF()) { + return Runner::RUNNER_EOF; + } + + // If we're here we should have planned already. + invariant(SubplanRunner::RUNNING == _state); + invariant(_underlyingRunner.get()); + return _underlyingRunner->getNext(objOut, dlOut); + } + + bool SubplanRunner::runSubplans() { + // This is what we annotate with the index selections and then turn into a solution. + auto_ptr<OrMatchExpression> theOr( + static_cast<OrMatchExpression*>(_query->root()->shallowClone())); + + // This is the skeleton of index selections that is inserted into the cache. + auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); + + // We need this to extract cache-friendly index data from the index assignments. + map<BSONObj, size_t> indexMap; + for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { + const IndexEntry& ie = _plannerParams.indices[i]; + indexMap[ie.keyPattern] = i; + } + + for (size_t i = 0; i < theOr->numChildren(); ++i) { + // Turn the i-th child into its own query. + MatchExpression* orChild = theOr->getChild(i); + CanonicalQuery* orChildCQ; + Status childCQStatus = CanonicalQuery::canonicalize(*_query, + orChild, + &orChildCQ); + if (!childCQStatus.isOK()) { + QLOG() << "Can't canonicalize subchild " << orChild->toString(); + return false; + } + + // Make sure it gets cleaned up. + auto_ptr<CanonicalQuery> safeOrChildCQ(orChildCQ); + + // Plan the i-th child. + vector<QuerySolution*> solutions; + + // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from + // considering any plan that's a collscan. + Status status = QueryPlanner::plan(*safeOrChildCQ, _plannerParams, &solutions); + + if (!status.isOK()) { + QLOG() << "Can't plan for subchild " << orChildCQ->toString(); + return false; + } + + if (0 == solutions.size()) { + // If one child doesn't have an indexed solution, bail out. + QLOG() << "No solutions for subchild " << orChildCQ->toString(); + return false; + } + else if (1 == solutions.size()) { + auto_ptr<QuerySolution> autoSoln(solutions[0]); + + // We want a well-formed *indexed* solution. + if (NULL == autoSoln->cacheData.get()) { + // For example, we don't cache things for 2d indices. + QLOG() << "No cache data for subchild " << orChildCQ->toString(); + return false; + } + + if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) { + QLOG() << "No indexed cache data for subchild " << orChildCQ->toString(); + return false; + } + + // Add the index assignments to our original query. + Status tagStatus = QueryPlanner::tagAccordingToCache( + orChild, autoSoln->cacheData->tree.get(), indexMap); + + if (!tagStatus.isOK()) { + QLOG() << "Failed to extract indices from subchild" << orChildCQ->toString(); + return false; + } + + // Add the child's cache data to the cache data we're creating for the main query. + cacheData->children.push_back(autoSoln->cacheData->tree->clone()); + } + else { + // N solutions, rank them. Takes ownership of safeOrChildCQ. + MultiPlanRunner* mpr = new MultiPlanRunner(_collection, safeOrChildCQ.release()); + + // Dump all the solutions into the MPR. + for (size_t i = 0; i < solutions.size(); ++i) { + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*solutions[i], &root, &ws)); + // Takes ownership of all arguments. + mpr->addPlan(solutions[i], root, ws); + } + + // If we're allowed to yield, let the MPR know. + mpr->setYieldPolicy(_policy); + + // Calling pickBestPlan can yield so we must propagate events down to the MPR. + _underlyingRunner.reset(mpr); + + // Pull out the best plan. + size_t bestPlan; + BSONObj errorObj; + if (!mpr->pickBestPlan(&bestPlan, &errorObj)) { + QLOG() << "Failed to pick best plan for subchild " << orChildCQ->toString() + << " error obj is " << errorObj.toString(); + return false; + } + + // pickBestPlan can yield. Make sure we're not dead any which way. + if (_killed) { + QLOG() << "Killed while picking best plan for subchild " + << orChildCQ->toString(); + return false; + } + + // Add the index assignments to our original query. + Status tagStatus = QueryPlanner::tagAccordingToCache( + orChild, solutions[bestPlan]->cacheData->tree.get(), indexMap); + + if (!tagStatus.isOK()) { + QLOG() << "Failed to extract indices from subchild" << orChildCQ->toString(); + return false; + } + + cacheData->children.push_back(solutions[bestPlan]->cacheData->tree->clone()); + } + } + + // Must do this before using the planner functionality. + sortUsingTags(theOr.get()); + + // Use the cached index assignments to build solnRoot. Takes ownership of 'theOr' + QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( + *_query, theOr.release(), false, _plannerParams.indices); + + if (NULL == solnRoot) { + QLOG() << "Failed to build indexed data path for subplanned query"; + return false; + } + + // Takes ownership of 'solnRoot' + QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query, + _plannerParams, + solnRoot); + + if (NULL == soln) { + QLOG() << "Failed analyze subplanned query"; + return false; + } + + // We want our franken-solution to be cached. + SolutionCacheData* scd = new SolutionCacheData(); + scd->tree.reset(cacheData.release()); + soln->cacheData.reset(scd); + + // We use one of these even if there is one plan. We do this so that the entry is cached + // with stats obtained in the same fashion as a competitive ranking would have obtained + // them. + MultiPlanRunner* mpr = new MultiPlanRunner(_collection, _query.release()); + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*soln, &root, &ws)); + // Takes ownership of all arguments. + mpr->addPlan(soln, root, ws); + + mpr->setYieldPolicy(_policy); + _underlyingRunner.reset(mpr); + + return true; + } + + bool SubplanRunner::isEOF() { + if (_killed) { + return true; + } + + // If we're still planning we're not done yet. + if (SubplanRunner::PLANNING == _state) { + return false; + } + + // If we're running we best have a runner. + invariant(_underlyingRunner.get()); + return _underlyingRunner->isEOF(); + } + + void SubplanRunner::saveState() { + if (_killed) { + return; + } + + // We're ranking a sub-plan via an MPR or we're streaming results from this Runner. Either + // way, pass on the request. + if (NULL != _underlyingRunner.get()) { + _underlyingRunner->saveState(); + } + } + + bool SubplanRunner::restoreState() { + if (_killed) { + return false; + } + + // We're ranking a sub-plan via an MPR or we're streaming results from this Runner. Either + // way, pass on the request. + if (NULL != _underlyingRunner.get()) { + return _underlyingRunner->restoreState(); + } + + return true; + } + + void SubplanRunner::setYieldPolicy(Runner::YieldPolicy policy) { + if (_killed) { return; } + + // If somebody sets this before calling work() we need to know how to set it in our subquery + // runners. + _policy = policy; + + if (NULL != _underlyingRunner.get()) { + _underlyingRunner->setYieldPolicy(policy); + } + } + + void SubplanRunner::invalidate(const DiskLoc& dl, InvalidationType type) { + if (_killed) { return; } + + if (NULL != _underlyingRunner.get()) { + _underlyingRunner->invalidate(dl, type); + } + } + + const std::string& SubplanRunner::ns() { + return _ns; + } + + void SubplanRunner::kill() { + _killed = true; + + if (NULL != _underlyingRunner.get()) { + _underlyingRunner->kill(); + } + } + + Status SubplanRunner::getInfo(TypeExplain** explain, PlanInfo** planInfo) const { + if (SubplanRunner::RUNNING == _state) { + invariant(_underlyingRunner.get()); + return _underlyingRunner->getInfo(explain, planInfo); + } + else { + return Status(ErrorCodes::BadValue, "no sub-plan to defer getInfo to"); + } + } + +} // namespace mongo diff --git a/src/mongo/db/query/subplan_runner.h b/src/mongo/db/query/subplan_runner.h new file mode 100644 index 00000000000..6b6fe0d8401 --- /dev/null +++ b/src/mongo/db/query/subplan_runner.h @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <boost/scoped_ptr.hpp> +#include <string> + +#include "mongo/base/status.h" +#include "mongo/db/query/runner.h" +#include "mongo/db/query/query_planner_params.h" + +namespace mongo { + + class BSONObj; + class CanonicalQuery; + class DiskLoc; + class TypeExplain; + struct PlanInfo; + + class SubplanRunner : public Runner { + public: + SubplanRunner(Collection* collection, + const QueryPlannerParams& params, + CanonicalQuery* cq); + + static bool canUseSubplanRunner(const CanonicalQuery& query); + + virtual ~SubplanRunner(); + + virtual Runner::RunnerState getNext(BSONObj* objOut, DiskLoc* dlOut); + + virtual bool isEOF(); + + virtual void saveState(); + + virtual bool restoreState(); + + virtual void setYieldPolicy(Runner::YieldPolicy policy); + + virtual void invalidate(const DiskLoc& dl, InvalidationType type); + + virtual const std::string& ns(); + + virtual void kill(); + + virtual const Collection* collection() { + return _collection; + } + + virtual Status getInfo(TypeExplain** explain, + PlanInfo** planInfo) const; + + private: + bool runSubplans(); + + enum SubplanRunnerState { + PLANNING, + RUNNING, + }; + + SubplanRunnerState _state; + + Collection* _collection; + + QueryPlannerParams _plannerParams; + + std::auto_ptr<CanonicalQuery> _query; + + bool _killed; + + Runner::YieldPolicy _policy; + + boost::scoped_ptr<Runner> _underlyingRunner; + + std::string _ns; + }; + +} // namespace mongo diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp index 0c85cfbdfac..991ffc6d316 100644 --- a/src/mongo/dbtests/plan_ranking.cpp +++ b/src/mongo/dbtests/plan_ranking.cpp @@ -121,6 +121,8 @@ namespace PlanRankingTests { BSONObj unused; ASSERT(_mpr->pickBestPlan(&bestPlan, &unused)); ASSERT_LESS_THAN(bestPlan, solutions.size()); + // This is what sets a backup plan, should we test for it. + _mpr->cacheBestPlan(); return solutions[bestPlan]; } |