diff options
Diffstat (limited to 'src/mongo/db/query/query_planner.cpp')
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 319 |
1 files changed, 125 insertions, 194 deletions
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 73eb55db257..4d5eac4ab53 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -28,72 +28,41 @@ #include "mongo/db/query/query_planner.h" +#include <map> +#include <set> +#include <stack> +#include <vector> + // For QueryOption_foobar #include "mongo/client/dbclientinterface.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/index_bounds_builder.h" +#include "mongo/db/query/index_tag.h" +#include "mongo/db/query/plan_enumerator.h" +#include "mongo/db/query/predicate_map.h" #include "mongo/db/query/query_solution.h" namespace mongo { /** - * Describes an index that could be used to answer a predicate. - */ - struct RelevantIndex { - enum Relevance { - // Is the index prefixed by the predicate's field? If so it can be used. - FIRST, - - // Is the predicate's field in the index but not as a prefix? If so, the index might be - // able to be used, if there is another predicate that prefixes the index. - NOT_FIRST, - }; - - RelevantIndex(int i, Relevance r) : index(i), relevance(r) { } - - // What index is relevant? - int index; - - // How useful is it? - Relevance relevance; - - // To allow insertion into a set and sorting by something. - bool operator<(const RelevantIndex& other) const { - // We're only ever comparing these inside of a predicate. A predicate should only be - // tagged for an index once. This of course assumes that values are only indexed once - // in an index. - verify(other.index != index); - return index < other.index; - } - }; - - /** - * Caches information about the predicates we're trying to plan for. - */ - struct PredicateInfo { - PredicateInfo(MatchExpression::MatchType t) : type(t) { } - - // The type of the predicate. See db/matcher/expression.h - MatchExpression::MatchType type; - - // Any relevant indices. Ordered by index no. - set<RelevantIndex> relevant; - }; - - // This is a multimap because the same field name can easily appear more than once in a query. - typedef multimap<string, PredicateInfo> PredicateMap; - - /** * Scan the parse tree, adding all predicates to the provided map. */ - void makePredicateMap(const MatchExpression* node, PredicateMap* out) { + void makePredicateMap(MatchExpression* node, PredicateMap* out) { StringData path = node->path(); + // If we've seen this path before, link 'node' into that bunch. + // Otherwise, create a new entry <path, node> in the predicate map. if (!path.empty()) { - // XXX: make sure the (path, pred) pair isn't in there already? - out->insert(std::make_pair(path.toString(), PredicateInfo(node->matchType()))); + PredicateMap::iterator it = out->lower_bound(path.toString()); + if (it != out->end()) { + vector<MatchExpression*>& nodes = it->second.nodes; + nodes.push_back(node); + } + else { + out->insert(std::make_pair(path.toString(), node)); + } } // XXX XXX XXX XXX @@ -150,8 +119,15 @@ namespace mongo { } bool hasPredicate(const PredicateMap& pm, MatchExpression::MatchType type) { - for (PredicateMap::const_iterator i = pm.begin(); i != pm.end(); ++i) { - if (i->second.type == type) { return true; } + for (PredicateMap::const_iterator itMap = pm.begin(); itMap != pm.end(); ++itMap) { + const vector<MatchExpression*>& nodes = itMap->second.nodes; + for (vector<MatchExpression*>::const_iterator itVec = nodes.begin(); + itVec != nodes.end(); + ++itVec) { + if ((*itVec)->matchType() == type) { + return true; + } + } } return false; } @@ -181,147 +157,98 @@ namespace mongo { return soln.release(); } - /** - * Provides elements from the power set of possible indices to use. Uses the available - * predicate information to make better decisions about what indices are best. - * - * TODO: Use stats about indices. - * TODO: Use predicate information. - */ - class PlanEnumerator { - public: - class OutputTag : public MatchExpression::TagData { - public: - OutputTag(size_t i) : index(i) { } - virtual ~OutputTag() { } - virtual void debugString(StringBuilder* builder) const { - *builder << " || Selected Index #" << index; - } - // What index should we try to use for this leaf? - size_t index; - }; - - /** - * Internal tag used to explore what indices to use for a leaf node. - */ - class EnumeratorTag : public MatchExpression::TagData { - public: - EnumeratorTag(const PredicateInfo* p) : pred(p), nextIndexToUse(0) { } - - virtual ~EnumeratorTag() { } - - virtual void debugString(StringBuilder* builder) const { - *builder << " || Relevant Indices:"; - - for (set<RelevantIndex>::const_iterator it = pred->relevant.begin(); - it != pred->relevant.end(); ++it) { - - *builder << " #" << it->index << " "; - if (RelevantIndex::FIRST == it->relevance) { - *builder << "[prefix]"; - } - else { - verify(RelevantIndex::NOT_FIRST == it->relevance); - *builder << "[not-prefix]"; - } - } - } - - // Not owned here. - const PredicateInfo* pred; - size_t nextIndexToUse; - }; - - // TODO: This is inefficient. We could create the tagged tree as part of the PredicateMap - // construction. - void tag(MatchExpression* node) { - StringData path = node->path(); + // TODO: Document when this settles + struct FilterInfo { + MatchExpression* filterNode; + size_t currChild; + FilterInfo(MatchExpression* f, int c) : filterNode(f), currChild(c) {} + }; - if (!path.empty()) { + // TODO: Document when this settles + QuerySolution* makeIndexedPath(const CanonicalQuery& query, + const vector<BSONObj>& indexKeyPatterns, + MatchExpression* filter) { - for (PredicateMap::const_iterator it = _pm.find(path.toString()); _pm.end() != it; - ++it) { + auto_ptr<QuerySolution> soln(new QuerySolution()); - if (it->second.type == node->matchType()) { - EnumeratorTag* td = new EnumeratorTag(&it->second); - node->setTag(td); - _taggedLeaves.push_back(node); - break; - } + // We'll build a tree of solutions nodes as we traverse the filter. For + // now, we're not generating multi-index solutions, so we can hold on to a + // single node here. + IndexScanNode* isn = new IndexScanNode(); + + // We'll do a post order traversal of the filter, which is a n-ary tree. We descend the + // tree keeping track of which child is next to visit. + std::stack<FilterInfo> filterNodes; + FilterInfo rootFilter(filter, 0); + filterNodes.push(rootFilter); + + while (!filterNodes.empty()) { + + FilterInfo& fi = filterNodes.top(); + MatchExpression* filterNode = fi.filterNode; + size_t& currChild = fi.currChild; + if (filterNode->numChildren() == currChild) { + + // Visit leaf or node. If we find an index tag, we compute the bounds and + // fill up the index information on the current IXScan node. + IndexTag* indexTag = static_cast<IndexTag*>(filterNode->getTag()); + bool exactBounds = false; + if (indexTag != NULL) { + + isn->indexKeyPattern = indexKeyPatterns[indexTag->index]; + + // We assume that this is in the same direction of the index. We may later + // change our minds if the query requires sorting in the opposite + // direction. But, right now, the direction of traversal is not in + // question. + isn->direction = 1; + + // TODO: handle combining oils if this is not the first predicate we'eve + // seen for this field. + // TODO: handle compound indexes + OrderedIntervalList oil(filterNode->path().toString()); + IndexBoundsBuilder::translate( + static_cast<LeafMatchExpression*>(filterNode), + isn->indexKeyPattern.firstElement(), + &oil, + &exactBounds); + + // TODO: union or intersect oils + // It can be the case that this is not the first "oil" for a given + // field. That is, there are several predicates agains a same field which + // happens to have a useful index. In that case, we may need to combine + // oils in a "larger" bound to accomodate all the predicates. + vector<OrderedIntervalList>& fields = isn->bounds.fields; + fields.push_back(oil); } - } - // XXX XXX XXX XXX - // XXX Do we do this if the node is logical, or if it's array, or both? - // XXX XXX XXX XXX - for (size_t i = 0; i < node->numChildren(); ++i) { - tag(const_cast<MatchExpression*>(node->getChild(i))); + // TODO: possibly trim redundant MatchExpressions nodes + // It can be the case that the bounds for a query will only return exact + // results (as opposed to just limiting the index range in which the results + // lie). When results are exact, we don't need to retest them and thus such + // nodes can be eliminated from the filter. Note that some non-leaf nodes + // (e.g. and's, or's) may be left with single children and can therefore be + // eliminated as well. + isn->filter = filterNode; + + // TODO: Multiple IXScans nodes in a solution + // If this is not the first index tag found, then we add another IXScan node to + // the queryNodes stack and start using $or and $ands in the filter expession + // to tie the IXScan together into a tree of QuerySolutionNode's. + + filterNodes.pop(); } - } - - vector<MatchExpression*> _taggedLeaves; - - /** - * Does not take ownership of any arguments. They must outlive any calls to getNext(...). - */ - PlanEnumerator(const CanonicalQuery* cq, const PredicateMap* pm, - const vector<BSONObj>* indices) - : _cq(*cq), _pm(*pm), _indices(*indices) { - - // Copy the query tree. - // TODO: have a MatchExpression::copy function(?) - StatusWithMatchExpression swme = MatchExpressionParser::parse(cq->getQueryObj()); - verify(swme.isOK()); - _taggedTree.reset(swme.getValue()); - - // Walk the query tree and tag with possible indices - tag(_taggedTree.get()); - - for (size_t i = 0; i < indices->size(); ++i) { - cout << "Index #" << i << ": " << (*indices)[i].toString() << endl; + else { + // Continue the traversal. + FilterInfo fiChild(filterNode->getChild(currChild), 0); + filterNodes.push(fiChild); + currChild++; } - - cout << "Tagged tree: " << _taggedTree->toString() << endl; - } - - /** - * Outputs a possible plan. Leaves in the plan are tagged with an index to use. - * Returns true if a plan was outputted, false if no more plans will be outputted. - * - * 'tree' is set to point to the query tree. A QuerySolution is built from this tree. - * Caller owns the pointer. Note that 'tree' itself points into data owned by the provided - * CanonicalQuery. - * - * Nodes in 'tree' are tagged with indices that should be used to answer the tagged nodes. - * Only nodes that have a field name (isLogical() == false) will be tagged. - */ - bool getNext(MatchExpression** tree) { - // TODO: ALBERTO - - // Clone tree - // MatchExpression* ret = _taggedTree->shallowClone(); - - // Walk over cloned tree and tagged tree. Select indices out of tagged tree and mark - // the cloned tree. - - // *tree = ret; - // return true; - - return false; } - private: - // Not owned by us. - const CanonicalQuery& _cq; - - // Not owned by us. - const PredicateMap& _pm; - - // Not owned by us. - const vector<BSONObj>& _indices; - - scoped_ptr<MatchExpression> _taggedTree; - }; + soln->root.reset(isn); + return soln.release(); + } // static void QueryPlanner::plan(const CanonicalQuery& query, const vector<BSONObj>& indexKeyPatterns, @@ -367,8 +294,11 @@ namespace mongo { vector<BSONObj> relevantIndices; findRelevantIndices(predicates, indexKeyPatterns, &relevantIndices); - // No indices, no work to do. - if (0 == relevantIndices.size()) { return; } + // No indices, no work to do other than emitting the collection scan plan. + if (0 == relevantIndices.size()) { + out->push_back(makeCollectionScan(query, false)); + return; + } // Figure out how useful each index is to each predicate. rateIndices(relevantIndices, &predicates); @@ -377,22 +307,24 @@ namespace mongo { // Planner Section 2: Use predicate/index data to output sets of indices that we can use. // - PlanEnumerator isp(&query, &predicates, &relevantIndices); + PlanEnumerator isp(query.root(), &predicates, &relevantIndices); + isp.init(); MatchExpression* rawTree; while (isp.getNext(&rawTree)) { - QuerySolutionNode* solutionRoot = NULL; // // Planner Section 3: Logical Rewrite. Use the index selection and the tree structure // to try to rewrite the tree. TODO: Do this for real. We treat the tree as static. // + QuerySolution* soln = makeIndexedPath(query, indexKeyPatterns, rawTree); // // Planner Section 4: Covering. If we're projecting, See if we get any covering from // this plan. If not, add a fetch. // + if (!query.getParsed().getProj().isEmpty()) { warning() << "Can't deal with proj yet" << endl; } @@ -401,9 +333,10 @@ namespace mongo { } // - // Planner Section 5: Sort. If we're sorting, see if the plan gives us a sort for free. - // If not, add a sort. + // Planner Section 5: Sort. If we're sorting, see if the plan gives us a sort for + // free. If not, add a sort. // + if (!query.getParsed().getSort().isEmpty()) { } else { @@ -416,10 +349,8 @@ namespace mongo { // TODO: Validate. // - if (NULL != solutionRoot) { - QuerySolution* qs = new QuerySolution(); - qs->root.reset(solutionRoot); - out->push_back(qs); + if (NULL != soln) { + out->push_back(soln); } } |