summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2014-05-22 11:37:43 -0400
committerDan Pasette <dan@mongodb.com>2014-06-01 10:28:56 -0400
commit3f02f1a6a157eda94f8d10edbeb4148a11a4bdc1 (patch)
treec773bea5de7c430b2ed2342293a802e2c79e578a
parent2eb48801e124665a72347302534a219edb8ed674 (diff)
downloadmongo-3f02f1a6a157eda94f8d10edbeb4148a11a4bdc1.tar.gz
SERVER-13960 fix access planning for OR with inexact child predicates
Includes a refactor of QueryPlannerAccess::processIndexScans(...) which makes this change saner. (cherry picked from commit bee249ac8907cc9de6b19ba87c3fcb074d84b1a3)
-rw-r--r--src/mongo/db/query/index_bounds_builder.h19
-rw-r--r--src/mongo/db/query/planner_access.cpp632
-rw-r--r--src/mongo/db/query/planner_access.h173
-rw-r--r--src/mongo/db/query/query_planner_test.cpp35
4 files changed, 522 insertions, 337 deletions
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 3fa80b40b6f..56ac49d947d 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -41,15 +41,24 @@ namespace mongo {
*/
class IndexBoundsBuilder {
public:
+ /**
+ * Describes various degrees of precision with which predicates can be evaluated based
+ * on the index bounds.
+ *
+ * The integer values of the enum are significant, and are assigned in order of
+ * increasing tightness. These values are used when we need to do comparison between two
+ * BoundsTightness values. Such comparisons can answer questions such as "Does predicate
+ * X have tighter or looser bounds than predicate Y?".
+ */
enum BoundsTightness {
- // Index bounds are exact.
- EXACT,
+ // Index bounds are inexact, but no fetch is required
+ INEXACT_COVERED = 0,
// Index bounds are inexact, and a fetch is required.
- INEXACT_FETCH,
+ INEXACT_FETCH = 1,
- // Index bounds are inexact, but no fetch is required
- INEXACT_COVERED
+ // Index bounds are exact.
+ EXACT = 2
};
/**
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 413860ac396..3c2d1efdffe 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -173,14 +173,24 @@ namespace mongo {
}
bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr,
- const IndexEntry& index,
- size_t pos,
- QuerySolutionNode* node,
- MatchExpression::MatchType mergeType) {
+ const ScanBuildingState& scanState) {
+ const QuerySolutionNode* node = scanState.currentScan.get();
if (NULL == node || NULL == expr) {
return false;
}
+ if (NULL == scanState.ixtag) {
+ return false;
+ }
+
+ if (scanState.currentIndexNumber != scanState.ixtag->index) {
+ return false;
+ }
+
+ size_t pos = scanState.ixtag->pos;
+ const IndexEntry& index = scanState.indices[scanState.currentIndexNumber];
+ const MatchExpression::MatchType mergeType = scanState.root->matchType();
+
const StageType type = node->getType();
verify(STAGE_GEO_NEAR_2D != type);
@@ -222,8 +232,8 @@ namespace mongo {
//
invariant(type == STAGE_IXSCAN);
- IndexScanNode* scan = static_cast<IndexScanNode*>(node);
- IndexBounds* boundsToFillOut = &scan->bounds;
+ const IndexScanNode* scan = static_cast<const IndexScanNode*>(node);
+ const IndexBounds* boundsToFillOut = &scan->bounds;
if (boundsToFillOut->fields[pos].name.empty()) {
// The bounds will be compounded. This is OK because the
@@ -246,24 +256,26 @@ namespace mongo {
}
void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr,
- const IndexEntry& index,
- size_t pos,
- IndexBoundsBuilder::BoundsTightness* tightnessOut,
- QuerySolutionNode* node,
- MatchExpression::MatchType mergeType) {
+ ScanBuildingState* scanState) {
+ QuerySolutionNode* node = scanState->currentScan.get();
+ invariant(NULL != node);
+
+ const MatchExpression::MatchType mergeType = scanState->root->matchType();
+ size_t pos = scanState->ixtag->pos;
+ const IndexEntry& index = scanState->indices[scanState->currentIndexNumber];
const StageType type = node->getType();
verify(STAGE_GEO_NEAR_2D != type);
if (STAGE_GEO_2D == type) {
- *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
return;
}
// Text data is covered, but not exactly. Text covering is unlike any other covering
- // so we deal with it in _addFilterToSolutionNode.
+ // so we deal with it in addFilterToSolutionNode.
if (STAGE_TEXT == type) {
- *tightnessOut = IndexBoundsBuilder::INEXACT_COVERED;
+ scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
return;
}
@@ -288,22 +300,24 @@ namespace mongo {
keyElt = it.next();
}
verify(!keyElt.eoo());
- *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
verify(boundsToFillOut->fields.size() > pos);
OrderedIntervalList* oil = &boundsToFillOut->fields[pos];
if (boundsToFillOut->fields[pos].name.empty()) {
- IndexBoundsBuilder::translate(expr, keyElt, index, oil, tightnessOut);
+ IndexBoundsBuilder::translate(expr, keyElt, index, oil, &scanState->tightness);
}
else {
if (MatchExpression::AND == mergeType) {
- IndexBoundsBuilder::translateAndIntersect(expr, keyElt, index, oil, tightnessOut);
+ IndexBoundsBuilder::translateAndIntersect(expr, keyElt, index, oil,
+ &scanState->tightness);
}
else {
verify(MatchExpression::OR == mergeType);
- IndexBoundsBuilder::translateAndUnion(expr, keyElt, index, oil, tightnessOut);
+ IndexBoundsBuilder::translateAndUnion(expr, keyElt, index, oil,
+ &scanState->tightness);
}
}
}
@@ -408,6 +422,42 @@ namespace mongo {
}
// static
+ void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState,
+ vector<QuerySolutionNode*>* out) {
+ finishLeafNode(scanState->currentScan.get(),
+ scanState->indices[scanState->currentIndexNumber]);
+
+ if (MatchExpression::OR == scanState->root->matchType()) {
+ if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) {
+ // This is an $or where at least one predicate has inexact bounds. In this
+ // case we must add a fetch node above the index scan whose filter includes
+ // *all* of the predicates used to generate the ixscan.
+ FetchNode* fetch = new FetchNode();
+ // Takes ownership.
+ fetch->filter.reset(scanState->curOr.release());
+ // Takes ownership.
+ fetch->children.push_back(scanState->currentScan.release());
+
+ scanState->currentScan.reset(fetch);
+ }
+ else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED) {
+ // This an OR, at least one of the predicates used to generate 'currentScan'
+ // is inexact covered, but none is inexact fetch. This means that we can put
+ // these predicates, joined by an $or, as filters on the index scan. This avoids
+ // a fetch and allows the predicates to be covered by the index.
+ //
+ // Ex.
+ // Say we have index {a: 1} and query {$or: [{a: /foo/}, {a: /bar/}]}.
+ // The entire query, {$or: [{a: /foo/}, {a: /bar/}]}, should be a filter
+ // in the index scan stage itself.
+ scanState->currentScan->filter.reset(scanState->curOr.release());
+ }
+ }
+
+ out->push_back(scanState->currentScan.release());
+ }
+
+ // static
void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) {
const StageType type = node->getType();
verify(STAGE_GEO_NEAR_2D != type);
@@ -502,181 +552,37 @@ namespace mongo {
// static
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
- MatchExpression* root,
- bool inArrayOperator,
- const vector<IndexEntry>& indices,
- vector<QuerySolutionNode*>* out) {
-
- auto_ptr<QuerySolutionNode> currentScan;
- size_t currentIndexNumber = IndexTag::kNoIndex;
- size_t curChild = 0;
-
- // This 'while' processes all IXSCANs, possibly merging scans by combining the bounds. We
- // can merge scans in two cases:
- // 1. Filling out subsequent fields in a compound index.
- // 2. Intersecting bounds. Currently unimplemented.
- while (curChild < root->numChildren()) {
- MatchExpression* child = root->getChild(curChild);
+ MatchExpression* root,
+ bool inArrayOperator,
+ const std::vector<IndexEntry>& indices,
+ std::vector<QuerySolutionNode*>* out) {
+ // Initialize the ScanBuildingState.
+ ScanBuildingState scanState(root, inArrayOperator, indices);
+
+ while (scanState.curChild < root->numChildren()) {
+ MatchExpression* child = root->getChild(scanState.curChild);
// If there is no tag, it's not using an index. We've sorted our children such that the
// children with tags are first, so we stop now.
if (NULL == child->getTag()) { break; }
- IndexTag* ixtag = static_cast<IndexTag*>(child->getTag());
+ scanState.ixtag = static_cast<IndexTag*>(child->getTag());
// If there's a tag it must be valid.
- verify(IndexTag::kNoIndex != ixtag->index);
+ verify(IndexTag::kNoIndex != scanState.ixtag->index);
// If the child can't use an index on its own field (and the child is not a negation
// of a bounds-generating expression), then it's indexed by virtue of one of
// its children having an index.
//
- // If the child is an $elemMatch, we try to merge its child predicates into the
- // current ixscan.
- //
// NOTE: If the child is logical, it could possibly collapse into a single ixscan. we
// ignore this for now.
if (!Indexability::isBoundsGenerating(child)) {
// If we're here, then the child is indexed by virtue of its children.
// In most cases this means that we recursively build indexed data
// access on 'child'.
-
- if (MatchExpression::AND == root->matchType() &&
- MatchExpression::ELEM_MATCH_OBJECT == child->matchType()) {
- // We have an AND with an ELEM_MATCH_OBJECT child. The plan enumerator produces
- // index taggings which indicate that we should try to compound with
- // predicates retrieved from inside the subtree rooted at the ELEM_MATCH.
- // In order to obey the enumerator's tagging, we need to retrieve these
- // predicates from inside the $elemMatch, and try to merge them with
- // the current index scan.
-
- // Contains tagged predicates from inside the tree rooted at 'child'
- // which are logically part of the AND.
- vector<MatchExpression*> emChildren;
-
- // Contains tagged nodes that are not logically part of the AND and
- // cannot use the index directly (e.g. OR nodes which are tagged to
- // be indexed).
- vector<MatchExpression*> emSubnodes;
-
- // Populate 'emChildren' and 'emSubnodes'.
- findElemMatchChildren(child, &emChildren, &emSubnodes);
-
- // Recursively build data access for the nodes inside 'emSubnodes'.
- for (size_t i = 0; i < emSubnodes.size(); ++i) {
- MatchExpression* subnode = emSubnodes[i];
-
- if (!Indexability::isBoundsGenerating(subnode)) {
- // Must pass true for 'inArrayOperator' because the subnode is
- // beneath an ELEM_MATCH_OBJECT.
- QuerySolutionNode* childSolution = buildIndexedDataAccess(query,
- subnode,
- true,
- indices);
-
- // buildIndexedDataAccess(...) returns NULL in error conditions, when
- // it is unable to construct a query solution from a tagged match
- // expression tree. If we are unable to construct a solution according
- // to the instructions from the enumerator, then we bail out early
- // (by returning false) rather than continuing on and potentially
- // constructing an invalid solution tree.
- if (NULL == childSolution) { return false; }
-
- // Output the resulting solution tree.
- out->push_back(childSolution);
- }
- }
-
- // For each predicate in 'emChildren', try to merge it with the
- // current index scan.
- //
- // This loop is identical to the outer loop except for two
- // changes:
- // 1) The OR case is removed. We would never hit the OR case
- // because we've already checked that the matchType of 'root'
- // is AND.
- // 2) We want to leave the entire $elemMatch in place as a
- // child of the parent AND. This way, the calling function
- // will affix the entire $elemMatch expression as a filter
- // above the AND.
- for (size_t i = 0; i < emChildren.size(); ++i) {
- MatchExpression* emChild = emChildren[i];
- invariant(NULL != emChild->getTag());
- IndexTag* innerTag = static_cast<IndexTag*>(emChild->getTag());
-
- if (NULL != currentScan.get() && (currentIndexNumber == innerTag->index) &&
- shouldMergeWithLeaf(emChild, indices[currentIndexNumber], innerTag->pos,
- currentScan.get(), root->matchType())) {
- // The child uses the same index we're currently building a scan for. Merge
- // the bounds and filters.
- verify(currentIndexNumber == innerTag->index);
-
- IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::INEXACT_FETCH;
- mergeWithLeafNode(emChild, indices[currentIndexNumber], innerTag->pos, &tightness,
- currentScan.get(), root->matchType());
-
- if (tightness == IndexBoundsBuilder::INEXACT_COVERED
- && !indices[currentIndexNumber].multikey) {
- // Add the filter to the current index scan. This is optional because
- // the entire filter will get affixed to the parent AND. It is here
- // as an optimization---an additional filter during the index scan
- // stage will cause fewer documents to bubble up to the parent node
- // of the execution tree.
- _addFilterToSolutionNode(currentScan.get(), emChild, root->matchType());
- }
- }
- else {
- if (NULL != currentScan.get()) {
- finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
- out->push_back(currentScan.release());
- }
- else {
- verify(IndexTag::kNoIndex == currentIndexNumber);
- }
-
- currentIndexNumber = innerTag->index;
-
- IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::INEXACT_FETCH;
- currentScan.reset(makeLeafNode(query, indices[currentIndexNumber], innerTag->pos,
- emChild, &tightness));
-
- if (tightness == IndexBoundsBuilder::INEXACT_COVERED
- && !indices[currentIndexNumber].multikey) {
- // Add the filter to the current index scan. This is optional because
- // the entire filter will get affixed to the parent AND. It is here
- // as an optimization---an additional filter during the index scan
- // stage will cause fewer documents to bubble up to the parent node
- // of the execution tree.
- _addFilterToSolutionNode(currentScan.get(), emChild, root->matchType());
- }
- }
- }
-
- // We're done processing the $elemMatch child. We leave it hanging off
- // it's AND parent so that it will be affixed as a filter later on,
- // and move on to the next child of the AND.
- ++curChild;
- continue;
- }
- else if (!inArrayOperator) {
- // The logical sub-tree is responsible for fully evaluating itself. Any
- // required filters or fetches are already hung on it. As such, we remove the
- // filter branch from our tree. buildIndexedDataAccess takes ownership of the
- // child.
- root->getChildVector()->erase(root->getChildVector()->begin() + curChild);
- // The curChild of today is the curChild+1 of yesterday.
- }
- else {
- ++curChild;
+ if (!processIndexScansSubnode(query, &scanState, out)) {
+ return false;
}
-
- // If inArrayOperator: takes ownership of child, which is OK, since we detached
- // child from root.
- QuerySolutionNode* childSolution = buildIndexedDataAccess(query,
- child,
- inArrayOperator,
- indices);
- if (NULL == childSolution) { return false; }
- out->push_back(childSolution);
continue;
}
@@ -686,8 +592,8 @@ namespace mongo {
// If 'child' is a NOT, then the tag we're interested in is on the NOT's
// child node.
if (MatchExpression::NOT == child->matchType()) {
- ixtag = static_cast<IndexTag*>(child->getChild(0)->getTag());
- invariant(IndexTag::kNoIndex != ixtag->index);
+ scanState.ixtag = static_cast<IndexTag*>(child->getChild(0)->getTag());
+ invariant(IndexTag::kNoIndex != scanState.ixtag->index);
}
// If the child we're looking at uses a different index than the current index scan, add
@@ -709,153 +615,190 @@ namespace mongo {
// complications in the multikey case that have to be obeyed both by the enumerator
// and here as we try to merge predicates into query solution leaves. The hairy
// details of these rules are documented near the top of planner_access.h.
- if (NULL != currentScan.get() && (currentIndexNumber == ixtag->index) &&
- shouldMergeWithLeaf(child, indices[currentIndexNumber], ixtag->pos,
- currentScan.get(), root->matchType())) {
+ if (shouldMergeWithLeaf(child, scanState)) {
// The child uses the same index we're currently building a scan for. Merge
// the bounds and filters.
- verify(currentIndexNumber == ixtag->index);
-
- IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::INEXACT_FETCH;
- mergeWithLeafNode(child, indices[currentIndexNumber], ixtag->pos, &tightness,
- currentScan.get(), root->matchType());
-
- if (inArrayOperator) {
- // We're inside an array operator. The entire array operator expression
- // should always be affixed as a filter. We keep 'curChild' in the $and
- // for affixing later.
- ++curChild;
- }
- else if (tightness == IndexBoundsBuilder::EXACT) {
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
- delete child;
- }
- else if (tightness == IndexBoundsBuilder::INEXACT_COVERED
- && (INDEX_TEXT == indices[currentIndexNumber].type
- || !indices[currentIndexNumber].multikey)) {
- // The bounds are not exact, but the information needed to
- // evaluate the predicate is in the index key. Remove the
- // MatchExpression from its parent and attach it to the filter
- // of the index scan we're building.
- //
- // We can only use this optimization if the index is NOT multikey.
- // Suppose that we had the multikey index {x: 1} and a document
- // {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might
- // ever only be applied to the index key "a". We'd incorrectly
- // conclude that the document does not match the query :( so we
- // gotta stick to non-multikey indices.
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
-
- _addFilterToSolutionNode(currentScan.get(), child, root->matchType());
- }
- else if (root->matchType() == MatchExpression::OR) {
- // In the AND case, the filter can be brought above the AND node.
- // But in the OR case, the filter only applies to one branch, so
- // we must affix curChild's filter now. In order to apply the filter
- // to the proper OR branch, create a FETCH node with the filter whose
- // child is the IXSCAN.
- finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
-
- FetchNode* fetch = new FetchNode();
- // takes ownership
- fetch->filter.reset(child);
- // takes ownership
- fetch->children.push_back(currentScan.release());
- // takes ownership
- out->push_back(fetch);
-
- currentIndexNumber = IndexTag::kNoIndex;
- }
- else {
- // We keep curChild in the AND for affixing later.
- ++curChild;
- }
+ verify(scanState.currentIndexNumber == scanState.ixtag->index);
+ scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH;
+ mergeWithLeafNode(child, &scanState);
+ handleFilter(&scanState);
}
else {
- if (NULL != currentScan.get()) {
- finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
- out->push_back(currentScan.release());
+ if (NULL != scanState.currentScan.get()) {
+ // Output the current scan before starting to construct a new out.
+ finishAndOutputLeaf(&scanState, out);
}
else {
- verify(IndexTag::kNoIndex == currentIndexNumber);
+ verify(IndexTag::kNoIndex == scanState.currentIndexNumber);
}
- currentIndexNumber = ixtag->index;
+ // Reset state before producing a new leaf.
+ scanState.resetForNextScan(scanState.ixtag);
- IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::INEXACT_FETCH;
- currentScan.reset(makeLeafNode(query, indices[currentIndexNumber], ixtag->pos,
- child, &tightness));
+ scanState.currentScan.reset(makeLeafNode(query,
+ indices[scanState.currentIndexNumber],
+ scanState.ixtag->pos, child,
+ &scanState.tightness));
- if (inArrayOperator) {
- // We're inside an array operator. The entire array operator expression
- // should always be affixed as a filter. We keep 'curChild' in the $and
- // for affixing later.
- ++curChild;
+ handleFilter(&scanState);
+ }
+ }
+
+ // Output the scan we're done with, if it exists.
+ if (NULL != scanState.currentScan.get()) {
+ finishAndOutputLeaf(&scanState, out);
+ }
+
+ return true;
+ }
+
+ // static
+ bool QueryPlannerAccess::processIndexScansElemMatch(const CanonicalQuery& query,
+ ScanBuildingState* scanState,
+ std::vector<QuerySolutionNode*>* out) {
+ MatchExpression* root = scanState->root;
+ MatchExpression* child = root->getChild(scanState->curChild);
+ const vector<IndexEntry>& indices = scanState->indices;
+
+ // We have an AND with an ELEM_MATCH_OBJECT child. The plan enumerator produces
+ // index taggings which indicate that we should try to compound with
+ // predicates retrieved from inside the subtree rooted at the ELEM_MATCH.
+ // In order to obey the enumerator's tagging, we need to retrieve these
+ // predicates from inside the $elemMatch, and try to merge them with
+ // the current index scan.
+
+ // Contains tagged predicates from inside the tree rooted at 'child'
+ // which are logically part of the AND.
+ vector<MatchExpression*> emChildren;
+
+ // Contains tagged nodes that are not logically part of the AND and
+ // cannot use the index directly (e.g. OR nodes which are tagged to
+ // be indexed).
+ vector<MatchExpression*> emSubnodes;
+
+ // Populate 'emChildren' and 'emSubnodes'.
+ findElemMatchChildren(child, &emChildren, &emSubnodes);
+
+ // Recursively build data access for the nodes inside 'emSubnodes'.
+ for (size_t i = 0; i < emSubnodes.size(); ++i) {
+ MatchExpression* subnode = emSubnodes[i];
+
+ if (!Indexability::isBoundsGenerating(subnode)) {
+ // Must pass true for 'inArrayOperator' because the subnode is
+ // beneath an ELEM_MATCH_OBJECT.
+ QuerySolutionNode* childSolution = buildIndexedDataAccess(query,
+ subnode,
+ true,
+ indices);
+
+ // buildIndexedDataAccess(...) returns NULL in error conditions, when
+ // it is unable to construct a query solution from a tagged match
+ // expression tree. If we are unable to construct a solution according
+ // to the instructions from the enumerator, then we bail out early
+ // (by returning false) rather than continuing on and potentially
+ // constructing an invalid solution tree.
+ if (NULL == childSolution) { return false; }
+
+ // Output the resulting solution tree.
+ out->push_back(childSolution);
+ }
+ }
+
+ // For each predicate in 'emChildren', try to merge it with the current index scan.
+ //
+ // This loop is similar to that in processIndexScans(...), except it does not call into
+ // handleFilters(...). Instead, we leave the entire $elemMatch filter intact. This way,
+ // the complete $elemMatch expression will be affixed as a filter later on.
+ for (size_t i = 0; i < emChildren.size(); ++i) {
+ MatchExpression* emChild = emChildren[i];
+ invariant(NULL != emChild->getTag());
+ scanState->ixtag = static_cast<IndexTag*>(emChild->getTag());
+
+ if (shouldMergeWithLeaf(emChild, *scanState)) {
+ // The child uses the same index we're currently building a scan for. Merge
+ // the bounds and filters.
+ verify(scanState->currentIndexNumber == scanState->ixtag->index);
+
+ scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
+ mergeWithLeafNode(emChild, scanState);
+
+ if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED
+ && !indices[scanState->currentIndexNumber].multikey) {
+ // Add the filter to the current index scan. This is optional because
+ // the entire filter will get affixed to the parent AND. It is here
+ // as an optimization---an additional filter during the index scan
+ // stage will cause fewer documents to bubble up to the parent node
+ // of the execution tree.
+ addFilterToSolutionNode(scanState->currentScan.get(), emChild, root->matchType());
}
- else if (tightness == IndexBoundsBuilder::EXACT) {
- // The bounds answer the predicate, and we can remove the expression from the
- // root. NOTE(opt): Erasing entry 0, 1, 2, ... could be kind of n^2, maybe
- // optimize later.
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
- delete child;
- // Don't increment curChild.
+ }
+ else {
+ if (NULL != scanState->currentScan.get()) {
+ finishAndOutputLeaf(scanState, out);
}
- else if (tightness == IndexBoundsBuilder::INEXACT_COVERED
- && !indices[currentIndexNumber].multikey) {
- // The bounds are not exact, but the information needed to
- // evaluate the predicate is in the index key. Remove the
- // MatchExpression from its parent and attach it to the filter
- // of the index scan we're building.
- //
- // We can only use this optimization if the index is NOT multikey.
- // Suppose that we had the multikey index {x: 1} and a document
- // {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might
- // ever only be applied to the index key "a". We'd incorrectly
- // conclude that the document does not match the query :( so we
- // gotta stick to non-multikey indices.
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
-
- _addFilterToSolutionNode(currentScan.get(), child, root->matchType());
+ else {
+ verify(IndexTag::kNoIndex == scanState->currentIndexNumber);
}
- else if (root->matchType() == MatchExpression::OR) {
- // In the AND case, the filter can be brought above the AND node.
- // But in the OR case, the filter only applies to one branch, so
- // we must affix curChild's filter now. In order to apply the filter
- // to the proper OR branch, create a FETCH node with the filter whose
- // child is the IXSCAN.
- finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
- FetchNode* fetch = new FetchNode();
- // takes ownership
- fetch->filter.reset(child);
- // takes ownership
- fetch->children.push_back(currentScan.release());
- // takes ownership
- out->push_back(fetch);
-
- currentIndexNumber = IndexTag::kNoIndex;
- }
- else {
- // We keep curChild in the AND for affixing later as a filter.
- ++curChild;
+ scanState->currentIndexNumber = scanState->ixtag->index;
+
+ scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
+ scanState->currentScan.reset(makeLeafNode(query, indices[scanState->currentIndexNumber],
+ scanState->ixtag->pos,
+ emChild, &scanState->tightness));
+
+ if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED
+ && !indices[scanState->currentIndexNumber].multikey) {
+ // Add the filter to the current index scan. This is optional because
+ // the entire filter will get affixed to the parent AND. It is here
+ // as an optimization---an additional filter during the index scan
+ // stage will cause fewer documents to bubble up to the parent node
+ // of the execution tree.
+ addFilterToSolutionNode(scanState->currentScan.get(), emChild, root->matchType());
}
}
}
- // Output the scan we're done with, if it exists.
- if (NULL != currentScan.get()) {
- finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
- out->push_back(currentScan.release());
+ // We're done processing the $elemMatch child. We leave it hanging off
+ // it's AND parent so that it will be affixed as a filter later on,
+ // and move on to the next child of the AND.
+ ++scanState->curChild;
+ return true;
+ }
+
+ // static
+ bool QueryPlannerAccess::processIndexScansSubnode(const CanonicalQuery& query,
+ ScanBuildingState* scanState,
+ std::vector<QuerySolutionNode*>* out) {
+ MatchExpression* root = scanState->root;
+ MatchExpression* child = root->getChild(scanState->curChild);
+ const vector<IndexEntry>& indices = scanState->indices;
+ bool inArrayOperator = scanState->inArrayOperator;
+
+ if (MatchExpression::AND == root->matchType() &&
+ MatchExpression::ELEM_MATCH_OBJECT == child->matchType()) {
+ return processIndexScansElemMatch(query, scanState, out);
+ }
+ else if (!inArrayOperator) {
+ // The logical sub-tree is responsible for fully evaluating itself. Any
+ // required filters or fetches are already hung on it. As such, we remove the
+ // filter branch from our tree. buildIndexedDataAccess takes ownership of the
+ // child.
+ root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+ // The curChild of today is the curChild+1 of yesterday.
+ }
+ else {
+ ++scanState->curChild;
}
+ // If inArrayOperator: takes ownership of child, which is OK, since we detached
+ // child from root.
+ QuerySolutionNode* childSolution = buildIndexedDataAccess(query,
+ child,
+ inArrayOperator,
+ indices);
+ if (NULL == childSolution) { return false; }
+ out->push_back(childSolution);
return true;
}
@@ -1216,9 +1159,9 @@ namespace mongo {
}
// static
- void QueryPlannerAccess::_addFilterToSolutionNode(QuerySolutionNode* node,
- MatchExpression* match,
- MatchExpression::MatchType type) {
+ void QueryPlannerAccess::addFilterToSolutionNode(QuerySolutionNode* node,
+ MatchExpression* match,
+ MatchExpression::MatchType type) {
if (NULL == node->filter) {
node->filter.reset(match);
}
@@ -1248,6 +1191,81 @@ namespace mongo {
}
}
+ // static
+ void QueryPlannerAccess::handleFilter(ScanBuildingState* scanState) {
+ if (MatchExpression::OR == scanState->root->matchType()) {
+ handleFilterOr(scanState);
+ }
+ else if (MatchExpression::AND == scanState->root->matchType()) {
+ handleFilterAnd(scanState);
+ }
+ else {
+ // We must be building leaves for either and AND or an OR.
+ invariant(0);
+ }
+ }
+
+ // static
+ void QueryPlannerAccess::handleFilterOr(ScanBuildingState* scanState) {
+ MatchExpression* root = scanState->root;
+ MatchExpression* child = root->getChild(scanState->curChild);
+
+ if (scanState->inArrayOperator) {
+ // We're inside an array operator. The entire array operator expression
+ // should always be affixed as a filter. We keep 'curChild' in the $and
+ // for affixing later.
+ ++scanState->curChild;
+ }
+ else {
+ if (scanState->tightness < scanState->loosestBounds) {
+ scanState->loosestBounds = scanState->tightness;
+ }
+
+ // Detach 'child' and add it to 'curOr'.
+ root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+ scanState->curOr->getChildVector()->push_back(child);
+ }
+ }
+
+ // static
+ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
+ MatchExpression* root = scanState->root;
+ MatchExpression* child = root->getChild(scanState->curChild);
+ const IndexEntry& index = scanState->indices[scanState->currentIndexNumber];
+
+ if (scanState->inArrayOperator) {
+ // We're inside an array operator. The entire array operator expression
+ // should always be affixed as a filter. We keep 'curChild' in the $and
+ // for affixing later.
+ ++scanState->curChild;
+ }
+ else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
+ root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+ delete child;
+ }
+ else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED
+ && (INDEX_TEXT == index.type || !index.multikey)) {
+ // The bounds are not exact, but the information needed to
+ // evaluate the predicate is in the index key. Remove the
+ // MatchExpression from its parent and attach it to the filter
+ // of the index scan we're building.
+ //
+ // We can only use this optimization if the index is NOT multikey.
+ // Suppose that we had the multikey index {x: 1} and a document
+ // {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might
+ // ever only be applied to the index key "a". We'd incorrectly
+ // conclude that the document does not match the query :( so we
+ // gotta stick to non-multikey indices.
+ root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+
+ addFilterToSolutionNode(scanState->currentScan.get(), child, root->matchType());
+ }
+ else {
+ // We keep curChild in the AND for affixing later.
+ ++scanState->curChild;
+ }
+ }
+
QuerySolutionNode* QueryPlannerAccess::makeIndexScan(const IndexEntry& index,
const CanonicalQuery& query,
const QueryPlannerParams& params,
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 71dd60d9f39..177483ea5f6 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -95,6 +95,96 @@ namespace mongo {
class QueryPlannerAccess {
public:
/**
+ * Building the leaves (i.e. the index scans) is done by looping through
+ * predicates one at a time. During the process, there is a fair amount of state
+ * information to keep track of, which we consolidate into this data structure.
+ */
+ struct ScanBuildingState {
+
+ ScanBuildingState(MatchExpression* theRoot,
+ bool inArrayOp,
+ const std::vector<IndexEntry>& indexList)
+ : root(theRoot),
+ inArrayOperator(inArrayOp),
+ indices(indexList),
+ currentScan(NULL),
+ curChild(0),
+ currentIndexNumber(IndexTag::kNoIndex),
+ ixtag(NULL),
+ tightness(IndexBoundsBuilder::INEXACT_FETCH),
+ curOr(NULL),
+ loosestBounds(IndexBoundsBuilder::EXACT) {
+ }
+
+ /**
+ * Reset the scan building state in preparation for building a new scan.
+ *
+ * This always should be called prior to allocating a new 'currentScan'.
+ */
+ void resetForNextScan(IndexTag* newTag) {
+ currentScan.reset(NULL);
+ currentIndexNumber = newTag->index;
+ tightness = IndexBoundsBuilder::INEXACT_FETCH;
+ loosestBounds = IndexBoundsBuilder::EXACT;
+
+ if (MatchExpression::OR == root->matchType()) {
+ curOr.reset(new OrMatchExpression());
+ }
+ }
+
+ // The root of the MatchExpression tree for which we are currently building index
+ // scans. Should be either an AND node or an OR node.
+ MatchExpression* root;
+
+ // Are we inside an array operator such as $elemMatch or $all?
+ bool inArrayOperator;
+
+ // A list of relevant indices which 'root' may be tagged to use.
+ const std::vector<IndexEntry>& indices;
+
+ // The index access node that we are currently constructing. We may merge
+ // multiple tagged predicates into a single index scan.
+ std::auto_ptr<QuerySolutionNode> currentScan;
+
+ // An index into the child vector of 'root'. Indicates the child MatchExpression
+ // for which we are currently either constructing a new scan or which we are about
+ // to merge with 'currentScan'.
+ size_t curChild;
+
+ // An index into the 'indices', so that 'indices[currentIndexNumber]' gives the
+ // index used by 'currentScan'. If there is no currentScan, this should be set
+ // to 'IndexTag::kNoIndex'.
+ size_t currentIndexNumber;
+
+ // The tag on 'curChild'.
+ IndexTag* ixtag;
+
+ // Whether the bounds for predicate 'curChild' are exact, inexact and covered by
+ // the index, or inexact with a fetch required.
+ IndexBoundsBuilder::BoundsTightness tightness;
+
+ // If 'root' is an $or, the child predicates which are tagged with the same index are
+ // detached from the original root and added here. 'curOr' may be attached as a filter
+ // later on, or ignored and cleaned up by the auto_ptr.
+ std::auto_ptr<MatchExpression> curOr;
+
+ // The values of BoundsTightness range from loosest to tightest in this order:
+ //
+ // INEXACT_FETCH < INEXACT_COVERED < EXACT
+ //
+ // 'loosestBounds' stores the smallest of these three values encountered so far for
+ // the current scan. If at least one of the child predicates assigned to the current
+ // index is INEXACT_FETCH, then 'loosestBounds' is INEXACT_FETCH. If at least one of
+ // the child predicates assigned to the current index is INEXACT_COVERED but none are
+ // INEXACT_FETCH, then 'loosestBounds' is INEXACT_COVERED.
+ IndexBoundsBuilder::BoundsTightness loosestBounds;
+
+ private:
+ // Default constructor is not allowed.
+ ScanBuildingState();
+ };
+
+ /**
* Return a CollectionScanNode that scans as requested in 'query'.
*/
static QuerySolutionNode* makeCollectionScan(const CanonicalQuery& query,
@@ -192,6 +282,27 @@ namespace mongo {
const vector<IndexEntry>& indices,
vector<QuerySolutionNode*>* out);
+ /**
+ * Used by processIndexScans(...) in order to recursively build a data access
+ * plan for a "subnode", a node in the MatchExpression tree which is indexed by
+ * virtue of its children.
+ *
+ * The resulting scans are outputted in the out-parameter 'out'.
+ */
+ static bool processIndexScansSubnode(const CanonicalQuery& query,
+ ScanBuildingState* scanState,
+ std::vector<QuerySolutionNode*>* out);
+
+ /**
+ * Used by processIndexScansSubnode(...) to build the leaves of the solution tree for an
+ * ELEM_MATCH_OBJECT node beneath an AND.
+ *
+ * The resulting scans are outputted in the out-parameter 'out'.
+ */
+ static bool processIndexScansElemMatch(const CanonicalQuery& query,
+ ScanBuildingState* scanState,
+ std::vector<QuerySolutionNode*>* out);
+
//
// Helpers for creating an index scan.
//
@@ -214,29 +325,16 @@ namespace mongo {
/**
* Merge the predicate 'expr' with the leaf node 'node'.
*/
- static void mergeWithLeafNode(MatchExpression* expr,
- const IndexEntry& index,
- size_t pos,
- IndexBoundsBuilder::BoundsTightness* tightnessOut,
- QuerySolutionNode* node,
- MatchExpression::MatchType mergeType);
+ static void mergeWithLeafNode(MatchExpression* expr, ScanBuildingState* scanState);
/**
* Determines whether it is safe to merge the expression 'expr' with
- * the leaf node of the query solution, 'node'.
- *
- * 'index' provides information about the index used by 'node'.
- * 'pos' gives the position in the index (for compound indices) that
- * 'expr' needs to use. Finally, 'mergeType' indicates whether we
- * will try to merge using an AND or OR.
+ * the leaf node of the query solution contained in 'scanState'.
*
* Does not take ownership of its arguments.
*/
static bool shouldMergeWithLeaf(const MatchExpression* expr,
- const IndexEntry& index,
- size_t pos,
- QuerySolutionNode* node,
- MatchExpression::MatchType mergeType);
+ const ScanBuildingState& scanState);
/**
* If index scan (regular or expression index), fill in any bounds that are missing in
@@ -247,9 +345,19 @@ namespace mongo {
*/
static void finishLeafNode(QuerySolutionNode* node, const IndexEntry& index);
+ /**
+ * Fills in any missing bounds by calling finishLeafNode(...) for the scan contained in
+ * 'scanState'. The resulting scan is outputted in the out-parameter 'out', transferring
+ * ownership in the process.
+ *
+ * If 'scanState' is building an index scan for OR-related predicates, filters
+ * may be affixed to the scan as necessary.
+ */
+ static void finishAndOutputLeaf(ScanBuildingState* scanState,
+ std::vector<QuerySolutionNode*>* out);
+
static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index);
- private:
/**
* Add the filter 'match' to the query solution node 'node'. Takes
* ownership of 'match'.
@@ -257,8 +365,35 @@ namespace mongo {
* The MatchType, 'type', indicates whether 'match' is a child of an
* AND or an OR match expression.
*/
- static void _addFilterToSolutionNode(QuerySolutionNode* node, MatchExpression* match,
- MatchExpression::MatchType type);
+ static void addFilterToSolutionNode(QuerySolutionNode* node, MatchExpression* match,
+ MatchExpression::MatchType type);
+
+ /**
+ * Once a predicate is merged into the current scan, there are a few things we might
+ * want to do with the filter:
+ * 1) Detach the filter from its parent and delete it because the predicate is
+ * answered by exact index bounds.
+ * 2) Leave the filter alone so that it can be affixed as part of a fetch node later.
+ * 3) Detach the filter from its parent and attach it directly to an index scan node.
+ * We can sometimes due this for INEXACT_COVERED predicates which are not answered exactly
+ * by the bounds, but can be answered by examing the data in the index key.
+ * 4) Detach the filter from its parent and attach it as a child of a separate
+ * MatchExpression tree. This is done for proper handling of inexact bounds for $or
+ * queries.
+ *
+ * This executes one of the four options above, according to the data in 'scanState'.
+ */
+ static void handleFilter(ScanBuildingState* scanState);
+
+ /**
+ * Implements handleFilter(...) for OR queries.
+ */
+ static void handleFilterAnd(ScanBuildingState* scanState);
+
+ /**
+ * Implements handleFilter(...) for AND queries.
+ */
+ static void handleFilterOr(ScanBuildingState* scanState);
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 8e465a40287..f514b579dd7 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -878,6 +878,33 @@ namespace {
" b: [[1,1,true,true], [5,5,true,true]]}}}]}}}}");
}
+ // SERVER-13960: properly handle $or with a mix of exact and inexact predicates.
+ TEST_F(QueryPlannerTest, OrInexactWithExact) {
+ addIndex(BSON("name" << 1));
+ runQuery(fromjson("{$or: [{name: 'thomas'}, {name: /^alexand(er|ra)/}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists("{fetch: {node: {ixscan: {filter:"
+ "{$or: [{name: 'thomas'}, {name: /^alexand(er|ra)/}]},"
+ "pattern: {name: 1}}}}}");
+ }
+
+ // SERVER-13960
+ TEST_F(QueryPlannerTest, OrInexactWithExact2) {
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+ runQuery(fromjson("{$or: [{a: 'foo'}, {a: /bar/}, {b: 'foo'}, {b: /bar/}]}"));
+
+ assertNumSolutions(2U);
+ assertSolutionExists("{cscan: {dir: 1}}");
+ assertSolutionExists("{fetch: {node: {or: {nodes: ["
+ "{ixscan: {filter: {$or:[{a:'foo'},{a:/bar/}]},"
+ "pattern: {a: 1}}},"
+ "{ixscan: {filter: {$or:[{b:'foo'},{b:/bar/}]},"
+ "pattern: {b: 1}}}]}}}}");
+ }
+
//
// Min/Max
//
@@ -1856,9 +1883,7 @@ namespace {
assertNumSolutions(2U);
assertSolutionExists("{cscan: {dir: 1}}");
- assertSolutionExists("{or: {nodes: ["
- "{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}},"
- "{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}}]}}");
+ assertSolutionExists("{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}}");
}
TEST_F(QueryPlannerTest, Or2DSphereSameFieldNonNearMultikey) {
@@ -1871,9 +1896,7 @@ namespace {
assertNumSolutions(2U);
assertSolutionExists("{cscan: {dir: 1}}");
- assertSolutionExists("{or: {nodes: ["
- "{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}},"
- "{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}}]}}");
+ assertSolutionExists("{fetch: {node: {ixscan: {pattern: {a: '2dsphere'}}}}}");
}
TEST_F(QueryPlannerTest, CompoundMultikey2DSphereNear) {