summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2017-10-10 19:08:11 -0400
committerDavid Storch <david.storch@10gen.com>2017-10-13 17:58:01 -0400
commit744738bd23a5aed625dc1eed89851824fcf5e33a (patch)
tree1dbde58792c15fc42f5d6a064e306c0d6d7a5ab5 /src/mongo/db/query
parent10dbb695223d08dd5a4412e1319228496d606b13 (diff)
downloadmongo-744738bd23a5aed625dc1eed89851824fcf5e33a.tar.gz
SERVER-21011 Fix query correctness problem related to covered matching for 2d/text indexes.
The fix ensures that the tightness predicates over the trailing fields of 2d/text indexes is checked. Predicates which are INEXACT_FETCH will then get affixed to the FETCH stage of the plan rather than incorrectly affixed to the IXSCAN.
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp8
-rw-r--r--src/mongo/db/query/index_bounds_builder.h11
-rw-r--r--src/mongo/db/query/index_bounds_builder_test.cpp50
-rw-r--r--src/mongo/db/query/planner_access.cpp95
-rw-r--r--src/mongo/db/query/query_planner_geo_test.cpp78
-rw-r--r--src/mongo/db/query/query_planner_text_test.cpp58
-rw-r--r--src/mongo/db/query/query_solution.h7
7 files changed, 259 insertions, 48 deletions
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 38d53bb1bf9..cf94e16822c 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -249,6 +249,14 @@ bool typeMatch(const BSONObj& obj) {
return first.canonicalType() == second.canonicalType();
}
+bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr,
+ const IndexEntry& index) {
+ IndexBoundsBuilder::BoundsTightness tightness;
+ OrderedIntervalList oil;
+ translate(expr, BSONElement{}, index, &oil, &tightness);
+ return tightness >= IndexBoundsBuilder::INEXACT_COVERED;
+}
+
// static
void IndexBoundsBuilder::translate(const MatchExpression* expr,
const BSONElement& elt,
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 6a4a74a94b0..9e58584e698 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -70,6 +70,17 @@ public:
static void allValuesForField(const BSONElement& elt, OrderedIntervalList* out);
/**
+ * Returns true if 'expr' can correctly be assigned as an INEXACT_COVERED predicate to an index
+ * scan over 'index'.
+ *
+ * The result of this function is not meaningful when the predicate applies to special fields
+ * such as "hashed", "2d", or "2dsphere". That is, the caller is responsible for ensuring that
+ * 'expr' is a candidate for covered matching over a regular ascending/descending field of the
+ * index.
+ */
+ static bool canUseCoveredMatching(const MatchExpression* expr, const IndexEntry& index);
+
+ /**
* Turn the MatchExpression in 'expr' into a set of index bounds. The field that 'expr' is
* concerned with is indexed according to the keypattern element 'elt' from index 'index'.
*
diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp
index f7d0e23794e..264972d017f 100644
--- a/src/mongo/db/query/index_bounds_builder_test.cpp
+++ b/src/mongo/db/query/index_bounds_builder_test.cpp
@@ -2209,4 +2209,54 @@ TEST(IndexBoundsBuilderTest, RedundantTypeNumberHasCorrectBounds) {
ASSERT(tightness == IndexBoundsBuilder::INEXACT_FETCH);
}
+TEST(IndexBoundsBuilderTest, CanUseCoveredMatchingForEqualityPredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: {$eq: 3}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_TRUE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForEqualityToArrayPredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: {$eq: [1, 2, 3]}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForEqualityToNullPredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: null}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForTypeArrayPredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: {$type: 'array'}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForExistsTruePredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: {$exists: true}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForExistsFalsePredicate) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ BSONObj obj = fromjson("{a: {$exists: false}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
+TEST(IndexBoundsBuilderTest, CanUseCoveredMatchingForExistsTrueWithSparseIndex) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ testIndex.sparse = true;
+ BSONObj obj = fromjson("{a: {$exists: true}}");
+ unique_ptr<MatchExpression> expr(parseMatchExpression(obj));
+ ASSERT_TRUE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex));
+}
+
} // namespace
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 8494fc320f4..ac1ecae8127 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -227,6 +227,17 @@ QuerySolutionNode* QueryPlannerAccess::makeLeafNode(
TextMatchExpressionBase* textExpr = static_cast<TextMatchExpressionBase*>(expr);
TextNode* ret = new TextNode(index);
ret->ftsQuery = textExpr->getFTSQuery().clone();
+
+ // Count the number of prefix fields before the "text" field.
+ for (auto&& keyPatternElt : ret->index.keyPattern) {
+ // We know that the only key pattern with a type of String is the _fts field
+ // which is immediately after all prefix fields.
+ if (BSONType::String == keyPatternElt.type()) {
+ break;
+ }
+ ++(ret->numPrefixFields);
+ }
+
return ret;
} else {
// Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path()
@@ -338,10 +349,24 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
const StageType type = node->getType();
- // Text data is covered, but not exactly. Text covering is unlike any other covering
- // so we deal with it in addFilterToSolutionNode.
if (STAGE_TEXT == type) {
- scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
+ auto textNode = static_cast<TextNode*>(node);
+
+ if (pos < textNode->numPrefixFields) {
+ // This predicate is assigned to one of the prefix fields of the text index. Such
+ // predicates must always be equalities and must always be attached to the TEXT node. In
+ // order to ensure this happens, we assign INEXACT_COVERED tightness.
+ scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
+ } else {
+ // The predicate is assigned to one of the trailing fields of the text index. We
+ // currently don't generate bounds for predicates assigned to trailing fields of a text
+ // index, but rather attempt to attach a covered filter. However, certain predicates can
+ // never be correctly covered (e.g. $exists), so we assign the tightness accordingly.
+ scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index)
+ ? IndexBoundsBuilder::INEXACT_COVERED
+ : IndexBoundsBuilder::INEXACT_FETCH;
+ }
+
return;
}
@@ -350,20 +375,23 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
if (STAGE_GEO_NEAR_2D == type) {
invariant(INDEX_2D == index.type);
- // 2D indexes are weird - the "2d" field stores a normally-indexed BinData field, but
- // additional array fields are *not* exploded into multi-keys - they are stored directly
- // as arrays in the index. Also, no matter what the index expression, the "2d" field is
- // always first.
- // This means that we can only generically accumulate bounds for 2D indexes over the
- // first "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may
- // be covered (can be evaluated using only the 2D index key). The additional fields
- // must not affect the index scan bounds, since they are not stored in an
- // IndexScan-compatible format.
+ // 2D indexes have a special format - the "2d" field stores a normally-indexed BinData
+ // field, but additional array fields are *not* exploded into multi-keys - they are stored
+ // directly as arrays in the index. Also, no matter what the index expression, the "2d"
+ // field is always first.
+ //
+ // This means that we can only generically accumulate bounds for 2D indexes over the first
+ // "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may be covered
+ // (can be evaluated using only the 2D index key). The additional fields must not affect
+ // the index scan bounds, since they are not stored in an IndexScan-compatible format.
if (pos > 0) {
- // Marking this field as covered allows the planner to accumulate a MatchExpression
- // over the returned 2D index keys instead of adding to the index bounds.
- scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
+ // The predicate is over a trailing field of the "2d" index. If possible, we assign it
+ // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be
+ // evaluated after fetching the full documents.
+ scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index)
+ ? IndexBoundsBuilder::INEXACT_COVERED
+ : IndexBoundsBuilder::INEXACT_FETCH;
return;
}
@@ -377,10 +405,15 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
verify(type == STAGE_IXSCAN);
IndexScanNode* scan = static_cast<IndexScanNode*>(node);
- // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the
- // first "2d" field (pos == 0)
+ // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the first
+ // "2d" field (pos == 0).
if (INDEX_2D == index.type && pos > 0) {
- scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
+ // The predicate is over a trailing field of the "2d" index. If possible, we assign it
+ // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be
+ // evaluated after fetching the full documents.
+ scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index)
+ ? IndexBoundsBuilder::INEXACT_COVERED
+ : IndexBoundsBuilder::INEXACT_FETCH;
return;
}
@@ -419,25 +452,9 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) {
TextNode* tn = static_cast<TextNode*>(node);
- // Figure out what positions are prefix positions. We build an index key prefix from
- // the predicates over the text index prefix keys.
- // For example, say keyPattern = { a: 1, _fts: "text", _ftsx: 1, b: 1 }
- // prefixEnd should be 1.
- size_t prefixEnd = 0;
- BSONObjIterator it(tn->index.keyPattern);
- // Count how many prefix terms we have.
- while (it.more()) {
- // We know that the only key pattern with a type of String is the _fts field
- // which is immediately after all prefix fields.
- if (String == it.next().type()) {
- break;
- }
- ++prefixEnd;
- }
-
// If there's no prefix, the filter is already on the node and the index prefix is null.
// We can just return.
- if (!prefixEnd) {
+ if (!tn->numPrefixFields) {
return;
}
@@ -451,7 +468,7 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
if (MatchExpression::AND != textFilterMe->matchType()) {
// Only one prefix term.
- invariant(1 == prefixEnd);
+ invariant(1u == tn->numPrefixFields);
// Sanity check: must be an EQ.
invariant(MatchExpression::EQ == textFilterMe->matchType());
@@ -463,10 +480,10 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
// Indexed by the keyPattern position index assignment. We want to add
// prefixes in order but we must order them first.
- vector<MatchExpression*> prefixExprs(prefixEnd, NULL);
+ vector<MatchExpression*> prefixExprs(tn->numPrefixFields, nullptr);
AndMatchExpression* amExpr = static_cast<AndMatchExpression*>(textFilterMe);
- invariant(amExpr->numChildren() >= prefixEnd);
+ invariant(amExpr->numChildren() >= tn->numPrefixFields);
// Look through the AND children. The prefix children we want to
// stash in prefixExprs.
@@ -477,7 +494,7 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
invariant(NULL != ixtag);
// Skip this child if it's not part of a prefix, or if we've already assigned a
// predicate to this prefix position.
- if (ixtag->pos >= prefixEnd || prefixExprs[ixtag->pos] != NULL) {
+ if (ixtag->pos >= tn->numPrefixFields || prefixExprs[ixtag->pos] != NULL) {
++curChild;
continue;
}
diff --git a/src/mongo/db/query/query_planner_geo_test.cpp b/src/mongo/db/query/query_planner_geo_test.cpp
index a4efc42195b..d9474eddae3 100644
--- a/src/mongo/db/query/query_planner_geo_test.cpp
+++ b/src/mongo/db/query/query_planner_geo_test.cpp
@@ -240,24 +240,22 @@ TEST_F(QueryPlannerTest, Multikey2DSphereGeoNearReverseCompound) {
}
TEST_F(QueryPlannerTest, 2DNonNearContainedOr) {
- addIndex(BSON("x" << 1 << "a"
- << "2d"));
+ addIndex(BSON("a"
+ << "2d"
+ << "x"
+ << 1));
addIndex(BSON("y" << 1));
runQuery(
fromjson("{$and: [{x: 1}, {$or: [{a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, "
"0]]}}}, {y: 1}]}]}"));
- assertNumSolutions(3U);
+ assertNumSolutions(2U);
assertSolutionExists(
"{fetch: {filter: {x: 1}, node: {or: {nodes: ["
- "{ixscan: {pattern: {x: 1, a: '2d'}, bounds: {x: [[1, 1, true, true]]}}},"
+ "{fetch: {filter: {a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, 0]]}}},"
+ "node: {ixscan: {pattern: {a: '2d', x: 1}, filter: {x: 1}}}}},"
"{ixscan: {pattern: {y: 1}, bounds: {y: [[1, 1, true, true]]}}}"
"]}}}}");
- assertSolutionExists(
- "{fetch: {filter: {$or: [{a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, 0]]}}}, {y: "
- "1}]}, node: "
- "{ixscan: {pattern: {x: 1, a: '2d'}, bounds: {x: [[1, 1, true, true]], a: [['MinKey', "
- "'MaxKey', true, true]]}}}}}");
assertSolutionExists("{cscan: {dir: 1}}}}");
}
@@ -1592,4 +1590,66 @@ TEST_F(QueryPlanner2dsphereVersionTest, NegationWithoutGeoPredCannotUseGeoIndex)
testMultiple2dsphereIndexVersions(versions, keyPatterns, predicate, solutions);
}
+TEST_F(QueryPlannerTest, 2dInexactFetchPredicateOverTrailingFieldHandledCorrectly) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+
+ addIndex(BSON("a"
+ << "2d"
+ << "b"
+ << 1));
+
+ runQuery(fromjson("{a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}, node: "
+ "{ixscan: {filter: null, pattern: {a: '2d', b: 1}}}}}");
+}
+
+TEST_F(QueryPlannerTest, 2dInexactFetchPredicateOverTrailingFieldHandledCorrectlyMultikey) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+
+ const bool multikey = true;
+ addIndex(BSON("a"
+ << "2d"
+ << "b"
+ << 1),
+ multikey);
+
+ runQuery(fromjson("{a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}, node: "
+ "{ixscan: {filter: null, pattern: {a: '2d', b: 1}}}}}");
+}
+
+TEST_F(QueryPlannerTest, 2dNearInexactFetchPredicateOverTrailingFieldHandledCorrectly) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+
+ addIndex(BSON("a"
+ << "2d"
+ << "b"
+ << 1));
+
+ runQuery(fromjson("{a: {$near: [0, 0]}, b: {$exists: true}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$exists: true}}, node: {geoNear2d: {a: '2d', b: 1}}}}");
+}
+
+TEST_F(QueryPlannerTest, 2dNearInexactFetchPredicateOverTrailingFieldMultikey) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+
+ const bool multikey = true;
+ addIndex(BSON("a"
+ << "2d"
+ << "b"
+ << 1),
+ multikey);
+
+ runQuery(fromjson("{a: {$near: [0, 0]}, b: {$exists: true}}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$exists: true}}, node: {geoNear2d: {a: '2d', b: 1}}}}");
+}
+
} // namespace
diff --git a/src/mongo/db/query/query_planner_text_test.cpp b/src/mongo/db/query/query_planner_text_test.cpp
index dad9250402b..1488ddd849c 100644
--- a/src/mongo/db/query/query_planner_text_test.cpp
+++ b/src/mongo/db/query/query_planner_text_test.cpp
@@ -487,4 +487,62 @@ TEST_F(QueryPlannerTest, PredicatesOverLeadingFieldsWithSharedPathPrefixHandledC
"{text: {search: 'foo', prefix: {'a.x': 1, 'a.y': 2, 'b.x': 3, 'b.y': 4}}}");
}
+TEST_F(QueryPlannerTest, EqualityToArrayOverLeadingFieldHandledCorrectly) {
+ addIndex(BSON("a" << 1 << "_fts"
+ << "text"
+ << "_ftsx"
+ << 1));
+
+ runQuery(fromjson("{a: [1, 2, 3], $text: {$search: 'foo'}}"));
+
+ assertNumSolutions(1U);
+ assertSolutionExists("{text: {search: 'foo', prefix: {a: [1, 2, 3]}}}");
+}
+
+TEST_F(QueryPlannerTest, EqualityToArrayOverLeadingFieldHandledCorrectlyWithMultikeyTrue) {
+ const bool multikey = true;
+ addIndex(BSON("a" << 1 << "_fts"
+ << "text"
+ << "_ftsx"
+ << 1),
+ multikey);
+
+ runQuery(fromjson("{a: [1, 2, 3], $text: {$search: 'foo'}}"));
+
+ assertNumSolutions(1U);
+ assertSolutionExists("{text: {search: 'foo', prefix: {a: [1, 2, 3]}}}");
+}
+
+TEST_F(QueryPlannerTest, InexactFetchPredicateOverTrailingFieldHandledCorrectly) {
+ addIndex(BSON("a" << 1 << "_fts"
+ << "text"
+ << "_ftsx"
+ << 1
+ << "b"
+ << 1));
+
+ runQuery(fromjson("{a: 3, $text: {$search: 'foo'}, b: {$exists: true}}"));
+
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$exists: true}}, node: {text: {search: 'foo', prefix: {a: 3}}}}}");
+}
+
+TEST_F(QueryPlannerTest, InexactFetchPredicateOverTrailingFieldHandledCorrectlyMultikeyTrue) {
+ const bool multikey = true;
+ addIndex(BSON("a" << 1 << "_fts"
+ << "text"
+ << "_ftsx"
+ << 1
+ << "b"
+ << 1),
+ multikey);
+
+ runQuery(fromjson("{a: 3, $text: {$search: 'foo'}, b: {$exists: true}}"));
+
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {b: {$exists: true}}, node: {text: {search: 'foo', prefix: {a: 3}}}}}");
+}
+
} // namespace
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 68484ed8d66..9d3735edc02 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -250,6 +250,13 @@ struct TextNode : public QuerySolutionNode {
IndexEntry index;
std::unique_ptr<fts::FTSQuery> ftsQuery;
+ // The number of fields in the prefix of the text index. For example, if the key pattern is
+ //
+ // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 }
+ //
+ // then the number of prefix fields is 2, because of "a" and "b".
+ size_t numPrefixFields = 0u;
+
// "Prefix" fields of a text index can handle equality predicates. We group them with the
// text node while creating the text leaf node and convert them into a BSONObj index prefix
// when we finish the text leaf node.