diff options
author | David Storch <david.storch@10gen.com> | 2017-10-10 19:08:11 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-10-13 17:58:01 -0400 |
commit | 744738bd23a5aed625dc1eed89851824fcf5e33a (patch) | |
tree | 1dbde58792c15fc42f5d6a064e306c0d6d7a5ab5 /src/mongo/db/query | |
parent | 10dbb695223d08dd5a4412e1319228496d606b13 (diff) | |
download | mongo-744738bd23a5aed625dc1eed89851824fcf5e33a.tar.gz |
SERVER-21011 Fix query correctness problem related to covered matching for 2d/text indexes.
The fix ensures that the tightness predicates over the
trailing fields of 2d/text indexes is checked. Predicates
which are INEXACT_FETCH will then get affixed to the FETCH
stage of the plan rather than incorrectly affixed to the
IXSCAN.
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder_test.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 95 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_geo_test.cpp | 78 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_text_test.cpp | 58 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 7 |
7 files changed, 259 insertions, 48 deletions
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 38d53bb1bf9..cf94e16822c 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -249,6 +249,14 @@ bool typeMatch(const BSONObj& obj) { return first.canonicalType() == second.canonicalType(); } +bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr, + const IndexEntry& index) { + IndexBoundsBuilder::BoundsTightness tightness; + OrderedIntervalList oil; + translate(expr, BSONElement{}, index, &oil, &tightness); + return tightness >= IndexBoundsBuilder::INEXACT_COVERED; +} + // static void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt, diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 6a4a74a94b0..9e58584e698 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -70,6 +70,17 @@ public: static void allValuesForField(const BSONElement& elt, OrderedIntervalList* out); /** + * Returns true if 'expr' can correctly be assigned as an INEXACT_COVERED predicate to an index + * scan over 'index'. + * + * The result of this function is not meaningful when the predicate applies to special fields + * such as "hashed", "2d", or "2dsphere". That is, the caller is responsible for ensuring that + * 'expr' is a candidate for covered matching over a regular ascending/descending field of the + * index. + */ + static bool canUseCoveredMatching(const MatchExpression* expr, const IndexEntry& index); + + /** * Turn the MatchExpression in 'expr' into a set of index bounds. The field that 'expr' is * concerned with is indexed according to the keypattern element 'elt' from index 'index'. * diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp index f7d0e23794e..264972d017f 100644 --- a/src/mongo/db/query/index_bounds_builder_test.cpp +++ b/src/mongo/db/query/index_bounds_builder_test.cpp @@ -2209,4 +2209,54 @@ TEST(IndexBoundsBuilderTest, RedundantTypeNumberHasCorrectBounds) { ASSERT(tightness == IndexBoundsBuilder::INEXACT_FETCH); } +TEST(IndexBoundsBuilderTest, CanUseCoveredMatchingForEqualityPredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: {$eq: 3}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_TRUE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForEqualityToArrayPredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: {$eq: [1, 2, 3]}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForEqualityToNullPredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: null}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForTypeArrayPredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: {$type: 'array'}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForExistsTruePredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: {$exists: true}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CannotUseCoveredMatchingForExistsFalsePredicate) { + IndexEntry testIndex = IndexEntry(BSONObj()); + BSONObj obj = fromjson("{a: {$exists: false}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_FALSE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + +TEST(IndexBoundsBuilderTest, CanUseCoveredMatchingForExistsTrueWithSparseIndex) { + IndexEntry testIndex = IndexEntry(BSONObj()); + testIndex.sparse = true; + BSONObj obj = fromjson("{a: {$exists: true}}"); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + ASSERT_TRUE(IndexBoundsBuilder::canUseCoveredMatching(expr.get(), testIndex)); +} + } // namespace diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 8494fc320f4..ac1ecae8127 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -227,6 +227,17 @@ QuerySolutionNode* QueryPlannerAccess::makeLeafNode( TextMatchExpressionBase* textExpr = static_cast<TextMatchExpressionBase*>(expr); TextNode* ret = new TextNode(index); ret->ftsQuery = textExpr->getFTSQuery().clone(); + + // Count the number of prefix fields before the "text" field. + for (auto&& keyPatternElt : ret->index.keyPattern) { + // We know that the only key pattern with a type of String is the _fts field + // which is immediately after all prefix fields. + if (BSONType::String == keyPatternElt.type()) { + break; + } + ++(ret->numPrefixFields); + } + return ret; } else { // Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path() @@ -338,10 +349,24 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt const StageType type = node->getType(); - // Text data is covered, but not exactly. Text covering is unlike any other covering - // so we deal with it in addFilterToSolutionNode. if (STAGE_TEXT == type) { - scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; + auto textNode = static_cast<TextNode*>(node); + + if (pos < textNode->numPrefixFields) { + // This predicate is assigned to one of the prefix fields of the text index. Such + // predicates must always be equalities and must always be attached to the TEXT node. In + // order to ensure this happens, we assign INEXACT_COVERED tightness. + scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; + } else { + // The predicate is assigned to one of the trailing fields of the text index. We + // currently don't generate bounds for predicates assigned to trailing fields of a text + // index, but rather attempt to attach a covered filter. However, certain predicates can + // never be correctly covered (e.g. $exists), so we assign the tightness accordingly. + scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) + ? IndexBoundsBuilder::INEXACT_COVERED + : IndexBoundsBuilder::INEXACT_FETCH; + } + return; } @@ -350,20 +375,23 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt if (STAGE_GEO_NEAR_2D == type) { invariant(INDEX_2D == index.type); - // 2D indexes are weird - the "2d" field stores a normally-indexed BinData field, but - // additional array fields are *not* exploded into multi-keys - they are stored directly - // as arrays in the index. Also, no matter what the index expression, the "2d" field is - // always first. - // This means that we can only generically accumulate bounds for 2D indexes over the - // first "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may - // be covered (can be evaluated using only the 2D index key). The additional fields - // must not affect the index scan bounds, since they are not stored in an - // IndexScan-compatible format. + // 2D indexes have a special format - the "2d" field stores a normally-indexed BinData + // field, but additional array fields are *not* exploded into multi-keys - they are stored + // directly as arrays in the index. Also, no matter what the index expression, the "2d" + // field is always first. + // + // This means that we can only generically accumulate bounds for 2D indexes over the first + // "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may be covered + // (can be evaluated using only the 2D index key). The additional fields must not affect + // the index scan bounds, since they are not stored in an IndexScan-compatible format. if (pos > 0) { - // Marking this field as covered allows the planner to accumulate a MatchExpression - // over the returned 2D index keys instead of adding to the index bounds. - scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; + // The predicate is over a trailing field of the "2d" index. If possible, we assign it + // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be + // evaluated after fetching the full documents. + scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) + ? IndexBoundsBuilder::INEXACT_COVERED + : IndexBoundsBuilder::INEXACT_FETCH; return; } @@ -377,10 +405,15 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast<IndexScanNode*>(node); - // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the - // first "2d" field (pos == 0) + // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the first + // "2d" field (pos == 0). if (INDEX_2D == index.type && pos > 0) { - scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; + // The predicate is over a trailing field of the "2d" index. If possible, we assign it + // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be + // evaluated after fetching the full documents. + scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) + ? IndexBoundsBuilder::INEXACT_COVERED + : IndexBoundsBuilder::INEXACT_FETCH; return; } @@ -419,25 +452,9 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) { TextNode* tn = static_cast<TextNode*>(node); - // Figure out what positions are prefix positions. We build an index key prefix from - // the predicates over the text index prefix keys. - // For example, say keyPattern = { a: 1, _fts: "text", _ftsx: 1, b: 1 } - // prefixEnd should be 1. - size_t prefixEnd = 0; - BSONObjIterator it(tn->index.keyPattern); - // Count how many prefix terms we have. - while (it.more()) { - // We know that the only key pattern with a type of String is the _fts field - // which is immediately after all prefix fields. - if (String == it.next().type()) { - break; - } - ++prefixEnd; - } - // If there's no prefix, the filter is already on the node and the index prefix is null. // We can just return. - if (!prefixEnd) { + if (!tn->numPrefixFields) { return; } @@ -451,7 +468,7 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr if (MatchExpression::AND != textFilterMe->matchType()) { // Only one prefix term. - invariant(1 == prefixEnd); + invariant(1u == tn->numPrefixFields); // Sanity check: must be an EQ. invariant(MatchExpression::EQ == textFilterMe->matchType()); @@ -463,10 +480,10 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr // Indexed by the keyPattern position index assignment. We want to add // prefixes in order but we must order them first. - vector<MatchExpression*> prefixExprs(prefixEnd, NULL); + vector<MatchExpression*> prefixExprs(tn->numPrefixFields, nullptr); AndMatchExpression* amExpr = static_cast<AndMatchExpression*>(textFilterMe); - invariant(amExpr->numChildren() >= prefixEnd); + invariant(amExpr->numChildren() >= tn->numPrefixFields); // Look through the AND children. The prefix children we want to // stash in prefixExprs. @@ -477,7 +494,7 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr invariant(NULL != ixtag); // Skip this child if it's not part of a prefix, or if we've already assigned a // predicate to this prefix position. - if (ixtag->pos >= prefixEnd || prefixExprs[ixtag->pos] != NULL) { + if (ixtag->pos >= tn->numPrefixFields || prefixExprs[ixtag->pos] != NULL) { ++curChild; continue; } diff --git a/src/mongo/db/query/query_planner_geo_test.cpp b/src/mongo/db/query/query_planner_geo_test.cpp index a4efc42195b..d9474eddae3 100644 --- a/src/mongo/db/query/query_planner_geo_test.cpp +++ b/src/mongo/db/query/query_planner_geo_test.cpp @@ -240,24 +240,22 @@ TEST_F(QueryPlannerTest, Multikey2DSphereGeoNearReverseCompound) { } TEST_F(QueryPlannerTest, 2DNonNearContainedOr) { - addIndex(BSON("x" << 1 << "a" - << "2d")); + addIndex(BSON("a" + << "2d" + << "x" + << 1)); addIndex(BSON("y" << 1)); runQuery( fromjson("{$and: [{x: 1}, {$or: [{a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, " "0]]}}}, {y: 1}]}]}")); - assertNumSolutions(3U); + assertNumSolutions(2U); assertSolutionExists( "{fetch: {filter: {x: 1}, node: {or: {nodes: [" - "{ixscan: {pattern: {x: 1, a: '2d'}, bounds: {x: [[1, 1, true, true]]}}}," + "{fetch: {filter: {a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, 0]]}}}," + "node: {ixscan: {pattern: {a: '2d', x: 1}, filter: {x: 1}}}}}," "{ixscan: {pattern: {y: 1}, bounds: {y: [[1, 1, true, true]]}}}" "]}}}}"); - assertSolutionExists( - "{fetch: {filter: {$or: [{a: {$within: {$polygon: [[0, 0], [0, 1], [1, 0], [0, 0]]}}}, {y: " - "1}]}, node: " - "{ixscan: {pattern: {x: 1, a: '2d'}, bounds: {x: [[1, 1, true, true]], a: [['MinKey', " - "'MaxKey', true, true]]}}}}}"); assertSolutionExists("{cscan: {dir: 1}}}}"); } @@ -1592,4 +1590,66 @@ TEST_F(QueryPlanner2dsphereVersionTest, NegationWithoutGeoPredCannotUseGeoIndex) testMultiple2dsphereIndexVersions(versions, keyPatterns, predicate, solutions); } +TEST_F(QueryPlannerTest, 2dInexactFetchPredicateOverTrailingFieldHandledCorrectly) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + + addIndex(BSON("a" + << "2d" + << "b" + << 1)); + + runQuery(fromjson("{a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}, node: " + "{ixscan: {filter: null, pattern: {a: '2d', b: 1}}}}}"); +} + +TEST_F(QueryPlannerTest, 2dInexactFetchPredicateOverTrailingFieldHandledCorrectlyMultikey) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + + const bool multikey = true; + addIndex(BSON("a" + << "2d" + << "b" + << 1), + multikey); + + runQuery(fromjson("{a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$geoWithin: {$center: [[0, 0], 1]}}, b: {$exists: true}}, node: " + "{ixscan: {filter: null, pattern: {a: '2d', b: 1}}}}}"); +} + +TEST_F(QueryPlannerTest, 2dNearInexactFetchPredicateOverTrailingFieldHandledCorrectly) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + + addIndex(BSON("a" + << "2d" + << "b" + << 1)); + + runQuery(fromjson("{a: {$near: [0, 0]}, b: {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {b: {$exists: true}}, node: {geoNear2d: {a: '2d', b: 1}}}}"); +} + +TEST_F(QueryPlannerTest, 2dNearInexactFetchPredicateOverTrailingFieldMultikey) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + + const bool multikey = true; + addIndex(BSON("a" + << "2d" + << "b" + << 1), + multikey); + + runQuery(fromjson("{a: {$near: [0, 0]}, b: {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {b: {$exists: true}}, node: {geoNear2d: {a: '2d', b: 1}}}}"); +} + } // namespace diff --git a/src/mongo/db/query/query_planner_text_test.cpp b/src/mongo/db/query/query_planner_text_test.cpp index dad9250402b..1488ddd849c 100644 --- a/src/mongo/db/query/query_planner_text_test.cpp +++ b/src/mongo/db/query/query_planner_text_test.cpp @@ -487,4 +487,62 @@ TEST_F(QueryPlannerTest, PredicatesOverLeadingFieldsWithSharedPathPrefixHandledC "{text: {search: 'foo', prefix: {'a.x': 1, 'a.y': 2, 'b.x': 3, 'b.y': 4}}}"); } +TEST_F(QueryPlannerTest, EqualityToArrayOverLeadingFieldHandledCorrectly) { + addIndex(BSON("a" << 1 << "_fts" + << "text" + << "_ftsx" + << 1)); + + runQuery(fromjson("{a: [1, 2, 3], $text: {$search: 'foo'}}")); + + assertNumSolutions(1U); + assertSolutionExists("{text: {search: 'foo', prefix: {a: [1, 2, 3]}}}"); +} + +TEST_F(QueryPlannerTest, EqualityToArrayOverLeadingFieldHandledCorrectlyWithMultikeyTrue) { + const bool multikey = true; + addIndex(BSON("a" << 1 << "_fts" + << "text" + << "_ftsx" + << 1), + multikey); + + runQuery(fromjson("{a: [1, 2, 3], $text: {$search: 'foo'}}")); + + assertNumSolutions(1U); + assertSolutionExists("{text: {search: 'foo', prefix: {a: [1, 2, 3]}}}"); +} + +TEST_F(QueryPlannerTest, InexactFetchPredicateOverTrailingFieldHandledCorrectly) { + addIndex(BSON("a" << 1 << "_fts" + << "text" + << "_ftsx" + << 1 + << "b" + << 1)); + + runQuery(fromjson("{a: 3, $text: {$search: 'foo'}, b: {$exists: true}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {b: {$exists: true}}, node: {text: {search: 'foo', prefix: {a: 3}}}}}"); +} + +TEST_F(QueryPlannerTest, InexactFetchPredicateOverTrailingFieldHandledCorrectlyMultikeyTrue) { + const bool multikey = true; + addIndex(BSON("a" << 1 << "_fts" + << "text" + << "_ftsx" + << 1 + << "b" + << 1), + multikey); + + runQuery(fromjson("{a: 3, $text: {$search: 'foo'}, b: {$exists: true}}")); + + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {b: {$exists: true}}, node: {text: {search: 'foo', prefix: {a: 3}}}}}"); +} + } // namespace diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 68484ed8d66..9d3735edc02 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -250,6 +250,13 @@ struct TextNode : public QuerySolutionNode { IndexEntry index; std::unique_ptr<fts::FTSQuery> ftsQuery; + // The number of fields in the prefix of the text index. For example, if the key pattern is + // + // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 } + // + // then the number of prefix fields is 2, because of "a" and "b". + size_t numPrefixFields = 0u; + // "Prefix" fields of a text index can handle equality predicates. We group them with the // text node while creating the text leaf node and convert them into a BSONObj index prefix // when we finish the text leaf node. |