diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2018-05-21 15:17:53 -0400 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2018-06-20 16:31:13 -0400 |
commit | 1a18c8f8aec34b43553fe4d7961350d1a7a6ada4 (patch) | |
tree | 7af83bd3fb0599bf2fdfb2b992e11987fa31882e /src/mongo | |
parent | a2ee109e64923e0e569fa8adb0dbc67488a77983 (diff) | |
download | mongo-1a18c8f8aec34b43553fe4d7961350d1a7a6ada4.tar.gz |
SERVER-27646 Build index bounds for {$ne: null} predicates
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/query/SConscript | 60 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 59 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder_test.cpp | 288 | ||||
-rw-r--r-- | src/mongo/db/query/index_entry.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/query/index_entry.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/index_entry_test.cpp | 93 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 261 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.h | 47 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect_test.cpp | 140 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_array_test.cpp | 207 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 121 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 3 |
13 files changed, 1145 insertions, 196 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 37f68acfc2d..a52027feee7 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -18,8 +18,6 @@ env.Library( target='query_planner', source=[ "canonical_query.cpp", - "query_settings.cpp", - "index_entry.cpp", "index_tag.cpp", "parsed_projection.cpp", "plan_cache.cpp", @@ -29,7 +27,14 @@ env.Library( "planner_analysis.cpp", "planner_ixselect.cpp", "query_planner.cpp", + "expression_index.cpp", + "expression_index_knobs.cpp", + "index_bounds.cpp", + "index_bounds_builder.cpp", + "index_entry.cpp", + "interval.cpp", "query_planner_common.cpp", + "query_settings.cpp", "query_solution.cpp", ], LIBDEPS=[ @@ -38,11 +43,11 @@ env.Library( "$BUILD_DIR/mongo/db/index/expression_params", "$BUILD_DIR/mongo/db/index_names", "$BUILD_DIR/mongo/db/matcher/expressions", + "$BUILD_DIR/mongo/db/mongohasher", "$BUILD_DIR/mongo/db/server_parameters", - "collation/collator_interface", "collation/collator_factory_interface", + "collation/collator_interface", "command_request_response", - "index_bounds", "query_knobs", ], ) @@ -125,26 +130,6 @@ env.CppUnitTest( ) env.Library( - target="index_bounds", - source=[ - "expression_index.cpp", - "expression_index_knobs.cpp", - "index_bounds.cpp", - "index_bounds_builder.cpp", - "interval.cpp", - ], - LIBDEPS=[ - "$BUILD_DIR/mongo/base", - "$BUILD_DIR/mongo/db/index/expression_params", - "$BUILD_DIR/mongo/db/index_names", - "$BUILD_DIR/mongo/db/matcher/expressions", - "$BUILD_DIR/mongo/db/mongohasher", - "$BUILD_DIR/mongo/db/server_parameters", - "collation/collator_interface", - ], -) - -env.Library( target='command_request_response', source=[ 'count_request.cpp', @@ -258,36 +243,19 @@ env.CppUnitTest( env.CppUnitTest( target="index_bounds_test", source=[ - "index_bounds_test.cpp" - ], - LIBDEPS=[ - "index_bounds", - ], -) - -env.CppUnitTest( - target="index_bounds_builder_test", - source=[ - "index_bounds_builder_test.cpp" + "index_bounds_builder_test.cpp", + "index_bounds_test.cpp", + "index_entry_test.cpp", + "interval_test.cpp" ], LIBDEPS=[ "collation/collator_interface_mock", - "index_bounds", + "query_planner", "query_test_service_context", ], ) env.CppUnitTest( - target="interval_test", - source=[ - "interval_test.cpp" - ], - LIBDEPS=[ - "index_bounds", - ], -) - -env.CppUnitTest( target="query_request_test", source=[ "query_request_test.cpp" diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp index ef8eeb6d8b8..4f84057b670 100644 --- a/src/mongo/db/query/index_bounds.cpp +++ b/src/mongo/db/query/index_bounds.cpp @@ -192,25 +192,31 @@ void OrderedIntervalList::complement() { minBob.appendMinKey(""); BSONObj minObj = minBob.obj(); - // We complement by scanning the entire range of BSON values - // from MinKey to MaxKey. The value from which we must begin - // the next complemented interval is kept in 'curBoundary'. + // We complement by scanning the entire range of BSON values from MinKey to MaxKey. The value + // from which we must begin the next complemented interval is kept in 'curBoundary'. BSONElement curBoundary = minObj.firstElement(); - // If 'curInclusive' is true, then 'curBoundary' is - // included in one of the original intervals, and hence - // should not be included in the complement (and vice-versa - // if 'curInclusive' is false). + // If 'curInclusive' is true, then 'curBoundary' is included in one of the original intervals, + // and hence should not be included in the complement (and vice-versa if 'curInclusive' is + // false). bool curInclusive = false; - // We will build up a list of intervals that represents - // the inversion of those in the OIL. + // We will build up a list of intervals that represents the inversion of those in the OIL. vector<Interval> newIntervals; - for (size_t j = 0; j < intervals.size(); ++j) { - Interval curInt = intervals[j]; - if (0 != curInt.start.woCompare(curBoundary) || (!curInclusive && !curInt.startInclusive)) { - // Make a new interval from 'curBoundary' to - // the start of 'curInterval'. + for (const auto& curInt : intervals) { + + // There is one special case worth optimizing for: we will generate two point queries for an + // equality-to-null predicate like {a: {$eq: null}}. The points are undefined and null, so + // when complementing (for {a: {$ne: null}} or similar), we know that there is nothing in + // between these two points, and can avoid adding that range. + const bool isProvablyEmptyRange = + (curBoundary.type() == BSONType::Undefined && curInclusive && + curInt.start.type() == BSONType::jstNULL && curInt.startInclusive); + + if ((0 != curInt.start.woCompare(curBoundary) || + (!curInclusive && !curInt.startInclusive)) && + !isProvablyEmptyRange) { + // Make a new interval from 'curBoundary' to the start of 'curInterval'. BSONObjBuilder intBob; intBob.append(curBoundary); intBob.append(curInt.start); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index fb9425b5fdb..b4f6623480e 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -45,6 +45,7 @@ #include "mongo/db/query/expression_index.h" #include "mongo/db/query/expression_index_knobs.h" #include "mongo/db/query/indexability.h" +#include "mongo/db/query/planner_ixselect.h" #include "mongo/db/query/query_knobs.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -90,6 +91,31 @@ bool stringMayHaveUnescapedPipe(StringData str) { return false; } +const BSONObj kUndefinedElementObj = BSON("" << BSONUndefined); +const BSONObj kNullElementObj = BSON("" << BSONNULL); + +const Interval kHashedUndefinedInterval = IndexBoundsBuilder::makePointInterval( + ExpressionMapping::hash(kUndefinedElementObj.firstElement())); +const Interval kHashedNullInterval = + IndexBoundsBuilder::makePointInterval(ExpressionMapping::hash(kNullElementObj.firstElement())); + +void makeNullEqualityBounds(const IndexEntry& index, + bool isHashed, + OrderedIntervalList* oil, + IndexBoundsBuilder::BoundsTightness* tightnessOut) { + // An equality to null predicate cannot be covered because the index does not distinguish + // between the lack of a value and the literal value null. + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + + // There are two values that could possibly be equal to null in an index: undefined and null. + oil->intervals.push_back(isHashed + ? kHashedUndefinedInterval + : IndexBoundsBuilder::makePointInterval(kUndefinedElementObj)); + oil->intervals.push_back(isHashed ? kHashedNullInterval + : IndexBoundsBuilder::makePointInterval(kNullElementObj)); + // Just to be sure, make sure the bounds are in the right order if the hash values are opposite. + IndexBoundsBuilder::unionize(oil); +} } // namespace string IndexBoundsBuilder::simpleRegex(const char* regex, @@ -355,12 +381,23 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, translate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); - // If the index is multikey, it doesn't matter what the tightness of the child is, we must - // return INEXACT_FETCH. Consider a multikey index on 'a' with document {a: [1, 2, 3]} and - // query {a: {$ne: 3}}. If we treated the bounds [MinKey, 3), (3, MaxKey] as exact, then we - // would erroneously return the document! - if (index.multikey) { - *tightnessOut = INEXACT_FETCH; + // Until the index distinguishes between missing values and literal null values, we cannot + // build exact bounds for equality predicates on the literal value null. However, we _can_ + // build exact bounds for the inverse, for example the query {a: {$ne: null}}. + if (MatchExpression::EQ == child->matchType() && + static_cast<ComparisonMatchExpression*>(child)->getData().type() == BSONType::jstNULL) { + // We don't expect to try to use a sparse index for $ne: null. While this should be + // correct, it is not currently supported. + invariant(!index.sparse); + *tightnessOut = IndexBoundsBuilder::EXACT; + } + + // If the index is multikey on this path, it doesn't matter what the tightness of the child + // is, we must return INEXACT_FETCH. Consider a multikey index on 'a' with document + // {a: [1, 2, 3]} and query {a: {$ne: 3}}. If we treated the bounds [MinKey, 3), (3, MaxKey] + // as exact, then we would erroneously return the document! + if (index.pathHasMultikeyComponent(elt.fieldNameStringData())) { + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; } } else if (MatchExpression::EXISTS == expr->matchType()) { oilOut->intervals.push_back(allValues()); @@ -862,9 +899,15 @@ void IndexBoundsBuilder::translateEquality(const BSONElement& data, bool isHashed, OrderedIntervalList* oil, BoundsTightness* tightnessOut) { + if (BSONType::jstNULL == data.type()) { + // An equality to null query is special. It should return both undefined and null values, so + // is not a point query. + return makeNullEqualityBounds(index, isHashed, oil, tightnessOut); + } + // We have to copy the data out of the parse tree and stuff it into the index // bounds. BSONValue will be useful here. - if (Array != data.type()) { + if (BSONType::Array != data.type()) { BSONObj dataObj = objFromElement(data, index.collator); if (isHashed) { dataObj = ExpressionMapping::hash(dataObj.firstElement()); @@ -873,7 +916,7 @@ void IndexBoundsBuilder::translateEquality(const BSONElement& data, verify(dataObj.isOwned()); oil->intervals.push_back(makePointInterval(dataObj)); - if (dataObj.firstElement().isNull() || isHashed) { + if (isHashed) { *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; } else { *tightnessOut = IndexBoundsBuilder::EXACT; diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp index e030c12afa3..4d84ee23a54 100644 --- a/src/mongo/db/query/index_bounds_builder_test.cpp +++ b/src/mongo/db/query/index_bounds_builder_test.cpp @@ -885,9 +885,12 @@ TEST(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) { IndexBoundsBuilder::BoundsTightness tightness; IndexBoundsBuilder::translate(expr.get(), elt, testIndex, &oil, &tightness); ASSERT_EQUALS(oil.name, "a"); - ASSERT_EQUALS(oil.intervals.size(), 1U); + ASSERT_EQUALS(oil.intervals.size(), 2U); + ASSERT_EQUALS( + Interval::INTERVAL_EQUALS, + oil.intervals[0].compare(Interval(fromjson("{'': undefined, '': undefined}"), true, true))); ASSERT_EQUALS(Interval::INTERVAL_EQUALS, - oil.intervals[0].compare(Interval(fromjson("{'': null, '': null}"), true, true))); + oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true))); ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); } @@ -2014,6 +2017,287 @@ TEST(IndexBoundsBuilderTest, TranslateEqualityToNonStringWithMockCollator) { ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); } +/** + * Asserts that 'oil' contains exactly two bounds: [[undefined, undefined], [null, null]]. + */ +void assertBoundsRepresentEqualsNull(const OrderedIntervalList& oil) { + ASSERT_EQUALS(oil.intervals.size(), 2U); + ASSERT_EQUALS( + Interval::INTERVAL_EQUALS, + oil.intervals[0].compare(Interval(fromjson("{'': undefined, '': undefined}"), true, true))); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, + oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true))); +} + +TEST(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + + BSONObj obj = BSON("a" << BSONNULL); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBounds) { + BSONObj indexPattern = BSON("a.b" << 1); + IndexEntry testIndex(indexPattern); + + BSONObj obj = BSON("a.b" << BSONNULL); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a.b"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, TranslateEqualsToNullMultiKeyShouldBuildInexactBounds) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikey = true; + + BSONObj obj = BSON("a" << BSONNULL); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildTwoIntervalsForHashedIndex) { + BSONObj indexPattern = BSON("a" + << "hashed"); + IndexEntry testIndex(indexPattern); + testIndex.type = IndexType::INDEX_HASHED; + + BSONObj obj = BSON("a" << BSONNULL); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + // We should have one for undefined, and one for null. + ASSERT_EQUALS(oil.intervals.size(), 2U); + { + const BSONObj undefinedElementObj = BSON("" << BSONUndefined); + const BSONObj hashedUndefinedInterval = + ExpressionMapping::hash(undefinedElementObj.firstElement()); + ASSERT_EQ(hashedUndefinedInterval.firstElement().type(), BSONType::NumberLong); + + const auto& firstInterval = oil.intervals[0]; + ASSERT_TRUE(firstInterval.startInclusive); + ASSERT_TRUE(firstInterval.endInclusive); + ASSERT_EQ(firstInterval.start.type(), BSONType::NumberLong); + ASSERT_EQ(firstInterval.start.numberLong(), + hashedUndefinedInterval.firstElement().numberLong()); + } + + { + const BSONObj nullElementObj = BSON("" << BSONNULL); + const BSONObj hashedNullInterval = ExpressionMapping::hash(nullElementObj.firstElement()); + ASSERT_EQ(hashedNullInterval.firstElement().type(), BSONType::NumberLong); + + const auto& secondInterval = oil.intervals[1]; + ASSERT_TRUE(secondInterval.startInclusive); + ASSERT_TRUE(secondInterval.endInclusive); + ASSERT_EQ(secondInterval.start.type(), BSONType::NumberLong); + ASSERT_EQ(secondInterval.start.numberLong(), + hashedNullInterval.firstElement().numberLong()); + } +} + +/** + * Asserts that 'oil' contains exactly two bounds: [MinKey, undefined) and (null, MaxKey]. + */ +void assertBoundsRepresentNotEqualsNull(const OrderedIntervalList& oil) { + ASSERT_EQUALS(oil.intervals.size(), 2U); + { + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.appendUndefined(""); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, + oil.intervals[0].compare(Interval(bob.obj(), true, false))); + } + + { + BSONObjBuilder bob; + bob.appendNull(""); + bob.appendMaxKey(""); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, + oil.intervals[1].compare(Interval(bob.obj(), false, true))); + } +} + +TEST(IndexBoundsBuilderTest, TranslateNotEqualToNullShouldBuildExactBoundsIfIndexIsNotMultiKey) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + + BSONObj obj = BSON("a" << BSON("$ne" << BSONNULL)); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + // Bounds should be [MinKey, undefined), (null, MaxKey]. + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, + TranslateNotEqualToNullShouldBuildExactBoundsIfIndexIsNotMultiKeyOnRelevantPath) { + BSONObj indexPattern = BSON("a" << 1 << "b" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikeyPaths = {{}, {0}}; // "a" is not multi-key, but "b" is. + + BSONObj obj = BSON("a" << BSON("$ne" << BSONNULL)); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + // Bounds should be [MinKey, undefined), (null, MaxKey]. + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, TranslateNotEqualToNullShouldBuildExactBoundsOnReverseIndex) { + BSONObj indexPattern = BSON("a" << -1); + IndexEntry testIndex(indexPattern); + + BSONObj obj = BSON("a" << BSON("$ne" << BSONNULL)); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + // Bounds should be [MinKey, undefined), (null, MaxKey]. + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, TranslateNotEqualToNullShouldBuildInexactBoundsIfIndexIsMultiKey) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikey = true; + + BSONObj matchObj = BSON("a" << BSON("$ne" << BSONNULL)); + unique_ptr<MatchExpression> expr(parseMatchExpression(matchObj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, + TranslateDottedElemMatchValueNotEqualToNullShouldBuildExactBoundsIfIsMultiKeyOnThatPath) { + BSONObj indexPattern = BSON("a.b" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikeyPaths = {{1}}; // "a.b" is multikey. + + BSONObj matchObj = BSON("a.b" << BSON("$elemMatch" << BSON("$ne" << BSONNULL))); + unique_ptr<MatchExpression> expr(parseMatchExpression(matchObj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a.b"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, + TranslateDottedFieldNotEqualToNullShouldBuildInexactBoundsIfIndexIsMultiKey) { + BSONObj indexPattern = BSON("a.b" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikey = true; + + BSONObj matchObj = BSON("a.b" << BSON("$ne" << BSONNULL)); + unique_ptr<MatchExpression> expr(parseMatchExpression(matchObj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a.b"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, + TranslateElemMatchValueNotEqualToNullShouldBuildInexactBoundsIfIndexIsMultiKey) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + testIndex.multikey = true; + + BSONObj obj = BSON("a" << BSON("$elemMatch" << BSON("$ne" << BSONNULL))); + unique_ptr<MatchExpression> expr(parseMatchExpression(obj)); + BSONElement elt = obj.firstElement(); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate(expr.get(), elt, testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentNotEqualsNull(oil); +} + +TEST(IndexBoundsBuilderTest, + TranslateElemMatchValueNotEqualToNullShouldBuildInExactBoundsIfIndexIsNotMultiKey) { + BSONObj indexPattern = BSON("a" << 1); + IndexEntry testIndex(indexPattern); + + BSONObj matchObj = BSON("a" << BSON("$elemMatch" << BSON("$ne" << BSONNULL))); + unique_ptr<MatchExpression> expr(parseMatchExpression(matchObj)); + + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate( + expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness); + + ASSERT_EQUALS(oil.name, "a"); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + assertBoundsRepresentNotEqualsNull(oil); +} + TEST(IndexBoundsBuilderTest, TranslateNotEqualToStringWithMockCollator) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); IndexEntry testIndex = IndexEntry(BSONObj()); diff --git a/src/mongo/db/query/index_entry.cpp b/src/mongo/db/query/index_entry.cpp index 13153465387..49898846d45 100644 --- a/src/mongo/db/query/index_entry.cpp +++ b/src/mongo/db/query/index_entry.cpp @@ -63,4 +63,21 @@ std::string IndexEntry::toString() const { return sb.str(); } +bool IndexEntry::pathHasMultikeyComponent(StringData indexedField) const { + if (multikeyPaths.empty()) { + // The index has no path-level multikeyness metadata. + return multikey; + } + + size_t pos = 0; + for (auto&& key : keyPattern) { + if (key.fieldNameStringData() == indexedField) { + return !multikeyPaths[pos].empty(); + } + ++pos; + } + + MONGO_UNREACHABLE; +} + } // namespace mongo diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index 42e4d2281d2..28ac79dfcd1 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -103,6 +103,17 @@ struct IndexEntry { type = IndexNames::nameToType(IndexNames::findPluginName(keyPattern)); } + /** + * Returns true if 'indexedField' has any multikey components. For example, returns true if this + * index has a multikey component "a", and 'indexedField' is "a.b". Illegal to call unless + * 'indexedField' is present in this index's key pattern. + * + * For indexes created on older versions we may not have path-level multikey information. In + * these cases we only have a single boolean to track whether any path in the index is multikey. + * If this is the case we defensively return true for any path. + */ + bool pathHasMultikeyComponent(StringData indexedField) const; + bool operator==(const IndexEntry& rhs) const { // Indexes are logically equal when names are equal. return this->name == rhs.name; diff --git a/src/mongo/db/query/index_entry_test.cpp b/src/mongo/db/query/index_entry_test.cpp new file mode 100644 index 00000000000..0c56b1d3524 --- /dev/null +++ b/src/mongo/db/query/index_entry_test.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/index_entry.h" + +#include "mongo/unittest/death_test.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { + +namespace { + +IndexEntry makeIndexEntry(BSONObj keyPattern, MultikeyPaths multiKeyPaths) { + IndexEntry entry{std::move(keyPattern)}; + entry.multikeyPaths = std::move(multiKeyPaths); + entry.multikey = std::any_of(entry.multikeyPaths.cbegin(), + entry.multikeyPaths.cend(), + [](const auto& entry) { return !entry.empty(); }); + return entry; +} + +TEST(QueryPlannerIXSelectTest, IndexedFieldHasMultikeyComponents) { + auto indexEntry = makeIndexEntry(BSON("a" << 1 << "b.c" << 1), {{}, {}}); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("a"_sd)); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("b.c"_sd)); + + indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1), {{}, {0U}}); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("a"_sd)); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("b"_sd)); + + indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1 << "c.d" << 1), {{}, {}, {1U}}); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("a"_sd)); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("b"_sd)); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("c.d"_sd)); + + indexEntry = makeIndexEntry(BSON("a.b" << 1 << "a.c" << 1), {{}, {1U}}); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("a.b"_sd)); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a.c"_sd)); + + indexEntry = makeIndexEntry(BSON("a.b" << 1 << "a.c" << 1), {{0U, 1U}, {0U}}); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a.b"_sd)); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a.c"_sd)); + + indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1), {{0U}, {}}); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a"_sd)); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("b"_sd)); + + indexEntry = makeIndexEntry(BSON("a.b.c" << 1 << "d" << 1), {{1U, 2U}, {}}); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a.b.c"_sd)); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("d"_sd)); + + indexEntry = makeIndexEntry(BSON("a.b" << 1 << "c" << 1 << "d" << 1), {{1U}, {}, {0U}}); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("a.b"_sd)); + ASSERT_FALSE(indexEntry.pathHasMultikeyComponent("c"_sd)); + ASSERT_TRUE(indexEntry.pathHasMultikeyComponent("d"_sd)); +} + +DEATH_TEST(QueryPlannerIXSelectTest, + IndexedFieldHasMultikeyComponentsPassingInvalidFieldIsFatal, + "Invariant failure Hit a MONGO_UNREACHABLE!") { + auto indexEntry = makeIndexEntry(BSON("a" << 1), {{}}); + indexEntry.pathHasMultikeyComponent("b"_sd); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 5f92f12b88c..dfabd73f04d 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -37,7 +37,6 @@ #include "mongo/db/index/s2_common.h" #include "mongo/db/index_names.h" #include "mongo/db/matcher/expression_algo.h" -#include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_expr_eq.h" #include "mongo/db/matcher/expression_text.h" @@ -51,26 +50,87 @@ namespace mongo { namespace { -/** - * Checks whether the given index is compatible with each child of the given $elemMatch expression. - * Assumes that the match expression is of type ELEM_MATCH_VALUE. - */ -bool elemMatchValueCompatible(const BSONElement& elt, - const IndexEntry& index, - MatchExpression* elemMatch, - const CollatorInterface* collator) { - invariant(elemMatch->matchType() == MatchExpression::ELEM_MATCH_VALUE); - for (size_t child = 0; child < elemMatch->numChildren(); ++child) { - if (!QueryPlannerIXSelect::compatible( - elt, index, elemMatch->getChild(child), collator, true)) { +std::size_t numPathComponents(StringData path) { + return FieldRef{path}.numParts(); +} + +} // namespace + +bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index, + const BSONElement& keyPatternElt, + std::size_t keyPatternIndex, + const ElemMatchContext& elemMatchContext) { + // It is safe to use a non-multikey index for not equals null queries. + if (!index.multikey && index.multikeyPaths.empty()) { + // This is an old index without multikey path metadata. + return true; + } + if (!index.multikeyPaths.empty() && index.multikeyPaths[keyPatternIndex].empty()) { + // This part of the index has no multikey components, it is always safe to use the index. + return true; + } + + // At least one component is multikey. In most circumstances, we can't index the negation of EQ + // with a value of null if the index is multikey on one of the components of the path. + // + // This is quite subtle, and due to the semantics of null matching. For example, if the query is + // {a: {$ne: null}}, you might expect us to build index bounds of [MinKey, undefined) and + // (null, MaxKey] (or similar) on an 'a' index. However, with this query the document {a: []} + // should match (because it does not match {a: null}), but will have an index key of undefined. + // Similarly, the document {a: [null, null]} matches the query {'a.b': {$ne: null}}, but would + // have an index key of null in an index on 'a.b'. Since it's possible for a key of undefined to + // be included in the results and also possible for a value of null to be included, there are no + // restrictions on the bounds of the index for such a predicate. Further, such an index could + // not be used for covering, so would not provide any help to the query. + // + // There are two exceptions to this rule, both having to do with $elemMatch, see below. + auto* parentElemMatch = elemMatchContext.innermostParentElemMatch; + if (!parentElemMatch) { + // See above, if there's no $elemMatch we can't use the index. + return false; + } + + if (MatchExpression::ELEM_MATCH_VALUE == parentElemMatch->matchType()) { + // If this $ne clause is within a $elemMatch *value*, the semantics of $elemMatch guarantee + // that no matching values will be null or undefined, even if the index is multikey. + // + // For example, the document {a: []} does *not* match the query {a: {$elemMatch: {$ne: + // null}} because there was no element within the array that matched. While the document {a: + // [[]]} *does* match that query, the index entry for that document would be [], not null or + // undefined. + return true; + } else { + invariant(MatchExpression::ELEM_MATCH_OBJECT == parentElemMatch->matchType()); + if (index.multikeyPaths.empty()) { + // The index has no path-level multikey metadata, we can't do the analysis below so have + // to be defensive. return false; } + + // This $ne clause is within an $elemMatch *object*. We can safely use the index so long as + // there are no multikey paths below the $elemMatch. + // + // For example, take the query {"a.b": {$elemMatch: {"c.d": {$ne: null}}}}. We can use an + // "a.b.c.d" index if _only_ "a" and/or "a.b" is/are multikey, because there will be no + // array traversal for the "c.d" part of the query. If "a.b.c" or "a.b.c.d" are multikey, we + // cannot use this index. As an example of what would go wrong, suppose the collection + // contained the document {a: [{b: []}]}. The "a.b.c.d" index key for that document would be + // undefined, and so would be excluded from the index bounds. However, that document should + // match the query. + const std::size_t firstComponentAfterElemMatch = + numPathComponents(elemMatchContext.fullPathToParentElemMatch); + for (auto&& multikeyComponent : index.multikeyPaths[keyPatternIndex]) { + if (multikeyComponent >= firstComponentAfterElemMatch) { + return false; + } + } + + // The index was multikey, but only on paths that came before the $elemMatch, so we can + // safely use the index without having to worry about implicitly traversing arrays. + return true; } - return true; } -} // namespace - static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) { BSONElement e = infoObj[name]; if (e.isNumber()) { @@ -152,11 +212,10 @@ void QueryPlannerIXSelect::getFields(const MatchExpression* node, out->insert(prefix + node->path().toString()); } else if (Indexability::arrayUsesIndexOnChildren(node)) { // If the array uses an index on its children, it's something like - // {foo : {$elemMatch: { bar: 1}}}, in which case the predicate is really over - // foo.bar. + // {foo : {$elemMatch: {bar: 1}}}, in which case the predicate is really over foo.bar. // - // When we have {foo: {$all: [{$elemMatch: {a:1}}], the path of the embedded elemMatch - // is empty. We don't want to append a dot in that case as the field would be foo..a. + // When we have {foo: {$all: [{$elemMatch: {a: 1}}], the path of the embedded elemMatch + // is empty. We don't want to append a dot in that case as the field would be foo..a. if (!node->path().empty()) { prefix += node->path().toString() + "."; } @@ -186,11 +245,25 @@ void QueryPlannerIXSelect::findRelevantIndices(const stdx::unordered_set<string> } // static -bool QueryPlannerIXSelect::compatible(const BSONElement& elt, +// This is the public method which does not accept an ElemMatchContext. +bool QueryPlannerIXSelect::compatible(const BSONElement& keyPatternElt, const IndexEntry& index, + std::size_t keyPatternIdx, MatchExpression* node, - const CollatorInterface* collator, - bool elemMatchChild) { + StringData fullPathToNode, + const CollatorInterface* collator) { + return _compatible( + keyPatternElt, index, keyPatternIdx, node, fullPathToNode, collator, ElemMatchContext{}); +} + +// static +bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, + const IndexEntry& index, + std::size_t keyPatternIdx, + MatchExpression* node, + StringData fullPathToNode, + const CollatorInterface* collator, + const ElemMatchContext& elemMatchContext) { if ((boundsGeneratingNodeContainsComparisonToType(node, BSONType::String) || boundsGeneratingNodeContainsComparisonToType(node, BSONType::Array) || boundsGeneratingNodeContainsComparisonToType(node, BSONType::Object)) && @@ -207,17 +280,20 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, // be treated as a btree index by an ancient version of MongoDB. To try to run // 2dsphere queries over it would be folly. string indexedFieldType; - if (String != elt.type() || (INDEX_BTREE == index.type)) { + if (String != keyPatternElt.type() || (INDEX_BTREE == index.type)) { indexedFieldType = ""; } else { - indexedFieldType = elt.String(); + indexedFieldType = keyPatternElt.String(); } - // We know elt.fieldname() == node->path(). + const bool isChildOfElemMatchValue = elemMatchContext.innermostParentElemMatch && + elemMatchContext.innermostParentElemMatch->matchType() == MatchExpression::ELEM_MATCH_VALUE; + + // We know keyPatternElt.fieldname() == node->path(). MatchExpression::MatchType exprtype = node->matchType(); if (exprtype == MatchExpression::INTERNAL_EXPR_EQ && - indexedFieldHasMultikeyComponents(elt.fieldNameStringData(), index)) { + index.pathHasMultikeyComponent(keyPatternElt.fieldNameStringData())) { // Expression language equality cannot be indexed if the field path has multikey components. return false; } @@ -229,7 +305,7 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, // We can use a sparse index for $_internalExprEq with a null element. Expression language // equality-to-null semantics are that only literal nulls match. Sparse indexes contain // index keys for literal nulls, but not for missing elements. - if (exprtype == MatchExpression::EQ && index.sparse && !elemMatchChild) { + if (exprtype == MatchExpression::EQ && index.sparse && !isChildOfElemMatchValue) { const EqualityMatchExpression* expr = static_cast<const EqualityMatchExpression*>(node); if (expr->getData().isNull()) { return false; @@ -238,7 +314,7 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, // Can't use a sparse index for $in with a null element, unless the $eq is within a // $elemMatch expression since the latter implies a match on the literal element 'null'. - if (exprtype == MatchExpression::MATCH_IN && index.sparse && !elemMatchChild) { + if (exprtype == MatchExpression::MATCH_IN && index.sparse && !isChildOfElemMatchValue) { const InMatchExpression* expr = static_cast<const InMatchExpression*>(node); if (expr->hasNull()) { return false; @@ -250,14 +326,13 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, return false; } - // There are restrictions on when we can use the index if - // the expression is a NOT. + // There are restrictions on when we can use the index if the expression is a NOT. if (exprtype == MatchExpression::NOT) { // Don't allow indexed NOT on special index types such as geo or text indices. // TODO: SERVER-30994 should remove this check entirely and allow $not on the // 'non-special' fields of non-btree indices. // (e.g. {a: 1, geo: "2dsphere"}) - if (INDEX_BTREE != index.type && !elemMatchChild) { + if (INDEX_BTREE != index.type && !isChildOfElemMatchValue) { return false; } @@ -268,13 +343,25 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, } // Can't index negations of MOD, REGEX, TYPE_OPERATOR, or ELEM_MATCH_VALUE. - MatchExpression::MatchType childtype = node->getChild(0)->matchType(); + auto* child = node->getChild(0); + MatchExpression::MatchType childtype = child->matchType(); if (MatchExpression::REGEX == childtype || MatchExpression::MOD == childtype || MatchExpression::TYPE_OPERATOR == childtype || MatchExpression::ELEM_MATCH_VALUE == childtype) { return false; } + // Most of the time we can't use a multikey index for a $ne: null query, however there + // are a few exceptions around $elemMatch. + const bool isNotEqualsNull = + (childtype == MatchExpression::EQ && + static_cast<ComparisonMatchExpression*>(child)->getData().type() == + BSONType::jstNULL); + if (isNotEqualsNull && + !notEqualsNullCanUseIndex(index, keyPatternElt, keyPatternIdx, elemMatchContext)) { + return false; + } + // If it's a negated $in, it can't have any REGEX's inside. if (MatchExpression::MATCH_IN == childtype) { InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0)); @@ -284,6 +371,22 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, } } + // If this is an $elemMatch value, make sure _all_ of the children can use the index. + if (node->matchType() == MatchExpression::ELEM_MATCH_VALUE) { + ElemMatchContext newContext; + newContext.fullPathToParentElemMatch = fullPathToNode; + newContext.innermostParentElemMatch = static_cast<ElemMatchValueMatchExpression*>(node); + + auto* children = node->getChildVector(); + if (!std::all_of(children->begin(), children->end(), [&](MatchExpression* child) { + const auto newPath = fullPathToNode.toString() + child->path(); + return _compatible( + keyPatternElt, index, keyPatternIdx, child, newPath, collator, newContext); + })) { + return false; + } + } + // We can only index EQ using text indices. This is an artificial limitation imposed by // FTSSpec::getIndexPrefix() which will fail if there is not an EQ predicate on each // index prefix field of the text index. @@ -304,9 +407,7 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, // Not-equalities can only go in a suffix field of an index kp. We look through the key // pattern to see if the field we're looking at now appears as a prefix. If so, we // can't use this index for it. - BSONObjIterator specIt(index.keyPattern); - while (specIt.more()) { - BSONElement elt = specIt.next(); + for (auto&& elt : index.keyPattern) { // We hit the dividing mark between prefix and suffix, so whatever field we're // looking at is a suffix, since it appears *after* the dividing mark between the // two. As such, we can use the index. @@ -321,10 +422,9 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, } } - // NOTE: This shouldn't be reached. Text index implies there is a separator implies we - // will always hit the 'return true' above. + // Text index implies there is a separator implies we will always hit the 'return true' + // above. MONGO_UNREACHABLE; - return true; } else if (IndexNames::HASHED == indexedFieldType) { if (ComparisonMatchExpressionBase::isEquality(exprtype)) { return true; @@ -387,24 +487,33 @@ bool QueryPlannerIXSelect::compatible(const BSONElement& elt, return false; } else { warning() << "Unknown indexing for node " << node->toString() << " and field " - << elt.toString(); + << keyPatternElt.toString(); verify(0); } } // static +// This is the public method which does not accept an ElemMatchContext. void QueryPlannerIXSelect::rateIndices(MatchExpression* node, string prefix, const vector<IndexEntry>& indices, const CollatorInterface* collator) { + return _rateIndices(node, prefix, indices, collator, ElemMatchContext{}); +} + +// static +void QueryPlannerIXSelect::_rateIndices(MatchExpression* node, + string prefix, + const vector<IndexEntry>& indices, + const CollatorInterface* collator, + const ElemMatchContext& elemMatchCtx) { // Do not traverse tree beyond logical NOR node MatchExpression::MatchType exprtype = node->matchType(); if (exprtype == MatchExpression::NOR) { return; } - // Every indexable node is tagged even when no compatible index is - // available. + // Every indexable node is tagged even when no compatible index is available. if (Indexability::isBoundsGenerating(node)) { string fullPath; if (MatchExpression::NOT == node->matchType()) { @@ -418,46 +527,56 @@ void QueryPlannerIXSelect::rateIndices(MatchExpression* node, auto rt = static_cast<RelevantTag*>(node->getTag()); rt->path = fullPath; - // TODO: This is slow, with all the string compares. for (size_t i = 0; i < indices.size(); ++i) { - BSONObjIterator it(indices[i].keyPattern); - BSONElement elt = it.next(); - if (elt.fieldName() == fullPath && compatible(elt, indices[i], node, collator)) { - if (node->matchType() != MatchExpression::ELEM_MATCH_VALUE || - elemMatchValueCompatible(elt, indices[i], node, collator)) { - rt->first.push_back(i); - } - } - while (it.more()) { - elt = it.next(); - if (elt.fieldName() == fullPath && - compatible(elt, indices[i], node, collator, false)) { - if (node->matchType() != MatchExpression::ELEM_MATCH_VALUE || - elemMatchValueCompatible(elt, indices[i], node, collator)) { + const IndexEntry& index = indices[i]; + std::size_t keyPatternIndex = 0; + for (auto&& keyPatternElt : index.keyPattern) { + if (keyPatternElt.fieldNameStringData() == fullPath && _compatible(keyPatternElt, + index, + keyPatternIndex, + node, + fullPath, + collator, + elemMatchCtx)) { + if (keyPatternIndex == 0) { + rt->first.push_back(i); + } else { rt->notFirst.push_back(i); } } + ++keyPatternIndex; } } - // If this is a NOT, we have to clone the tag and attach - // it to the NOT's child. + // If this is a NOT, we have to clone the tag and attach it to the NOT's child. if (MatchExpression::NOT == node->matchType()) { RelevantTag* childRt = static_cast<RelevantTag*>(rt->clone()); childRt->path = rt->path; node->getChild(0)->setTag(childRt); } } else if (Indexability::arrayUsesIndexOnChildren(node)) { - // See comment in getFields about all/elemMatch and paths. + const auto newPath = prefix + node->path().toString(); + ElemMatchContext newContext; + // Note this StringData is unowned and references the string declared on the stack here. + // This should be fine since we are only ever reading from this in recursive calls as + // context to help make planning decisions. + newContext.fullPathToParentElemMatch = newPath; + newContext.innermostParentElemMatch = static_cast<ElemMatchObjectMatchExpression*>(node); + + // If the array uses an index on its children, it's something like + // {foo: {$elemMatch: {bar: 1}}}, in which case the predicate is really over foo.bar. + // + // When we have {foo: {$all: [{$elemMatch: {a: 1}}], the path of the embedded elemMatch + // is empty. We don't want to append a dot in that case as the field would be foo..a. if (!node->path().empty()) { prefix += node->path().toString() + "."; } for (size_t i = 0; i < node->numChildren(); ++i) { - rateIndices(node->getChild(i), prefix, indices, collator); + _rateIndices(node->getChild(i), prefix, indices, collator, newContext); } } else if (node->getCategory() == MatchExpression::MatchCategory::kLogical) { for (size_t i = 0; i < node->numChildren(); ++i) { - rateIndices(node->getChild(i), prefix, indices, collator); + _rateIndices(node->getChild(i), prefix, indices, collator, elemMatchCtx); } } } @@ -475,24 +594,6 @@ void QueryPlannerIXSelect::stripInvalidAssignments(MatchExpression* node, stripInvalidAssignmentsToPartialIndices(node, indices); } -bool QueryPlannerIXSelect::indexedFieldHasMultikeyComponents(StringData indexedField, - const IndexEntry& index) { - if (index.multikeyPaths.empty()) { - // The index has no path-level multikeyness metadata. - return index.multikey; - } - - size_t pos = 0; - for (auto&& key : index.keyPattern) { - if (key.fieldNameStringData() == indexedField) { - return !index.multikeyPaths[pos].empty(); - } - ++pos; - } - - MONGO_UNREACHABLE; -} - namespace { /** diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h index 1a92dd33d6c..8e2f526b3c1 100644 --- a/src/mongo/db/query/planner_ixselect.h +++ b/src/mongo/db/query/planner_ixselect.h @@ -28,6 +28,7 @@ #pragma once +#include "mongo/db/matcher/expression_array.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/index_entry.h" #include "mongo/db/query/query_solution.h" @@ -62,18 +63,21 @@ public: std::vector<IndexEntry>* out); /** - * Return true if the index key pattern field 'elt' (which belongs to 'index') can be used - * to answer the predicate 'node'. + * Return true if the index key pattern field 'keyPatternElt' (which belongs to 'index' and is + * at position 'keyPatternIndex' in the index's keyPattern) can be used to answer the predicate + * 'node'. When 'node' is a sub-tree of a larger MatchExpression, 'fullPathToNode' is the path + * traversed to get to this node, otherwise it is empty. * * For example, {field: "hashed"} can only be used with sets of equalities. * {field: "2d"} can only be used with some geo predicates. * {field: "2dsphere"} can only be used with some other geo predicates. */ - static bool compatible(const BSONElement& elt, + static bool compatible(const BSONElement& keyPatternElt, const IndexEntry& index, + std::size_t keyPatternIndex, MatchExpression* node, - const CollatorInterface* collator, - bool elemMatchChild = false); + StringData fullPathToNode, + const CollatorInterface* collator); /** * Determine how useful all of our relevant 'indices' are to all predicates in the subtree @@ -129,13 +133,33 @@ public: static void stripUnneededAssignments(MatchExpression* node, const std::vector<IndexEntry>& indices); +private: /** - * Returns true if the indexed field has any multikey components. Illegal to call unless - * 'indexedField' is present in the key pattern for 'index'. + * Used to keep track of if any $elemMatch predicates were encountered when walking a + * MatchExpression tree. The presence of an outer $elemMatch can impact whether an index is + * applicable for an inner MatchExpression. For example, the NOT expression in + * {a: {$elemMatch: {b: {$ne: null}}} can only use an "a.b" index if that path is not multikey + * on "a.b". Because of the $elemMatch, it's okay to use the "a.b" index if the path is multikey + * on "a". */ - static bool indexedFieldHasMultikeyComponents(StringData indexedField, const IndexEntry& index); + struct ElemMatchContext { + ArrayMatchingMatchExpression* innermostParentElemMatch{nullptr}; + StringData fullPathToParentElemMatch{""_sd}; + }; -private: + static bool _compatible(const BSONElement& keyPatternElt, + const IndexEntry& index, + std::size_t keyPatternIndex, + MatchExpression* node, + StringData fullPathToNode, + const CollatorInterface* collator, + const ElemMatchContext& elemMatchContext); + + static void _rateIndices(MatchExpression* node, + std::string prefix, + const std::vector<IndexEntry>& indices, + const CollatorInterface* collator, + const ElemMatchContext& elemMatchContext); /** * Amend the RelevantTag lists for all predicates in the subtree rooted at 'node' to remove * invalid assignments to text indexes. @@ -222,6 +246,11 @@ private: */ static void stripInvalidAssignmentsToPartialIndices(MatchExpression* node, const std::vector<IndexEntry>& indices); + + static bool notEqualsNullCanUseIndex(const IndexEntry& index, + const BSONElement& keyPatternElt, + std::size_t keyPatternIndex, + const ElemMatchContext& elemMatchContext); }; } // namespace mongo diff --git a/src/mongo/db/query/planner_ixselect_test.cpp b/src/mongo/db/query/planner_ixselect_test.cpp index 41d6cb1e912..ea4836de09b 100644 --- a/src/mongo/db/query/planner_ixselect_test.cpp +++ b/src/mongo/db/query/planner_ixselect_test.cpp @@ -1130,48 +1130,124 @@ TEST(QueryPlannerIXSelectTest, InternalExprEqCanUseSparseIndexWithComparisonToNo testRateIndices( "{a: {$_internalExprEq: 1}}", "", kSimpleCollator, indices, "a", expectedIndices); } +TEST(QueryPlannerIXSelectTest, NotEqualsNullCanUseIndex) { + IndexEntry entry{BSON("a" << 1)}; + std::set<size_t> expectedIndices = {0}; + testRateIndices("{a: {$ne: null}}", "", kSimpleCollator, {entry}, "a,a", expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, NotEqualsNullCannotUseMultiKeyIndex) { + IndexEntry entry{BSON("a" << 1)}; + entry.multikey = true; + std::set<size_t> expectedIndices = {}; + testRateIndices("{a: {$ne: null}}", "", kSimpleCollator, {entry}, "a,a", expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, NotEqualsNullCannotUseDottedMultiKeyIndex) { + IndexEntry entry{BSON("a.b" << 1)}; + entry.multikeyPaths = {{0}}; + std::set<size_t> expectedIndices = {}; + testRateIndices( + "{'a.b': {$ne: null}}", "", kSimpleCollator, {entry}, "a.b,a.b", expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, NotEqualsNullCanUseIndexWhichIsMultiKeyOnAnotherPath) { + IndexEntry entry{BSON("a" << 1 << "mk" << 1)}; + entry.multikeyPaths = {{}, {0}}; + std::set<size_t> expectedIndices = {0}; + testRateIndices("{a: {$ne: null}}", "", kSimpleCollator, {entry}, "a,a", expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, ElemMatchValueWithNotEqualsNullCanUseIndex) { + IndexEntry entry{BSON("a" << 1)}; + std::set<size_t> expectedIndices = {0}; + testRateIndices( + "{a: {$elemMatch: {$ne: null}}}", "", kSimpleCollator, {entry}, "a", expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, ElemMatchValueWithNotEqualsNullCanUseMultiKeyIndex) { + IndexEntry entry{BSON("a" << 1)}; + entry.multikey = true; + std::set<size_t> expectedIndices = {0}; + testRateIndices( + "{a: {$elemMatch: {$ne: null}}}", "", kSimpleCollator, {entry}, "a", expectedIndices); +} -TEST(QueryPlannerIXSelectTest, IndexedFieldHasMultikeyComponents) { - auto indexEntry = makeIndexEntry(BSON("a" << 1 << "b.c" << 1), {{}, {}}); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a"_sd, indexEntry)); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("b.c"_sd, indexEntry)); +TEST(QueryPlannerIXSelectTest, ElemMatchObjectWithNotEqualNullCanUseIndex) { + IndexEntry entry{BSON("a.b" << 1)}; + std::set<size_t> expectedIndices = {0}; + testRateIndices("{a: {$elemMatch: {b: {$ne: null}}}}", + "", + kSimpleCollator, + {entry}, + "a.b,a.b", + expectedIndices); +} - indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1), {{}, {0U}}); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a"_sd, indexEntry)); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("b"_sd, indexEntry)); +TEST(QueryPlannerIXSelectTest, ElemMatchObjectWithNotEqualNullCannotUseOldMultiKeyIndex) { + IndexEntry entry{BSON("a.b" << 1)}; + entry.multikey = true; + std::set<size_t> expectedIndices = {}; + testRateIndices("{a: {$elemMatch: {b: {$ne: null}}}}", + "", + kSimpleCollator, + {entry}, + "a.b,a.b", + expectedIndices); +} + +TEST(QueryPlannerIXSelectTest, ElemMatchObjectWithNotEqualNullCanUseIndexMultikeyOnPrefix) { + IndexEntry entry{BSON("a.b.c.d" << 1)}; + entry.multikeyPaths = {{0U}}; + std::set<size_t> expectedIndices = {0U}; + const auto query = "{'a.b': {$elemMatch: {'c.d': {$ne: null}}}}"; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); - indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1 << "c.d" << 1), {{}, {}, {1U}}); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a"_sd, indexEntry)); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("b"_sd, indexEntry)); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("c.d"_sd, indexEntry)); + entry.multikeyPaths = {{1U}}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); - indexEntry = makeIndexEntry(BSON("a.b" << 1 << "a.c" << 1), {{}, {1U}}); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.b"_sd, indexEntry)); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.c"_sd, indexEntry)); + entry.multikeyPaths = {{2U}}; + expectedIndices = {}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); + + entry.multikeyPaths = {{3U}}; + expectedIndices = {}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); +} - indexEntry = makeIndexEntry(BSON("a.b" << 1 << "a.c" << 1), {{0U, 1U}, {0U}}); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.b"_sd, indexEntry)); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.c"_sd, indexEntry)); +TEST(QueryPlannerIXSelectTest, + NestedElemMatchObjectWithNotEqualNullCanUseIndexMultikeyOnAnyPrefix) { + IndexEntry entry{BSON("a.b.c.d" << 1)}; + entry.multikeyPaths = {{0U}}; + std::set<size_t> expectedIndices = {0U}; + const auto query = "{a: {$elemMatch: {b: {$elemMatch: {'c.d': {$ne: null}}}}}}"; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); - indexEntry = makeIndexEntry(BSON("a" << 1 << "b" << 1), {{0U}, {}}); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a"_sd, indexEntry)); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("b"_sd, indexEntry)); + entry.multikeyPaths = {{1U}}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); - indexEntry = makeIndexEntry(BSON("a.b.c" << 1 << "d" << 1), {{1U, 2U}, {}}); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.b.c"_sd, indexEntry)); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("d"_sd, indexEntry)); + entry.multikeyPaths = {{2U}}; + expectedIndices = {}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); - indexEntry = makeIndexEntry(BSON("a.b" << 1 << "c" << 1 << "d" << 1), {{1U}, {}, {0U}}); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("a.b"_sd, indexEntry)); - ASSERT_FALSE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("c"_sd, indexEntry)); - ASSERT_TRUE(QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("d"_sd, indexEntry)); + entry.multikeyPaths = {{3U}}; + expectedIndices = {}; + testRateIndices(query, "", kSimpleCollator, {entry}, "a.b.c.d,a.b.c.d", expectedIndices); } -DEATH_TEST(QueryPlannerIXSelectTest, - IndexedFieldHasMultikeyComponentsPassingInvalidFieldIsFatal, - "Invariant failure Hit a MONGO_UNREACHABLE!") { - auto indexEntry = makeIndexEntry(BSON("a" << 1), {{}}); - QueryPlannerIXSelect::indexedFieldHasMultikeyComponents("b"_sd, indexEntry); +TEST(QueryPlannerIXSelectTest, HashedIndexShouldNotBeRelevantForNotPredicate) { + IndexEntry entry{BSON("a" + << "hashed")}; + entry.type = IndexType::INDEX_HASHED; + std::set<size_t> expectedIndices = {}; + testRateIndices("{a: {$ne: 4}}", "", kSimpleCollator, {entry}, "a,a", expectedIndices); } +TEST(QueryPlannerIXSelectTest, HashedIndexShouldNotBeRelevantForNotEqualsNullPredicate) { + IndexEntry entry{BSON("a" + << "hashed")}; + entry.type = IndexType::INDEX_HASHED; + std::set<size_t> expectedIndices = {}; + testRateIndices("{a: {$ne: null}}", "", kSimpleCollator, {entry}, "a,a", expectedIndices); +} } // namespace diff --git a/src/mongo/db/query/query_planner_array_test.cpp b/src/mongo/db/query/query_planner_array_test.cpp index 68521862a55..77829966187 100644 --- a/src/mongo/db/query/query_planner_array_test.cpp +++ b/src/mongo/db/query/query_planner_array_test.cpp @@ -2252,4 +2252,211 @@ TEST_F(QueryPlannerTest, CanExplodeMultikeyIndexScanForSortWhenSortFieldsAreNotM "{ixscan: {pattern: {a: 1, 'b.c': 1}, filter: null," "bounds: {a: [[2,2,true,true]], 'b.c': [['MinKey','MaxKey',true,true]]}}}]}}}}"); } + +TEST_F(QueryPlannerTest, ElemMatchValueNENull) { + addIndex(BSON("a" << 1)); + runQuery(fromjson("{a: {$elemMatch: {$ne: null}}}")); + + // We can't use the index because we would exclude {a: []} which should match. + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {a: {$elemMatch: {$ne: null}}}, node: {" + " ixscan: {pattern: {a:1}, bounds: {" + " a: [['MinKey',undefined,true,false], [null,'MaxKey',false,true]]" + "}}}}}"); +} + +TEST_F(QueryPlannerTest, ElemMatchObjectNENull) { + addIndex(BSON("a.b" << 1)); + runQuery(fromjson("{a: {$elemMatch: {b: {$ne: null}}}}")); + + // We can't use the index because we would exclude {"a.b": []} which should match. + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {a: {$elemMatch: {b: {$ne: null}}}}, node: {" + " ixscan: {pattern: {'a.b':1}, bounds: {" + " 'a.b': [['MinKey',undefined,true,false], [null,'MaxKey',false,true]]" + "}}}}}"); +} + +TEST_F(QueryPlannerTest, NENullOnMultikeyIndex) { + // true means multikey + addIndex(BSON("a" << 1), true); + runQuery(fromjson("{a: {$ne: null}}")); + + // We can't use the index because we would exclude {a: []} which should match. + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); +} + +TEST_F(QueryPlannerTest, ElemMatchValueNENullOnMultikeyIndex) { + // true means multikey + addIndex(BSON("a" << 1), true); + runQuery(fromjson("{a: {$elemMatch: {$ne: null}}}")); + + // We should be able to use the index because of the value $elemMatch. + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {a: {$elemMatch: {$ne: null}}}, node: {" + " ixscan: {pattern: {a: 1}, bounds: {" + " a: [['MinKey',undefined,true,false], [null,'MaxKey',false,true]]" + "}}}}}"); +} + +TEST_F(QueryPlannerTest, ElemMatchObjectNENullOnMultikeyIndex) { + // true means multikey + addIndex(BSON("a.b" << 1), true); + runQuery(fromjson("{a: {$elemMatch: {b: {$ne: null}}}}")); + + // We can't use the index because we would exclude {a: [{b: []}]} which should match. + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); +} + +TEST_F(QueryPlannerTest, ElemMatchObjectNENullWithSuffixOfElemMatchMultiKey) { + MultikeyPaths multikeyPaths{{0U, 1U}}; + addIndex(BSON("a.b" << 1), multikeyPaths); + runQuery(fromjson("{a: {$elemMatch: {b: {$ne: null}}}}")); + + // We can't use the index because we would exclude {a: [{b: []}]} which should match. + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1}}"); +} + +TEST_F(QueryPlannerTest, ElemMatchObjectNENullWithPrefixOfElemMatchMultiKey) { + MultikeyPaths multikeyPaths{{0U}}; + addIndex(BSON("a.b" << 1), multikeyPaths); + runQuery(fromjson("{a: {$elemMatch: {b: {$ne: null}}}}")); + + // We should be able to use the index since only 'a' is multikey. + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: {a: {$elemMatch: {b: {$ne: null}}}}, node: {" + " ixscan: {pattern: {'a.b': 1}, bounds: {" + " 'a.b': [['MinKey',undefined,true,false], [null,'MaxKey',false,true]]" + "}}}}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNullWithProjectionMultiKeyOnOtherPath) { + MultikeyPaths multikeyPaths{{0U}, {}}; + addIndex(BSON("a" << 1 << "c.d" << 1), multikeyPaths); + runQuerySortProj(fromjson("{'a': {$gt: 'foo'}, 'c.d': {$ne: null}}"), + BSONObj(), + fromjson("{_id: 0, 'c.d': 1}")); + + assertNumSolutions(2U); + assertSolutionExists("{proj: {spec: {_id: 0, 'c.d': 1}, node: {cscan: {dir: 1}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, 'c.d': 1}, node: {" + " ixscan: {filter: null, pattern: {'a': 1, 'c.d': 1}, bounds: {" + " 'a': [['foo',{},false,false]], " + " 'c.d':[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]" + "}}}}}"); +} + +TEST_F(QueryPlannerTest, + CompoundIndexBoundsElemMatchObjectEqualsNullWithProjectionMultiKeyOnOtherPath) { + MultikeyPaths multikeyPaths{{0U}, {}}; + addIndex(BSON("a" << 1 << "c.d" << 1), multikeyPaths); + runQuerySortProj(fromjson("{'a': {$gt: 'foo'}, c: {$elemMatch: {d: {$ne: null}}}}"), + BSONObj(), + fromjson("{_id: 0, 'c.d': 1}")); + + assertNumSolutions(2U); + assertSolutionExists("{proj: {spec: {_id: 0, 'c.d': 1}, node: {cscan: {dir: 1}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, 'c.d': 1}, node: {" + " fetch: {filter: {c: {$elemMatch: {d: {$ne: null}}}}, node: {" + " ixscan: {filter: null, pattern: {'a': 1, 'c.d': 1}, bounds: {" + " a: [['foo',{},false,false]], " + " 'c.d':[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]" + "}}}}}}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNullMultiKey) { + const bool isMultiKey = true; + addIndex(BSON("a.b" << 1 << "c.d" << 1), isMultiKey); + runQuery(fromjson("{'a.b': {$gt: 'foo'}, 'c.d': {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {" + " filter: {'c.d': {$ne: null}}," + " node: {ixscan: {" + " filter: null," + " pattern: {'a.b': 1, 'c.d': 1}," + " bounds: {" + " 'a.b': [['foo',{},false,false]], " + " 'c.d':[['MinKey','MaxKey',true,true]]" + " }" + " }}" + "}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNullMultiKeyPaths) { + const MultikeyPaths multikeyPaths = {{}, {0}}; // 'c' is multikey. + addIndex(BSON("a.b" << 1 << "c.d" << 1), multikeyPaths); + runQuery(fromjson("{'a.b': {$gt: 'foo'}, 'c.d': {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {" + " filter: {'c.d': {$ne: null}}," + " node: {ixscan: {" + " filter: null," + " pattern: {'a.b': 1, 'c.d': 1}," + " bounds: {" + " 'a.b': [['foo',{},false,false]], " + " 'c.d':[['MinKey','MaxKey',true,true]]" + " }" + " }}" + "}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNullMultiKeyWithProjection) { + const bool isMultiKey = true; + addIndex(BSON("a.b" << 1 << "c.d" << 1), isMultiKey); + runQuerySortProj(fromjson("{'a.b': {$gt: 'foo'}, 'c.d': {$ne: null}}"), + BSONObj(), + fromjson("{_id: 0, 'c.d': 1}")); + + assertNumSolutions(2U); + assertSolutionExists("{proj: {spec: {_id: 0, 'c.d': 1}, node: {cscan: {dir: 1}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, 'c.d': 1}, node: {" + "fetch: {filter: {'c.d': {$ne: null}}, node: {ixscan: {filter: null, pattern: " + "{'a.b': 1, 'c.d': 1}, bounds: {'a.b': [['foo',{},false,false]], " + "'c.d':[['MinKey','MaxKey',true,true]]}}}}}}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsNotEqualsNullReverseIndex) { + addIndex(BSON("a" << 1 << "b" << -1 << "c" << 1)); + runQuery(fromjson("{a: {$gt: 'foo'}, b: {$ne: null}, c: {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {" + " filter: null," + " node: {" + " ixscan: {" + " filter: null, " + " pattern: {a: 1, b: -1, c: 1}," + " bounds: {" + " a: [['foo', {}, false, false]]," + " b: [['MaxKey', null, true, false], [undefined, 'MinKey', false, true]]," + " c: [['MinKey', undefined, true, false], [null, 'MaxKey', false, true]]" + " }," + " dir: 1" + " }" + " }" + "}}"); +} + } // namespace diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 5a3818d21cc..efd33099df2 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -2478,8 +2478,8 @@ TEST_F(QueryPlannerTest, ExprEqCanUseSparseIndexForEqualityToNull) { assertNumSolutions(1U); assertSolutionExists( - "{fetch: {filter: {a: {$_internalExprEq: null}}, node: {ixscan: " - "{filter: null, pattern: {a: 1}, bounds: {a: [[null,null,true,true]]}}}}}"); + "{fetch: {filter: {a: {$_internalExprEq: null}}, node: {ixscan: {filter: null, pattern: " + "{a: 1}, bounds: {a: [[undefined,undefined,true,true], [null,null,true,true]]}}}}}"); } // @@ -2932,6 +2932,65 @@ TEST_F(QueryPlannerTest, NEOnMultikeyIndex) { "[3,'MaxKey',false,true]]}}}}}"); } +TEST_F(QueryPlannerTest, NENull) { + addIndex(BSON("a" << 1)); + runQuery(fromjson("{a: {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {pattern: {a:1}, bounds: {a: " + "[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, NENullWithSort) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: {$ne: null}}"), BSON("a" << 1), BSONObj()); + + assertNumSolutions(2U); + assertSolutionExists( + "{sort: {" + " pattern: {a: 1}," + " limit: 0," + " node: {" + " sortKeyGen: {" + " node: {" + " cscan: {" + " filter: {a: {$ne: null}}, " + " dir: 1" + " }}}}}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {pattern: {a:1}, " + "bounds: {a: [['MinKey',undefined,true,false]," + "[null,'MaxKey',false,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, NENullWithProjection) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: {$ne: null}}"), BSONObj(), BSON("_id" << 0 << "a" << 1)); + + assertNumSolutions(2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: {dir: 1}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {pattern: {a:1}, " + "bounds: {a: [['MinKey',undefined,true,false],[null,'MaxKey',false,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, NENullWithSortAndProjection) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: {$ne: null}}"), BSON("a" << 1), BSON("_id" << 0 << "a" << 1)); + + assertNumSolutions(2U); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1}, node: {sort: {pattern: {a: 1}, limit: 0, node: {sortKeyGen: " + "{node: {cscan: {dir: 1}}}}}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1}, node: {" + " ixscan: {pattern: {a:1}, bounds: {" + " a: [['MinKey',undefined,true,false], [null,'MaxKey',false,true]]" + "}}}}}"); +} + // In general, a negated $nin can make use of an index. TEST_F(QueryPlannerTest, NinUsesMultikeyIndex) { // true means multikey @@ -3025,6 +3084,46 @@ TEST_F(QueryPlannerTest, CompoundIndexBoundsStringBounds) { "b:[['bar',{},true,false]]}}}}}"); } +TEST_F(QueryPlannerTest, CompoundIndexBoundsNotEqualsNull) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{a: {$gt: 'foo'}, b: {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: " + "{a: 1, b: 1}, bounds: {a: [['foo',{},false,false]], " + "b:[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNull) { + addIndex(BSON("a.b" << 1 << "c.d" << 1)); + runQuery(fromjson("{'a.b': {$gt: 'foo'}, 'c.d': {$ne: null}}")); + + assertNumSolutions(2U); + assertSolutionExists("{cscan: {dir: 1}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: {filter: null, pattern: " + "{'a.b': 1, 'c.d': 1}, bounds: {'a.b': [['foo',{},false,false]], " + "'c.d':[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, CompoundIndexBoundsDottedNotEqualsNullWithProjection) { + addIndex(BSON("a.b" << 1 << "c.d" << 1)); + runQuerySortProj(fromjson("{'a.b': {$gt: 'foo'}, 'c.d': {$ne: null}}"), + BSONObj(), + fromjson("{_id: 0, 'c.d': 1}")); + + assertNumSolutions(2U); + assertSolutionExists("{proj: {spec: {_id: 0, 'c.d': 1}, node: {cscan: {dir: 1}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, 'c.d': 1}, node: {" + " ixscan: {filter: null, pattern: {'a.b': 1, 'c.d': 1}, bounds: {" + " 'a.b': [['foo',{},false,false]], " + " 'c.d':[['MinKey',undefined,true,false],[null,'MaxKey',false,true]]" + "}}}}}"); +} + TEST_F(QueryPlannerTest, IndexBoundsAndWithNestedOr) { addIndex(BSON("a" << 1)); runQuery(fromjson("{$and: [{a: 1, $or: [{a: 2}, {a: 3}]}]}")); @@ -3104,7 +3203,7 @@ TEST_F(QueryPlannerTest, CompoundIndexBoundsIntersectRanges) { // predicates (SERVER-13890). TEST_F(QueryPlannerTest, IndexBoundsOrOfNegations) { addIndex(BSON("a" << 1)); - runQuery(fromjson("{$or: [{a: {$ne: 3}}, {a: {$ne: 4}}]}")); + runQuery(fromjson("{$or: [{a: {$ne: null}}, {a: {$ne: 4}}]}")); assertNumSolutions(2U); assertSolutionExists("{cscan: {dir: 1}}"); @@ -4559,6 +4658,22 @@ TEST_F(QueryPlannerTest, ContainedOrNot) { assertSolutionExists("{cscan: {dir: 1}}}}"); } +TEST_F(QueryPlannerTest, ContainedOrNotEqualsNull) { + addIndex(BSON("b" << 1 << "a" << 1)); + addIndex(BSON("c" << 1 << "a" << 1)); + + runQuery(fromjson("{$and: [{$nor: [{a: null}]}, {$or: [{b: 6}, {c: 7}]}]}")); + assertNumSolutions(2); + assertSolutionExists( + "{fetch: {filter: null, node: {or: {nodes: [" + "{ixscan: {pattern: {b: 1, a: 1}, bounds: {b: [[6, 6, true, true]], a: [['MinKey', " + "undefined, true, false], [null, 'MaxKey', false, true]]}}}," + "{ixscan: {pattern: {c: 1, a: 1}, bounds: {c: [[7, 7, true, true]], a: [['MinKey', " + "undefined, true, false], [null, 'MaxKey', false, true]]}}}" + "]}}}}"); + assertSolutionExists("{cscan: {dir: 1}}}}"); +} + TEST_F(QueryPlannerTest, ContainedOrMoveToNot) { addIndex(BSON("b" << 1 << "a" << 1)); addIndex(BSON("c" << 1 << "a" << 1)); diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 377bbb791ae..be3e8871b9c 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -122,8 +122,7 @@ bool intervalMatches(const BSONObj& testInt, const Interval trueInt) { appendIntervalBound(bob, low); appendIntervalBound(bob, high); Interval toCompare(bob.obj(), startInclusive, endInclusive); - - return Interval::INTERVAL_EQUALS == trueInt.compare(toCompare); + return trueInt.equals(toCompare); } /** |