diff options
author | Jess Balint <jbalint@gmail.com> | 2022-02-24 18:19:46 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-24 18:57:18 +0000 |
commit | e68a7d47305e14e090cba9ce3d92533053299996 (patch) | |
tree | 6f8fafdf1953cedb4aefb7a4459797d9dd440785 | |
parent | ce846ddb65af2196acfa2bc6691b1a14524be630 (diff) | |
download | mongo-e68a7d47305e14e090cba9ce3d92533053299996.tar.gz |
SERVER-40691 $nin:[[],...] queries are not indexedr4.2.19-rc0r4.2.19
(cherry picked from commit df25c71b8674a78e17468f48bcda5285decb9246)
-rw-r--r-- | jstests/core/null_query_semantics.js | 39 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.h | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 7 | ||||
-rw-r--r-- | src/mongo/s/query/results_merger_test_fixture.h | 2 |
8 files changed, 166 insertions, 12 deletions
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js index 7276a62b813..2e09c46d104 100644 --- a/jstests/core/null_query_semantics.js +++ b/jstests/core/null_query_semantics.js @@ -314,6 +314,45 @@ function testNotEqualsNullSemantics() { projectResults); }()); + // Test the semantics of the query {a: {$nin: [null, []]}}. + (function testNotInNullAndEmptyArrayQuery() { + const query = {a: {$nin: [null, []]}}; + const noProjectResults = coll.find(query).toArray(); + const expected = [ + // Documents excluded by the query predicate are commented + // out below. + {_id: "a_empty_subobject", a: {}}, + //{_id: "a_null", a: null}, + {_id: "a_number", a: 4}, + {_id: "a_subobject_b_not_null", a: {b: "hi"}}, + {_id: "a_subobject_b_null", a: {b: null}}, + {_id: "a_subobject_b_undefined", a: {b: undefined}}, + // Semantic difference during backport: this document doesn't match "null" in this + // version (c.f. SERVER-21929). + {_id: "a_undefined", a: undefined}, + //{_id: "no_a"}, + + //{_id: "a_double_array", a: [[]]}, + //{_id: "a_empty_array", a: []}, + {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]}, + {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]}, + {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]}, + {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]}, + {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, + //{_id: "a_value_array_all_nulls", a: [null, null]}, + {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, + //{_id: "a_value_array_with_null", a: [1, "string", null, 4]}, + // Semantic difference during backport: this document doesn't match "null" in this + // version (c.f. SERVER-21929). + {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}, + ]; + + assert(resultsEq(noProjectResults, expected), noProjectResults); + + const projectResults = coll.find(query, projectToOnlyA).toArray(); + assert(resultsEq(projectResults, extractAValues(expected)), projectResults); + }()); + (function testNotInNullAndRegexQuery() { const query = {a: {$nin: [null, /^str.*/]}}; const noProjectResults = coll.find(query).toArray(); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 597ee8ac1c6..e0d0cb8f6af 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -120,6 +120,7 @@ bool stringMayHaveUnescapedPipe(StringData str) { const BSONObj kUndefinedElementObj = BSON("" << BSONUndefined); const BSONObj kNullElementObj = BSON("" << BSONNULL); +const BSONObj kEmptyArrayElementObj = BSON("" << BSONArray()); const Interval kHashedUndefinedInterval = IndexBoundsBuilder::makePointInterval( ExpressionMapping::hash(kUndefinedElementObj.firstElement())); @@ -519,6 +520,33 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, return; } + if (MatchExpression::MATCH_IN == child->matchType()) { + auto ime = static_cast<const InMatchExpression*>(child); + if (QueryPlannerIXSelect::canUseIndexForNin(ime)) { + // Permit documents with an undefined value to be + // returned from $nin index scan. the empty array is + // represented as undefined in the index but the empty + // array values are filtered out. This is required to + // match the collection scan (no index) behavior. This + // was later changed by SERVER-21929. + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.appendUndefined(""); + oilOut->intervals.push_back(makeRangeInterval( + bob.asTempObj().getOwned(), BoundInclusion::kIncludeBothStartAndEndKeys)); + bob.resetToEmpty(); + bob.appendNull(""); + bob.appendMaxKey(""); + oilOut->intervals.push_back( + makeRangeInterval(bob.done().getOwned(), BoundInclusion::kIncludeEndKeyOnly)); + unionize(oilOut); + if (index.pathHasMultikeyComponent(elt.fieldNameStringData())) { + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + } + return; + } + } + _translatePredicate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 0453df842cb..69d72549e0f 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -48,6 +48,16 @@ public: * Describes various degrees of precision with which predicates can be evaluated based * on the index bounds. * + * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning, + * and fetch vs. not fetch is whether you need data which is not in the index to answer the + * query. Often if you have inexact bounds it causes you to need more than the index data, but + * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may + * need to fetch the data to distinguish between a null and missing 'a' value (the index stores + * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED + * example you could think of something like $mod where you can apply the predicate to the data + * directly in the index, but $mod won't generate tight bounds so you still need to apply the + * predicate. + * * The integer values of the enum are significant, and are assigned in order of * increasing tightness. These values are used when we need to do comparison between two * BoundsTightness values. Such comparisons can answer questions such as "Does predicate diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 3443ba16581..4aa69cfce17 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -174,12 +174,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index, } } -static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) { - BSONElement e = infoObj[name]; - if (e.isNumber()) { - return e.numberDouble(); - } - return def; +bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) { + const std::vector<BSONElement>& inList = ime->getEqualities(); + auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; }; + auto containsEmptyArray = [](const BSONElement& elt) { + return elt.type() == Array && elt.embeddedObject().isEmpty(); + }; + return ime->getRegexes().empty() && inList.size() == 2 && + std::any_of(inList.begin(), inList.end(), containsNull) && + std::any_of(inList.begin(), inList.end(), containsEmptyArray); } /** @@ -432,10 +435,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } - if (isEqualsArrayOrNotInArray(child)) { - return false; - } - // Most of the time we can't use a multikey index for a $ne: null query, however there // are a few exceptions around $elemMatch. const bool canUseIndexForNeNull = @@ -447,6 +446,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, // If it's a negated $in, it can't have any REGEX's inside. if (MatchExpression::MATCH_IN == childtype) { InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0)); + + if (canUseIndexForNin(ime)) { + // This is a case that we know is supported. + return true; + } + if (!ime->getRegexes().empty()) { return false; } @@ -457,6 +462,10 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } } + + if (isEqualsArrayOrNotInArray(child)) { + return false; + } } // If this is an $elemMatch value, make sure _all_ of the children can use the index. diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h index 1b7fed5eed0..5bc5f152632 100644 --- a/src/mongo/db/query/planner_ixselect.h +++ b/src/mongo/db/query/planner_ixselect.h @@ -153,6 +153,20 @@ public: */ static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr); + /** + * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is + * the expression (modulo in-list ordering) and it doesn't contain any regexes. + * + * Why is this case special? An equality expression for "null" will match both documents with a + * literal null for the specified key, and those where the key is not present. An equality + * expression for "[]" (which is stored as "undefined" in the index) will match documents with + * an empty array or an array with an empty array element. If we negate either of this bounds in + * isolation, we may produce incomplete results wrt the other expression. If we negate the + * composition of the two, we can properly return complete results excluding both null and empty + * array values. + */ + static bool canUseIndexForNin(const InMatchExpression* ime); + private: /** * Used to keep track of if any $elemMatch predicates were encountered when walking a diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index dc34029f148..43a75829043 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -2765,6 +2765,55 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) { assertHasOnlyCollscan(); } +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use a multikey index. + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, true]," + "[null, 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use an index which is not multikey. + addIndex(fromjson("{a: 1}")); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, true]," + "[null, 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [null] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + // This matches the [[], null] pattern but also has a regex. + runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], [1]] }}")); + assertHasOnlyCollscan(); +} + TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) { addIndex(BSON("i" << 1), false, // multikey diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 2cfcc54985b..809945049b2 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -79,8 +79,13 @@ bool filterMatches(const BSONObj& testFilter, if (!statusWithMatcher.isOK()) { return false; } - const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); + std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); CanonicalQuery::sortTree(root.get()); + if (root->matchType() == mongo::MatchExpression::NOT) { + // Ideally we would optimize() everything, but some of the tests depend on structural + // equivalence of single-arg $or expressions. + root = MatchExpression::optimize(std::move(root)); + } std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone()); CanonicalQuery::sortTree(trueFilter.get()); return trueFilter->equivalent(root.get()); diff --git a/src/mongo/s/query/results_merger_test_fixture.h b/src/mongo/s/query/results_merger_test_fixture.h index 62f504a3ea1..41bdd4cf85e 100644 --- a/src/mongo/s/query/results_merger_test_fixture.h +++ b/src/mongo/s/query/results_merger_test_fixture.h @@ -145,7 +145,7 @@ protected: void scheduleNetworkResponses(std::vector<CursorResponse> responses) { std::vector<BSONObj> objs; for (const auto& cursorResponse : responses) { - // For tests of the AsyncResultsMerger, all CursorRepsonses scheduled by the tests are + // For tests of the AsyncResultsMerger, all CursorResponses scheduled by the tests are // subsequent responses, since the AsyncResultsMerger will only ever run getMores. objs.push_back(cursorResponse.toBSON(CursorResponse::ResponseType::SubsequentResponse)); } |