diff options
author | Jess Balint <jbalint@gmail.com> | 2022-02-23 17:40:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-23 18:07:38 +0000 |
commit | 6889d8d80d0e69cc5c3de68eb05fe89e9b93cff4 (patch) | |
tree | 31b6c6beefb3c42be8c2faf735552354ba826a4c | |
parent | 5db2c8c6dadaa0b29c1de7cb5a133c34cfedf2d9 (diff) | |
download | mongo-6889d8d80d0e69cc5c3de68eb05fe89e9b93cff4.tar.gz |
SERVER-40691 $nin:[[],...] queries are not indexed
-rw-r--r-- | jstests/core/null_query_semantics.js | 35 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.h | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_index_test.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 7 | ||||
-rw-r--r-- | src/mongo/s/query/results_merger_test_fixture.h | 2 |
8 files changed, 161 insertions, 15 deletions
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js index 2cdd7bc218d..ea03ba082ca 100644 --- a/jstests/core/null_query_semantics.js +++ b/jstests/core/null_query_semantics.js @@ -369,6 +369,41 @@ function testNullSemantics(coll) { assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); + // Test the semantics of the query {a: {$nin: [null, []]}}. + (function testNotInNullAndEmptyArrayQuery() { + const query = {a: {$nin: [null, []]}}; + const noProjectResults = coll.find(query).toArray(); + const expected = [ + // Documents excluded by the query predicate are commented + // out below. + {_id: "a_empty_subobject", a: {}}, + //{_id: "a_null", a: null}, + {_id: "a_number", a: 4}, + {_id: "a_subobject_b_not_null", a: {b: "hi"}}, + {_id: "a_subobject_b_null", a: {b: null}}, + {_id: "a_subobject_b_undefined", a: {b: undefined}}, + //{_id: "a_undefined", a: undefined}, + //{_id: "no_a"}, + + //{_id: "a_double_array", a: [[]]}, + //{_id: "a_empty_array", a: []}, + {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]}, + {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]}, + {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]}, + {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]}, + {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, + //{_id: "a_value_array_all_nulls", a: [null, null]}, + {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, + //{_id: "a_value_array_with_null", a: [1, "string", null, 4]}, + //{_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}, + ]; + + assert(resultsEq(noProjectResults, expected), noProjectResults); + + const projectResults = coll.find(query, projectToOnlyA).toArray(); + assert(resultsEq(projectResults, extractAValues(expected)), projectResults); + }()); + (function testNotInNullAndRegexQuery() { const query = {a: {$nin: [null, /^str.*/]}}; const noProjectResults = coll.find(query).toArray(); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 4176823172e..bacedd53af9 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -122,6 +122,7 @@ bool stringMayHaveUnescapedPipe(StringData str) { const BSONObj kUndefinedElementObj = BSON("" << BSONUndefined); const BSONObj kNullElementObj = BSON("" << BSONNULL); +const BSONObj kEmptyArrayElementObj = BSON("" << BSONArray()); const Interval kHashedUndefinedInterval = IndexBoundsBuilder::makePointInterval( ExpressionMapping::hash(kUndefinedElementObj.firstElement())); @@ -563,6 +564,21 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, return; } + if (MatchExpression::MATCH_IN == child->matchType()) { + auto ime = static_cast<const InMatchExpression*>(child); + if (QueryPlannerIXSelect::canUseIndexForNin(ime)) { + makeNullEqualityBounds(index, isHashed, oilOut, tightnessOut); + oilOut->intervals.push_back( + IndexBoundsBuilder::makePointInterval(kEmptyArrayElementObj)); + oilOut->complement(); + unionize(oilOut); + if (index.pathHasMultikeyComponent(elt.fieldNameStringData())) { + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + } + return; + } + } + _translatePredicate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 67205d64e7a..c226dc51d6f 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -51,6 +51,16 @@ public: * Describes various degrees of precision with which predicates can be evaluated based * on the index bounds. * + * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning, + * and fetch vs. not fetch is whether you need data which is not in the index to answer the + * query. Often if you have inexact bounds it causes you to need more than the index data, but + * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may + * need to fetch the data to distinguish between a null and missing 'a' value (the index stores + * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED + * example you could think of something like $mod where you can apply the predicate to the data + * directly in the index, but $mod won't generate tight bounds so you still need to apply the + * predicate. + * * The integer values of the enum are significant, and are assigned in order of * increasing tightness. These values are used when we need to do comparison between two * BoundsTightness values. Such comparisons can answer questions such as "Does predicate diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index ff565bbae56..713c090e1c6 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -175,12 +175,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index, } } -static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) { - BSONElement e = infoObj[name]; - if (e.isNumber()) { - return e.numberDouble(); - } - return def; +bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) { + const std::vector<BSONElement>& inList = ime->getEqualities(); + auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; }; + auto containsEmptyArray = [](const BSONElement& elt) { + return elt.type() == Array && elt.embeddedObject().isEmpty(); + }; + return ime->getRegexes().empty() && inList.size() == 2 && + std::any_of(inList.begin(), inList.end(), containsNull) && + std::any_of(inList.begin(), inList.end(), containsEmptyArray); } /** @@ -432,13 +435,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } - // Comparisons with arrays have strange enough semantics that inverting the bounds - // within a $not has many complex special cases. We avoid indexing these queries, even - // though it is sometimes possible to build useful bounds. - if (isComparisonWithArrayPred(child)) { - return false; - } - // $gt and $lt to MinKey/MaxKey must build inexact bounds if the index is multikey and // therefore cannot be inverted safely in a $not. if (index.multikey && (child->isGTMinKey() || child->isLTMaxKey())) { @@ -457,6 +453,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, // If it's a negated $in, it can't have any REGEX's inside. if (MatchExpression::MATCH_IN == childtype) { InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0)); + + if (canUseIndexForNin(ime)) { + // This is a case that we know is supported. + return true; + } + if (!ime->getRegexes().empty()) { return false; } @@ -467,6 +469,13 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } } + + // Comparisons with arrays have strange enough semantics that inverting the bounds + // within a $not has many complex special cases. We avoid indexing these queries, even + // though it is sometimes possible to build useful bounds. + if (isComparisonWithArrayPred(child)) { + return false; + } } // If this is an $elemMatch value, make sure _all_ of the children can use the index. diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h index 0bd1accd5bc..84f00d656da 100644 --- a/src/mongo/db/query/planner_ixselect.h +++ b/src/mongo/db/query/planner_ixselect.h @@ -158,6 +158,20 @@ public: */ static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr); + /** + * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is + * the expression (modulo in-list ordering) and it doesn't contain any regexes. + * + * Why is this case special? An equality expression for "null" will match both documents with a + * literal null for the specified key, and those where the key is not present. An equality + * expression for "[]" (which is stored as "undefined" in the index) will match documents with + * an empty array or an array with an empty array element. If we negate either of this bounds in + * isolation, we may produce incomplete results wrt the other expression. If we negate the + * composition of the two, we can properly return complete results excluding both null and empty + * array values. + */ + static bool canUseIndexForNin(const InMatchExpression* ime); + private: /** * Used to keep track of if any $elemMatch predicates were encountered when walking a diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp index 994b8738d7b..2d240650b82 100644 --- a/src/mongo/db/query/query_planner_index_test.cpp +++ b/src/mongo/db/query/query_planner_index_test.cpp @@ -201,6 +201,63 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) { assertHasOnlyCollscan(); } +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use a multikey index. + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, false]," + "[null, [], false, false]," + "[[], 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use an index which is not multikey. + addIndex(fromjson("{a: 1}")); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, false]," + "[null, [], false, false]," + "[[], 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [null] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + // This matches the [[], null] pattern but also has a regex. + runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], [1]] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NotLtEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $not: { $lt: [] } } }")); + assertHasOnlyCollscan(); +} + TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) { addIndex(BSON("i" << 1), false, // multikey diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 9be90219927..11c35a36b53 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -81,8 +81,13 @@ bool filterMatches(const BSONObj& testFilter, if (!statusWithMatcher.isOK()) { return false; } - const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); + std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); MatchExpression::sortTree(root.get()); + if (root->matchType() == mongo::MatchExpression::NOT) { + // Ideally we would optimize() everything, but some of the tests depend on structural + // equivalence of single-arg $or expressions. + root = MatchExpression::optimize(std::move(root)); + } std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone()); MatchExpression::sortTree(trueFilter.get()); return trueFilter->equivalent(root.get()); diff --git a/src/mongo/s/query/results_merger_test_fixture.h b/src/mongo/s/query/results_merger_test_fixture.h index 10978c11cb7..30b72c341ad 100644 --- a/src/mongo/s/query/results_merger_test_fixture.h +++ b/src/mongo/s/query/results_merger_test_fixture.h @@ -147,7 +147,7 @@ protected: void scheduleNetworkResponses(std::vector<CursorResponse> responses) { std::vector<BSONObj> objs; for (const auto& cursorResponse : responses) { - // For tests of the AsyncResultsMerger, all CursorRepsonses scheduled by the tests are + // For tests of the AsyncResultsMerger, all CursorResponses scheduled by the tests are // subsequent responses, since the AsyncResultsMerger will only ever run getMores. objs.push_back(cursorResponse.toBSON(CursorResponse::ResponseType::SubsequentResponse)); } |