diff options
author | Jess Balint <jbalint@gmail.com> | 2022-02-22 22:39:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-22 23:32:44 +0000 |
commit | 54b918d73c63483faecedb807722a707ba0e64c2 (patch) | |
tree | 2b92e04f078c82e63a02f6c37285b3be222c8a47 /src/mongo/db | |
parent | 6be53022af440793953d1e39725f38b728bcb155 (diff) | |
download | mongo-54b918d73c63483faecedb807722a707ba0e64c2.tar.gz |
SERVER-40691 $nin:[[],...] queries are not indexed #3420
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.h | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_index_test.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 7 |
6 files changed, 121 insertions, 14 deletions
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 11617423e01..69a1860014c 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -577,6 +577,18 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, return; } + if (MatchExpression::MATCH_IN == child->matchType()) { + auto ime = static_cast<const InMatchExpression*>(child); + if (QueryPlannerIXSelect::canUseIndexForNin(ime)) { + makeNullEqualityBounds(index, isHashed, oilOut, tightnessOut); + oilOut->intervals.push_back(IndexBoundsBuilder::kEmptyArrayPointInterval); + oilOut->complement(); + unionize(oilOut); + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + return; + } + } + _translatePredicate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 4cdba8c0c0e..8c7a3fc446e 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -52,6 +52,16 @@ public: * Describes various degrees of precision with which predicates can be evaluated based * on the index bounds. * + * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning, + * and fetch vs. not fetch is whether you need data which is not in the index to answer the + * query. Often if you have inexact bounds it causes you to need more than the index data, but + * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may + * need to fetch the data to distinguish between a null and missing 'a' value (the index stores + * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED + * example you could think of something like $mod where you can apply the predicate to the data + * directly in the index, but $mod won't generate tight bounds so you still need to apply the + * predicate. + * * The integer values of the enum are significant, and are assigned in order of * increasing tightness. These values are used when we need to do comparison between two * BoundsTightness values. Such comparisons can answer questions such as "Does predicate diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 007ec1dade0..e4368c7b174 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -176,12 +176,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index, } } -static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) { - BSONElement e = infoObj[name]; - if (e.isNumber()) { - return e.numberDouble(); - } - return def; +bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) { + const std::vector<BSONElement>& inList = ime->getEqualities(); + auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; }; + auto containsEmptyArray = [](const BSONElement& elt) { + return elt.type() == Array && elt.embeddedObject().isEmpty(); + }; + return !ime->hasRegex() && inList.size() == 2 && + std::any_of(inList.begin(), inList.end(), containsNull) && + std::any_of(inList.begin(), inList.end(), containsEmptyArray); } /** @@ -434,13 +437,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } - // Comparisons with arrays have strange enough semantics that inverting the bounds - // within a $not has many complex special cases. We avoid indexing these queries, even - // though it is sometimes possible to build useful bounds. - if (isComparisonWithArrayPred(child)) { - return false; - } - // $gt and $lt to MinKey/MaxKey must build inexact bounds if the index is multikey and // therefore cannot be inverted safely in a $not. if (index.multikey && (child->isGTMinKey() || child->isLTMaxKey())) { @@ -459,6 +455,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, // If it's a negated $in, it can't have any REGEX's inside. if (MatchExpression::MATCH_IN == childtype) { InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0)); + + if (canUseIndexForNin(ime)) { + // This is a case that we know is supported. + return true; + } + if (!ime->getRegexes().empty()) { return false; } @@ -469,6 +471,13 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } } + + // Comparisons with arrays have strange enough semantics that inverting the bounds + // within a $not has many complex special cases. We avoid indexing these queries, even + // though it is sometimes possible to build useful bounds. + if (isComparisonWithArrayPred(child)) { + return false; + } } // If this is an $elemMatch value, make sure _all_ of the children can use the index. diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h index 87111686349..0ef2d480953 100644 --- a/src/mongo/db/query/planner_ixselect.h +++ b/src/mongo/db/query/planner_ixselect.h @@ -157,6 +157,20 @@ public: */ static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr); + /** + * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is + * the expression (modulo in-list ordering) and it doesn't contain any regexes. + * + * Why is this case special? An equality expression for "null" will match both documents with a + * literal null for the specified key, and those where the key is not present. An equality + * expression for "[]" (which is stored as "undefined" in the index) will match documents with + * an empty array or an array with an empty array element. If we negate either of this bounds in + * isolation, we may produce incomplete results wrt the other expression. If we negate the + * composition of the two, we can properly return complete results excluding both null and empty + * array values. + */ + static bool canUseIndexForNin(const InMatchExpression* ime); + private: /** * Used to keep track of if any $elemMatch predicates were encountered when walking a diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp index 7e043a7d795..045fa35dbe1 100644 --- a/src/mongo/db/query/query_planner_index_test.cpp +++ b/src/mongo/db/query/query_planner_index_test.cpp @@ -248,6 +248,63 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) { assertHasOnlyCollscan(); } +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use a multikey index. + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, false]," + "[null, [], false, false]," + "[[], 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use an index which is not multikey. + addIndex(fromjson("{a: 1}")); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, false]," + "[null, [], false, false]," + "[[], 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [null] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + // This matches the [[], null] pattern but also has a regex. + runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], [1]] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NotLtEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $not: { $lt: [] } } }")); + assertHasOnlyCollscan(); +} + TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) { addIndex(BSON("i" << 1), false, // multikey diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 1d78d8a086d..695418cc84f 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -84,8 +84,13 @@ Status filterMatches(const BSONObj& testFilter, return statusWithMatcher.getStatus().withContext( "match expression provided by the test did not parse successfully"); } - const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); + std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); MatchExpression::sortTree(root.get()); + if (root->matchType() == mongo::MatchExpression::NOT) { + // Ideally we would optimize() everything, but some of the tests depend on structural + // equivalence of single-arg $or expressions. + root = MatchExpression::optimize(std::move(root)); + } std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone()); MatchExpression::sortTree(trueFilter.get()); if (trueFilter->equivalent(root.get())) { |