summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJess Balint <jbalint@gmail.com>2022-02-22 22:39:19 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-22 23:32:44 +0000
commit54b918d73c63483faecedb807722a707ba0e64c2 (patch)
tree2b92e04f078c82e63a02f6c37285b3be222c8a47
parent6be53022af440793953d1e39725f38b728bcb155 (diff)
downloadmongo-54b918d73c63483faecedb807722a707ba0e64c2.tar.gz
SERVER-40691 $nin:[[],...] queries are not indexed #3420
-rw-r--r--jstests/core/null_query_semantics.js35
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp12
-rw-r--r--src/mongo/db/query/index_bounds_builder.h10
-rw-r--r--src/mongo/db/query/planner_ixselect.cpp35
-rw-r--r--src/mongo/db/query/planner_ixselect.h14
-rw-r--r--src/mongo/db/query/query_planner_index_test.cpp57
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp7
-rw-r--r--src/mongo/s/query/results_merger_test_fixture.h2
8 files changed, 157 insertions, 15 deletions
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js
index 900f9edf2b0..1ff14cc710c 100644
--- a/jstests/core/null_query_semantics.js
+++ b/jstests/core/null_query_semantics.js
@@ -364,6 +364,41 @@ function testNullSemantics(coll) {
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
+ // Test the semantics of the query {a: {$nin: [null, []]}}.
+ (function testNotInNullAndEmptyArrayQuery() {
+ const query = {a: {$nin: [null, []]}};
+ const noProjectResults = coll.find(query).toArray();
+ const expected = [
+ // Documents excluded by the query predicate are commented
+ // out below.
+ {_id: "a_empty_subobject", a: {}},
+ //{_id: "a_null", a: null},
+ {_id: "a_number", a: 4},
+ {_id: "a_subobject_b_not_null", a: {b: "hi"}},
+ {_id: "a_subobject_b_null", a: {b: null}},
+ {_id: "a_subobject_b_undefined", a: {b: undefined}},
+ //{_id: "a_undefined", a: undefined},
+ //{_id: "no_a"},
+
+ //{_id: "a_double_array", a: [[]]},
+ //{_id: "a_empty_array", a: []},
+ {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]},
+ {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
+ {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
+ {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
+ {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
+ //{_id: "a_value_array_all_nulls", a: [null, null]},
+ {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
+ //{_id: "a_value_array_with_null", a: [1, "string", null, 4]},
+ //{_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]},
+ ];
+
+ assert(resultsEq(noProjectResults, expected), noProjectResults);
+
+ const projectResults = coll.find(query, projectToOnlyA).toArray();
+ assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
+ }());
+
(function testNotInNullAndRegexQuery() {
const query = {a: {$nin: [null, /^str.*/]}};
const noProjectResults = coll.find(query).toArray();
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 11617423e01..69a1860014c 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -577,6 +577,18 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
return;
}
+ if (MatchExpression::MATCH_IN == child->matchType()) {
+ auto ime = static_cast<const InMatchExpression*>(child);
+ if (QueryPlannerIXSelect::canUseIndexForNin(ime)) {
+ makeNullEqualityBounds(index, isHashed, oilOut, tightnessOut);
+ oilOut->intervals.push_back(IndexBoundsBuilder::kEmptyArrayPointInterval);
+ oilOut->complement();
+ unionize(oilOut);
+ *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ return;
+ }
+ }
+
_translatePredicate(child, elt, index, oilOut, tightnessOut);
oilOut->complement();
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 4cdba8c0c0e..8c7a3fc446e 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -52,6 +52,16 @@ public:
* Describes various degrees of precision with which predicates can be evaluated based
* on the index bounds.
*
+ * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning,
+ * and fetch vs. not fetch is whether you need data which is not in the index to answer the
+ * query. Often if you have inexact bounds it causes you to need more than the index data, but
+ * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may
+ * need to fetch the data to distinguish between a null and missing 'a' value (the index stores
+ * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED
+ * example you could think of something like $mod where you can apply the predicate to the data
+ * directly in the index, but $mod won't generate tight bounds so you still need to apply the
+ * predicate.
+ *
* The integer values of the enum are significant, and are assigned in order of
* increasing tightness. These values are used when we need to do comparison between two
* BoundsTightness values. Such comparisons can answer questions such as "Does predicate
diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp
index 007ec1dade0..e4368c7b174 100644
--- a/src/mongo/db/query/planner_ixselect.cpp
+++ b/src/mongo/db/query/planner_ixselect.cpp
@@ -176,12 +176,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index,
}
}
-static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) {
- BSONElement e = infoObj[name];
- if (e.isNumber()) {
- return e.numberDouble();
- }
- return def;
+bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) {
+ const std::vector<BSONElement>& inList = ime->getEqualities();
+ auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; };
+ auto containsEmptyArray = [](const BSONElement& elt) {
+ return elt.type() == Array && elt.embeddedObject().isEmpty();
+ };
+ return !ime->hasRegex() && inList.size() == 2 &&
+ std::any_of(inList.begin(), inList.end(), containsNull) &&
+ std::any_of(inList.begin(), inList.end(), containsEmptyArray);
}
/**
@@ -434,13 +437,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
return false;
}
- // Comparisons with arrays have strange enough semantics that inverting the bounds
- // within a $not has many complex special cases. We avoid indexing these queries, even
- // though it is sometimes possible to build useful bounds.
- if (isComparisonWithArrayPred(child)) {
- return false;
- }
-
// $gt and $lt to MinKey/MaxKey must build inexact bounds if the index is multikey and
// therefore cannot be inverted safely in a $not.
if (index.multikey && (child->isGTMinKey() || child->isLTMaxKey())) {
@@ -459,6 +455,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
// If it's a negated $in, it can't have any REGEX's inside.
if (MatchExpression::MATCH_IN == childtype) {
InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0));
+
+ if (canUseIndexForNin(ime)) {
+ // This is a case that we know is supported.
+ return true;
+ }
+
if (!ime->getRegexes().empty()) {
return false;
}
@@ -469,6 +471,13 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
return false;
}
}
+
+ // Comparisons with arrays have strange enough semantics that inverting the bounds
+ // within a $not has many complex special cases. We avoid indexing these queries, even
+ // though it is sometimes possible to build useful bounds.
+ if (isComparisonWithArrayPred(child)) {
+ return false;
+ }
}
// If this is an $elemMatch value, make sure _all_ of the children can use the index.
diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h
index 87111686349..0ef2d480953 100644
--- a/src/mongo/db/query/planner_ixselect.h
+++ b/src/mongo/db/query/planner_ixselect.h
@@ -157,6 +157,20 @@ public:
*/
static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr);
+ /**
+ * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is
+ * the expression (modulo in-list ordering) and it doesn't contain any regexes.
+ *
+ * Why is this case special? An equality expression for "null" will match both documents with a
+ * literal null for the specified key, and those where the key is not present. An equality
+ * expression for "[]" (which is stored as "undefined" in the index) will match documents with
+ * an empty array or an array with an empty array element. If we negate either of this bounds in
+ * isolation, we may produce incomplete results wrt the other expression. If we negate the
+ * composition of the two, we can properly return complete results excluding both null and empty
+ * array values.
+ */
+ static bool canUseIndexForNin(const InMatchExpression* ime);
+
private:
/**
* Used to keep track of if any $elemMatch predicates were encountered when walking a
diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp
index 7e043a7d795..045fa35dbe1 100644
--- a/src/mongo/db/query/query_planner_index_test.cpp
+++ b/src/mongo/db/query/query_planner_index_test.cpp
@@ -248,6 +248,63 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) {
assertHasOnlyCollscan();
}
+TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+ // Use a multikey index.
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [[], null] }}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: "
+ "{pattern: {a: 1}, bounds: "
+ "{a: ["
+ "['MinKey', undefined, true, false],"
+ "[null, [], false, false],"
+ "[[], 'MaxKey', false, true]"
+ "]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+ // Use an index which is not multikey.
+ addIndex(fromjson("{a: 1}"));
+ runQuery(fromjson("{a: { $nin: [[], null] }}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {node: {ixscan: "
+ "{pattern: {a: 1}, bounds: "
+ "{a: ["
+ "['MinKey', undefined, true, false],"
+ "[null, [], false, false],"
+ "[[], 'MaxKey', false, true]"
+ "]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [null] }}"));
+ assertHasOnlyCollscan();
+}
+
+TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ // This matches the [[], null] pattern but also has a regex.
+ runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}"));
+ assertHasOnlyCollscan();
+}
+
+TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [[], [1]] }}"));
+ assertHasOnlyCollscan();
+}
+
+TEST_F(QueryPlannerTest, NotLtEmptyArrayShouldNotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $not: { $lt: [] } } }"));
+ assertHasOnlyCollscan();
+}
+
TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) {
addIndex(BSON("i" << 1),
false, // multikey
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index 1d78d8a086d..695418cc84f 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -84,8 +84,13 @@ Status filterMatches(const BSONObj& testFilter,
return statusWithMatcher.getStatus().withContext(
"match expression provided by the test did not parse successfully");
}
- const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue());
+ std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue());
MatchExpression::sortTree(root.get());
+ if (root->matchType() == mongo::MatchExpression::NOT) {
+ // Ideally we would optimize() everything, but some of the tests depend on structural
+ // equivalence of single-arg $or expressions.
+ root = MatchExpression::optimize(std::move(root));
+ }
std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone());
MatchExpression::sortTree(trueFilter.get());
if (trueFilter->equivalent(root.get())) {
diff --git a/src/mongo/s/query/results_merger_test_fixture.h b/src/mongo/s/query/results_merger_test_fixture.h
index 10978c11cb7..30b72c341ad 100644
--- a/src/mongo/s/query/results_merger_test_fixture.h
+++ b/src/mongo/s/query/results_merger_test_fixture.h
@@ -147,7 +147,7 @@ protected:
void scheduleNetworkResponses(std::vector<CursorResponse> responses) {
std::vector<BSONObj> objs;
for (const auto& cursorResponse : responses) {
- // For tests of the AsyncResultsMerger, all CursorRepsonses scheduled by the tests are
+ // For tests of the AsyncResultsMerger, all CursorResponses scheduled by the tests are
// subsequent responses, since the AsyncResultsMerger will only ever run getMores.
objs.push_back(cursorResponse.toBSON(CursorResponse::ResponseType::SubsequentResponse));
}