summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJess Balint <jbalint@gmail.com>2022-02-24 18:19:46 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-24 18:57:18 +0000
commite68a7d47305e14e090cba9ce3d92533053299996 (patch)
tree6f8fafdf1953cedb4aefb7a4459797d9dd440785
parentce846ddb65af2196acfa2bc6691b1a14524be630 (diff)
downloadmongo-e68a7d47305e14e090cba9ce3d92533053299996.tar.gz
SERVER-40691 $nin:[[],...] queries are not indexedr4.2.19-rc0r4.2.19
(cherry picked from commit df25c71b8674a78e17468f48bcda5285decb9246)
-rw-r--r--jstests/core/null_query_semantics.js39
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp28
-rw-r--r--src/mongo/db/query/index_bounds_builder.h10
-rw-r--r--src/mongo/db/query/planner_ixselect.cpp29
-rw-r--r--src/mongo/db/query/planner_ixselect.h14
-rw-r--r--src/mongo/db/query/query_planner_test.cpp49
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp7
-rw-r--r--src/mongo/s/query/results_merger_test_fixture.h2
8 files changed, 166 insertions, 12 deletions
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js
index 7276a62b813..2e09c46d104 100644
--- a/jstests/core/null_query_semantics.js
+++ b/jstests/core/null_query_semantics.js
@@ -314,6 +314,45 @@ function testNotEqualsNullSemantics() {
projectResults);
}());
+ // Test the semantics of the query {a: {$nin: [null, []]}}.
+ (function testNotInNullAndEmptyArrayQuery() {
+ const query = {a: {$nin: [null, []]}};
+ const noProjectResults = coll.find(query).toArray();
+ const expected = [
+ // Documents excluded by the query predicate are commented
+ // out below.
+ {_id: "a_empty_subobject", a: {}},
+ //{_id: "a_null", a: null},
+ {_id: "a_number", a: 4},
+ {_id: "a_subobject_b_not_null", a: {b: "hi"}},
+ {_id: "a_subobject_b_null", a: {b: null}},
+ {_id: "a_subobject_b_undefined", a: {b: undefined}},
+ // Semantic difference during backport: this document doesn't match "null" in this
+ // version (c.f. SERVER-21929).
+ {_id: "a_undefined", a: undefined},
+ //{_id: "no_a"},
+
+ //{_id: "a_double_array", a: [[]]},
+ //{_id: "a_empty_array", a: []},
+ {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]},
+ {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
+ {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
+ {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
+ {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
+ //{_id: "a_value_array_all_nulls", a: [null, null]},
+ {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
+ //{_id: "a_value_array_with_null", a: [1, "string", null, 4]},
+ // Semantic difference during backport: this document doesn't match "null" in this
+ // version (c.f. SERVER-21929).
+ {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]},
+ ];
+
+ assert(resultsEq(noProjectResults, expected), noProjectResults);
+
+ const projectResults = coll.find(query, projectToOnlyA).toArray();
+ assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
+ }());
+
(function testNotInNullAndRegexQuery() {
const query = {a: {$nin: [null, /^str.*/]}};
const noProjectResults = coll.find(query).toArray();
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 597ee8ac1c6..e0d0cb8f6af 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -120,6 +120,7 @@ bool stringMayHaveUnescapedPipe(StringData str) {
const BSONObj kUndefinedElementObj = BSON("" << BSONUndefined);
const BSONObj kNullElementObj = BSON("" << BSONNULL);
+const BSONObj kEmptyArrayElementObj = BSON("" << BSONArray());
const Interval kHashedUndefinedInterval = IndexBoundsBuilder::makePointInterval(
ExpressionMapping::hash(kUndefinedElementObj.firstElement()));
@@ -519,6 +520,33 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
return;
}
+ if (MatchExpression::MATCH_IN == child->matchType()) {
+ auto ime = static_cast<const InMatchExpression*>(child);
+ if (QueryPlannerIXSelect::canUseIndexForNin(ime)) {
+ // Permit documents with an undefined value to be
+ // returned from $nin index scan. the empty array is
+ // represented as undefined in the index but the empty
+ // array values are filtered out. This is required to
+ // match the collection scan (no index) behavior. This
+ // was later changed by SERVER-21929.
+ BSONObjBuilder bob;
+ bob.appendMinKey("");
+ bob.appendUndefined("");
+ oilOut->intervals.push_back(makeRangeInterval(
+ bob.asTempObj().getOwned(), BoundInclusion::kIncludeBothStartAndEndKeys));
+ bob.resetToEmpty();
+ bob.appendNull("");
+ bob.appendMaxKey("");
+ oilOut->intervals.push_back(
+ makeRangeInterval(bob.done().getOwned(), BoundInclusion::kIncludeEndKeyOnly));
+ unionize(oilOut);
+ if (index.pathHasMultikeyComponent(elt.fieldNameStringData())) {
+ *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ }
+ return;
+ }
+ }
+
_translatePredicate(child, elt, index, oilOut, tightnessOut);
oilOut->complement();
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 0453df842cb..69d72549e0f 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -48,6 +48,16 @@ public:
* Describes various degrees of precision with which predicates can be evaluated based
* on the index bounds.
*
+ * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning,
+ * and fetch vs. not fetch is whether you need data which is not in the index to answer the
+ * query. Often if you have inexact bounds it causes you to need more than the index data, but
+ * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may
+ * need to fetch the data to distinguish between a null and missing 'a' value (the index stores
+ * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED
+ * example you could think of something like $mod where you can apply the predicate to the data
+ * directly in the index, but $mod won't generate tight bounds so you still need to apply the
+ * predicate.
+ *
* The integer values of the enum are significant, and are assigned in order of
* increasing tightness. These values are used when we need to do comparison between two
* BoundsTightness values. Such comparisons can answer questions such as "Does predicate
diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp
index 3443ba16581..4aa69cfce17 100644
--- a/src/mongo/db/query/planner_ixselect.cpp
+++ b/src/mongo/db/query/planner_ixselect.cpp
@@ -174,12 +174,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index,
}
}
-static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) {
- BSONElement e = infoObj[name];
- if (e.isNumber()) {
- return e.numberDouble();
- }
- return def;
+bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) {
+ const std::vector<BSONElement>& inList = ime->getEqualities();
+ auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; };
+ auto containsEmptyArray = [](const BSONElement& elt) {
+ return elt.type() == Array && elt.embeddedObject().isEmpty();
+ };
+ return ime->getRegexes().empty() && inList.size() == 2 &&
+ std::any_of(inList.begin(), inList.end(), containsNull) &&
+ std::any_of(inList.begin(), inList.end(), containsEmptyArray);
}
/**
@@ -432,10 +435,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
return false;
}
- if (isEqualsArrayOrNotInArray(child)) {
- return false;
- }
-
// Most of the time we can't use a multikey index for a $ne: null query, however there
// are a few exceptions around $elemMatch.
const bool canUseIndexForNeNull =
@@ -447,6 +446,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
// If it's a negated $in, it can't have any REGEX's inside.
if (MatchExpression::MATCH_IN == childtype) {
InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0));
+
+ if (canUseIndexForNin(ime)) {
+ // This is a case that we know is supported.
+ return true;
+ }
+
if (!ime->getRegexes().empty()) {
return false;
}
@@ -457,6 +462,10 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt,
return false;
}
}
+
+ if (isEqualsArrayOrNotInArray(child)) {
+ return false;
+ }
}
// If this is an $elemMatch value, make sure _all_ of the children can use the index.
diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h
index 1b7fed5eed0..5bc5f152632 100644
--- a/src/mongo/db/query/planner_ixselect.h
+++ b/src/mongo/db/query/planner_ixselect.h
@@ -153,6 +153,20 @@ public:
*/
static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr);
+ /**
+ * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is
+ * the expression (modulo in-list ordering) and it doesn't contain any regexes.
+ *
+ * Why is this case special? An equality expression for "null" will match both documents with a
+ * literal null for the specified key, and those where the key is not present. An equality
+ * expression for "[]" (which is stored as "undefined" in the index) will match documents with
+ * an empty array or an array with an empty array element. If we negate either of this bounds in
+ * isolation, we may produce incomplete results wrt the other expression. If we negate the
+ * composition of the two, we can properly return complete results excluding both null and empty
+ * array values.
+ */
+ static bool canUseIndexForNin(const InMatchExpression* ime);
+
private:
/**
* Used to keep track of if any $elemMatch predicates were encountered when walking a
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index dc34029f148..43a75829043 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -2765,6 +2765,55 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) {
assertHasOnlyCollscan();
}
+TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+ // Use a multikey index.
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [[], null] }}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: "
+ "{pattern: {a: 1}, bounds: "
+ "{a: ["
+ "['MinKey', undefined, true, true],"
+ "[null, 'MaxKey', false, true]"
+ "]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) {
+ params.options = QueryPlannerParams::NO_TABLE_SCAN;
+ // Use an index which is not multikey.
+ addIndex(fromjson("{a: 1}"));
+ runQuery(fromjson("{a: { $nin: [[], null] }}"));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ "{fetch: {node: {ixscan: "
+ "{pattern: {a: 1}, bounds: "
+ "{a: ["
+ "['MinKey', undefined, true, true],"
+ "[null, 'MaxKey', false, true]"
+ "]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [null] }}"));
+ assertHasOnlyCollscan();
+}
+
+TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ // This matches the [[], null] pattern but also has a regex.
+ runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}"));
+ assertHasOnlyCollscan();
+}
+
+TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) {
+ addIndex(fromjson("{a: 1}"), true);
+ runQuery(fromjson("{a: { $nin: [[], [1]] }}"));
+ assertHasOnlyCollscan();
+}
+
TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) {
addIndex(BSON("i" << 1),
false, // multikey
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index 2cfcc54985b..809945049b2 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -79,8 +79,13 @@ bool filterMatches(const BSONObj& testFilter,
if (!statusWithMatcher.isOK()) {
return false;
}
- const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue());
+ std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue());
CanonicalQuery::sortTree(root.get());
+ if (root->matchType() == mongo::MatchExpression::NOT) {
+ // Ideally we would optimize() everything, but some of the tests depend on structural
+ // equivalence of single-arg $or expressions.
+ root = MatchExpression::optimize(std::move(root));
+ }
std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone());
CanonicalQuery::sortTree(trueFilter.get());
return trueFilter->equivalent(root.get());
diff --git a/src/mongo/s/query/results_merger_test_fixture.h b/src/mongo/s/query/results_merger_test_fixture.h
index 62f504a3ea1..41bdd4cf85e 100644
--- a/src/mongo/s/query/results_merger_test_fixture.h
+++ b/src/mongo/s/query/results_merger_test_fixture.h
@@ -145,7 +145,7 @@ protected:
void scheduleNetworkResponses(std::vector<CursorResponse> responses) {
std::vector<BSONObj> objs;
for (const auto& cursorResponse : responses) {
- // For tests of the AsyncResultsMerger, all CursorRepsonses scheduled by the tests are
+ // For tests of the AsyncResultsMerger, all CursorResponses scheduled by the tests are
// subsequent responses, since the AsyncResultsMerger will only ever run getMores.
objs.push_back(cursorResponse.toBSON(CursorResponse::ResponseType::SubsequentResponse));
}