summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTed Tuckman <ted.tuckman@mongodb.com>2020-01-24 15:53:43 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-08-14 19:00:24 +0000
commit65864da5eb9885dae432fa8a1b9b46f31be04b07 (patch)
tree5eb82174a5845fb7d63250989ca9c1817cf8974f
parentd3470d48166e9897e9d9817340db9f11f70a55e4 (diff)
downloadmongo-65864da5eb9885dae432fa8a1b9b46f31be04b07.tar.gz
SERVER-45233 Indexed array inequality fixed to use correct bounds
create mode 100644 jstests/core/array_comparison_correctness.js create mode 100644 jstests/core/array_index_and_nonIndex_consistent.js (cherry picked from commit 52e7950cae37ef3783b05840c04abbbe383fa1ff) (cherry picked from commit e3fba401e6e75b26cc2e5d363ad331dff749cfbf)
-rw-r--r--jstests/core/array_comparison_correctness.js131
-rw-r--r--jstests/core/array_index_and_nonIndex_consistent.js76
-rw-r--r--jstests/core/array_match4.js4
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp158
4 files changed, 319 insertions, 50 deletions
diff --git a/jstests/core/array_comparison_correctness.js b/jstests/core/array_comparison_correctness.js
new file mode 100644
index 00000000000..df209ae3f0f
--- /dev/null
+++ b/jstests/core/array_comparison_correctness.js
@@ -0,0 +1,131 @@
+/*
+ * Demonstrate the expected behavior of $lt and $gt comparisons involving arrays. This is only
+ * tested without an index, results between index and non-index behavior are compared in
+ * array_index_and_nonIndex_consistent.js
+ */
+
+(function() {
+ "use strict";
+ load("jstests/aggregation/extras/utils.js"); // arrayEq
+ const collName = jsTestName();
+ const coll = db.getCollection(collName);
+ coll.drop();
+ const docsInColl = [
+ {_id: 0, val: [1, 2]},
+ {_id: 1, val: [3, 4]},
+ {_id: 2, val: [3, 1]},
+ {_id: 3, val: {"test": 5}},
+ {_id: 4, val: [{"test": 7}]},
+ {_id: 5, val: [true, false]},
+ {_id: 6, val: 2},
+ {_id: 7, val: 3},
+ {_id: 8, val: 4},
+ {_id: 9, val: [2]},
+ {_id: 10, val: [3]},
+ {_id: 11, val: [4]},
+ {_id: 12, val: [1, true]},
+ {_id: 13, val: [true, 1]},
+ {_id: 14, val: [1, 4]},
+ {_id: 15, val: [null]},
+ {_id: 16, val: MinKey},
+ {_id: 17, val: [MinKey]},
+ {_id: 18, val: [MinKey, 3]},
+ {_id: 19, val: [3, MinKey]},
+ {_id: 20, val: MaxKey},
+ {_id: 21, val: [MaxKey]},
+ {_id: 22, val: [MaxKey, 3]},
+ {_id: 23, val: [3, MaxKey]},
+ {_id: 24, val: true},
+ {_id: 25, val: false},
+ ];
+ assert.commandWorked(coll.insert(docsInColl));
+ function generateFailedEqString(expected, found) {
+ return "Expected: " + tojson(expected) + "\n Found: " + tojson(found);
+ }
+ function generateExpectedResults(indexes) {
+ resultSet = [];
+ indexes.forEach(function(index) {
+ resultSet.push(docsInColl[index]);
+ });
+ return resultSet;
+ }
+ // Querying for LT/GT an integer returns all arrays that have a single integer element that
+ // matches the query.
+ let resultSet = coll.find({val: {$lt: 2}}).toArray();
+ let expected = generateExpectedResults([0, 2, 12, 14]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$gt: 2}}).toArray();
+ expected = generateExpectedResults([1, 2, 8, 14]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // Test that querying for GT MinKey and LT MaxKey returns all results except for those values.
+ resultSet = coll.find({val: {$gt: MinKey}}).toArray();
+ let expectedInts = [...Array(25).keys()];
+ expectedInts.splice(16, 1);
+ expected = generateExpectedResults(expectedInts);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$lt: MaxKey}}).toArray();
+ expectedInts = [...Array(25).keys()];
+ expectedInts.splice(20, 1);
+ expected = generateExpectedResults(expectedInts);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // Test that querying for GT [MinKey] or LT [MaxKey] returns all other arrays, but nothing else.
+ resultSet = coll.find({val: {$gt: [MinKey]}}).toArray();
+ expected =
+ generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 18, 19, 21, 22, 23]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$lt: [MaxKey]}}).toArray();
+ expected =
+ generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 22, 23]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // Test that querying for LT/GT true returns no other types.
+ resultSet = coll.find({val: {$gt: true}}).toArray();
+ expected = [];
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$lt: true}}).toArray();
+ expected = generateExpectedResults([25]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // Test that querying for LT/GT arrays maintains lexicographic (version number) order and only
+ // returns arrays.
+ resultSet = coll.find({val: {$lt: [-1]}}).toArray();
+ expected = generateExpectedResults([15, 17, 18]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$lt: [3]}}).toArray();
+ expected = generateExpectedResults([0, 9, 12, 14, 15, 17, 18]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$lt: [3, 2]}}).toArray();
+ expected = generateExpectedResults([0, 2, 9, 12, 14, 15, 17, 18]);
+ assert(arrayEq(resultSet, expected), (resultSet, expected));
+
+ resultSet = coll.find({val: {$gt: [2]}}).toArray();
+ expected = generateExpectedResults([1, 2, 4, 5, 10, 11, 13, 19, 21, 22]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$gt: [15]}}).toArray();
+ expected = generateExpectedResults([5, 13, 21, 22]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ resultSet = coll.find({val: {$gt: [3, 3]}}).toArray();
+ expected = generateExpectedResults([1, 4, 5, 11, 13, 19, 21, 22]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // $gt the empty array should return all arrays.
+ resultSet = coll.find({val: {$gt: []}}).toArray();
+ expected =
+ generateExpectedResults([0, 1, 2, 4, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 21, 22, 23]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+
+ // $lt the empty array should return no arrays.
+ resultSet = coll.find({val: {$lt: []}}).toArray();
+ expected = generateExpectedResults([3, 6, 7, 8, 16]);
+ assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected));
+})();
diff --git a/jstests/core/array_index_and_nonIndex_consistent.js b/jstests/core/array_index_and_nonIndex_consistent.js
new file mode 100644
index 00000000000..05ddf4ccf0c
--- /dev/null
+++ b/jstests/core/array_index_and_nonIndex_consistent.js
@@ -0,0 +1,76 @@
+/*
+ * Make sure that $gt and $lt queries return the same results regardless of whether there is a
+ * multikey index.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/aggregation/extras/utils.js"); // arrayEq
+ const indexColl = db.indexColl;
+ const nonIndexedColl = db.nonIndexedColl;
+ indexColl.drop();
+ nonIndexedColl.drop();
+
+ db.indexColl.createIndex({val: 1});
+ const collList = [indexColl, nonIndexedColl];
+
+ collList.forEach(function(collObj) {
+ assert.commandWorked(collObj.insert([
+ {val: [1, 2]},
+ {val: [3, 4]},
+ {val: [3, 1]},
+ {val: {"test": 5}},
+ {val: [{"test": 7}]},
+ {val: [true, false]},
+ {val: 2},
+ {val: 3},
+ {val: 4},
+ {val: [2]},
+ {val: [3]},
+ {val: [4]},
+ {val: [1, true]},
+ {val: [true, 1]},
+ {val: [1, 4]},
+ {val: [null]},
+ {val: MinKey},
+ {val: [MinKey]},
+ {val: [MinKey, 3]},
+ {val: [3, MinKey]},
+ {val: MaxKey},
+ {val: [MaxKey]},
+ {val: [MaxKey, 3]},
+ {val: [3, MaxKey]},
+ {val: []},
+ ]));
+ });
+
+ const queryList = [
+ [2, 2], [0, 3], [3, 0], [1, 3], [3, 1], [1, 5],
+ [5, 1], [1], [3], [5], {"test": 2}, {"test": 6},
+ [true, true], [true], true, 1, 3, 5,
+ null, [null], [], [MinKey], [MinKey, 2], [MinKey, 4],
+ MinKey, [MaxKey], [MaxKey, 2], [MaxKey, 4], MaxKey, [],
+ ];
+
+ let failedLT = [];
+ let failedGT = [];
+
+ queryList.forEach(function(q) {
+ const queryLT = {val: {"$lt": q}};
+ const queryGT = {val: {"$gt": q}};
+ const projOutId = {_id: 0, val: 1};
+
+ let indexRes = indexColl.find(queryLT, projOutId).sort({val: 1}).toArray();
+ let nonIndexedRes = nonIndexedColl.find(queryLT, projOutId).sort({val: 1}).toArray();
+
+ assert(arrayEq(indexRes, nonIndexedRes),
+ "Ran query " + tojson(queryLT) + " and got mismatched results.\n Indexed: " +
+ tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes));
+
+ indexRes = indexColl.find(queryGT, projOutId).sort({val: 1}).toArray();
+ nonIndexedRes = nonIndexedColl.find(queryGT, projOutId).sort({val: 1}).toArray();
+ assert(arrayEq(indexRes, nonIndexedRes),
+ "Ran query " + tojson(queryGT) + " and got mismatched results.\n Indexed: " +
+ tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes));
+ });
+})();
diff --git a/jstests/core/array_match4.js b/jstests/core/array_match4.js
index b4cdec5143a..821e9a4f4eb 100644
--- a/jstests/core/array_match4.js
+++ b/jstests/core/array_match4.js
@@ -25,6 +25,4 @@ print('explain for ' + tojson(query_gte, '', true) + ' = ' + tojson(explain));
// number of documents returned by indexes query should be consistent
// with non-indexed case.
-// XXX: The following assertion documents current behavior.
-// XXX: 2.4 and 2.6 both return 0 documents.
-assert.eq(0, t.find(query_gte).itcount(), '$gte (with index)');
+assert.eq(1, t.find(query_gte).itcount(), '$gte (with index)');
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 48ee291b288..0adf7533a1f 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -327,6 +327,81 @@ bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr,
return tightness >= IndexBoundsBuilder::INEXACT_COVERED;
}
+namespace {
+// Contains all the logic for determining bounds of a LT or LTE query.
+void buildBoundsForQueryElementForLT(BSONElement dataElt,
+ const mongo::CollatorInterface* collator,
+ BSONObjBuilder* bob) {
+ // Use -infinity for one-sided numerical bounds
+ if (dataElt.isNumber()) {
+ bob->appendNumber("", -std::numeric_limits<double>::infinity());
+ } else if (dataElt.type() == BSONType::Array) {
+ // For comparison to an array, we do lexicographic comparisons. In a multikey index, the
+ // index entries are the array elements themselves. We must therefore look at all types, and
+ // all values between MinKey and the first element in the array.
+ bob->appendMinKey("");
+ } else {
+ bob->appendMinForType("", dataElt.type());
+ }
+ if (dataElt.type() != BSONType::Array) {
+ CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob);
+ return;
+ }
+
+ auto eltArr = dataElt.Array();
+ if (eltArr.empty()) {
+ // The empty array is the lowest array.
+ bob->appendMinForType("", dataElt.type());
+ } else {
+ // If the type of the element is greater than the type of the array, the bounds have to
+ // include that element. Otherwise the array type, and therefore `dataElt` is
+ // sufficiently large to include all relevant keys.
+ if (canonicalizeBSONType(eltArr[0].type()) > canonicalizeBSONType(BSONType::Array)) {
+ CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob);
+ } else {
+ CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob);
+ }
+ }
+}
+
+void buildBoundsForQueryElementForGT(BSONElement dataElt,
+ const mongo::CollatorInterface* collator,
+ BSONObjBuilder* bob) {
+ if (dataElt.type() == BSONType::Array) {
+ auto eltArr = dataElt.Array();
+ if (eltArr.empty()) {
+ // If the array is empty, we need bounds that will match all arrays. Unfortunately,
+ // this means that we have to check the entire index, as any array could have a key
+ // anywhere in the multikey index.
+ bob->appendMinKey("");
+ } else {
+ // If the type of the element is smaller than the type of the array, the bounds need
+ // to extend to that element. Otherwise the array type, and therefore `dataElt` is
+ // sufficiently large include all relevant keys.
+ if (canonicalizeBSONType(eltArr[0].type()) < canonicalizeBSONType(BSONType::Array)) {
+ CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob);
+ } else {
+ CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob);
+ }
+ }
+ } else {
+ CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob);
+ }
+
+ if (dataElt.isNumber()) {
+ bob->appendNumber("", std::numeric_limits<double>::infinity());
+ // For comparison to an array, we do lexicographic comparisons. In a multikey index, the
+ // index entries are the array elements themselves. We must therefore look at all types, and
+ // all values between the first element in the array and MaxKey.
+ } else if (dataElt.type() == BSONType::Array) {
+ bob->appendMaxKey("");
+ } else {
+ bob->appendMaxForType("", dataElt.type());
+ }
+}
+
+} // namespace
+
// static
void IndexBoundsBuilder::translate(const MatchExpression* expr,
const BSONElement& elt,
@@ -469,29 +544,26 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
}
BSONObjBuilder bob;
- // Use -infinity for one-sided numerical bounds
- if (dataElt.isNumber()) {
- bob.appendNumber("", -std::numeric_limits<double>::infinity());
- } else {
- bob.appendMinForType("", dataElt.type());
- }
- CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob);
- BSONObj dataObj = bob.obj();
+ buildBoundsForQueryElementForLT(dataElt, index.collator, &bob);
+ BSONObj dataObj = bob.done().getOwned();
verify(dataObj.isOwned());
- oilOut->intervals.push_back(makeRangeInterval(
- dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), true)));
+ bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj);
+ const Interval interval = makeRangeInterval(
+ dataObj, IndexBounds::makeBoundInclusionFromBoundBools(inclusiveBounds, true));
+ oilOut->intervals.push_back(interval);
*tightnessOut = getInequalityPredicateTightness(dataElt, index);
} else if (MatchExpression::LT == expr->matchType()) {
const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr);
BSONElement dataElt = node->getData();
- // Everything is < MaxKey, except for MaxKey.
+ // Everything is < MaxKey, except for MaxKey. However the bounds need to be inclusive to
+ // find the array [MaxKey] which is smaller for a comparison but equal in a multikey index.
if (MaxKey == dataElt.type()) {
oilOut->intervals.push_back(allValuesRespectingInclusion(
- IndexBounds::makeBoundInclusionFromBoundBools(true, false)));
- *tightnessOut =
- index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT;
+ IndexBounds::makeBoundInclusionFromBoundBools(true, index.multikey)));
+ *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH
+ : IndexBoundsBuilder::EXACT;
return;
}
@@ -502,17 +574,14 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
}
BSONObjBuilder bob;
- // Use -infinity for one-sided numerical bounds
- if (dataElt.isNumber()) {
- bob.appendNumber("", -std::numeric_limits<double>::infinity());
- } else {
- bob.appendMinForType("", dataElt.type());
- }
- CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob);
- BSONObj dataObj = bob.obj();
+ buildBoundsForQueryElementForLT(dataElt, index.collator, &bob);
+ BSONObj dataObj = bob.done().getOwned();
verify(dataObj.isOwned());
- Interval interval = makeRangeInterval(
- dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), false));
+ bool inclusiveBounds = dataElt.type() == BSONType::Array;
+ Interval interval =
+ makeRangeInterval(dataObj,
+ IndexBounds::makeBoundInclusionFromBoundBools(
+ typeMatch(dataObj) || inclusiveBounds, inclusiveBounds));
// If the operand to LT is equal to the lower bound X, the interval [X, X) is invalid
// and should not be added to the bounds.
@@ -525,12 +594,13 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr);
BSONElement dataElt = node->getData();
- // Everything is > MinKey, except MinKey.
+ // Everything is > MinKey, except MinKey. However the bounds need to be inclusive to find
+ // the array [MinKey], which is larger for a comparison but equal in a multikey index.
if (MinKey == dataElt.type()) {
oilOut->intervals.push_back(allValuesRespectingInclusion(
- IndexBounds::makeBoundInclusionFromBoundBools(false, true)));
- *tightnessOut =
- index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT;
+ IndexBounds::makeBoundInclusionFromBoundBools(index.multikey, true)));
+ *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH
+ : IndexBoundsBuilder::EXACT;
return;
}
@@ -541,16 +611,14 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
}
BSONObjBuilder bob;
- CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob);
- if (dataElt.isNumber()) {
- bob.appendNumber("", std::numeric_limits<double>::infinity());
- } else {
- bob.appendMaxForType("", dataElt.type());
- }
- BSONObj dataObj = bob.obj();
+ buildBoundsForQueryElementForGT(dataElt, index.collator, &bob);
+ BSONObj dataObj = bob.done().getOwned();
verify(dataObj.isOwned());
- Interval interval = makeRangeInterval(
- dataObj, IndexBounds::makeBoundInclusionFromBoundBools(false, typeMatch(dataObj)));
+ bool inclusiveBounds = dataElt.type() == BSONType::Array;
+ Interval interval =
+ makeRangeInterval(dataObj,
+ IndexBounds::makeBoundInclusionFromBoundBools(
+ inclusiveBounds, inclusiveBounds || typeMatch(dataObj)));
// If the operand to GT is equal to the upper bound X, the interval (X, X] is invalid
// and should not be added to the bounds.
@@ -580,17 +648,13 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
}
BSONObjBuilder bob;
- CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob);
- if (dataElt.isNumber()) {
- bob.appendNumber("", std::numeric_limits<double>::infinity());
- } else {
- bob.appendMaxForType("", dataElt.type());
- }
- BSONObj dataObj = bob.obj();
+ buildBoundsForQueryElementForGT(dataElt, index.collator, &bob);
+ BSONObj dataObj = bob.done().getOwned();
verify(dataObj.isOwned());
-
- oilOut->intervals.push_back(makeRangeInterval(
- dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, typeMatch(dataObj))));
+ bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj);
+ const Interval interval = makeRangeInterval(
+ dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, inclusiveBounds));
+ oilOut->intervals.push_back(interval);
*tightnessOut = getInequalityPredicateTightness(dataElt, index);
} else if (MatchExpression::REGEX == expr->matchType()) {