diff options
author | Ted Tuckman <ted.tuckman@mongodb.com> | 2020-01-24 15:53:43 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-08-14 19:00:24 +0000 |
commit | 65864da5eb9885dae432fa8a1b9b46f31be04b07 (patch) | |
tree | 5eb82174a5845fb7d63250989ca9c1817cf8974f | |
parent | d3470d48166e9897e9d9817340db9f11f70a55e4 (diff) | |
download | mongo-65864da5eb9885dae432fa8a1b9b46f31be04b07.tar.gz |
SERVER-45233 Indexed array inequality fixed to use correct bounds
create mode 100644 jstests/core/array_comparison_correctness.js
create mode 100644 jstests/core/array_index_and_nonIndex_consistent.js
(cherry picked from commit 52e7950cae37ef3783b05840c04abbbe383fa1ff)
(cherry picked from commit e3fba401e6e75b26cc2e5d363ad331dff749cfbf)
-rw-r--r-- | jstests/core/array_comparison_correctness.js | 131 | ||||
-rw-r--r-- | jstests/core/array_index_and_nonIndex_consistent.js | 76 | ||||
-rw-r--r-- | jstests/core/array_match4.js | 4 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 158 |
4 files changed, 319 insertions, 50 deletions
diff --git a/jstests/core/array_comparison_correctness.js b/jstests/core/array_comparison_correctness.js new file mode 100644 index 00000000000..df209ae3f0f --- /dev/null +++ b/jstests/core/array_comparison_correctness.js @@ -0,0 +1,131 @@ +/* + * Demonstrate the expected behavior of $lt and $gt comparisons involving arrays. This is only + * tested without an index, results between index and non-index behavior are compared in + * array_index_and_nonIndex_consistent.js + */ + +(function() { + "use strict"; + load("jstests/aggregation/extras/utils.js"); // arrayEq + const collName = jsTestName(); + const coll = db.getCollection(collName); + coll.drop(); + const docsInColl = [ + {_id: 0, val: [1, 2]}, + {_id: 1, val: [3, 4]}, + {_id: 2, val: [3, 1]}, + {_id: 3, val: {"test": 5}}, + {_id: 4, val: [{"test": 7}]}, + {_id: 5, val: [true, false]}, + {_id: 6, val: 2}, + {_id: 7, val: 3}, + {_id: 8, val: 4}, + {_id: 9, val: [2]}, + {_id: 10, val: [3]}, + {_id: 11, val: [4]}, + {_id: 12, val: [1, true]}, + {_id: 13, val: [true, 1]}, + {_id: 14, val: [1, 4]}, + {_id: 15, val: [null]}, + {_id: 16, val: MinKey}, + {_id: 17, val: [MinKey]}, + {_id: 18, val: [MinKey, 3]}, + {_id: 19, val: [3, MinKey]}, + {_id: 20, val: MaxKey}, + {_id: 21, val: [MaxKey]}, + {_id: 22, val: [MaxKey, 3]}, + {_id: 23, val: [3, MaxKey]}, + {_id: 24, val: true}, + {_id: 25, val: false}, + ]; + assert.commandWorked(coll.insert(docsInColl)); + function generateFailedEqString(expected, found) { + return "Expected: " + tojson(expected) + "\n Found: " + tojson(found); + } + function generateExpectedResults(indexes) { + resultSet = []; + indexes.forEach(function(index) { + resultSet.push(docsInColl[index]); + }); + return resultSet; + } + // Querying for LT/GT an integer returns all arrays that have a single integer element that + // matches the query. + let resultSet = coll.find({val: {$lt: 2}}).toArray(); + let expected = generateExpectedResults([0, 2, 12, 14]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$gt: 2}}).toArray(); + expected = generateExpectedResults([1, 2, 8, 14]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // Test that querying for GT MinKey and LT MaxKey returns all results except for those values. + resultSet = coll.find({val: {$gt: MinKey}}).toArray(); + let expectedInts = [...Array(25).keys()]; + expectedInts.splice(16, 1); + expected = generateExpectedResults(expectedInts); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$lt: MaxKey}}).toArray(); + expectedInts = [...Array(25).keys()]; + expectedInts.splice(20, 1); + expected = generateExpectedResults(expectedInts); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // Test that querying for GT [MinKey] or LT [MaxKey] returns all other arrays, but nothing else. + resultSet = coll.find({val: {$gt: [MinKey]}}).toArray(); + expected = + generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 18, 19, 21, 22, 23]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$lt: [MaxKey]}}).toArray(); + expected = + generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 22, 23]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // Test that querying for LT/GT true returns no other types. + resultSet = coll.find({val: {$gt: true}}).toArray(); + expected = []; + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$lt: true}}).toArray(); + expected = generateExpectedResults([25]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // Test that querying for LT/GT arrays maintains lexicographic (version number) order and only + // returns arrays. + resultSet = coll.find({val: {$lt: [-1]}}).toArray(); + expected = generateExpectedResults([15, 17, 18]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$lt: [3]}}).toArray(); + expected = generateExpectedResults([0, 9, 12, 14, 15, 17, 18]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$lt: [3, 2]}}).toArray(); + expected = generateExpectedResults([0, 2, 9, 12, 14, 15, 17, 18]); + assert(arrayEq(resultSet, expected), (resultSet, expected)); + + resultSet = coll.find({val: {$gt: [2]}}).toArray(); + expected = generateExpectedResults([1, 2, 4, 5, 10, 11, 13, 19, 21, 22]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$gt: [15]}}).toArray(); + expected = generateExpectedResults([5, 13, 21, 22]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + resultSet = coll.find({val: {$gt: [3, 3]}}).toArray(); + expected = generateExpectedResults([1, 4, 5, 11, 13, 19, 21, 22]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // $gt the empty array should return all arrays. + resultSet = coll.find({val: {$gt: []}}).toArray(); + expected = + generateExpectedResults([0, 1, 2, 4, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 21, 22, 23]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + + // $lt the empty array should return no arrays. + resultSet = coll.find({val: {$lt: []}}).toArray(); + expected = generateExpectedResults([3, 6, 7, 8, 16]); + assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); +})(); diff --git a/jstests/core/array_index_and_nonIndex_consistent.js b/jstests/core/array_index_and_nonIndex_consistent.js new file mode 100644 index 00000000000..05ddf4ccf0c --- /dev/null +++ b/jstests/core/array_index_and_nonIndex_consistent.js @@ -0,0 +1,76 @@ +/* + * Make sure that $gt and $lt queries return the same results regardless of whether there is a + * multikey index. + */ + +(function() { + "use strict"; + load("jstests/aggregation/extras/utils.js"); // arrayEq + const indexColl = db.indexColl; + const nonIndexedColl = db.nonIndexedColl; + indexColl.drop(); + nonIndexedColl.drop(); + + db.indexColl.createIndex({val: 1}); + const collList = [indexColl, nonIndexedColl]; + + collList.forEach(function(collObj) { + assert.commandWorked(collObj.insert([ + {val: [1, 2]}, + {val: [3, 4]}, + {val: [3, 1]}, + {val: {"test": 5}}, + {val: [{"test": 7}]}, + {val: [true, false]}, + {val: 2}, + {val: 3}, + {val: 4}, + {val: [2]}, + {val: [3]}, + {val: [4]}, + {val: [1, true]}, + {val: [true, 1]}, + {val: [1, 4]}, + {val: [null]}, + {val: MinKey}, + {val: [MinKey]}, + {val: [MinKey, 3]}, + {val: [3, MinKey]}, + {val: MaxKey}, + {val: [MaxKey]}, + {val: [MaxKey, 3]}, + {val: [3, MaxKey]}, + {val: []}, + ])); + }); + + const queryList = [ + [2, 2], [0, 3], [3, 0], [1, 3], [3, 1], [1, 5], + [5, 1], [1], [3], [5], {"test": 2}, {"test": 6}, + [true, true], [true], true, 1, 3, 5, + null, [null], [], [MinKey], [MinKey, 2], [MinKey, 4], + MinKey, [MaxKey], [MaxKey, 2], [MaxKey, 4], MaxKey, [], + ]; + + let failedLT = []; + let failedGT = []; + + queryList.forEach(function(q) { + const queryLT = {val: {"$lt": q}}; + const queryGT = {val: {"$gt": q}}; + const projOutId = {_id: 0, val: 1}; + + let indexRes = indexColl.find(queryLT, projOutId).sort({val: 1}).toArray(); + let nonIndexedRes = nonIndexedColl.find(queryLT, projOutId).sort({val: 1}).toArray(); + + assert(arrayEq(indexRes, nonIndexedRes), + "Ran query " + tojson(queryLT) + " and got mismatched results.\n Indexed: " + + tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); + + indexRes = indexColl.find(queryGT, projOutId).sort({val: 1}).toArray(); + nonIndexedRes = nonIndexedColl.find(queryGT, projOutId).sort({val: 1}).toArray(); + assert(arrayEq(indexRes, nonIndexedRes), + "Ran query " + tojson(queryGT) + " and got mismatched results.\n Indexed: " + + tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); + }); +})(); diff --git a/jstests/core/array_match4.js b/jstests/core/array_match4.js index b4cdec5143a..821e9a4f4eb 100644 --- a/jstests/core/array_match4.js +++ b/jstests/core/array_match4.js @@ -25,6 +25,4 @@ print('explain for ' + tojson(query_gte, '', true) + ' = ' + tojson(explain)); // number of documents returned by indexes query should be consistent // with non-indexed case. -// XXX: The following assertion documents current behavior. -// XXX: 2.4 and 2.6 both return 0 documents. -assert.eq(0, t.find(query_gte).itcount(), '$gte (with index)'); +assert.eq(1, t.find(query_gte).itcount(), '$gte (with index)'); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 48ee291b288..0adf7533a1f 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -327,6 +327,81 @@ bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr, return tightness >= IndexBoundsBuilder::INEXACT_COVERED; } +namespace { +// Contains all the logic for determining bounds of a LT or LTE query. +void buildBoundsForQueryElementForLT(BSONElement dataElt, + const mongo::CollatorInterface* collator, + BSONObjBuilder* bob) { + // Use -infinity for one-sided numerical bounds + if (dataElt.isNumber()) { + bob->appendNumber("", -std::numeric_limits<double>::infinity()); + } else if (dataElt.type() == BSONType::Array) { + // For comparison to an array, we do lexicographic comparisons. In a multikey index, the + // index entries are the array elements themselves. We must therefore look at all types, and + // all values between MinKey and the first element in the array. + bob->appendMinKey(""); + } else { + bob->appendMinForType("", dataElt.type()); + } + if (dataElt.type() != BSONType::Array) { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + return; + } + + auto eltArr = dataElt.Array(); + if (eltArr.empty()) { + // The empty array is the lowest array. + bob->appendMinForType("", dataElt.type()); + } else { + // If the type of the element is greater than the type of the array, the bounds have to + // include that element. Otherwise the array type, and therefore `dataElt` is + // sufficiently large to include all relevant keys. + if (canonicalizeBSONType(eltArr[0].type()) > canonicalizeBSONType(BSONType::Array)) { + CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob); + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + } +} + +void buildBoundsForQueryElementForGT(BSONElement dataElt, + const mongo::CollatorInterface* collator, + BSONObjBuilder* bob) { + if (dataElt.type() == BSONType::Array) { + auto eltArr = dataElt.Array(); + if (eltArr.empty()) { + // If the array is empty, we need bounds that will match all arrays. Unfortunately, + // this means that we have to check the entire index, as any array could have a key + // anywhere in the multikey index. + bob->appendMinKey(""); + } else { + // If the type of the element is smaller than the type of the array, the bounds need + // to extend to that element. Otherwise the array type, and therefore `dataElt` is + // sufficiently large include all relevant keys. + if (canonicalizeBSONType(eltArr[0].type()) < canonicalizeBSONType(BSONType::Array)) { + CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob); + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + } + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + + if (dataElt.isNumber()) { + bob->appendNumber("", std::numeric_limits<double>::infinity()); + // For comparison to an array, we do lexicographic comparisons. In a multikey index, the + // index entries are the array elements themselves. We must therefore look at all types, and + // all values between the first element in the array and MaxKey. + } else if (dataElt.type() == BSONType::Array) { + bob->appendMaxKey(""); + } else { + bob->appendMaxForType("", dataElt.type()); + } +} + +} // namespace + // static void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt, @@ -469,29 +544,26 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, } BSONObjBuilder bob; - // Use -infinity for one-sided numerical bounds - if (dataElt.isNumber()) { - bob.appendNumber("", -std::numeric_limits<double>::infinity()); - } else { - bob.appendMinForType("", dataElt.type()); - } - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForLT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - oilOut->intervals.push_back(makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), true))); + bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj); + const Interval interval = makeRangeInterval( + dataObj, IndexBounds::makeBoundInclusionFromBoundBools(inclusiveBounds, true)); + oilOut->intervals.push_back(interval); *tightnessOut = getInequalityPredicateTightness(dataElt, index); } else if (MatchExpression::LT == expr->matchType()) { const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); BSONElement dataElt = node->getData(); - // Everything is < MaxKey, except for MaxKey. + // Everything is < MaxKey, except for MaxKey. However the bounds need to be inclusive to + // find the array [MaxKey] which is smaller for a comparison but equal in a multikey index. if (MaxKey == dataElt.type()) { oilOut->intervals.push_back(allValuesRespectingInclusion( - IndexBounds::makeBoundInclusionFromBoundBools(true, false))); - *tightnessOut = - index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT; + IndexBounds::makeBoundInclusionFromBoundBools(true, index.multikey))); + *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH + : IndexBoundsBuilder::EXACT; return; } @@ -502,17 +574,14 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, } BSONObjBuilder bob; - // Use -infinity for one-sided numerical bounds - if (dataElt.isNumber()) { - bob.appendNumber("", -std::numeric_limits<double>::infinity()); - } else { - bob.appendMinForType("", dataElt.type()); - } - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForLT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), false)); + bool inclusiveBounds = dataElt.type() == BSONType::Array; + Interval interval = + makeRangeInterval(dataObj, + IndexBounds::makeBoundInclusionFromBoundBools( + typeMatch(dataObj) || inclusiveBounds, inclusiveBounds)); // If the operand to LT is equal to the lower bound X, the interval [X, X) is invalid // and should not be added to the bounds. @@ -525,12 +594,13 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); BSONElement dataElt = node->getData(); - // Everything is > MinKey, except MinKey. + // Everything is > MinKey, except MinKey. However the bounds need to be inclusive to find + // the array [MinKey], which is larger for a comparison but equal in a multikey index. if (MinKey == dataElt.type()) { oilOut->intervals.push_back(allValuesRespectingInclusion( - IndexBounds::makeBoundInclusionFromBoundBools(false, true))); - *tightnessOut = - index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT; + IndexBounds::makeBoundInclusionFromBoundBools(index.multikey, true))); + *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH + : IndexBoundsBuilder::EXACT; return; } @@ -541,16 +611,14 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, } BSONObjBuilder bob; - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - if (dataElt.isNumber()) { - bob.appendNumber("", std::numeric_limits<double>::infinity()); - } else { - bob.appendMaxForType("", dataElt.type()); - } - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForGT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(false, typeMatch(dataObj))); + bool inclusiveBounds = dataElt.type() == BSONType::Array; + Interval interval = + makeRangeInterval(dataObj, + IndexBounds::makeBoundInclusionFromBoundBools( + inclusiveBounds, inclusiveBounds || typeMatch(dataObj))); // If the operand to GT is equal to the upper bound X, the interval (X, X] is invalid // and should not be added to the bounds. @@ -580,17 +648,13 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, } BSONObjBuilder bob; - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - if (dataElt.isNumber()) { - bob.appendNumber("", std::numeric_limits<double>::infinity()); - } else { - bob.appendMaxForType("", dataElt.type()); - } - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForGT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - - oilOut->intervals.push_back(makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, typeMatch(dataObj)))); + bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj); + const Interval interval = makeRangeInterval( + dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, inclusiveBounds)); + oilOut->intervals.push_back(interval); *tightnessOut = getInequalityPredicateTightness(dataElt, index); } else if (MatchExpression::REGEX == expr->matchType()) { |