diff options
author | Ted Tuckman <ted.tuckman@mongodb.com> | 2020-01-24 15:53:43 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-15 16:07:47 +0000 |
commit | 1f89ab198058cd5ed012acd2b081f0e8c6949927 (patch) | |
tree | 6616c7810c4a822e73c4bc6721543cccf7d35212 | |
parent | fc829bb485654a6325f358b2642aab34a9bc2543 (diff) | |
download | mongo-1f89ab198058cd5ed012acd2b081f0e8c6949927.tar.gz |
SERVER-45233 Indexed array inequality fixed to use correct bounds
create mode 100644 jstests/core/array_comparison_correctness.js
create mode 100644 jstests/core/array_index_and_nonIndex_consistent.js
(cherry picked from commit 52e7950cae37ef3783b05840c04abbbe383fa1ff)
-rw-r--r-- | jstests/core/array_comparison_correctness.js | 118 | ||||
-rw-r--r-- | jstests/core/array_index_and_nonIndex_consistent.js | 74 | ||||
-rw-r--r-- | jstests/core/array_match4.js | 5 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 153 |
4 files changed, 301 insertions, 49 deletions
diff --git a/jstests/core/array_comparison_correctness.js b/jstests/core/array_comparison_correctness.js new file mode 100644 index 00000000000..5001d30a9a7 --- /dev/null +++ b/jstests/core/array_comparison_correctness.js @@ -0,0 +1,118 @@ +/* + * Demonstrate the expected behavior of $lt and $gt comparisons involving arrays. This is only + * tested without an index, results between index and non-index behavior are compared in + * array_index_and_nonIndex_consistent.js + */ + +(function() { +"use strict"; +load("jstests/aggregation/extras/utils.js"); // arrayEq +const collName = jsTestName(); +const coll = db.getCollection(collName); +coll.drop(); +const docsInColl = [ + {_id: 0, val: [1, 2]}, + {_id: 1, val: [3, 4]}, + {_id: 2, val: [3, 1]}, + {_id: 3, val: {"test": 5}}, + {_id: 4, val: [{"test": 7}]}, + {_id: 5, val: [true, false]}, + {_id: 6, val: 2}, + {_id: 7, val: 3}, + {_id: 8, val: 4}, + {_id: 9, val: [2]}, + {_id: 10, val: [3]}, + {_id: 11, val: [4]}, + {_id: 12, val: [1, true]}, + {_id: 13, val: [true, 1]}, + {_id: 14, val: [1, 4]}, + {_id: 15, val: [null]}, + {_id: 16, val: MinKey}, + {_id: 17, val: [MinKey]}, + {_id: 18, val: [MinKey, 3]}, + {_id: 19, val: [3, MinKey]}, + {_id: 20, val: MaxKey}, + {_id: 21, val: [MaxKey]}, + {_id: 22, val: [MaxKey, 3]}, + {_id: 23, val: [3, MaxKey]}, + {_id: 24, val: true}, + {_id: 25, val: false}, +]; +assert.commandWorked(coll.insert(docsInColl)); +function generateFailedEqString(expected, found) { + return "Expected: " + tojson(expected) + "\n Found: " + tojson(found); +} +function generateExpectedResults(indexes) { + resultSet = []; + indexes.forEach(function(index) { + resultSet.push(docsInColl[index]); + }); + return resultSet; +} +// Querying for LT/GT an integer returns all arrays that have a single integer element that +// matches the query. +let resultSet = coll.find({val: {$lt: 2}}).toArray(); +let expected = generateExpectedResults([0, 2, 12, 14]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$gt: 2}}).toArray(); +expected = generateExpectedResults([1, 2, 8, 14]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +// Test that querying for GT MinKey and LT MaxKey returns all results except for those values. +resultSet = coll.find({val: {$gt: MinKey}}).toArray(); +let expectedInts = [...Array(25).keys()]; +expectedInts.splice(16, 1); +expected = generateExpectedResults(expectedInts); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$lt: MaxKey}}).toArray(); +expectedInts = [...Array(25).keys()]; +expectedInts.splice(20, 1); +expected = generateExpectedResults(expectedInts); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +// Test that querying for GT [MinKey] or LT [MaxKey] returns all other arrays, but nothing else. +resultSet = coll.find({val: {$gt: [MinKey]}}).toArray(); +expected = generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 18, 19, 21, 22, 23]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$lt: [MaxKey]}}).toArray(); +expected = generateExpectedResults([0, 1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 22, 23]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +// Test that querying for LT/GT true returns no other types. +resultSet = coll.find({val: {$gt: true}}).toArray(); +expected = []; +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$lt: true}}).toArray(); +expected = generateExpectedResults([25]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +// Test that querying for LT/GT arrays maintains lexicographic (version number) order and only +// returns arrays. +resultSet = coll.find({val: {$lt: [-1]}}).toArray(); +expected = generateExpectedResults([15, 17, 18]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$lt: [3]}}).toArray(); +expected = generateExpectedResults([0, 9, 12, 14, 15, 17, 18]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$lt: [3, 2]}}).toArray(); +expected = generateExpectedResults([0, 2, 9, 12, 14, 15, 17, 18]); +assert(arrayEq(resultSet, expected), (resultSet, expected)); + +resultSet = coll.find({val: {$gt: [2]}}).toArray(); +expected = generateExpectedResults([1, 2, 4, 5, 10, 11, 13, 19, 21, 22]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$gt: [15]}}).toArray(); +expected = generateExpectedResults([5, 13, 21, 22]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); + +resultSet = coll.find({val: {$gt: [3, 3]}}).toArray(); +expected = generateExpectedResults([1, 4, 5, 11, 13, 19, 21, 22]); +assert(arrayEq(resultSet, expected), generateFailedEqString(resultSet, expected)); +})(); diff --git a/jstests/core/array_index_and_nonIndex_consistent.js b/jstests/core/array_index_and_nonIndex_consistent.js new file mode 100644 index 00000000000..5cba0e66a6a --- /dev/null +++ b/jstests/core/array_index_and_nonIndex_consistent.js @@ -0,0 +1,74 @@ +/* + * Make sure that $gt and $lt queries return the same results regardless of whether there is a + * multikey index. + */ + +(function() { +"use strict"; +load("jstests/aggregation/extras/utils.js"); // arrayEq +const indexColl = db.indexColl; +const nonIndexedColl = db.nonIndexedColl; +indexColl.drop(); +nonIndexedColl.drop(); + +db.indexColl.createIndex({val: 1}); +const collList = [indexColl, nonIndexedColl]; + +collList.forEach(function(collObj) { + assert.commandWorked(collObj.insert([ + {val: [1, 2]}, + {val: [3, 4]}, + {val: [3, 1]}, + {val: {"test": 5}}, + {val: [{"test": 7}]}, + {val: [true, false]}, + {val: 2}, + {val: 3}, + {val: 4}, + {val: [2]}, + {val: [3]}, + {val: [4]}, + {val: [1, true]}, + {val: [true, 1]}, + {val: [1, 4]}, + {val: [null]}, + {val: MinKey}, + {val: [MinKey]}, + {val: [MinKey, 3]}, + {val: [3, MinKey]}, + {val: MaxKey}, + {val: [MaxKey]}, + {val: [MaxKey, 3]}, + {val: [3, MaxKey]}, + ])); +}); + +const queryList = [ + [2, 2], [0, 3], [3, 0], [1, 3], [3, 1], [1, 5], [5, 1], [1], + [3], [5], {"test": 2}, {"test": 6}, [true, true], [true], true, 1, + 3, 5, null, [null], [], [MinKey], [MinKey, 2], [MinKey, 4], + MinKey, [MaxKey], [MaxKey, 2], [MaxKey, 4], MaxKey, +]; + +let failedLT = []; +let failedGT = []; + +queryList.forEach(function(q) { + const queryLT = {val: {"$lt": q}}; + const queryGT = {val: {"$gt": q}}; + const projOutId = {_id: 0, val: 1}; + + let indexRes = indexColl.find(queryLT, projOutId).sort({val: 1}).toArray(); + let nonIndexedRes = nonIndexedColl.find(queryLT, projOutId).sort({val: 1}).toArray(); + + assert(arrayEq(indexRes, nonIndexedRes), + "Ran query " + tojson(queryLT) + " and got mismatched results.\n Indexed: " + + tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); + + indexRes = indexColl.find(queryGT, projOutId).sort({val: 1}).toArray(); + nonIndexedRes = nonIndexedColl.find(queryGT, projOutId).sort({val: 1}).toArray(); + assert(arrayEq(indexRes, nonIndexedRes), + "Ran query " + tojson(queryGT) + " and got mismatched results.\n Indexed: " + + tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); +}); +})(); diff --git a/jstests/core/array_match4.js b/jstests/core/array_match4.js index b4cdec5143a..dafbca8f4f9 100644 --- a/jstests/core/array_match4.js +++ b/jstests/core/array_match4.js @@ -1,3 +1,4 @@ +// @tags: [requires_fcv_44] var t = db.array_match4; t.drop(); @@ -25,6 +26,4 @@ print('explain for ' + tojson(query_gte, '', true) + ' = ' + tojson(explain)); // number of documents returned by indexes query should be consistent // with non-indexed case. -// XXX: The following assertion documents current behavior. -// XXX: 2.4 and 2.6 both return 0 documents. -assert.eq(0, t.find(query_gte).itcount(), '$gte (with index)'); +assert.eq(1, t.find(query_gte).itcount(), '$gte (with index)'); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 9204e4c0138..597ee8ac1c6 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -379,6 +379,81 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, } } +namespace { +// Contains all the logic for determining bounds of a LT or LTE query. +void buildBoundsForQueryElementForLT(BSONElement dataElt, + const mongo::CollatorInterface* collator, + BSONObjBuilder* bob) { + // Use -infinity for one-sided numerical bounds + if (dataElt.isNumber()) { + bob->appendNumber("", -std::numeric_limits<double>::infinity()); + } else if (dataElt.type() == BSONType::Array) { + // For comparison to an array, we do lexicographic comparisons. In a multikey index, the + // index entries are the array elements themselves. We must therefore look at all types, and + // all values between MinKey and the first element in the array. + bob->appendMinKey(""); + } else { + bob->appendMinForType("", dataElt.type()); + } + if (dataElt.type() != BSONType::Array) { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + return; + } + + auto eltArr = dataElt.Array(); + if (eltArr.empty()) { + // The empty array is the lowest array. + bob->appendMinForType("", dataElt.type()); + } else { + // If the type of the element is greater than the type of the array, the bounds have to + // include that element. Otherwise the array type, and therefore `dataElt` is + // sufficiently large to include all relevant keys. + if (canonicalizeBSONType(eltArr[0].type()) > canonicalizeBSONType(BSONType::Array)) { + CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob); + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + } +} + +void buildBoundsForQueryElementForGT(BSONElement dataElt, + const mongo::CollatorInterface* collator, + BSONObjBuilder* bob) { + if (dataElt.type() == BSONType::Array) { + auto eltArr = dataElt.Array(); + if (eltArr.empty()) { + // If the array is empty, we need bounds that will match all arrays. Unfortunately, + // this means that we have to check the entire index, as any array could have a key + // anywhere in the multikey index. + bob->appendMinKey(""); + } else { + // If the type of the element is smaller than the type of the array, the bounds need + // to extend to that element. Otherwise the array type, and therefore `dataElt` is + // sufficiently large include all relevant keys. + if (canonicalizeBSONType(eltArr[0].type()) < canonicalizeBSONType(BSONType::Array)) { + CollationIndexKey::collationAwareIndexKeyAppend(eltArr[0], collator, bob); + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + } + } else { + CollationIndexKey::collationAwareIndexKeyAppend(dataElt, collator, bob); + } + + if (dataElt.isNumber()) { + bob->appendNumber("", std::numeric_limits<double>::infinity()); + // For comparison to an array, we do lexicographic comparisons. In a multikey index, the + // index entries are the array elements themselves. We must therefore look at all types, and + // all values between the first element in the array and MaxKey. + } else if (dataElt.type() == BSONType::Array) { + bob->appendMaxKey(""); + } else { + bob->appendMaxForType("", dataElt.type()); + } +} + +} // namespace + void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, const BSONElement& elt, const IndexEntry& index, @@ -535,18 +610,13 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, } BSONObjBuilder bob; - // Use -infinity for one-sided numerical bounds - if (dataElt.isNumber()) { - bob.appendNumber("", -std::numeric_limits<double>::infinity()); - } else { - bob.appendMinForType("", dataElt.type()); - } - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForLT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); + bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj); const Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), true)); + dataObj, IndexBounds::makeBoundInclusionFromBoundBools(inclusiveBounds, true)); oilOut->intervals.push_back(interval); *tightnessOut = getInequalityPredicateTightness(interval, dataElt, index); @@ -554,12 +624,13 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); BSONElement dataElt = node->getData(); - // Everything is < MaxKey, except for MaxKey. + // Everything is < MaxKey, except for MaxKey. However the bounds need to be inclusive to + // find the array [MaxKey] which is smaller for a comparison but equal in a multikey index. if (MaxKey == dataElt.type()) { oilOut->intervals.push_back(allValuesRespectingInclusion( - IndexBounds::makeBoundInclusionFromBoundBools(true, false))); - *tightnessOut = - index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT; + IndexBounds::makeBoundInclusionFromBoundBools(true, index.multikey))); + *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH + : IndexBoundsBuilder::EXACT; return; } @@ -570,17 +641,14 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, } BSONObjBuilder bob; - // Use -infinity for one-sided numerical bounds - if (dataElt.isNumber()) { - bob.appendNumber("", -std::numeric_limits<double>::infinity()); - } else { - bob.appendMinForType("", dataElt.type()); - } - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForLT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(typeMatch(dataObj), false)); + bool inclusiveBounds = dataElt.type() == BSONType::Array; + Interval interval = + makeRangeInterval(dataObj, + IndexBounds::makeBoundInclusionFromBoundBools( + typeMatch(dataObj) || inclusiveBounds, inclusiveBounds)); // If the operand to LT is equal to the lower bound X, the interval [X, X) is invalid // and should not be added to the bounds. @@ -593,12 +661,13 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); BSONElement dataElt = node->getData(); - // Everything is > MinKey, except MinKey. + // Everything is > MinKey, except MinKey. However the bounds need to be inclusive to find + // the array [MinKey], which is larger for a comparison but equal in a multikey index. if (MinKey == dataElt.type()) { oilOut->intervals.push_back(allValuesRespectingInclusion( - IndexBounds::makeBoundInclusionFromBoundBools(false, true))); - *tightnessOut = - index.collator ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT; + IndexBounds::makeBoundInclusionFromBoundBools(index.multikey, true))); + *tightnessOut = index.collator || index.multikey ? IndexBoundsBuilder::INEXACT_FETCH + : IndexBoundsBuilder::EXACT; return; } @@ -609,16 +678,14 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, } BSONObjBuilder bob; - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - if (dataElt.isNumber()) { - bob.appendNumber("", std::numeric_limits<double>::infinity()); - } else { - bob.appendMaxForType("", dataElt.type()); - } - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForGT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(false, typeMatch(dataObj))); + bool inclusiveBounds = dataElt.type() == BSONType::Array; + Interval interval = + makeRangeInterval(dataObj, + IndexBounds::makeBoundInclusionFromBoundBools( + inclusiveBounds, inclusiveBounds || typeMatch(dataObj))); // If the operand to GT is equal to the upper bound X, the interval (X, X] is invalid // and should not be added to the bounds. @@ -652,19 +719,13 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, makeNullEqualityBounds(index, isHashed, oilOut, tightnessOut); return; } - BSONObjBuilder bob; - CollationIndexKey::collationAwareIndexKeyAppend(dataElt, index.collator, &bob); - if (dataElt.isNumber()) { - bob.appendNumber("", std::numeric_limits<double>::infinity()); - } else { - bob.appendMaxForType("", dataElt.type()); - } - BSONObj dataObj = bob.obj(); + buildBoundsForQueryElementForGT(dataElt, index.collator, &bob); + BSONObj dataObj = bob.done().getOwned(); verify(dataObj.isOwned()); - + bool inclusiveBounds = dataElt.type() == BSONType::Array || typeMatch(dataObj); const Interval interval = makeRangeInterval( - dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, typeMatch(dataObj))); + dataObj, IndexBounds::makeBoundInclusionFromBoundBools(true, inclusiveBounds)); oilOut->intervals.push_back(interval); *tightnessOut = getInequalityPredicateTightness(interval, dataElt, index); |