diff options
author | Ted Tuckman <ted.tuckman@mongodb.com> | 2020-06-09 10:08:29 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-03 15:39:37 +0000 |
commit | 1be4f1b11d932a7799a6b08385c650bb61867e50 (patch) | |
tree | 4f0ffd0f6a98027cb8d1927cf8bf1f3637776722 | |
parent | 13a49321277a5b9fcc180594964a801b9136f6f2 (diff) | |
download | mongo-1be4f1b11d932a7799a6b08385c650bb61867e50.tar.gz |
SERVER-47382 Support inequalities on arrays with $not on indexed collection
-rw-r--r-- | jstests/aggregation/extras/utils.js | 37 | ||||
-rw-r--r-- | jstests/core/array_index_and_nonIndex_consistent.js | 79 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression.h | 8 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_leaf.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query_encoder.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder_test.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 24 |
8 files changed, 142 insertions, 61 deletions
diff --git a/jstests/aggregation/extras/utils.js b/jstests/aggregation/extras/utils.js index 1d5f5dee228..088ffa786e7 100644 --- a/jstests/aggregation/extras/utils.js +++ b/jstests/aggregation/extras/utils.js @@ -174,6 +174,43 @@ function arrayEq(al, ar, verbose = false, valueComparator, fieldsToSkip = []) { return true; } +function arrayDiff(al, ar, verbose = false, valueComparator) { + // Check that these are both arrays. + if (!(al instanceof Array)) { + debug('arrayDiff: al is not an array: ' + tojson(al)); + return false; + } + + if (!(ar instanceof Array)) { + debug('arrayDiff: ar is not an array: ' + tojson(ar)); + return false; + } + + // Keep a set of which indexes we've already used to avoid considering [1,1] as equal to [1,2]. + const matchedIndexesInRight = new Set(); + let unmatchedElementsInLeft = []; + for (let leftElem of al) { + let foundMatch = false; + for (let i = 0; i < ar.length; ++i) { + if (!matchedIndexesInRight.has(i) && anyEq(leftElem, ar[i], verbose, valueComparator)) { + matchedIndexesInRight.add(i); // Don't use the same value each time. + foundMatch = true; + break; + } + } + if (!foundMatch) { + unmatchedElementsInLeft.push(leftElem); + } + } + let unmatchedElementsInRight = []; + for (let i = 0; i < ar.length; ++i) { + if (!matchedIndexesInRight.has(i)) { + unmatchedElementsInRight.push(ar[i]); + } + } + return {left: unmatchedElementsInLeft, right: unmatchedElementsInRight}; +} + /** * Makes a shallow copy of 'a'. */ diff --git a/jstests/core/array_index_and_nonIndex_consistent.js b/jstests/core/array_index_and_nonIndex_consistent.js index 7846032c21a..71657238d91 100644 --- a/jstests/core/array_index_and_nonIndex_consistent.js +++ b/jstests/core/array_index_and_nonIndex_consistent.js @@ -2,6 +2,7 @@ * Make sure that $gt and $lt queries return the same results regardless of whether there is a * multikey index. * @tags: [ + * requires_fcv_47, * sbe_incompatible, * ] */ @@ -9,6 +10,18 @@ (function() { "use strict"; load("jstests/aggregation/extras/utils.js"); // arrayEq + +function buildErrorString(q, indexed, nonIndexed) { + const arrDiff = arrayDiff(indexed, nonIndexed); + if (arrDiff === false) { + return ""; + } + let errStr = "Ran query " + tojson(q) + + " and got mismatched results.\nUnmatched from indexed collection (" + arrDiff.left.length + + "/" + indexed.length + "): " + tojson(arrDiff.left) + "\nUnmatched from nonIndexed (" + + arrDiff.right.length + "/" + nonIndexed.length + "): " + tojson(arrDiff.right); + return errStr; +} const indexColl = db.indexColl; const nonIndexedColl = db.nonIndexedColl; indexColl.drop(); @@ -35,6 +48,8 @@ collList.forEach(function(collObj) { {val: [true, 1]}, {val: [1, 4]}, {val: [null]}, + // TODO SERVER-49766 Enable this test once the bug is found and fixed. + // {val: null}, {val: MinKey}, {val: [MinKey]}, {val: [MinKey, 3]}, @@ -48,31 +63,51 @@ collList.forEach(function(collObj) { }); const queryList = [ - [2, 2], [0, 3], [3, 0], [1, 3], [3, 1], [1, 5], [5, 1], [1], - [3], [5], {"test": 2}, {"test": 6}, [true, true], [true], true, 1, - 3, 5, null, [null], [], [MinKey], [MinKey, 2], [MinKey, 4], - MinKey, [MaxKey], [MaxKey, 2], [MaxKey, 4], MaxKey, [], + [2, 2], [0, 3], [3, 0], [1, 3], [3, 1], [1, 5], [5, 1], [1], + [3], [5], {"test": 2}, {"test": 6}, [true, true], [true], true, 1, + 3, 5, [], [MinKey], [MinKey, 2], [MinKey, 4], MinKey, [MaxKey], + [MaxKey, 2], [MaxKey, 4], MaxKey, [], false, + // TODO: SERVER-49766 Enable these queries. + // null, [null], ]; -let failedLT = []; -let failedGT = []; - queryList.forEach(function(q) { - const queryLT = {val: {"$lt": q}}; - const queryGT = {val: {"$gt": q}}; + const queryPreds = [ + {$lt: q}, + {$lte: q}, + {$gt: q}, + {$gte: q}, + {$eq: q}, + {$not: {$lt: q}}, + {$not: {$lte: q}}, + {$not: {$gt: q}}, + {$not: {$gte: q}}, + {$not: {$eq: q}}, + ]; const projOutId = {_id: 0, val: 1}; - - let indexRes = indexColl.find(queryLT, projOutId).sort({val: 1}).toArray(); - let nonIndexedRes = nonIndexedColl.find(queryLT, projOutId).sort({val: 1}).toArray(); - - assert(arrayEq(indexRes, nonIndexedRes), - "Ran query " + tojson(queryLT) + " and got mismatched results.\n Indexed: " + - tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); - - indexRes = indexColl.find(queryGT, projOutId).sort({val: 1}).toArray(); - nonIndexedRes = nonIndexedColl.find(queryGT, projOutId).sort({val: 1}).toArray(); - assert(arrayEq(indexRes, nonIndexedRes), - "Ran query " + tojson(queryGT) + " and got mismatched results.\n Indexed: " + - tojson(indexRes) + "\n NonIndexed: " + tojson(nonIndexedRes)); + queryPreds.forEach(function(pred) { + const query = {val: pred}; + const indexRes = indexColl.find(query, projOutId).sort({val: 1}).toArray(); + const nonIndexedRes = nonIndexedColl.find(query, projOutId).sort({val: 1}).toArray(); + assert(arrayEq(indexRes, nonIndexedRes), buildErrorString(query, indexRes, nonIndexedRes)); + }); +}); +// Test queries with multiple intervals. +const multiIntQueryList = [ + {val: {$gt: 1, $lt: 3}}, + {val: {$gt: [1], $lt: [3]}}, + {val: {$not: {$gt: 3, $lt: 1}}}, + {val: {$not: {$gt: [3], $lt: [1]}}}, + {val: {$not: {$gt: [3], $lt: 1}}}, + {val: {$not: {$not: {$lt: 3}}}}, + {val: {$not: {$not: {$lt: true}}}}, + {val: {$not: {$not: {$lt: [true]}}}}, + {val: {$not: {$not: {$lt: [3]}}}}, +]; +multiIntQueryList.forEach(function(q) { + const projOutId = {_id: 0, val: 1}; + const indexRes = indexColl.find(q, projOutId).sort({val: 1}).toArray(); + const nonIndexedRes = nonIndexedColl.find(q, projOutId).sort({val: 1}).toArray(); + assert(arrayEq(indexRes, nonIndexedRes), buildErrorString(q, indexRes, nonIndexedRes)); }); })(); diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 6e702768d82..ba2ac275e9d 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -454,6 +454,14 @@ public: return false; } + virtual bool isGTMinKey() const { + return false; + } + + virtual bool isLTMaxKey() const { + return false; + } + virtual void acceptVisitor(MatchExpressionMutableVisitor* visitor) = 0; virtual void acceptVisitor(MatchExpressionConstVisitor* visitor) const = 0; diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h index 2f4dabdf965..5420c91ca55 100644 --- a/src/mongo/db/matcher/expression_leaf.h +++ b/src/mongo/db/matcher/expression_leaf.h @@ -299,6 +299,10 @@ public: void acceptVisitor(MatchExpressionConstVisitor* visitor) const final { visitor->visit(this); } + + bool isLTMaxKey() const final { + return _rhs.type() == BSONType::MaxKey; + } }; class GTMatchExpression final : public ComparisonMatchExpression { @@ -336,6 +340,10 @@ public: void acceptVisitor(MatchExpressionConstVisitor* visitor) const final { visitor->visit(this); } + + bool isGTMinKey() const final { + return _rhs.type() == BSONType::MinKey; + } }; class GTEMatchExpression final : public ComparisonMatchExpression { diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index fede266acc1..487a8ad2198 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -414,6 +414,15 @@ void encodeKeyForMatch(const MatchExpression* tree, StringBuilder* keyBuilder) { } } + // If the query predicate is minKey or maxKey it can't use the same plan as other GT/LT + // queries. If the index is multikey and the query involves one of these values, it needs + // to use INEXACT_FETCH and the bounds can't be inverted. Therefore these need their own + // shape. + if (tree->isGTMinKey()) + *keyBuilder << "min"; + else if (tree->isLTMaxKey()) + *keyBuilder << "max"; + // Traverse child nodes. // Enclose children in []. if (tree->numChildren() > 0) { diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index dc826b1ebdb..98386d60579 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -568,12 +568,12 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, *tightnessOut = IndexBoundsBuilder::EXACT; } - // This disables indexed negation of array inequality. - // TODO: SERVER-45233 Perform correct behavior here once indexed array inequality without - // negation's semantics are correctly determined and implemented. - massert(ErrorCodes::InternalError, - "Indexed negation of array inequality not supported.", - *tightnessOut == IndexBoundsBuilder::EXACT); + // Generally speaking inverting bounds can only be done for exact bounds. Any looser bounds + // (like INEXACT_FETCH) would signal that inversion would be mistakenly excluding some + // values. One exception is for collation, whose index bounds are tracked as INEXACT_FETCH, + // but only because the index data is different than the user data, not because the range + // is imprecise. + invariant(*tightnessOut == IndexBoundsBuilder::EXACT || index.collator); // If the index is multikey on this path, it doesn't matter what the tightness of the child // is, we must return INEXACT_FETCH. Consider a multikey index on 'a' with document diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp index 7ec1beb9f37..2d3ca2d0ac5 100644 --- a/src/mongo/db/query/index_bounds_builder_test.cpp +++ b/src/mongo/db/query/index_bounds_builder_test.cpp @@ -492,32 +492,6 @@ TEST_F(IndexBoundsBuilderTest, TranslateGtMinKey) { ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); } -TEST_F(IndexBoundsBuilderTest, DontCrashOnNegationOfArrayInequality) { - BSONObj keyPattern = BSON("a" << 1); - auto testIndex = IndexEntry(keyPattern, - IndexNames::nameToType(IndexNames::findPluginName(keyPattern)), - true, // multikey - {}, - {}, - false, // sparse - false, // unique - IndexEntry::Identifier{"test_foo"}, - nullptr, // filterExpr - BSONObj(), - nullptr, - nullptr); - - BSONObj obj = fromjson("{a: {$not: {$lt: [\"here\", {}, false]}}}"); - auto expr = MatchExpression::optimize(parseMatchExpression(obj)); - BSONElement elt = obj.firstElement(); - OrderedIntervalList oil; - IndexBoundsBuilder::BoundsTightness tightness; - // TODO: SERVER-45233 This should succeed rather than throwing code. - ASSERT_THROWS_CODE(IndexBoundsBuilder::translate(expr.get(), elt, testIndex, &oil, &tightness), - DBException, - ErrorCodes::InternalError); -} - // Nothing can be greater than MaxKey so the resulting index bounds would be a useless empty range. TEST_F(IndexBoundsBuilderTest, TranslateGtMaxKeyDoesNotGenerateBounds) { auto testIndex = buildSimpleIndexEntry(); diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 642074fea09..47e1df683f5 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -55,11 +55,13 @@ namespace { namespace wcp = ::mongo::wildcard_planning; -// Can't index negations of {$eq: <Array>} or {$in: [<Array>, ...]}. Note that we could -// use the index in principle, though we would need to generate special bounds. -bool isEqualsArrayOrNotInArray(const MatchExpression* me) { +// Can't index negations of {$eq: <Array>}, {$lt: <Array>}, {$gt: <Array>}, {$lte: <Array>}, {$gte: +// <Array>}, or {$in: [<Array>, ...]}. Note that we could use the index in principle, though we +// would need to generate special bounds. +bool isComparisonWithArrayPred(const MatchExpression* me) { const auto type = me->matchType(); - if (type == MatchExpression::EQ) { + if (type == MatchExpression::EQ || type == MatchExpression::LT || type == MatchExpression::GT || + type == MatchExpression::LTE || type == MatchExpression::GTE) { return static_cast<const ComparisonMatchExpression*>(me)->getData().type() == BSONType::Array; } else if (type == MatchExpression::MATCH_IN) { @@ -68,7 +70,6 @@ bool isEqualsArrayOrNotInArray(const MatchExpression* me) { return elt.type() == BSONType::Array; }); } - return false; } @@ -444,7 +445,16 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } - if (isEqualsArrayOrNotInArray(child)) { + // Comparisons with arrays have strange enough semantics that inverting the bounds + // within a $not has many complex special cases. We avoid indexing these queries, even + // though it is sometimes possible to build useful bounds. + if (isComparisonWithArrayPred(child)) { + return false; + } + + // $gt and $lt to MinKey/MaxKey must build inexact bounds if the index is multikey and + // therefore cannot be inverted safely in a $not. + if (index.multikey && (child->isGTMinKey() || child->isLTMaxKey())) { return false; } @@ -623,7 +633,7 @@ bool QueryPlannerIXSelect::nodeIsSupportedBySparseIndex(const MatchExpression* q bool QueryPlannerIXSelect::logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr) { return !(queryExpr->matchType() == MatchExpression::NOT && - isEqualsArrayOrNotInArray(queryExpr->getChild(0))); + isComparisonWithArrayPred(queryExpr->getChild(0))); } bool QueryPlannerIXSelect::nodeIsSupportedByWildcardIndex(const MatchExpression* queryExpr) { |