summaryrefslogtreecommitdiff
path: root/src/mongo/db/matcher
diff options
context:
space:
mode:
authorTimour Katchaounov <timour.katchaounov@mongodb.com>2021-03-21 19:11:40 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-10 20:41:28 +0000
commit2f73b66d3cd004e0110bbee47151b75c7fc0791f (patch)
treed8a4926fe8cfac15aad840af0ce3373df66d9a59 /src/mongo/db/matcher
parentf3dbbebb303be094a563989f086cbf35e61e1b69 (diff)
downloadmongo-2f73b66d3cd004e0110bbee47151b75c7fc0791f.tar.gz
SERVER-34012 Planner's logic for taking union of index bounds intervals is slow for large $or queries
Implement a rewrite of single-path ORs into IN-lists.
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r--src/mongo/db/matcher/expression_leaf.cpp5
-rw-r--r--src/mongo/db/matcher/expression_leaf.h6
-rw-r--r--src/mongo/db/matcher/expression_optimize_test.cpp43
-rw-r--r--src/mongo/db/matcher/expression_tree.cpp136
4 files changed, 190 insertions, 0 deletions
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp
index b10ced9bfe4..25280408af2 100644
--- a/src/mongo/db/matcher/expression_leaf.cpp
+++ b/src/mongo/db/matcher/expression_leaf.cpp
@@ -389,6 +389,7 @@ std::unique_ptr<MatchExpression> InMatchExpression::shallowClone() const {
next->_hasEmptyArray = _hasEmptyArray;
next->_equalitySet = _equalitySet;
next->_originalEqualityVector = _originalEqualityVector;
+ next->_equalityStorage = _equalityStorage;
for (auto&& regex : _regexes) {
std::unique_ptr<RegexMatchExpression> clonedRegex(
static_cast<RegexMatchExpression*>(regex->shallowClone().release()));
@@ -546,6 +547,10 @@ Status InMatchExpression::setEqualities(std::vector<BSONElement> equalities) {
return Status::OK();
}
+void InMatchExpression::setBackingBSON(BSONObj equalityStorage) {
+ _equalityStorage = std::move(equalityStorage);
+}
+
Status InMatchExpression::addRegex(std::unique_ptr<RegexMatchExpression> expr) {
_regexes.push_back(std::move(expr));
return Status::OK();
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h
index fe6878eb05d..3c35445bc0b 100644
--- a/src/mongo/db/matcher/expression_leaf.h
+++ b/src/mongo/db/matcher/expression_leaf.h
@@ -584,6 +584,8 @@ public:
Status setEqualities(std::vector<BSONElement> equalities);
+ void setBackingBSON(BSONObj equalityStorage);
+
Status addRegex(std::unique_ptr<RegexMatchExpression> expr);
const std::vector<BSONElement>& getEqualities() const {
@@ -648,6 +650,10 @@ private:
// Container of regex elements this object owns.
std::vector<std::unique_ptr<RegexMatchExpression>> _regexes;
+
+ // When this $in is generated internally, e.g. via a rewrite, this is where we store the
+ // data of the corresponding equality elements.
+ BSONObj _equalityStorage;
};
/**
diff --git a/src/mongo/db/matcher/expression_optimize_test.cpp b/src/mongo/db/matcher/expression_optimize_test.cpp
index eaaf42e3e76..6a41ad33643 100644
--- a/src/mongo/db/matcher/expression_optimize_test.cpp
+++ b/src/mongo/db/matcher/expression_optimize_test.cpp
@@ -29,6 +29,8 @@
#include "mongo/db/pipeline/expression.h"
+#include <vector>
+
#include "mongo/db/matcher/expression_always_boolean.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/canonical_query.h"
@@ -448,5 +450,46 @@ TEST(ExpressionOptimizeTest, NestedOrWithAlwaysTrueOptimizesToAlwaysTrue) {
ASSERT_BSONOBJ_EQ(bob.obj(), fromjson("{$alwaysTrue: 1}"));
}
+TEST(ExpressionOptimizeTest, OrRewrittenToIn) {
+ const std::vector<std::pair<std::string, std::string>> queries = {
+ {"{$or: [{f1: 5}, {f1: 3}, {f1: 7}]}", "{ f1: { $in: [ 3, 5, 7 ] } }"},
+ {"{$or: [{f1: {$eq: 5}}, {f1: {$eq: 3}}, {f1: {$eq: 7}}]}", "{ f1: { $in: [ 3, 5, 7 ] } }"},
+ {"{$or: [{f1: 42}, {f1: NaN}, {f1: 99}]}", "{ f1: { $in: [ NaN, 42, 99 ] } }"},
+ {"{$or: [{f1: /^x/}, {f1:'ab'}]}", "{ f1: { $in: [ \"ab\", /^x/ ] } }"},
+ {"{$or: [{f1: /^x/}, {f1:'^a'}]}", "{ f1: { $in: [ \"^a\", /^x/ ] } }"},
+ {"{$or: [{f1: 42}, {f1: null}, {f1: 99}]}",
+ "{ $or: [ { f1: { $in: [ 42, 99 ] } }, { f1: { $eq: null } } ] }"},
+ {"{$or: [{f1: 1}, {f2: 9}, {f1: 99}]}",
+ "{ $or: [ { f1: { $in: [ 1, 99 ] } }, { f2: { $eq: 9 } } ] }"},
+ {"{$and: [{$or: [{f1: 7}, {f1: 3}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}",
+ "{ $and: [ { f1: { $in: [ 3, 5, 7 ] } }, { f1: { $in: [ 1, 2, 3 ] } } ] }"},
+ {"{$or: [{$or: [{f1: 7}, {f1: 3}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}",
+ "{ $or: [ { f1: { $in: [ 3, 5, 7 ] } }, { f1: { $in: [ 1, 2, 3 ] } } ] }"},
+ {"{$or: [{$and: [{f1: 7}, {f2: 7}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}",
+ "{ $or: [ { $and: [ { f1: { $eq: 7 } }, { f2: { $eq: 7 } }, { f1: { $eq: 5 } } ] },"
+ " { f1: { $in: [ 1, 2, 3 ] } } ] }"},
+ };
+
+ auto optimizeExpr = [](std::string exprStr) {
+ auto obj = fromjson(exprStr);
+ std::unique_ptr<MatchExpression> matchExpression(parseMatchExpression(obj));
+ auto optimizedMatchExpression = MatchExpression::optimize(std::move(matchExpression));
+ BSONObjBuilder bob;
+ optimizedMatchExpression->serialize(&bob, true);
+ return bob.obj();
+ };
+
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[0].first), fromjson(queries[0].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[1].first), fromjson(queries[1].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[2].first), fromjson(queries[2].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[3].first), fromjson(queries[3].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[4].first), fromjson(queries[4].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[5].first), fromjson(queries[5].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[6].first), fromjson(queries[6].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[7].first), fromjson(queries[7].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[8].first), fromjson(queries[8].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[9].first), fromjson(queries[9].second));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_tree.cpp b/src/mongo/db/matcher/expression_tree.cpp
index 92fab31668b..9df86035a47 100644
--- a/src/mongo/db/matcher/expression_tree.cpp
+++ b/src/mongo/db/matcher/expression_tree.cpp
@@ -152,6 +152,142 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
}
}
+ // Rewrite an OR with EQ conditions on the same path as an IN-list. Example:
+ // {$or: [{name: "Don"}, {name: "Alice"}]}
+ // is rewritten as:
+ // {name: {$in: ["Alice", "Don"]}}
+ if (matchType == MatchExpression::OR && children.size() > 1) {
+ size_t countEquivEqPaths = 0;
+ size_t countNonEquivExpr = 0;
+ boost::optional<std::string> childPath;
+ const CollatorInterface* eqCollator = nullptr;
+
+ auto isNullOrRegEx = [](const BSONElement& elm) {
+ return (elm.isNull() || elm.type() == BSONType::RegEx);
+ };
+
+ // Check if all children are equality conditions or regular expressions with the
+ // same path argument, and same collation.
+ for (auto& childExpression : children) {
+ if (childExpression->matchType() != MatchExpression::EQ &&
+ childExpression->matchType() != MatchExpression::REGEX) {
+ ++countNonEquivExpr;
+ continue;
+ }
+
+ // Disjunctions of equalities use $eq comparison, which has different semantics
+ // from $in equality comparison in two cases:
+ // (1) ('null' $eq 'undefined' = true), while ('null' $in 'undefined' = false),
+ // that is, when comparing a 'null' argument to an 'undefined' value, the
+ // result is different for $eq vs $in.
+ // (2) the regex under the equality is matched literally as a string constant,
+ // while a regex inside $in is matched as a regular expression.
+ // $lookup processing explicitly depends on this different semantics.
+ // Both these cases should not be rewritten into $in because of the different
+ // comparison semantics.
+ const CollatorInterface* curCollator = nullptr;
+ if (childExpression->matchType() == MatchExpression::EQ) {
+ auto eqExpression =
+ static_cast<EqualityMatchExpression*>(childExpression.get());
+ curCollator = eqExpression->getCollator();
+ if (isNullOrRegEx(eqExpression->getData())) {
+ ++countNonEquivExpr;
+ continue;
+ }
+ }
+
+ // childExpression is an equality with $in comparison semantics.
+ // The current approach assumes there is one (large) group of $eq disjuncts
+ // that are on the same path.
+ if (!childPath) {
+ // The path of the first equality.
+ childPath = childExpression->path().toString();
+ eqCollator = curCollator;
+ countEquivEqPaths = 1;
+ } else if (*childPath == childExpression->path() && eqCollator == curCollator) {
+ ++countEquivEqPaths; // subsequent equality on the same path
+ } else {
+ ++countNonEquivExpr; // equality on another path
+ }
+ }
+ tassert(3401201,
+ "All expressions must be classified as either eq-equiv or non-eq-equiv.",
+ countEquivEqPaths + countNonEquivExpr == children.size());
+
+ // The condition above checks that there are at least two equalities that can be
+ // rewritten to an $in, and the we have classified all $or conditions into two disjunct
+ // groups.
+ if (countEquivEqPaths > 1) {
+ tassert(3401202, "There must be a common path.", childPath);
+ auto inExpression = std::make_unique<InMatchExpression>(StringData(*childPath));
+ auto nonEquivOrExpr =
+ (countNonEquivExpr > 0) ? std::make_unique<OrMatchExpression>() : nullptr;
+ BSONArrayBuilder bab;
+
+ for (auto& childExpression : children) {
+ if (*childPath != childExpression->path()) {
+ nonEquivOrExpr->add(std::move(childExpression));
+ } else if (childExpression->matchType() == MatchExpression::EQ) {
+ std::unique_ptr<EqualityMatchExpression> eqExpressionPtr{
+ static_cast<EqualityMatchExpression*>(childExpression.release())};
+ if (isNullOrRegEx(eqExpressionPtr->getData()) ||
+ eqExpressionPtr->getCollator() != eqCollator) {
+ nonEquivOrExpr->add(std::move(eqExpressionPtr));
+ } else {
+ bab.append(eqExpressionPtr->getData());
+ }
+ } else if (childExpression->matchType() == MatchExpression::REGEX) {
+ std::unique_ptr<RegexMatchExpression> regexExpressionPtr{
+ static_cast<RegexMatchExpression*>(childExpression.release())};
+ // Reset the path because when we parse a $in expression which contains a
+ // regexp, we create a RegexMatchExpression with an empty path.
+ regexExpressionPtr->setPath({});
+ auto status = inExpression->addRegex(std::move(regexExpressionPtr));
+ tassert(3401203, // TODO SERVER-55449 convert to tassertStatusOK.
+ "Conversion from OR to IN should always succeed.",
+ status == Status::OK());
+ } else {
+ nonEquivOrExpr->add(std::move(childExpression));
+ }
+ }
+ children.clear();
+ tassert(3401204,
+ "Incorrect number of non-equivalent expressions",
+ !nonEquivOrExpr || nonEquivOrExpr->numChildren() == countNonEquivExpr);
+
+ auto backingArr = bab.arr();
+ std::vector<BSONElement> inEqualities;
+ backingArr.elems(inEqualities);
+ tassert(3401205,
+ "Incorrect number of in-equivalent expressions",
+ !countEquivEqPaths ||
+ (inEqualities.size() + inExpression->getRegexes().size()) ==
+ countEquivEqPaths);
+
+ auto status = inExpression->setEqualities(std::move(inEqualities));
+ tassert(3401206, // TODO SERVER-55449 convert to tassertStatusOK.
+ "Conversion from OR to IN should always succeed.",
+ status == Status::OK());
+
+ inExpression->setBackingBSON(std::move(backingArr));
+ if (eqCollator) {
+ inExpression->setCollator(eqCollator);
+ }
+
+ if (countNonEquivExpr > 0) {
+ auto parentOrExpr = std::make_unique<OrMatchExpression>();
+ parentOrExpr->add(std::move(inExpression));
+ if (countNonEquivExpr == 1) {
+ parentOrExpr->add(nonEquivOrExpr->releaseChild(0));
+ } else {
+ parentOrExpr->add(std::move(nonEquivOrExpr));
+ }
+ return parentOrExpr;
+ }
+ return inExpression;
+ }
+ }
+
return expression;
};
}