summaryrefslogtreecommitdiff
path: root/src/mongo/db/matcher
diff options
context:
space:
mode:
authorDavid Storch <david.storch@mongodb.com>2021-06-01 18:14:41 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-14 15:25:21 +0000
commitd75f3a8bb66ef1ef41bfd16eecd42c31f91c818b (patch)
tree1dbfaf604282e43d5fcf2d6a3e67f9eeaa254cdb /src/mongo/db/matcher
parenta2d174f0aa4c9cbac4b39863d0f6c37f4774470b (diff)
downloadmongo-d75f3a8bb66ef1ef41bfd16eecd42c31f91c818b.tar.gz
SERVER-21929 Make $in with null always match undefined
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r--src/mongo/db/matcher/expression_leaf.cpp4
-rw-r--r--src/mongo/db/matcher/expression_leaf_test.cpp26
-rw-r--r--src/mongo/db/matcher/expression_optimize_test.cpp5
-rw-r--r--src/mongo/db/matcher/expression_tree.cpp35
4 files changed, 37 insertions, 33 deletions
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp
index 25280408af2..66abea54ccf 100644
--- a/src/mongo/db/matcher/expression_leaf.cpp
+++ b/src/mongo/db/matcher/expression_leaf.cpp
@@ -403,7 +403,9 @@ bool InMatchExpression::contains(const BSONElement& e) const {
}
bool InMatchExpression::matchesSingleElement(const BSONElement& e, MatchDetails* details) const {
- if (_hasNull && e.eoo()) {
+ // When an $in has a null, it adopts the same semantics as {$eq:null}. Namely, in addition to
+ // matching literal null values, the $in should match missing and undefined.
+ if (_hasNull && (e.eoo() || e.type() == BSONType::Undefined)) {
return true;
}
if (contains(e)) {
diff --git a/src/mongo/db/matcher/expression_leaf_test.cpp b/src/mongo/db/matcher/expression_leaf_test.cpp
index 8ee1d50cb1b..209a6b80225 100644
--- a/src/mongo/db/matcher/expression_leaf_test.cpp
+++ b/src/mongo/db/matcher/expression_leaf_test.cpp
@@ -134,11 +134,14 @@ TEST(EqOp, MatchesReferencedArrayValue) {
TEST(EqOp, MatchesNull) {
BSONObj operand = BSON("a" << BSONNULL);
EqualityMatchExpression eq("a", operand["a"]);
- ASSERT(eq.matchesBSON(BSONObj(), nullptr));
- ASSERT(eq.matchesBSON(BSON("a" << BSONNULL), nullptr));
- ASSERT(!eq.matchesBSON(BSON("a" << 4), nullptr));
- // A non-existent field is treated same way as an empty bson object
- ASSERT(eq.matchesBSON(BSON("b" << 4), nullptr));
+ ASSERT_TRUE(eq.matchesBSON(BSONObj(), nullptr));
+ ASSERT_TRUE(eq.matchesBSON(BSON("a" << BSONNULL), nullptr));
+ ASSERT_FALSE(eq.matchesBSON(BSON("a" << 4), nullptr));
+
+ // {$eq:null} has special semantics which say that both missing and undefined match, in addition
+ // to literal nulls.
+ ASSERT_TRUE(eq.matchesBSON(BSON("b" << 4), nullptr));
+ ASSERT_TRUE(eq.matchesBSON(BSON("a" << BSONUndefined), nullptr));
}
// This test documents how the matcher currently works,
@@ -1192,11 +1195,14 @@ TEST(InMatchExpression, MatchesNull) {
std::vector<BSONElement> equalities{operand.firstElement()};
ASSERT_OK(in.setEqualities(std::move(equalities)));
- ASSERT(in.matchesBSON(BSONObj(), nullptr));
- ASSERT(in.matchesBSON(BSON("a" << BSONNULL), nullptr));
- ASSERT(!in.matchesBSON(BSON("a" << 4), nullptr));
- // A non-existent field is treated same way as an empty bson object
- ASSERT(in.matchesBSON(BSON("b" << 4), nullptr));
+ ASSERT_TRUE(in.matchesBSON(BSONObj(), nullptr));
+ ASSERT_TRUE(in.matchesBSON(BSON("a" << BSONNULL), nullptr));
+ ASSERT_FALSE(in.matchesBSON(BSON("a" << 4), nullptr));
+
+ // When null appears inside an $in, it has the same special semantics as an {$eq:null}
+ // predicate. In particular, we expect it to match both missing and undefined.
+ ASSERT_TRUE(in.matchesBSON(BSON("b" << 4), nullptr));
+ ASSERT_TRUE(in.matchesBSON(BSON("a" << BSONUndefined), nullptr));
}
TEST(InMatchExpression, MatchesUndefined) {
diff --git a/src/mongo/db/matcher/expression_optimize_test.cpp b/src/mongo/db/matcher/expression_optimize_test.cpp
index ab9e91cb30f..eaa606e06df 100644
--- a/src/mongo/db/matcher/expression_optimize_test.cpp
+++ b/src/mongo/db/matcher/expression_optimize_test.cpp
@@ -460,8 +460,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) {
{"{$or: [{f1: 42}, {f1: NaN}, {f1: 99}]}", "{ f1: { $in: [ NaN, 42, 99 ] } }"},
{"{$or: [{f1: /^x/}, {f1:'ab'}]}", "{ f1: { $in: [ \"ab\", /^x/ ] } }"},
{"{$or: [{f1: /^x/}, {f1:'^a'}]}", "{ f1: { $in: [ \"^a\", /^x/ ] } }"},
- {"{$or: [{f1: 42}, {f1: null}, {f1: 99}]}",
- "{ $or: [ { f1: { $in: [ 42, 99 ] } }, { f1: { $eq: null } } ] }"},
+ {"{$or: [{f1: 42}, {f1: null}, {f1: 99}]}", "{ f1: { $in: [ null, 42, 99 ] } }"},
{"{$or: [{f1: 1}, {f2: 9}, {f1: 99}]}",
"{ $or: [ { f1: { $in: [ 1, 99 ] } }, { f2: { $eq: 9 } } ] }"},
{"{$and: [{$or: [{f1: 7}, {f1: 3}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}",
@@ -471,6 +470,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) {
{"{$or: [{$and: [{f1: 7}, {f2: 7}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}",
"{ $or: [ { $and: [ { f1: { $eq: 7 } }, { f2: { $eq: 7 } }, { f1: { $eq: 5 } } ] },"
" { f1: { $in: [ 1, 2, 3 ] } } ] }"},
+ {"{$or: [{$and: [ { f1: null } ] } ] }", "{ f1: { $eq: null } }"},
};
auto optimizeExpr = [](std::string exprStr) {
@@ -492,6 +492,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) {
ASSERT_BSONOBJ_EQ(optimizeExpr(queries[7].first), fromjson(queries[7].second));
ASSERT_BSONOBJ_EQ(optimizeExpr(queries[8].first), fromjson(queries[8].second));
ASSERT_BSONOBJ_EQ(optimizeExpr(queries[9].first), fromjson(queries[9].second));
+ ASSERT_BSONOBJ_EQ(optimizeExpr(queries[10].first), fromjson(queries[10].second));
}
} // namespace
diff --git a/src/mongo/db/matcher/expression_tree.cpp b/src/mongo/db/matcher/expression_tree.cpp
index ec7a5e08dd0..c9cdf026700 100644
--- a/src/mongo/db/matcher/expression_tree.cpp
+++ b/src/mongo/db/matcher/expression_tree.cpp
@@ -162,9 +162,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
boost::optional<std::string> childPath;
const CollatorInterface* eqCollator = nullptr;
- auto isNullOrRegEx = [](const BSONElement& elm) {
- return (elm.isNull() || elm.type() == BSONType::RegEx);
- };
+ auto isRegEx = [](const BSONElement& elm) { return elm.type() == BSONType::RegEx; };
// Check if all children are equality conditions or regular expressions with the
// same path argument, and same collation.
@@ -175,22 +173,19 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
continue;
}
- // Disjunctions of equalities use $eq comparison, which has different semantics
- // from $in equality comparison in two cases:
- // (1) ('null' $eq 'undefined' = true), while ('null' $in 'undefined' = false),
- // that is, when comparing a 'null' argument to an 'undefined' value, the
- // result is different for $eq vs $in.
- // (2) the regex under the equality is matched literally as a string constant,
- // while a regex inside $in is matched as a regular expression.
- // $lookup processing explicitly depends on this different semantics.
- // Both these cases should not be rewritten into $in because of the different
- // comparison semantics.
+ // Disjunctions of equalities use $eq comparison, which has different semantics from
+ // $in for regular expressions. The regex under the equality is matched literally as
+ // a string constant, while a regex inside $in is matched as a regular expression.
+ // Furthermore, $lookup processing explicitly depends on these different semantics.
+ //
+ // We should not attempt to rewrite an $eq:<regex> into $in because of these
+ // different comparison semantics.
const CollatorInterface* curCollator = nullptr;
if (childExpression->matchType() == MatchExpression::EQ) {
auto eqExpression =
static_cast<EqualityMatchExpression*>(childExpression.get());
curCollator = eqExpression->getCollator();
- if (isNullOrRegEx(eqExpression->getData())) {
+ if (isRegEx(eqExpression->getData())) {
++countNonEquivExpr;
continue;
}
@@ -211,14 +206,14 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
}
}
tassert(3401201,
- "All expressions must be classified as either eq-equiv or non-eq-equiv.",
+ "All expressions must be classified as either eq-equiv or non-eq-equiv",
countEquivEqPaths + countNonEquivExpr == children.size());
// The condition above checks that there are at least two equalities that can be
- // rewritten to an $in, and the we have classified all $or conditions into two disjunct
+ // rewritten to an $in, and the we have classified all $or conditions into two disjoint
// groups.
if (countEquivEqPaths > 1) {
- tassert(3401202, "There must be a common path.", childPath);
+ tassert(3401202, "There must be a common path", childPath);
auto inExpression = std::make_unique<InMatchExpression>(StringData(*childPath));
auto nonEquivOrExpr =
(countNonEquivExpr > 0) ? std::make_unique<OrMatchExpression>() : nullptr;
@@ -230,7 +225,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
} else if (childExpression->matchType() == MatchExpression::EQ) {
std::unique_ptr<EqualityMatchExpression> eqExpressionPtr{
static_cast<EqualityMatchExpression*>(childExpression.release())};
- if (isNullOrRegEx(eqExpressionPtr->getData()) ||
+ if (isRegEx(eqExpressionPtr->getData()) ||
eqExpressionPtr->getCollator() != eqCollator) {
nonEquivOrExpr->add(std::move(eqExpressionPtr));
} else {
@@ -244,7 +239,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
regexExpressionPtr->setPath({});
auto status = inExpression->addRegex(std::move(regexExpressionPtr));
tassert(3401203, // TODO SERVER-53380 convert to tassertStatusOK.
- "Conversion from OR to IN should always succeed.",
+ "Conversion from OR to IN should always succeed",
status == Status::OK());
} else {
nonEquivOrExpr->add(std::move(childExpression));
@@ -266,7 +261,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c
auto status = inExpression->setEqualities(std::move(inEqualities));
tassert(3401206, // TODO SERVER-53380 convert to tassertStatusOK.
- "Conversion from OR to IN should always succeed.",
+ "Conversion from OR to IN should always succeed",
status == Status::OK());
inExpression->setBackingBSON(std::move(backingArr));