diff options
author | David Storch <david.storch@mongodb.com> | 2021-06-01 18:14:41 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-14 15:25:21 +0000 |
commit | d75f3a8bb66ef1ef41bfd16eecd42c31f91c818b (patch) | |
tree | 1dbfaf604282e43d5fcf2d6a3e67f9eeaa254cdb /src/mongo/db/matcher | |
parent | a2d174f0aa4c9cbac4b39863d0f6c37f4774470b (diff) | |
download | mongo-d75f3a8bb66ef1ef41bfd16eecd42c31f91c818b.tar.gz |
SERVER-21929 Make $in with null always match undefined
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r-- | src/mongo/db/matcher/expression_leaf.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_leaf_test.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_optimize_test.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_tree.cpp | 35 |
4 files changed, 37 insertions, 33 deletions
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp index 25280408af2..66abea54ccf 100644 --- a/src/mongo/db/matcher/expression_leaf.cpp +++ b/src/mongo/db/matcher/expression_leaf.cpp @@ -403,7 +403,9 @@ bool InMatchExpression::contains(const BSONElement& e) const { } bool InMatchExpression::matchesSingleElement(const BSONElement& e, MatchDetails* details) const { - if (_hasNull && e.eoo()) { + // When an $in has a null, it adopts the same semantics as {$eq:null}. Namely, in addition to + // matching literal null values, the $in should match missing and undefined. + if (_hasNull && (e.eoo() || e.type() == BSONType::Undefined)) { return true; } if (contains(e)) { diff --git a/src/mongo/db/matcher/expression_leaf_test.cpp b/src/mongo/db/matcher/expression_leaf_test.cpp index 8ee1d50cb1b..209a6b80225 100644 --- a/src/mongo/db/matcher/expression_leaf_test.cpp +++ b/src/mongo/db/matcher/expression_leaf_test.cpp @@ -134,11 +134,14 @@ TEST(EqOp, MatchesReferencedArrayValue) { TEST(EqOp, MatchesNull) { BSONObj operand = BSON("a" << BSONNULL); EqualityMatchExpression eq("a", operand["a"]); - ASSERT(eq.matchesBSON(BSONObj(), nullptr)); - ASSERT(eq.matchesBSON(BSON("a" << BSONNULL), nullptr)); - ASSERT(!eq.matchesBSON(BSON("a" << 4), nullptr)); - // A non-existent field is treated same way as an empty bson object - ASSERT(eq.matchesBSON(BSON("b" << 4), nullptr)); + ASSERT_TRUE(eq.matchesBSON(BSONObj(), nullptr)); + ASSERT_TRUE(eq.matchesBSON(BSON("a" << BSONNULL), nullptr)); + ASSERT_FALSE(eq.matchesBSON(BSON("a" << 4), nullptr)); + + // {$eq:null} has special semantics which say that both missing and undefined match, in addition + // to literal nulls. + ASSERT_TRUE(eq.matchesBSON(BSON("b" << 4), nullptr)); + ASSERT_TRUE(eq.matchesBSON(BSON("a" << BSONUndefined), nullptr)); } // This test documents how the matcher currently works, @@ -1192,11 +1195,14 @@ TEST(InMatchExpression, MatchesNull) { std::vector<BSONElement> equalities{operand.firstElement()}; ASSERT_OK(in.setEqualities(std::move(equalities))); - ASSERT(in.matchesBSON(BSONObj(), nullptr)); - ASSERT(in.matchesBSON(BSON("a" << BSONNULL), nullptr)); - ASSERT(!in.matchesBSON(BSON("a" << 4), nullptr)); - // A non-existent field is treated same way as an empty bson object - ASSERT(in.matchesBSON(BSON("b" << 4), nullptr)); + ASSERT_TRUE(in.matchesBSON(BSONObj(), nullptr)); + ASSERT_TRUE(in.matchesBSON(BSON("a" << BSONNULL), nullptr)); + ASSERT_FALSE(in.matchesBSON(BSON("a" << 4), nullptr)); + + // When null appears inside an $in, it has the same special semantics as an {$eq:null} + // predicate. In particular, we expect it to match both missing and undefined. + ASSERT_TRUE(in.matchesBSON(BSON("b" << 4), nullptr)); + ASSERT_TRUE(in.matchesBSON(BSON("a" << BSONUndefined), nullptr)); } TEST(InMatchExpression, MatchesUndefined) { diff --git a/src/mongo/db/matcher/expression_optimize_test.cpp b/src/mongo/db/matcher/expression_optimize_test.cpp index ab9e91cb30f..eaa606e06df 100644 --- a/src/mongo/db/matcher/expression_optimize_test.cpp +++ b/src/mongo/db/matcher/expression_optimize_test.cpp @@ -460,8 +460,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) { {"{$or: [{f1: 42}, {f1: NaN}, {f1: 99}]}", "{ f1: { $in: [ NaN, 42, 99 ] } }"}, {"{$or: [{f1: /^x/}, {f1:'ab'}]}", "{ f1: { $in: [ \"ab\", /^x/ ] } }"}, {"{$or: [{f1: /^x/}, {f1:'^a'}]}", "{ f1: { $in: [ \"^a\", /^x/ ] } }"}, - {"{$or: [{f1: 42}, {f1: null}, {f1: 99}]}", - "{ $or: [ { f1: { $in: [ 42, 99 ] } }, { f1: { $eq: null } } ] }"}, + {"{$or: [{f1: 42}, {f1: null}, {f1: 99}]}", "{ f1: { $in: [ null, 42, 99 ] } }"}, {"{$or: [{f1: 1}, {f2: 9}, {f1: 99}]}", "{ $or: [ { f1: { $in: [ 1, 99 ] } }, { f2: { $eq: 9 } } ] }"}, {"{$and: [{$or: [{f1: 7}, {f1: 3}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}", @@ -471,6 +470,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) { {"{$or: [{$and: [{f1: 7}, {f2: 7}, {f1: 5}]}, {$or: [{f1: 1}, {f1: 2}, {f1: 3}]}]}", "{ $or: [ { $and: [ { f1: { $eq: 7 } }, { f2: { $eq: 7 } }, { f1: { $eq: 5 } } ] }," " { f1: { $in: [ 1, 2, 3 ] } } ] }"}, + {"{$or: [{$and: [ { f1: null } ] } ] }", "{ f1: { $eq: null } }"}, }; auto optimizeExpr = [](std::string exprStr) { @@ -492,6 +492,7 @@ TEST(ExpressionOptimizeTest, OrRewrittenToIn) { ASSERT_BSONOBJ_EQ(optimizeExpr(queries[7].first), fromjson(queries[7].second)); ASSERT_BSONOBJ_EQ(optimizeExpr(queries[8].first), fromjson(queries[8].second)); ASSERT_BSONOBJ_EQ(optimizeExpr(queries[9].first), fromjson(queries[9].second)); + ASSERT_BSONOBJ_EQ(optimizeExpr(queries[10].first), fromjson(queries[10].second)); } } // namespace diff --git a/src/mongo/db/matcher/expression_tree.cpp b/src/mongo/db/matcher/expression_tree.cpp index ec7a5e08dd0..c9cdf026700 100644 --- a/src/mongo/db/matcher/expression_tree.cpp +++ b/src/mongo/db/matcher/expression_tree.cpp @@ -162,9 +162,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c boost::optional<std::string> childPath; const CollatorInterface* eqCollator = nullptr; - auto isNullOrRegEx = [](const BSONElement& elm) { - return (elm.isNull() || elm.type() == BSONType::RegEx); - }; + auto isRegEx = [](const BSONElement& elm) { return elm.type() == BSONType::RegEx; }; // Check if all children are equality conditions or regular expressions with the // same path argument, and same collation. @@ -175,22 +173,19 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c continue; } - // Disjunctions of equalities use $eq comparison, which has different semantics - // from $in equality comparison in two cases: - // (1) ('null' $eq 'undefined' = true), while ('null' $in 'undefined' = false), - // that is, when comparing a 'null' argument to an 'undefined' value, the - // result is different for $eq vs $in. - // (2) the regex under the equality is matched literally as a string constant, - // while a regex inside $in is matched as a regular expression. - // $lookup processing explicitly depends on this different semantics. - // Both these cases should not be rewritten into $in because of the different - // comparison semantics. + // Disjunctions of equalities use $eq comparison, which has different semantics from + // $in for regular expressions. The regex under the equality is matched literally as + // a string constant, while a regex inside $in is matched as a regular expression. + // Furthermore, $lookup processing explicitly depends on these different semantics. + // + // We should not attempt to rewrite an $eq:<regex> into $in because of these + // different comparison semantics. const CollatorInterface* curCollator = nullptr; if (childExpression->matchType() == MatchExpression::EQ) { auto eqExpression = static_cast<EqualityMatchExpression*>(childExpression.get()); curCollator = eqExpression->getCollator(); - if (isNullOrRegEx(eqExpression->getData())) { + if (isRegEx(eqExpression->getData())) { ++countNonEquivExpr; continue; } @@ -211,14 +206,14 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c } } tassert(3401201, - "All expressions must be classified as either eq-equiv or non-eq-equiv.", + "All expressions must be classified as either eq-equiv or non-eq-equiv", countEquivEqPaths + countNonEquivExpr == children.size()); // The condition above checks that there are at least two equalities that can be - // rewritten to an $in, and the we have classified all $or conditions into two disjunct + // rewritten to an $in, and the we have classified all $or conditions into two disjoint // groups. if (countEquivEqPaths > 1) { - tassert(3401202, "There must be a common path.", childPath); + tassert(3401202, "There must be a common path", childPath); auto inExpression = std::make_unique<InMatchExpression>(StringData(*childPath)); auto nonEquivOrExpr = (countNonEquivExpr > 0) ? std::make_unique<OrMatchExpression>() : nullptr; @@ -230,7 +225,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c } else if (childExpression->matchType() == MatchExpression::EQ) { std::unique_ptr<EqualityMatchExpression> eqExpressionPtr{ static_cast<EqualityMatchExpression*>(childExpression.release())}; - if (isNullOrRegEx(eqExpressionPtr->getData()) || + if (isRegEx(eqExpressionPtr->getData()) || eqExpressionPtr->getCollator() != eqCollator) { nonEquivOrExpr->add(std::move(eqExpressionPtr)); } else { @@ -244,7 +239,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c regexExpressionPtr->setPath({}); auto status = inExpression->addRegex(std::move(regexExpressionPtr)); tassert(3401203, // TODO SERVER-53380 convert to tassertStatusOK. - "Conversion from OR to IN should always succeed.", + "Conversion from OR to IN should always succeed", status == Status::OK()); } else { nonEquivOrExpr->add(std::move(childExpression)); @@ -266,7 +261,7 @@ MatchExpression::ExpressionOptimizerFunc ListOfMatchExpression::getOptimizer() c auto status = inExpression->setEqualities(std::move(inEqualities)); tassert(3401206, // TODO SERVER-53380 convert to tassertStatusOK. - "Conversion from OR to IN should always succeed.", + "Conversion from OR to IN should always succeed", status == Status::OK()); inExpression->setBackingBSON(std::move(backingArr)); |