SERVER-68448 Implement CE for $not

author: Alya Berciu <alya.berciu@mongodb.com> 2022-10-13 07:41:53 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-10-13 08:12:25 +0000
commit: 66f85beaaaabb3711dc2943dd8280e8ab1ca50ff (patch)
tree: 26df7b3229e0f50cc088cf0ceccdd2d6507390b3
parent: dee48c34e55e81c17892900b8fac36a4d5a4f20d (diff)
download: mongo-66f85beaaaabb3711dc2943dd8280e8ab1ca50ff.tar.gz
2 files changed, 208 insertions, 0 deletions
diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp
index 6146a001fa3..9e2425c6fe5 100644
--- a/src/mongo/db/query/ce/ce_heuristic_test.cpp
+++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp
@@ -808,5 +808,188 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) {
     ASSERT_MATCH_CE(ht, "{a : 13, b : 42}", 10.0);
 }
 
+TEST(CEHeuristicTest, CENotEquality) {
+    double collCard = kCollCard;
+    HeuristicCETester opt(collName);
+
+    // We avoid optimizing in order to verify heuristic estimate of FilterNode subtree. Note that we
+    // do not generate SargableNodes for $not predicates, but we do generate SargableNodes without
+    // it; for the purposes of this test, we want to demonstrate that $not returns the inverse of
+    // the FilterNode estimate.
+    HeuristicCETester noOpt(collName, kNoOptPhaseSet);
+
+    // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. We then add 0.1 when we see a
+    // Traverse, obtaining a selectivity of 0.11. When we see a UnaryOp [Not] above this subtree, we
+    // invert the selectivity 1.0 - 0.11 = 0.89.
+    double ce = 1100.0;
+    double inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+
+    // Update cardinality to 25.
+    collCard = 25;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Selectivity is sqrt(25)/25 + 0.1 for traverse.
+    ce = 7.5;
+    inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+
+    // Update cardinality to 9.
+    collCard = 9;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Selectivity is sqrt(3)/9 + 0.1 for traverse.
+    ce = 3.90;
+    inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+}
+
+TEST(CEHeuristicTest, CENotOpenRange) {
+    // Repeat the above test for open ranges; the $not cardinality estimate should add up with the
+    // non-$not estimate to the collection cardinality.
+    double collCard = kCollCard;
+    HeuristicCETester opt(collName);
+    HeuristicCETester noOpt(collName, kNoOptPhaseSet);
+
+    // Expect open-range selectivity for input card > 100 (0.33 + 0.10 for traverse).
+    double ce = 4300;
+    double inverseCE = collCard - ce;
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE);
+
+    // Update cardinality to 25.
+    collCard = 25;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Expect open-range selectivity for input card in range (20, 100) (0.45 + 0.10 for traverse).
+    ce = 13.75;
+    inverseCE = collCard - ce;
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE);
+
+    // Update cardinality to 10.
+    collCard = 10.0;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Expect open-range selectivity for input card < 20 (0.70 + 0.10 for traverse).
+    ce = 8.0;
+    inverseCE = collCard - ce;
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE);
+}
+
+TEST(CEHeuristicTest, CENotClosedRange) {
+    // Repeat the above test for closed ranges; the $not cardinality estimate should add up with the
+    // non-$not estimate to the collection cardinality.
+    double collCard = kCollCard;
+    double ce = 1849.0;
+    double inverseCE = collCard - ce;
+    HeuristicCETester opt(collName);
+    HeuristicCETester noOpt(collName, kNoOptPhaseSet);
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+
+    /*
+     * Update cardinality to 25. Here we observe an interesting edge case where the estimated
+     * cardinality is not the inverse of the actual cardinality.
+     *
+     * Consider the predicate {a: {$gt: 10, $lt: 20}}. This generates two FilterNodes stacked on top
+     * of each other. However, the predicate {a: {$not: {$gt: 10, $lt: 20}}} generates just one
+     * FilterNode.
+     *
+     * We always use input cardinality to determine which interval selectivity we're going to use.
+     * However, we have a different input cardinality for the one FilterNode case (collCard) than
+     * for the two FilterNodes case: the first node gets collCard, and the second node gets a
+     * smaller value after the selectivity of the first filter is applied.
+     *
+     * Because we use a piecewise function to pick the selectivity, and because we go from inputCard
+     * < 100 to inputCard < 20, we choose different selectivities for the intervals in the second
+     * FilterNode (0.50) than in the first (0.33).
+     */
+    collCard = 25;
+    ce = 11.0;
+    inverseCE = 17.4375;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+
+    // Update cardinality to 10.
+    collCard = 10.0;
+    ce = 6.4;
+    inverseCE = collCard - ce;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE);
+}
+
 }  // namespace
 }  // namespace mongo::ce
diff --git a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
index 344736f3e7d..0777c87b36b 100644
--- a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
+++ b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
@@ -310,6 +310,27 @@ public:
         return {{}, nullptr, sel};
     }
 
+    EvalFilterSelectivityResult transport(const UnaryOp& node,
+                                          CEType /*inputCard*/,
+                                          EvalFilterSelectivityResult childResult) {
+        switch (node.op()) {
+            case Operations::Not:
+                childResult.selectivity = negationSel(childResult.selectivity);
+                return childResult;
+            case Operations::Neg:
+                // If we see negation (-) in a UnaryOp, we ignore it for CE purposes.
+                return childResult;
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
+
+    EvalFilterSelectivityResult transport(const PathConstant& /*node*/,
+                                          CEType /*inputCard*/,
+                                          EvalFilterSelectivityResult childResult) {
+        return childResult;
+    }
+
     template <typename T, typename... Ts>
     EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...) {
         _hasTraverse = false;
@@ -323,6 +344,10 @@ public:
     }
 
 private:
+    SelectivityType negationSel(const SelectivityType in) {
+        return 1.0 - in;
+    }
+
     SelectivityType conjunctionSel(const SelectivityType left, const SelectivityType right) {
         return left * right;
     }
author	Alya Berciu <alya.berciu@mongodb.com>	2022-10-13 07:41:53 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-10-13 08:12:25 +0000
commit	66f85beaaaabb3711dc2943dd8280e8ab1ca50ff (patch)
tree	26df7b3229e0f50cc088cf0ceccdd2d6507390b3
parent	dee48c34e55e81c17892900b8fac36a4d5a4f20d (diff)
download	mongo-66f85beaaaabb3711dc2943dd8280e8ab1ca50ff.tar.gz