summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorAlya Berciu <alya.berciu@mongodb.com>2022-10-24 08:56:49 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-10-24 09:27:03 +0000
commit4b485507171554a0c0349d3aa4335cd1a1ba8d47 (patch)
treecdcc3ab3e50191cfb45f5c21014024ee964b340a /src/mongo/db/query
parent54e1dbc67009010a27ec762d389b4ae7d74a996b (diff)
downloadmongo-4b485507171554a0c0349d3aa4335cd1a1ba8d47.tar.gz
SERVER-68444 Remove added traverse selectivity for FilterNodes
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp22
-rw-r--r--src/mongo/db/query/ce/ce_heuristic_test.cpp145
-rw-r--r--src/mongo/db/query/ce/ce_histogram_test.cpp2
-rw-r--r--src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp55
4 files changed, 112 insertions, 112 deletions
diff --git a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
index f859a355ff6..70b67645dc7 100644
--- a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
+++ b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
@@ -175,27 +175,27 @@ TEST(CEDataflowTest, EstimateLimitSkipNode) {
ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 1}]", 0.0);
ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 50}]", 0.0);
- // Input card to $match: 50. $match selectivity here is sqrt(50)/50 + 0.1 for traverse.
- ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 11.0711);
+ // Input card to $match: 50. $match selectivity here is sqrt(50)/50.
+ ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 6.07107);
ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 50}]", 0.0);
ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1000}]", 0.0);
// Input card to $match is kCollCard. However, our estimate is larger than matchCard because we
// have a FilterNode that does not get converted to a SargableNode in this case. The $match
- // selectivity here is sqrt(1000)/1000 + 0.1 for traverse.
- ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 130.623);
- ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 50}]", 81.623);
+ // selectivity here is sqrt(1000)/1000.
+ ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 30.6228);
+ ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 20}]", 11.6228);
ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1000}]", 0.0);
- // Input card to $match: 999. $match selectivity here is sqrt(999)/999 + 0.1 for traverse.
+ // Input card to $match: 999. $match selectivity here is sqrt(999)/999.
ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1}]", 1.0);
- ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 50}]", 50.0);
- ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 131.507);
+ ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 20}]", 20.0);
+ ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 31.607);
- // Input card to $match: 950. $match selectivity here is sqrt(950)/950 + 0.1 for traverse.
+ // Input card to $match: 950. $match selectivity here is sqrt(950)/950.
ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1}]", 1.0);
- ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 50}]", 50.0);
- ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 125.822);
+ ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 20}]", 20.0);
+ ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 30.8221);
// Input card to $match is 0.0.
ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 50}]", 0.0);
diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp
index 9e2425c6fe5..7167a7ff44e 100644
--- a/src/mongo/db/query/ce/ce_heuristic_test.cpp
+++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp
@@ -68,56 +68,56 @@ protected:
TEST(CEHeuristicTest, CEWithoutOptimizationGtLtNum) {
std::string query = "{a0 : {$gt : 14, $lt : 21}}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
- ASSERT_MATCH_CE(ht, query, 1849.0);
+ ASSERT_MATCH_CE(ht, query, 1089.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationEqNum) {
std::string query = "{a: 123}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 2.03205, 3.0);
- ASSERT_MATCH_CE_CARD(ht, query, 3.34575, 7.0);
- ASSERT_MATCH_CE_CARD(ht, query, 4.16228, 10.0);
- ASSERT_MATCH_CE_CARD(ht, query, 20.0, 100.0);
- ASSERT_MATCH_CE_CARD(ht, query, 1100.0, 10000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationEqStr) {
std::string query = "{a: 'foo'}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 2.03205, 3.0);
- ASSERT_MATCH_CE_CARD(ht, query, 3.34575, 7.0);
- ASSERT_MATCH_CE_CARD(ht, query, 4.16228, 10.0);
- ASSERT_MATCH_CE_CARD(ht, query, 20.0, 100.0);
- ASSERT_MATCH_CE_CARD(ht, query, 1100.0, 10000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationGtNum) {
std::string query = "{a: {$gt: 44}}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationGtStr) {
std::string query = "{a: {$gt: 'foo'}}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationLtNum) {
std::string query = "{a: {$lt: 44}}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) {
@@ -127,9 +127,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) {
"{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}"
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
- ASSERT_MATCH_CE_CARD(ht, query, 7.8336, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 50.8359, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 335.612, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) {
@@ -140,9 +140,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 8.352, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 67.9264, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 535.393, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 7.623, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 55.5761, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 402.963, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) {
@@ -153,9 +153,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 6.912, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 43.4239, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 290.293, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 5.733, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 31.0736, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 181.863, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) {
@@ -170,9 +170,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 3.01561, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 9.37173, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 19.3064, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.52063, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 4.15975, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 9.11877, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) {
@@ -187,9 +187,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 8.98677, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 94.9731, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 894.681, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 8.9298, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 89.9501, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 798.495, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate) {
@@ -219,9 +219,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnSameValue) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 5.6, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 27.8048, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 159.083, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 5.0, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 18.8997, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 62.2456, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) {
@@ -232,9 +232,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) {
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 3.9, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 19.8499, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 131.623, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.0, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 9.94987, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 31.6228, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) {
@@ -247,9 +247,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) {
// Estimation for $in is not implemented yet, so we assume it has the default filter selectivity
// of 0.1.
ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
- ASSERT_MATCH_CE_CARD(ht, query, 4.41, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 27.7649, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 218.46, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.6, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 18.8549, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 128.46, 1000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationOneLowBoundWithoutTraverse) {
@@ -546,11 +546,11 @@ TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionOfBoundsWithDifferentPaths
HeuristicCETester ht(collName, kNoOptPhaseSet);
ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0);
- ASSERT_CE_CARD(ht, rootNode, 1.92, 3.0);
- ASSERT_CE_CARD(ht, rootNode, 4.48, 7.0);
- ASSERT_CE_CARD(ht, rootNode, 6.4, 10.0);
- ASSERT_CE_CARD(ht, rootNode, 18.49, 100.0);
- ASSERT_CE_CARD(ht, rootNode, 1849.0, 10000.0);
+ ASSERT_CE_CARD(ht, rootNode, 1.47, 3.0);
+ ASSERT_CE_CARD(ht, rootNode, 3.43, 7.0);
+ ASSERT_CE_CARD(ht, rootNode, 4.9, 10.0);
+ ASSERT_CE_CARD(ht, rootNode, 10.89, 100.0);
+ ASSERT_CE_CARD(ht, rootNode, 1089.0, 10000.0);
}
TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnSamePathWithoutTraverse) {
@@ -717,9 +717,9 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) {
std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}";
HeuristicCETester ht(collName, kOnlySubPhaseSet);
// Disjunctions on different paths are not SARGable.
- ASSERT_MATCH_CE_CARD(ht, query, 8.64, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 78.9525, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 675.1, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 8.19, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 69.0525, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 551.1, 1000.0);
}
TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) {
@@ -774,9 +774,9 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) {
"]}";
HeuristicCETester ht(collName, kOnlySubPhaseSet);
// Disjunctions on different paths are not SARGable.
- ASSERT_MATCH_CE_CARD(ht, query, 7.8336, 9.0);
- ASSERT_MATCH_CE_CARD(ht, query, 50.8359, 99.0);
- ASSERT_MATCH_CE_CARD(ht, query, 335.612, 1000.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0);
}
TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) {
@@ -818,10 +818,9 @@ TEST(CEHeuristicTest, CENotEquality) {
// the FilterNode estimate.
HeuristicCETester noOpt(collName, kNoOptPhaseSet);
- // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. We then add 0.1 when we see a
- // Traverse, obtaining a selectivity of 0.11. When we see a UnaryOp [Not] above this subtree, we
- // invert the selectivity 1.0 - 0.11 = 0.89.
- double ce = 1100.0;
+ // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. When we see a UnaryOp [Not] above
+ // this subtree, we invert the selectivity 1.0 - 0.01 = 0.99.
+ double ce = 100.0;
double inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
@@ -833,8 +832,8 @@ TEST(CEHeuristicTest, CENotEquality) {
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
- // Selectivity is sqrt(25)/25 + 0.1 for traverse.
- ce = 7.5;
+ // Selectivity is sqrt(25)/25.
+ ce = 5.0;
inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
@@ -846,8 +845,8 @@ TEST(CEHeuristicTest, CENotEquality) {
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
- // Selectivity is sqrt(3)/9 + 0.1 for traverse.
- ce = 3.90;
+ // Selectivity is sqrt(3)/9.
+ ce = 3.0;
inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
@@ -862,8 +861,8 @@ TEST(CEHeuristicTest, CENotOpenRange) {
HeuristicCETester opt(collName);
HeuristicCETester noOpt(collName, kNoOptPhaseSet);
- // Expect open-range selectivity for input card > 100 (0.33 + 0.10 for traverse).
- double ce = 4300;
+ // Expect open-range selectivity for input card > 100 (0.33).
+ double ce = 3300;
double inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
@@ -882,8 +881,8 @@ TEST(CEHeuristicTest, CENotOpenRange) {
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
- // Expect open-range selectivity for input card in range (20, 100) (0.45 + 0.10 for traverse).
- ce = 13.75;
+ // Expect open-range selectivity for input card in range (20, 100) (0.45).
+ ce = 11.25;
inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
@@ -902,8 +901,8 @@ TEST(CEHeuristicTest, CENotOpenRange) {
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
- // Expect open-range selectivity for input card < 20 (0.70 + 0.10 for traverse).
- ce = 8.0;
+ // Expect open-range selectivity for input card < 20 (0.70).
+ ce = 7.0;
inverseCE = collCard - ce;
ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce);
@@ -922,7 +921,7 @@ TEST(CEHeuristicTest, CENotClosedRange) {
// Repeat the above test for closed ranges; the $not cardinality estimate should add up with the
// non-$not estimate to the collection cardinality.
double collCard = kCollCard;
- double ce = 1849.0;
+ double ce = 1089.0;
double inverseCE = collCard - ce;
HeuristicCETester opt(collName);
HeuristicCETester noOpt(collName, kNoOptPhaseSet);
@@ -956,8 +955,8 @@ TEST(CEHeuristicTest, CENotClosedRange) {
* FilterNode (0.50) than in the first (0.33).
*/
collCard = 25;
- ce = 11.0;
- inverseCE = 17.4375;
+ ce = 7.875;
+ inverseCE = 19.9375;
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
@@ -974,7 +973,7 @@ TEST(CEHeuristicTest, CENotClosedRange) {
// Update cardinality to 10.
collCard = 10.0;
- ce = 6.4;
+ ce = 4.9;
inverseCE = collCard - ce;
opt.setCollCard(collCard);
noOpt.setCollCard(collCard);
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
index d2506c7bf99..6305ddc7be1 100644
--- a/src/mongo/db/query/ce/ce_histogram_test.cpp
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -441,7 +441,7 @@ TEST(CEHistogramTest, TestHistogramOnNestedPaths) {
// TODO SERVER-68596: this doesn't generate a SargableNode. When it does, it should return 0.0.
// Currently, this estimate falls back to heuristic CE.
- ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 6.21);
+ ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 0.707107);
}
TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) {
diff --git a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
index 057a3c4d32a..6c1cd1d92f3 100644
--- a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
+++ b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
@@ -42,13 +42,30 @@ using namespace mongo::ce;
constexpr SelectivityType kInvalidSel = -1.0;
constexpr SelectivityType kDefaultFilterSel = 0.1;
-constexpr SelectivityType kDefaultTraverseSelectivity = 0.1;
+
+// The selectivities used in the piece-wise function for open-range intervals.
+// Note that we assume a smaller input cardinality will result in a less selective range.
+constexpr SelectivityType kSmallCardOpenRangeSel = 0.70;
+constexpr SelectivityType kMediumCardOpenRangeSel = 0.45;
+constexpr SelectivityType kLargeCardOpenRangeSel = 0.33;
+
+// The selectivities used in the piece-wise function for closed-range intervals.
+// Note that we assume a smaller input cardinality will result in a less selective range.
+constexpr SelectivityType kSmallCardClosedRangeSel = 0.50;
+constexpr SelectivityType kMediumCardClosedRangeSel = 0.33;
+constexpr SelectivityType kLargeCardClosedRangeSel = 0.20;
// Global and Local selectivity should multiply to the Complete selectivity.
constexpr SelectivityType kDefaultCompleteGroupSel = 0.01;
constexpr SelectivityType kDefaultLocalGroupSel = 0.02;
constexpr SelectivityType kDefaultGlobalGroupSel = 0.5;
+// The following constants are the steps used in the piece-wise functions that select selectivies
+// based on input cardinality.
+constexpr CEType kSmallLimit = 20.0;
+constexpr CEType kMediumLimit = 100.0;
+
+// Assumed average number of elements in an array.
constexpr CEType kDefaultAverageArraySize = 10.0;
/**
@@ -73,12 +90,12 @@ SelectivityType equalitySel(const CEType inputCard) {
*/
SelectivityType closedRangeSel(const CEType inputCard) {
SelectivityType sel = kInvalidSel;
- if (inputCard < 20.0) {
- sel = 0.50;
- } else if (inputCard < 100.0) {
- sel = 0.33;
+ if (inputCard < kSmallLimit) {
+ sel = kSmallCardClosedRangeSel;
+ } else if (inputCard < kMediumLimit) {
+ sel = kMediumCardClosedRangeSel;
} else {
- sel = 0.20;
+ sel = kLargeCardClosedRangeSel;
}
return sel;
}
@@ -90,12 +107,12 @@ SelectivityType closedRangeSel(const CEType inputCard) {
*/
SelectivityType openRangeSel(const CEType inputCard) {
SelectivityType sel = kInvalidSel;
- if (inputCard < 20.0) {
- sel = 0.70;
- } else if (inputCard < 100.0) {
- sel = 0.45;
+ if (inputCard < kSmallLimit) {
+ sel = kSmallCardOpenRangeSel;
+ } else if (inputCard < kMediumLimit) {
+ sel = kMediumCardOpenRangeSel;
} else {
- sel = 0.33;
+ sel = kLargeCardOpenRangeSel;
}
return sel;
}
@@ -262,21 +279,12 @@ public:
EvalFilterSelectivityResult transport(const PathTraverse& node,
CEType /*inputCard*/,
EvalFilterSelectivityResult childResult) {
- // We only want to decrease selectivity when we see the first traverse in a path expression.
- if (!_hasTraverse) {
- childResult.selectivity =
- std::min(childResult.selectivity + kDefaultTraverseSelectivity, 1.0);
- }
-
- _hasTraverse = true;
return childResult;
}
EvalFilterSelectivityResult transport(const PathCompare& node,
CEType inputCard,
EvalFilterSelectivityResult /*childResult*/) {
- _hasTraverse = false;
-
// Note that the result will be ignored if this operation is part of an interval.
const SelectivityType sel = operationSel(node.op(), inputCard);
return {{}, &node, sel};
@@ -286,8 +294,6 @@ public:
CEType inputCard,
EvalFilterSelectivityResult leftChildResult,
EvalFilterSelectivityResult rightChildResult) {
- _hasTraverse = false;
-
const bool isInterval = leftChildResult.compare && rightChildResult.compare &&
leftChildResult.path == rightChildResult.path;
@@ -302,8 +308,6 @@ public:
CEType /*inputCard*/,
EvalFilterSelectivityResult leftChildResult,
EvalFilterSelectivityResult rightChildResult) {
- _hasTraverse = false;
-
const SelectivityType sel =
disjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity);
@@ -333,7 +337,6 @@ public:
template <typename T, typename... Ts>
EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...) {
- _hasTraverse = false;
return {{}, nullptr, kDefaultFilterSel};
}
@@ -357,8 +360,6 @@ private:
// once.
return left + right - left * right;
}
-
- bool _hasTraverse = false;
};
class CEHeuristicTransport {