diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-10-24 08:56:49 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-24 09:27:03 +0000 |
commit | 4b485507171554a0c0349d3aa4335cd1a1ba8d47 (patch) | |
tree | cdcc3ab3e50191cfb45f5c21014024ee964b340a /src/mongo/db/query | |
parent | 54e1dbc67009010a27ec762d389b4ae7d74a996b (diff) | |
download | mongo-4b485507171554a0c0349d3aa4335cd1a1ba8d47.tar.gz |
SERVER-68444 Remove added traverse selectivity for FilterNodes
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/query/ce/ce_heuristic_test.cpp | 145 | ||||
-rw-r--r-- | src/mongo/db/query/ce/ce_histogram_test.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp | 55 |
4 files changed, 112 insertions, 112 deletions
diff --git a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp index f859a355ff6..70b67645dc7 100644 --- a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp +++ b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp @@ -175,27 +175,27 @@ TEST(CEDataflowTest, EstimateLimitSkipNode) { ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 1}]", 0.0); ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 50}]", 0.0); - // Input card to $match: 50. $match selectivity here is sqrt(50)/50 + 0.1 for traverse. - ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 11.0711); + // Input card to $match: 50. $match selectivity here is sqrt(50)/50. + ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 6.07107); ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 50}]", 0.0); ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); // Input card to $match is kCollCard. However, our estimate is larger than matchCard because we // have a FilterNode that does not get converted to a SargableNode in this case. The $match - // selectivity here is sqrt(1000)/1000 + 0.1 for traverse. - ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 130.623); - ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 50}]", 81.623); + // selectivity here is sqrt(1000)/1000. + ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 30.6228); + ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 20}]", 11.6228); ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); - // Input card to $match: 999. $match selectivity here is sqrt(999)/999 + 0.1 for traverse. + // Input card to $match: 999. $match selectivity here is sqrt(999)/999. ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 50}]", 50.0); - ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 131.507); + ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 20}]", 20.0); + ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 31.607); - // Input card to $match: 950. $match selectivity here is sqrt(950)/950 + 0.1 for traverse. + // Input card to $match: 950. $match selectivity here is sqrt(950)/950. ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 50}]", 50.0); - ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 125.822); + ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 20}]", 20.0); + ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 30.8221); // Input card to $match is 0.0. ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 50}]", 0.0); diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp index 9e2425c6fe5..7167a7ff44e 100644 --- a/src/mongo/db/query/ce/ce_heuristic_test.cpp +++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp @@ -68,56 +68,56 @@ protected: TEST(CEHeuristicTest, CEWithoutOptimizationGtLtNum) { std::string query = "{a0 : {$gt : 14, $lt : 21}}"; HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE(ht, query, 1849.0); + ASSERT_MATCH_CE(ht, query, 1089.0); } TEST(CEHeuristicTest, CEWithoutOptimizationEqNum) { std::string query = "{a: 123}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 2.03205, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.34575, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 4.16228, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 20.0, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 1100.0, 10000.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); + ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); + ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); + ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationEqStr) { std::string query = "{a: 'foo'}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 2.03205, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.34575, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 4.16228, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 20.0, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 1100.0, 10000.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); + ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); + ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); + ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationGtNum) { std::string query = "{a: {$gt: 44}}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationGtStr) { std::string query = "{a: {$gt: 'foo'}}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationLtNum) { std::string query = "{a: {$lt: 44}}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 7.2, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 54.45, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 430.0, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) { @@ -127,9 +127,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) { "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}" "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 7.8336, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 50.8359, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 335.612, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) { @@ -140,9 +140,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 8.352, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 67.9264, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 535.393, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 7.623, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 55.5761, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 402.963, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) { @@ -153,9 +153,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 6.912, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 43.4239, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 290.293, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 5.733, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 31.0736, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 181.863, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) { @@ -170,9 +170,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.01561, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 9.37173, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 19.3064, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.52063, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 4.15975, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 9.11877, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) { @@ -187,9 +187,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 8.98677, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 94.9731, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 894.681, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 8.9298, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 89.9501, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 798.495, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate) { @@ -219,9 +219,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnSameValue) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 5.6, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 27.8048, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 159.083, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 5.0, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 18.8997, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 62.2456, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) { @@ -232,9 +232,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) { "]}"; HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.9, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 19.8499, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 131.623, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.0, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 9.94987, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 31.6228, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) { @@ -247,9 +247,9 @@ TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) { // Estimation for $in is not implemented yet, so we assume it has the default filter selectivity // of 0.1. ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 4.41, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 27.7649, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 218.46, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.6, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 18.8549, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 128.46, 1000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationOneLowBoundWithoutTraverse) { @@ -546,11 +546,11 @@ TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionOfBoundsWithDifferentPaths HeuristicCETester ht(collName, kNoOptPhaseSet); ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 1.92, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.48, 7.0); - ASSERT_CE_CARD(ht, rootNode, 6.4, 10.0); - ASSERT_CE_CARD(ht, rootNode, 18.49, 100.0); - ASSERT_CE_CARD(ht, rootNode, 1849.0, 10000.0); + ASSERT_CE_CARD(ht, rootNode, 1.47, 3.0); + ASSERT_CE_CARD(ht, rootNode, 3.43, 7.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 10.0); + ASSERT_CE_CARD(ht, rootNode, 10.89, 100.0); + ASSERT_CE_CARD(ht, rootNode, 1089.0, 10000.0); } TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnSamePathWithoutTraverse) { @@ -717,9 +717,9 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) { std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}"; HeuristicCETester ht(collName, kOnlySubPhaseSet); // Disjunctions on different paths are not SARGable. - ASSERT_MATCH_CE_CARD(ht, query, 8.64, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 78.9525, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 675.1, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 8.19, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 69.0525, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 551.1, 1000.0); } TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) { @@ -774,9 +774,9 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) { "]}"; HeuristicCETester ht(collName, kOnlySubPhaseSet); // Disjunctions on different paths are not SARGable. - ASSERT_MATCH_CE_CARD(ht, query, 7.8336, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 50.8359, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 335.612, 1000.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0); } TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) { @@ -818,10 +818,9 @@ TEST(CEHeuristicTest, CENotEquality) { // the FilterNode estimate. HeuristicCETester noOpt(collName, kNoOptPhaseSet); - // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. We then add 0.1 when we see a - // Traverse, obtaining a selectivity of 0.11. When we see a UnaryOp [Not] above this subtree, we - // invert the selectivity 1.0 - 0.11 = 0.89. - double ce = 1100.0; + // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. When we see a UnaryOp [Not] above + // this subtree, we invert the selectivity 1.0 - 0.01 = 0.99. + double ce = 100.0; double inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); @@ -833,8 +832,8 @@ TEST(CEHeuristicTest, CENotEquality) { opt.setCollCard(collCard); noOpt.setCollCard(collCard); - // Selectivity is sqrt(25)/25 + 0.1 for traverse. - ce = 7.5; + // Selectivity is sqrt(25)/25. + ce = 5.0; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); @@ -846,8 +845,8 @@ TEST(CEHeuristicTest, CENotEquality) { opt.setCollCard(collCard); noOpt.setCollCard(collCard); - // Selectivity is sqrt(3)/9 + 0.1 for traverse. - ce = 3.90; + // Selectivity is sqrt(3)/9. + ce = 3.0; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); @@ -862,8 +861,8 @@ TEST(CEHeuristicTest, CENotOpenRange) { HeuristicCETester opt(collName); HeuristicCETester noOpt(collName, kNoOptPhaseSet); - // Expect open-range selectivity for input card > 100 (0.33 + 0.10 for traverse). - double ce = 4300; + // Expect open-range selectivity for input card > 100 (0.33). + double ce = 3300; double inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); @@ -882,8 +881,8 @@ TEST(CEHeuristicTest, CENotOpenRange) { opt.setCollCard(collCard); noOpt.setCollCard(collCard); - // Expect open-range selectivity for input card in range (20, 100) (0.45 + 0.10 for traverse). - ce = 13.75; + // Expect open-range selectivity for input card in range (20, 100) (0.45). + ce = 11.25; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); @@ -902,8 +901,8 @@ TEST(CEHeuristicTest, CENotOpenRange) { opt.setCollCard(collCard); noOpt.setCollCard(collCard); - // Expect open-range selectivity for input card < 20 (0.70 + 0.10 for traverse). - ce = 8.0; + // Expect open-range selectivity for input card < 20 (0.70). + ce = 7.0; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); @@ -922,7 +921,7 @@ TEST(CEHeuristicTest, CENotClosedRange) { // Repeat the above test for closed ranges; the $not cardinality estimate should add up with the // non-$not estimate to the collection cardinality. double collCard = kCollCard; - double ce = 1849.0; + double ce = 1089.0; double inverseCE = collCard - ce; HeuristicCETester opt(collName); HeuristicCETester noOpt(collName, kNoOptPhaseSet); @@ -956,8 +955,8 @@ TEST(CEHeuristicTest, CENotClosedRange) { * FilterNode (0.50) than in the first (0.33). */ collCard = 25; - ce = 11.0; - inverseCE = 17.4375; + ce = 7.875; + inverseCE = 19.9375; opt.setCollCard(collCard); noOpt.setCollCard(collCard); @@ -974,7 +973,7 @@ TEST(CEHeuristicTest, CENotClosedRange) { // Update cardinality to 10. collCard = 10.0; - ce = 6.4; + ce = 4.9; inverseCE = collCard - ce; opt.setCollCard(collCard); noOpt.setCollCard(collCard); diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp index d2506c7bf99..6305ddc7be1 100644 --- a/src/mongo/db/query/ce/ce_histogram_test.cpp +++ b/src/mongo/db/query/ce/ce_histogram_test.cpp @@ -441,7 +441,7 @@ TEST(CEHistogramTest, TestHistogramOnNestedPaths) { // TODO SERVER-68596: this doesn't generate a SargableNode. When it does, it should return 0.0. // Currently, this estimate falls back to heuristic CE. - ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 6.21); + ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 0.707107); } TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) { diff --git a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp index 057a3c4d32a..6c1cd1d92f3 100644 --- a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp +++ b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp @@ -42,13 +42,30 @@ using namespace mongo::ce; constexpr SelectivityType kInvalidSel = -1.0; constexpr SelectivityType kDefaultFilterSel = 0.1; -constexpr SelectivityType kDefaultTraverseSelectivity = 0.1; + +// The selectivities used in the piece-wise function for open-range intervals. +// Note that we assume a smaller input cardinality will result in a less selective range. +constexpr SelectivityType kSmallCardOpenRangeSel = 0.70; +constexpr SelectivityType kMediumCardOpenRangeSel = 0.45; +constexpr SelectivityType kLargeCardOpenRangeSel = 0.33; + +// The selectivities used in the piece-wise function for closed-range intervals. +// Note that we assume a smaller input cardinality will result in a less selective range. +constexpr SelectivityType kSmallCardClosedRangeSel = 0.50; +constexpr SelectivityType kMediumCardClosedRangeSel = 0.33; +constexpr SelectivityType kLargeCardClosedRangeSel = 0.20; // Global and Local selectivity should multiply to the Complete selectivity. constexpr SelectivityType kDefaultCompleteGroupSel = 0.01; constexpr SelectivityType kDefaultLocalGroupSel = 0.02; constexpr SelectivityType kDefaultGlobalGroupSel = 0.5; +// The following constants are the steps used in the piece-wise functions that select selectivies +// based on input cardinality. +constexpr CEType kSmallLimit = 20.0; +constexpr CEType kMediumLimit = 100.0; + +// Assumed average number of elements in an array. constexpr CEType kDefaultAverageArraySize = 10.0; /** @@ -73,12 +90,12 @@ SelectivityType equalitySel(const CEType inputCard) { */ SelectivityType closedRangeSel(const CEType inputCard) { SelectivityType sel = kInvalidSel; - if (inputCard < 20.0) { - sel = 0.50; - } else if (inputCard < 100.0) { - sel = 0.33; + if (inputCard < kSmallLimit) { + sel = kSmallCardClosedRangeSel; + } else if (inputCard < kMediumLimit) { + sel = kMediumCardClosedRangeSel; } else { - sel = 0.20; + sel = kLargeCardClosedRangeSel; } return sel; } @@ -90,12 +107,12 @@ SelectivityType closedRangeSel(const CEType inputCard) { */ SelectivityType openRangeSel(const CEType inputCard) { SelectivityType sel = kInvalidSel; - if (inputCard < 20.0) { - sel = 0.70; - } else if (inputCard < 100.0) { - sel = 0.45; + if (inputCard < kSmallLimit) { + sel = kSmallCardOpenRangeSel; + } else if (inputCard < kMediumLimit) { + sel = kMediumCardOpenRangeSel; } else { - sel = 0.33; + sel = kLargeCardOpenRangeSel; } return sel; } @@ -262,21 +279,12 @@ public: EvalFilterSelectivityResult transport(const PathTraverse& node, CEType /*inputCard*/, EvalFilterSelectivityResult childResult) { - // We only want to decrease selectivity when we see the first traverse in a path expression. - if (!_hasTraverse) { - childResult.selectivity = - std::min(childResult.selectivity + kDefaultTraverseSelectivity, 1.0); - } - - _hasTraverse = true; return childResult; } EvalFilterSelectivityResult transport(const PathCompare& node, CEType inputCard, EvalFilterSelectivityResult /*childResult*/) { - _hasTraverse = false; - // Note that the result will be ignored if this operation is part of an interval. const SelectivityType sel = operationSel(node.op(), inputCard); return {{}, &node, sel}; @@ -286,8 +294,6 @@ public: CEType inputCard, EvalFilterSelectivityResult leftChildResult, EvalFilterSelectivityResult rightChildResult) { - _hasTraverse = false; - const bool isInterval = leftChildResult.compare && rightChildResult.compare && leftChildResult.path == rightChildResult.path; @@ -302,8 +308,6 @@ public: CEType /*inputCard*/, EvalFilterSelectivityResult leftChildResult, EvalFilterSelectivityResult rightChildResult) { - _hasTraverse = false; - const SelectivityType sel = disjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity); @@ -333,7 +337,6 @@ public: template <typename T, typename... Ts> EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...) { - _hasTraverse = false; return {{}, nullptr, kDefaultFilterSel}; } @@ -357,8 +360,6 @@ private: // once. return left + right - left * right; } - - bool _hasTraverse = false; }; class CEHeuristicTransport { |