diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2022-12-12 14:20:14 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-12 14:50:51 +0000 |
commit | 7e2c544eafc88db80a0d9d026acf1a2a58aa3bdb (patch) | |
tree | 4266e050bca881fc92f52f9f894a533e8ebca5eb /src/mongo/db/query/ce | |
parent | adb64c11d8b62d85223e7279c98ac89ac4fc06d5 (diff) | |
download | mongo-7e2c544eafc88db80a0d9d026acf1a2a58aa3bdb.tar.gz |
SERVER-71332 [CQF] Strong alias for Selectivity and Cardinality
Diffstat (limited to 'src/mongo/db/query/ce')
-rw-r--r-- | src/mongo/db/query/ce/generated_histograms_test.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/query/ce/heuristic_estimator.cpp | 84 | ||||
-rw-r--r-- | src/mongo/db/query/ce/heuristic_estimator_test.cpp | 52 | ||||
-rw-r--r-- | src/mongo/db/query/ce/hinted_estimator.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_array_data_test.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_edge_cases_test.cpp | 199 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_estimator.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_estimator_test.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_interpolation_test.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_predicate_estimation.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_predicate_estimation.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/ce/maxdiff_histogram_test.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/query/ce/sampling_estimator.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/ce/test_utils.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/query/ce/test_utils.h | 25 |
16 files changed, 308 insertions, 304 deletions
diff --git a/src/mongo/db/query/ce/generated_histograms_test.cpp b/src/mongo/db/query/ce/generated_histograms_test.cpp index 3f5ce361584..3f50283e2e5 100644 --- a/src/mongo/db/query/ce/generated_histograms_test.cpp +++ b/src/mongo/db/query/ce/generated_histograms_test.cpp @@ -90,12 +90,12 @@ TEST(EstimatorTest, UniformIntStrEstimate) { // Predicates over bucket bound. // Actual cardinality {$eq: 804} = 2. - double expectedCard = estimateIntValCard(hist, 804, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(2.5, expectedCard, kErrorBound); + CEType expectedCard{estimateIntValCard(hist, 804, EstimationType::kEqual)}; + ASSERT_CE_APPROX_EQUAL(2.5, expectedCard, kErrorBound); // Actual cardinality {$lt: 100} = 40. - expectedCard = estimateIntValCard(hist, 100, EstimationType::kLess); - ASSERT_APPROX_EQUAL(52.4, expectedCard, kErrorBound); + expectedCard = {estimateIntValCard(hist, 100, EstimationType::kLess)}; + ASSERT_CE_APPROX_EQUAL(52.4, expectedCard, kErrorBound); // Range query crossing the type brackets. // Actual cardinality {$gt: 100} = 475. @@ -107,7 +107,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(460.1, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(460.1, expectedCard, kErrorBound); // Actual cardinality {$lt: 'abc'} = 291. expectedCard = estimateCardRange(arrHist, @@ -118,7 +118,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tagAbc, valAbc, true /* includeScalar */); - ASSERT_APPROX_EQUAL(319.9, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(319.9, expectedCard, kErrorBound); // Actual cardinality {$gte: 'abc'} = 194. expectedCard = estimateCardRange(arrHist, @@ -129,12 +129,12 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tagObj, valObj, true /* includeScalar */); - ASSERT_APPROX_EQUAL(167.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(167.0, expectedCard, kErrorBound); // Queries over the low string bound. // Actual cardinality {$eq: ''} = 0. expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true); - ASSERT_APPROX_EQUAL(2.727, expectedCard, 0.001); + ASSERT_CE_APPROX_EQUAL(2.727, expectedCard, 0.001); // Actual cardinality {$gt: ''} = 485. expectedCard = estimateCardRange(arrHist, @@ -145,7 +145,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tagObj, valObj, true /* includeScalar */); - ASSERT_APPROX_EQUAL(485, expectedCard, 0.001); + ASSERT_CE_APPROX_EQUAL(485, expectedCard, 0.001); } TEST(EstimatorTest, IntStrArrayEstimate) { @@ -239,7 +239,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { value::ValueGuard vgLowStr(tagLowStr, valLowStr); // Actual cardinality {$lt: 100} = 115. - double expectedCard = estimateCardRange(arrHist, + CEType expectedCard = estimateCardRange(arrHist, false /* lowInclusive */, tagLowDbl, valLowDbl, @@ -247,7 +247,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(100), true /* includeScalar */); - ASSERT_APPROX_EQUAL(109.9, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(109.9, expectedCard, kErrorBound); // Actual cardinality {$gt: 502} = 434. expectedCard = estimateCardRange(arrHist, @@ -258,7 +258,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(443.8, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(443.8, expectedCard, kErrorBound); // Actual cardinality {$gte: 502} = 437. expectedCard = estimateCardRange(arrHist, @@ -269,17 +269,17 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(448.3, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(448.3, expectedCard, kErrorBound); // Actual cardinality {$eq: ''} = 0. expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(6.69, expectedCard, 0.001); + ASSERT_CE_APPROX_EQUAL(6.69, expectedCard, 0.001); // Actual cardinality {$eq: 'DD2'} = 2. auto [tagStr, valStr] = value::makeNewString("DD2"_sd); value::ValueGuard vg(tagStr, valStr); expectedCard = estimateCardEq(arrHist, tagStr, valStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(5.27, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(5.27, expectedCard, kErrorBound); // Actual cardinality {$lte: 'DD2'} = 120. expectedCard = estimateCardRange(arrHist, @@ -290,7 +290,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagStr, valStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(160.6, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(160.6, expectedCard, kErrorBound); // Actual cardinality {$gt: 'DD2'} = 450. auto [tagObj, valObj] = value::makeNewObject(); @@ -303,7 +303,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagObj, valObj, true /* includeScalar */); - ASSERT_APPROX_EQUAL(411.2, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(411.2, expectedCard, kErrorBound); // Queries with $elemMatch. const auto [tagInt, valInt] = @@ -311,7 +311,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { // Actual cardinality {$match: {a: {$elemMatch: {$eq: 603}}}} = 12. expectedCard = estimateCardEq(arrHist, tagInt, valInt, false /* includeScalar */); - ASSERT_APPROX_EQUAL(12.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(12.0, expectedCard, kErrorBound); // Actual cardinality {$match: {a: {$elemMatch: {$lte: 603}}}} = 252. expectedCard = estimateCardRange(arrHist, @@ -322,7 +322,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagInt, valInt, false /* includeScalar */); - ASSERT_APPROX_EQUAL(293.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(293.0, expectedCard, kErrorBound); // Actual cardinality {$match: {a: {$elemMatch: {$gte: 603}}}} = 200. expectedCard = estimateCardRange(arrHist, @@ -333,12 +333,12 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagLowStr, valLowStr, false /* includeScalar */); - ASSERT_APPROX_EQUAL(250.8, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(250.8, expectedCard, kErrorBound); // Actual cardinality {$match: {a: {$elemMatch: {$eq: 'cu'}}}} = 7. std::tie(tagStr, valStr) = value::makeNewString("cu"_sd); expectedCard = estimateCardEq(arrHist, tagStr, valStr, false /* includeScalar */); - ASSERT_APPROX_EQUAL(3.8, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(3.8, expectedCard, kErrorBound); // Actual cardinality {$match: {a: {$elemMatch: {$gte: 'cu'}}}} = 125. expectedCard = estimateCardRange(arrHist, @@ -349,7 +349,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagObj, valObj, false /* includeScalar */); - ASSERT_APPROX_EQUAL(109.7, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(109.7, expectedCard, kErrorBound); // Actual cardinality {$match: {a: {$elemMatch: {$lte: 'cu'}}}} = 141. expectedCard = estimateCardRange(arrHist, @@ -360,7 +360,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) { tagStr, valStr, false /* includeScalar */); - ASSERT_APPROX_EQUAL(156.1, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(156.1, expectedCard, kErrorBound); } } // namespace } // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp index 7efe1a974ba..36767c8d7f9 100644 --- a/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp +++ b/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp @@ -36,13 +36,13 @@ namespace mongo::optimizer::ce { namespace { -constexpr double kCollCard = 1000.0; +constexpr CEType kCollCard{1000.0}; const std::string kCollName = "test"; -constexpr double kOtherCollCard = 200.0; +constexpr CEType kOtherCollCard{200.0}; const std::string kOtherCollName = "otherTest"; -constexpr double kThirdCollCard = 50.0; +constexpr CEType kThirdCollCard{50.0}; const std::string kThirdCollName = "thirdTest"; class DataflowCETester : public CETester { @@ -114,7 +114,7 @@ TEST(CEDataflowTest, EstimateUnionNode) { // The following plans include a UnionNode. { DataflowCETester t; - t.setCollCard(2000); + t.setCollCard({2000.0}); t.setIndexes( {{"indexA", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ true)}}); t.setDisableScan(true); @@ -146,12 +146,12 @@ TEST(CEDataflowTest, EstimateLimitSkipNode) { // Verify that 'LimitSkipNode' estimate with only a skip set is max(inputCE - skip, 0). ASSERT_CE(t, "[{$skip: 0}]", kCollCard); - ASSERT_CE(t, "[{$skip: 1}]", kCollCard - 1.0); - ASSERT_CE(t, "[{$skip: 50}]", kCollCard - 50.0); + ASSERT_CE(t, "[{$skip: 1}]", kCollCard - CEType{1.0}); + ASSERT_CE(t, "[{$skip: 50}]", kCollCard - CEType{50.0}); ASSERT_CE(t, "[{$skip: 1000}]", 0.0); ASSERT_CE(t, "[{$skip: 10000}]", 0.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1}]", matchCard - 1.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 5}]", matchCard - 5.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1}]", matchCard - CEType{1.0}); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 5}]", matchCard - CEType{5.0}); ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 50}]", 0.0); ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1000}]", 0.0); @@ -164,8 +164,8 @@ TEST(CEDataflowTest, EstimateLimitSkipNode) { ASSERT_CE(t, "[{$skip: 1}, {$limit: 50}]", 50.0); ASSERT_CE(t, "[{$limit: 50}, {$skip: 50}]", 0.0); ASSERT_CE(t, "[{$skip: 50}, {$limit: 50}]", 50.0); - ASSERT_CE(t, "[{$limit: 1000}, {$skip: 50}]", kCollCard - 50.0); - ASSERT_CE(t, "[{$skip: 50}, {$limit: 1000}]", kCollCard - 50.0); + ASSERT_CE(t, "[{$limit: 1000}, {$skip: 50}]", kCollCard - CEType{50.0}); + ASSERT_CE(t, "[{$skip: 50}, {$limit: 1000}]", kCollCard - CEType{50.0}); ASSERT_CE(t, "[{$limit: 50}, {$skip: 1000}]", 0.0); ASSERT_CE(t, "[{$skip: 1000}, {$limit: 50}]", 0.0); ASSERT_CE(t, "[{$limit: 1000}, {$skip: 1000}]", 0.0); diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp index 88421015f39..641fc2ef98f 100644 --- a/src/mongo/db/query/ce/heuristic_estimator.cpp +++ b/src/mongo/db/query/ce/heuristic_estimator.cpp @@ -35,35 +35,35 @@ namespace mongo::optimizer::ce { // Invalid estimate - an arbitrary negative value used for initialization. -constexpr SelectivityType kInvalidSel = -1.0; +constexpr SelectivityType kInvalidSel{-1.0}; -constexpr SelectivityType kDefaultFilterSel = 0.1; -constexpr SelectivityType kDefaultExistsSel = 0.70; +constexpr SelectivityType kDefaultFilterSel{0.1}; +constexpr SelectivityType kDefaultExistsSel{0.70}; // The selectivities used in the piece-wise function for open-range intervals. // Note that we assume a smaller input cardinality will result in a less selective range. -constexpr SelectivityType kSmallCardOpenRangeSel = 0.70; -constexpr SelectivityType kMediumCardOpenRangeSel = 0.45; -constexpr SelectivityType kLargeCardOpenRangeSel = 0.33; +constexpr SelectivityType kSmallCardOpenRangeSel{0.70}; +constexpr SelectivityType kMediumCardOpenRangeSel{0.45}; +constexpr SelectivityType kLargeCardOpenRangeSel{0.33}; // The selectivities used in the piece-wise function for closed-range intervals. // Note that we assume a smaller input cardinality will result in a less selective range. -constexpr SelectivityType kSmallCardClosedRangeSel = 0.50; -constexpr SelectivityType kMediumCardClosedRangeSel = 0.33; -constexpr SelectivityType kLargeCardClosedRangeSel = 0.20; +constexpr SelectivityType kSmallCardClosedRangeSel{0.50}; +constexpr SelectivityType kMediumCardClosedRangeSel{0.33}; +constexpr SelectivityType kLargeCardClosedRangeSel{0.20}; // Global and Local selectivity should multiply to the Complete selectivity. -constexpr SelectivityType kDefaultCompleteGroupSel = 0.01; -constexpr SelectivityType kDefaultLocalGroupSel = 0.02; -constexpr SelectivityType kDefaultGlobalGroupSel = 0.5; +constexpr SelectivityType kDefaultCompleteGroupSel{0.01}; +constexpr SelectivityType kDefaultLocalGroupSel{0.02}; +constexpr SelectivityType kDefaultGlobalGroupSel{0.5}; // The following constants are the steps used in the piece-wise functions that select selectivies // based on input cardinality. -constexpr CEType kSmallLimit = 20.0; -constexpr CEType kMediumLimit = 100.0; +constexpr CEType kSmallLimit{20.0}; +constexpr CEType kMediumLimit{100.0}; -// Assumed average number of elements in an array. -constexpr CEType kDefaultAverageArraySize = 10.0; +// Assumed average number of elements in an array. This is a unitless constant. +constexpr double kDefaultAverageArraySize{10.0}; /** * Default selectivity of equalities. To avoid super small selectivities for small @@ -75,9 +75,9 @@ SelectivityType equalitySel(const CEType inputCard) { uassert(6716604, "Zero cardinality must be handled by the caller.", inputCard > 0.0); if (inputCard <= 1.0) { // If the input has < 1 values, it cannot be reduced any further by a condition. - return 1.0; + return {1.0}; } - return std::sqrt(inputCard) / inputCard; + return {1.0 / std::sqrt(inputCard._value)}; } /** @@ -129,7 +129,7 @@ mongo::sbe::value::TypeTags boundType(const BoundRequirement& bound) { SelectivityType intervalSel(const IntervalRequirement& interval, const CEType inputCard) { SelectivityType sel = kInvalidSel; if (interval.isFullyOpen()) { - sel = 1.0; + sel = {1.0}; } else if (interval.isEquality()) { sel = equalitySel(inputCard); } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() || @@ -148,16 +148,12 @@ SelectivityType intervalSel(const IntervalRequirement& interval, const CEType in return sel; } -SelectivityType negationSel(SelectivityType sel) { - return 1.0 - sel; -} - SelectivityType operationSel(const Operations op, const CEType inputCard) { switch (op) { case Operations::Eq: return equalitySel(inputCard); case Operations::Neq: - return negationSel(equalitySel(inputCard)); + return negateSel(equalitySel(inputCard)); case Operations::EqMember: // Reached when the query has $in. We don't handle it yet. return kDefaultFilterSel; @@ -196,7 +192,7 @@ SelectivityType intervalSel(const PathCompare& left, const auto rightConst = right.getVal().cast<Constant>(); if (leftConst && rightConst && !(*leftConst == *rightConst)) { // Equality comparison on different constants is a contradiction. - return 0.0; + return {0.0}; } // We can't tell if the equalities result in a contradiction or not, so we use the // default equality selectivity. @@ -322,7 +318,7 @@ public: EvalFilterSelectivityResult childResult) { switch (node.op()) { case Operations::Not: - childResult.selectivity = negationSel(childResult.selectivity); + childResult.selectivity = negateSel(childResult.selectivity); return childResult; case Operations::Neg: // If we see negation (-) in a UnaryOp, we ignore it for CE purposes. @@ -362,10 +358,6 @@ public: } private: - SelectivityType negationSel(const SelectivityType in) { - return 1.0 - in; - } - SelectivityType conjunctionSel(const SelectivityType left, const SelectivityType right) { return left * right; } @@ -386,7 +378,7 @@ public: } CEType transport(const ValueScanNode& node, CEType /*bindResult*/) { - return node.getArraySize(); + return {static_cast<double>(node.getArraySize())}; } CEType transport(const MemoLogicalDelegatorNode& node) { @@ -398,7 +390,7 @@ public: CEType transport(const FilterNode& node, CEType childResult, CEType /*exprResult*/) { if (childResult == 0.0) { // Early out and return 0 since we don't expect to get more results. - return 0.0; + return {0.0}; } if (node.getFilter() == Constant::boolean(true)) { // Trivially true filter. @@ -406,7 +398,7 @@ public: } if (node.getFilter() == Constant::boolean(false)) { // Trivially false filter. - return 0.0; + return {0.0}; } const SelectivityType sel = @@ -426,10 +418,10 @@ public: CEType /*refsResult*/) { // Early out and return 0 since we don't expect to get more results. if (childResult == 0.0) { - return 0.0; + return {0.0}; } - SelectivityType topLevelSel = 1.0; + SelectivityType topLevelSel{1.0}; std::vector<SelectivityType> topLevelSelectivities; for (const auto& [key, req] : node.getReqMap()) { if (req.getIsPerfOnly()) { @@ -437,14 +429,14 @@ public: continue; } - SelectivityType disjSel = 1.0; + SelectivityType disjSel{1.0}; std::vector<SelectivityType> disjSelectivities; // Intervals are in DNF. const auto intervalDNF = req.getIntervals(); const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes(); for (const auto& disjunct : disjuncts) { const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes(); - SelectivityType conjSel = 1.0; + SelectivityType conjSel{1.0}; std::vector<SelectivityType> conjSelectivities; for (const auto& conjunct : conjuncts) { const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr(); @@ -459,7 +451,7 @@ public: } if (topLevelSelectivities.empty()) { - return 1.0; + return {1.0}; } // The elements of the PartialSchemaRequirements map represent an implicit conjunction. topLevelSel = conjExponentialBackoff(std::move(topLevelSelectivities)); @@ -492,11 +484,11 @@ public: SelectivityType selectivity = kDefaultFilterSel; if (filter == Constant::boolean(false)) { - selectivity = 0.0; + selectivity = {0.0}; } else if (filter == Constant::boolean(true)) { - selectivity = 1.0; + selectivity = {1.0}; } - return leftChildResult * rightChildResult * selectivity; + return computeJoinCE(leftChildResult, rightChildResult, selectivity); } CEType transport(const UnionNode& node, @@ -504,7 +496,7 @@ public: CEType /*bindResult*/, CEType /*refsResult*/) { // Combine the CE of each child. - CEType result = 0; + CEType result{0.0}; for (auto&& child : childResults) { result += child; } @@ -548,11 +540,11 @@ public: CEType transport(const LimitSkipNode& node, CEType childResult) { const auto limit = node.getProperty().getLimit(); const auto skip = node.getProperty().getSkip(); - const auto cardAfterSkip = std::max(childResult - skip, 0.0); + const auto cardAfterSkip = (childResult > skip) ? (childResult._value - skip) : 0.0; if (limit < cardAfterSkip) { - return limit; + return {static_cast<double>(limit)}; } - return cardAfterSkip; + return {cardAfterSkip}; } CEType transport(const ExchangeNode& node, CEType childResult, CEType /*refsResult*/) { @@ -571,7 +563,7 @@ public: template <typename T, typename... Ts> CEType transport(const T& /*node*/, Ts&&...) { static_assert(!canBeLogicalNode<T>(), "Logical node must implement its CE derivation."); - return 0.0; + return {0.0}; } static CEType derive(const Metadata& metadata, diff --git a/src/mongo/db/query/ce/heuristic_estimator_test.cpp b/src/mongo/db/query/ce/heuristic_estimator_test.cpp index f92f63edde9..fdaf20c44b2 100644 --- a/src/mongo/db/query/ce/heuristic_estimator_test.cpp +++ b/src/mongo/db/query/ce/heuristic_estimator_test.cpp @@ -44,7 +44,7 @@ namespace mongo::optimizer::ce { namespace { -constexpr double kCollCard = 10000.0; +constexpr CEType kCollCard{10000.0}; const std::string collName = "test"; class HeuristicCETester : public CETester { @@ -202,7 +202,7 @@ TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate) HeuristicCETester ht(collName, kNoOptPhaseSet); auto ce1 = ht.getMatchCE(query); auto ce2 = ht.getMatchCE(queryWithLongPaths); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); + ASSERT_CE_APPROX_EQUAL(ce1, ce2, kMaxCEError); } TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnSameValue) { @@ -619,7 +619,7 @@ TEST(CEHeuristicTest, CEWithoutOptimizationEquivalentConjunctions) { ht.setCollCard(kCollCard); auto ce1 = ht.getCE(rootNode1); auto ce2 = ht.getCE(rootNode2); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); + ASSERT_CE_APPROX_EQUAL(ce1, ce2, kMaxCEError); } TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Eq) { @@ -731,7 +731,7 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) { "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}" "]}"; auto ce2 = ht.getMatchCE(query2); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); + ASSERT_CE_APPROX_EQUAL(ce1, ce2, kMaxCEError); } TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) { @@ -777,7 +777,7 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) { } TEST(CEHeuristicTest, CENotEquality) { - double collCard = kCollCard; + CEType collCard = kCollCard; HeuristicCETester opt(collName); // We avoid optimizing in order to verify heuristic estimate of FilterNode subtree. Note that we @@ -788,20 +788,20 @@ TEST(CEHeuristicTest, CENotEquality) { // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. When we see a UnaryOp [Not] above // this subtree, we invert the selectivity 1.0 - 0.01 = 0.99. - double ce = 100.0; - double inverseCE = collCard - ce; + CEType ce{100.0}; + CEType inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE); // Update cardinality to 25. - collCard = 25; + collCard = {25}; opt.setCollCard(collCard); noOpt.setCollCard(collCard); // Selectivity is sqrt(25)/25. - ce = 5.0; + ce = {5.0}; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); @@ -809,12 +809,12 @@ TEST(CEHeuristicTest, CENotEquality) { ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE); // Update cardinality to 9. - collCard = 9; + collCard = {9}; opt.setCollCard(collCard); noOpt.setCollCard(collCard); // Selectivity is sqrt(3)/9. - ce = 3.0; + ce = {3.0}; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); @@ -825,13 +825,13 @@ TEST(CEHeuristicTest, CENotEquality) { TEST(CEHeuristicTest, CENotOpenRange) { // Repeat the above test for open ranges; the $not cardinality estimate should add up with the // non-$not estimate to the collection cardinality. - double collCard = kCollCard; + CEType collCard = kCollCard; HeuristicCETester opt(collName); HeuristicCETester noOpt(collName, kNoOptPhaseSet); // Expect open-range selectivity for input card > 100 (0.33). - double ce = 3300; - double inverseCE = collCard - ce; + CEType ce = {3300}; + CEType inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); @@ -845,12 +845,12 @@ TEST(CEHeuristicTest, CENotOpenRange) { ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); // Update cardinality to 25. - collCard = 25; + collCard = {25}; opt.setCollCard(collCard); noOpt.setCollCard(collCard); // Expect open-range selectivity for input card in range (20, 100) (0.45). - ce = 11.25; + ce = {11.25}; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); @@ -865,12 +865,12 @@ TEST(CEHeuristicTest, CENotOpenRange) { ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); // Update cardinality to 10. - collCard = 10.0; + collCard = {10.0}; opt.setCollCard(collCard); noOpt.setCollCard(collCard); // Expect open-range selectivity for input card < 20 (0.70). - ce = 7.0; + ce = {7.0}; inverseCE = collCard - ce; ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); @@ -888,9 +888,9 @@ TEST(CEHeuristicTest, CENotOpenRange) { TEST(CEHeuristicTest, CENotClosedRange) { // Repeat the above test for closed ranges; the $not cardinality estimate should add up with the // non-$not estimate to the collection cardinality. - double collCard = kCollCard; - double ce = 1089.0; - double inverseCE = collCard - ce; + CEType collCard = kCollCard; + CEType ce = {1089.0}; + CEType inverseCE = collCard - ce; HeuristicCETester opt(collName); HeuristicCETester noOpt(collName, kNoOptPhaseSet); @@ -922,9 +922,9 @@ TEST(CEHeuristicTest, CENotClosedRange) { * < 100 to inputCard < 20, we choose different selectivities for the intervals in the second * FilterNode (0.50) than in the first (0.33). */ - collCard = 25; - ce = 7.875; - inverseCE = 19.9375; + collCard = {25}; + ce = {7.875}; + inverseCE = {19.9375}; opt.setCollCard(collCard); noOpt.setCollCard(collCard); @@ -940,8 +940,8 @@ TEST(CEHeuristicTest, CENotClosedRange) { ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); // Update cardinality to 10. - collCard = 10.0; - ce = 4.9; + collCard = {10.0}; + ce = {4.9}; inverseCE = collCard - ce; opt.setCollCard(collCard); noOpt.setCollCard(collCard); diff --git a/src/mongo/db/query/ce/hinted_estimator.cpp b/src/mongo/db/query/ce/hinted_estimator.cpp index b27381268b8..41b63a8f914 100644 --- a/src/mongo/db/query/ce/hinted_estimator.cpp +++ b/src/mongo/db/query/ce/hinted_estimator.cpp @@ -58,7 +58,7 @@ public: if (canBeLogicalNode<T>()) { return _heuristicCE.deriveCE(_metadata, _memo, _logicalProps, n.ref()); } - return 0.0; + return {0.0}; } static CEType derive(const Metadata& metadata, diff --git a/src/mongo/db/query/ce/histogram_array_data_test.cpp b/src/mongo/db/query/ce/histogram_array_data_test.cpp index 7f8bb92fc51..569ca7c6a80 100644 --- a/src/mongo/db/query/ce/histogram_array_data_test.cpp +++ b/src/mongo/db/query/ce/histogram_array_data_test.cpp @@ -179,7 +179,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysSmall10Buckets) { for (const auto q : querySet) { // $match query, includeScalar = true. - double estCard = estimateCardRange(arrHist, + CEType estCard = estimateCardRange(arrHist, false /* lowInclusive */, value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.low), @@ -187,7 +187,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysSmall10Buckets) { value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.high), true /* includeScalar */); - ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1); + ASSERT_CE_APPROX_EQUAL(estCard, q.estMatch, 0.1); // $elemMatch query, includeScalar = false. estCard = estimateCardRange(arrHist, @@ -198,7 +198,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysSmall10Buckets) { value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.high), false /* includeScalar */); - ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1); + ASSERT_CE_APPROX_EQUAL(estCard, q.estElemMatch, 0.1); } std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl; std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl; @@ -270,7 +270,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysLarge10Buckets) { for (const auto q : querySet) { // $match query, includeScalar = true. - double estCard = estimateCardRange(arrHist, + CEType estCard = estimateCardRange(arrHist, false /* lowInclusive */, value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.low), @@ -278,7 +278,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysLarge10Buckets) { value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.high), true /* includeScalar */); - ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1); + ASSERT_CE_APPROX_EQUAL(estCard, q.estMatch, 0.1); // $elemMatch query, includeScalar = false. estCard = estimateCardRange(arrHist, @@ -289,7 +289,7 @@ TEST(EstimatorArrayDataTest, Histogram1000ArraysLarge10Buckets) { value::TypeTags::NumberInt32, sbe::value::bitcastFrom<int32_t>(q.high), false /* includeScalar */); - ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1); + ASSERT_CE_APPROX_EQUAL(estCard, q.estElemMatch, 0.1); } std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl; std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl; diff --git a/src/mongo/db/query/ce/histogram_edge_cases_test.cpp b/src/mongo/db/query/ce/histogram_edge_cases_test.cpp index 6d8b4427783..6c90549a73e 100644 --- a/src/mongo/db/query/ce/histogram_edge_cases_test.cpp +++ b/src/mongo/db/query/ce/histogram_edge_cases_test.cpp @@ -411,57 +411,60 @@ TEST(EstimatorTest, TwoBucketsTimestampHistogram) { ASSERT_EQ(100.0, getTotals(hist).card); const auto valueBefore = value::bitcastFrom<int64_t>(startTs.asULL() - 1); - double expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kGreater).card; - ASSERT_EQ(100.0, expectedCard); + CEType expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kEqual).card}; + ASSERT_EQ(0.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kLess).card}; + ASSERT_EQ(0.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kGreater).card}; + ASSERT_EQ(100.0, expectedCard._value); const auto valueStart = value::bitcastFrom<int64_t>( startTs.asULL()); // NB: startTs.asInt64() produces different value. - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kEqual).card; - ASSERT_EQ(3.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kGreater).card; - ASSERT_EQ(97.0, expectedCard); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kEqual).card}; + ASSERT_EQ(3.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kLess).card}; + ASSERT_EQ(0.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kGreater).card}; + ASSERT_EQ(97.0, expectedCard._value); const auto valueEnd = value::bitcastFrom<int64_t>(endTs.asULL()); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kLess).card; - ASSERT_EQ(99.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kLess).card}; + ASSERT_EQ(99.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kGreater).card}; + ASSERT_EQ(0.0, expectedCard._value); const auto valueIn = value::bitcastFrom<int64_t>((startTs.asULL() + endTs.asULL()) / 2); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kEqual).card; - ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kEqual).card}; + ASSERT_EQ(2.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kLess).card}; + ASSERT_CE_APPROX_EQUAL(49.0, expectedCard, kErrorBound); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kGreater).card}; + ASSERT_CE_APPROX_EQUAL(49.0, expectedCard, kErrorBound); const auto valueAfter = value::bitcastFrom<int64_t>(endTs.asULL() + 100); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kLess).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kEqual).card}; + ASSERT_EQ(0.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kLess).card}; + ASSERT_EQ(100.0, expectedCard._value); + expectedCard = { + estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kGreater).card}; + ASSERT_EQ(0.0, expectedCard._value); } TEST(EstimatorTest, TwoBucketsObjectIdHistogram) { @@ -540,7 +543,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { value::bitcastFrom<double>(std::numeric_limits<double>::quiet_NaN())); // (NaN, 1). - double expectedCard = estimateCardRange(arrHist, + CEType expectedCard = estimateCardRange(arrHist, false /* lowInclusive */, tagLowDbl, valLowDbl, @@ -548,7 +551,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { value::TypeTags::NumberInt32, value::bitcastFrom<int64_t>(1), true /* includeScalar */); - ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(0.0, expectedCard, kErrorBound); // (NaN, 5). expectedCard = estimateCardRange(arrHist, @@ -559,7 +562,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { value::TypeTags::NumberInt32, value::bitcastFrom<int64_t>(5), true /* includeScalar */); - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(3.0, expectedCard, kErrorBound); const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); value::ValueGuard vgLowStr(tagLowStr, valLowStr); @@ -575,7 +578,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(3.0, expectedCard, kErrorBound); // ["", "a"]. expectedCard = estimateCardRange(arrHist, @@ -587,7 +590,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(0.0, expectedCard, kErrorBound); std::tie(tag, value) = value::makeNewString("xyz"_sd); // ["", "xyz"]. @@ -600,7 +603,7 @@ TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(5.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(5.0, expectedCard, kErrorBound); } TEST(EstimatorTest, TwoBucketsMixedHistogram) { @@ -620,12 +623,12 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { auto [tag, value] = value::makeNewString("pqr"_sd); value::ValueGuard vg(tag, value); - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(5.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(95.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); + CEType expectedCard{estimate(hist, tag, value, EstimationType::kEqual).card}; + ASSERT_EQ(5.0, expectedCard._value); + expectedCard = {estimate(hist, tag, value, EstimationType::kLess).card}; + ASSERT_EQ(95.0, expectedCard._value); + expectedCard = {estimate(hist, tag, value, EstimationType::kGreater).card}; + ASSERT_EQ(0.0, expectedCard._value); // Estimates for a value smaller than the first bucket bound. ASSERT_APPROX_EQUAL(1.88, estimateIntValCard(hist, 50, EstimationType::kEqual), kErrorBound); @@ -640,16 +643,16 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { ASSERT_EQ(0.0, estimateIntValCard(hist, 105, EstimationType::kEqual)); std::tie(tag, value) = value::makeNewString("a"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(54.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(57.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(42.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound); + expectedCard = {estimate(hist, tag, value, EstimationType::kEqual).card}; + ASSERT_CE_APPROX_EQUAL(3.0, expectedCard, kErrorBound); + expectedCard = {estimate(hist, tag, value, EstimationType::kLess).card}; + ASSERT_CE_APPROX_EQUAL(54.5, expectedCard, kErrorBound); + expectedCard = {estimate(hist, tag, value, EstimationType::kLessOrEqual).card}; + ASSERT_CE_APPROX_EQUAL(57.5, expectedCard, kErrorBound); + expectedCard = {estimate(hist, tag, value, EstimationType::kGreater).card}; + ASSERT_CE_APPROX_EQUAL(42.5, expectedCard, kErrorBound); + expectedCard = {estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card}; + ASSERT_CE_APPROX_EQUAL(45.5, expectedCard, kErrorBound); // Range estimates, including min/max values per data type. const auto [tagLowDbl, valLowDbl] = @@ -667,7 +670,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { value::TypeTags::NumberInt32, value::bitcastFrom<int64_t>(25), true /* includeScalar */); - ASSERT_APPROX_EQUAL(8.49, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(8.49, expectedCard, kErrorBound); // [25, 1000000]. expectedCard = estimateCardRange(arrHist, @@ -678,7 +681,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { tagHighInt, valHighInt, true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.38, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(13.38, expectedCard, kErrorBound); // [NaN, 1000000]. expectedCard = estimateCardRange(arrHist, @@ -689,7 +692,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { tagHighInt, valHighInt, true /* includeScalar */); - ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(20.0, expectedCard, kErrorBound); const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); value::ValueGuard vgLowStr(tagLowStr, valLowStr); @@ -703,7 +706,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(20.0, expectedCard, kErrorBound); // [25, ""). expectedCard = estimateCardRange(arrHist, @@ -714,7 +717,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.39, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(13.39, expectedCard, kErrorBound); // ["", "a"]. expectedCard = estimateCardRange(arrHist, @@ -726,7 +729,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(37.49, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(37.49, expectedCard, kErrorBound); // ["", {}). auto [tagObj, valObj] = value::makeNewObject(); @@ -739,7 +742,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { tagObj, valObj, true /* includeScalar */); - ASSERT_APPROX_EQUAL(80.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(80.0, expectedCard, kErrorBound); // ["a", {}). expectedCard = estimateCardRange(arrHist, @@ -751,7 +754,7 @@ TEST(EstimatorTest, TwoBucketsMixedHistogram) { valObj, true /* includeScalar */); - ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(45.5, expectedCard, kErrorBound); } /** @@ -866,20 +869,20 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { // Minimum ObjectId. auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId); auto [minOidTag, minOidVal] = minOid->cast<mongo::optimizer::Constant>()->get(); - double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); + CEType expectedCard = {estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); // Minimum date. const auto&& [minDate, inclDate] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date); const auto [minDateTag, minDateVal] = minDate->cast<mongo::optimizer::Constant>()->get(); - expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); + expectedCard = {estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); // Minimum timestamp. auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp); auto [minTsTag, minTsVal] = minTs->cast<mongo::optimizer::Constant>()->get(); - expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); + expectedCard = {estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); // Add minimum values to the data set and create another histogram. const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); @@ -897,13 +900,13 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { } // Precise estimate for equality to empty string, it is a bucket boundary. - expectedCard = estimate(hist2, tagLowStr, valLowStr, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); + expectedCard = {estimate(hist2, tagLowStr, valLowStr, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); // Equality to the minimum date/ts value is estimated by range_frequency/NDV. - expectedCard = estimate(hist2, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist2, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); + expectedCard = {estimate(hist2, minDateTag, minDateVal, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); + expectedCard = {estimate(hist2, minTsTag, minTsVal, EstimationType::kEqual).card}; + ASSERT_EQ(1.0, expectedCard._value); // Inequality predicates using min values. const ArrayHistogram arrHist(hist2, @@ -923,7 +926,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { value::TypeTags::Date, value::bitcastFrom<int64_t>(startInstant), true /* includeScalar */); - ASSERT_EQ(1.0, expectedCard); + ASSERT_EQ(1.0, expectedCard._value); // [minDate, endInstant], estimated by the entire date bucket. expectedCard = estimateCardRange(arrHist, @@ -934,7 +937,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { value::TypeTags::Date, value::bitcastFrom<int64_t>(endInstant), true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); + ASSERT_EQ(3.0, expectedCard._value); // [minDate, minTs), estimated by the entire date bucket. // (is this interval possible or is it better to have maxDate upper bound?). @@ -946,7 +949,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { minTsTag, minTsVal, true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); + ASSERT_EQ(3.0, expectedCard._value); // [minTs, startTs], estimated by the half of the timestamp bucket. expectedCard = estimateCardRange(arrHist, @@ -957,7 +960,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { value::TypeTags::Timestamp, value::bitcastFrom<int64_t>(startTs.asULL()), true /* includeScalar */); - ASSERT_EQ(1.0, expectedCard); + ASSERT_EQ(1.0, expectedCard._value); // [minTs, endTs], estimated by the entire timestamp bucket. expectedCard = estimateCardRange(arrHist, @@ -968,7 +971,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { value::TypeTags::Timestamp, value::bitcastFrom<int64_t>(endTs.asULL()), true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); + ASSERT_EQ(3.0, expectedCard._value); // [minTs, maxTs], estimated by the entire timestamp bucket. auto&& [maxTs, inclMaxTs] = getMinMaxBoundForType(false /*isMin*/, value::TypeTags::Timestamp); @@ -981,7 +984,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromData) { maxTsTag, maxTsVal, true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); + ASSERT_EQ(3.0, expectedCard._value); } TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { @@ -1006,20 +1009,20 @@ TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { // Minimum ObjectId. auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId); auto [minOidTag, minOidVal] = minOid->cast<mongo::optimizer::Constant>()->get(); - double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); + CEType expectedCard{estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card}; + ASSERT_CE_APPROX_EQUAL(1.9, expectedCard, kErrorBound); // Minimum date. const auto&& [minDate, inclDate] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date); const auto [minDateTag, minDateVal] = minDate->cast<mongo::optimizer::Constant>()->get(); - expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(4.0, expectedCard); + expectedCard = {estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card}; + ASSERT_EQ(4.0, expectedCard._value); // Minimum timestamp. auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp); auto [minTsTag, minTsVal] = minTs->cast<mongo::optimizer::Constant>()->get(); - expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); + expectedCard = {estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card}; + ASSERT_CE_APPROX_EQUAL(1.9, expectedCard, kErrorBound); // Inequality predicates using min values. const ArrayHistogram arrHist(hist, @@ -1040,7 +1043,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { value::TypeTags::Date, value::bitcastFrom<int64_t>(innerDate), true /* includeScalar */); - ASSERT_APPROX_EQUAL(48.0, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(48.0, expectedCard, kErrorBound); // [minTs, innerTs], estimated by the half of the timestamp bucket. const Timestamp innerTs{Seconds(1516864323LL), 0}; @@ -1052,7 +1055,7 @@ TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { value::TypeTags::Timestamp, value::bitcastFrom<int64_t>(innerTs.asULL()), true /* includeScalar */); - ASSERT_APPROX_EQUAL(47.5, expectedCard, kErrorBound); + ASSERT_CE_APPROX_EQUAL(47.5, expectedCard, kErrorBound); } } // namespace } // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp index 8c119412b9f..d7bdd4f1bcd 100644 --- a/src/mongo/db/query/ce/histogram_estimator.cpp +++ b/src/mongo/db/query/ce/histogram_estimator.cpp @@ -90,7 +90,7 @@ public: const cascades::Memo& memo, const properties::LogicalProps& logicalProps, CEType /*bindResult*/) { - return _stats->getCardinality(); + return {_stats->getCardinality()}; } /** @@ -115,7 +115,7 @@ public: CEType /*refsResult*/) { // Early out and return 0 since we don't expect to get more results. if (childResult == 0.0) { - return 0.0; + return {0.0}; } // Initial first pass through the requirements map to extract information about each path. @@ -168,26 +168,27 @@ public: conjunctRequirements.emplace(serializedPath, std::move(sc)); } - std::vector<double> topLevelSelectivities; + std::vector<SelectivityType> topLevelSelectivities; for (const auto& [serializedPath, conjunctReq] : conjunctRequirements) { - const CEType totalCard = _stats->getCardinality(); + const CEType totalCard{_stats->getCardinality()}; if (conjunctReq.intervals.empty() && !conjunctReq.includeScalar) { // In this case there is a single 'PathArr' interval for this field. // The selectivity of this interval is: (count of all arrays) / totalCard - double pathArrSel = conjunctReq.histogram.getArrayCount() / totalCard; + SelectivityType pathArrSel = + CEType{conjunctReq.histogram.getArrayCount()} / totalCard; topLevelSelectivities.push_back(pathArrSel); } // Intervals are in DNF. for (const IntervalReqExpr::Node& intervalDNF : conjunctReq.intervals) { - std::vector<double> disjSelectivities; + std::vector<SelectivityType> disjSelectivities; const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes(); for (const auto& disjunct : disjuncts) { const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes(); - std::vector<double> conjSelectivities; + std::vector<SelectivityType> conjSelectivities; for (const auto& conjunct : conjuncts) { const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr(); auto cardinality = @@ -201,7 +202,7 @@ public: "path"_attr = serializedPath, "interval"_attr = ExplainGenerator::explainInterval(interval), - "ce"_attr = cardinality); + "ce"_attr = cardinality._value); // We may still not have been able to estimate the interval using // histograms, for instance if the interval bounds were non-Constant. In @@ -233,7 +234,7 @@ public: "path"_attr = serializedPath, "intervalDNF"_attr = ExplainGenerator::explainIntervalExpr(intervalDNF), - "selectivity"_attr = backoff); + "selectivity"_attr = backoff._value); topLevelSelectivities.push_back(backoff); } } @@ -247,7 +248,7 @@ public: 5, "Final estimate for SargableNode using histograms.", "node"_attr = ExplainGenerator::explainV2(n), - "cardinality"_attr = childResult); + "cardinality"_attr = childResult._value); return childResult; } @@ -275,7 +276,7 @@ public: if (canBeLogicalNode<T>()) { return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); } - return 0.0; + return {0.0}; } private: diff --git a/src/mongo/db/query/ce/histogram_estimator_test.cpp b/src/mongo/db/query/ce/histogram_estimator_test.cpp index bdc7d95dea4..36f74e81792 100644 --- a/src/mongo/db/query/ce/histogram_estimator_test.cpp +++ b/src/mongo/db/query/ce/histogram_estimator_test.cpp @@ -50,8 +50,8 @@ std::string collName("test"); class CEHistogramTester : public CETester { public: - CEHistogramTester(std::string collName, double numRecords) - : CETester(collName, numRecords), _stats{new CollectionStatisticsMock(numRecords)} {} + CEHistogramTester(std::string collName, CEType collCard) + : CETester(collName, collCard), _stats{new CollectionStatisticsMock(collCard._value)} {} void addHistogram(const std::string& path, std::shared_ptr<ArrayHistogram> histogram) { _stats->addHistogram(path, histogram); @@ -152,7 +152,7 @@ std::unique_ptr<ArrayHistogram> getArrayHistogramFromData(TestBuckets scalarBuck } TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) { - constexpr auto kCollCard = 8; + constexpr CEType kCollCard{8.0}; CEHistogramTester t(collName, kCollCard); // Construct a histogram with two buckets: one for 3 ints equal to 1, another for 5 strings @@ -207,7 +207,7 @@ TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) { } TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) { - constexpr auto kCollCard = 9; + constexpr CEType kCollCard{9.0}; CEHistogramTester t(collName, kCollCard); // Construct a histogram with three int buckets for field 'a'. @@ -256,7 +256,7 @@ TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) { } TEST(CEHistogramTest, SanityTestEmptyHistogram) { - constexpr auto kCollCard = 0; + constexpr CEType kCollCard{0.0}; CEHistogramTester t(collName, kCollCard); t.addHistogram("empty", std::make_unique<ArrayHistogram>()); @@ -267,14 +267,14 @@ TEST(CEHistogramTest, SanityTestEmptyHistogram) { } TEST(CEHistogramTest, TestOneBucketOneIntHistogram) { - constexpr auto kCollCard = 50; + constexpr CEType kCollCard{50.0}; CEHistogramTester t(collName, kCollCard); // Create a histogram with a single bucket that contains exactly one int (42) with a frequency // of 50 (equal to the collection cardinality). t.addHistogram("soloInt", getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, + {Value(42), kCollCard._value /* frequency */}, })); // Check against a variety of intervals that include 42 as a bound. @@ -318,7 +318,7 @@ TEST(CEHistogramTest, TestOneBucketOneIntHistogram) { } TEST(CEHistogramTest, TestOneBoundIntRangeHistogram) { - constexpr auto kCollCard = 51; + constexpr CEType kCollCard{51.0}; CEHistogramTester t(collName, kCollCard); t.addHistogram("intRange", getArrayHistogramFromData({ @@ -415,18 +415,18 @@ TEST(CEHistogramTest, TestOneBoundIntRangeHistogram) { } TEST(CEHistogramTest, TestHistogramOnNestedPaths) { - constexpr auto kCollCard = 50; + constexpr CEType kCollCard{50.0}; CEHistogramTester t(collName, kCollCard); // Create a histogram with a single bucket that contains exactly one int (42) with a frequency // of 50 (equal to the collection cardinality). t.addHistogram("path", getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, + {Value(42), kCollCard._value /* frequency */}, })); t.addHistogram("a.histogram.path", getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, + {Value(42), kCollCard._value /* frequency */}, })); ASSERT_MATCH_CE(t, "{\"not.a.histogram.path\": {$eq: 42}}", 7.071 /* heuristic */); @@ -466,7 +466,7 @@ TEST(CEHistogramTest, TestHistogramOnNestedPaths) { } TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) { - constexpr auto kCollCard = 6; + constexpr CEType kCollCard{6.0}; CEHistogramTester t(collName, kCollCard); t.addHistogram( "a", @@ -546,7 +546,7 @@ TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) { } TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) { - constexpr auto kCollCard = 175; + constexpr CEType kCollCard{175.0}; CEHistogramTester t(collName, kCollCard); // A scalar histogram with values in the range [1,10], most of which are in the middle bucket. @@ -586,7 +586,7 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) { {Value(10), 35 /* frequency */}, }, {{sbe::value::TypeTags::NumberInt32, 420}}, // Array type count = 3*35+5*35+1*35+3*35. - kCollCard, // kCollCard arrays total. + kCollCard._value, // kCollCard arrays total. 35 // 35 empty arrays )); @@ -733,7 +733,7 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) { } TEST(CEHistogramTest, TestMixedElemMatchAndNonElemMatch) { - constexpr auto kCollCard = 1; + constexpr CEType kCollCard{1.0}; CEHistogramTester t(collName, kCollCard); // A very simple histogram encoding a collection with one document {a: [3, 10]}. @@ -777,7 +777,7 @@ TEST(CEHistogramTest, TestMixedElemMatchAndNonElemMatch) { } TEST(CEHistogramTest, TestTypeCounters) { - constexpr double kCollCard = 1000.0; + constexpr CEType kCollCard{1000.0}; CEHistogramTester t(collName, kCollCard); // This test is designed such that for each document, we have the following fields: @@ -797,7 +797,7 @@ TEST(CEHistogramTest, TestTypeCounters) { {/* No array max buckets. */}, {{sbe::value::TypeTags::Object, kNumObj}, {sbe::value::TypeTags::Null, kNumNull}}, - kCollCard)); + kCollCard._value)); // Count of each type in array type counters for field "mixed". constexpr double kNumObjMA = 50.0; @@ -1031,7 +1031,7 @@ TEST(CEHistogramTest, TestTypeCounters) { TEST(CEHistogramTest, TestNestedArrayTypeCounterPredicates) { // This test validates the correct behaviour of both the nested-array type counter as well as // combinations of type counters and histogram estimates. - constexpr double kCollCard = 1000.0; + constexpr CEType kCollCard{1000.0}; constexpr double kNumArr = 600.0; // Total number of arrays. constexpr double kNumNestArr = 500.0; // Frequency of nested arrays, e.g. [[1, 2, 3]]. constexpr double kNumNonNestArr = 100.0; @@ -1046,7 +1046,7 @@ TEST(CEHistogramTest, TestNestedArrayTypeCounterPredicates) { // Sanity test numbers. ASSERT_EQ(kNumArr1 + kNumArr2, kNumArr3); ASSERT_EQ(kNumNonNestArr + kNumNestArr, kNumArr); - ASSERT_EQ(kNumObj + kNumArr + kNum1 + kNum2 + kNum3, kCollCard); + ASSERT_EQ(kNumObj + kNumArr + kNum1 + kNum2 + kNum3, kCollCard._value); // Define histogram buckets. TestBuckets scalarBuckets{{Value(1), kNum1}, {Value(2), kNum2}, {Value(3), kNum3}}; @@ -1150,12 +1150,12 @@ TEST(CEHistogramTest, TestFallbackForNonConstIntervals) { const auto estInterval = [](const auto& interval) { ArrayHistogram ah; return estimateIntervalCardinality( - ah, interval, 100 /* inputCardinality */, true /* includeScalar */); + ah, interval, {100} /* inputCardinality */, true /* includeScalar */); }; - ASSERT_EQ(estInterval(intervalLowNonConst), -1.0); - ASSERT_EQ(estInterval(intervalHighNonConst), -1.0); - ASSERT_EQ(estInterval(intervalEqNonConst), -1.0); + ASSERT_EQ(estInterval(intervalLowNonConst)._value, -1.0); + ASSERT_EQ(estInterval(intervalHighNonConst)._value, -1.0); + ASSERT_EQ(estInterval(intervalEqNonConst)._value, -1.0); } } // namespace } // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_interpolation_test.cpp b/src/mongo/db/query/ce/histogram_interpolation_test.cpp index 4ad9d38b4e0..38e78428cba 100644 --- a/src/mongo/db/query/ce/histogram_interpolation_test.cpp +++ b/src/mongo/db/query/ce/histogram_interpolation_test.cpp @@ -255,24 +255,24 @@ TEST(EstimatorTest, UniformIntStrEstimate) { // Predicates over value inside of the last numeric bucket. // Query: [{$match: {a: {$eq: 993}}}]. - double expectedCard = estimateIntValCard(hist, 993, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(7.0, expectedCard, 0.1); // Actual: 9. + CEType expectedCard{estimateIntValCard(hist, 993, EstimationType::kEqual)}; + ASSERT_CE_APPROX_EQUAL(7.0, expectedCard, 0.1); // Actual: 9. // Query: [{$match: {a: {$lt: 993}}}]. - expectedCard = estimateIntValCard(hist, 993, EstimationType::kLess); - ASSERT_APPROX_EQUAL(241.4, expectedCard, 0.1); // Actual: 241. + expectedCard = {estimateIntValCard(hist, 993, EstimationType::kLess)}; + ASSERT_CE_APPROX_EQUAL(241.4, expectedCard, 0.1); // Actual: 241. // Query: [{$match: {a: {$lte: 993}}}]. - expectedCard = estimateIntValCard(hist, 993, EstimationType::kLessOrEqual); - ASSERT_APPROX_EQUAL(248.4, expectedCard, 0.1); // Actual: 250. + expectedCard = {estimateIntValCard(hist, 993, EstimationType::kLessOrEqual)}; + ASSERT_CE_APPROX_EQUAL(248.4, expectedCard, 0.1); // Actual: 250. // Predicates over value inside of the first string bucket. auto [tag, value] = value::makeNewString("04e"_sd); value::ValueGuard vg(tag, value); // Query: [{$match: {a: {$eq: '04e'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(2.2, expectedCard, 0.1); // Actual: 3. + expectedCard = {estimate(hist, tag, value, EstimationType::kEqual).card}; + ASSERT_CE_APPROX_EQUAL(2.2, expectedCard, 0.1); // Actual: 3. value::TypeTags lowTag = value::TypeTags::NumberInt64; value::Value lowVal = 100000000; @@ -280,7 +280,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { // Type bracketing: low value of different type than the bucket bound. // Query: [{$match: {a: {$eq: 100000000}}}]. expectedCard = estimateCardEq(arrHist, lowTag, lowVal, true /* includeScalar */); - ASSERT_APPROX_EQUAL(0.0, expectedCard, 0.1); // Actual: 0. + ASSERT_CE_APPROX_EQUAL(0.0, expectedCard, 0.1); // Actual: 0. // No interpolation for inequality to values inside the first string bucket, fallback to half of // the bucket frequency. @@ -294,7 +294,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tag, value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 0. + ASSERT_CE_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 0. // Query: [{$match: {a: {$lte: '04e'}}}]. expectedCard = estimateCardRange(arrHist, @@ -305,7 +305,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tag, value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 3. + ASSERT_CE_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 3. // Value towards the end of the bucket gets the same half bucket estimate. std::tie(tag, value) = value::makeNewString("8B5"_sd); @@ -319,7 +319,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tag, value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 24. + ASSERT_CE_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 24. // Query: [{$match: {a: {$lte: '8B5'}}}]. expectedCard = estimateCardRange(arrHist, @@ -330,7 +330,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) { tag, value, true /* includeScalar */); - ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 29. + ASSERT_CE_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 29. } TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { @@ -379,7 +379,7 @@ TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { value::Value highVal = 600; // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 600}}}}]. - double expectedCard = estimateCardRange(arrHist, + CEType expectedCard = estimateCardRange(arrHist, false /* lowInclusive */, lowTag, lowVal, @@ -387,7 +387,7 @@ TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { highTag, highVal, false /* includeScalar */); - ASSERT_APPROX_EQUAL(27.0, expectedCard, 0.1); // actual 21. + ASSERT_CE_APPROX_EQUAL(27.0, expectedCard, 0.1); // actual 21. // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 600}}}]. // Note: although there are no scalars, the estimate is different than the @@ -400,7 +400,7 @@ TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { highTag, highVal, true /* includeScalar */); - ASSERT_APPROX_EQUAL(92.0, expectedCard, 0.1); // actual 92. + ASSERT_CE_APPROX_EQUAL(92.0, expectedCard, 0.1); // actual 92. // Query at the end of the domain: more precise estimates from ArrayMin, ArrayMax histograms. lowVal = 10; @@ -415,7 +415,7 @@ TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { highTag, highVal, false /* includeScalar */); - ASSERT_APPROX_EQUAL(24.1, expectedCard, 0.1); // actual 29. + ASSERT_CE_APPROX_EQUAL(24.1, expectedCard, 0.1); // actual 29. // Test interpolation for query: [{$match: {a: {$gt: 10, $lt: 110}}}]. expectedCard = estimateCardRange(arrHist, @@ -426,7 +426,7 @@ TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { highTag, highVal, true /* includeScalar */); - ASSERT_APPROX_EQUAL(27.8, expectedCard, 0.1); // actual 31. + ASSERT_CE_APPROX_EQUAL(27.8, expectedCard, 0.1); // actual 31. } TEST(EstimatorTest, UniformIntMixedArrayEstimate) { @@ -482,7 +482,7 @@ TEST(EstimatorTest, UniformIntMixedArrayEstimate) { value::Value highVal = 550; // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 550}}}]. - double expectedCard = estimateCardRange(arrHist, + CEType expectedCard = estimateCardRange(arrHist, false /* lowInclusive */, lowTag, lowVal, @@ -490,7 +490,7 @@ TEST(EstimatorTest, UniformIntMixedArrayEstimate) { highTag, highVal, true /* includeScalar */); - ASSERT_APPROX_EQUAL(92.9, expectedCard, 0.1); // Actual: 94. + ASSERT_CE_APPROX_EQUAL(92.9, expectedCard, 0.1); // Actual: 94. // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 550}}}}]. expectedCard = estimateCardRange(arrHist, @@ -501,7 +501,7 @@ TEST(EstimatorTest, UniformIntMixedArrayEstimate) { highTag, highVal, false /* includeScalar */); - ASSERT_APPROX_EQUAL(11.0, expectedCard, 0.1); // Actual: 8. + ASSERT_CE_APPROX_EQUAL(11.0, expectedCard, 0.1); // Actual: 8. } } // namespace diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp index 25d1658807d..e2477b30eec 100644 --- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp +++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp @@ -283,7 +283,7 @@ double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeSc /** * Estimates equality to the given tag/value using histograms. */ -double estimateCardEq(const ArrayHistogram& ah, +CEType estimateCardEq(const ArrayHistogram& ah, value::TypeTags tag, value::Value val, bool includeScalar) { @@ -294,7 +294,7 @@ double estimateCardEq(const ArrayHistogram& ah, if (ah.isArray()) { card += estimate(ah.getArrayUnique(), tag, val, EstimationType::kEqual).card; } - return card; + return {card}; } static EstimationResult estimateRange(const ScalarHistogram& histogram, @@ -338,7 +338,7 @@ static EstimationResult estimateRangeQueryOnArray(const ScalarHistogram& histogr return highEstimate - lowEstimate; } -double estimateCardRange(const ArrayHistogram& ah, +CEType estimateCardRange(const ArrayHistogram& ah, /* Define lower bound. */ bool lowInclusive, value::TypeTags tagLow, @@ -421,10 +421,10 @@ double estimateCardRange(const ArrayHistogram& ah, result += scalarEst.card; } - return result; + return {result}; } -double estimateIntervalCardinality(const ArrayHistogram& ah, +CEType estimateIntervalCardinality(const ArrayHistogram& ah, const IntervalRequirement& interval, CEType childResult, bool includeScalar) { @@ -433,7 +433,7 @@ double estimateIntervalCardinality(const ArrayHistogram& ah, } else if (interval.isEquality()) { auto maybeConstBound = getBound(interval.getLowBound()); if (!maybeConstBound) { - return kInvalidEstimate; + return {kInvalidEstimate}; } auto [tag, val] = *maybeConstBound; @@ -442,20 +442,20 @@ double estimateIntervalCardinality(const ArrayHistogram& ah, } // Otherwise, we return the cardinality for the type of the intervals. - return getTypeCard(ah, tag, includeScalar); + return {getTypeCard(ah, tag, includeScalar)}; } // Otherwise, we have a range. auto lowBound = interval.getLowBound(); auto maybeConstLowBound = getBound(lowBound); if (!maybeConstLowBound) { - return kInvalidEstimate; + return {kInvalidEstimate}; } auto highBound = interval.getHighBound(); auto maybeConstHighBound = getBound(highBound); if (!maybeConstHighBound) { - return kInvalidEstimate; + return {kInvalidEstimate}; } auto [lowTag, lowVal] = *maybeConstLowBound; @@ -482,15 +482,15 @@ double estimateIntervalCardinality(const ArrayHistogram& ah, // non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this // range. if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) { - return getTypeCard(ah, lowTag, includeScalar); + return {getTypeCard(ah, lowTag, includeScalar)}; } else if (isIntervalSubsetOfType(interval, highTag)) { - return getTypeCard(ah, highTag, includeScalar); + return {getTypeCard(ah, highTag, includeScalar)}; } // If we reach here, we've given up estimating, because our interval intersected both high & low // type intervals (and possibly more types). // TODO: could we aggregate type counts across all intersected types here? - return 0.0; + return {0.0}; } } // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.h b/src/mongo/db/query/ce/histogram_predicate_estimation.h index 763f6c13a5e..c3e867994fd 100644 --- a/src/mongo/db/query/ce/histogram_predicate_estimation.h +++ b/src/mongo/db/query/ce/histogram_predicate_estimation.h @@ -74,7 +74,7 @@ EstimationResult estimate(const stats::ScalarHistogram& h, * Given an array histogram, an interval, and the input cardinality, estimates the cardinality of * the interval. */ -double estimateIntervalCardinality(const stats::ArrayHistogram& estimator, +CEType estimateIntervalCardinality(const stats::ArrayHistogram& estimator, const IntervalRequirement& interval, CEType inputCardinality, bool includeScalar); @@ -83,7 +83,7 @@ double estimateIntervalCardinality(const stats::ArrayHistogram& estimator, * Estimates the cardinality of an equality predicate given an ArrayHistogram and an SBE value and * type tag pair. */ -double estimateCardEq(const stats::ArrayHistogram& ah, +CEType estimateCardEq(const stats::ArrayHistogram& ah, sbe::value::TypeTags tag, sbe::value::Value val, bool includeScalar); @@ -93,7 +93,7 @@ double estimateCardEq(const stats::ArrayHistogram& ah, * Set 'includeScalar' to true to indicate whether or not the provided range should include no-array * values. The other fields define the range of the estimation. */ -double estimateCardRange(const stats::ArrayHistogram& ah, +CEType estimateCardRange(const stats::ArrayHistogram& ah, bool lowInclusive, sbe::value::TypeTags tagLow, sbe::value::Value valLow, diff --git a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp index 80364fea0bb..2c8d5d8b96a 100644 --- a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp +++ b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp @@ -208,10 +208,9 @@ TEST_F(HistogramTest, MaxDiffIntArrays) { const auto [tag, val] = makeInt64Value(2); value::ValueGuard vg(tag, val); - const double estimatedCard = estimateCardEq(estimator, tag, val, true /* includeScalar - */); + const CEType estimatedCard = estimateCardEq(estimator, tag, val, true /*includeScalar*/); - ASSERT_APPROX_EQUAL(4.0, estimatedCard, kTolerance); + ASSERT_CE_APPROX_EQUAL(4.0, estimatedCard, kTolerance); ASSERT_EQ(4, actualCard); } @@ -221,7 +220,7 @@ TEST_F(HistogramTest, MaxDiffIntArrays) { const auto [tag, val] = makeInt64Value(3); value::ValueGuard vg(tag, val); - const double estimatedCard = estimateCardRange(estimator, + const CEType estimatedCard = estimateCardRange(estimator, false /*lowInclusive*/, value::TypeTags::MinKey, 0, @@ -230,7 +229,7 @@ TEST_F(HistogramTest, MaxDiffIntArrays) { val, true /* includeScalar */); ASSERT_EQ(6, actualCard); - ASSERT_APPROX_EQUAL(6.0, estimatedCard, kTolerance); + ASSERT_CE_APPROX_EQUAL(6.0, estimatedCard, kTolerance); } { @@ -242,7 +241,7 @@ TEST_F(HistogramTest, MaxDiffIntArrays) { const auto [highTag, highVal] = makeInt64Value(5); value::ValueGuard vgHigh(highTag, highVal); - const double estimatedCard = estimateCardRange(estimator, + const CEType estimatedCard = estimateCardRange(estimator, false /*lowInclusive*/, lowTag, lowVal, @@ -252,7 +251,7 @@ TEST_F(HistogramTest, MaxDiffIntArrays) { false /* includeScalar */); ASSERT_EQ(2, actualCard); - ASSERT_APPROX_EQUAL(3.15479, estimatedCard, kTolerance); + ASSERT_CE_APPROX_EQUAL(3.15479, estimatedCard, kTolerance); } } diff --git a/src/mongo/db/query/ce/sampling_estimator.cpp b/src/mongo/db/query/ce/sampling_estimator.cpp index 85a9bcf4319..6682ff38611 100644 --- a/src/mongo/db/query/ce/sampling_estimator.cpp +++ b/src/mongo/db/query/ce/sampling_estimator.cpp @@ -197,7 +197,7 @@ public: if (canBeLogicalNode<T>()) { return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); } - return 0.0; + return {0.0}; } CEType derive(const Metadata& metadata, @@ -230,7 +230,7 @@ private: OPTIMIZER_DEBUG_LOG(6264805, 5, "CE sampling estimated filter selectivity", - "selectivity"_attr = *selectivity); + "selectivity"_attr = selectivity->_value); return *selectivity * childResult; } @@ -288,13 +288,13 @@ private: const auto [tag, value] = accessors.at(0)->getViewOfValue(); if (tag == sbe::value::TypeTags::NumberInt64) { // TODO: check if we get exactly one result from the groupby? - return static_cast<double>(value) / _sampleSize; + return {{static_cast<double>(value) / _sampleSize}}; } return boost::none; }; // If nothing passes the filter, estimate 0.0 selectivity. HashGroup will return 0 results. - return 0.0; + return {{0.0}}; } struct NodeRefHash { diff --git a/src/mongo/db/query/ce/test_utils.cpp b/src/mongo/db/query/ce/test_utils.cpp index 191ab63ecec..d4a3bd0bc55 100644 --- a/src/mongo/db/query/ce/test_utils.cpp +++ b/src/mongo/db/query/ce/test_utils.cpp @@ -43,7 +43,7 @@ namespace mongo::optimizer::ce { namespace value = sbe::value; CETester::CETester(std::string collName, - double collCard, + CEType collCard, const OptPhaseManager::PhaseSet& optPhases) : _optPhases(optPhases), _hints(), _metadata({}), _collName(collName) { addCollection(collName, collCard); @@ -130,9 +130,9 @@ CEType CETester::getCE(ABT& abt, std::function<bool(const ABT&)> nodePredicate) // when estimating that node directly. Note that this check will fail if we are testing // histogram estimation and only using the MemoSubstitutionPhase because the memo always // uses heuristic estimation in this case. - ASSERT_APPROX_EQUAL(card, memoCE, kMaxCEError); + ASSERT_CE_APPROX_EQUAL(card, memoCE, kMaxCEError); } else { - if (std::abs(memoCE - card) > kMaxCEError) { + if (absCEDiff(memoCE, card) > kMaxCEError) { std::cout << "ERROR: CE Group(" << groupId << ") " << card << " vs. " << memoCE << std::endl; std::cout << ExplainGenerator::explainV2(node) << std::endl; @@ -161,7 +161,7 @@ ScanDefinition& CETester::getCollScanDefinition() { } -void CETester::setCollCard(double card) { +void CETester::setCollCard(CEType card) { auto& scanDef = getCollScanDefinition(); addCollection(_collName, card, scanDef.getIndexDefs()); } @@ -172,7 +172,7 @@ void CETester::setIndexes(opt::unordered_map<std::string, IndexDefinition> index } void CETester::addCollection(std::string collName, - double numRecords, + CEType numRecords, opt::unordered_map<std::string, IndexDefinition> indexes) { _metadata._scanDefs.insert_or_assign(collName, createScanDef({}, diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h index 1f84fe9a1a8..44c584a9c36 100644 --- a/src/mongo/db/query/ce/test_utils.h +++ b/src/mongo/db/query/ce/test_utils.h @@ -39,7 +39,7 @@ namespace mongo::optimizer::ce { constexpr bool kCETestLogOnly = false; const double kMaxCEError = 0.01; -const CEType kInvalidCardinality = -1.0; +const CEType kInvalidCardinality{-1.0}; const OptPhaseManager::PhaseSet kDefaultCETestPhaseSet{OptPhase::MemoSubstitutionPhase, OptPhase::MemoExplorationPhase, @@ -49,6 +49,15 @@ const OptPhaseManager::PhaseSet kOnlySubPhaseSet{OptPhase::MemoSubstitutionPhase const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; +#define ASSERT_CE_APPROX_EQUAL(estimatedCE, expectedCE, kMaxCEError) \ + ASSERT_APPROX_EQUAL( \ + static_cast<double>(estimatedCE), static_cast<double>(expectedCE), kMaxCEError) + +template <class T1, class T2> +constexpr double absCEDiff(const T1 v1, const T2 v2) { + return std::abs(static_cast<double>(v1) - static_cast<double>(v2)); +} + /** * Helpful macros for asserting that the CE of a $match predicate is approximately what we were * expecting. @@ -56,12 +65,12 @@ const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; #define _ASSERT_CE(estimatedCE, expectedCE) \ if constexpr (kCETestLogOnly) { \ - if (std::abs(estimatedCE - expectedCE) > kMaxCEError) { \ + if (absCEDiff(estimatedCE, expectedCE) > kMaxCEError) { \ std::cout << "ERROR: expected " << expectedCE << std::endl; \ } \ ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError); \ } else { \ - ASSERT_APPROX_EQUAL(estimatedCE, expectedCE, kMaxCEError); \ + ASSERT_CE_APPROX_EQUAL(estimatedCE, expectedCE, kMaxCEError); \ } #define _PREDICATE(field, predicate) (str::stream() << "{" << field << ": " << predicate "}") #define _ELEMMATCH_PREDICATE(field, predicate) \ @@ -72,7 +81,7 @@ const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; // This macro does the same as above but also sets the collection cardinality. #define ASSERT_CE_CARD(ce, pipeline, expectedCE, collCard) \ - ce.setCollCard(collCard); \ + ce.setCollCard({collCard}); \ ASSERT_CE(ce, pipeline, expectedCE) // This macro verifies the cardinality of a pipeline with a single $match predicate. @@ -84,7 +93,7 @@ const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; // This macro does the same as above but also sets the collection cardinality. #define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \ - ce.setCollCard(collCard); \ + ce.setCollCard({collCard}); \ ASSERT_MATCH_CE(ce, predicate, expectedCE) // This macro tests cardinality of two versions of the predicate; with and without $elemMatch. @@ -129,7 +138,7 @@ public: * 'numRecords' in the metadata. */ CETester(std::string collName, - double numRecords, + CEType collCard, const OptPhaseManager::PhaseSet& optPhases = kDefaultCETestPhaseSet); /** @@ -158,7 +167,7 @@ public: /** * Updates the cardinality of the collection '_collName'. */ - void setCollCard(double card); + void setCollCard(CEType card); /** * Updates the indexes used by the collection '_collName'. @@ -169,7 +178,7 @@ public: * Adds a ScanDefinition for an additional collection for the test. */ void addCollection(std::string collName, - double numRecords, + CEType numRecords, opt::unordered_map<std::string, IndexDefinition> indexes = {}); /** |