diff options
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/ce/heuristic_estimator.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/query/ce/heuristic_predicate_estimation.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/query/ce/heuristic_predicate_estimation.h | 4 | ||||
-rw-r--r-- | src/mongo/db/query/ce/test_utils.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/query/ce/test_utils.h | 3 | ||||
-rw-r--r-- | src/mongo/db/query/cqf_get_executor.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/defs.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/metadata.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/metadata.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/metadata_factory.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/metadata_factory.h | 4 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/strong_alias.h | 17 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp | 7 |
13 files changed, 66 insertions, 55 deletions
diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp index 8b35eb1b331..bbdd6d76717 100644 --- a/src/mongo/db/query/ce/heuristic_estimator.cpp +++ b/src/mongo/db/query/ce/heuristic_estimator.cpp @@ -165,7 +165,7 @@ private: SelectivityType disjunctionSel(const SelectivityType left, const SelectivityType right) { // We sum the selectivities and subtract the overlapping part so that it's only counted // once. - return left + right - left * right; + return negateSel(negateSel(left) * negateSel(right)); } }; @@ -173,8 +173,9 @@ class HeuristicTransport { public: CEType transport(const ScanNode& node, CEType /*bindResult*/) { // Default cardinality estimate. - const CEType metadataCE = _metadata._scanDefs.at(node.getScanDefName()).getCE(); - return (metadataCE < 0.0) ? kDefaultCard : metadataCE; + const boost::optional<CEType>& metadataCE = + _metadata._scanDefs.at(node.getScanDefName()).getCE(); + return metadataCE.get_value_or(kDefaultCard); } CEType transport(const ValueScanNode& node, CEType /*bindResult*/) { diff --git a/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp b/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp index b3a8b755244..d0a799ea3da 100644 --- a/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp +++ b/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp @@ -46,35 +46,28 @@ SelectivityType heuristicEqualitySel(const CEType inputCard) { } SelectivityType heuristicClosedRangeSel(const CEType inputCard) { - SelectivityType sel = kInvalidSel; if (inputCard < kSmallLimit) { - sel = kSmallCardClosedRangeSel; + return kSmallCardClosedRangeSel; } else if (inputCard < kMediumLimit) { - sel = kMediumCardClosedRangeSel; - } else { - sel = kLargeCardClosedRangeSel; + return kMediumCardClosedRangeSel; } - return sel; + return kLargeCardClosedRangeSel; } SelectivityType heuristicOpenRangeSel(const CEType inputCard) { - SelectivityType sel = kInvalidSel; if (inputCard < kSmallLimit) { - sel = kSmallCardOpenRangeSel; + return kSmallCardOpenRangeSel; } else if (inputCard < kMediumLimit) { - sel = kMediumCardOpenRangeSel; - } else { - sel = kLargeCardOpenRangeSel; + return kMediumCardOpenRangeSel; } - return sel; + return kLargeCardOpenRangeSel; } SelectivityType heuristicIntervalSel(const IntervalRequirement& interval, const CEType inputCard) { - SelectivityType sel = kInvalidSel; if (interval.isFullyOpen()) { - sel = {1.0}; + return 1.0; } else if (interval.isEquality()) { - sel = heuristicEqualitySel(inputCard); + return heuristicEqualitySel(inputCard); } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() || getBoundReqTypeTag(interval.getLowBound()) != getBoundReqTypeTag(interval.getHighBound())) { @@ -84,12 +77,9 @@ SelectivityType heuristicIntervalSel(const IntervalRequirement& interval, const // one of the bounds is the lowest/highest value of the previous/next type. // TODO: Notice that sometimes type bracketing uses a min/max value from the same type, // so sometimes we may not detect an open-ended interval. - sel = heuristicOpenRangeSel(inputCard); - } else { - sel = heuristicClosedRangeSel(inputCard); + return heuristicOpenRangeSel(inputCard); } - uassert(6716603, "Invalid selectivity.", validSelectivity(sel)); - return sel; + return heuristicClosedRangeSel(inputCard); } CEType heuristicIntervalCard(const IntervalRequirement& interval, const CEType inputCard) { diff --git a/src/mongo/db/query/ce/heuristic_predicate_estimation.h b/src/mongo/db/query/ce/heuristic_predicate_estimation.h index 46b09c7f0b9..07fc4fd05cb 100644 --- a/src/mongo/db/query/ce/heuristic_predicate_estimation.h +++ b/src/mongo/db/query/ce/heuristic_predicate_estimation.h @@ -31,10 +31,6 @@ #include "mongo/db/query/optimizer/utils/memo_utils.h" namespace mongo::optimizer::ce { -// Invalid estimate - an arbitrary negative value used for initialization. -constexpr SelectivityType kInvalidSel{-1.0}; -constexpr CEType kInvalidEstimate{-1.0}; - constexpr SelectivityType kDefaultFilterSel{0.1}; constexpr SelectivityType kDefaultExistsSel{0.70}; diff --git a/src/mongo/db/query/ce/test_utils.cpp b/src/mongo/db/query/ce/test_utils.cpp index cd1e9939a54..92864e2d00b 100644 --- a/src/mongo/db/query/ce/test_utils.cpp +++ b/src/mongo/db/query/ce/test_utils.cpp @@ -111,7 +111,7 @@ CEType CETester::getCE(ABT& abt, std::function<bool(const ABT&)> nodePredicate) return card; } - CEType outCard = kInvalidCardinality; + boost::optional<CEType> outCard; for (size_t groupId = 0; groupId < memo.getGroupCount(); groupId++) { // We only want to return the cardinality for the memo group matching the 'nodePredicate'. if (const auto& node = memo.getLogicalNodes(groupId).front(); nodePredicate(node)) { @@ -121,13 +121,13 @@ CEType CETester::getCE(ABT& abt, std::function<bool(const ABT&)> nodePredicate) } } - ASSERT_NOT_EQUALS(outCard, kInvalidCardinality); + ASSERT_TRUE(outCard.has_value()); if constexpr (kCETestLogOnly) { - std::cout << "CE: " << outCard << std::endl; + std::cout << "CE: " << *outCard << std::endl; } - return outCard; + return *outCard; } void CETester::optimize(OptPhaseManager& phaseManager, ABT& abt) const { @@ -152,7 +152,7 @@ void CETester::setIndexes(opt::unordered_map<std::string, IndexDefinition> index } void CETester::addCollection(std::string collName, - CEType numRecords, + boost::optional<CEType> numRecords, opt::unordered_map<std::string, IndexDefinition> indexes) { _metadata._scanDefs.insert_or_assign(collName, createScanDef({}, diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h index 3a9c7f9c78e..c66d03bc483 100644 --- a/src/mongo/db/query/ce/test_utils.h +++ b/src/mongo/db/query/ce/test_utils.h @@ -39,7 +39,6 @@ namespace mongo::optimizer::ce { constexpr bool kCETestLogOnly = false; const double kMaxCEError = 0.01; -const CEType kInvalidCardinality{-1.0}; const OptPhaseManager::PhaseSet kDefaultCETestPhaseSet{OptPhase::MemoSubstitutionPhase, OptPhase::MemoExplorationPhase, @@ -182,7 +181,7 @@ public: * Adds a ScanDefinition for an additional collection for the test. */ void addCollection(std::string collName, - CEType numRecords, + boost::optional<CEType> numRecords, opt::unordered_map<std::string, IndexDefinition> indexes = {}); /** diff --git a/src/mongo/db/query/cqf_get_executor.cpp b/src/mongo/db/query/cqf_get_executor.cpp index d7cee2b4c6b..b811e1e4068 100644 --- a/src/mongo/db/query/cqf_get_executor.cpp +++ b/src/mongo/db/query/cqf_get_executor.cpp @@ -412,7 +412,10 @@ static void populateAdditionalScanDefs( ? DistributionType::Centralized : DistributionType::UnknownPartitioning}; - const CEType collectionCE{collectionExists ? collection->numRecords(opCtx) : -1.0}; + boost::optional<CEType> collectionCE; + if (collectionExists) { + collectionCE = collection->numRecords(opCtx); + } scanDefs.emplace(scanDefName, createScanDef({{"type", "mongod"}, {"database", involvedNss.db().toString()}, @@ -540,7 +543,10 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx, : DistributionType::UnknownPartitioning}; opt::unordered_map<std::string, ScanDefinition> scanDefs; - const int64_t numRecords = collectionExists ? collection->numRecords(opCtx) : -1; + boost::optional<CEType> numRecords; + if (collectionExists) { + numRecords = static_cast<double>(collection->numRecords(opCtx)); + } scanDefs.emplace(scanDefName, createScanDef({{"type", "mongod"}, {"database", nss.db().toString()}, @@ -551,7 +557,7 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx, constFold, std::move(distribution), collectionExists, - {static_cast<double>(numRecords)})); + numRecords)); // Add a scan definition for all involved collections. Note that the base namespace has already // been accounted for above and isn't included here. diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h index 061dab2ecbf..18570d22c6c 100644 --- a/src/mongo/db/query/optimizer/defs.h +++ b/src/mongo/db/query/optimizer/defs.h @@ -30,6 +30,7 @@ #pragma once #include <boost/optional.hpp> +#include <limits> #include <set> #include <sstream> #include <string> @@ -193,12 +194,16 @@ private: struct SelectivityTag { // Selectivity does not have units, it is a simple ratio. static constexpr bool kUnitless = true; + static constexpr double kMaxValue = 1.0; + static constexpr double kMinValue = 0.0; }; using SelectivityType = StrongDoubleAlias<SelectivityTag>; struct CETag { // Cardinality has units: it is measured in documents. static constexpr bool kUnitless = false; + static constexpr double kMaxValue = std::numeric_limits<double>::max(); + static constexpr double kMinValue = 0.0; }; using CEType = StrongDoubleAlias<CETag>; diff --git a/src/mongo/db/query/optimizer/metadata.cpp b/src/mongo/db/query/optimizer/metadata.cpp index 5cabb4826f7..4a4bea82d51 100644 --- a/src/mongo/db/query/optimizer/metadata.cpp +++ b/src/mongo/db/query/optimizer/metadata.cpp @@ -162,20 +162,20 @@ ScanDefinition::ScanDefinition() {} /*nonMultiKeyPathSet*/, {DistributionType::Centralized}, true /*exists*/, - {-1.0} /*ce*/) {} + boost::none /*ce*/) {} ScanDefinition::ScanDefinition(ScanDefOptions options, opt::unordered_map<std::string, IndexDefinition> indexDefs, MultikeynessTrie multikeynessTrie, DistributionAndPaths distributionAndPaths, const bool exists, - const CEType ce) + boost::optional<CEType> ce) : _options(std::move(options)), _distributionAndPaths(std::move(distributionAndPaths)), _indexDefs(std::move(indexDefs)), _multikeynessTrie(std::move(multikeynessTrie)), _exists(exists), - _ce(ce) {} + _ce(std::move(ce)) {} const ScanDefOptions& ScanDefinition::getOptionsMap() const { return _options; @@ -201,7 +201,7 @@ bool ScanDefinition::exists() const { return _exists; } -CEType ScanDefinition::getCE() const { +const boost::optional<CEType>& ScanDefinition::getCE() const { return _ce; } diff --git a/src/mongo/db/query/optimizer/metadata.h b/src/mongo/db/query/optimizer/metadata.h index 7db0c0d2f34..c698ca83352 100644 --- a/src/mongo/db/query/optimizer/metadata.h +++ b/src/mongo/db/query/optimizer/metadata.h @@ -172,7 +172,7 @@ public: MultikeynessTrie multikeynessTrie, DistributionAndPaths distributionAndPaths, bool exists, - CEType ce); + boost::optional<CEType> ce); const ScanDefOptions& getOptionsMap() const; @@ -185,7 +185,7 @@ public: bool exists() const; - CEType getCE() const; + const boost::optional<CEType>& getCE() const; private: ScanDefOptions _options; @@ -204,7 +204,7 @@ private: bool _exists; // If positive, estimated number of docs in the collection. - CEType _ce; + boost::optional<CEType> _ce; }; struct Metadata { diff --git a/src/mongo/db/query/optimizer/metadata_factory.cpp b/src/mongo/db/query/optimizer/metadata_factory.cpp index ca9f498ba79..50bda02eb89 100644 --- a/src/mongo/db/query/optimizer/metadata_factory.cpp +++ b/src/mongo/db/query/optimizer/metadata_factory.cpp @@ -70,7 +70,7 @@ ScanDefinition createScanDef(ScanDefOptions options, const ConstFoldFn& constFold, DistributionAndPaths distributionAndPaths, const bool exists, - const CEType ce) { + boost::optional<CEType> ce) { MultikeynessTrie multikeynessTrie = createTrie(indexDefs); @@ -80,7 +80,7 @@ ScanDefinition createScanDef(ScanDefOptions options, constFold, std::move(distributionAndPaths), exists, - ce); + std::move(ce)); } ScanDefinition createScanDef(ScanDefOptions options, @@ -89,7 +89,7 @@ ScanDefinition createScanDef(ScanDefOptions options, const ConstFoldFn& constFold, DistributionAndPaths distributionAndPaths, const bool exists, - const CEType ce) { + boost::optional<CEType> ce) { // Simplify partial filter requirements using the non-multikey paths. for (auto& [indexDefName, indexDef] : indexDefs) { @@ -112,7 +112,7 @@ ScanDefinition createScanDef(ScanDefOptions options, std::move(multikeynessTrie), std::move(distributionAndPaths), exists, - ce}; + std::move(ce)}; } } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/metadata_factory.h b/src/mongo/db/query/optimizer/metadata_factory.h index dea5b676c3d..c9e8e3c4885 100644 --- a/src/mongo/db/query/optimizer/metadata_factory.h +++ b/src/mongo/db/query/optimizer/metadata_factory.h @@ -43,7 +43,7 @@ ScanDefinition createScanDef(ScanDefOptions options, const ConstFoldFn& constFold, DistributionAndPaths distributionAndPaths, bool exists = true, - CEType ce = CEType{-1.0}); + boost::optional<CEType> ce = boost::none); ScanDefinition createScanDef(ScanDefOptions options, IndexDefinitions indexDefs, @@ -51,6 +51,6 @@ ScanDefinition createScanDef(ScanDefOptions options, const ConstFoldFn& constFold, DistributionAndPaths distributionAndPaths, bool exists = true, - CEType ce = CEType{-1.0}); + boost::optional<CEType> ce = boost::none); } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/utils/strong_alias.h b/src/mongo/db/query/optimizer/utils/strong_alias.h index a7413754c88..b083d64556f 100644 --- a/src/mongo/db/query/optimizer/utils/strong_alias.h +++ b/src/mongo/db/query/optimizer/utils/strong_alias.h @@ -29,8 +29,9 @@ #pragma once -#include "mongo/util/assert_util.h" +#include "mongo/util/assert_util_core.h" #include "mongo/util/str.h" +#include <limits> namespace mongo::optimizer { @@ -112,14 +113,11 @@ StreamType& operator<<(StreamType& stream, const StrongStringAlias<TagType>& t) return stream << t.value(); } - /** * Strong double alias. Used for cardinality estimation and selectivity. The tag type is expected to * have a boolean field "kUnitless". It specifies if this entity is unitless (e.g. a simple ratio, a * percent) vs having units (e.g. documents). This effectively enables or disables multiplication * and division by the same alias type. - * - * TODO: SERVER-71801: Validation for strong double alias. */ template <class TagType> struct StrongDoubleAlias { @@ -133,6 +131,17 @@ struct StrongDoubleAlias { return _value; } + constexpr void assertValid() const { + uassert(7180104, "Invalid value", _value >= TagType::kMinValue); + uassert(7180105, "Invalid value", _value <= TagType::kMaxValue); + } + + constexpr StrongDoubleAlias(const double value) : _value(value) { + assertValid(); + } + + constexpr StrongDoubleAlias() = default; + constexpr bool operator==(const StrongDoubleAlias other) const { return _value == other._value; } diff --git a/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp b/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp index 8f0c9adfff8..f30f7bafa25 100644 --- a/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp +++ b/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp @@ -186,7 +186,12 @@ void serializeMetadata(std::ostream& stream, Metadata metadata) { } stream << "\t\t\tcollection exists: " << scanDef.exists() << std::endl; - stream << "\t\t\tCE type: " << scanDef.getCE() << std::endl; + stream << "\t\t\tCE type: "; + if (const auto& ce = scanDef.getCE()) { + stream << *ce << std::endl; + } else { + stream << "(empty)" << std::endl; + } } } |