summary refs log tree commit diff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- src/mongo/db/query/ce/heuristic_estimator.cpp 7
-rw-r--r-- src/mongo/db/query/ce/heuristic_predicate_estimation.cpp 30
-rw-r--r-- src/mongo/db/query/ce/heuristic_predicate_estimation.h 4
-rw-r--r-- src/mongo/db/query/ce/test_utils.cpp 10
-rw-r--r-- src/mongo/db/query/ce/test_utils.h 3
-rw-r--r-- src/mongo/db/query/cqf_get_executor.cpp 12
-rw-r--r-- src/mongo/db/query/optimizer/defs.h 5
-rw-r--r-- src/mongo/db/query/optimizer/metadata.cpp 8
-rw-r--r-- src/mongo/db/query/optimizer/metadata.h 6
-rw-r--r-- src/mongo/db/query/optimizer/metadata_factory.cpp 8
-rw-r--r-- src/mongo/db/query/optimizer/metadata_factory.h 4
-rw-r--r-- src/mongo/db/query/optimizer/utils/strong_alias.h 17
-rw-r--r-- src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp 7
13 files changed, 66 insertions, 55 deletions
diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp
index 8b35eb1b331..bbdd6d76717 100644
--- a/src/mongo/db/query/ce/heuristic_estimator.cpp
+++ b/src/mongo/db/query/ce/heuristic_estimator.cpp
@@ -165,7 +165,7 @@ private:
SelectivityType disjunctionSel(const SelectivityType left, const SelectivityType right) {
// We sum the selectivities and subtract the overlapping part so that it's only counted
// once.
- return left + right - left * right;
+ return negateSel(negateSel(left) * negateSel(right));
}
};
@@ -173,8 +173,9 @@ class HeuristicTransport {
public:
CEType transport(const ScanNode& node, CEType /*bindResult*/) {
// Default cardinality estimate.
- const CEType metadataCE = _metadata._scanDefs.at(node.getScanDefName()).getCE();
- return (metadataCE < 0.0) ? kDefaultCard : metadataCE;
+ const boost::optional<CEType>& metadataCE =
+ _metadata._scanDefs.at(node.getScanDefName()).getCE();
+ return metadataCE.get_value_or(kDefaultCard);
}
CEType transport(const ValueScanNode& node, CEType /*bindResult*/) {
diff --git a/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp b/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp
index b3a8b755244..d0a799ea3da 100644
--- a/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/heuristic_predicate_estimation.cpp
@@ -46,35 +46,28 @@ SelectivityType heuristicEqualitySel(const CEType inputCard) {
}
SelectivityType heuristicClosedRangeSel(const CEType inputCard) {
- SelectivityType sel = kInvalidSel;
if (inputCard < kSmallLimit) {
- sel = kSmallCardClosedRangeSel;
+ return kSmallCardClosedRangeSel;
} else if (inputCard < kMediumLimit) {
- sel = kMediumCardClosedRangeSel;
- } else {
- sel = kLargeCardClosedRangeSel;
+ return kMediumCardClosedRangeSel;
}
- return sel;
+ return kLargeCardClosedRangeSel;
}
SelectivityType heuristicOpenRangeSel(const CEType inputCard) {
- SelectivityType sel = kInvalidSel;
if (inputCard < kSmallLimit) {
- sel = kSmallCardOpenRangeSel;
+ return kSmallCardOpenRangeSel;
} else if (inputCard < kMediumLimit) {
- sel = kMediumCardOpenRangeSel;
- } else {
- sel = kLargeCardOpenRangeSel;
+ return kMediumCardOpenRangeSel;
}
- return sel;
+ return kLargeCardOpenRangeSel;
}
SelectivityType heuristicIntervalSel(const IntervalRequirement& interval, const CEType inputCard) {
- SelectivityType sel = kInvalidSel;
if (interval.isFullyOpen()) {
- sel = {1.0};
+ return 1.0;
} else if (interval.isEquality()) {
- sel = heuristicEqualitySel(inputCard);
+ return heuristicEqualitySel(inputCard);
} else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() ||
getBoundReqTypeTag(interval.getLowBound()) !=
getBoundReqTypeTag(interval.getHighBound())) {
@@ -84,12 +77,9 @@ SelectivityType heuristicIntervalSel(const IntervalRequirement& interval, const
// one of the bounds is the lowest/highest value of the previous/next type.
// TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
// so sometimes we may not detect an open-ended interval.
- sel = heuristicOpenRangeSel(inputCard);
- } else {
- sel = heuristicClosedRangeSel(inputCard);
+ return heuristicOpenRangeSel(inputCard);
}
- uassert(6716603, "Invalid selectivity.", validSelectivity(sel));
- return sel;
+ return heuristicClosedRangeSel(inputCard);
}
CEType heuristicIntervalCard(const IntervalRequirement& interval, const CEType inputCard) {
diff --git a/src/mongo/db/query/ce/heuristic_predicate_estimation.h b/src/mongo/db/query/ce/heuristic_predicate_estimation.h
index 46b09c7f0b9..07fc4fd05cb 100644
--- a/src/mongo/db/query/ce/heuristic_predicate_estimation.h
+++ b/src/mongo/db/query/ce/heuristic_predicate_estimation.h
@@ -31,10 +31,6 @@
#include "mongo/db/query/optimizer/utils/memo_utils.h"
namespace mongo::optimizer::ce {
-// Invalid estimate - an arbitrary negative value used for initialization.
-constexpr SelectivityType kInvalidSel{-1.0};
-constexpr CEType kInvalidEstimate{-1.0};
-
constexpr SelectivityType kDefaultFilterSel{0.1};
constexpr SelectivityType kDefaultExistsSel{0.70};
diff --git a/src/mongo/db/query/ce/test_utils.cpp b/src/mongo/db/query/ce/test_utils.cpp
index cd1e9939a54..92864e2d00b 100644
--- a/src/mongo/db/query/ce/test_utils.cpp
+++ b/src/mongo/db/query/ce/test_utils.cpp
@@ -111,7 +111,7 @@ CEType CETester::getCE(ABT& abt, std::function<bool(const ABT&)> nodePredicate)
return card;
}
- CEType outCard = kInvalidCardinality;
+ boost::optional<CEType> outCard;
for (size_t groupId = 0; groupId < memo.getGroupCount(); groupId++) {
// We only want to return the cardinality for the memo group matching the 'nodePredicate'.
if (const auto& node = memo.getLogicalNodes(groupId).front(); nodePredicate(node)) {
@@ -121,13 +121,13 @@ CEType CETester::getCE(ABT& abt, std::function<bool(const ABT&)> nodePredicate)
}
}
- ASSERT_NOT_EQUALS(outCard, kInvalidCardinality);
+ ASSERT_TRUE(outCard.has_value());
if constexpr (kCETestLogOnly) {
- std::cout << "CE: " << outCard << std::endl;
+ std::cout << "CE: " << *outCard << std::endl;
}
- return outCard;
+ return *outCard;
}
void CETester::optimize(OptPhaseManager& phaseManager, ABT& abt) const {
@@ -152,7 +152,7 @@ void CETester::setIndexes(opt::unordered_map<std::string, IndexDefinition> index
}
void CETester::addCollection(std::string collName,
- CEType numRecords,
+ boost::optional<CEType> numRecords,
opt::unordered_map<std::string, IndexDefinition> indexes) {
_metadata._scanDefs.insert_or_assign(collName,
createScanDef({},
diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h
index 3a9c7f9c78e..c66d03bc483 100644
--- a/src/mongo/db/query/ce/test_utils.h
+++ b/src/mongo/db/query/ce/test_utils.h
@@ -39,7 +39,6 @@ namespace mongo::optimizer::ce {
constexpr bool kCETestLogOnly = false;
const double kMaxCEError = 0.01;
-const CEType kInvalidCardinality{-1.0};
const OptPhaseManager::PhaseSet kDefaultCETestPhaseSet{OptPhase::MemoSubstitutionPhase,
OptPhase::MemoExplorationPhase,
@@ -182,7 +181,7 @@ public:
* Adds a ScanDefinition for an additional collection for the test.
*/
void addCollection(std::string collName,
- CEType numRecords,
+ boost::optional<CEType> numRecords,
opt::unordered_map<std::string, IndexDefinition> indexes = {});
/**
diff --git a/src/mongo/db/query/cqf_get_executor.cpp b/src/mongo/db/query/cqf_get_executor.cpp
index d7cee2b4c6b..b811e1e4068 100644
--- a/src/mongo/db/query/cqf_get_executor.cpp
+++ b/src/mongo/db/query/cqf_get_executor.cpp
@@ -412,7 +412,10 @@ static void populateAdditionalScanDefs(
? DistributionType::Centralized
: DistributionType::UnknownPartitioning};
- const CEType collectionCE{collectionExists ? collection->numRecords(opCtx) : -1.0};
+ boost::optional<CEType> collectionCE;
+ if (collectionExists) {
+ collectionCE = collection->numRecords(opCtx);
+ }
scanDefs.emplace(scanDefName,
createScanDef({{"type", "mongod"},
{"database", involvedNss.db().toString()},
@@ -540,7 +543,10 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx,
: DistributionType::UnknownPartitioning};
opt::unordered_map<std::string, ScanDefinition> scanDefs;
- const int64_t numRecords = collectionExists ? collection->numRecords(opCtx) : -1;
+ boost::optional<CEType> numRecords;
+ if (collectionExists) {
+ numRecords = static_cast<double>(collection->numRecords(opCtx));
+ }
scanDefs.emplace(scanDefName,
createScanDef({{"type", "mongod"},
{"database", nss.db().toString()},
@@ -551,7 +557,7 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx,
constFold,
std::move(distribution),
collectionExists,
- {static_cast<double>(numRecords)}));
+ numRecords));
// Add a scan definition for all involved collections. Note that the base namespace has already
// been accounted for above and isn't included here.
diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h
index 061dab2ecbf..18570d22c6c 100644
--- a/src/mongo/db/query/optimizer/defs.h
+++ b/src/mongo/db/query/optimizer/defs.h
@@ -30,6 +30,7 @@
#pragma once
#include <boost/optional.hpp>
+#include <limits>
#include <set>
#include <sstream>
#include <string>
@@ -193,12 +194,16 @@ private:
struct SelectivityTag {
// Selectivity does not have units, it is a simple ratio.
static constexpr bool kUnitless = true;
+ static constexpr double kMaxValue = 1.0;
+ static constexpr double kMinValue = 0.0;
};
using SelectivityType = StrongDoubleAlias<SelectivityTag>;
struct CETag {
// Cardinality has units: it is measured in documents.
static constexpr bool kUnitless = false;
+ static constexpr double kMaxValue = std::numeric_limits<double>::max();
+ static constexpr double kMinValue = 0.0;
};
using CEType = StrongDoubleAlias<CETag>;
diff --git a/src/mongo/db/query/optimizer/metadata.cpp b/src/mongo/db/query/optimizer/metadata.cpp
index 5cabb4826f7..4a4bea82d51 100644
--- a/src/mongo/db/query/optimizer/metadata.cpp
+++ b/src/mongo/db/query/optimizer/metadata.cpp
@@ -162,20 +162,20 @@ ScanDefinition::ScanDefinition()
{} /*nonMultiKeyPathSet*/,
{DistributionType::Centralized},
true /*exists*/,
- {-1.0} /*ce*/) {}
+ boost::none /*ce*/) {}
ScanDefinition::ScanDefinition(ScanDefOptions options,
opt::unordered_map<std::string, IndexDefinition> indexDefs,
MultikeynessTrie multikeynessTrie,
DistributionAndPaths distributionAndPaths,
const bool exists,
- const CEType ce)
+ boost::optional<CEType> ce)
: _options(std::move(options)),
_distributionAndPaths(std::move(distributionAndPaths)),
_indexDefs(std::move(indexDefs)),
_multikeynessTrie(std::move(multikeynessTrie)),
_exists(exists),
- _ce(ce) {}
+ _ce(std::move(ce)) {}
const ScanDefOptions& ScanDefinition::getOptionsMap() const {
return _options;
@@ -201,7 +201,7 @@ bool ScanDefinition::exists() const {
return _exists;
}
-CEType ScanDefinition::getCE() const {
+const boost::optional<CEType>& ScanDefinition::getCE() const {
return _ce;
}
diff --git a/src/mongo/db/query/optimizer/metadata.h b/src/mongo/db/query/optimizer/metadata.h
index 7db0c0d2f34..c698ca83352 100644
--- a/src/mongo/db/query/optimizer/metadata.h
+++ b/src/mongo/db/query/optimizer/metadata.h
@@ -172,7 +172,7 @@ public:
MultikeynessTrie multikeynessTrie,
DistributionAndPaths distributionAndPaths,
bool exists,
- CEType ce);
+ boost::optional<CEType> ce);
const ScanDefOptions& getOptionsMap() const;
@@ -185,7 +185,7 @@ public:
bool exists() const;
- CEType getCE() const;
+ const boost::optional<CEType>& getCE() const;
private:
ScanDefOptions _options;
@@ -204,7 +204,7 @@ private:
bool _exists;
// If positive, estimated number of docs in the collection.
- CEType _ce;
+ boost::optional<CEType> _ce;
};
struct Metadata {
diff --git a/src/mongo/db/query/optimizer/metadata_factory.cpp b/src/mongo/db/query/optimizer/metadata_factory.cpp
index ca9f498ba79..50bda02eb89 100644
--- a/src/mongo/db/query/optimizer/metadata_factory.cpp
+++ b/src/mongo/db/query/optimizer/metadata_factory.cpp
@@ -70,7 +70,7 @@ ScanDefinition createScanDef(ScanDefOptions options,
const ConstFoldFn& constFold,
DistributionAndPaths distributionAndPaths,
const bool exists,
- const CEType ce) {
+ boost::optional<CEType> ce) {
MultikeynessTrie multikeynessTrie = createTrie(indexDefs);
@@ -80,7 +80,7 @@ ScanDefinition createScanDef(ScanDefOptions options,
constFold,
std::move(distributionAndPaths),
exists,
- ce);
+ std::move(ce));
}
ScanDefinition createScanDef(ScanDefOptions options,
@@ -89,7 +89,7 @@ ScanDefinition createScanDef(ScanDefOptions options,
const ConstFoldFn& constFold,
DistributionAndPaths distributionAndPaths,
const bool exists,
- const CEType ce) {
+ boost::optional<CEType> ce) {
// Simplify partial filter requirements using the non-multikey paths.
for (auto& [indexDefName, indexDef] : indexDefs) {
@@ -112,7 +112,7 @@ ScanDefinition createScanDef(ScanDefOptions options,
std::move(multikeynessTrie),
std::move(distributionAndPaths),
exists,
- ce};
+ std::move(ce)};
}
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/metadata_factory.h b/src/mongo/db/query/optimizer/metadata_factory.h
index dea5b676c3d..c9e8e3c4885 100644
--- a/src/mongo/db/query/optimizer/metadata_factory.h
+++ b/src/mongo/db/query/optimizer/metadata_factory.h
@@ -43,7 +43,7 @@ ScanDefinition createScanDef(ScanDefOptions options,
const ConstFoldFn& constFold,
DistributionAndPaths distributionAndPaths,
bool exists = true,
- CEType ce = CEType{-1.0});
+ boost::optional<CEType> ce = boost::none);
ScanDefinition createScanDef(ScanDefOptions options,
IndexDefinitions indexDefs,
@@ -51,6 +51,6 @@ ScanDefinition createScanDef(ScanDefOptions options,
const ConstFoldFn& constFold,
DistributionAndPaths distributionAndPaths,
bool exists = true,
- CEType ce = CEType{-1.0});
+ boost::optional<CEType> ce = boost::none);
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/utils/strong_alias.h b/src/mongo/db/query/optimizer/utils/strong_alias.h
index a7413754c88..b083d64556f 100644
--- a/src/mongo/db/query/optimizer/utils/strong_alias.h
+++ b/src/mongo/db/query/optimizer/utils/strong_alias.h
@@ -29,8 +29,9 @@
#pragma once
-#include "mongo/util/assert_util.h"
+#include "mongo/util/assert_util_core.h"
#include "mongo/util/str.h"
+#include <limits>
namespace mongo::optimizer {
@@ -112,14 +113,11 @@ StreamType& operator<<(StreamType& stream, const StrongStringAlias<TagType>& t)
return stream << t.value();
}
-
/**
* Strong double alias. Used for cardinality estimation and selectivity. The tag type is expected to
* have a boolean field "kUnitless". It specifies if this entity is unitless (e.g. a simple ratio, a
* percent) vs having units (e.g. documents). This effectively enables or disables multiplication
* and division by the same alias type.
- *
- * TODO: SERVER-71801: Validation for strong double alias.
*/
template <class TagType>
struct StrongDoubleAlias {
@@ -133,6 +131,17 @@ struct StrongDoubleAlias {
return _value;
}
+ constexpr void assertValid() const {
+ uassert(7180104, "Invalid value", _value >= TagType::kMinValue);
+ uassert(7180105, "Invalid value", _value <= TagType::kMaxValue);
+ }
+
+ constexpr StrongDoubleAlias(const double value) : _value(value) {
+ assertValid();
+ }
+
+ constexpr StrongDoubleAlias() = default;
+
constexpr bool operator==(const StrongDoubleAlias other) const {
return _value == other._value;
}
diff --git a/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp b/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp
index 8f0c9adfff8..f30f7bafa25 100644
--- a/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/unit_test_pipeline_utils.cpp
@@ -186,7 +186,12 @@ void serializeMetadata(std::ostream& stream, Metadata metadata) {
}
stream << "\t\t\tcollection exists: " << scanDef.exists() << std::endl;
- stream << "\t\t\tCE type: " << scanDef.getCE() << std::endl;
+ stream << "\t\t\tCE type: ";
+ if (const auto& ce = scanDef.getCE()) {
+ stream << *ce << std::endl;
+ } else {
+ stream << "(empty)" << std::endl;
+ }
}
}