summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/commands/cqf/cqf_aggregate.cpp41
-rw-r--r--src/mongo/db/query/SConscript2
-rw-r--r--src/mongo/db/query/ce/SConscript32
-rw-r--r--src/mongo/db/query/ce/array_histogram.cpp123
-rw-r--r--src/mongo/db/query/ce/array_histogram.h84
-rw-r--r--src/mongo/db/query/ce/ce_estimation.cpp168
-rw-r--r--src/mongo/db/query/ce/ce_estimation.h48
-rw-r--r--src/mongo/db/query/ce/ce_histogram.cpp160
-rw-r--r--src/mongo/db/query/ce/ce_histogram.h52
-rw-r--r--src/mongo/db/query/ce/ce_histogram_test.cpp366
-rw-r--r--src/mongo/db/query/ce/ce_math.cpp62
-rw-r--r--src/mongo/db/query/ce/ce_math.h52
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.cpp76
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.h83
-rw-r--r--src/mongo/db/query/ce/collection_statistics.cpp63
-rw-r--r--src/mongo/db/query/ce/collection_statistics.h76
-rw-r--r--src/mongo/db/query/ce/histogram.cpp220
-rw-r--r--src/mongo/db/query/ce/histogram.h125
-rw-r--r--src/mongo/db/query/ce/utils.cpp140
-rw-r--r--src/mongo/db/query/ce/utils.h79
-rw-r--r--src/mongo/db/query/ce_mode_parameter.cpp40
-rw-r--r--src/mongo/db/query/ce_mode_parameter.h47
-rw-r--r--src/mongo/db/query/ce_mode_parameter_test.cpp47
-rw-r--r--src/mongo/db/query/query_knobs.idl16
24 files changed, 2184 insertions, 18 deletions
diff --git a/src/mongo/db/commands/cqf/cqf_aggregate.cpp b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
index 827ae2fe066..c51302d28a8 100644
--- a/src/mongo/db/commands/cqf/cqf_aggregate.cpp
+++ b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
@@ -33,7 +33,10 @@
#include "mongo/db/exec/sbe/abt/abt_lower.h"
#include "mongo/db/pipeline/abt/document_source_visitor.h"
#include "mongo/db/pipeline/abt/match_expression_visitor.h"
+#include "mongo/db/query/ce/ce_histogram.h"
#include "mongo/db/query/ce/ce_sampling.h"
+#include "mongo/db/query/ce/collection_statistics.h"
+#include "mongo/db/query/ce_mode_parameter.h"
#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
#include "mongo/db/query/optimizer/explain.h"
@@ -457,8 +460,8 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
OPTIMIZER_DEBUG_LOG(
6264803, 5, "Translated ABT", "explain"_attr = ExplainGenerator::explainV2(abtTree));
- if (collectionExists && numRecords > 0 &&
- internalQueryEnableSamplingCardinalityEstimator.load()) {
+ if (internalQueryCardinalityEstimatorMode == ce::kSampling && collectionExists &&
+ numRecords > 0) {
Metadata metadataForSampling = metadata;
// Do not use indexes for sampling.
for (auto& entry : metadataForSampling._scanDefs) {
@@ -486,17 +489,33 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
return optimizeAndCreateExecutor(
phaseManager, std::move(abtTree), opCtx, expCtx, nss, collection);
- }
- // Use heuristics.
- OptPhaseManager phaseManager{OptPhaseManager::getAllRewritesSet(),
- prefixId,
- std::move(metadata),
- DebugInfo::kDefaultForProd};
- phaseManager.getHints() = queryHints;
+ } else if (internalQueryCardinalityEstimatorMode == ce::kHistogram &&
+ ce::CollectionStatistics::hasCollectionStatistics(nss)) {
+ const auto& stats = ce::CollectionStatistics::getCollectionStatistics(nss);
+ auto ceDerivation = std::make_unique<CEHistogramTransport>(opCtx, stats);
+ OptPhaseManager phaseManager{OptPhaseManager::getAllRewritesSet(),
+ prefixId,
+ false /*requireRID*/,
+ std::move(metadata),
+ std::move(ceDerivation),
+ std::make_unique<DefaultCosting>(),
+ DebugInfo::kDefaultForProd};
- return optimizeAndCreateExecutor(
- phaseManager, std::move(abtTree), opCtx, expCtx, nss, collection);
+ return optimizeAndCreateExecutor(
+ phaseManager, std::move(abtTree), opCtx, expCtx, nss, collection);
+
+ } else {
+ // Default to using heuristics.
+ OptPhaseManager phaseManager{OptPhaseManager::getAllRewritesSet(),
+ prefixId,
+ std::move(metadata),
+ DebugInfo::kDefaultForProd};
+ phaseManager.getHints() = queryHints;
+
+ return optimizeAndCreateExecutor(
+ phaseManager, std::move(abtTree), opCtx, expCtx, nss, collection);
+ }
}
} // namespace mongo
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index 2f3a46adb83..d72ed205996 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -259,6 +259,7 @@ env.Library(
env.Library(
target="query_knobs",
source=[
+ 'ce_mode_parameter.cpp',
'plan_cache_size_parameter.cpp',
'query_feature_flags.idl',
'query_knobs.idl',
@@ -353,6 +354,7 @@ env.CppUnitTest(
"canonical_query_encoder_test.cpp",
"canonical_query_test.cpp",
"canonical_query_test_util.cpp",
+ "ce_mode_parameter_test.cpp",
"classic_stage_builder_test.cpp",
"count_command_test.cpp",
"cursor_response_test.cpp",
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript
index c570a88b23f..4941d74512d 100644
--- a/src/mongo/db/query/ce/SConscript
+++ b/src/mongo/db/query/ce/SConscript
@@ -7,10 +7,42 @@ env = env.Clone()
env.Library(
target="query_ce",
source=[
+ 'array_histogram.cpp',
+ 'ce_histogram.cpp',
+ 'ce_math.cpp',
'ce_sampling.cpp',
+ 'ce_estimation.cpp',
+ 'collection_statistics.cpp',
+ 'histogram.cpp',
+ 'utils.cpp',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/exec/sbe/query_sbe_abt',
'$BUILD_DIR/mongo/db/query/optimizer/optimizer',
],
)
+
+env.Library(
+ target="ce_test_utils",
+ source=[
+ 'ce_test_utils.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_values',
+ '$BUILD_DIR/mongo/db/exec/sbe/sbe_abt_test_util',
+ '$BUILD_DIR/mongo/db/query/optimizer/unit_test_utils',
+ "$BUILD_DIR/mongo/unittest/unittest",
+ 'query_ce',
+ ],
+)
+
+env.CppUnitTest(
+ target="ce_histogram_test",
+ source=[
+ "ce_histogram_test.cpp",
+ ],
+ LIBDEPS=[
+ 'ce_test_utils',
+ ],
+)
diff --git a/src/mongo/db/query/ce/array_histogram.cpp b/src/mongo/db/query/ce/array_histogram.cpp
new file mode 100644
index 00000000000..32da0754b7e
--- /dev/null
+++ b/src/mongo/db/query/ce/array_histogram.cpp
@@ -0,0 +1,123 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/array_histogram.h"
+
+namespace mongo::ce {
+using namespace sbe;
+
+ArrayHistogram::ArrayHistogram() : ArrayHistogram(Histogram(), {}) {}
+
+ArrayHistogram::ArrayHistogram(Histogram scalar,
+ std::map<value::TypeTags, size_t> typeCounts,
+ Histogram arrayUnique,
+ Histogram arrayMin,
+ Histogram arrayMax,
+ std::map<value::TypeTags, size_t> arrayTypeCounts)
+ : _scalar(std::move(scalar)),
+ _typeCounts(std::move(typeCounts)),
+ _arrayUnique(std::move(arrayUnique)),
+ _arrayMin(std::move(arrayMin)),
+ _arrayMax(std::move(arrayMax)),
+ _arrayTypeCounts(std::move(arrayTypeCounts)) {
+ invariant(isArray());
+}
+
+ArrayHistogram::ArrayHistogram(Histogram scalar, std::map<value::TypeTags, size_t> typeCounts)
+ : _scalar(std::move(scalar)),
+ _typeCounts(std::move(typeCounts)),
+ _arrayUnique(boost::none),
+ _arrayMin(boost::none),
+ _arrayMax(boost::none),
+ _arrayTypeCounts(boost::none) {
+ invariant(!isArray());
+}
+
+bool ArrayHistogram::isArray() const {
+ return _arrayUnique && _arrayMin && _arrayMax && _arrayTypeCounts;
+}
+
+std::string typeCountsToString(const std::map<value::TypeTags, size_t>& typeCounts) {
+ std::ostringstream os;
+ os << "{";
+ bool first = true;
+ for (auto [tag, count] : typeCounts) {
+ if (!first)
+ os << ", ";
+ os << tag << ": " << count;
+ first = false;
+ }
+ os << "}";
+ return os.str();
+}
+
+std::string ArrayHistogram::toString() const {
+ std::ostringstream os;
+ os << "{\n";
+ os << " scalar: " << _scalar.toString();
+ os << ",\n typeCounts: " << typeCountsToString(_typeCounts);
+ if (isArray()) {
+ os << ",\n arrayUnique: " << _arrayUnique->toString();
+ os << ",\n arrayMin: " << _arrayMin->toString();
+ os << ",\n arrayMax: " << _arrayMax->toString();
+ os << ",\n arrayTypeCounts: " << typeCountsToString(*_arrayTypeCounts);
+ }
+ os << "\n}\n";
+ return os.str();
+}
+
+const Histogram& ArrayHistogram::getScalar() const {
+ return _scalar;
+}
+
+const Histogram& ArrayHistogram::getArrayUnique() const {
+ invariant(isArray());
+ return *_arrayUnique;
+}
+
+const Histogram& ArrayHistogram::getArrayMin() const {
+ invariant(isArray());
+ return *_arrayMin;
+}
+
+const Histogram& ArrayHistogram::getArrayMax() const {
+ invariant(isArray());
+ return *_arrayMax;
+}
+
+const std::map<value::TypeTags, size_t>& ArrayHistogram::getTypeCounts() const {
+ return _typeCounts;
+}
+
+const std::map<value::TypeTags, size_t>& ArrayHistogram::getArrayTypeCounts() const {
+ invariant(isArray());
+ return *_arrayTypeCounts;
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/array_histogram.h b/src/mongo/db/query/ce/array_histogram.h
new file mode 100644
index 00000000000..dce62420729
--- /dev/null
+++ b/src/mongo/db/query/ce/array_histogram.h
@@ -0,0 +1,84 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/ce/histogram.h"
+
+namespace mongo::ce {
+
+class ArrayHistogram {
+public:
+ // Constructs an empty scalar histogram.
+ ArrayHistogram();
+
+ // Constructor for scalar field histograms.
+ ArrayHistogram(Histogram scalar, std::map<value::TypeTags, size_t> typeCounts);
+
+ // Constructor for array field histograms. We have to initialize all array fields in this case.
+ ArrayHistogram(Histogram scalar,
+ std::map<value::TypeTags, size_t> typeCounts,
+ Histogram arrayUnique,
+ Histogram arrayMin,
+ Histogram arrayMax,
+ std::map<value::TypeTags, size_t> arrayTypeCounts);
+
+ ArrayHistogram(const ArrayHistogram&) = delete;
+
+ // Returns whether or not this histogram includes array data points.
+ bool isArray() const;
+
+ std::string toString() const;
+ const Histogram& getScalar() const;
+ const Histogram& getArrayUnique() const;
+ const Histogram& getArrayMin() const;
+ const Histogram& getArrayMax() const;
+ const std::map<value::TypeTags, size_t>& getTypeCounts() const;
+ const std::map<value::TypeTags, size_t>& getArrayTypeCounts() const;
+
+private:
+ /* Histogram fields for all paths. */
+
+ // Contains values which appeared originally as scalars on the path.
+ Histogram _scalar;
+ std::map<value::TypeTags, size_t> _typeCounts;
+
+ /* Histogram fields for array paths (only initialized if arrays are present). */
+
+ // Contains unique scalar values originating from arrays.
+ boost::optional<Histogram> _arrayUnique;
+ // Contains minimum values originating from arrays **per class**.
+ boost::optional<Histogram> _arrayMin;
+ // Contains maximum values originating from arrays **per class**.
+ boost::optional<Histogram> _arrayMax;
+ boost::optional<std::map<value::TypeTags, size_t>> _arrayTypeCounts;
+};
+
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_estimation.cpp b/src/mongo/db/query/ce/ce_estimation.cpp
new file mode 100644
index 00000000000..05be80562d3
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_estimation.cpp
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/ce_estimation.h"
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+#include "mongo/db/query/optimizer/syntax/expr.h"
+
+namespace mongo::ce {
+using namespace sbe;
+
+/**
+ * Estimates the cardinality of an equality predicate given an ArrayHistogram and an SBE value and
+ * type tag pair.
+ */
+double estimateCardEq(const ArrayHistogram& ah, value::TypeTags tag, value::Value val) {
+ if (tag != value::TypeTags::Null) {
+ double card = ah.getScalar().estimate(tag, val, EstimationType::kEqual)._card;
+ if (ah.isArray()) {
+ return card + ah.getArrayUnique().estimate(tag, val, EstimationType::kEqual)._card;
+ }
+ return card;
+ } else {
+ // Predicate: {field: null}
+ // Count the values that are either null or that do not contain the field.
+ // TODO:
+ // This prototype doesn't have the concept of missing values. It can be added easily
+ // by adding a cardinality estimate that is >= the number of values.
+ // Estimation of $exists can be built on top of this estimate:
+ // {$exists: true} matches the documents that contain the field, including those where the
+ // field value is null.
+ // {$exists: false} matches only the documents that do not contain the field.
+ auto findNull = ah.getTypeCounts().find(value::TypeTags::Null);
+ if (findNull != ah.getTypeCounts().end()) {
+ return findNull->second;
+ }
+ return 0.0;
+ }
+}
+
+static EstimationResult estimateRange(const Histogram& histogram,
+ bool lowInclusive,
+ value::TypeTags tagLow,
+ value::Value valLow,
+ bool highInclusive,
+ value::TypeTags tagHigh,
+ value::Value valHigh) {
+ const EstimationType highType =
+ highInclusive ? EstimationType::kLessOrEqual : EstimationType::kLess;
+ const EstimationResult highEstimate = histogram.estimate(tagHigh, valHigh, highType);
+
+ const EstimationType lowType =
+ lowInclusive ? EstimationType::kLess : EstimationType::kLessOrEqual;
+ const EstimationResult lowEstimate = histogram.estimate(tagLow, valLow, lowType);
+
+ return highEstimate - lowEstimate;
+}
+
+/**
+ * Estimates the cardinality of a range predicate given an ArrayHistogram and a range predicate.
+ * Set 'includeScalar' to true to indicate whether or not the provided range should include no-array
+ * values. The other fields define the range of the estimation.
+ */
+double estimateCardRange(const ArrayHistogram& ah,
+ bool includeScalar,
+ /* Define lower bound. */
+ bool lowInclusive,
+ value::TypeTags tagLow,
+ value::Value valLow,
+ /* Define upper bound. */
+ bool highInclusive,
+ value::TypeTags tagHigh,
+ value::Value valHigh) {
+ uassert(6695701,
+ "Low bound must not be higher than high",
+ compareValues3w(tagLow, valLow, tagHigh, valHigh) <= 0);
+
+ // Helper lambda to shorten code for legibility.
+ auto estRange = [&](const Histogram& h) {
+ return estimateRange(h, lowInclusive, tagLow, valLow, highInclusive, tagHigh, valHigh);
+ };
+
+ double result = 0.0;
+ if (ah.isArray()) {
+ const auto arrayMinEst = estRange(ah.getArrayMin());
+ const auto arrayMaxEst = estRange(ah.getArrayMax());
+ const auto arrayUniqueEst = estRange(ah.getArrayUnique());
+
+ // TODO: should we consider diving by sqrt(ndv) or just by ndv?
+ const double arrayUniqueDensity = (arrayUniqueEst._ndv == 0.0)
+ ? 0.0
+ : (arrayUniqueEst._card / std::sqrt(arrayUniqueEst._ndv));
+
+ result = std::max(std::max(arrayMinEst._card, arrayMaxEst._card), arrayUniqueDensity);
+ }
+
+ if (includeScalar) {
+ const auto scalarEst = estRange(ah.getScalar());
+ result += scalarEst._card;
+ }
+
+ return result;
+}
+
+double estimateIntervalCardinality(const ce::ArrayHistogram& ah,
+ const optimizer::IntervalRequirement& interval,
+ optimizer::CEType childResult) {
+ auto getBound = [](const optimizer::BoundRequirement& boundReq) {
+ return boundReq.getBound().cast<optimizer::Constant>()->get();
+ };
+
+ if (interval.isFullyOpen()) {
+ return childResult;
+ } else if (interval.isEquality()) {
+ ce::SBEValue sbeValue = getBound(interval.getLowBound());
+ auto cardinality = estimateCardEq(ah, sbeValue.getTag(), sbeValue.getValue());
+ return cardinality;
+ }
+
+ // Otherwise, we have a range.
+ ce::SBEValue lowSBEValue(sbe::value::TypeTags::MinKey, 0);
+ auto lowBound = interval.getLowBound();
+ if (!lowBound.isInfinite()) {
+ lowSBEValue = getBound(lowBound);
+ }
+
+ ce::SBEValue highSBEValue(sbe::value::TypeTags::MaxKey, 0);
+ auto highBound = interval.getHighBound();
+ if (!highBound.isInfinite()) {
+ highSBEValue = getBound(highBound);
+ }
+
+ return estimateCardRange(ah,
+ true /*includeScalar*/,
+ lowBound.isInclusive(),
+ lowSBEValue.getTag(),
+ lowSBEValue.getValue(),
+ highBound.isInclusive(),
+ highSBEValue.getTag(),
+ highSBEValue.getValue());
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_estimation.h b/src/mongo/db/query/ce/ce_estimation.h
new file mode 100644
index 00000000000..02fc036bd61
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_estimation.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/ce/array_histogram.h"
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/index_bounds.h"
+
+namespace mongo::ce {
+
+double estimateCardEq(const ArrayHistogram& ah, value::TypeTags tag, value::Value val);
+
+/**
+ * Given an array histogram, an interval, and the input cardinality, estimates the cardinality of
+ * the interval.
+ */
+double estimateIntervalCardinality(const ArrayHistogram& estimator,
+ const optimizer::IntervalRequirement& interval,
+ optimizer::CEType inputCardinality);
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
new file mode 100644
index 00000000000..1a04b740333
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+
+#include "mongo/db/query/ce/ce_estimation.h"
+#include "mongo/db/query/ce/ce_histogram.h"
+#include "mongo/db/query/ce/ce_math.h"
+#include "mongo/db/query/ce/collection_statistics.h"
+
+#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/utils/abt_hash.h"
+#include "mongo/db/query/optimizer/utils/memo_utils.h"
+
+namespace mongo::optimizer::cascades {
+
+using namespace properties;
+
+class CEHistogramTransportImpl {
+public:
+ CEHistogramTransportImpl(OperationContext* opCtx, const ce::CollectionStatistics& stats)
+ : _opCtx(opCtx), _heuristicCE(), _stats(stats) {}
+
+ ~CEHistogramTransportImpl() {}
+
+ CEType transport(const ABT& n,
+ const ScanNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType /*bindResult*/) {
+ return _stats.getCardinality();
+ }
+
+ CEType transport(const ABT& n,
+ const SargableNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType childResult,
+ CEType /*bindsResult*/,
+ CEType /*refsResult*/) {
+ // Early out and return 0 since we don't expect to get more results.
+ if (childResult == 0.0) {
+ return 0.0;
+ }
+
+ std::vector<double> topLevelSelectivities;
+ for (const auto& [key, req] : node.getReqMap()) {
+ std::vector<double> disjSelectivities;
+ auto path = key._path.cast<PathGet>()->name();
+
+ // Fallback to heuristic if no histogram.
+ auto histogram = _stats.getHistogram(path);
+ if (!histogram) {
+ // For now, because of the structure of SargableNode and the implementation of
+ // HeuristicCE, we can't combine heuristic & histogram estimates. In this case,
+ // default to Heuristic if we don't have a histogram for any of the predicates.
+ return _heuristicCE.deriveCE(memo, logicalProps, n.ref());
+ }
+
+ // Intervals are in DNF.
+ const auto intervalDNF = req.getIntervals();
+ const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes();
+ for (const auto& disjunct : disjuncts) {
+ const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes();
+
+ std::vector<double> conjSelectivities;
+ for (const auto& conjunct : conjuncts) {
+ const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr();
+ auto cardinality =
+ ce::estimateIntervalCardinality(*histogram, interval, childResult);
+
+ // We have to convert the cardinality to a selectivity. The histogram returns
+ // the cardinality for the entire collection; however, fewer records may be
+ // expected at the SargableNode.
+ conjSelectivities.push_back(cardinality / _stats.getCardinality());
+ }
+
+ auto backoff = ce::conjExponentialBackoff(conjSelectivities);
+ disjSelectivities.push_back(backoff);
+ }
+
+ auto backoff = ce::disjExponentialBackoff(disjSelectivities);
+ topLevelSelectivities.push_back(backoff);
+ }
+
+ // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
+ auto backoff = ce::conjExponentialBackoff(topLevelSelectivities);
+ return backoff * childResult;
+ }
+
+ CEType transport(const ABT& n,
+ const RootNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType childResult,
+ CEType /*refsResult*/) {
+ // Root node does not change cardinality.
+ return childResult;
+ }
+
+ /**
+ * Other ABT types default to heuristic CE.
+ */
+ template <typename T, typename... Ts>
+ CEType transport(const ABT& n,
+ const T& /*node*/,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ Ts&&...) {
+ if (canBeLogicalNode<T>()) {
+ return _heuristicCE.deriveCE(memo, logicalProps, n.ref());
+ }
+ return 0.0;
+ }
+
+private:
+ OperationContext* _opCtx;
+ HeuristicCE _heuristicCE;
+ const ce::CollectionStatistics& _stats;
+};
+
+CEHistogramTransport::CEHistogramTransport(OperationContext* opCtx,
+ const ce::CollectionStatistics& stats)
+ : _impl(std::make_unique<CEHistogramTransportImpl>(opCtx, stats)) {}
+
+CEHistogramTransport::~CEHistogramTransport() {}
+
+CEType CEHistogramTransport::deriveCE(const Memo& memo,
+ const LogicalProps& logicalProps,
+ const ABT::reference_type logicalNodeRef) const {
+ return algebra::transport<true>(logicalNodeRef, *this->_impl, memo, logicalProps);
+}
+
+} // namespace mongo::optimizer::cascades
diff --git a/src/mongo/db/query/ce/ce_histogram.h b/src/mongo/db/query/ce/ce_histogram.h
new file mode 100644
index 00000000000..6132cc19339
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_histogram.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/ce/collection_statistics.h"
+#include "mongo/db/query/optimizer/cascades/interfaces.h"
+
+namespace mongo::optimizer::cascades {
+
+class CEHistogramTransportImpl;
+
+class CEHistogramTransport : public CEInterface {
+public:
+ CEHistogramTransport(OperationContext* opCtx, const ce::CollectionStatistics& stats);
+ ~CEHistogramTransport();
+
+ CEType deriveCE(const Memo& memo,
+ const properties::LogicalProps& logicalProps,
+ ABT::reference_type logicalNodeRef) const final;
+
+private:
+ std::unique_ptr<CEHistogramTransportImpl> _impl;
+};
+
+} // namespace mongo::optimizer::cascades
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
new file mode 100644
index 00000000000..76a32ef5fa0
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -0,0 +1,366 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/ce_estimation.h"
+#include "mongo/db/query/ce/ce_histogram.h"
+#include "mongo/db/query/ce/ce_test_utils.h"
+#include "mongo/db/query/sbe_stage_builder_helpers.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+namespace {
+
+using namespace optimizer;
+using namespace cascades;
+
+class CEHistogramTester : public CETester {
+public:
+ CEHistogramTester(std::string collName, double numRecords, const CollectionStatistics& stats)
+ : CETester(collName, numRecords), _stats{stats} {}
+
+protected:
+ std::unique_ptr<CEInterface> getCETransport(OperationContext* opCtx) override {
+ return std::make_unique<CEHistogramTransport>(opCtx, _stats);
+ }
+
+private:
+ const CollectionStatistics& _stats;
+};
+
+struct TestBucket {
+ Value val;
+ int equalFreq;
+ int rangeFreq = 0;
+ int ndv = 1; /* ndv including bucket boundary*/
+};
+
+std::unique_ptr<ArrayHistogram> getHistogramFromData(std::vector<TestBucket> testBuckets) {
+ value::Array bounds;
+ std::vector<Bucket> buckets;
+ std::map<value::TypeTags, size_t> typeCounts;
+
+ int cumulativeFreq = 0;
+ int cumulativeNDV = 0;
+ for (auto b : testBuckets) {
+ // Add bucket boundary value to bounds.
+ auto sbeVal = stage_builder::makeValue(b.val);
+ auto [tag, val] = sbeVal;
+ bounds.push_back(tag, val);
+
+ // Increment count of values for each type tag.
+ if (auto it = typeCounts.find(tag); it != typeCounts.end()) {
+ it->second += b.equalFreq + b.rangeFreq;
+ } else {
+ typeCounts[tag] = b.equalFreq + b.rangeFreq;
+ }
+ cumulativeFreq += b.equalFreq + b.rangeFreq;
+ cumulativeNDV += b.ndv;
+
+ // Create a histogram bucket.
+ buckets.emplace_back(b.equalFreq,
+ b.rangeFreq,
+ cumulativeFreq,
+ b.ndv - 1, /* ndv excluding bucket boundary*/
+ cumulativeNDV);
+ }
+
+ return std::make_unique<ArrayHistogram>(Histogram(std::move(bounds), std::move(buckets)),
+ std::move(typeCounts));
+}
+
+TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) {
+ const auto collName = "test";
+ const auto collCardinality = 8;
+
+ CollectionStatistics collStats(collCardinality);
+
+ // Construct a histogram with two buckets: one for 3 ints equal to 1, another for 5 strings
+ // equal to "ing".
+ const std::string& str = "ing";
+ collStats.addHistogram("a",
+ getHistogramFromData({
+ {Value(1), 3 /* frequency */},
+ {Value(str), 5 /* frequency */},
+ }));
+
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ // Test $eq.
+ ASSERT_MATCH_CE(t, "{a: {$eq: 1}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$eq: 2}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$eq: \"ing\"}}", 5.0);
+ ASSERT_MATCH_CE(t, "{a: {$eq: \"foo\"}}", 0.0);
+
+ // Test case when field doesn't match fieldpath of histogram. This falls back to heuristics.
+ ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 0.8);
+
+ // Test $gt.
+ ASSERT_MATCH_CE(t, "{a: {$gt: 3}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: 1}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: 0}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: \"bar\"}}", 5.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: \"ing\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: \"zap\"}}", 0.0);
+
+ // Test $lt.
+ ASSERT_MATCH_CE(t, "{a: {$lt: 3}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: 1}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: 0}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: \"bar\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: \"ing\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: \"zap\"}}", 5.0);
+
+ // Test $gte.
+ ASSERT_MATCH_CE(t, "{a: {$gte: 3}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: 1}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: 0}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: \"bar\"}}", 5.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: \"ing\"}}", 5.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: \"zap\"}}", 0.0);
+
+ // Test $lte.
+ ASSERT_MATCH_CE(t, "{a: {$lte: 3}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$lte: 1}}", 3.0);
+ ASSERT_MATCH_CE(t, "{a: {$lte: 0}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lte: \"bar\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lte: \"ing\"}}", 5.0);
+ ASSERT_MATCH_CE(t, "{a: {$lte: \"zap\"}}", 5.0);
+}
+
+TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) {
+ const auto collName = "test";
+ const auto collCardinality = 9;
+
+ CollectionStatistics collStats(collCardinality);
+
+ // Construct a histogram with three int buckets for field 'a'.
+ collStats.addHistogram("a",
+ getHistogramFromData({
+ {Value(1), 3 /* frequency */},
+ {Value(2), 5 /* frequency */},
+ {Value(3), 1 /* frequency */},
+ }));
+
+ // Construct a histogram with two int buckets for field 'b'.
+ collStats.addHistogram("b",
+ getHistogramFromData({
+ {Value(22), 3 /* frequency */},
+ {Value(33), 6 /* frequency */},
+ }));
+
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ // Test simple conjunctions on one field. Note the first example: the range we expect to see
+ // here is (1, 3); however, the structure in the SargableNode gives us a conjunction of two
+ // intervals instead: (1, "") ^ (nan, 3) This is then estimated using exponential backoff to
+ // give us a less accurate result. The correct cardinality here would be 5.
+ ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lt: 3}}", 5.66);
+ ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lte: 3}}", 6.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lt: 3}}", 8.0);
+ ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lte: 3}}", 9.0);
+
+ // Test ranges which exclude each other.
+ ASSERT_MATCH_CE(t, "{a: {$lt: 1}, a: {$gt: 3}}", 0.0);
+
+ // Test overlapping ranges. This is a similar case to {a: {$gt: 1}, a: {$lt: 3}} above: we
+ // expect to see the range [2, 2]; instead, we see the range [nan, 2] ^ [2, "").
+ ASSERT_MATCH_CE(t, "{a: {$lte: 2}, a: {$gte: 2}}", 5.66);
+
+ // Test conjunctions over multiple fields for which we have histograms. Here we expect a
+ // cardinality estimated by exponential backoff.
+ ASSERT_MATCH_CE(t, "{a: {$eq: 2}, b: {$eq: 22}}", 2.24);
+ ASSERT_MATCH_CE(t, "{a: {$eq: 11}, b: {$eq: 22}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$gt: 11}, a: {$lte: 100}, b: {$eq: 22}}", 0.0);
+ ASSERT_MATCH_CE(t, "{a: {$lt: 3}, a: {$gte: 1}, b: {$lt: 100}, b: {$gt: 30}}", 5.66);
+
+ // Test conjunctions over multiple fields for which we may not have histograms. This falls back
+ // to heuristic estimation.
+ ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 0.09);
+ ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 0.09);
+}
+
+TEST(CEHistogramTest, SanityTestEmptyHistogram) {
+ const auto collName = "test";
+ const auto collCardinality = 0;
+
+ CollectionStatistics collStats(collCardinality);
+ collStats.addHistogram("empty", std::make_unique<ArrayHistogram>());
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}}", 0.0);
+ ASSERT_MATCH_CE(t, "{empty: {$lt: 1.0}, empty: {$gt: 0.0}}", 0.0);
+ ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}, other: {$eq: \"anything\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{other: {$eq: \"anything\"}, empty: {$eq: 1.0}}", 0.0);
+}
+
+TEST(CEHistogramTest, AssertOneBucketOneIntHistogram) {
+ const auto collName = "test";
+ const auto collCardinality = 50;
+
+ CollectionStatistics collStats(collCardinality);
+
+ // Create a histogram with a single bucket that contains exactly one int (42) with a frequency
+ // of 50 (equal to the collection cardinality).
+ collStats.addHistogram("soloInt",
+ getHistogramFromData({
+ {Value(42), collCardinality /* frequency */},
+ }));
+
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ // Check against a variety of intervals that include 42 as a bound.
+ ASSERT_MATCH_CE(t, "{soloInt: {$eq: 42}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lte: 42}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 42}}", collCardinality);
+
+ // Check against a variety of intervals that include 42 only as one bound.
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 43}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 43}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 42}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 42}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 42}}", collCardinality);
+
+ // Check against a variety of intervals close to 42 using a lower bound of 41 and a higher bound
+ // of 43.
+ ASSERT_MATCH_CE(t, "{soloInt: {$eq: 41}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$eq: 43}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lt: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lte: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 43}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 43}}", collCardinality);
+
+ // Check against different types.
+ // TODO: investigate why this returns 5?
+ // ASSERT_MATCH_CE(t, "{soloInt: {$eq: [42]}}", 0.0);
+ // TODO: investigate why this line triggers an out of memory exception.
+ // ASSERT_MATCH_CE(t, "{soloInt: {$eq: {soloInt: 42}}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$eq: \"42\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lt: \"42\"}}", 0.0);
+ ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42.1}}", collCardinality);
+}
+
+TEST(CEHistogramTest, AssertOneBoundIntRangeHistogram) {
+ const auto collName = "test";
+ const auto collCardinality = 51;
+
+ CollectionStatistics collStats(collCardinality);
+
+ collStats.addHistogram(
+ "intRange",
+ getHistogramFromData({
+ {Value(10), 5 /* frequency */},
+ {Value(20), 1 /* frequency */, 45 /* range frequency */, 10 /* ndv */},
+ }));
+
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ // Test ranges that overlap only with the lower bound.
+ // Note: 5 values equal 10.
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 10}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}, intRange: {$gte: 10}}", 5.0);
+
+ // Test ranges that overlap only with the upper bound.
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 11}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 15}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 15.5}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 20}}", 1.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 20}}", 1.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 10}}", 46.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 15}}", 23.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 15}}", 23.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lte: 20}}", 23.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lte: 20}}", 23.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lt: 20}}", 23.27);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lt: 20}}", 23.27);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lt: 15}}", 17.25);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lt: 15}}", 17.25);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lte: 15}}", 17.25);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lte: 15}}", 17.25);
+
+ // Test ranges that partially overlap with the entire histogram.
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 11}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 15}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$lte: 15}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lte: 15}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lt: 15}}", 27.5);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5);
+
+ // Test ranges that include all values in the histogram.
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 10}, intRange: {$lte: 20}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lte: 30}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lt: 30}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lte: 30}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lt: 30}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 100}}", collCardinality);
+ ASSERT_MATCH_CE(t, "{intRange: {$lte: 100}}", collCardinality);
+
+ // Test ranges that are fully included in the histogram.
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 10.5}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 12.5}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 19.36}}", 5.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 19}, intRange: {$gt: 11}}", 17.26);
+
+ // Test ranges that don't overlap with the histogram.
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 10}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 5}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$lte: 5}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 20.1}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$eq: 21}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 21}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 20}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 100}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lte: 50}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lt: 50}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lt: 50}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lte: 50}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lte: 5}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lt: 5}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lt: 5}}", 0.0);
+ ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lte: 5}}", 0.0);
+}
+
+} // namespace
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_math.cpp b/src/mongo/db/query/ce/ce_math.cpp
new file mode 100644
index 00000000000..a5dd7bf04de
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_math.cpp
@@ -0,0 +1,62 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+
+#include "mongo/db/query/ce/ce_math.h"
+
+namespace mongo::ce {
+double conjExponentialBackoff(std::vector<double> conjSelectivities) {
+ std::sort(conjSelectivities.begin(), conjSelectivities.end());
+ double sel = conjSelectivities[0];
+ double f = 1.0;
+ size_t i = 1;
+ while (i < conjSelectivities.size() && i <= kMaxBackoffIterations) {
+ f /= 2.0;
+ sel *= std::pow(conjSelectivities[i], f);
+ i++;
+ }
+ return sel;
+}
+
+double disjExponentialBackoff(std::vector<double> disjSelectivities) {
+ std::sort(disjSelectivities.begin(), disjSelectivities.end(), std::greater<double>());
+ double sel = 1.0 - disjSelectivities[0];
+ double f = 1.0;
+ size_t i = 1;
+ while (i < disjSelectivities.size() && i <= kMaxBackoffIterations) {
+ f /= 2.0;
+ sel *= std::pow(1 - disjSelectivities[i], f);
+ i++;
+ }
+ return 1.0 - sel;
+}
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_math.h b/src/mongo/db/query/ce/ce_math.h
new file mode 100644
index 00000000000..3ef529a9224
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_math.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <vector>
+
+namespace mongo::ce {
+
+/**
+ * Specifies the maximum number of iterations to use when estimating via exponential backoff.
+ */
+const size_t kMaxBackoffIterations = 3;
+
+/**
+ * Estimates the selectivity of a conjunction given the selectivities of its subexpressions using
+ * exponential backoff.
+ */
+double conjExponentialBackoff(std::vector<double> conjSelectivities);
+
+/**
+ * Estimates the selectivity of a disjunction given the selectivities of its subexpressions using
+ * exponential backoff.
+ */
+double disjExponentialBackoff(std::vector<double> disjSelectivities);
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp
new file mode 100644
index 00000000000..eb7824eb483
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_test_utils.cpp
@@ -0,0 +1,76 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/ce_test_utils.h"
+
+#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
+#include "mongo/db/query/optimizer/explain.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
+#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+
+using namespace optimizer;
+using namespace cascades;
+
+CETester::CETester(std::string collName, double numRecords)
+ : _collName(std::move(collName)), _numRecords(std::move(numRecords)) {}
+
+double CETester::getCE(const std::string& query) {
+ // Mock opCtx for test.
+ QueryTestServiceContext serviceContext;
+ auto opCtx = serviceContext.makeOperationContext();
+
+ // Mock memo.
+ ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _numRecords);
+ Metadata metadata({{_collName, sd}});
+ Memo memo(DebugInfo::kDefaultForTests,
+ metadata,
+ std::make_unique<DefaultLogicalPropsDerivation>(),
+ std::make_unique<HeuristicCE>());
+
+ // Construct placeholder PhaseManager.
+ PrefixId prefixId;
+ OptPhaseManager phaseManager({OptPhaseManager::OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{_collName, {{}, {}}}}},
+ DebugInfo::kDefaultForTests);
+
+ // Construct ABT from pipeline and optimize.
+ ABT abt = translatePipeline("[{$match: " + query + "}]", _collName);
+ ASSERT_TRUE(phaseManager.optimize(abt));
+
+ // Get cardinality estimate.
+ auto cht = getCETransport(opCtx.get());
+ return cht->deriveCE(memo, {}, abt.ref());
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h
new file mode 100644
index 00000000000..e6fc660e27b
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_test_utils.h
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/cascades/interfaces.h"
+
+namespace mongo {
+
+namespace optimizer {
+namespace cascades {
+
+// Forward declaration.
+class CEInterface;
+
+} // namespace cascades
+} // namespace optimizer
+
+namespace ce {
+
+const double kMaxCEError = 0.01;
+
+/**
+ * A helpful macro for asserting that the CE of a $match predicate is approximately what we were
+ * expecting.
+ */
+#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
+ ASSERT_APPROX_EQUAL(expectedCE, ce.getCE(predicate), kMaxCEError)
+
+/**
+ * A test utility class for helping verify the cardinality of CE transports on a given $match
+ * predicate.
+ */
+class CETester {
+public:
+ CETester(std::string collName, double numRecords);
+
+ /**
+ * Returns the estimated cardinality of a given 'matchPredicate'.
+ */
+ double getCE(const std::string& matchPredicate);
+
+protected:
+ /**
+ * Subclasses need to override this method to initialize the transports they are testing.
+ */
+ virtual std::unique_ptr<optimizer::cascades::CEInterface> getCETransport(
+ OperationContext* opCtx) = 0;
+
+ std::string _collName;
+
+ // The number of records in the collection we are testing.
+ double _numRecords;
+};
+
+} // namespace ce
+} // namespace mongo
diff --git a/src/mongo/db/query/ce/collection_statistics.cpp b/src/mongo/db/query/ce/collection_statistics.cpp
new file mode 100644
index 00000000000..397228d785a
--- /dev/null
+++ b/src/mongo/db/query/ce/collection_statistics.cpp
@@ -0,0 +1,63 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/collection_statistics.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo::ce {
+
+bool CollectionStatistics::hasCollectionStatistics(const NamespaceString& nss) {
+ return false; // TODO: actually check if we have statistics for 'nss' here.
+}
+
+const CollectionStatistics& CollectionStatistics::getCollectionStatistics(
+ const NamespaceString& nss) {
+ MONGO_UNIMPLEMENTED; // TODO: actually get statistics here.
+}
+
+CollectionStatistics::CollectionStatistics(double cardinality)
+ : _cardinality{cardinality}, _histograms{} {};
+
+double CollectionStatistics::getCardinality() const {
+ return _cardinality;
+}
+
+void CollectionStatistics::addHistogram(const std::string& path,
+ std::unique_ptr<ArrayHistogram> histogram) {
+ _histograms[path] = std::move(histogram);
+}
+
+const ArrayHistogram* CollectionStatistics::getHistogram(const std::string& path) const {
+ if (auto mapIt = _histograms.find(path); mapIt != _histograms.end()) {
+ return mapIt->second.get();
+ }
+ return nullptr;
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/collection_statistics.h b/src/mongo/db/query/ce/collection_statistics.h
new file mode 100644
index 00000000000..4ac133129cf
--- /dev/null
+++ b/src/mongo/db/query/ce/collection_statistics.h
@@ -0,0 +1,76 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/ce/array_histogram.h"
+
+namespace mongo::ce {
+
+using Histograms = std::map<std::string, std::unique_ptr<ArrayHistogram>>;
+
+class CollectionStatistics {
+public:
+ /**
+ * Returns whether collection statistics for a collection with namespace 'nss' are available.
+ */
+ static bool hasCollectionStatistics(const NamespaceString& nss);
+
+ /**
+ * Retrieves the collection statistics for a collection with namespace 'nss'.
+ *
+ * Note: Must check hasCollectionStatistics(nss) first, as this will throw if statistics are
+ * unavailable for 'nss'.
+ */
+ static const CollectionStatistics& getCollectionStatistics(const NamespaceString& nss);
+
+ CollectionStatistics(double cardinality);
+
+ /**
+ * Returns the cardinality of the given collection.
+ */
+ double getCardinality() const;
+
+ /**
+ * Adds a histogram along the given path.
+ */
+ void addHistogram(const std::string& path, std::unique_ptr<ArrayHistogram> histogram);
+
+ /**
+ * Returns the histogram for the given field path, or nullptr if none exists.
+ */
+ const ArrayHistogram* getHistogram(const std::string& path) const;
+
+private:
+ double _cardinality;
+ Histograms _histograms;
+};
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/histogram.cpp b/src/mongo/db/query/ce/histogram.cpp
new file mode 100644
index 00000000000..2359929707d
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram.cpp
@@ -0,0 +1,220 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/histogram.h"
+
+namespace mongo::ce {
+
+using namespace sbe;
+
+Bucket::Bucket(
+ double equalFreq, double rangeFreq, double cumulativeFreq, double ndv, double cumulativeNDV)
+ : _equalFreq(equalFreq),
+ _rangeFreq(rangeFreq),
+ _cumulativeFreq(cumulativeFreq),
+ _ndv(ndv),
+ _cumulativeNDV(cumulativeNDV) {
+ uassert(6695702, "Invalid equalFreq", _equalFreq >= 0.0);
+ uassert(6695703, "Invalid rangeFreq", _rangeFreq >= 0.0);
+ uassert(6695704, "Invalid ndv", _ndv <= _rangeFreq);
+ uassert(6695705, "Invalid cumulative frequency", _cumulativeFreq >= _equalFreq + _rangeFreq);
+ uassert(6695706, "Invalid cumulative ndv", _cumulativeNDV >= _ndv + 1.0);
+}
+
+std::string Bucket::toString() const {
+ std::ostringstream os;
+ os << "equalFreq: " << _equalFreq << ", rangeFreq: " << _rangeFreq
+ << ", cumulativeFreq: " << _cumulativeFreq << ", ndv: " << _ndv
+ << ", cumulativeNDV: " << _cumulativeNDV;
+ return os.str();
+}
+
+Histogram::Histogram() : Histogram({}, {}) {}
+
+Histogram::Histogram(value::Array bounds, std::vector<Bucket> buckets)
+ : _bounds(std::move(bounds)), _buckets(std::move(buckets)) {
+ uassert(6695707, "Invalid sizes", bounds.size() == buckets.size());
+}
+
+std::string Histogram::toString() const {
+ std::ostringstream os;
+ os << "[";
+ for (size_t i = 0; i < _buckets.size(); i++) {
+ os << "{val: " << _bounds.getAt(i) << ", " << _buckets.at(i).toString() << "}";
+ if (_buckets.size() - i > 1)
+ os << ",";
+ }
+ os << "]";
+ return os.str();
+}
+
+std::string Histogram::plot() const {
+ std::ostringstream os;
+ double maxFreq = 0;
+ const double maxBucketSize = 100;
+
+ for (const auto& bucket : _buckets) {
+ double maxBucketFreq = std::max(bucket._equalFreq, bucket._rangeFreq);
+ maxFreq = std::max(maxFreq, maxBucketFreq);
+ }
+
+ std::vector<std::pair<double, std::string>> headers;
+ size_t maxHeaderSize = 0;
+ for (size_t i = 0; i < _buckets.size(); ++i) {
+ std::ostringstream rngHeader;
+ std::ostringstream eqlHeader;
+ double scaledRngF = maxBucketSize * _buckets[i]._rangeFreq / maxFreq;
+ double scaledEqlF = maxBucketSize * _buckets[i]._equalFreq / maxFreq;
+ rngHeader << _bounds.getAt(i) << ": " << _buckets[i]._rangeFreq;
+ eqlHeader << _bounds.getAt(i) << ": " << _buckets[i]._equalFreq;
+ auto rngStr = rngHeader.str();
+ maxHeaderSize = std::max(maxHeaderSize, rngStr.size());
+ headers.emplace_back(scaledRngF, rngStr);
+ auto eqlStr = eqlHeader.str();
+ maxHeaderSize = std::max(maxHeaderSize, eqlStr.size());
+ headers.emplace_back(scaledEqlF, eqlStr);
+ }
+
+ const std::string maxLine(maxBucketSize + maxHeaderSize + 3, '-');
+ os << maxLine << "\n";
+ for (size_t j = 0; j < headers.size(); ++j) {
+ auto header = headers.at(j);
+ header.second.resize(maxHeaderSize, ' ');
+ const std::string bar(std::round(header.first), '*');
+ os << header.second << " | " << bar << "\n";
+ }
+ os << maxLine << "\n";
+
+ return os.str();
+}
+
+EstimationResult Histogram::getTotals() const {
+ if (_buckets.empty()) {
+ return {0.0, 0.0};
+ }
+
+ const Bucket& last = _buckets.back();
+ return {last._cumulativeFreq, last._cumulativeNDV};
+}
+
+EstimationResult Histogram::estimate(value::TypeTags tag,
+ value::Value val,
+ EstimationType type) const {
+ switch (type) {
+ case EstimationType::kGreater:
+ return getTotals() - estimate(tag, val, EstimationType::kLessOrEqual);
+
+ case EstimationType::kGreaterOrEqual:
+ return getTotals() - estimate(tag, val, EstimationType::kLess);
+
+ default:
+ // Continue.
+ break;
+ }
+
+ size_t bucketIndex = 0;
+ {
+ size_t len = _buckets.size();
+ while (len > 0) {
+ const size_t half = len >> 1;
+ const auto [boundTag, boundVal] = _bounds.getAt(bucketIndex + half);
+
+ if (compareValues3w(boundTag, boundVal, tag, val) < 0) {
+ bucketIndex += half + 1;
+ len -= half + 1;
+ } else {
+ len = half;
+ }
+ }
+ }
+ if (bucketIndex == _buckets.size()) {
+ // Value beyond the largest endpoint.
+ switch (type) {
+ case EstimationType::kEqual:
+ return {0.0, 0.0};
+
+ case EstimationType::kLess:
+ case EstimationType::kLessOrEqual:
+ return getTotals();
+
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
+
+ const Bucket& bucket = _buckets.at(bucketIndex);
+ const auto [boundTag, boundVal] = _bounds.getAt(bucketIndex);
+ const bool isEndpoint = compareValues3w(boundTag, boundVal, tag, val) == 0;
+
+ switch (type) {
+ case EstimationType::kEqual: {
+ if (isEndpoint) {
+ return {bucket._equalFreq, 1.0};
+ }
+ return {(bucket._ndv == 0.0) ? 0.0 : bucket._rangeFreq / bucket._ndv, 1.0};
+ }
+
+ case EstimationType::kLess: {
+ double resultCard = bucket._cumulativeFreq - bucket._equalFreq;
+ double resultNDV = bucket._cumulativeNDV - 1.0;
+
+ if (!isEndpoint) {
+ // TODO: consider value interpolation instead of assigning 50% of the weight.
+ resultCard -= bucket._rangeFreq / 2.0;
+ resultNDV -= bucket._ndv / 2.0;
+ }
+ return {resultCard, resultNDV};
+ }
+
+ case EstimationType::kLessOrEqual: {
+ double resultCard = bucket._cumulativeFreq;
+ double resultNDV = bucket._cumulativeNDV;
+
+ if (!isEndpoint) {
+ // TODO: consider value interpolation instead of assigning 50% of the weight.
+ resultCard -= bucket._equalFreq + bucket._rangeFreq / 2.0;
+ resultNDV -= 1.0 + bucket._ndv / 2.0;
+ }
+ return {resultCard, resultNDV};
+ }
+
+ default:
+ MONGO_UNREACHABLE;
+ }
+}
+
+const value::Array& Histogram::getBounds() const {
+ return _bounds;
+}
+
+const std::vector<Bucket>& Histogram::getBuckets() const {
+ return _buckets;
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/histogram.h b/src/mongo/db/query/ce/histogram.h
new file mode 100644
index 00000000000..d8e051b0c47
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram.h
@@ -0,0 +1,125 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "mongo/db/query/ce/utils.h"
+
+namespace mongo::ce {
+
+using namespace sbe;
+
+/**
+ * Statistics related to a single histogram bucket. The boundary value is kept in a separate array,
+ * so that each bucket has a corresponding boundary value. The reason for this to manage the memory
+ * of values.
+ */
+struct Bucket {
+ Bucket(double equalFreq,
+ double rangeFreq,
+ double cumulativeFreq,
+ double ndv,
+ double cumulativeNDV);
+
+ std::string toString() const;
+
+ // Frequency of the bound value itself.
+ double _equalFreq;
+
+ // Frequency of other values.
+ double _rangeFreq;
+
+ // Sum of frequencies of preceding buckets to avoid recomputing. Includes both _equalFreq and
+ // _rangeFreq.
+ double _cumulativeFreq;
+
+ // Number of distinct values in this bucket, excludes the bound.
+ double _ndv;
+
+ double _cumulativeNDV;
+
+ // TODO: add other statistical values like average size, etc
+};
+
+enum class EstimationType { kEqual, kLess, kLessOrEqual, kGreater, kGreaterOrEqual };
+
+const stdx::unordered_map<EstimationType, std::string> estimationTypeName = {
+ {EstimationType::kEqual, "eq"},
+ {EstimationType::kLess, "lt"},
+ {EstimationType::kLessOrEqual, "lte"},
+ {EstimationType::kGreater, "gt"},
+ {EstimationType::kGreaterOrEqual, "gte"}};
+
+struct EstimationResult {
+ double _card;
+ double _ndv;
+
+ EstimationResult operator-(const EstimationResult& other) const {
+ return {_card - other._card, _ndv - other._ndv};
+ }
+};
+
+/**
+ * A Histogram over a set of values. The histogram consists of two parallel vectors - one with the
+ * individual value statistics, and another one with the actual boundary values.
+ */
+class Histogram {
+public:
+ Histogram();
+ Histogram(value::Array bounds, std::vector<Bucket> buckets);
+
+ std::string toString() const;
+ std::string plot() const;
+
+ EstimationResult getTotals() const;
+ EstimationResult estimate(value::TypeTags tag, value::Value val, EstimationType type) const;
+
+ const value::Array& getBounds() const;
+ const std::vector<Bucket>& getBuckets() const;
+
+ bool empty() const {
+ return _buckets.empty();
+ }
+
+private:
+ // Bucket bounds representing the **highest** value in each bucket.
+ value::Array _bounds;
+
+ std::vector<Bucket> _buckets;
+
+ // TODO: add counts for types of values not in histogram: arrays, objects, null, missing, etc.
+
+ // TODO: _fieldPath - how to represent?
+ // TODO: other metadata?
+};
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/utils.cpp b/src/mongo/db/query/ce/utils.cpp
new file mode 100644
index 00000000000..5ee6f3a083e
--- /dev/null
+++ b/src/mongo/db/query/ce/utils.cpp
@@ -0,0 +1,140 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/utils.h"
+
+#include "mongo/db/query/ce/histogram.h"
+
+/**
+ * Note: This is a temporary file used for histogram generation until the histogram implementation
+ * is finalized.
+ */
+
+namespace mongo::ce {
+
+using namespace sbe;
+
+SBEValue::SBEValue(value::TypeTags tag, value::Value val) : _tag(tag), _val(val) {}
+
+SBEValue::SBEValue(std::pair<value::TypeTags, value::Value> v) : SBEValue(v.first, v.second) {}
+
+SBEValue::SBEValue(const SBEValue& other) {
+ auto [tag, val] = copyValue(other._tag, other._val);
+ _tag = tag;
+ _val = val;
+}
+
+SBEValue::SBEValue(SBEValue&& other) {
+ _tag = other._tag;
+ _val = other._val;
+
+ other._tag = value::TypeTags::Nothing;
+ other._val = 0;
+}
+
+SBEValue::~SBEValue() {
+ value::releaseValue(_tag, _val);
+}
+
+SBEValue& SBEValue::operator=(const SBEValue& other) {
+ value::releaseValue(_tag, _val);
+
+ auto [tag, val] = copyValue(other._tag, other._val);
+ _tag = tag;
+ _val = val;
+ return *this;
+}
+
+SBEValue& SBEValue::operator=(SBEValue&& other) {
+ value::releaseValue(_tag, _val);
+
+ _tag = other._tag;
+ _val = other._val;
+
+ other._tag = value::TypeTags::Nothing;
+ other._val = 0;
+
+ return *this;
+}
+
+std::pair<value::TypeTags, value::Value> SBEValue::get() const {
+ return std::make_pair(_tag, _val);
+}
+
+value::TypeTags SBEValue::getTag() const {
+ return _tag;
+}
+
+value::Value SBEValue::getValue() const {
+ return _val;
+}
+
+std::pair<value::TypeTags, value::Value> makeInt64Value(const int v) {
+ return std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(v));
+};
+
+std::pair<value::TypeTags, value::Value> makeNullValue() {
+ return std::make_pair(value::TypeTags::Null, 0);
+};
+
+bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2) {
+ if (tag1 == tag2) {
+ return true;
+ }
+
+ static constexpr const char* kTempFieldName = "temp";
+
+ BSONObjBuilder minb1;
+ minb1.appendMinForType(kTempFieldName, value::tagToType(tag1));
+ const BSONObj min1 = minb1.obj();
+
+ BSONObjBuilder minb2;
+ minb2.appendMinForType(kTempFieldName, value::tagToType(tag2));
+ const BSONObj min2 = minb2.obj();
+
+ return min1.woCompare(min2) == 0;
+}
+
+int32_t compareValues3w(value::TypeTags tag1,
+ value::Value val1,
+ value::TypeTags tag2,
+ value::Value val2) {
+ const auto [compareTag, compareVal] = value::compareValue(tag1, val1, tag2, val2);
+ uassert(6695716, "Invalid comparison result", compareTag == value::TypeTags::NumberInt32);
+ return value::bitcastTo<int32_t>(compareVal);
+}
+
+void sortValueVector(std::vector<SBEValue>& sortVector) {
+ const auto cmp = [](const SBEValue& a, const SBEValue& b) {
+ return compareValues3w(a.getTag(), a.getValue(), b.getTag(), b.getValue()) < 0;
+ };
+ std::sort(sortVector.begin(), sortVector.end(), cmp);
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/utils.h b/src/mongo/db/query/ce/utils.h
new file mode 100644
index 00000000000..61e39c0bc84
--- /dev/null
+++ b/src/mongo/db/query/ce/utils.h
@@ -0,0 +1,79 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/exec/sbe/values/value.h"
+
+/**
+ * Note: This is a temporary file used for histogram generation until the histogram implementation
+ * is finalized.
+ */
+
+namespace mongo::ce {
+
+using namespace sbe;
+
+enum class EstimationType;
+class Histogram;
+
+class SBEValue {
+public:
+ SBEValue(value::TypeTags tag, value::Value val);
+ SBEValue(std::pair<value::TypeTags, value::Value> v);
+ ~SBEValue();
+
+ SBEValue(const SBEValue& other);
+ SBEValue(SBEValue&& other);
+
+ SBEValue& operator=(const SBEValue& other);
+ SBEValue& operator=(SBEValue&& other);
+
+ std::pair<value::TypeTags, value::Value> get() const;
+ value::TypeTags getTag() const;
+ value::Value getValue() const;
+
+private:
+ value::TypeTags _tag;
+ value::Value _val;
+};
+
+std::pair<value::TypeTags, value::Value> makeInt64Value(int v);
+std::pair<value::TypeTags, value::Value> makeNullValue();
+
+bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2);
+
+int32_t compareValues3w(value::TypeTags tag1,
+ value::Value val1,
+ value::TypeTags tag2,
+ value::Value val2);
+
+void sortValueVector(std::vector<SBEValue>& sortVector);
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce_mode_parameter.cpp b/src/mongo/db/query/ce_mode_parameter.cpp
new file mode 100644
index 00000000000..8c019d25746
--- /dev/null
+++ b/src/mongo/db/query/ce_mode_parameter.cpp
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce_mode_parameter.h"
+#include "mongo/db/query/query_knobs_gen.h"
+
+namespace mongo::ce {
+Status validateCEMode(const std::string& value) {
+ if (value == kHeuristic || value == kHistogram || value == kSampling) {
+ return Status::OK();
+ }
+ return Status(ErrorCodes::Error{6695700}, "Invalid cardinality estimation mode.");
+}
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce_mode_parameter.h b/src/mongo/db/query/ce_mode_parameter.h
new file mode 100644
index 00000000000..b267067747f
--- /dev/null
+++ b/src/mongo/db/query/ce_mode_parameter.h
@@ -0,0 +1,47 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "mongo/base/status.h"
+
+namespace mongo::ce {
+
+/**
+ * Defines cardinality estimation modes.
+ */
+const std::string kHeuristic = "heuristic";
+const std::string kHistogram = "histogram";
+const std::string kSampling = "sampling";
+
+Status validateCEMode(const std::string& value);
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce_mode_parameter_test.cpp b/src/mongo/db/query/ce_mode_parameter_test.cpp
new file mode 100644
index 00000000000..66e4b23bc38
--- /dev/null
+++ b/src/mongo/db/query/ce_mode_parameter_test.cpp
@@ -0,0 +1,47 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce_mode_parameter.h"
+
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+
+TEST(CEModeParameterTest, ValidatesValidCEModes) {
+ ASSERT_OK(validateCEMode("heuristic"));
+ ASSERT_OK(validateCEMode("histogram"));
+ ASSERT_OK(validateCEMode("sampling"));
+}
+
+TEST(CEModeParameterTest, RejectsInvalidCEModes) {
+ ASSERT_NOT_OK(validateCEMode("notamode"));
+ ASSERT_NOT_OK(validateCEMode(""));
+}
+
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index 4b7198314f2..0f64623fa63 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -29,6 +29,7 @@
global:
cpp_namespace: "mongo"
cpp_includes:
+ - "mongo/db/query/ce_mode_parameter.h"
- "mongo/db/query/plan_cache_size_parameter.h"
- "mongo/db/query/sbe_plan_cache_on_parameter_change.h"
- "mongo/platform/atomic_proxy.h"
@@ -713,14 +714,15 @@ server_parameters:
expr: 100 * 1024 * 1024
validator:
gt: 0
-
- internalQueryEnableSamplingCardinalityEstimator:
- description: "Set to use the sampling-based method for estimating cardinality in the Cascades
- optimizer."
+
+ internalQueryCardinalityEstimatorMode:
+ description: "Set to select a method for estimating cardinality in the Cascades optimizer."
set_at: [ startup, runtime ]
- cpp_varname: "internalQueryEnableSamplingCardinalityEstimator"
- cpp_vartype: AtomicWord<bool>
- default: true
+ cpp_varname: "internalQueryCardinalityEstimatorMode"
+ cpp_vartype: std::string
+ default: sampling
+ validator:
+ callback: ce::validateCEMode
internalQueryEnableCascadesOptimizer:
description: "Set to use the new optimizer path, must be used in conjunction with the feature