summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimour Katchaounov <timour.katchaounov@mongodb.com>2022-10-12 12:42:54 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-10-12 13:40:54 +0000
commitea85b99ada1fe21761f1e4f442f53509fda09d3a (patch)
tree87883d3c8c3d3b87e5e14dd5634e657151b04ee0
parentc66a4121d166b955523f001a4f81e3ffdabe961d (diff)
downloadmongo-ea85b99ada1fe21761f1e4f442f53509fda09d3a.tar.gz
SERVER-69795 PathArr estimation via Array type counter
-rw-r--r--src/mongo/db/query/ce/array_histogram.cpp28
-rw-r--r--src/mongo/db/query/ce/array_histogram.h4
-rw-r--r--src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp6
-rw-r--r--src/mongo/db/query/ce/ce_heuristic_test.cpp16
-rw-r--r--src/mongo/db/query/ce/ce_histogram.cpp8
-rw-r--r--src/mongo/db/query/ce/ce_histogram_test.cpp79
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.cpp23
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.h11
-rw-r--r--src/mongo/db/query/ce/maxdiff_test_utils.cpp3
-rw-r--r--src/mongo/db/query/ce/scalar_histogram.h7
10 files changed, 144 insertions, 41 deletions
diff --git a/src/mongo/db/query/ce/array_histogram.cpp b/src/mongo/db/query/ce/array_histogram.cpp
index efbe7404289..52e2c3d32a2 100644
--- a/src/mongo/db/query/ce/array_histogram.cpp
+++ b/src/mongo/db/query/ce/array_histogram.cpp
@@ -48,6 +48,8 @@ ArrayHistogram::ArrayHistogram(ScalarHistogram scalar,
_arrayMax(std::move(arrayMax)),
_arrayTypeCounts(std::move(arrayTypeCounts)) {
invariant(isArray());
+ // ArrayMin/Max histograms must have the same number of buckets.
+ invariant(_arrayMin->getBuckets().size() == _arrayMax->getBuckets().size());
}
ArrayHistogram::ArrayHistogram(ScalarHistogram scalar, TypeCounts typeCounts)
@@ -121,4 +123,30 @@ const TypeCounts& ArrayHistogram::getArrayTypeCounts() const {
return *_arrayTypeCounts;
}
+const size_t ArrayHistogram::getArrayCount() const {
+ if (isArray()) {
+ auto findArray = _typeCounts.find(value::TypeTags::Array);
+ uassert(6979504,
+ "Histogram with array data must have a total array count.",
+ findArray != _typeCounts.end());
+ size_t arrayCount = findArray->second;
+ uassert(6979503, "Histogram with array data must have at least one array.", arrayCount > 0);
+ return arrayCount;
+ }
+ return 0;
+}
+
+const size_t ArrayHistogram::getEmptyArrayCount() const {
+ if (isArray()) {
+ size_t nonEmptyArrayCount = _arrayMin->empty() ? 0 : _arrayMin->getCardinality();
+ size_t totalArrCount = getArrayCount();
+ uassert(6979502,
+ "The number of empty arrays is < the total number of arrays.",
+ totalArrCount >= nonEmptyArrayCount);
+ size_t emptyArrCount = totalArrCount - nonEmptyArrayCount;
+ return emptyArrCount;
+ }
+ return 0;
+}
+
} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/array_histogram.h b/src/mongo/db/query/ce/array_histogram.h
index 2a6e9af3383..8a953f3414d 100644
--- a/src/mongo/db/query/ce/array_histogram.h
+++ b/src/mongo/db/query/ce/array_histogram.h
@@ -73,6 +73,10 @@ public:
const ScalarHistogram& getArrayMax() const;
const TypeCounts& getTypeCounts() const;
const TypeCounts& getArrayTypeCounts() const;
+ // The total number of arrays in the histogram's path including empty arrays.
+ const size_t getArrayCount() const;
+ // The total number of empty arrays ( [] ) in the histogram's path.
+ const size_t getEmptyArrayCount() const;
private:
/* ScalarHistogram fields for all paths. */
diff --git a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
index f03c50754ef..f859a355ff6 100644
--- a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
+++ b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
@@ -61,7 +61,7 @@ protected:
TEST(CEDataflowTest, EstimateTrivialNodes) {
DataflowCETester t;
- const auto matchCard = t.getMatchCE("{a: 1}");
+ const auto matchCard = t.getMatchCE<optimizer::RootNode>("{a: 1}");
// Verify 'CollationNode' estimate returns the input cardinality.
ASSERT_CE(t, "[{$sort: {a: 1}}]", kCollCard);
@@ -132,7 +132,7 @@ TEST(CEDataflowTest, EstimateUnionNode) {
TEST(CEDataflowTest, EstimateLimitSkipNode) {
DataflowCETester t;
- const CEType matchCard = t.getMatchCE("{a: 1}");
+ const CEType matchCard = t.getMatchCE<optimizer::RootNode>("{a: 1}");
// Verify that 'LimitSkipNode' estimate with only a limit set is min(limit, inputCE).
ASSERT_CE(t, "[{$limit: 1}]", 1.0);
@@ -204,7 +204,7 @@ TEST(CEDataflowTest, EstimateLimitSkipNode) {
TEST(CEDataflowTest, EstimateUnwindNode) {
DataflowCETester t;
- const CEType matchCard = t.getMatchCE("{a: 1}");
+ const CEType matchCard = t.getMatchCE<optimizer::RootNode>("{a: 1}");
// We assume that arrays on average have ~10 elements, so we estimate this as inputCard*10.
ASSERT_CE(t, "[{$unwind: '$a'}]", 10 * kCollCard);
diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp
index 0eaf38eb97b..6146a001fa3 100644
--- a/src/mongo/db/query/ce/ce_heuristic_test.cpp
+++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp
@@ -206,8 +206,8 @@ TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate)
"{'b0.b1.b3': {$gt: 10}}"
"]}";
HeuristicCETester ht(collName, kNoOptPhaseSet);
- auto ce1 = ht.getMatchCE(query);
- auto ce2 = ht.getMatchCE(queryWithLongPaths);
+ auto ce1 = ht.getMatchCE<optimizer::RootNode>(query);
+ auto ce2 = ht.getMatchCE<optimizer::RootNode>(queryWithLongPaths);
ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError);
}
@@ -649,8 +649,8 @@ TEST(CEHeuristicTest, CEWithoutOptimizationEquivalentConjunctions) {
HeuristicCETester ht(collName, kNoOptPhaseSet);
ht.setCollCard(kCollCard);
- auto ce1 = ht.getCE(rootNode1);
- auto ce2 = ht.getCE(rootNode2);
+ auto ce1 = ht.getCE<optimizer::RootNode>(rootNode1);
+ auto ce2 = ht.getCE<optimizer::RootNode>(rootNode2);
ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError);
}
@@ -750,7 +750,7 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) {
"{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
"81}}, {a0: {$lt: 77}}]}"
"]}";
- auto ce1 = ht.getMatchCE(query1);
+ auto ce1 = ht.getMatchCE<optimizer::RootNode>(query1);
// The conjuncts are in inverse selectivity order.
std::string query2 =
"{$or: ["
@@ -762,7 +762,7 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) {
"{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]},"
"{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}"
"]}";
- auto ce2 = ht.getMatchCE(query2);
+ auto ce2 = ht.getMatchCE<optimizer::RootNode>(query2);
ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError);
}
@@ -804,10 +804,8 @@ TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF2paths) {
}
TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) {
- std::string query = "{a : 13, b : 42}";
HeuristicCETester ht(collName);
- double ce = ht.getMatchCE(query);
- ASSERT_APPROX_EQUAL(10.0, ce, kMaxCEError);
+ ASSERT_MATCH_CE(ht, "{a : 13, b : 42}", 10.0);
}
} // namespace
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
index ac6938766b4..4ea46a7b8dd 100644
--- a/src/mongo/db/query/ce/ce_histogram.cpp
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -177,10 +177,10 @@ public:
const CEType totalCard = _stats->getCardinality();
if (conjunctReq.intervals.empty() && !conjunctReq.includeScalar) {
- // TODO SERVER-69795: handle the case where we have a single 'PathArr' interval for
- // this field by returning the number of arrays we expect in the output based on
- // histogram type counters.
- topLevelSelectivities.push_back(1.0);
+ // In this case there is a single 'PathArr' interval for this field.
+ // The selectivity of this interval is: (count of all arrays) / totalCard
+ double pathArrSel = conjunctReq.histogram.getArrayCount() / totalCard;
+ topLevelSelectivities.push_back(pathArrSel);
}
// Intervals are in DNF.
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
index 6e38fd707b7..67a19f02f25 100644
--- a/src/mongo/db/query/ce/ce_histogram_test.cpp
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -119,12 +119,21 @@ std::unique_ptr<ArrayHistogram> getArrayHistogramFromData(
std::vector<TestBucket> arrayUniqueBuckets,
std::vector<TestBucket> arrayMinBuckets,
std::vector<TestBucket> arrayMaxBuckets,
- TypeCounts arrayTypeCounts) {
+ TypeCounts arrayTypeCounts,
+ size_t emptyArrayCount = 0) {
+ auto arrayMinHist = getHistogramFromData(arrayMinBuckets);
+ auto arrayMaxHist = getHistogramFromData(arrayMaxBuckets);
+ TypeCounts typeCounts = getTypeCountsFromData(scalarBuckets);
+ // The min/max histograms contain one (min or max) value from each array. Therefore the
+ // total number of min/max values is equal to the number of non non-empty arrays.
+ // The total number of arrays needs to account for the additional empty arrays not present
+ // in histograms.
+ typeCounts[value::TypeTags::Array] = arrayMinHist.getCardinality() + emptyArrayCount;
return std::make_unique<ArrayHistogram>(getHistogramFromData(scalarBuckets),
- getTypeCountsFromData(scalarBuckets),
+ std::move(typeCounts),
getHistogramFromData(arrayUniqueBuckets),
- getHistogramFromData(arrayMinBuckets),
- getHistogramFromData(arrayMaxBuckets),
+ std::move(arrayMinHist),
+ std::move(arrayMaxHist),
std::move(arrayTypeCounts));
}
@@ -477,7 +486,8 @@ TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) {
{Value(6), 1 /* frequency */},
{Value(10), 1 /* frequency */},
},
- {{sbe::value::TypeTags::NumberInt32, 4}} // Array type counts.
+ {{sbe::value::TypeTags::NumberInt32, 4}}, // Array type counts.
+ 1 // 1 empty array.
));
CEHistogramTester t(collName, collCardinality, collStats);
@@ -536,17 +546,17 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
}));
// An array histogram built on the following arrays with 35 occurrences of each:
- // [[1, 2, 3], [5, 5, 5, 5, 5], [6], [], [8, 9, 10]]
+ // [{[1, 2, 3]: 35}, {[5, 5, 5, 5, 5]: 35}, {[6]: 35}, {[]: 35}, {[8, 9, 10]: 35}]
collStats->addHistogram(
"array",
getArrayHistogramFromData(
{/* No scalar buckets. */},
{
// Array unique buckets.
- {Value(1), 35 /* frequency */},
{Value(2), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */},
- {Value(5), 35 /* frequency */},
- {Value(6), 35 /* frequency */, 105 /* range frequency */, 4 /* ndv */},
+ {Value(5), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */},
+ {Value(6), 35 /* frequency */},
+ {Value(10), 35 /* frequency */, 105 /* range frequency */, 3 /* ndv */},
},
{
// Array min buckets.
@@ -562,7 +572,8 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
{Value(6), 35 /* frequency */},
{Value(10), 35 /* frequency */},
},
- {{sbe::value::TypeTags::NumberInt32, collCardinality}} // Array type counts.
+ {{sbe::value::TypeTags::NumberInt32, collCardinality}}, // Array type counts.
+ 35 // 35 empty arrays
));
collStats->addHistogram(
@@ -572,7 +583,7 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
// [{[1, 2, 3]: 17}, {[5, 5, 5, 5, 5]: 17}, {[6]: 17}, {[]: 20}, {[8, 9, 10]: 17}]
getArrayHistogramFromData(
{
- // Scalar buckets.
+ // Scalar buckets. These are half the number of values from the "scalar" histogram.
{Value(1), 5 /* frequency */},
{Value(2), 5 /* frequency */},
{Value(3), 10 /* frequency */, 60 /* range frequency */, 5 /* ndv */},
@@ -580,9 +591,10 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
},
{
// Array unique buckets.
- {Value(1), 17 /* frequency */, 34 /* range frequency */, 3 /* ndv */},
- {Value(5), 17 /* frequency */},
- {Value(6), 17 /* frequency */, 51 /* range frequency */, 4 /* ndv */},
+ {Value(2), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */},
+ {Value(5), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */},
+ {Value(6), 17 /* frequency */},
+ {Value(10), 17 /* frequency */, 34 /* range frequency */, 3 /* ndv */},
},
{
// Array min buckets.
@@ -598,7 +610,8 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
{Value(6), 17 /* frequency */},
{Value(10), 17 /* frequency */},
},
- {{sbe::value::TypeTags::NumberInt32, 88}} // Array type counts.
+ {{sbe::value::TypeTags::NumberInt32, 88}}, // Array type counts.
+ 20 // 20 empty arrays.
));
CEHistogramTester t(collName, collCardinality, collStats);
@@ -647,8 +660,8 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 35.0);
// Test case with multiple predicates and ranges.
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 68.75);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 28.19);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 55.959);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 25.4291);
// Test multiple $elemMatches.
ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 0.0);
@@ -666,8 +679,8 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
"{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: "
"{$elemMatch: {$eq: 5}}}",
0.0);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 32.09);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 42.78);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 24.124);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 38.5984);
// Verify that we still return an estimate of 0.0 for any $elemMatch predicate on a scalar
// field when we have a non-multikey index.
@@ -675,6 +688,34 @@ TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
makeIndexDefinition("scalar", CollationOp::Ascending, /* isMultiKey */ false)}});
ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}}", 0.0);
ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$gt: 1, $lt: 10}}}", 0.0);
+
+ // Test how we estimate singular PathArr sargable predicate.
+ ASSERT_MATCH_CE_NODE(t, "{array: {$elemMatch: {}}}", 175.0, optimizer::SargableNode);
+ ASSERT_MATCH_CE_NODE(t, "{mixed: {$elemMatch: {}}}", 88.0, optimizer::SargableNode);
+
+ // Take into account both empty and non-empty arrays.
+ auto makePathArrABT = [&](const std::string& path) {
+ const auto scanProjection = "scan_0";
+ auto scanNode = make<ScanNode>(scanProjection, collName);
+ auto filterNode = make<FilterNode>(
+ make<EvalFilter>(make<PathGet>(path, make<PathArr>()), make<Variable>(scanProjection)),
+ std::move(scanNode));
+ return make<RootNode>(
+ properties::ProjectionRequirement{ProjectionNameVector{scanProjection}},
+ std::move(filterNode));
+ };
+
+ // There are no arrays in the 'scalar' field.
+ ABT scalarABT = makePathArrABT("scalar");
+ ASSERT_CE(t, scalarABT, 0.0);
+
+ // About half the values of this field are arrays.
+ ABT mixedABT = makePathArrABT("mixed");
+ ASSERT_CE(t, mixedABT, 88.0);
+
+ // This field is always an array.
+ ABT arrayABT = makePathArrABT("array");
+ ASSERT_CE(t, arrayABT, collCardinality);
}
TEST(CEHistogramTest, TestMixedElemMatchAndNonElemMatch) {
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp
index f4e2e4080ee..f91f1a3c6b3 100644
--- a/src/mongo/db/query/ce/ce_test_utils.cpp
+++ b/src/mongo/db/query/ce/ce_test_utils.cpp
@@ -52,22 +52,29 @@ CETester::CETester(std::string collName,
addCollection(collName, collCard);
}
+template <class T>
optimizer::CEType CETester::getMatchCE(const std::string& predicate) const {
- return getCE("[{$match: " + predicate + "}]");
+ return getCE<T>("[{$match: " + predicate + "}]");
}
+template <class T>
optimizer::CEType CETester::getCE(const std::string& pipeline) const {
if constexpr (kCETestLogOnly) {
- std::cout << "Query: " << pipeline << "\n";
+ std::cout << "\n\nQuery: " << pipeline << "\n";
}
// Construct ABT from pipeline and optimize.
ABT abt = translatePipeline(pipeline, _collName);
// Get cardinality estimate.
- return getCE(abt);
+ return getCE<T>(abt);
}
+template optimizer::CEType CETester::getCE<optimizer::RootNode>(const std::string& query) const;
+template optimizer::CEType CETester::getCE<optimizer::SargableNode>(const std::string& query) const;
+
+
+template <class T>
optimizer::CEType CETester::getCE(ABT& abt) const {
if constexpr (kCETestLogOnly) {
std::cout << ExplainGenerator::explainV2(abt) << std::endl;
@@ -138,7 +145,7 @@ optimizer::CEType CETester::getCE(ABT& abt) const {
}
}
- if (node.is<optimizer::RootNode>()) {
+ if (node.is<T>()) {
// We want to return the cardinality for the entire ABT.
outCard = memoCE;
}
@@ -153,12 +160,20 @@ optimizer::CEType CETester::getCE(ABT& abt) const {
return outCard;
}
+template optimizer::CEType CETester::getCE<optimizer::RootNode>(ABT& abt) const;
+template optimizer::CEType CETester::getCE<optimizer::SargableNode>(ABT& abt) const;
+template optimizer::CEType CETester::getMatchCE<optimizer::RootNode>(
+ const std::string& matchPredicate) const;
+template optimizer::CEType CETester::getMatchCE<optimizer::SargableNode>(
+ const std::string& matchPredicate) const;
+
ScanDefinition& CETester::getCollScanDefinition() {
auto it = _metadata._scanDefs.find(_collName);
invariant(it != _metadata._scanDefs.end());
return it->second;
}
+
void CETester::setCollCard(double card) {
auto& scanDef = getCollScanDefinition();
addCollection(_collName, card, scanDef.getIndexDefs());
diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h
index ccab2ba77ce..a62a3b4a3b8 100644
--- a/src/mongo/db/query/ce/ce_test_utils.h
+++ b/src/mongo/db/query/ce/ce_test_utils.h
@@ -86,7 +86,8 @@ const OptPhaseManager::PhaseSet kNoOptPhaseSet{};
(str::stream() << "{" << field << ": {$elemMatch: " << predicate << "}}")
// This macro verifies the cardinality of a pipeline or an input ABT.
-#define ASSERT_CE(ce, pipeline, expectedCE) _ASSERT_CE(ce.getCE(pipeline), (expectedCE))
+#define ASSERT_CE(ce, pipeline, expectedCE) \
+ _ASSERT_CE(ce.getCE<optimizer::RootNode>(pipeline), (expectedCE))
// This macro does the same as above but also sets the collection cardinality.
#define ASSERT_CE_CARD(ce, pipeline, expectedCE, collCard) \
@@ -95,7 +96,10 @@ const OptPhaseManager::PhaseSet kNoOptPhaseSet{};
// This macro verifies the cardinality of a pipeline with a single $match predicate.
#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
- _ASSERT_CE(ce.getMatchCE(predicate), (expectedCE))
+ _ASSERT_CE(ce.getMatchCE<optimizer::RootNode>(predicate), (expectedCE))
+
+#define ASSERT_MATCH_CE_NODE(ce, predicate, expectedCE, nodeType) \
+ _ASSERT_CE(ce.getMatchCE<nodeType>(predicate), (expectedCE))
// This macro does the same as above but also sets the collection cardinality.
#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \
@@ -124,16 +128,19 @@ public:
/**
* Returns the estimated cardinality of a given 'matchPredicate'.
*/
+ template <class T>
CEType getMatchCE(const std::string& matchPredicate) const;
/**
* Returns the estimated cardinality of a given 'pipeline'.
*/
+ template <class T>
CEType getCE(const std::string& pipeline) const;
/**
* Returns the estimated cardinality of a given 'abt'.
*/
+ template <class T>
CEType getCE(ABT& abt) const;
/**
diff --git a/src/mongo/db/query/ce/maxdiff_test_utils.cpp b/src/mongo/db/query/ce/maxdiff_test_utils.cpp
index fdd42ff627b..96f86914633 100644
--- a/src/mongo/db/query/ce/maxdiff_test_utils.cpp
+++ b/src/mongo/db/query/ce/maxdiff_test_utils.cpp
@@ -137,6 +137,9 @@ std::string plotArrayEstimator(const ArrayHistogram& estimator, const std::strin
os << tagCount.first << "=" << tagCount.second << " ";
}
}
+ if (estimator.isArray()) {
+ os << "\nEmpty array count: " << estimator.getEmptyArrayCount();
+ }
os << "\n";
return os.str();
diff --git a/src/mongo/db/query/ce/scalar_histogram.h b/src/mongo/db/query/ce/scalar_histogram.h
index 203b9bd7d96..3ce391f20d2 100644
--- a/src/mongo/db/query/ce/scalar_histogram.h
+++ b/src/mongo/db/query/ce/scalar_histogram.h
@@ -84,6 +84,13 @@ public:
const sbe::value::Array& getBounds() const;
const std::vector<Bucket>& getBuckets() const;
+ // Return the total number of histogrammed values.
+ const size_t getCardinality() const {
+ if (_buckets.empty()) {
+ return 0.0;
+ }
+ return _buckets.back()._cumulativeFreq;
+ }
bool empty() const {
return _buckets.empty();