summary refs log tree commit diff
diff options
context:
space:
mode:
author Alya Berciu <alya.berciu@mongodb.com> 2022-08-30 16:14:40 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-30 17:39:14 +0000
commit 34e78e195036e7aa6850ccb6251e66654267b588 (patch)
tree 3d18279b5ecbb2fbc6b4892eb3412e64529e1b3d
parent 39077b8d14957a4f63ea7e2fd23494011b987393 (diff)
download mongo-34e78e195036e7aa6850ccb6251e66654267b588.tar.gz
SERVER-69039 Fix histogram selection for dotted paths
-rw-r--r-- src/mongo/db/query/ce/ce_histogram.cpp 37
-rw-r--r-- src/mongo/db/query/ce/ce_histogram_test.cpp 25
2 files changed, 61 insertions, 1 deletions
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
index db163c7b2e2..e86dc3b689a 100644
--- a/src/mongo/db/query/ce/ce_histogram.cpp
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -42,6 +42,41 @@ namespace mongo::optimizer::cascades {
using namespace properties;
+namespace {
+// Transport that flattens a chain of PathGet/PathTraverse nodes into one dotted, MQL-like path string (e.g. "a.b.c").
+class PathDescribeTransport {
+public:
+ std::string transport(const optimizer::PathTraverse& /*node*/, std::string childResult) {
+ return childResult;  // A traverse adds no path component; the child's string passes through unchanged.
+ }
+
+ std::string transport(const optimizer::PathGet& node, std::string childResult) {
+ return str::stream() << node.name() << (childResult.length() > 0 ? "." : "") << childResult;  // "." is emitted only when a child component follows, so the result never ends in a dot.
+ }
+
+ std::string transport(const optimizer::EvalFilter& node,
+ std::string pathResult,
+ std::string inputResult) {
+ return pathResult;  // Only the path child's string is kept; `node` and `inputResult` are not used.
+ }
+
+ std::string transport(const optimizer::PathIdentity& node) {
+ return "";  // Takes no child result and contributes the empty string.
+ }
+
+ template <typename T, typename... Ts>
+ std::string transport(const T& node, Ts&&... /* args */) {
+ uasserted(6903900, "Unexpected node in path serialization.");  // Catch-all for every node type without a dedicated overload above.
+ }
+};
+
+std::string serializePath(const optimizer::ABT& path) {  // Returns the string PathDescribeTransport produces for `path`.
+ PathDescribeTransport pdt;
+ auto str = optimizer::algebra::transport<false>(path, pdt);  // Applies pdt's transport overloads to `path`; NOTE(review): the meaning of the <false> template argument is not visible here.
+ return str;
+}
+} // namespace
+
class CEHistogramTransportImpl {
public:
CEHistogramTransportImpl(const ce::CollectionStatistics& stats)
@@ -72,7 +107,7 @@ public:
std::vector<double> topLevelSelectivities;
for (const auto& [key, req] : node.getReqMap()) {
std::vector<double> disjSelectivities;
- auto path = key._path.cast<PathGet>()->name();
+ auto path = serializePath(key._path.ref());
// Fallback to heuristic if no histogram.
auto histogram = _stats.getHistogram(path);
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
index 6231e5cbe0d..13ab164840c 100644
--- a/src/mongo/db/query/ce/ce_histogram_test.cpp
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -362,5 +362,30 @@ TEST(CEHistogramTest, AssertOneBoundIntRangeHistogram) {
ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lte: 5}}", 0.0);
}
+TEST(CEHistogramTest, TestHistogramOnNestedPaths) {
+ const auto collName = "test";
+ const auto collCardinality = 50;
+
+ CollectionStatistics collStats(collCardinality);
+
+ // Build a single-bucket histogram holding exactly one int (42) whose frequency equals the
+ // collection cardinality (50), and register it under two different names below.
+ collStats.addHistogram("path",  // None of the queries below use exactly this name; presumably guards against matching on the last path component only.
+ getHistogramFromData({
+ {Value(42), collCardinality /* frequency */},
+ }));
+ collStats.addHistogram("a.histogram.path",  // Same data, registered under a dotted path.
+ getHistogramFromData({
+ {Value(42), collCardinality /* frequency */},
+ }));
+
+ CEHistogramTester t(collName, collCardinality, collStats);
+
+ ASSERT_MATCH_CE(t, "{\"not.a.histogram.path\": {$eq: 42}}", 7.071 /* heuristic */);  // No histogram is registered under this name.
+ ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$eq: 42}}", collCardinality);  // Exact match on the registered dotted path: all 50 documents expected.
+ ASSERT_MATCH_CE(
+ t, "{\"a.histogram.path.with.no.histogram\": {$eq: 42}}", 7.071 /* heuristic */);  // Extends a registered path, but no histogram has this exact name.
+}
+
} // namespace
} // namespace mongo::ce