diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-08-30 16:14:40 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-30 17:39:14 +0000 |
commit | 34e78e195036e7aa6850ccb6251e66654267b588 (patch) | |
tree | 3d18279b5ecbb2fbc6b4892eb3412e64529e1b3d /src/mongo/db/query | |
parent | 39077b8d14957a4f63ea7e2fd23494011b987393 (diff) | |
download | mongo-34e78e195036e7aa6850ccb6251e66654267b588.tar.gz |
SERVER-69039 Fix histogram selection for dotted paths
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/ce/ce_histogram.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/query/ce/ce_histogram_test.cpp | 25 |
2 files changed, 61 insertions, 1 deletion
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp index db163c7b2e2..e86dc3b689a 100644 --- a/src/mongo/db/query/ce/ce_histogram.cpp +++ b/src/mongo/db/query/ce/ce_histogram.cpp @@ -42,6 +42,41 @@ namespace mongo::optimizer::cascades { using namespace properties; +namespace { +// This transport combines chains of PathGets and PathTraverses into an MQL-like string path. +class PathDescribeTransport { +public: + std::string transport(const optimizer::PathTraverse& /*node*/, std::string childResult) { + return childResult; + } + + std::string transport(const optimizer::PathGet& node, std::string childResult) { + return str::stream() << node.name() << (childResult.length() > 0 ? "." : "") << childResult; + } + + std::string transport(const optimizer::EvalFilter& node, + std::string pathResult, + std::string inputResult) { + return pathResult; + } + + std::string transport(const optimizer::PathIdentity& node) { + return ""; + } + + template <typename T, typename... Ts> + std::string transport(const T& node, Ts&&... /* args */) { + uasserted(6903900, "Unexpected node in path serialization."); + } +}; + +std::string serializePath(const optimizer::ABT& path) { + PathDescribeTransport pdt; + auto str = optimizer::algebra::transport<false>(path, pdt); + return str; +} +} // namespace + class CEHistogramTransportImpl { public: CEHistogramTransportImpl(const ce::CollectionStatistics& stats) @@ -72,7 +107,7 @@ public: std::vector<double> topLevelSelectivities; for (const auto& [key, req] : node.getReqMap()) { std::vector<double> disjSelectivities; - auto path = key._path.cast<PathGet>()->name(); + auto path = serializePath(key._path.ref()); // Fallback to heuristic if no histogram. 
auto histogram = _stats.getHistogram(path); diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp index 6231e5cbe0d..13ab164840c 100644 --- a/src/mongo/db/query/ce/ce_histogram_test.cpp +++ b/src/mongo/db/query/ce/ce_histogram_test.cpp @@ -362,5 +362,30 @@ TEST(CEHistogramTest, AssertOneBoundIntRangeHistogram) { ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lte: 5}}", 0.0); } +TEST(CEHistogramTest, TestHistogramOnNestedPaths) { + const auto collName = "test"; + const auto collCardinality = 50; + + CollectionStatistics collStats(collCardinality); + + // Create a histogram with a single bucket that contains exactly one int (42) with a frequency + // of 50 (equal to the collection cardinality). + collStats.addHistogram("path", + getHistogramFromData({ + {Value(42), collCardinality /* frequency */}, + })); + collStats.addHistogram("a.histogram.path", + getHistogramFromData({ + {Value(42), collCardinality /* frequency */}, + })); + + CEHistogramTester t(collName, collCardinality, collStats); + + ASSERT_MATCH_CE(t, "{\"not.a.histogram.path\": {$eq: 42}}", 7.071 /* heuristic */); + ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$eq: 42}}", collCardinality); + ASSERT_MATCH_CE( + t, "{\"a.histogram.path.with.no.histogram\": {$eq: 42}}", 7.071 /* heuristic */); +} + } // namespace } // namespace mongo::ce |