summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/ce/ce_histogram.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/query/ce/ce_histogram.cpp')
-rw-r--r--src/mongo/db/query/ce/ce_histogram.cpp160
1 files changed, 160 insertions, 0 deletions
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
new file mode 100644
index 00000000000..1a04b740333
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+
+#include "mongo/db/query/ce/ce_estimation.h"
+#include "mongo/db/query/ce/ce_histogram.h"
+#include "mongo/db/query/ce/ce_math.h"
+#include "mongo/db/query/ce/collection_statistics.h"
+
+#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/utils/abt_hash.h"
+#include "mongo/db/query/optimizer/utils/memo_utils.h"
+
+namespace mongo::optimizer::cascades {
+
+using namespace properties;
+
+class CEHistogramTransportImpl {
+public:
+ CEHistogramTransportImpl(OperationContext* opCtx, const ce::CollectionStatistics& stats)
+ : _opCtx(opCtx), _heuristicCE(), _stats(stats) {}
+
+ ~CEHistogramTransportImpl() {}
+
+ CEType transport(const ABT& n,
+ const ScanNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType /*bindResult*/) {
+ return _stats.getCardinality();
+ }
+
+ CEType transport(const ABT& n,
+ const SargableNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType childResult,
+ CEType /*bindsResult*/,
+ CEType /*refsResult*/) {
+ // Early out and return 0 since we don't expect to get more results.
+ if (childResult == 0.0) {
+ return 0.0;
+ }
+
+ std::vector<double> topLevelSelectivities;
+ for (const auto& [key, req] : node.getReqMap()) {
+ std::vector<double> disjSelectivities;
+ auto path = key._path.cast<PathGet>()->name();
+
+ // Fallback to heuristic if no histogram.
+ auto histogram = _stats.getHistogram(path);
+ if (!histogram) {
+ // For now, because of the structure of SargableNode and the implementation of
+ // HeuristicCE, we can't combine heuristic & histogram estimates. In this case,
+ // default to Heuristic if we don't have a histogram for any of the predicates.
+ return _heuristicCE.deriveCE(memo, logicalProps, n.ref());
+ }
+
+ // Intervals are in DNF.
+ const auto intervalDNF = req.getIntervals();
+ const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes();
+ for (const auto& disjunct : disjuncts) {
+ const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes();
+
+ std::vector<double> conjSelectivities;
+ for (const auto& conjunct : conjuncts) {
+ const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr();
+ auto cardinality =
+ ce::estimateIntervalCardinality(*histogram, interval, childResult);
+
+ // We have to convert the cardinality to a selectivity. The histogram returns
+ // the cardinality for the entire collection; however, fewer records may be
+ // expected at the SargableNode.
+ conjSelectivities.push_back(cardinality / _stats.getCardinality());
+ }
+
+ auto backoff = ce::conjExponentialBackoff(conjSelectivities);
+ disjSelectivities.push_back(backoff);
+ }
+
+ auto backoff = ce::disjExponentialBackoff(disjSelectivities);
+ topLevelSelectivities.push_back(backoff);
+ }
+
+ // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
+ auto backoff = ce::conjExponentialBackoff(topLevelSelectivities);
+ return backoff * childResult;
+ }
+
+ CEType transport(const ABT& n,
+ const RootNode& node,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ CEType childResult,
+ CEType /*refsResult*/) {
+ // Root node does not change cardinality.
+ return childResult;
+ }
+
+ /**
+ * Other ABT types default to heuristic CE.
+ */
+ template <typename T, typename... Ts>
+ CEType transport(const ABT& n,
+ const T& /*node*/,
+ const Memo& memo,
+ const LogicalProps& logicalProps,
+ Ts&&...) {
+ if (canBeLogicalNode<T>()) {
+ return _heuristicCE.deriveCE(memo, logicalProps, n.ref());
+ }
+ return 0.0;
+ }
+
+private:
+ OperationContext* _opCtx;
+ HeuristicCE _heuristicCE;
+ const ce::CollectionStatistics& _stats;
+};
+
+CEHistogramTransport::CEHistogramTransport(OperationContext* opCtx,
+ const ce::CollectionStatistics& stats)
+ : _impl(std::make_unique<CEHistogramTransportImpl>(opCtx, stats)) {}
+
+CEHistogramTransport::~CEHistogramTransport() {}
+
+CEType CEHistogramTransport::deriveCE(const Memo& memo,
+ const LogicalProps& logicalProps,
+ const ABT::reference_type logicalNodeRef) const {
+ return algebra::transport<true>(logicalNodeRef, *this->_impl, memo, logicalProps);
+}
+
+} // namespace mongo::optimizer::cascades