diff options
author | Alexander Ignatyev <alexander.ignatyev@mongodb.com> | 2022-10-12 09:47:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-12 10:20:52 +0000 |
commit | a13ab294e6a88b0f9e4cdbf1580bd3fce0b45295 (patch) | |
tree | 76cc19a4b6321f24370ce268b5e79995aba8f9c2 /src/mongo/db/query | |
parent | 967172e344563b61d2a8cb60813010f96f03e076 (diff) | |
download | mongo-a13ab294e6a88b0f9e4cdbf1580bd3fce0b45295.tar.gz |
SERVER-69447 Add a query knob to control version of Cost Model coefficients
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/cost_model/SConscript | 15 | ||||
-rw-r--r-- | src/mongo/db/query/cost_model/cost_model_manager.cpp | 51 | ||||
-rw-r--r-- | src/mongo/db/query/cost_model/cost_model_manager.h | 64 | ||||
-rw-r--r-- | src/mongo/db/query/cqf_get_executor.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/cost_derivation.cpp | 199 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/cost_derivation.h | 13 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/cost_model.idl | 103 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.idl | 7 |
10 files changed, 393 insertions, 91 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 5b8b1b4093c..8f2ada203c7 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -8,6 +8,7 @@ env.SConscript( dirs=[ 'ce', 'collation', + 'cost_model', 'datetime', 'optimizer', ], diff --git a/src/mongo/db/query/cost_model/SConscript b/src/mongo/db/query/cost_model/SConscript new file mode 100644 index 00000000000..220782a6c5d --- /dev/null +++ b/src/mongo/db/query/cost_model/SConscript @@ -0,0 +1,15 @@ +# -*- mode: python -*- + +Import("env") + +env = env.Clone() + +env.Library( + target="query_cost_model", + source=[ + 'cost_model_manager.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/query/optimizer/optimizer', + ], +) diff --git a/src/mongo/db/query/cost_model/cost_model_manager.cpp b/src/mongo/db/query/cost_model/cost_model_manager.cpp new file mode 100644 index 00000000000..a52c81a7bd9 --- /dev/null +++ b/src/mongo/db/query/cost_model/cost_model_manager.cpp @@ -0,0 +1,51 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/cost_model/cost_model_manager.h" + +#include "mongo/db/query/optimizer/cascades/cost_derivation.h" + +namespace mongo::cost_model { +using optimizer::cascades::CostModelCoefficients; + +CostModelManager::CostModelManager() { + CostModelCoefficients coefficients; + initializeCoefficients(coefficients); + _coefficients = coefficients.toBSON(); +} + +CostModelCoefficients CostModelManager::getDefaultCoefficients() const { + return CostModelCoefficients::parse(IDLParserContext{"CostModelCoefficients"}, _coefficients); +} + +CostModelCoefficients CostModelManager::getCoefficients(const BSONObj& overrides) const { + return CostModelCoefficients::parse(IDLParserContext{"CostModelCoefficients"}, + _coefficients.addFields(overrides)); +} +} // namespace mongo::cost_model diff --git a/src/mongo/db/query/cost_model/cost_model_manager.h b/src/mongo/db/query/cost_model/cost_model_manager.h new file mode 100644 index 00000000000..2970d342bb0 --- /dev/null +++ b/src/mongo/db/query/cost_model/cost_model_manager.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/optimizer/cascades/cost_model_gen.h" + +namespace mongo::cost_model { + +/** + * This class is main access point to Cost Model Coefficients, it rerieves them and applies + * overrides. + */ +class CostModelManager { +public: + CostModelManager(); + + /** + * Returns default version of Cost Model Coefficients with applied overrides specified in the + * 'overrides' parameters as a BSON Object. See the IDL definition of 'CostModelCoefficients' + * for the names of the fields. + * + * Usage: + * @code + * costModelManager.getCoefficients(BSON("scanIncrementalCost" << 0.001)); + * @endcode + */ + optimizer::cascades::CostModelCoefficients getCoefficients(const BSONObj& overrides) const; + + /** + * Returns default version of Cost Model Coefficients. + */ + optimizer::cascades::CostModelCoefficients getDefaultCoefficients() const; + +private: + BSONObj _coefficients; +}; +} // namespace mongo::cost_model diff --git a/src/mongo/db/query/cqf_get_executor.cpp b/src/mongo/db/query/cqf_get_executor.cpp index b2bfbc20327..cbe7b9afb1f 100644 --- a/src/mongo/db/query/cqf_get_executor.cpp +++ b/src/mongo/db/query/cqf_get_executor.cpp @@ -39,9 +39,11 @@ #include "mongo/db/query/ce/ce_sampling.h" #include "mongo/db/query/ce/collection_statistics_impl.h" #include "mongo/db/query/ce_mode_parameter.h" +#include "mongo/db/query/cost_model/cost_model_manager.h" #include "mongo/db/query/cqf_command_utils.h" #include "mongo/db/query/optimizer/cascades/ce_heuristic.h" #include "mongo/db/query/optimizer/cascades/cost_derivation.h" +#include "mongo/db/query/optimizer/cascades/cost_model_gen.h" #include "mongo/db/query/optimizer/explain.h" #include "mongo/db/query/optimizer/node.h" #include "mongo/db/query/optimizer/opt_phase_manager.h" @@ -56,8 +58,21 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery namespace mongo { - using namespace optimizer; +using cost_model::CostModelManager; + +namespace { +const auto costModelManager = ServiceContext::declareDecoration<CostModelManager>(); + +BSONObj getCostModelCoefficientsOverride() { + if (internalCostModelCoefficients.empty()) { + return BSONObj(); + } + + return fromjson(internalCostModelCoefficients); +} +} // namespace + static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpecsOptimizer( boost::intrusive_ptr<ExpressionContext> expCtx, @@ -485,6 +500,7 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx, enum class CEMode { kSampling, kHistogram, kHeuristic }; static OptPhaseManager createPhaseManager(const CEMode mode, + const CostModelCoefficients& costModel, const NamespaceString& nss, OperationContext* opCtx, const int64_t collectionSize, @@ -506,7 +522,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode, false /*requireRID*/, std::move(metadataForSampling), std::make_unique<HeuristicCE>(), - std::make_unique<DefaultCosting>(), + std::make_unique<DefaultCosting>(costModel), defaultConvertPathToInterval, DebugInfo::kDefaultForProd, {} /*hints*/}; @@ -516,7 +532,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode, std::move(metadata), std::make_unique<CESamplingTransport>( opCtx, std::move(phaseManagerForSampling), collectionSize), - std::make_unique<DefaultCosting>(), + std::make_unique<DefaultCosting>(costModel), defaultConvertPathToInterval, DebugInfo::kDefaultForProd, std::move(hints)}; @@ -530,7 +546,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode, std::move(metadata), std::make_unique<CEHistogramTransport>( std::make_shared<ce::CollectionStatisticsImpl>(collectionSize, nss)), - std::make_unique<DefaultCosting>(), + std::make_unique<DefaultCosting>(costModel), defaultConvertPathToInterval, DebugInfo::kDefaultForProd, std::move(hints)}; @@ -541,7 +557,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode, requireRID, std::move(metadata), std::make_unique<HeuristicCE>(), - std::make_unique<DefaultCosting>(), + std::make_unique<DefaultCosting>(costModel), defaultConvertPathToInterval, DebugInfo::kDefaultForProd, std::move(hints)}; @@ -628,7 +644,11 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp << internalQueryCardinalityEstimatorMode); } + auto costModel = costModelManager(opCtx->getServiceContext()) + .getCoefficients(getCostModelCoefficientsOverride()); + OptPhaseManager phaseManager = createPhaseManager(mode, + costModel, nss, opCtx, numRecords, diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript index 3791d2bdd0e..53f5a642532 100644 --- a/src/mongo/db/query/optimizer/SConscript +++ b/src/mongo/db/query/optimizer/SConscript @@ -19,6 +19,7 @@ env.Library( "cascades/ce_heuristic.cpp", "cascades/ce_hinted.cpp", "cascades/cost_derivation.cpp", + "cascades/cost_model.idl", "cascades/enforcers.cpp", "cascades/implementers.cpp", "cascades/logical_props_derivation.cpp", diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp index b517f748af0..999a21d1374 100644 --- a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp +++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp @@ -28,12 +28,20 @@ */ #include "mongo/db/query/optimizer/cascades/cost_derivation.h" + +#include "mongo/base/init.h" #include "mongo/db/query/optimizer/defs.h" namespace mongo::optimizer::cascades { using namespace properties; +namespace { +CostModelCoefficients defaultCoefficicients; +MONGO_INITIALIZER(CostModelDefaultCoefficients)(InitializerContext*) { + initializeCoefficients(defaultCoefficicients); +} + struct CostAndCEInternal { CostAndCEInternal(double cost, CEType ce) : _cost(cost), _ce(ce) { uassert(8423334, "Invalid cost.", !std::isnan(cost) && cost >= 0.0); @@ -44,71 +52,22 @@ struct CostAndCEInternal { }; class CostDerivation { - // These cost should reflect estimated aggregated execution time in milliseconds. - static constexpr double ms = 1.0e-3; - - // Startup cost of an operator. This is the minimal cost of an operator since it is - // present even if it doesn't process any input. - // TODO: calibrate the cost individually for each operator - static constexpr double kStartupCost = 0.000001; - - // TODO: collection scan should depend on the width of the doc. - // TODO: the actual measured cost is (0.4 * ms), however we increase it here because currently - // it is not possible to estimate the cost of a collection scan vs a full index scan. - static constexpr double kScanIncrementalCost = 0.6 * ms; - - // TODO: cost(N fields) ~ (0.55 + 0.025 * N) - static constexpr double kIndexScanIncrementalCost = 0.5 * ms; - - // TODO: cost(N fields) ~ 0.7 + 0.19 * N - static constexpr double kSeekCost = 2.0 * ms; - - // TODO: take the expression into account. - // cost(N conditions) = 0.2 + N * ??? - static constexpr double kFilterIncrementalCost = 0.2 * ms; - // TODO: the cost of projection depends on number of fields: cost(N fields) ~ 0.1 + 0.2 * N - static constexpr double kEvalIncrementalCost = 2.0 * ms; - - // TODO: cost(N fields) ~ 0.04 + 0.03*(N^2) - static constexpr double kGroupByIncrementalCost = 0.07 * ms; - static constexpr double kUnwindIncrementalCost = 0.03 * ms; // TODO: not yet calibrated - // TODO: not yet calibrated, should be at least as expensive as a filter - static constexpr double kBinaryJoinIncrementalCost = 0.2 * ms; - static constexpr double kHashJoinIncrementalCost = 0.05 * ms; // TODO: not yet calibrated - static constexpr double kMergeJoinIncrementalCost = 0.02 * ms; // TODO: not yet calibrated - - static constexpr double kUniqueIncrementalCost = 0.7 * ms; - - // TODO: implement collation cost that depends on number and size of sorted fields - // Based on a mix of int and str(64) fields: - // 1 sort field: sort_cost(N) = 1.0/10 * N * log(N) - // 5 sort fields: sort_cost(N) = 2.5/10 * N * log(N) - // 10 sort fields: sort_cost(N) = 3.0/10 * N * log(N) - // field_cost_coeff(F) ~ 0.75 + 0.2 * F - static constexpr double kCollationIncrementalCost = 2.5 * ms; // 5 fields avg - static constexpr double kCollationWithLimitIncrementalCost = - 1.0 * ms; // TODO: not yet calibrated - - static constexpr double kUnionIncrementalCost = 0.02 * ms; - - static constexpr double kExchangeIncrementalCost = 0.1 * ms; // TODO: not yet calibrated - public: CostAndCEInternal operator()(const ABT& /*n*/, const PhysicalScanNode& /*node*/) { // Default estimate for scan. - const double collectionScanCost = - kStartupCost + kScanIncrementalCost * _cardinalityEstimate; + const double collectionScanCost = _coefficients.getStartupCost() + + _coefficients.getScanIncrementalCost() * _cardinalityEstimate; return {collectionScanCost, _cardinalityEstimate}; } CostAndCEInternal operator()(const ABT& /*n*/, const CoScanNode& /*node*/) { // Assumed to be free. - return {kStartupCost, _cardinalityEstimate}; + return {_coefficients.getStartupCost(), _cardinalityEstimate}; } CostAndCEInternal operator()(const ABT& /*n*/, const IndexScanNode& node) { - const double indexScanCost = - kStartupCost + kIndexScanIncrementalCost * _cardinalityEstimate; + const double indexScanCost = _coefficients.getStartupCost() + + _coefficients.getIndexScanIncrementalCost() * _cardinalityEstimate; return {indexScanCost, _cardinalityEstimate}; } @@ -116,7 +75,8 @@ public: // SeekNode should deliver one result via cardinality estimate override. // TODO: consider using node.getProjectionMap()._fieldProjections.size() to make the cost // dependent on the size of the projection - const double seekCost = kStartupCost + kSeekCost * _cardinalityEstimate; + const double seekCost = + _coefficients.getStartupCost() + _coefficients.getSeekCost() * _cardinalityEstimate; return {seekCost, _cardinalityEstimate}; } @@ -158,7 +118,8 @@ public: double filterCost = childResult._cost; if (!isTrivialExpr<EvalFilter>(node.getFilter())) { // Non-trivial filter. - filterCost += kStartupCost + kFilterIncrementalCost * childResult._ce; + filterCost += _coefficients.getStartupCost() + + _coefficients.getFilterIncrementalCost() * childResult._ce; } return {filterCost, _cardinalityEstimate}; } @@ -168,7 +129,8 @@ public: double evalCost = childResult._cost; if (!isTrivialExpr<EvalPath>(node.getProjection())) { // Non-trivial projection. - evalCost += kStartupCost + kEvalIncrementalCost * _cardinalityEstimate; + evalCost += _coefficients.getStartupCost() + + _coefficients.getEvalIncrementalCost() * _cardinalityEstimate; } return {evalCost, _cardinalityEstimate}; } @@ -176,8 +138,9 @@ public: CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) { CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); - const double joinCost = kStartupCost + - kBinaryJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) + + const double joinCost = _coefficients.getStartupCost() + + _coefficients.getBinaryJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + leftChildResult._cost + rightChildResult._cost; return {joinCost, _cardinalityEstimate}; } @@ -187,8 +150,9 @@ public: CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); // TODO: distinguish build side and probe side. - const double hashJoinCost = kStartupCost + - kHashJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) + + const double hashJoinCost = _coefficients.getStartupCost() + + _coefficients.getHashJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + leftChildResult._cost + rightChildResult._cost; return {hashJoinCost, _cardinalityEstimate}; } @@ -197,8 +161,9 @@ public: CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); - const double mergeJoinCost = kStartupCost + - kMergeJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) + + const double mergeJoinCost = _coefficients.getStartupCost() + + _coefficients.getMergeJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + leftChildResult._cost + rightChildResult._cost; return {mergeJoinCost, _cardinalityEstimate}; @@ -213,12 +178,12 @@ public: return {childResult._cost, _cardinalityEstimate}; } - double totalCost = kStartupCost; + double totalCost = _coefficients.getStartupCost(); // The cost is the sum of the costs of its children and the cost to union each child. for (size_t childIdx = 0; childIdx < children.size(); childIdx++) { CostAndCEInternal childResult = deriveChild(children[childIdx], childIdx); - const double childCost = - childResult._cost + (childIdx > 0 ? kUnionIncrementalCost * childResult._ce : 0); + const double childCost = childResult._cost + + (childIdx > 0 ? _coefficients.getUnionIncrementalCost() * childResult._ce : 0); totalCost += childCost; } return {totalCost, _cardinalityEstimate}; @@ -226,14 +191,15 @@ public: CostAndCEInternal operator()(const ABT& /*n*/, const GroupByNode& node) { CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double groupByCost = kStartupCost; + double groupByCost = _coefficients.getStartupCost(); // TODO: for now pretend global group by is free. if (node.getType() == GroupNodeType::Global) { groupByCost += childResult._cost; } else { // TODO: consider RepetitionEstimate since this is a stateful operation. - groupByCost += kGroupByIncrementalCost * childResult._ce + childResult._cost; + groupByCost += + _coefficients.getGroupByIncrementalCost() * childResult._ce + childResult._cost; } return {groupByCost, _cardinalityEstimate}; } @@ -241,14 +207,15 @@ public: CostAndCEInternal operator()(const ABT& /*n*/, const UnwindNode& node) { CostAndCEInternal childResult = deriveChild(node.getChild(), 0); // Unwind probably depends mostly on its output size. - const double unwindCost = kUnwindIncrementalCost * _cardinalityEstimate + childResult._cost; + const double unwindCost = + _coefficients.getUnwindIncrementalCost() * _cardinalityEstimate + childResult._cost; return {unwindCost, _cardinalityEstimate}; } CostAndCEInternal operator()(const ABT& /*n*/, const UniqueNode& node) { CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - const double uniqueCost = - kStartupCost + kUniqueIncrementalCost * childResult._ce + childResult._cost; + const double uniqueCost = _coefficients.getStartupCost() + + _coefficients.getUniqueIncrementalCost() * childResult._ce + childResult._cost; return {uniqueCost, _cardinalityEstimate}; } @@ -257,18 +224,18 @@ public: // TODO: consider RepetitionEstimate since this is a stateful operation. double logFactor = childResult._ce; - double incrConst = kCollationIncrementalCost; + double incrConst = _coefficients.getCollationIncrementalCost(); if (hasProperty<LimitSkipRequirement>(_physProps)) { if (auto limit = getPropertyConst<LimitSkipRequirement>(_physProps).getAbsoluteLimit(); limit < logFactor) { logFactor = limit; - incrConst = kCollationWithLimitIncrementalCost; + incrConst = _coefficients.getCollationWithLimitIncrementalCost(); } } // Notice that log2(x) < 0 for any x < 1, and log2(1) = 0. Generally it makes sense that // there is no cost to sort 1 document, so the only cost left is the startup cost. - const double sortCost = kStartupCost + childResult._cost + + const double sortCost = _coefficients.getStartupCost() + childResult._cost + ((logFactor <= 1.0) ? 0.0 // TODO: The cost formula below is based on 1 field, mix of int and str. Instead we @@ -280,13 +247,14 @@ public: CostAndCEInternal operator()(const ABT& /*n*/, const LimitSkipNode& node) { // Assumed to be free. CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - const double limitCost = kStartupCost + childResult._cost; + const double limitCost = _coefficients.getStartupCost() + childResult._cost; return {limitCost, _cardinalityEstimate}; } CostAndCEInternal operator()(const ABT& /*n*/, const ExchangeNode& node) { CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double localCost = kStartupCost + kExchangeIncrementalCost * _cardinalityEstimate; + double localCost = _coefficients.getStartupCost() + + _coefficients.getExchangeIncrementalCost() * _cardinalityEstimate; switch (node.getProperty().getDistributionAndProjections()._type) { case DistributionType::Replicated: @@ -323,9 +291,10 @@ public: const PhysProps& physProps, const ABT::reference_type physNodeRef, const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap) { - CostAndCEInternal result = - deriveInternal(metadata, memo, physProps, physNodeRef, childProps, nodeCEMap); + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) { + CostAndCEInternal result = deriveInternal( + metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, coefficients); switch (getPropertyConst<DistributionRequirement>(physProps) .getDistributionAndProjections() @@ -354,13 +323,15 @@ private: const CEType ce, const PhysProps& physProps, const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap) + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) : _metadata(metadata), _memo(memo), _physProps(physProps), _cardinalityEstimate(getAdjustedCE(ce, _physProps)), _childProps(childProps), - _nodeCEMap(nodeCEMap) {} + _nodeCEMap(nodeCEMap), + _coefficients(coefficients) {} template <class T> static bool isTrivialExpr(const ABT& n) { @@ -379,7 +350,8 @@ private: const PhysProps& physProps, const ABT::reference_type physNodeRef, const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap) { + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) { auto it = nodeCEMap.find(physNodeRef.cast<Node>()); bool found = (it != nodeCEMap.cend()); uassert(8423330, @@ -387,14 +359,15 @@ private: found || physNodeRef.is<MemoLogicalDelegatorNode>()); const CEType ce = (found ? it->second : 0.0); - CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap); + CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap, coefficients); CostAndCEInternal costCEestimates = physNodeRef.visit(instance); return costCEestimates; } CostAndCEInternal deriveChild(const ABT& child, const size_t childIndex) { PhysProps physProps = _childProps.empty() ? _physProps : _childProps.at(childIndex).second; - return deriveInternal(_metadata, _memo, physProps, child.ref(), {}, _nodeCEMap); + return deriveInternal( + _metadata, _memo, physProps, child.ref(), {}, _nodeCEMap, _coefficients); } static CEType getAdjustedCE(CEType baseCE, const PhysProps& physProps) { @@ -431,7 +404,61 @@ private: const CEType _cardinalityEstimate; const ChildPropsType& _childProps; const NodeCEMap& _nodeCEMap; + const CostModelCoefficients& _coefficients; }; +} // namespace + +void initializeCoefficients(CostModelCoefficients& coefficients) { + // These cost should reflect estimated aggregated execution time in milliseconds. + constexpr double ms = 1.0e-3; + + // Startup cost of an operator. This is the minimal cost of an operator since it is + // present even if it doesn't process any input. + // TODO: calibrate the cost individually for each operator + coefficients.setStartupCost(0.000001); + + // TODO: collection scan should depend on the width of the doc. + // TODO: the actual measured cost is (0.4 * ms), however we increase it here because currently + // it is not possible to estimate the cost of a collection scan vs a full index scan. + coefficients.setScanIncrementalCost(0.6 * ms); + + // TODO: cost(N fields) ~ (0.55 + 0.025 * N) + coefficients.setIndexScanIncrementalCost(0.5 * ms); + + // TODO: cost(N fields) ~ 0.7 + 0.19 * N + coefficients.setSeekCost(2.0 * ms); + + // TODO: take the expression into account. + // cost(N conditions) = 0.2 + N * ??? + coefficients.setFilterIncrementalCost(0.2 * ms); + // TODO: the cost of projection depends on number of fields: cost(N fields) ~ 0.1 + 0.2 * N + coefficients.setEvalIncrementalCost(2.0 * ms); + + // TODO: cost(N fields) ~ 0.04 + 0.03*(N^2) + coefficients.setGroupByIncrementalCost(0.07 * ms); + coefficients.setUnwindIncrementalCost(0.03 * ms); // TODO: not yet calibrated + // TODO: not yet calibrated, should be at least as expensive as a filter + coefficients.setBinaryJoinIncrementalCost(0.2 * ms); + coefficients.setHashJoinIncrementalCost(0.05 * ms); // TODO: not yet calibrated + coefficients.setMergeJoinIncrementalCost(0.02 * ms); // TODO: not yet calibrated + + coefficients.setUniqueIncrementalCost(0.7 * ms); + + // TODO: implement collation cost that depends on number and size of sorted fields + // Based on a mix of int and str(64) fields: + // 1 sort field: sort_cost(N) = 1.0/10 * N * log(N) + // 5 sort fields: sort_cost(N) = 2.5/10 * N * log(N) + // 10 sort fields: sort_cost(N) = 3.0/10 * N * log(N) + // field_cost_coeff(F) ~ 0.75 + 0.2 * F + coefficients.setCollationIncrementalCost(2.5 * ms); // 5 fields avg + coefficients.setCollationWithLimitIncrementalCost(1.0 * ms); // TODO: not yet calibrated + + coefficients.setUnionIncrementalCost(0.02 * ms); + + coefficients.setExchangeIncrementalCost(0.1 * ms); // TODO: not yet calibrated +} + +DefaultCosting::DefaultCosting() : _coefficients{defaultCoefficicients} {} CostAndCE DefaultCosting::deriveCost(const Metadata& metadata, const Memo& memo, @@ -439,8 +466,8 @@ CostAndCE DefaultCosting::deriveCost(const Metadata& metadata, const ABT::reference_type physNodeRef, const ChildPropsType& childProps, const NodeCEMap& nodeCEMap) const { - const CostAndCEInternal result = - CostDerivation::derive(metadata, memo, physProps, physNodeRef, childProps, nodeCEMap); + const CostAndCEInternal result = CostDerivation::derive( + metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, _coefficients); return {CostType::fromDouble(result._cost), result._ce}; } diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.h b/src/mongo/db/query/optimizer/cascades/cost_derivation.h index b71a020316e..2bdebd4323b 100644 --- a/src/mongo/db/query/optimizer/cascades/cost_derivation.h +++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.h @@ -29,22 +29,35 @@ #pragma once +#include "mongo/db/query/optimizer/cascades/cost_model_gen.h" #include "mongo/db/query/optimizer/cascades/interfaces.h" #include "mongo/db/query/optimizer/cascades/memo.h" namespace mongo::optimizer::cascades { /** + * Populate given cost model coefficients object with default values. + */ +void initializeCoefficients(CostModelCoefficients& coefficients); + +/** * Default costing for physical nodes with logical delegator (not-yet-optimized) inputs. */ class DefaultCosting : public CostingInterface { public: + DefaultCosting(); + DefaultCosting(CostModelCoefficients coefficicients) + : _coefficients{std::move(coefficicients)} {} + CostAndCE deriveCost(const Metadata& metadata, const Memo& memo, const properties::PhysProps& physProps, ABT::reference_type physNodeRef, const ChildPropsType& childProps, const NodeCEMap& nodeCEMap) const override final; + +private: + const CostModelCoefficients _coefficients; }; } // namespace mongo::optimizer::cascades diff --git a/src/mongo/db/query/optimizer/cascades/cost_model.idl b/src/mongo/db/query/optimizer/cascades/cost_model.idl new file mode 100644 index 00000000000..06f6de9ce56 --- /dev/null +++ b/src/mongo/db/query/optimizer/cascades/cost_model.idl @@ -0,0 +1,103 @@ +#Copyright(C) 2022 - present MongoDB, Inc. +# +#This program is free software : you can redistribute it and / or modify +#it under the terms of the Server Side Public License, version 1, +#as published by MongoDB, Inc. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the +#Server Side Public License for more details. +# +#You should have received a copy of the Server Side Public License +#along with this program.If not, see +# <http: //www.mongodb.com/licensing/server-side-public-license>. +#Ands +#As a special exception, the copyright holders give permission to link the +#code of portions of this program with the OpenSSL library under certain +#conditions as described in each individual source file and distribute +#linked combinations including the program with the OpenSSL library.You +#must comply with the Server Side Public License in all respects for +#all of the code used other than as permitted herein.If you modify file(s) +#with this exception, you may extend this exception to your version of the +#file(s), but you are not obligated to do so.If you do not wish to do so, +#delete this exception statement from your version.If you delete this +#exception statement from all source files in the program, then also delete +#it in the license file. +# + +global: + cpp_namespace: "mongo::optimizer::cascades" + +imports: + - "mongo/db/basic_types.idl" + +structs: + CostModelCoefficients: + description: "A type representing Cost Model coefficients." + strict: true + fields: + startupCost: + optional: false + type: double + stability: stable + scanIncrementalCost: + optional: false + type: double + stability: stable + indexScanIncrementalCost: + optional: false + type: double + stability: stable + seekCost: + optional: false + type: double + stability: stable + filterIncrementalCost: + optional: false + type: double + stability: stable + evalIncrementalCost: + optional: false + type: double + stability: stable + groupByIncrementalCost: + optional: false + type: double + stability: stable + unwindIncrementalCost: + optional: false + type: double + stability: stable + binaryJoinIncrementalCost: + optional: false + type: double + stability: stable + hashJoinIncrementalCost: + optional: false + type: double + stability: stable + mergeJoinIncrementalCost: + optional: false + type: double + stability: stable + uniqueIncrementalCost: + optional: false + type: double + stability: stable + collationIncrementalCost: + optional: false + type: double + stability: stable + collationWithLimitIncrementalCost: + optional: false + type: double + stability: stable + unionIncrementalCost: + optional: false + type: double + stability: stable + exchangeIncrementalCost: + optional: false + type: double + stability: stable diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 06af67ae3e0..20fb75b122d 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -913,6 +913,13 @@ server_parameters: default: expr: false + internalCostModelCoefficients: + description: "Cost Model Coefficients Override" + set_at: [ startup, runtime ] + cpp_varname: "internalCostModelCoefficients" + cpp_vartype: "std::string" + on_update: plan_cache_util::clearSbeCacheOnParameterChange + # TODO SERVER-68341 Remove this query knob after tenancy is supported in the sharded cluster. internalChangeStreamUseTenantIdForTesting: description: "If true, then change streams will operate upon an internal tenant id for testing |