summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorAlexander Ignatyev <alexander.ignatyev@mongodb.com>2022-10-12 09:47:55 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-10-12 10:20:52 +0000
commita13ab294e6a88b0f9e4cdbf1580bd3fce0b45295 (patch)
tree76cc19a4b6321f24370ce268b5e79995aba8f9c2 /src/mongo/db/query
parent967172e344563b61d2a8cb60813010f96f03e076 (diff)
downloadmongo-a13ab294e6a88b0f9e4cdbf1580bd3fce0b45295.tar.gz
SERVER-69447 Add a query knob to control version of Cost Model coefficients
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/SConscript1
-rw-r--r--src/mongo/db/query/cost_model/SConscript15
-rw-r--r--src/mongo/db/query/cost_model/cost_model_manager.cpp51
-rw-r--r--src/mongo/db/query/cost_model/cost_model_manager.h64
-rw-r--r--src/mongo/db/query/cqf_get_executor.cpp30
-rw-r--r--src/mongo/db/query/optimizer/SConscript1
-rw-r--r--src/mongo/db/query/optimizer/cascades/cost_derivation.cpp199
-rw-r--r--src/mongo/db/query/optimizer/cascades/cost_derivation.h13
-rw-r--r--src/mongo/db/query/optimizer/cascades/cost_model.idl103
-rw-r--r--src/mongo/db/query/query_knobs.idl7
10 files changed, 393 insertions, 91 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index 5b8b1b4093c..8f2ada203c7 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -8,6 +8,7 @@ env.SConscript(
dirs=[
'ce',
'collation',
+ 'cost_model',
'datetime',
'optimizer',
],
diff --git a/src/mongo/db/query/cost_model/SConscript b/src/mongo/db/query/cost_model/SConscript
new file mode 100644
index 00000000000..220782a6c5d
--- /dev/null
+++ b/src/mongo/db/query/cost_model/SConscript
@@ -0,0 +1,15 @@
+# -*- mode: python -*-
+
+Import("env")
+
+env = env.Clone()
+
+env.Library(
+ target="query_cost_model",
+ source=[
+ 'cost_model_manager.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/query/optimizer/optimizer',
+ ],
+)
diff --git a/src/mongo/db/query/cost_model/cost_model_manager.cpp b/src/mongo/db/query/cost_model/cost_model_manager.cpp
new file mode 100644
index 00000000000..a52c81a7bd9
--- /dev/null
+++ b/src/mongo/db/query/cost_model/cost_model_manager.cpp
@@ -0,0 +1,51 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/cost_model/cost_model_manager.h"
+
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
+
+namespace mongo::cost_model {
+using optimizer::cascades::CostModelCoefficients;
+
+CostModelManager::CostModelManager() {
+ CostModelCoefficients coefficients;
+ initializeCoefficients(coefficients);
+ _coefficients = coefficients.toBSON();
+}
+
+CostModelCoefficients CostModelManager::getDefaultCoefficients() const {
+ return CostModelCoefficients::parse(IDLParserContext{"CostModelCoefficients"}, _coefficients);
+}
+
+CostModelCoefficients CostModelManager::getCoefficients(const BSONObj& overrides) const {
+ return CostModelCoefficients::parse(IDLParserContext{"CostModelCoefficients"},
+ _coefficients.addFields(overrides));
+}
+} // namespace mongo::cost_model
diff --git a/src/mongo/db/query/cost_model/cost_model_manager.h b/src/mongo/db/query/cost_model/cost_model_manager.h
new file mode 100644
index 00000000000..2970d342bb0
--- /dev/null
+++ b/src/mongo/db/query/cost_model/cost_model_manager.h
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/cascades/cost_model_gen.h"
+
+namespace mongo::cost_model {
+
+/**
+ * This class is main access point to Cost Model Coefficients, it rerieves them and applies
+ * overrides.
+ */
+class CostModelManager {
+public:
+ CostModelManager();
+
+ /**
+ * Returns default version of Cost Model Coefficients with applied overrides specified in the
+ * 'overrides' parameters as a BSON Object. See the IDL definition of 'CostModelCoefficients'
+ * for the names of the fields.
+ *
+ * Usage:
+ * @code
+ * costModelManager.getCoefficients(BSON("scanIncrementalCost" << 0.001));
+ * @endcode
+ */
+ optimizer::cascades::CostModelCoefficients getCoefficients(const BSONObj& overrides) const;
+
+ /**
+ * Returns default version of Cost Model Coefficients.
+ */
+ optimizer::cascades::CostModelCoefficients getDefaultCoefficients() const;
+
+private:
+ BSONObj _coefficients;
+};
+} // namespace mongo::cost_model
diff --git a/src/mongo/db/query/cqf_get_executor.cpp b/src/mongo/db/query/cqf_get_executor.cpp
index b2bfbc20327..cbe7b9afb1f 100644
--- a/src/mongo/db/query/cqf_get_executor.cpp
+++ b/src/mongo/db/query/cqf_get_executor.cpp
@@ -39,9 +39,11 @@
#include "mongo/db/query/ce/ce_sampling.h"
#include "mongo/db/query/ce/collection_statistics_impl.h"
#include "mongo/db/query/ce_mode_parameter.h"
+#include "mongo/db/query/cost_model/cost_model_manager.h"
#include "mongo/db/query/cqf_command_utils.h"
#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
+#include "mongo/db/query/optimizer/cascades/cost_model_gen.h"
#include "mongo/db/query/optimizer/explain.h"
#include "mongo/db/query/optimizer/node.h"
#include "mongo/db/query/optimizer/opt_phase_manager.h"
@@ -56,8 +58,21 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
namespace mongo {
-
using namespace optimizer;
+using cost_model::CostModelManager;
+
+namespace {
+const auto costModelManager = ServiceContext::declareDecoration<CostModelManager>();
+
+BSONObj getCostModelCoefficientsOverride() {
+ if (internalCostModelCoefficients.empty()) {
+ return BSONObj();
+ }
+
+ return fromjson(internalCostModelCoefficients);
+}
+} // namespace
+
static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpecsOptimizer(
boost::intrusive_ptr<ExpressionContext> expCtx,
@@ -485,6 +500,7 @@ Metadata populateMetadata(boost::intrusive_ptr<ExpressionContext> expCtx,
enum class CEMode { kSampling, kHistogram, kHeuristic };
static OptPhaseManager createPhaseManager(const CEMode mode,
+ const CostModelCoefficients& costModel,
const NamespaceString& nss,
OperationContext* opCtx,
const int64_t collectionSize,
@@ -506,7 +522,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode,
false /*requireRID*/,
std::move(metadataForSampling),
std::make_unique<HeuristicCE>(),
- std::make_unique<DefaultCosting>(),
+ std::make_unique<DefaultCosting>(costModel),
defaultConvertPathToInterval,
DebugInfo::kDefaultForProd,
{} /*hints*/};
@@ -516,7 +532,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode,
std::move(metadata),
std::make_unique<CESamplingTransport>(
opCtx, std::move(phaseManagerForSampling), collectionSize),
- std::make_unique<DefaultCosting>(),
+ std::make_unique<DefaultCosting>(costModel),
defaultConvertPathToInterval,
DebugInfo::kDefaultForProd,
std::move(hints)};
@@ -530,7 +546,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode,
std::move(metadata),
std::make_unique<CEHistogramTransport>(
std::make_shared<ce::CollectionStatisticsImpl>(collectionSize, nss)),
- std::make_unique<DefaultCosting>(),
+ std::make_unique<DefaultCosting>(costModel),
defaultConvertPathToInterval,
DebugInfo::kDefaultForProd,
std::move(hints)};
@@ -541,7 +557,7 @@ static OptPhaseManager createPhaseManager(const CEMode mode,
requireRID,
std::move(metadata),
std::make_unique<HeuristicCE>(),
- std::make_unique<DefaultCosting>(),
+ std::make_unique<DefaultCosting>(costModel),
defaultConvertPathToInterval,
DebugInfo::kDefaultForProd,
std::move(hints)};
@@ -628,7 +644,11 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
<< internalQueryCardinalityEstimatorMode);
}
+ auto costModel = costModelManager(opCtx->getServiceContext())
+ .getCoefficients(getCostModelCoefficientsOverride());
+
OptPhaseManager phaseManager = createPhaseManager(mode,
+ costModel,
nss,
opCtx,
numRecords,
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index 3791d2bdd0e..53f5a642532 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -19,6 +19,7 @@ env.Library(
"cascades/ce_heuristic.cpp",
"cascades/ce_hinted.cpp",
"cascades/cost_derivation.cpp",
+ "cascades/cost_model.idl",
"cascades/enforcers.cpp",
"cascades/implementers.cpp",
"cascades/logical_props_derivation.cpp",
diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
index b517f748af0..999a21d1374 100644
--- a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
+++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
@@ -28,12 +28,20 @@
*/
#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
+
+#include "mongo/base/init.h"
#include "mongo/db/query/optimizer/defs.h"
namespace mongo::optimizer::cascades {
using namespace properties;
+namespace {
+CostModelCoefficients defaultCoefficicients;
+MONGO_INITIALIZER(CostModelDefaultCoefficients)(InitializerContext*) {
+ initializeCoefficients(defaultCoefficicients);
+}
+
struct CostAndCEInternal {
CostAndCEInternal(double cost, CEType ce) : _cost(cost), _ce(ce) {
uassert(8423334, "Invalid cost.", !std::isnan(cost) && cost >= 0.0);
@@ -44,71 +52,22 @@ struct CostAndCEInternal {
};
class CostDerivation {
- // These cost should reflect estimated aggregated execution time in milliseconds.
- static constexpr double ms = 1.0e-3;
-
- // Startup cost of an operator. This is the minimal cost of an operator since it is
- // present even if it doesn't process any input.
- // TODO: calibrate the cost individually for each operator
- static constexpr double kStartupCost = 0.000001;
-
- // TODO: collection scan should depend on the width of the doc.
- // TODO: the actual measured cost is (0.4 * ms), however we increase it here because currently
- // it is not possible to estimate the cost of a collection scan vs a full index scan.
- static constexpr double kScanIncrementalCost = 0.6 * ms;
-
- // TODO: cost(N fields) ~ (0.55 + 0.025 * N)
- static constexpr double kIndexScanIncrementalCost = 0.5 * ms;
-
- // TODO: cost(N fields) ~ 0.7 + 0.19 * N
- static constexpr double kSeekCost = 2.0 * ms;
-
- // TODO: take the expression into account.
- // cost(N conditions) = 0.2 + N * ???
- static constexpr double kFilterIncrementalCost = 0.2 * ms;
- // TODO: the cost of projection depends on number of fields: cost(N fields) ~ 0.1 + 0.2 * N
- static constexpr double kEvalIncrementalCost = 2.0 * ms;
-
- // TODO: cost(N fields) ~ 0.04 + 0.03*(N^2)
- static constexpr double kGroupByIncrementalCost = 0.07 * ms;
- static constexpr double kUnwindIncrementalCost = 0.03 * ms; // TODO: not yet calibrated
- // TODO: not yet calibrated, should be at least as expensive as a filter
- static constexpr double kBinaryJoinIncrementalCost = 0.2 * ms;
- static constexpr double kHashJoinIncrementalCost = 0.05 * ms; // TODO: not yet calibrated
- static constexpr double kMergeJoinIncrementalCost = 0.02 * ms; // TODO: not yet calibrated
-
- static constexpr double kUniqueIncrementalCost = 0.7 * ms;
-
- // TODO: implement collation cost that depends on number and size of sorted fields
- // Based on a mix of int and str(64) fields:
- // 1 sort field: sort_cost(N) = 1.0/10 * N * log(N)
- // 5 sort fields: sort_cost(N) = 2.5/10 * N * log(N)
- // 10 sort fields: sort_cost(N) = 3.0/10 * N * log(N)
- // field_cost_coeff(F) ~ 0.75 + 0.2 * F
- static constexpr double kCollationIncrementalCost = 2.5 * ms; // 5 fields avg
- static constexpr double kCollationWithLimitIncrementalCost =
- 1.0 * ms; // TODO: not yet calibrated
-
- static constexpr double kUnionIncrementalCost = 0.02 * ms;
-
- static constexpr double kExchangeIncrementalCost = 0.1 * ms; // TODO: not yet calibrated
-
public:
CostAndCEInternal operator()(const ABT& /*n*/, const PhysicalScanNode& /*node*/) {
// Default estimate for scan.
- const double collectionScanCost =
- kStartupCost + kScanIncrementalCost * _cardinalityEstimate;
+ const double collectionScanCost = _coefficients.getStartupCost() +
+ _coefficients.getScanIncrementalCost() * _cardinalityEstimate;
return {collectionScanCost, _cardinalityEstimate};
}
CostAndCEInternal operator()(const ABT& /*n*/, const CoScanNode& /*node*/) {
// Assumed to be free.
- return {kStartupCost, _cardinalityEstimate};
+ return {_coefficients.getStartupCost(), _cardinalityEstimate};
}
CostAndCEInternal operator()(const ABT& /*n*/, const IndexScanNode& node) {
- const double indexScanCost =
- kStartupCost + kIndexScanIncrementalCost * _cardinalityEstimate;
+ const double indexScanCost = _coefficients.getStartupCost() +
+ _coefficients.getIndexScanIncrementalCost() * _cardinalityEstimate;
return {indexScanCost, _cardinalityEstimate};
}
@@ -116,7 +75,8 @@ public:
// SeekNode should deliver one result via cardinality estimate override.
// TODO: consider using node.getProjectionMap()._fieldProjections.size() to make the cost
// dependent on the size of the projection
- const double seekCost = kStartupCost + kSeekCost * _cardinalityEstimate;
+ const double seekCost =
+ _coefficients.getStartupCost() + _coefficients.getSeekCost() * _cardinalityEstimate;
return {seekCost, _cardinalityEstimate};
}
@@ -158,7 +118,8 @@ public:
double filterCost = childResult._cost;
if (!isTrivialExpr<EvalFilter>(node.getFilter())) {
// Non-trivial filter.
- filterCost += kStartupCost + kFilterIncrementalCost * childResult._ce;
+ filterCost += _coefficients.getStartupCost() +
+ _coefficients.getFilterIncrementalCost() * childResult._ce;
}
return {filterCost, _cardinalityEstimate};
}
@@ -168,7 +129,8 @@ public:
double evalCost = childResult._cost;
if (!isTrivialExpr<EvalPath>(node.getProjection())) {
// Non-trivial projection.
- evalCost += kStartupCost + kEvalIncrementalCost * _cardinalityEstimate;
+ evalCost += _coefficients.getStartupCost() +
+ _coefficients.getEvalIncrementalCost() * _cardinalityEstimate;
}
return {evalCost, _cardinalityEstimate};
}
@@ -176,8 +138,9 @@ public:
CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) {
CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0);
CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1);
- const double joinCost = kStartupCost +
- kBinaryJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) +
+ const double joinCost = _coefficients.getStartupCost() +
+ _coefficients.getBinaryJoinIncrementalCost() *
+ (leftChildResult._ce + rightChildResult._ce) +
leftChildResult._cost + rightChildResult._cost;
return {joinCost, _cardinalityEstimate};
}
@@ -187,8 +150,9 @@ public:
CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1);
// TODO: distinguish build side and probe side.
- const double hashJoinCost = kStartupCost +
- kHashJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) +
+ const double hashJoinCost = _coefficients.getStartupCost() +
+ _coefficients.getHashJoinIncrementalCost() *
+ (leftChildResult._ce + rightChildResult._ce) +
leftChildResult._cost + rightChildResult._cost;
return {hashJoinCost, _cardinalityEstimate};
}
@@ -197,8 +161,9 @@ public:
CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0);
CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1);
- const double mergeJoinCost = kStartupCost +
- kMergeJoinIncrementalCost * (leftChildResult._ce + rightChildResult._ce) +
+ const double mergeJoinCost = _coefficients.getStartupCost() +
+ _coefficients.getMergeJoinIncrementalCost() *
+ (leftChildResult._ce + rightChildResult._ce) +
leftChildResult._cost + rightChildResult._cost;
return {mergeJoinCost, _cardinalityEstimate};
@@ -213,12 +178,12 @@ public:
return {childResult._cost, _cardinalityEstimate};
}
- double totalCost = kStartupCost;
+ double totalCost = _coefficients.getStartupCost();
// The cost is the sum of the costs of its children and the cost to union each child.
for (size_t childIdx = 0; childIdx < children.size(); childIdx++) {
CostAndCEInternal childResult = deriveChild(children[childIdx], childIdx);
- const double childCost =
- childResult._cost + (childIdx > 0 ? kUnionIncrementalCost * childResult._ce : 0);
+ const double childCost = childResult._cost +
+ (childIdx > 0 ? _coefficients.getUnionIncrementalCost() * childResult._ce : 0);
totalCost += childCost;
}
return {totalCost, _cardinalityEstimate};
@@ -226,14 +191,15 @@ public:
CostAndCEInternal operator()(const ABT& /*n*/, const GroupByNode& node) {
CostAndCEInternal childResult = deriveChild(node.getChild(), 0);
- double groupByCost = kStartupCost;
+ double groupByCost = _coefficients.getStartupCost();
// TODO: for now pretend global group by is free.
if (node.getType() == GroupNodeType::Global) {
groupByCost += childResult._cost;
} else {
// TODO: consider RepetitionEstimate since this is a stateful operation.
- groupByCost += kGroupByIncrementalCost * childResult._ce + childResult._cost;
+ groupByCost +=
+ _coefficients.getGroupByIncrementalCost() * childResult._ce + childResult._cost;
}
return {groupByCost, _cardinalityEstimate};
}
@@ -241,14 +207,15 @@ public:
CostAndCEInternal operator()(const ABT& /*n*/, const UnwindNode& node) {
CostAndCEInternal childResult = deriveChild(node.getChild(), 0);
// Unwind probably depends mostly on its output size.
- const double unwindCost = kUnwindIncrementalCost * _cardinalityEstimate + childResult._cost;
+ const double unwindCost =
+ _coefficients.getUnwindIncrementalCost() * _cardinalityEstimate + childResult._cost;
return {unwindCost, _cardinalityEstimate};
}
CostAndCEInternal operator()(const ABT& /*n*/, const UniqueNode& node) {
CostAndCEInternal childResult = deriveChild(node.getChild(), 0);
- const double uniqueCost =
- kStartupCost + kUniqueIncrementalCost * childResult._ce + childResult._cost;
+ const double uniqueCost = _coefficients.getStartupCost() +
+ _coefficients.getUniqueIncrementalCost() * childResult._ce + childResult._cost;
return {uniqueCost, _cardinalityEstimate};
}
@@ -257,18 +224,18 @@ public:
// TODO: consider RepetitionEstimate since this is a stateful operation.
double logFactor = childResult._ce;
- double incrConst = kCollationIncrementalCost;
+ double incrConst = _coefficients.getCollationIncrementalCost();
if (hasProperty<LimitSkipRequirement>(_physProps)) {
if (auto limit = getPropertyConst<LimitSkipRequirement>(_physProps).getAbsoluteLimit();
limit < logFactor) {
logFactor = limit;
- incrConst = kCollationWithLimitIncrementalCost;
+ incrConst = _coefficients.getCollationWithLimitIncrementalCost();
}
}
// Notice that log2(x) < 0 for any x < 1, and log2(1) = 0. Generally it makes sense that
// there is no cost to sort 1 document, so the only cost left is the startup cost.
- const double sortCost = kStartupCost + childResult._cost +
+ const double sortCost = _coefficients.getStartupCost() + childResult._cost +
((logFactor <= 1.0)
? 0.0
// TODO: The cost formula below is based on 1 field, mix of int and str. Instead we
@@ -280,13 +247,14 @@ public:
CostAndCEInternal operator()(const ABT& /*n*/, const LimitSkipNode& node) {
// Assumed to be free.
CostAndCEInternal childResult = deriveChild(node.getChild(), 0);
- const double limitCost = kStartupCost + childResult._cost;
+ const double limitCost = _coefficients.getStartupCost() + childResult._cost;
return {limitCost, _cardinalityEstimate};
}
CostAndCEInternal operator()(const ABT& /*n*/, const ExchangeNode& node) {
CostAndCEInternal childResult = deriveChild(node.getChild(), 0);
- double localCost = kStartupCost + kExchangeIncrementalCost * _cardinalityEstimate;
+ double localCost = _coefficients.getStartupCost() +
+ _coefficients.getExchangeIncrementalCost() * _cardinalityEstimate;
switch (node.getProperty().getDistributionAndProjections()._type) {
case DistributionType::Replicated:
@@ -323,9 +291,10 @@ public:
const PhysProps& physProps,
const ABT::reference_type physNodeRef,
const ChildPropsType& childProps,
- const NodeCEMap& nodeCEMap) {
- CostAndCEInternal result =
- deriveInternal(metadata, memo, physProps, physNodeRef, childProps, nodeCEMap);
+ const NodeCEMap& nodeCEMap,
+ const CostModelCoefficients& coefficients) {
+ CostAndCEInternal result = deriveInternal(
+ metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, coefficients);
switch (getPropertyConst<DistributionRequirement>(physProps)
.getDistributionAndProjections()
@@ -354,13 +323,15 @@ private:
const CEType ce,
const PhysProps& physProps,
const ChildPropsType& childProps,
- const NodeCEMap& nodeCEMap)
+ const NodeCEMap& nodeCEMap,
+ const CostModelCoefficients& coefficients)
: _metadata(metadata),
_memo(memo),
_physProps(physProps),
_cardinalityEstimate(getAdjustedCE(ce, _physProps)),
_childProps(childProps),
- _nodeCEMap(nodeCEMap) {}
+ _nodeCEMap(nodeCEMap),
+ _coefficients(coefficients) {}
template <class T>
static bool isTrivialExpr(const ABT& n) {
@@ -379,7 +350,8 @@ private:
const PhysProps& physProps,
const ABT::reference_type physNodeRef,
const ChildPropsType& childProps,
- const NodeCEMap& nodeCEMap) {
+ const NodeCEMap& nodeCEMap,
+ const CostModelCoefficients& coefficients) {
auto it = nodeCEMap.find(physNodeRef.cast<Node>());
bool found = (it != nodeCEMap.cend());
uassert(8423330,
@@ -387,14 +359,15 @@ private:
found || physNodeRef.is<MemoLogicalDelegatorNode>());
const CEType ce = (found ? it->second : 0.0);
- CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap);
+ CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap, coefficients);
CostAndCEInternal costCEestimates = physNodeRef.visit(instance);
return costCEestimates;
}
CostAndCEInternal deriveChild(const ABT& child, const size_t childIndex) {
PhysProps physProps = _childProps.empty() ? _physProps : _childProps.at(childIndex).second;
- return deriveInternal(_metadata, _memo, physProps, child.ref(), {}, _nodeCEMap);
+ return deriveInternal(
+ _metadata, _memo, physProps, child.ref(), {}, _nodeCEMap, _coefficients);
}
static CEType getAdjustedCE(CEType baseCE, const PhysProps& physProps) {
@@ -431,7 +404,61 @@ private:
const CEType _cardinalityEstimate;
const ChildPropsType& _childProps;
const NodeCEMap& _nodeCEMap;
+ const CostModelCoefficients& _coefficients;
};
+} // namespace
+
+void initializeCoefficients(CostModelCoefficients& coefficients) {
+ // These cost should reflect estimated aggregated execution time in milliseconds.
+ constexpr double ms = 1.0e-3;
+
+ // Startup cost of an operator. This is the minimal cost of an operator since it is
+ // present even if it doesn't process any input.
+ // TODO: calibrate the cost individually for each operator
+ coefficients.setStartupCost(0.000001);
+
+ // TODO: collection scan should depend on the width of the doc.
+ // TODO: the actual measured cost is (0.4 * ms), however we increase it here because currently
+ // it is not possible to estimate the cost of a collection scan vs a full index scan.
+ coefficients.setScanIncrementalCost(0.6 * ms);
+
+ // TODO: cost(N fields) ~ (0.55 + 0.025 * N)
+ coefficients.setIndexScanIncrementalCost(0.5 * ms);
+
+ // TODO: cost(N fields) ~ 0.7 + 0.19 * N
+ coefficients.setSeekCost(2.0 * ms);
+
+ // TODO: take the expression into account.
+ // cost(N conditions) = 0.2 + N * ???
+ coefficients.setFilterIncrementalCost(0.2 * ms);
+ // TODO: the cost of projection depends on number of fields: cost(N fields) ~ 0.1 + 0.2 * N
+ coefficients.setEvalIncrementalCost(2.0 * ms);
+
+ // TODO: cost(N fields) ~ 0.04 + 0.03*(N^2)
+ coefficients.setGroupByIncrementalCost(0.07 * ms);
+ coefficients.setUnwindIncrementalCost(0.03 * ms); // TODO: not yet calibrated
+ // TODO: not yet calibrated, should be at least as expensive as a filter
+ coefficients.setBinaryJoinIncrementalCost(0.2 * ms);
+ coefficients.setHashJoinIncrementalCost(0.05 * ms); // TODO: not yet calibrated
+ coefficients.setMergeJoinIncrementalCost(0.02 * ms); // TODO: not yet calibrated
+
+ coefficients.setUniqueIncrementalCost(0.7 * ms);
+
+ // TODO: implement collation cost that depends on number and size of sorted fields
+ // Based on a mix of int and str(64) fields:
+ // 1 sort field: sort_cost(N) = 1.0/10 * N * log(N)
+ // 5 sort fields: sort_cost(N) = 2.5/10 * N * log(N)
+ // 10 sort fields: sort_cost(N) = 3.0/10 * N * log(N)
+ // field_cost_coeff(F) ~ 0.75 + 0.2 * F
+ coefficients.setCollationIncrementalCost(2.5 * ms); // 5 fields avg
+ coefficients.setCollationWithLimitIncrementalCost(1.0 * ms); // TODO: not yet calibrated
+
+ coefficients.setUnionIncrementalCost(0.02 * ms);
+
+ coefficients.setExchangeIncrementalCost(0.1 * ms); // TODO: not yet calibrated
+}
+
+DefaultCosting::DefaultCosting() : _coefficients{defaultCoefficicients} {}
CostAndCE DefaultCosting::deriveCost(const Metadata& metadata,
const Memo& memo,
@@ -439,8 +466,8 @@ CostAndCE DefaultCosting::deriveCost(const Metadata& metadata,
const ABT::reference_type physNodeRef,
const ChildPropsType& childProps,
const NodeCEMap& nodeCEMap) const {
- const CostAndCEInternal result =
- CostDerivation::derive(metadata, memo, physProps, physNodeRef, childProps, nodeCEMap);
+ const CostAndCEInternal result = CostDerivation::derive(
+ metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, _coefficients);
return {CostType::fromDouble(result._cost), result._ce};
}
diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.h b/src/mongo/db/query/optimizer/cascades/cost_derivation.h
index b71a020316e..2bdebd4323b 100644
--- a/src/mongo/db/query/optimizer/cascades/cost_derivation.h
+++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.h
@@ -29,22 +29,35 @@
#pragma once
+#include "mongo/db/query/optimizer/cascades/cost_model_gen.h"
#include "mongo/db/query/optimizer/cascades/interfaces.h"
#include "mongo/db/query/optimizer/cascades/memo.h"
namespace mongo::optimizer::cascades {
/**
+ * Populate given cost model coefficients object with default values.
+ */
+void initializeCoefficients(CostModelCoefficients& coefficients);
+
+/**
* Default costing for physical nodes with logical delegator (not-yet-optimized) inputs.
*/
class DefaultCosting : public CostingInterface {
public:
+ DefaultCosting();
+ DefaultCosting(CostModelCoefficients coefficicients)
+ : _coefficients{std::move(coefficicients)} {}
+
CostAndCE deriveCost(const Metadata& metadata,
const Memo& memo,
const properties::PhysProps& physProps,
ABT::reference_type physNodeRef,
const ChildPropsType& childProps,
const NodeCEMap& nodeCEMap) const override final;
+
+private:
+ const CostModelCoefficients _coefficients;
};
} // namespace mongo::optimizer::cascades
diff --git a/src/mongo/db/query/optimizer/cascades/cost_model.idl b/src/mongo/db/query/optimizer/cascades/cost_model.idl
new file mode 100644
index 00000000000..06f6de9ce56
--- /dev/null
+++ b/src/mongo/db/query/optimizer/cascades/cost_model.idl
@@ -0,0 +1,103 @@
+#Copyright(C) 2022 - present MongoDB, Inc.
+#
+#This program is free software : you can redistribute it and / or modify
+#it under the terms of the Server Side Public License, version 1,
+#as published by MongoDB, Inc.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+#Server Side Public License for more details.
+#
+#You should have received a copy of the Server Side Public License
+#along with this program.If not, see
+# <http: //www.mongodb.com/licensing/server-side-public-license>.
+#Ands
+#As a special exception, the copyright holders give permission to link the
+#code of portions of this program with the OpenSSL library under certain
+#conditions as described in each individual source file and distribute
+#linked combinations including the program with the OpenSSL library.You
+#must comply with the Server Side Public License in all respects for
+#all of the code used other than as permitted herein.If you modify file(s)
+#with this exception, you may extend this exception to your version of the
+#file(s), but you are not obligated to do so.If you do not wish to do so,
+#delete this exception statement from your version.If you delete this
+#exception statement from all source files in the program, then also delete
+#it in the license file.
+#
+
+global:
+ cpp_namespace: "mongo::optimizer::cascades"
+
+imports:
+ - "mongo/db/basic_types.idl"
+
+structs:
+ CostModelCoefficients:
+ description: "A type representing Cost Model coefficients."
+ strict: true
+ fields:
+ startupCost:
+ optional: false
+ type: double
+ stability: stable
+ scanIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ indexScanIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ seekCost:
+ optional: false
+ type: double
+ stability: stable
+ filterIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ evalIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ groupByIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ unwindIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ binaryJoinIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ hashJoinIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ mergeJoinIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ uniqueIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ collationIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ collationWithLimitIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ unionIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
+ exchangeIncrementalCost:
+ optional: false
+ type: double
+ stability: stable
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index 06af67ae3e0..20fb75b122d 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -913,6 +913,13 @@ server_parameters:
default:
expr: false
+ internalCostModelCoefficients:
+ description: "Cost Model Coefficients Override"
+ set_at: [ startup, runtime ]
+ cpp_varname: "internalCostModelCoefficients"
+ cpp_vartype: "std::string"
+ on_update: plan_cache_util::clearSbeCacheOnParameterChange
+
# TODO SERVER-68341 Remove this query knob after tenancy is supported in the sharded cluster.
internalChangeStreamUseTenantIdForTesting:
description: "If true, then change streams will operate upon an internal tenant id for testing