diff options
Diffstat (limited to 'src/mongo/db/query/plan_ranker.cpp')
-rw-r--r-- | src/mongo/db/query/plan_ranker.cpp | 344 |
1 files changed, 97 insertions, 247 deletions
diff --git a/src/mongo/db/query/plan_ranker.cpp b/src/mongo/db/query/plan_ranker.cpp index e81612cf7e7..fa47db3f5e5 100644 --- a/src/mongo/db/query/plan_ranker.cpp +++ b/src/mongo/db/query/plan_ranker.cpp @@ -31,279 +31,129 @@ #include "mongo/platform/basic.h" -#include <algorithm> -#include <cmath> -#include <utility> -#include <vector> - #include "mongo/db/query/plan_ranker.h" -#include "mongo/db/exec/plan_stage.h" -#include "mongo/db/exec/working_set.h" -#include "mongo/db/query/explain.h" -#include "mongo/db/query/query_knobs_gen.h" -#include "mongo/db/query/query_solution.h" -#include "mongo/db/server_options.h" #include "mongo/logv2/log.h" -namespace { - -/** - * Comparator for (scores, candidateIndex) in pickBestPlan(). - */ -bool scoreComparator(const std::pair<double, size_t>& lhs, const std::pair<double, size_t>& rhs) { - // Just compare score in lhs.first and rhs.first; - // Ignore candidate array index in lhs.second and rhs.second. - return lhs.first > rhs.first; +namespace mongo::plan_ranker { +namespace log_detail { +void logScoreFormula(std::function<std::string()> formula, + double score, + double baseScore, + double productivity, + double noFetchBonus, + double noSortBonus, + double noIxisectBonus, + double tieBreakers) { + LOGV2_DEBUG(20961, 2, "{sb_str}", "sb_str"_attr = [&]() { + StringBuilder sb; + sb << "score(" << str::convertDoubleToString(score) << ") = baseScore(" + << str::convertDoubleToString(baseScore) << ")" + << " + productivity(" << formula() << " = " << str::convertDoubleToString(productivity) + << ")" + << " + tieBreakers(" << str::convertDoubleToString(noFetchBonus) << " noFetchBonus + " + << str::convertDoubleToString(noSortBonus) << " noSortBonus + " + << str::convertDoubleToString(noIxisectBonus) + << " noIxisectBonus = " << str::convertDoubleToString(tieBreakers) << ")"; + return sb.str(); + }()); } -} // namespace - -namespace mongo { - -using std::endl; -using std::vector; - -// static -StatusWith<std::unique_ptr<PlanRankingDecision>> PlanRanker::pickBestPlan( - const vector<CandidatePlan>& candidates) { - invariant(!candidates.empty()); - // A plan that hits EOF is automatically scored above - // its peers. If multiple plans hit EOF during the same - // set of round-robin calls to work(), then all such plans - // receive the bonus. - double eofBonus = 1.0; - - // Each plan will have a stat tree. - std::vector<std::unique_ptr<PlanStageStats>> statTrees; - - // Get stat trees from each plan. - // Copy stats trees instead of transferring ownership - // because multi plan runner will need its own stats - // trees for explain. - for (size_t i = 0; i < candidates.size(); ++i) { - statTrees.push_back(candidates[i].root->getStats()); - } - - // Holds (score, candidateInndex). - // Used to derive scores and candidate ordering. - vector<std::pair<double, size_t>> scoresAndCandidateindices; - vector<size_t> failed; +void logScoreBoost(double score) { + LOGV2_DEBUG(20962, + 5, + "Score boosted to {newScore} due to intersection forcing", + "Score boosted due to intersection forcing", + "newScore"_attr = score); +} - // Compute score for each tree. Record the best. - for (size_t i = 0; i < statTrees.size(); ++i) { - if (!candidates[i].failed) { - LOGV2_DEBUG( - 20956, +void logScoringPlan(std::function<std::string()> solution, + std::function<std::string()> explain, + std::function<std::string()> planSummary, + size_t planIndex, + bool isEOF) { + LOGV2_DEBUG(20956, 5, "Scoring plan {planIndex}:\n{querySolution}Stats:\n{stats}", "Scoring plan", - "planIndex"_attr = i, - "querySolution"_attr = redact(candidates[i].solution->toString()), - "stats"_attr = redact( - Explain::statsToBSON(*statTrees[i]).jsonString(ExtendedRelaxedV2_0_0, true))); - LOGV2_DEBUG(20957, - 2, - "Scoring query plan: {planSummary} planHitEOF={planHitEOF}", - "Scoring query plan", - "planSummary"_attr = Explain::getPlanSummary(candidates[i].root), - "planHitEOF"_attr = statTrees[i]->common.isEOF); - - double score = scoreTree(statTrees[i].get()); - LOGV2_DEBUG( - 20958, 5, "Basic plan score: {score}", "Basic plan score", "score"_attr = score); - if (statTrees[i]->common.isEOF) { - LOGV2_DEBUG(20959, - 5, - "Adding +{eofBonus} EOF bonus to score", - "Adding EOF bonus to score", - "eofBonus"_attr = eofBonus); - score += 1; - } - - scoresAndCandidateindices.push_back(std::make_pair(score, i)); - } else { - failed.push_back(i); - LOGV2_DEBUG(20960, - 2, - "Not scoring plan: {planSummary} because the plan failed", - "Not scoring a plan because the plan failed", - "planSummary"_attr = Explain::getPlanSummary(candidates[i].root)); - } - } - - // If there isn't a viable plan we should error. - if (scoresAndCandidateindices.size() == 0U) { - return {ErrorCodes::Error(31157), - "No viable plan was found because all candidate plans failed."}; - } - - // Sort (scores, candidateIndex). Get best child and populate candidate ordering. - std::stable_sort( - scoresAndCandidateindices.begin(), scoresAndCandidateindices.end(), scoreComparator); - - auto why = std::make_unique<PlanRankingDecision>(); - - // Determine whether plans tied for the win. - if (scoresAndCandidateindices.size() > 1U) { - double bestScore = scoresAndCandidateindices[0].first; - double runnerUpScore = scoresAndCandidateindices[1].first; - const double epsilon = 1e-10; - why->tieForBest = std::abs(bestScore - runnerUpScore) < epsilon; - } - - // Update results in 'why' - // Stats and scores in 'why' are sorted in descending order by score. - why->stats.clear(); - why->scores.clear(); - why->candidateOrder.clear(); - why->failedCandidates = std::move(failed); - for (size_t i = 0; i < scoresAndCandidateindices.size(); ++i) { - double score = scoresAndCandidateindices[i].first; - size_t candidateIndex = scoresAndCandidateindices[i].second; - - // We shouldn't cache the scores with the EOF bonus included, - // as this is just a tie-breaking measure for plan selection. - // Plans not run through the multi plan runner will not receive - // the bonus. - // - // An example of a bad thing that could happen if we stored scores - // with the EOF bonus included: - // - // Let's say Plan A hits EOF, is the highest ranking plan, and gets - // cached as such. On subsequent runs it will not receive the bonus. - // Eventually the plan cache feedback mechanism will evict the cache - // entry---the scores will appear to have fallen due to the missing - // EOF bonus. - // - // This begs the question, why don't we include the EOF bonus in - // scoring of cached plans as well? The problem here is that the cached - // plan runner always runs plans to completion before scoring. Queries - // that don't get the bonus in the multi plan runner might get the bonus - // after being run from the plan cache. - if (statTrees[candidateIndex]->common.isEOF) { - score -= eofBonus; - } - - why->stats.push_back(std::move(statTrees[candidateIndex])); - why->scores.push_back(score); - why->candidateOrder.push_back(candidateIndex); - } - for (auto& i : why->failedCandidates) { - why->stats.push_back(std::move(statTrees[i])); - } - - return StatusWith<std::unique_ptr<PlanRankingDecision>>(std::move(why)); + "planIndex"_attr = planIndex, + "querySolution"_attr = [solution]() { return redact(solution()); }(), + "stats"_attr = [explain]() { return redact(explain()); }()); + LOGV2_DEBUG(20957, + 2, + "Scoring query plan: {planSummary} planHitEOF={planHitEOF}", + "Scoring query plan", + "planSummary"_attr = [planSummary]() { return planSummary(); }(), + "planHitEOF"_attr = isEOF); } -// TODO: Move this out. This is a signal for ranking but will become its own complicated -// stats-collecting beast. -double computeSelectivity(const PlanStageStats* stats) { - if (STAGE_IXSCAN == stats->stageType) { - IndexScanStats* iss = static_cast<IndexScanStats*>(stats->specific.get()); - return iss->keyPattern.nFields(); - } else { - double sum = 0; - for (size_t i = 0; i < stats->children.size(); ++i) { - sum += computeSelectivity(stats->children[i].get()); - } - return sum; - } +void logScore(double score) { + LOGV2_DEBUG(20958, 5, "Basic plan score: {score}", "Basic plan score", "score"_attr = score); } -bool hasStage(const StageType type, const PlanStageStats* stats) { - if (type == stats->stageType) { - return true; - } - for (size_t i = 0; i < stats->children.size(); ++i) { - if (hasStage(type, stats->children[i].get())) { - return true; - } - } - return false; +void logEOFBonus(double eofBonus) { + LOGV2_DEBUG(20959, + 5, + "Adding +{eofBonus} EOF bonus to score", + "Adding EOF bonus to score", + "eofBonus"_attr = eofBonus); } -// static -double PlanRanker::scoreTree(const PlanStageStats* stats) { - // We start all scores at 1. Our "no plan selected" score is 0 and we want all plans to - // be greater than that. - double baseScore = 1; - - // How many "units of work" did the plan perform. Each call to work(...) - // counts as one unit. - size_t workUnits = stats->common.works; - invariant(workUnits != 0); - - // How much did a plan produce? - // Range: [0, 1] - double productivity = - static_cast<double>(stats->common.advanced) / static_cast<double>(workUnits); - - // Just enough to break a tie. Must be small enough to ensure that a more productive - // plan doesn't lose to a less productive plan due to tie breaking. - const double epsilon = std::min(1.0 / static_cast<double>(10 * workUnits), 1e-4); +void logFailedPlan(std::function<std::string()> planSummary) { + LOGV2_DEBUG(20960, + 2, + "Not scoring plan: {planSummary} because the plan failed", + "Not scoring a plan because the plan failed", + "planSummary"_attr = [&]() { return planSummary(); }()); +} +} // namespace log_detail - // We prefer queries that don't require a fetch stage. - double noFetchBonus = epsilon; - if (hasStage(STAGE_FETCH, stats)) { - noFetchBonus = 0; +namespace { +/** + * A plan scorer for the classic plan stage tree. Defines the plan productivity as a number + * of intermediate results returned, or advanced, by the root stage, divided by the "unit of works" + * which the plan performed. Each call to work(...) counts as one unit. + */ +class DefaultPlanScorer final : public PlanScorer<PlanStageStats> { +protected: + double calculateProductivity(const PlanStageStats* stats) const final { + invariant(stats->common.works != 0); + return static_cast<double>(stats->common.advanced) / + static_cast<double>(stats->common.works); } - // In the case of ties, prefer solutions without a blocking sort - // to solutions with a blocking sort. - double noSortBonus = epsilon; - if (hasStage(STAGE_SORT_DEFAULT, stats) || hasStage(STAGE_SORT_SIMPLE, stats)) { - noSortBonus = 0; + std::string getProductivityFormula(const PlanStageStats* stats) const { + StringBuilder sb; + sb << "(" << stats->common.advanced << " advanced)/(" << stats->common.works << " works)"; + return sb.str(); } - // In the case of ties, prefer single index solutions to ixisect. Index - // intersection solutions are often slower than single-index solutions - // because they require examining a superset of index keys that would be - // examined by a single index scan. - // - // On the other hand, index intersection solutions examine the same - // number or fewer of documents. In the case that index intersection - // allows us to examine fewer documents, the penalty given to ixisect - // can be made up via the no fetch bonus. - double noIxisectBonus = epsilon; - if (hasStage(STAGE_AND_HASH, stats) || hasStage(STAGE_AND_SORTED, stats)) { - noIxisectBonus = 0; + double getNumberOfAdvances(const PlanStageStats* stats) const final { + return stats->common.works; } - double tieBreakers = noFetchBonus + noSortBonus + noIxisectBonus; - double score = baseScore + productivity + tieBreakers; + bool hasStage(StageType type, const PlanStageStats* root) const final { + std::queue<const PlanStageStats*> remaining; + remaining.push(root); - if (shouldLog(logv2::LogSeverity::Debug(2))) { - StringBuilder sb; - sb << "baseScore(" << str::convertDoubleToString(baseScore) << ")" - << " + productivity((" << stats->common.advanced << " advanced)/(" << stats->common.works - << " works) = " << str::convertDoubleToString(productivity) << ")" - << " + tieBreakers(" << str::convertDoubleToString(noFetchBonus) << " noFetchBonus + " - << str::convertDoubleToString(noSortBonus) << " noSortBonus + " - << str::convertDoubleToString(noIxisectBonus) - << " noIxisectBonus = " << str::convertDoubleToString(tieBreakers) << ")"; - LOGV2_DEBUG(20961, - 2, - "score({score}) = {calculation}", - "Plan score calculation", - "score"_attr = score, - "calculation"_attr = sb.str()); - } + while (!remaining.empty()) { + auto stats = remaining.front(); + remaining.pop(); + + if (stats->stageType == type) { + return true; + } - if (internalQueryForceIntersectionPlans.load()) { - if (hasStage(STAGE_AND_HASH, stats) || hasStage(STAGE_AND_SORTED, stats)) { - // The boost should be >2.001 to make absolutely sure the ixisect plan will win due - // to the combination of 1) productivity, 2) eof bonus, and 3) no ixisect bonus. - score += 3; - LOGV2_DEBUG(20962, - 5, - "Score boosted to {newScore} due to intersection forcing", - "Score boosted due to intersection forcing", - "newScore"_attr = score); + for (auto&& child : stats->children) { + remaining.push(child.get()); + } } + return false; } +}; +} // namespace - return score; +std::unique_ptr<PlanScorer<PlanStageStats>> makePlanScorer() { + return std::make_unique<DefaultPlanScorer>(); } - -} // namespace mongo +} // namespace mongo::plan_ranker |