From 1f0ae1c45604798888f8b2425475a473d7265bd5 Mon Sep 17 00:00:00 2001 From: Ian Boros Date: Fri, 4 Oct 2019 15:38:42 +0000 Subject: SERVER-43202 propagate errors from interrupts during yielding --- src/mongo/base/error_codes.err | 2 + src/mongo/db/exec/cached_plan.cpp | 2 +- src/mongo/db/exec/subplan.cpp | 29 ++++++----- src/mongo/db/pipeline/dependencies.cpp | 5 ++ src/mongo/db/pipeline/pipeline_d.cpp | 85 +++++++++++++++++++++----------- src/mongo/db/query/explain.cpp | 7 +-- src/mongo/db/query/get_executor.cpp | 2 +- src/mongo/db/query/plan_yield_policy.cpp | 5 ++ src/mongo/db/query/query_planner.cpp | 33 +++++++------ 9 files changed, 105 insertions(+), 65 deletions(-) (limited to 'src/mongo') diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 43b1656368b..377aa7649b7 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -290,6 +290,8 @@ error_code("TransactionCoordinatorDeadlineTaskCanceled", 287) error_code("ChecksumMismatch", 288) # Internal only code used by WaitForMajorityService. error_code("WaitForMajorityServiceEarlierOpTimeAvailable", 289) +# 290 is TransactionExceededLifetimeLimitSeconds, but not used. +error_code("NoQueryExecutionPlans", 291) # Error codes 4000-8999 are reserved. diff --git a/src/mongo/db/exec/cached_plan.cpp b/src/mongo/db/exec/cached_plan.cpp index e426a0ad340..f47a253669b 100644 --- a/src/mongo/db/exec/cached_plan.cpp +++ b/src/mongo/db/exec/cached_plan.cpp @@ -208,7 +208,7 @@ Status CachedPlanStage::replan(PlanYieldPolicy* yieldPolicy, bool shouldCache) { // We cannot figure out how to answer the query. Perhaps it requires an index // we do not have? 
if (0 == solutions.size()) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "error processing query: " << _canonicalQuery->toString() << " No query solutions"); } diff --git a/src/mongo/db/exec/subplan.cpp b/src/mongo/db/exec/subplan.cpp index 168fc342d3d..b5d33755761 100644 --- a/src/mongo/db/exec/subplan.cpp +++ b/src/mongo/db/exec/subplan.cpp @@ -196,13 +196,13 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData, // For example, we don't cache things for 2d indices. str::stream ss; ss << "No cache data for subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } if (SolutionCacheData::USE_INDEX_TAGS_SOLN != branchCacheData->solnType) { str::stream ss; ss << "No indexed cache data for subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } // Add the index assignments to our original query. @@ -212,7 +212,7 @@ Status tagOrChildAccordingToCache(PlanCacheIndexTree* compositeCacheData, if (!tagStatus.isOK()) { str::stream ss; ss << "Failed to extract indices from subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return tagStatus.withContext(ss); } // Add the child's cache data to the cache data we're creating for the main query. 
@@ -293,7 +293,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { str::stream ss; ss << "Failed to pick best plan for subchild " << branchResult->canonicalQuery->toString(); - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } QuerySolution* bestSoln = multiPlanStage->bestSolution(); @@ -303,13 +303,13 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { if (NULL == bestSoln->cacheData.get()) { str::stream ss; ss << "No cache data for subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) { str::stream ss; ss << "No indexed cache data for subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } // Add the index assignments to our original query. 
@@ -319,7 +319,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { if (!tagStatus.isOK()) { str::stream ss; ss << "Failed to extract indices from subchild " << orChild->debugString(); - return Status(ErrorCodes::BadValue, ss); + return tagStatus.withContext(ss); } cacheData->children.push_back(bestSoln->cacheData->tree->clone()); @@ -336,7 +336,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { if (!solnRoot) { str::stream ss; ss << "Failed to build indexed data path for subplanned query\n"; - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } LOG(5) << "Subplanner: fully tagged tree is " << redact(solnRoot->toString()); @@ -348,7 +348,7 @@ Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { if (NULL == _compositeSolution.get()) { str::stream ss; ss << "Failed to analyze subplanned query"; - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } LOG(5) << "Subplanner: Composite solution is " << redact(_compositeSolution->toString()); @@ -382,7 +382,7 @@ Status SubplanStage::choosePlanWholeQuery(PlanYieldPolicy* yieldPolicy) { // We cannot figure out how to answer the query. Perhaps it requires an index // we do not have? if (0 == solutions.size()) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "error processing query: " << _query->toString() << " No query solutions"); } @@ -453,11 +453,10 @@ Status SubplanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) { // the overall winning plan from the resulting index tags. 
Status subplanSelectStat = choosePlanForSubqueries(yieldPolicy); if (!subplanSelectStat.isOK()) { - if (subplanSelectStat == ErrorCodes::QueryPlanKilled || - subplanSelectStat == ErrorCodes::MaxTimeMSExpired) { - // Query planning cannot continue if the plan for one of the subqueries was killed - // because the collection or a candidate index may have been dropped, or if we've - // exceeded the operation's time limit. + if (subplanSelectStat != ErrorCodes::NoQueryExecutionPlans) { + // Query planning can continue if we failed to find a solution for one of the + // children. Otherwise, it cannot, as it may no longer be safe to access the collection + // (an index may have been dropped, we may have exceeded the time limit, etc). return subplanSelectStat; } return choosePlanWholeQuery(yieldPolicy); diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index 1586a68f96b..8c92cd5c751 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -102,6 +102,11 @@ BSONObj DepsTracker::toProjection() const { } last = field + '.'; + + // We should only have dependencies on fields that are valid in aggregation. Create a + // FieldPath to check this. + FieldPath fieldPath(field); + bb.append(field, 1); } diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 4a4f4b7ea14..69540a8b8bd 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -96,6 +96,39 @@ using std::unique_ptr; using write_ops::Insert; namespace { + +/** + * Return whether the given sort spec can be used in a find() sort. + */ +bool canSortBePushedDown(const BSONObj& sortSpec) { + // Return whether or not a sort stage can be pushed into the query layer.
+ for (auto&& elem : sortSpec) { + if (BSONType::Object != elem.type()) { + continue; + } + + BSONObj subObj = elem.embeddedObject(); + if (subObj.nFields() != 1) { + continue; + } + + BSONElement firstElem = subObj.firstElement(); + if (firstElem.fieldNameStringData() == "$meta" && firstElem.type() == BSONType::String) { + // Indeed it's a $meta. + + // Technically sorting by {$meta: "textScore"} can be done in find() but requires a + // corresponding projection, so for simplicity we don't support it. + if (firstElem.valueStringData() == "textScore" || + firstElem.valueStringData() == "randVal") { + return false; + } + } + } + + return true; +} + + /** * Returns a PlanExecutor which uses a random cursor to sample documents if successful. Returns {} * if the storage engine doesn't support random cursors, or if 'sampleSize' is a large enough @@ -233,7 +266,7 @@ StatusWith> attemptToGetExe return distinctExecutor.getStatus().withContext( "Unable to use distinct scan to optimize $group stage"); } else if (!distinctExecutor.getValue()) { - return {ErrorCodes::OperationFailed, + return {ErrorCodes::NoQueryExecutionPlans, "Unable to use distinct scan to optimize $group stage"}; } else { return distinctExecutor; @@ -680,17 +713,16 @@ StatusWith> PipelineD::prep pipeline->addInitialSource(groupTransform); return swExecutorGrouped; - } else if (swExecutorGrouped == ErrorCodes::QueryPlanKilled) { - return {ErrorCodes::OperationFailed, - str::stream() << "Failed to determine whether query system can provide a " - "DISTINCT_SCAN grouping: " - << swExecutorGrouped.getStatus().toString()}; + } else if (swExecutorGrouped != ErrorCodes::NoQueryExecutionPlans) { + return swExecutorGrouped.getStatus().withContext( + "Failed to determine whether query system can provide a " + "DISTINCT_SCAN grouping"); } } const BSONObj emptyProjection; - const BSONObj metaSortProjection = BSON("$meta" - << "sortKey"); + const BSONObj metaSortProjection = BSON("$sortKey" << BSON("$meta" + << 
"sortKey")); // The only way to get meta information (e.g. the text score) is to let the query system handle // the projection. In all other cases, unless the query system can do an index-covered @@ -700,7 +732,7 @@ StatusWith> PipelineD::prep plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; } - if (sortStage) { + if (sortStage && canSortBePushedDown(*sortObj)) { // See if the query system can provide a non-blocking sort. auto swExecutorSort = attemptToGetExecutor(opCtx, @@ -736,11 +768,11 @@ StatusWith> PipelineD::prep if (swExecutorSortAndProj.isOK()) { // Success! We have a non-blocking sort and a covered projection. exec = std::move(swExecutorSortAndProj.getValue()); - } else if (swExecutorSortAndProj == ErrorCodes::QueryPlanKilled) { - return {ErrorCodes::OperationFailed, - str::stream() << "Failed to determine whether query system can provide a " - "covered projection in addition to a non-blocking sort: " - << swExecutorSortAndProj.getStatus().toString()}; + } else if (swExecutorSortAndProj != ErrorCodes::NoQueryExecutionPlans) { + + return swExecutorSortAndProj.getStatus().withContext( + "Failed to determine whether query system can provide a " + "covered projection in addition to a non-blocking sort"); } else { // The query system couldn't cover the projection. *projectionObj = BSONObj(); @@ -755,20 +787,15 @@ StatusWith> PipelineD::prep pipeline->_sources.push_front(sortStage->getLimitSrc()); } return std::move(exec); - } else if (swExecutorSort == ErrorCodes::QueryPlanKilled) { - return { - ErrorCodes::OperationFailed, - str::stream() - << "Failed to determine whether query system can provide a non-blocking sort: " - << swExecutorSort.getStatus().toString()}; + } else if (swExecutorSort != ErrorCodes::NoQueryExecutionPlans) { + return swExecutorSort.getStatus().withContext( + "Failed to determine whether query system can provide a non-blocking sort"); } - // The query system can't provide a non-blocking sort. 
- *sortObj = BSONObj(); } - // Either there was no $sort stage, or the query system could not provide a non-blocking - // sort. - dassert(sortObj->isEmpty()); + // Either there's no sort or the query system can't provide a non-blocking sort. + *sortObj = BSONObj(); + *projectionObj = removeSortKeyMetaProjection(*projectionObj); const auto metadataRequired = deps.getAllRequiredMetadataTypes(); if (metadataRequired.size() == 1 && @@ -796,11 +823,9 @@ StatusWith> PipelineD::prep if (swExecutorProj.isOK()) { // Success! We have a covered projection. return std::move(swExecutorProj.getValue()); - } else if (swExecutorProj == ErrorCodes::QueryPlanKilled) { - return {ErrorCodes::OperationFailed, - str::stream() - << "Failed to determine whether query system can provide a covered projection: " - << swExecutorProj.getStatus().toString()}; + } else if (swExecutorProj != ErrorCodes::NoQueryExecutionPlans) { + return swExecutorProj.getStatus().withContext( + "Failed to determine whether query system can provide a covered projection"); } // The query system couldn't provide a covered or simple uncovered projection. diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 977b7fdb1a1..f39f959b545 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -882,9 +882,10 @@ void Explain::explainStages(PlanExecutor* exec, if (verbosity >= ExplainOptions::Verbosity::kExecStats) { executePlanStatus = exec->executePlan(); - // If executing the query failed because it was killed, then the collection may no longer be - // valid. We indicate this by setting our collection pointer to null. - if (executePlanStatus == ErrorCodes::QueryPlanKilled) { + // If executing the query failed, for any number of reasons other than a planning failure, + // then the collection may no longer be valid. We conservatively set our collection pointer + // to null in case it is invalid. 
+ if (!executePlanStatus.isOK() && executePlanStatus != ErrorCodes::NoQueryExecutionPlans) { collection = nullptr; } } diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index ba1ad4a1e04..1c1f4f3387a 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -517,7 +517,7 @@ StatusWith prepareExecution(OperationContext* opCtx, // We cannot figure out how to answer the query. Perhaps it requires an index // we do not have? if (0 == solutions.size()) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "error processing query: " << canonicalQuery->toString() << " No query solutions"); } diff --git a/src/mongo/db/query/plan_yield_policy.cpp b/src/mongo/db/query/plan_yield_policy.cpp index b86d3f437f4..534dec9d710 100644 --- a/src/mongo/db/query/plan_yield_policy.cpp +++ b/src/mongo/db/query/plan_yield_policy.cpp @@ -188,6 +188,11 @@ void PlanYieldPolicy::_yieldAllLocks(OperationContext* opCtx, if (ns.empty() || ns == planExecNS.ns()) { MONGO_FAIL_POINT_PAUSE_WHILE_SET(setYieldAllLocksHang); } + + if (config.getData().getField("checkForInterruptAfterHang").trueValue()) { + // Throws.
+ opCtx->checkForInterrupt(); + } } MONGO_FAIL_POINT_BLOCK(setYieldAllLocksWait, customWait) { diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 54c6a0b9fb0..3e40c5a4024 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -373,11 +373,11 @@ StatusWith> QueryPlanner::cacheDataFromTagge Status QueryPlanner::tagAccordingToCache(MatchExpression* filter, const PlanCacheIndexTree* const indexTree, const map& indexMap) { - if (NULL == filter) { - return Status(ErrorCodes::BadValue, "Cannot tag tree: filter is NULL."); + if (nullptr == filter) { + return Status(ErrorCodes::NoQueryExecutionPlans, "Cannot tag tree: filter is NULL."); } - if (NULL == indexTree) { - return Status(ErrorCodes::BadValue, "Cannot tag tree: indexTree is NULL."); + if (nullptr == indexTree) { + return Status(ErrorCodes::NoQueryExecutionPlans, "Cannot tag tree: indexTree is NULL."); } // We're tagging the tree here, so it shouldn't have @@ -389,7 +389,7 @@ Status QueryPlanner::tagAccordingToCache(MatchExpression* filter, ss << "Cache topology and query did not match: " << "query has " << filter->numChildren() << " children " << "and cache has " << indexTree->children.size() << " children."; - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } // Continue the depth-first tree traversal. 
@@ -406,7 +406,7 @@ Status QueryPlanner::tagAccordingToCache(MatchExpression* filter, for (const auto& orPushdown : indexTree->orPushdowns) { auto index = indexMap.find(orPushdown.indexEntryId); if (index == indexMap.end()) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "Did not find index: " << orPushdown.indexEntryId); } OrPushdownTag::Destination dest; @@ -422,7 +422,7 @@ Status QueryPlanner::tagAccordingToCache(MatchExpression* filter, if (got == indexMap.end()) { str::stream ss; ss << "Did not find index with name: " << indexTree->entry->identifier.catalogName; - return Status(ErrorCodes::BadValue, ss); + return Status(ErrorCodes::NoQueryExecutionPlans, ss); } if (filter->getTag()) { OrPushdownTag* orPushdownTag = static_cast(filter->getTag()); @@ -454,7 +454,7 @@ StatusWith> QueryPlanner::planFromCache( auto soln = buildWholeIXSoln( *winnerCacheData.tree->entry, query, params, winnerCacheData.wholeIXSolnDir); if (!soln) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, "plan cache error: soln that uses index to provide sort"); } else { return {std::move(soln)}; @@ -464,7 +464,8 @@ StatusWith> QueryPlanner::planFromCache( // with tailable==true, hence the false below. auto soln = buildCollscanSoln(query, false, params); if (!soln) { - return Status(ErrorCodes::BadValue, "plan cache error: collection scan soln"); + return Status(ErrorCodes::NoQueryExecutionPlans, + "plan cache error: collection scan soln"); } else { return {std::move(soln)}; } @@ -512,14 +513,14 @@ StatusWith> QueryPlanner::planFromCache( query, std::move(clone), expandedIndexes, params)); if (!solnRoot) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "Failed to create data access plan from cache. 
Query: " << query.toStringShort()); } auto soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot)); if (!soln) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, str::stream() << "Failed to analyze plan from cache. Query: " << query.toStringShort()); } @@ -726,7 +727,8 @@ StatusWith>> QueryPlanner::plan( LOG(5) << "Unable to find index for $geoNear query."; // Don't leave tags on query tree. query.root()->resetTag(); - return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query"); + return Status(ErrorCodes::NoQueryExecutionPlans, + "unable to find index for $geoNear query"); } LOG(5) << "Rated tree after geonear processing:" << redact(query.root()->debugString()); @@ -749,14 +751,15 @@ StatusWith>> QueryPlanner::plan( if (textIndexCount != 1) { // Don't leave tags on query tree. query.root()->resetTag(); - return Status(ErrorCodes::BadValue, "need exactly one text index for $text query"); + return Status(ErrorCodes::NoQueryExecutionPlans, + "need exactly one text index for $text query"); } // Error if the text node is tagged with zero indices. if (0 == tag->first.size() && 0 == tag->notFirst.size()) { // Don't leave tags on query tree. query.root()->resetTag(); - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, "failed to use text index to satisfy $text query (if text index is " "compound, are equality predicates given for all prefix fields?)"); } @@ -833,7 +836,7 @@ StatusWith>> QueryPlanner::plan( MatchExpression* root = query.root(); for (size_t i = 0; i < root->numChildren(); ++i) { if (textNode == root->getChild(i)) { - return Status(ErrorCodes::BadValue, + return Status(ErrorCodes::NoQueryExecutionPlans, "Failed to produce a solution for TEXT under OR - " "other non-TEXT clauses under OR have to be indexed as well."); } -- cgit v1.2.1