From cfcdc9b9b84c3d9afcbb804513bbdbd54b77b7db Mon Sep 17 00:00:00 2001 From: Ian Boros Date: Thu, 10 Oct 2019 19:24:59 +0000 Subject: SERVER-42560 Simplify passing of metadata between DocumentSource and PlanStage --- src/mongo/db/pipeline/pipeline_d.cpp | 75 ++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 42 deletions(-) (limited to 'src/mongo/db/pipeline/pipeline_d.cpp') diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index c327187ecd3..a523066d2a8 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -193,6 +193,7 @@ StatusWith> attemptToGetExe const intrusive_ptr& pExpCtx, BSONObj queryObj, BSONObj projectionObj, + const QueryMetadataBitSet& metadataRequested, BSONObj sortObj, boost::optional groupIdForDistinctScan, const AggregationRequest* aggRequest, @@ -232,6 +233,9 @@ StatusWith> attemptToGetExe return {cq.getStatus()}; } + // Mark the metadata that's requested by the pipeline on the CQ. + cq.getValue()->requestAdditionalMetadata(metadataRequested); + if (groupIdForDistinctScan) { // When the pipeline includes a $group that groups by a single field // (groupIdForDistinctScan), we use getExecutorDistinct() to attempt to get an executor that @@ -266,13 +270,6 @@ StatusWith> attemptToGetExe return getExecutorFind(opCtx, collection, std::move(cq.getValue()), permitYield, plannerOpts); } -BSONObj removeSortKeyMetaProjection(BSONObj projectionObj) { - if (!projectionObj[Document::metaFieldSortKey]) { - return projectionObj; - } - return projectionObj.removeField(Document::metaFieldSortKey); -} - /** * Examines the indexes in 'collection' and returns the field name of a geo-indexed field suitable * for use in $geoNear. 2d indexes are given priority over 2dsphere indexes. @@ -366,7 +363,7 @@ PipelineD::buildInnerQueryExecutor(Collection* collection, // TODO SERVER-37453 this should no longer be necessary when we no don't need locks // to destroy a PlanExecutor. - auto deps = pipeline->getDependencies(DepsTracker::MetadataAvailable::kNoMetadata); + auto deps = pipeline->getDependencies(DepsTracker::kNoMetadata); auto attachExecutorCallback = [deps](Collection* collection, std::unique_ptr exec, @@ -479,10 +476,10 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, // Find the set of fields in the source documents depended on by this pipeline. DepsTracker deps = pipeline->getDependencies(DocumentSourceMatch::isTextQuery(queryObj) - ? DepsTracker::MetadataAvailable::kTextScore - : DepsTracker::MetadataAvailable::kNoMetadata); + ? DepsTracker::kOnlyTextScore + : DepsTracker::kNoMetadata); - BSONObj projForQuery = deps.toProjection(); + BSONObj projForQuery = deps.toProjectionWithoutMetadata(); boost::intrusive_ptr sortStage; boost::intrusive_ptr groupStage; @@ -536,8 +533,7 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, Pipeline* pipeline) { auto cursor = DocumentSourceCursor::create( collection, std::move(exec), pipeline->getContext(), trackOplogTS); - addCursorSource( - pipeline, std::move(cursor), std::move(deps), queryObj, sortObj, projForQuery); + addCursorSource(pipeline, std::move(cursor), std::move(deps), queryObj, sortObj); }; return std::make_pair(std::move(attachExecutorCallback), std::move(exec)); } @@ -557,7 +553,7 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, const auto geoNearStage = dynamic_cast(sources.front().get()); invariant(geoNearStage); - auto deps = pipeline->getDependencies(DepsTracker::kAllGeoNearDataAvailable); + auto deps = pipeline->getDependencies(DepsTracker::kAllGeoNearData); // If the user specified a "key" field, use that field to satisfy the "near" query. Otherwise, // look for a geo-indexed field in 'collection' that can. @@ -569,7 +565,7 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, // Create a PlanExecutor whose query is the "near" predicate on 'nearFieldName' combined with // the optional "query" argument in the $geoNear stage. BSONObj fullQuery = geoNearStage->asNearQuery(nearFieldName); - BSONObj proj = deps.toProjection(); + BSONObj proj = deps.toProjectionWithoutMetadata(); BSONObj sortFromQuerySystem; auto exec = uassertStatusOK(prepareExecutor(expCtx->opCtx, collection, @@ -627,7 +623,7 @@ StatusWith> PipelineD::prep // The query system has the potential to use an index to provide a non-blocking sort and/or to // use the projection to generate a covered plan. If this is possible, it is more efficient to // let the query system handle those parts of the pipeline. If not, it is more efficient to use - // a $sort and/or a ParsedDeps object. Thus, we will determine whether the query system can + // a $sort and/or a $project. Thus, we will determine whether the query system can // provide a non-blocking sort or a covered projection before we commit to a PlanExecutor. // // To determine if the query system can provide a non-blocking sort, we pass the @@ -674,6 +670,7 @@ StatusWith> PipelineD::prep expCtx, queryObj, *projectionObj, + deps.metadataDeps(), sortObj ? *sortObj : emptySort, rewrittenGroupStage->groupId(), aggRequest, @@ -711,16 +708,17 @@ StatusWith> PipelineD::prep const BSONObj metaSortProjection = BSON("$sortKey" << BSON("$meta" << "sortKey")); - // The only way to get meta information (e.g. the text score) is to let the query system handle - // the projection. In all other cases, unless the query system can do an index-covered - // projection and avoid going to the raw record at all, it is faster to have ParsedDeps filter - // the fields we need. + // TODO SERVER-42905: It should be possible to push down all eligible projections to the query + // layer. This code assumes that metadata is passed from the query layer to the DocumentSource + // layer via a projection, which is no longer true. if (!deps.getNeedsAnyMetadata()) { plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; } SortPattern userSortPattern(*sortObj, expCtx); if (sortStage && canSortBePushedDown(userSortPattern)) { + QueryMetadataBitSet needsSortKey; + needsSortKey.set(DocumentMetadataFields::MetaType::kSortKey); // See if the query system can provide a non-blocking sort. auto swExecutorSort = attemptToGetExecutor(opCtx, @@ -728,7 +726,8 @@ StatusWith> PipelineD::prep nss, expCtx, queryObj, - expCtx->needsMerge ? metaSortProjection : emptyProjection, + BSONObj(), // empty projection + expCtx->needsMerge ? needsSortKey : DepsTracker::kNoMetadata, *sortObj, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -744,6 +743,7 @@ StatusWith> PipelineD::prep expCtx, queryObj, *projectionObj, + deps.metadataDeps(), *sortObj, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -780,13 +780,15 @@ StatusWith> PipelineD::prep } } - // Either there's no sort or the query system can't provide a non-blocking sort. + // Either there was no $sort stage, or the query system could not provide a non-blocking + // sort. *sortObj = BSONObj(); - *projectionObj = removeSortKeyMetaProjection(*projectionObj); - const auto metadataRequired = deps.getAllRequiredMetadataTypes(); - if (metadataRequired.size() == 1 && - metadataRequired.front() == DepsTracker::MetadataType::SORT_KEY) { + // Since the DocumentSource layer will perform the sort, remove any dependencies we have on the + // query layer for a sort key. + QueryMetadataBitSet metadataDepsWithoutSortKey = deps.metadataDeps(); + metadataDepsWithoutSortKey[DocumentMetadataFields::kSortKey] = false; + if (!metadataDepsWithoutSortKey.any()) { // A sort key requirement would have prevented us from being able to add this parameter // before, but now we know the query system won't cover the sort, so we will be able to // compute the sort key ourselves during the $sort stage, and thus don't need a query @@ -801,6 +803,7 @@ StatusWith> PipelineD::prep expCtx, queryObj, *projectionObj, + metadataDepsWithoutSortKey, *sortObj, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -814,7 +817,8 @@ StatusWith> PipelineD::prep "Failed to determine whether query system can provide a covered projection"); } - // The query system couldn't provide a covered or simple uncovered projection. + // The query system couldn't provide a covered or simple uncovered projection. Do no projections + // and request no metadata from the query layer. *projectionObj = BSONObj(); // If this doesn't work, nothing will. return attemptToGetExecutor(opCtx, @@ -823,6 +827,7 @@ StatusWith> PipelineD::prep expCtx, queryObj, *projectionObj, + DepsTracker::kNoMetadata, *sortObj, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -834,8 +839,7 @@ void PipelineD::addCursorSource(Pipeline* pipeline, boost::intrusive_ptr cursor, DepsTracker deps, const BSONObj& queryObj, - const BSONObj& sortObj, - const BSONObj& projectionObj) { + const BSONObj& sortObj) { // Add the cursor to the pipeline first so that it's correctly disposed of as part of the // pipeline if an exception is thrown during this method. pipeline->addInitialSource(cursor); @@ -845,19 +849,6 @@ void PipelineD::addCursorSource(Pipeline* pipeline, if (deps.hasNoRequirements()) { cursor->shouldProduceEmptyDocs(); } - - if (!projectionObj.isEmpty()) { - cursor->setProjection(projectionObj, boost::none, deps.getNeedsAnyMetadata()); - } else { - // There may be fewer dependencies now if the sort was covered. - if (!sortObj.isEmpty()) { - deps = pipeline->getDependencies(DocumentSourceMatch::isTextQuery(queryObj) - ? DepsTracker::MetadataAvailable::kTextScore - : DepsTracker::MetadataAvailable::kNoMetadata); - } - - cursor->setProjection(deps.toProjection(), deps.toParsedDeps(), deps.getNeedsAnyMetadata()); - } } Timestamp PipelineD::getLatestOplogTimestamp(const Pipeline* pipeline) { -- cgit v1.2.1