SERVER-40089 $group optimized with DISTINCT_SCAN cannot use $$ROOT

The getExecutorDistinct() function is responsible for both creating an executor for the distinct command and creating an executor for a $group that has been optimized with a DISTINCT_SCAN (see commit da63195). These two scenarios have different requirements for their projection, and getExecutorDistinct() distinguished the two by assuming any caller with an empty ({}) projection wanted the distinct command projection. However, a $first accumulator with $$ROOT requires the entire document, so the logic that builds an optimized $group executor generates an empty projection for this case as well. When that happens, getExecutorDistinct() mistakenly chooses the projection that the distinct command wants, and when the pipeline evaluates $$ROOT, it only gets to see a small subset of fields in the document. This patch modifies getExecutorDistinct() so that the caller must explicitly state what projection it wants. That means that the distinct command no longer passes an empty projection to indicate that it wants to project on just the distinct field. Instead, the distinct command computes the projection for the distinct field on its own and includes that projection in the ParsedDistinct object that it passes to getExecutorDistinct().
author: Justin Seyster <justin.seyster@mongodb.com> 2019-03-27 19:01:18 -0400
committer: Justin Seyster <justin.seyster@mongodb.com> 2019-03-28 17:17:27 -0400
commit: e73da48e26048cb5ca2120acadac2d9c2c8ee403 (patch)
tree: 1a562334c2cf2bbfc7629f9723439d564c20e847 /src/mongo/db/query
parent: b885fa6feb7da00dc367e917c53ba16a41b75af4 (diff)
download: mongo-e73da48e26048cb5ca2120acadac2d9c2c8ee403.tar.gz
4 files changed, 244 insertions, 251 deletions
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 9c9d9132811..62517e5a0da 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1184,74 +1184,6 @@ bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices,
     return minFields != std::numeric_limits<int>::max();
 }
 
-/**
- * Checks dotted field for a projection and truncates the
- * field name if we could be projecting on an array element.
- * Sets 'isIDOut' to true if the projection is on a sub document of _id.
- * For example, _id.a.2, _id.b.c.
- */
-std::string getProjectedDottedField(const std::string& field, bool* isIDOut) {
-    // Check if field contains an array index.
-    std::vector<std::string> res;
-    mongo::splitStringDelim(field, &res, '.');
-
-    // Since we could exit early from the loop,
-    // we should check _id here and set '*isIDOut' accordingly.
-    *isIDOut = ("_id" == res[0]);
-
-    // Skip the first dotted component. If the field starts
-    // with a number, the number cannot be an array index.
-    int arrayIndex = 0;
-    for (size_t i = 1; i < res.size(); ++i) {
-        if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) {
-            // Array indices cannot be negative numbers (this is not $slice).
-            // Negative numbers are allowed as field names.
-            if (arrayIndex >= 0) {
-                // Generate prefix of field up to (but not including) array index.
-                std::vector<std::string> prefixStrings(res);
-                prefixStrings.resize(i);
-                // Reset projectedField. Instead of overwriting, joinStringDelim() appends joined
-                // string
-                // to the end of projectedField.
-                std::string projectedField;
-                mongo::joinStringDelim(prefixStrings, &projectedField, '.');
-                return projectedField;
-            }
-        }
-    }
-
-    return field;
-}
-
-/**
- * Creates a projection spec for a distinct command from the requested field.
- * In most cases, the projection spec will be {_id: 0, key: 1}.
- * The exceptions are:
- * 1) When the requested field is '_id', the projection spec will {_id: 1}.
- * 2) When the requested field could be an array element (eg. a.0),
- *    the projected field will be the prefix of the field up to the array element.
- *    For example, a.b.2 => {_id: 0, 'a.b': 1}
- *    Note that we can't use a $slice projection because the distinct command filters
- *    the results from the executor using the dotted field name. Using $slice will
- *    re-order the documents in the array in the results.
- */
-BSONObj getDistinctProjection(const std::string& field) {
-    std::string projectedField(field);
-
-    bool isID = false;
-    if ("_id" == field) {
-        isID = true;
-    } else if (str::contains(field, '.')) {
-        projectedField = getProjectedDottedField(field, &isID);
-    }
-    BSONObjBuilder bob;
-    if (!isID) {
-        bob.append("_id", 0);
-    }
-    bob.append(projectedField, 1);
-    return bob.obj();
-}
-
 }  // namespace
 
 StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorCount(
@@ -1351,38 +1283,46 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
                                   bool strictDistinctOnly) {
     QuerySolutionNode* root = soln->root.get();
 
-    // Root stage must be a project.
+    // We can attempt to convert a plan if it follows one of these patterns (starting from the
+    // root):
+    //   1. PROJECT=>FETCH=>IXSCAN
+    //   2. FETCH=>IXSCAN
+    //   3. PROJECT=>IXSCAN
+    QuerySolutionNode* projectNode = nullptr;
+    IndexScanNode* indexScanNode = nullptr;
+    FetchNode* fetchNode = nullptr;
+
     switch (root->getType()) {
-        default:
-            return false;
         case STAGE_PROJECTION_DEFAULT:
         case STAGE_PROJECTION_COVERED:
-        case STAGE_PROJECTION_SIMPLE:;
+        case STAGE_PROJECTION_SIMPLE:
+            projectNode = root;
+            break;
+        case STAGE_FETCH:
+            fetchNode = static_cast<FetchNode*>(root);
+            break;
+        default:
+            return false;
     }
 
-    // Child should be either an ixscan or fetch.
-    if (STAGE_IXSCAN != root->children[0]->getType() &&
-        STAGE_FETCH != root->children[0]->getType()) {
-        return false;
+    if (!fetchNode && (STAGE_FETCH == root->children[0]->getType())) {
+        fetchNode = static_cast<FetchNode*>(root->children[0]);
     }
 
-    IndexScanNode* indexScanNode = nullptr;
-    FetchNode* fetchNode = nullptr;
-    if (STAGE_IXSCAN == root->children[0]->getType()) {
-        indexScanNode = static_cast<IndexScanNode*>(root->children[0]);
-    } else {
-        fetchNode = static_cast<FetchNode*>(root->children[0]);
-        // If the fetch has a filter, we're out of luck. We can't skip all keys with a given value,
-        // since one of them may key a document that passes the filter.
-        if (fetchNode->filter) {
-            return false;
-        }
+    if (fetchNode && (STAGE_IXSCAN == fetchNode->children[0]->getType())) {
+        indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
+    } else if (projectNode && (STAGE_IXSCAN == projectNode->children[0]->getType())) {
+        indexScanNode = static_cast<IndexScanNode*>(projectNode->children[0]);
+    }
 
-        if (STAGE_IXSCAN != fetchNode->children[0]->getType()) {
-            return false;
-        }
+    if (!indexScanNode) {
+        return false;
+    }
 
-        indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
+    // If the fetch has a filter, we're out of luck. We can't skip all keys with a given value,
+    // since one of them may key a document that passes the filter.
+    if (fetchNode && fetchNode->filter) {
+        return false;
     }
 
     if (indexScanNode->index.type == IndexType::INDEX_WILDCARD) {
@@ -1467,39 +1407,33 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
     distinctNode->fieldNo = fieldNo;
 
     if (fetchNode) {
-        // If there is a fetch node, then there is no need for the projection. The fetch node should
-        // become the new root, with the distinct as its child. The PROJECT=>FETCH=>IXSCAN tree
-        // should become FETCH=>DISTINCT_SCAN.
-        switch (root->getType()) {
-            default:
-                MONGO_UNREACHABLE;
-            case STAGE_PROJECTION_DEFAULT:
-            case STAGE_PROJECTION_COVERED:
-            case STAGE_PROJECTION_SIMPLE:;
-        }
-        invariant(STAGE_FETCH == root->children[0]->getType());
-        invariant(STAGE_IXSCAN == root->children[0]->children[0]->getType());
+        // If the original plan had PROJECT and FETCH stages, we can get rid of the PROJECT,
+        // transforming the plan from PROJECT=>FETCH=>IXSCAN to FETCH=>DISTINCT_SCAN.
+        if (projectNode) {
+            invariant(projectNode == root);
+            projectNode = nullptr;
 
-        // Detach the fetch from its parent projection.
-        root->children.clear();
+            invariant(STAGE_FETCH == root->children[0]->getType());
+            invariant(STAGE_IXSCAN == root->children[0]->children[0]->getType());
 
-        // Make the fetch the new root. This destroys the project stage.
-        soln->root.reset(fetchNode);
+            // Detach the fetch from its parent projection.
+            root->children.clear();
 
-        // Take ownership of the index scan node, detaching it from the solution tree.
+            // Make the fetch the new root. This destroys the project stage.
+            soln->root.reset(fetchNode);
+        }
+
+        // Whenever we have a FETCH node, the IXSCAN is its child. We detach the IXSCAN from the
+        // solution tree and take ownership of it, so that it gets destroyed when we leave this
+        // scope.
         std::unique_ptr<IndexScanNode> ownedIsn(indexScanNode);
+        indexScanNode = nullptr;
 
         // Attach the distinct node in the index scan's place.
         fetchNode->children[0] = distinctNode.release();
     } else {
         // There is no fetch node. The PROJECT=>IXSCAN tree should become PROJECT=>DISTINCT_SCAN.
-        switch (root->getType()) {
-            default:
-                MONGO_UNREACHABLE;
-            case STAGE_PROJECTION_DEFAULT:
-            case STAGE_PROJECTION_COVERED:
-            case STAGE_PROJECTION_SIMPLE:;
-        }
+        invariant(projectNode == root);
         invariant(STAGE_IXSCAN == root->children[0]->getType());
 
         // Take ownership of the index scan node, detaching it from the solution tree.
@@ -1545,53 +1479,40 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
     return plannerParams;
 }
 
-// Pass this to getExecutorForSimpleDistinct() or getExecutorDistinctFromIndexSolutions()
-// which will either move the query into a newly created executor or leave the executor as nullptr
-// to indicate that no solution was found.
-struct QueryOrExecutor {
-    QueryOrExecutor(unique_ptr<CanonicalQuery> cq) : cq(std::move(cq)) {}
-
-    std::unique_ptr<CanonicalQuery> cq;
-    std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> executor;
-};
-
 /**
  * A simple DISTINCT_SCAN has an empty query and no sort, so we just need to find a suitable index
- * that has the "distinct" field the first component of its key pattern.
+ * that has the "distinct" field as the first component of its key pattern.
  *
- * If a suitable solution is found, this function will create a new executor in
- * queryOrExecutor->executor and move the query into it, leaving queryOrExecutor->cq in a "moved
- * from" state. Otherwise, it will leave queryOrExecutor->cq as is and set queryOrExecutor->executor
- * to nullptr (but still return Status::OK).
+ * If a suitable solution is found, this function will create and return a new executor. In order to
+ * do so, it releases the CanonicalQuery from the 'parsedDistinct' input. If no solution is found,
+ * the return value is an OK status with a nullptr value, and the 'parsedDistinct' CanonicalQuery
+ * remains valid. This function may also return a failed status code, in which case the caller
+ * should assume that the 'parsedDistinct' CanonicalQuery is no longer valid.
  */
-Status getExecutorForSimpleDistinct(OperationContext* opCtx,
-                                    Collection* collection,
-                                    const QueryPlannerParams& plannerParams,
-                                    PlanExecutor::YieldPolicy yieldPolicy,
-                                    ParsedDistinct* parsedDistinct,
-                                    QueryOrExecutor* queryOrExecutor) {
-    invariant(queryOrExecutor);
-    invariant(queryOrExecutor->cq);
-    invariant(!queryOrExecutor->executor);
+StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorForSimpleDistinct(
+    OperationContext* opCtx,
+    Collection* collection,
+    const QueryPlannerParams& plannerParams,
+    PlanExecutor::YieldPolicy yieldPolicy,
+    ParsedDistinct* parsedDistinct) {
+    invariant(parsedDistinct->getQuery());
+    auto collator = parsedDistinct->getQuery()->getCollator();
 
     // If there's no query, we can just distinct-scan one of the indices. Not every index in
     // plannerParams.indices may be suitable. Refer to getDistinctNodeIndex().
     size_t distinctNodeIndex = 0;
     if (!parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() ||
         !parsedDistinct->getQuery()->getQueryRequest().getSort().isEmpty() ||
-        !getDistinctNodeIndex(plannerParams.indices,
-                              parsedDistinct->getKey(),
-                              queryOrExecutor->cq->getCollator(),
-                              &distinctNodeIndex)) {
+        !getDistinctNodeIndex(
+            plannerParams.indices, parsedDistinct->getKey(), collator, &distinctNodeIndex)) {
         // Not a "simple" DISTINCT_SCAN or no suitable index was found.
-        queryOrExecutor->executor = nullptr;
-        return Status::OK();
+        return {nullptr};
     }
 
     auto dn = stdx::make_unique<DistinctNode>(plannerParams.indices[distinctNodeIndex]);
     dn->direction = 1;
     IndexBoundsBuilder::allValuesBounds(dn->index.keyPattern, &dn->bounds);
-    dn->queryCollator = queryOrExecutor->cq->getCollator();
+    dn->queryCollator = collator;
     dn->fieldNo = 0;
 
     // An index with a non-simple collation requires a FETCH stage.
@@ -1606,52 +1527,47 @@ Status getExecutorForSimpleDistinct(OperationContext* opCtx,
 
     QueryPlannerParams params;
 
-    auto soln =
-        QueryPlannerAnalysis::analyzeDataAccess(*queryOrExecutor->cq, params, std::move(solnRoot));
+    auto soln = QueryPlannerAnalysis::analyzeDataAccess(
+        *parsedDistinct->getQuery(), params, std::move(solnRoot));
     invariant(soln);
 
     unique_ptr<WorkingSet> ws = make_unique<WorkingSet>();
     PlanStage* rawRoot;
-    verify(StageBuilder::build(opCtx, collection, *queryOrExecutor->cq, *soln, ws.get(), &rawRoot));
+    verify(StageBuilder::build(
+        opCtx, collection, *parsedDistinct->getQuery(), *soln, ws.get(), &rawRoot));
     unique_ptr<PlanStage> root(rawRoot);
 
-    LOG(2) << "Using fast distinct: " << redact(queryOrExecutor->cq->toStringShort())
+    LOG(2) << "Using fast distinct: " << redact(parsedDistinct->getQuery()->toStringShort())
            << ", planSummary: " << Explain::getPlanSummary(root.get());
 
-    auto executor = PlanExecutor::make(opCtx,
-                                       std::move(ws),
-                                       std::move(root),
-                                       std::move(soln),
-                                       std::move(queryOrExecutor->cq),
-                                       collection,
-                                       yieldPolicy);
-
-    if (executor.isOK()) {
-        queryOrExecutor->executor = std::move(executor.getValue());
-        return Status::OK();
-    } else {
-        return executor.getStatus();
-    }
+    return PlanExecutor::make(opCtx,
+                              std::move(ws),
+                              std::move(root),
+                              std::move(soln),
+                              parsedDistinct->releaseQuery(),
+                              collection,
+                              yieldPolicy);
 }
 
 // Checks each solution in the 'solutions' vector to see if one includes an IXSCAN that can be
 // rewritten as a DISTINCT_SCAN, assuming we want distinct scan behavior on the getKey() property of
 // the 'parsedDistinct' argument.
 //
-// If a suitable solution is found, this function will create a new executor in
-// queryOrExecutor->executor and move the query into it, leaving queryOrExecutor->cq in a "moved
-// from" state. Otherwise, it will leave queryOrExecutor->cq as is and set queryOrExecutor->executor
-// to nullptr (but still return Status::OK).
+// If a suitable solution is found, this function will create and return a new executor. In order to
+// do so, it releases the CanonicalQuery from the 'parsedDistinct' input. If no solution is found,
+// the return value is an OK status with a nullptr value, and the 'parsedDistinct' CanonicalQuery
+// remains valid. This function may also return a failed status code, in which case the caller
+// should assume that the 'parsedDistinct' CanonicalQuery is no longer valid.
 //
 // See the declaration of turnIxscanIntoDistinctIxscan() for an explanation of the
 // 'strictDistinctOnly' parameter.
-Status getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
-                                             Collection* collection,
-                                             std::vector<std::unique_ptr<QuerySolution>> solutions,
-                                             PlanExecutor::YieldPolicy yieldPolicy,
-                                             ParsedDistinct* parsedDistinct,
-                                             bool strictDistinctOnly,
-                                             QueryOrExecutor* queryOrExecutor) {
+StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>>
+getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
+                                      Collection* collection,
+                                      std::vector<std::unique_ptr<QuerySolution>> solutions,
+                                      PlanExecutor::YieldPolicy yieldPolicy,
+                                      ParsedDistinct* parsedDistinct,
+                                      bool strictDistinctOnly) {
     // We look for a solution that has an ixscan we can turn into a distinctixscan
     for (size_t i = 0; i < solutions.size(); ++i) {
         if (turnIxscanIntoDistinctIxscan(
@@ -1660,31 +1576,54 @@ Status getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
             unique_ptr<WorkingSet> ws = make_unique<WorkingSet>();
             unique_ptr<QuerySolution> currentSolution = std::move(solutions[i]);
             PlanStage* rawRoot;
-            verify(StageBuilder::build(
-                opCtx, collection, *queryOrExecutor->cq, *currentSolution, ws.get(), &rawRoot));
+            verify(StageBuilder::build(opCtx,
+                                       collection,
+                                       *parsedDistinct->getQuery(),
+                                       *currentSolution,
+                                       ws.get(),
+                                       &rawRoot));
             unique_ptr<PlanStage> root(rawRoot);
 
-            LOG(2) << "Using fast distinct: " << redact(queryOrExecutor->cq->toStringShort())
+            LOG(2) << "Using fast distinct: " << redact(parsedDistinct->getQuery()->toStringShort())
                    << ", planSummary: " << Explain::getPlanSummary(root.get());
 
-            auto executor = PlanExecutor::make(opCtx,
-                                               std::move(ws),
-                                               std::move(root),
-                                               std::move(currentSolution),
-                                               std::move(queryOrExecutor->cq),
-                                               collection,
-                                               yieldPolicy);
-
-            if (executor.isOK()) {
-                queryOrExecutor->executor = std::move(executor.getValue());
-                return Status::OK();
-            } else {
-                return executor.getStatus();
-            }
+            return PlanExecutor::make(opCtx,
+                                      std::move(ws),
+                                      std::move(root),
+                                      std::move(currentSolution),
+                                      parsedDistinct->releaseQuery(),
+                                      collection,
+                                      yieldPolicy);
         }
     }
 
-    return Status::OK();
+    // Indicate that, although there was no error, we did not find a DISTINCT_SCAN solution.
+    return {nullptr};
+}
+
+/**
+ * Makes a clone of 'cq' but without any projection, then runs getExecutor on the clone.
+ */
+StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorWithoutProjection(
+    OperationContext* opCtx,
+    Collection* collection,
+    const CanonicalQuery* cq,
+    PlanExecutor::YieldPolicy yieldPolicy,
+    size_t plannerOptions) {
+    auto qr = stdx::make_unique<QueryRequest>(cq->getQueryRequest());
+    qr->setProj(BSONObj());
+
+    const boost::intrusive_ptr<ExpressionContext> expCtx;
+    const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
+    auto cqWithoutProjection =
+        CanonicalQuery::canonicalize(opCtx,
+                                     std::move(qr),
+                                     expCtx,
+                                     extensionsCallback,
+                                     MatchExpressionParser::kAllowAllSpecialFeatures);
+
+    return getExecutor(
+        opCtx, collection, std::move(cqWithoutProjection.getValue()), yieldPolicy, plannerOptions);
 }
 }  // namespace
 
@@ -1723,8 +1662,6 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
     auto plannerParams =
         fillOutPlannerParamsForDistinct(opCtx, collection, plannerOptions, *parsedDistinct);
 
-    const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
-
     // If there are no suitable indices for the distinct hack bail out now into regular planning
     // with no projection.
     if (plannerParams.indices.empty()) {
@@ -1733,8 +1670,12 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
             // a DISTINCT_SCAN plan.
             return {nullptr};
         } else {
-            return getExecutor(
-                opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
+            // Note that, when not in STRICT_DISTINCT_ONLY mode, the caller doesn't care about the
+            // projection, only that the planner does not produce a FETCH if it's possible to cover
+            // the fields in the projection. That's definitely not possible in this case, so we
+            // dispense with the projection.
+            return getExecutorWithoutProjection(
+                opCtx, collection, parsedDistinct->getQuery(), yieldPolicy, plannerOptions);
         }
     }
 
@@ -1742,55 +1683,24 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
     // If we're here, we have an index that includes the field we're distinct-ing over.
     //
 
-    auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest());
-
-    // Applying a projection allows the planner to try to give us covered plans that we can turn
-    // into the projection hack. The getDistinctProjection() function deals with .find() projection
-    // semantics (ie _id:1 being implied by default).
-    if (qr->getProj().isEmpty()) {
-        BSONObj projection = getDistinctProjection(parsedDistinct->getKey());
-        qr->setProj(projection);
-    }
-
-    const boost::intrusive_ptr<ExpressionContext> expCtx;
-    auto statusWithCQ =
-        CanonicalQuery::canonicalize(opCtx,
-                                     std::move(qr),
-                                     expCtx,
-                                     extensionsCallback,
-                                     MatchExpressionParser::kAllowAllSpecialFeatures);
-    if (!statusWithCQ.isOK()) {
-        return statusWithCQ.getStatus();
-    }
-
-    QueryOrExecutor queryOrExecutor(std::move(statusWithCQ.getValue()));
-
-    // If the canonical query does not have a user-specified collation, set it from the collection
-    // default.
-    if (queryOrExecutor.cq->getQueryRequest().getCollation().isEmpty() &&
-        collection->getDefaultCollator()) {
-        queryOrExecutor.cq->setCollator(collection->getDefaultCollator()->clone());
-    }
-
-    auto getExecutorStatus = getExecutorForSimpleDistinct(
-        opCtx, collection, plannerParams, yieldPolicy, parsedDistinct, &queryOrExecutor);
-    if (!getExecutorStatus.isOK()) {
-        return getExecutorStatus;
-    } else if (queryOrExecutor.executor) {
-        return std::move(queryOrExecutor.executor);
+    auto executorWithStatus =
+        getExecutorForSimpleDistinct(opCtx, collection, plannerParams, yieldPolicy, parsedDistinct);
+    if (!executorWithStatus.isOK() || executorWithStatus.getValue()) {
+        // We either got a DISTINCT plan or a fatal error.
+        return executorWithStatus;
     } else {
         // A "simple" DISTINCT plan wasn't possible, but we can try again with the QueryPlanner.
     }
 
     // Ask the QueryPlanner for a list of solutions that scan one of the indexes from
     // fillOutPlannerParamsForDistinct() (i.e., the indexes that include the distinct field).
-    auto statusWithSolutions = QueryPlanner::plan(*queryOrExecutor.cq, plannerParams);
+    auto statusWithSolutions = QueryPlanner::plan(*parsedDistinct->getQuery(), plannerParams);
     if (!statusWithSolutions.isOK()) {
         if (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY) {
             return {nullptr};
         } else {
             return getExecutor(
-                opCtx, collection, std::move(queryOrExecutor.cq), yieldPolicy, plannerOptions);
+                opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
         }
     }
     auto solutions = std::move(statusWithSolutions.getValue());
@@ -1799,23 +1709,24 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
     // STRICT_DISTINCT_ONLY flag is not set, we may get a DISTINCT_SCAN plan that filters out some
     // but not all duplicate values of the distinct field, meaning that the output from this
     // executor will still need deduplication.
-    getExecutorStatus = getExecutorDistinctFromIndexSolutions(
+    executorWithStatus = getExecutorDistinctFromIndexSolutions(
         opCtx,
         collection,
         std::move(solutions),
         yieldPolicy,
         parsedDistinct,
-        (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY),
-        &queryOrExecutor);
-    if (!getExecutorStatus.isOK()) {
-        return getExecutorStatus;
-    } else if (queryOrExecutor.executor) {
-        return std::move(queryOrExecutor.executor);
+        (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY));
+    if (!executorWithStatus.isOK() || executorWithStatus.getValue()) {
+        // We either got a DISTINCT plan or a fatal error.
+        return executorWithStatus;
     } else if (!(plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY)) {
         // We did not find a solution that we could convert to a DISTINCT_SCAN, so we fall back to
-        // regular planning.
-        return getExecutor(
-            opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
+        // regular planning. Note that, when not in STRICT_DISTINCT_ONLY mode, the caller doesn't
+        // care about the projection, only that the planner does not produce a FETCH if it's
+        // possible to cover the fields in the projection. That's definitely not possible in this
+        // case, so we dispense with the projection.
+        return getExecutorWithoutProjection(
+            opCtx, collection, parsedDistinct->getQuery(), yieldPolicy, plannerOptions);
     } else {
         // We did not find a solution that we could convert to DISTINCT_SCAN, and the
         // STRICT_DISTINCT_ONLY prohibits us from using any other kind of plan, so we return
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 4eecc309d99..0e97efa0fe0 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -150,12 +150,12 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
  * or an aggregation pipeline that uses a $group stage with distinct-like semantics.
  *
  * Distinct is unique in that it doesn't care about getting all the results; it just wants all
- * possible values of a certain field.  As such, we can skip lots of data in certain cases (see
- * body of method for detail).
+ * possible values of a certain field.  As such, we can skip lots of data in certain cases (see body
+ * of method for detail).
  *
  * A $group stage on a single field behaves similarly to a distinct command. If it has no
- * accumulators or only $first accumulators, the $group command only needs to visit one document
- * for each distinct value of the grouped-by (_id) field to compute its result. When there is a sort
+ * accumulators or only $first accumulators, the $group command only needs to visit one document for
+ * each distinct value of the grouped-by (_id) field to compute its result. When there is a sort
  * order specified in parsedDistinct->getQuery()->getQueryRequest.getSort(), the DISTINCT_SCAN will
  * follow that sort order, ensuring that it chooses the correct document from each group to compute
  * any $first accumulators.
@@ -166,6 +166,10 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
  * DISTINCT_SCAN to filter some but not all duplicates (so that de-duplication is still necessary
  * after query execution), or it may fall back to a regular IXSCAN.
  *
+ * Note that this function uses the projection in 'parsedDistinct' to produce a covered query when
+ * possible, but when a covered query is not possible, the resulting plan may elide the projection
+ * stage (instead returning entire fetched documents).
+ *
  * For example, a distinct query on field 'b' could use a DISTINCT_SCAN over index {a: 1, b: 1}.
  * This plan will reduce the output set by filtering out documents that are equal on both the 'a'
  * and 'b' fields, but it could still output documents with equal 'b' values if their 'a' fields are
diff --git a/src/mongo/db/query/parsed_distinct.cpp b/src/mongo/db/query/parsed_distinct.cpp
index 9d342a1f2a7..245392d95ad 100644
--- a/src/mongo/db/query/parsed_distinct.cpp
+++ b/src/mongo/db/query/parsed_distinct.cpp
@@ -48,6 +48,74 @@ const char ParsedDistinct::kQueryField[] = "query";
 const char ParsedDistinct::kCollationField[] = "collation";
 const char ParsedDistinct::kCommentField[] = "comment";
 
+namespace {
+/**
+ * Returns 'field' truncated just before its first non-leading path component that parses as a
+ * non-negative integer (a possible array index), e.g. 'a.b.2' => 'a.b'; returns 'field' unchanged
+ * if there is none. Sets '*isIDOut' to true iff the first component is '_id' (e.g. _id.a.2).
+ */
+std::string getProjectedDottedField(const std::string& field, bool* isIDOut) {
+    // Split the dotted path into its '.'-separated components.
+    std::vector<std::string> res;
+    mongo::splitStringDelim(field, &res, '.');
+
+    // Set '*isIDOut' up front, since the loop below may return early.
+    // NOTE(review): assumes 'res' has at least one component here -- confirm against callers.
+    *isIDOut = ("_id" == res[0]);
+
+    // Start at the second component: a number in the first
+    // position is a field name, so it cannot be an array index.
+    int arrayIndex = 0;
+    for (size_t i = 1; i < res.size(); ++i) {
+        if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) {
+            // Array indices cannot be negative numbers (this is not $slice).
+            // Negative numbers are allowed as field names.
+            if (arrayIndex >= 0) {
+                // Keep only the components before (not including) the array index.
+                std::vector<std::string> prefixStrings(res);
+                prefixStrings.resize(i);
+                // Join the prefix into a fresh, empty string: joinStringDelim() appends the
+                // joined string to the end of its output argument instead of overwriting it,
+                // so the output must start out empty.
+                std::string projectedField;
+                mongo::joinStringDelim(prefixStrings, &projectedField, '.');
+                return projectedField;
+            }
+        }
+    }
+
+    return field;
+}
+
+/**
+ * Creates a projection spec for a distinct command from the requested field. In most cases, the
+ * projection spec will be {_id: 0, <field>: 1}. The exceptions are:
+ * 1) When the requested field is '_id', or a dotted path whose first component is '_id', the
+ *    '_id' field is not excluded. For example, '_id' => {_id: 1}.
+ * 2) When the requested field could be an array element (eg. a.0), the projected field will be the
+ *    prefix of the field up to the array element. For example, a.b.2 => {_id: 0, 'a.b': 1}. Note
+ *    that we can't use a $slice projection because the distinct command filters the results from
+ *    the executor using the dotted field name. Using $slice will re-order the documents in the
+ *    array in the results.
+ */
+BSONObj getDistinctProjection(const std::string& field) {
+    std::string projectedField(field);
+
+    bool isID = false;
+    if ("_id" == field) {
+        isID = true;
+    } else if (str::contains(field, '.')) {
+        projectedField = getProjectedDottedField(field, &isID);  // May truncate at an array index.
+    }
+    BSONObjBuilder bob;
+    if (!isID) {
+        bob.append("_id", 0);  // Exclude _id unless the projection is on _id (or a subfield of it).
+    }
+    bob.append(projectedField, 1);
+    return bob.obj();
+}
+}  // namespace
+
 StatusWith<BSONObj> ParsedDistinct::asAggregationCommand() const {
     BSONObjBuilder aggregationBuilder;
 
@@ -120,7 +188,8 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
                                                  const NamespaceString& nss,
                                                  const BSONObj& cmdObj,
                                                  const ExtensionsCallback& extensionsCallback,
-                                                 bool isExplain) {
+                                                 bool isExplain,
+                                                 const CollatorInterface* defaultCollator) {
     IDLParserErrorContext ctx("distinct");
 
     DistinctCommand parsedDistinct(nss);
@@ -132,6 +201,10 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
 
     auto qr = stdx::make_unique<QueryRequest>(nss);
 
+    // Create a projection on the fields needed by the distinct command, so that the query planner
+    // will produce a covered plan if possible.
+    qr->setProj(getDistinctProjection(std::string(parsedDistinct.getKey())));
+
     if (auto query = parsedDistinct.getQuery()) {
         qr->setFilter(query.get());
     }
@@ -190,6 +263,10 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
         return cq.getStatus();
     }
 
+    if (cq.getValue()->getQueryRequest().getCollation().isEmpty() && defaultCollator) {
+        cq.getValue()->setCollator(defaultCollator->clone());
+    }
+
     return ParsedDistinct(std::move(cq.getValue()), parsedDistinct.getKey().toString());
 }
 
diff --git a/src/mongo/db/query/parsed_distinct.h b/src/mongo/db/query/parsed_distinct.h
index 3c4761c985d..2f6837ff8b0 100644
--- a/src/mongo/db/query/parsed_distinct.h
+++ b/src/mongo/db/query/parsed_distinct.h
@@ -84,7 +84,8 @@ public:
                                             const NamespaceString& nss,
                                             const BSONObj& cmdObj,
                                             const ExtensionsCallback& extensionsCallback,
-                                            bool isExplain);
+                                            bool isExplain,
+                                            const CollatorInterface* defaultCollator = nullptr);
 
 private:
     std::unique_ptr<CanonicalQuery> _query;
author	Justin Seyster <justin.seyster@mongodb.com>	2019-03-27 19:01:18 -0400
committer	Justin Seyster <justin.seyster@mongodb.com>	2019-03-28 17:17:27 -0400
commit	e73da48e26048cb5ca2120acadac2d9c2c8ee403 (patch)
tree	1a562334c2cf2bbfc7629f9723439d564c20e847 /src/mongo/db/query
parent	b885fa6feb7da00dc367e917c53ba16a41b75af4 (diff)
download	mongo-e73da48e26048cb5ca2120acadac2d9c2c8ee403.tar.gz