summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/commands/distinct.cpp 12
-rw-r--r-- src/mongo/db/query/get_executor.cpp 401
-rw-r--r-- src/mongo/db/query/get_executor.h 12
-rw-r--r-- src/mongo/db/query/parsed_distinct.cpp 79
-rw-r--r-- src/mongo/db/query/parsed_distinct.h 3
5 files changed, 252 insertions, 255 deletions
diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp
index 9370da8bd73..2f51a659469 100644
--- a/src/mongo/db/commands/distinct.cpp
+++ b/src/mongo/db/commands/distinct.cpp
@@ -127,8 +127,10 @@ public:
const auto nss = ctx->getNss();
const ExtensionsCallbackReal extensionsCallback(opCtx, &nss);
- auto parsedDistinct =
- uassertStatusOK(ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, true));
+ auto defaultCollator =
+ ctx->getCollection() ? ctx->getCollection()->getDefaultCollator() : nullptr;
+ auto parsedDistinct = uassertStatusOK(
+ ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, true, defaultCollator));
if (ctx->getView()) {
// Relinquish locks. The aggregation command will re-acquire them.
@@ -179,8 +181,10 @@ public:
const auto& nss = ctx->getNss();
const ExtensionsCallbackReal extensionsCallback(opCtx, &nss);
- auto parsedDistinct =
- uassertStatusOK(ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, false));
+ auto defaultCollation =
+ ctx->getCollection() ? ctx->getCollection()->getDefaultCollator() : nullptr;
+ auto parsedDistinct = uassertStatusOK(
+ ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, false, defaultCollation));
// Check whether we are allowed to read from this node after acquiring our locks.
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 9c9d9132811..62517e5a0da 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1184,74 +1184,6 @@ bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices,
return minFields != std::numeric_limits<int>::max();
}
-/**
- * Checks dotted field for a projection and truncates the
- * field name if we could be projecting on an array element.
- * Sets 'isIDOut' to true if the projection is on a sub document of _id.
- * For example, _id.a.2, _id.b.c.
- */
-std::string getProjectedDottedField(const std::string& field, bool* isIDOut) {
- // Check if field contains an array index.
- std::vector<std::string> res;
- mongo::splitStringDelim(field, &res, '.');
-
- // Since we could exit early from the loop,
- // we should check _id here and set '*isIDOut' accordingly.
- *isIDOut = ("_id" == res[0]);
-
- // Skip the first dotted component. If the field starts
- // with a number, the number cannot be an array index.
- int arrayIndex = 0;
- for (size_t i = 1; i < res.size(); ++i) {
- if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) {
- // Array indices cannot be negative numbers (this is not $slice).
- // Negative numbers are allowed as field names.
- if (arrayIndex >= 0) {
- // Generate prefix of field up to (but not including) array index.
- std::vector<std::string> prefixStrings(res);
- prefixStrings.resize(i);
- // Reset projectedField. Instead of overwriting, joinStringDelim() appends joined
- // string
- // to the end of projectedField.
- std::string projectedField;
- mongo::joinStringDelim(prefixStrings, &projectedField, '.');
- return projectedField;
- }
- }
- }
-
- return field;
-}
-
-/**
- * Creates a projection spec for a distinct command from the requested field.
- * In most cases, the projection spec will be {_id: 0, key: 1}.
- * The exceptions are:
- * 1) When the requested field is '_id', the projection spec will {_id: 1}.
- * 2) When the requested field could be an array element (eg. a.0),
- * the projected field will be the prefix of the field up to the array element.
- * For example, a.b.2 => {_id: 0, 'a.b': 1}
- * Note that we can't use a $slice projection because the distinct command filters
- * the results from the executor using the dotted field name. Using $slice will
- * re-order the documents in the array in the results.
- */
-BSONObj getDistinctProjection(const std::string& field) {
- std::string projectedField(field);
-
- bool isID = false;
- if ("_id" == field) {
- isID = true;
- } else if (str::contains(field, '.')) {
- projectedField = getProjectedDottedField(field, &isID);
- }
- BSONObjBuilder bob;
- if (!isID) {
- bob.append("_id", 0);
- }
- bob.append(projectedField, 1);
- return bob.obj();
-}
-
} // namespace
StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorCount(
@@ -1351,38 +1283,46 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
bool strictDistinctOnly) {
QuerySolutionNode* root = soln->root.get();
- // Root stage must be a project.
+ // We can attempt to convert a plan if it follows one of these patterns (starting from the
+ // root):
+ // 1. PROJECT=>FETCH=>IXSCAN
+ // 2. FETCH=>IXSCAN
+ // 3. PROJECT=>IXSCAN
+ QuerySolutionNode* projectNode = nullptr;
+ IndexScanNode* indexScanNode = nullptr;
+ FetchNode* fetchNode = nullptr;
+
switch (root->getType()) {
- default:
- return false;
case STAGE_PROJECTION_DEFAULT:
case STAGE_PROJECTION_COVERED:
- case STAGE_PROJECTION_SIMPLE:;
+ case STAGE_PROJECTION_SIMPLE:
+ projectNode = root;
+ break;
+ case STAGE_FETCH:
+ fetchNode = static_cast<FetchNode*>(root);
+ break;
+ default:
+ return false;
}
- // Child should be either an ixscan or fetch.
- if (STAGE_IXSCAN != root->children[0]->getType() &&
- STAGE_FETCH != root->children[0]->getType()) {
- return false;
+ if (!fetchNode && (STAGE_FETCH == root->children[0]->getType())) {
+ fetchNode = static_cast<FetchNode*>(root->children[0]);
}
- IndexScanNode* indexScanNode = nullptr;
- FetchNode* fetchNode = nullptr;
- if (STAGE_IXSCAN == root->children[0]->getType()) {
- indexScanNode = static_cast<IndexScanNode*>(root->children[0]);
- } else {
- fetchNode = static_cast<FetchNode*>(root->children[0]);
- // If the fetch has a filter, we're out of luck. We can't skip all keys with a given value,
- // since one of them may key a document that passes the filter.
- if (fetchNode->filter) {
- return false;
- }
+ if (fetchNode && (STAGE_IXSCAN == fetchNode->children[0]->getType())) {
+ indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
+ } else if (projectNode && (STAGE_IXSCAN == projectNode->children[0]->getType())) {
+ indexScanNode = static_cast<IndexScanNode*>(projectNode->children[0]);
+ }
- if (STAGE_IXSCAN != fetchNode->children[0]->getType()) {
- return false;
- }
+ if (!indexScanNode) {
+ return false;
+ }
- indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]);
+ // If the fetch has a filter, we're out of luck. We can't skip all keys with a given value,
+ // since one of them may key a document that passes the filter.
+ if (fetchNode && fetchNode->filter) {
+ return false;
}
if (indexScanNode->index.type == IndexType::INDEX_WILDCARD) {
@@ -1467,39 +1407,33 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
distinctNode->fieldNo = fieldNo;
if (fetchNode) {
- // If there is a fetch node, then there is no need for the projection. The fetch node should
- // become the new root, with the distinct as its child. The PROJECT=>FETCH=>IXSCAN tree
- // should become FETCH=>DISTINCT_SCAN.
- switch (root->getType()) {
- default:
- MONGO_UNREACHABLE;
- case STAGE_PROJECTION_DEFAULT:
- case STAGE_PROJECTION_COVERED:
- case STAGE_PROJECTION_SIMPLE:;
- }
- invariant(STAGE_FETCH == root->children[0]->getType());
- invariant(STAGE_IXSCAN == root->children[0]->children[0]->getType());
+ // If the original plan had PROJECT and FETCH stages, we can get rid of the PROJECT,
+ // transforming the plan from PROJECT=>FETCH=>IXSCAN to FETCH=>DISTINCT_SCAN.
+ if (projectNode) {
+ invariant(projectNode == root);
+ projectNode = nullptr;
- // Detach the fetch from its parent projection.
- root->children.clear();
+ invariant(STAGE_FETCH == root->children[0]->getType());
+ invariant(STAGE_IXSCAN == root->children[0]->children[0]->getType());
- // Make the fetch the new root. This destroys the project stage.
- soln->root.reset(fetchNode);
+ // Detach the fetch from its parent projection.
+ root->children.clear();
- // Take ownership of the index scan node, detaching it from the solution tree.
+ // Make the fetch the new root. This destroys the project stage.
+ soln->root.reset(fetchNode);
+ }
+
+ // Whenever we have a FETCH node, the IXSCAN is its child. We detach the IXSCAN from the
+ // solution tree and take ownership of it, so that it gets destroyed when we leave this
+ // scope.
std::unique_ptr<IndexScanNode> ownedIsn(indexScanNode);
+ indexScanNode = nullptr;
// Attach the distinct node in the index scan's place.
fetchNode->children[0] = distinctNode.release();
} else {
// There is no fetch node. The PROJECT=>IXSCAN tree should become PROJECT=>DISTINCT_SCAN.
- switch (root->getType()) {
- default:
- MONGO_UNREACHABLE;
- case STAGE_PROJECTION_DEFAULT:
- case STAGE_PROJECTION_COVERED:
- case STAGE_PROJECTION_SIMPLE:;
- }
+ invariant(projectNode == root);
invariant(STAGE_IXSCAN == root->children[0]->getType());
// Take ownership of the index scan node, detaching it from the solution tree.
@@ -1545,53 +1479,40 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
return plannerParams;
}
-// Pass this to getExecutorForSimpleDistinct() or getExecutorDistinctFromIndexSolutions()
-// which will either move the query into a newly created executor or leave the executor as nullptr
-// to indicate that no solution was found.
-struct QueryOrExecutor {
- QueryOrExecutor(unique_ptr<CanonicalQuery> cq) : cq(std::move(cq)) {}
-
- std::unique_ptr<CanonicalQuery> cq;
- std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> executor;
-};
-
/**
* A simple DISTINCT_SCAN has an empty query and no sort, so we just need to find a suitable index
- * that has the "distinct" field the first component of its key pattern.
+ * that has the "distinct" field as the first component of its key pattern.
*
- * If a suitable solution is found, this function will create a new executor in
- * queryOrExecutor->executor and move the query into it, leaving queryOrExecutor->cq in a "moved
- * from" state. Otherwise, it will leave queryOrExecutor->cq as is and set queryOrExecutor->executor
- * to nullptr (but still return Status::OK).
+ * If a suitable solution is found, this function will create and return a new executor. In order to
+ * do so, it releases the CanonicalQuery from the 'parsedDistinct' input. If no solution is found,
+ * the return value is an OK status with a nullptr value, and the 'parsedDistinct' CanonicalQuery
+ * remains valid. This function may also return a failed status code, in which case the caller
+ * should assume that the 'parsedDistinct' CanonicalQuery is no longer valid.
*/
-Status getExecutorForSimpleDistinct(OperationContext* opCtx,
- Collection* collection,
- const QueryPlannerParams& plannerParams,
- PlanExecutor::YieldPolicy yieldPolicy,
- ParsedDistinct* parsedDistinct,
- QueryOrExecutor* queryOrExecutor) {
- invariant(queryOrExecutor);
- invariant(queryOrExecutor->cq);
- invariant(!queryOrExecutor->executor);
+StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorForSimpleDistinct(
+ OperationContext* opCtx,
+ Collection* collection,
+ const QueryPlannerParams& plannerParams,
+ PlanExecutor::YieldPolicy yieldPolicy,
+ ParsedDistinct* parsedDistinct) {
+ invariant(parsedDistinct->getQuery());
+ auto collator = parsedDistinct->getQuery()->getCollator();
// If there's no query, we can just distinct-scan one of the indices. Not every index in
// plannerParams.indices may be suitable. Refer to getDistinctNodeIndex().
size_t distinctNodeIndex = 0;
if (!parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() ||
!parsedDistinct->getQuery()->getQueryRequest().getSort().isEmpty() ||
- !getDistinctNodeIndex(plannerParams.indices,
- parsedDistinct->getKey(),
- queryOrExecutor->cq->getCollator(),
- &distinctNodeIndex)) {
+ !getDistinctNodeIndex(
+ plannerParams.indices, parsedDistinct->getKey(), collator, &distinctNodeIndex)) {
// Not a "simple" DISTINCT_SCAN or no suitable index was found.
- queryOrExecutor->executor = nullptr;
- return Status::OK();
+ return {nullptr};
}
auto dn = stdx::make_unique<DistinctNode>(plannerParams.indices[distinctNodeIndex]);
dn->direction = 1;
IndexBoundsBuilder::allValuesBounds(dn->index.keyPattern, &dn->bounds);
- dn->queryCollator = queryOrExecutor->cq->getCollator();
+ dn->queryCollator = collator;
dn->fieldNo = 0;
// An index with a non-simple collation requires a FETCH stage.
@@ -1606,52 +1527,47 @@ Status getExecutorForSimpleDistinct(OperationContext* opCtx,
QueryPlannerParams params;
- auto soln =
- QueryPlannerAnalysis::analyzeDataAccess(*queryOrExecutor->cq, params, std::move(solnRoot));
+ auto soln = QueryPlannerAnalysis::analyzeDataAccess(
+ *parsedDistinct->getQuery(), params, std::move(solnRoot));
invariant(soln);
unique_ptr<WorkingSet> ws = make_unique<WorkingSet>();
PlanStage* rawRoot;
- verify(StageBuilder::build(opCtx, collection, *queryOrExecutor->cq, *soln, ws.get(), &rawRoot));
+ verify(StageBuilder::build(
+ opCtx, collection, *parsedDistinct->getQuery(), *soln, ws.get(), &rawRoot));
unique_ptr<PlanStage> root(rawRoot);
- LOG(2) << "Using fast distinct: " << redact(queryOrExecutor->cq->toStringShort())
+ LOG(2) << "Using fast distinct: " << redact(parsedDistinct->getQuery()->toStringShort())
<< ", planSummary: " << Explain::getPlanSummary(root.get());
- auto executor = PlanExecutor::make(opCtx,
- std::move(ws),
- std::move(root),
- std::move(soln),
- std::move(queryOrExecutor->cq),
- collection,
- yieldPolicy);
-
- if (executor.isOK()) {
- queryOrExecutor->executor = std::move(executor.getValue());
- return Status::OK();
- } else {
- return executor.getStatus();
- }
+ return PlanExecutor::make(opCtx,
+ std::move(ws),
+ std::move(root),
+ std::move(soln),
+ parsedDistinct->releaseQuery(),
+ collection,
+ yieldPolicy);
}
// Checks each solution in the 'solutions' vector to see if one includes an IXSCAN that can be
// rewritten as a DISTINCT_SCAN, assuming we want distinct scan behavior on the getKey() property of
// the 'parsedDistinct' argument.
//
-// If a suitable solution is found, this function will create a new executor in
-// queryOrExecutor->executor and move the query into it, leaving queryOrExecutor->cq in a "moved
-// from" state. Otherwise, it will leave queryOrExecutor->cq as is and set queryOrExecutor->executor
-// to nullptr (but still return Status::OK).
+// If a suitable solution is found, this function will create and return a new executor. In order to
+// do so, it releases the CanonicalQuery from the 'parsedDistinct' input. If no solution is found,
+// the return value is an OK status with a nullptr value, and the 'parsedDistinct' CanonicalQuery
+// remains valid. This function may also return a failed status code, in which case the caller
+// should assume that the 'parsedDistinct' CanonicalQuery is no longer valid.
//
// See the declaration of turnIxscanIntoDistinctIxscan() for an explanation of the
// 'strictDistinctOnly' parameter.
-Status getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
- Collection* collection,
- std::vector<std::unique_ptr<QuerySolution>> solutions,
- PlanExecutor::YieldPolicy yieldPolicy,
- ParsedDistinct* parsedDistinct,
- bool strictDistinctOnly,
- QueryOrExecutor* queryOrExecutor) {
+StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>>
+getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
+ Collection* collection,
+ std::vector<std::unique_ptr<QuerySolution>> solutions,
+ PlanExecutor::YieldPolicy yieldPolicy,
+ ParsedDistinct* parsedDistinct,
+ bool strictDistinctOnly) {
// We look for a solution that has an ixscan we can turn into a distinctixscan
for (size_t i = 0; i < solutions.size(); ++i) {
if (turnIxscanIntoDistinctIxscan(
@@ -1660,31 +1576,54 @@ Status getExecutorDistinctFromIndexSolutions(OperationContext* opCtx,
unique_ptr<WorkingSet> ws = make_unique<WorkingSet>();
unique_ptr<QuerySolution> currentSolution = std::move(solutions[i]);
PlanStage* rawRoot;
- verify(StageBuilder::build(
- opCtx, collection, *queryOrExecutor->cq, *currentSolution, ws.get(), &rawRoot));
+ verify(StageBuilder::build(opCtx,
+ collection,
+ *parsedDistinct->getQuery(),
+ *currentSolution,
+ ws.get(),
+ &rawRoot));
unique_ptr<PlanStage> root(rawRoot);
- LOG(2) << "Using fast distinct: " << redact(queryOrExecutor->cq->toStringShort())
+ LOG(2) << "Using fast distinct: " << redact(parsedDistinct->getQuery()->toStringShort())
<< ", planSummary: " << Explain::getPlanSummary(root.get());
- auto executor = PlanExecutor::make(opCtx,
- std::move(ws),
- std::move(root),
- std::move(currentSolution),
- std::move(queryOrExecutor->cq),
- collection,
- yieldPolicy);
-
- if (executor.isOK()) {
- queryOrExecutor->executor = std::move(executor.getValue());
- return Status::OK();
- } else {
- return executor.getStatus();
- }
+ return PlanExecutor::make(opCtx,
+ std::move(ws),
+ std::move(root),
+ std::move(currentSolution),
+ parsedDistinct->releaseQuery(),
+ collection,
+ yieldPolicy);
}
}
- return Status::OK();
+ // Indicate that, although there was no error, we did not find a DISTINCT_SCAN solution.
+ return {nullptr};
+}
+
+/**
+ * Makes a clone of 'cq' but without any projection, then runs getExecutor on the clone.
+ */
+StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorWithoutProjection(
+ OperationContext* opCtx,
+ Collection* collection,
+ const CanonicalQuery* cq,
+ PlanExecutor::YieldPolicy yieldPolicy,
+ size_t plannerOptions) {
+ auto qr = stdx::make_unique<QueryRequest>(cq->getQueryRequest());
+ qr->setProj(BSONObj());
+
+ const boost::intrusive_ptr<ExpressionContext> expCtx;
+ const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
+ auto cqWithoutProjection =
+ CanonicalQuery::canonicalize(opCtx,
+ std::move(qr),
+ expCtx,
+ extensionsCallback,
+ MatchExpressionParser::kAllowAllSpecialFeatures);
+
+ return getExecutor(
+ opCtx, collection, std::move(cqWithoutProjection.getValue()), yieldPolicy, plannerOptions);
}
} // namespace
@@ -1723,8 +1662,6 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
auto plannerParams =
fillOutPlannerParamsForDistinct(opCtx, collection, plannerOptions, *parsedDistinct);
- const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns());
-
// If there are no suitable indices for the distinct hack bail out now into regular planning
// with no projection.
if (plannerParams.indices.empty()) {
@@ -1733,8 +1670,12 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
// a DISTINCT_SCAN plan.
return {nullptr};
} else {
- return getExecutor(
- opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
+ // Note that, when not in STRICT_DISTINCT_ONLY mode, the caller doesn't care about the
+ // projection, only that the planner does not produce a FETCH if it's possible to cover
+ // the fields in the projection. That's definitely not possible in this case, so we
+ // dispense with the projection.
+ return getExecutorWithoutProjection(
+ opCtx, collection, parsedDistinct->getQuery(), yieldPolicy, plannerOptions);
}
}
@@ -1742,55 +1683,24 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
// If we're here, we have an index that includes the field we're distinct-ing over.
//
- auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest());
-
- // Applying a projection allows the planner to try to give us covered plans that we can turn
- // into the projection hack. The getDistinctProjection() function deals with .find() projection
- // semantics (ie _id:1 being implied by default).
- if (qr->getProj().isEmpty()) {
- BSONObj projection = getDistinctProjection(parsedDistinct->getKey());
- qr->setProj(projection);
- }
-
- const boost::intrusive_ptr<ExpressionContext> expCtx;
- auto statusWithCQ =
- CanonicalQuery::canonicalize(opCtx,
- std::move(qr),
- expCtx,
- extensionsCallback,
- MatchExpressionParser::kAllowAllSpecialFeatures);
- if (!statusWithCQ.isOK()) {
- return statusWithCQ.getStatus();
- }
-
- QueryOrExecutor queryOrExecutor(std::move(statusWithCQ.getValue()));
-
- // If the canonical query does not have a user-specified collation, set it from the collection
- // default.
- if (queryOrExecutor.cq->getQueryRequest().getCollation().isEmpty() &&
- collection->getDefaultCollator()) {
- queryOrExecutor.cq->setCollator(collection->getDefaultCollator()->clone());
- }
-
- auto getExecutorStatus = getExecutorForSimpleDistinct(
- opCtx, collection, plannerParams, yieldPolicy, parsedDistinct, &queryOrExecutor);
- if (!getExecutorStatus.isOK()) {
- return getExecutorStatus;
- } else if (queryOrExecutor.executor) {
- return std::move(queryOrExecutor.executor);
+ auto executorWithStatus =
+ getExecutorForSimpleDistinct(opCtx, collection, plannerParams, yieldPolicy, parsedDistinct);
+ if (!executorWithStatus.isOK() || executorWithStatus.getValue()) {
+ // We either got a DISTINCT plan or a fatal error.
+ return executorWithStatus;
} else {
// A "simple" DISTINCT plan wasn't possible, but we can try again with the QueryPlanner.
}
// Ask the QueryPlanner for a list of solutions that scan one of the indexes from
// fillOutPlannerParamsForDistinct() (i.e., the indexes that include the distinct field).
- auto statusWithSolutions = QueryPlanner::plan(*queryOrExecutor.cq, plannerParams);
+ auto statusWithSolutions = QueryPlanner::plan(*parsedDistinct->getQuery(), plannerParams);
if (!statusWithSolutions.isOK()) {
if (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY) {
return {nullptr};
} else {
return getExecutor(
- opCtx, collection, std::move(queryOrExecutor.cq), yieldPolicy, plannerOptions);
+ opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
}
}
auto solutions = std::move(statusWithSolutions.getValue());
@@ -1799,23 +1709,24 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct(
// STRICT_DISTINCT_ONLY flag is not set, we may get a DISTINCT_SCAN plan that filters out some
// but not all duplicate values of the distinct field, meaning that the output from this
// executor will still need deduplication.
- getExecutorStatus = getExecutorDistinctFromIndexSolutions(
+ executorWithStatus = getExecutorDistinctFromIndexSolutions(
opCtx,
collection,
std::move(solutions),
yieldPolicy,
parsedDistinct,
- (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY),
- &queryOrExecutor);
- if (!getExecutorStatus.isOK()) {
- return getExecutorStatus;
- } else if (queryOrExecutor.executor) {
- return std::move(queryOrExecutor.executor);
+ (plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY));
+ if (!executorWithStatus.isOK() || executorWithStatus.getValue()) {
+ // We either got a DISTINCT plan or a fatal error.
+ return executorWithStatus;
} else if (!(plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY)) {
// We did not find a solution that we could convert to a DISTINCT_SCAN, so we fall back to
- // regular planning.
- return getExecutor(
- opCtx, collection, parsedDistinct->releaseQuery(), yieldPolicy, plannerOptions);
+ // regular planning. Note that, when not in STRICT_DISTINCT_ONLY mode, the caller doesn't
+ // care about the projection, only that the planner does not produce a FETCH if it's
+ // possible to cover the fields in the projection. That's definitely not possible in this
+ // case, so we dispense with the projection.
+ return getExecutorWithoutProjection(
+ opCtx, collection, parsedDistinct->getQuery(), yieldPolicy, plannerOptions);
} else {
// We did not find a solution that we could convert to DISTINCT_SCAN, and the
// STRICT_DISTINCT_ONLY prohibits us from using any other kind of plan, so we return
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 4eecc309d99..0e97efa0fe0 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -150,12 +150,12 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
* or an aggregation pipeline that uses a $group stage with distinct-like semantics.
*
* Distinct is unique in that it doesn't care about getting all the results; it just wants all
- * possible values of a certain field. As such, we can skip lots of data in certain cases (see
- * body of method for detail).
+ * possible values of a certain field. As such, we can skip lots of data in certain cases (see body
+ * of method for detail).
*
* A $group stage on a single field behaves similarly to a distinct command. If it has no
- * accumulators or only $first accumulators, the $group command only needs to visit one document
- * for each distinct value of the grouped-by (_id) field to compute its result. When there is a sort
+ * accumulators or only $first accumulators, the $group command only needs to visit one document for
+ * each distinct value of the grouped-by (_id) field to compute its result. When there is a sort
* order specified in parsedDistinct->getQuery()->getQueryRequest.getSort(), the DISTINCT_SCAN will
* follow that sort order, ensuring that it chooses the correct document from each group to compute
* any $first accumulators.
@@ -166,6 +166,10 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
* DISTINCT_SCAN to filter some but not all duplicates (so that de-duplication is still necessary
* after query execution), or it may fall back to a regular IXSCAN.
*
+ * Note that this function uses the projection in 'parsedDistinct' to produce a covered query when
+ * possible, but when a covered query is not possible, the resulting plan may elide the projection
+ * stage (instead returning entire fetched documents).
+ *
* For example, a distinct query on field 'b' could use a DISTINCT_SCAN over index {a: 1, b: 1}.
* This plan will reduce the output set by filtering out documents that are equal on both the 'a'
* and 'b' fields, but it could still output documents with equal 'b' values if their 'a' fields are
diff --git a/src/mongo/db/query/parsed_distinct.cpp b/src/mongo/db/query/parsed_distinct.cpp
index 9d342a1f2a7..245392d95ad 100644
--- a/src/mongo/db/query/parsed_distinct.cpp
+++ b/src/mongo/db/query/parsed_distinct.cpp
@@ -48,6 +48,74 @@ const char ParsedDistinct::kQueryField[] = "query";
const char ParsedDistinct::kCollationField[] = "collation";
const char ParsedDistinct::kCommentField[] = "comment";
+namespace {
+/**
+ * Checks dotted field for a projection and truncates the field name if we could be projecting on an
+ * array element. Sets 'isIDOut' to true if the projection is on a sub document of _id. For example,
+ * _id.a.2, _id.b.c.
+ */
+std::string getProjectedDottedField(const std::string& field, bool* isIDOut) {
+ // Check if field contains an array index.
+ std::vector<std::string> res;
+ mongo::splitStringDelim(field, &res, '.');
+
+ // Since we could exit early from the loop,
+ // we should check _id here and set '*isIDOut' accordingly.
+ *isIDOut = ("_id" == res[0]);
+
+ // Skip the first dotted component. If the field starts
+ // with a number, the number cannot be an array index.
+ int arrayIndex = 0;
+ for (size_t i = 1; i < res.size(); ++i) {
+ if (mongo::parseNumberFromStringWithBase(res[i], 10, &arrayIndex).isOK()) {
+ // Array indices cannot be negative numbers (this is not $slice).
+ // Negative numbers are allowed as field names.
+ if (arrayIndex >= 0) {
+ // Generate prefix of field up to (but not including) array index.
+ std::vector<std::string> prefixStrings(res);
+ prefixStrings.resize(i);
+ // Start with an empty projectedField: rather than overwriting its output
+ // argument, joinStringDelim() appends the joined string to the end of
+ // projectedField.
+ std::string projectedField;
+ mongo::joinStringDelim(prefixStrings, &projectedField, '.');
+ return projectedField;
+ }
+ }
+ }
+
+ return field;
+}
+
+/**
+ * Creates a projection spec for a distinct command from the requested field. In most cases, the
+ * projection spec will be {_id: 0, key: 1}.
+ * The exceptions are:
+ * 1) When the requested field is '_id', the projection spec will be {_id: 1}.
+ * 2) When the requested field could be an array element (eg. a.0), the projected field will be the
+ * prefix of the field up to the array element. For example, a.b.2 => {_id: 0, 'a.b': 1}. Note
+ * that we can't use a $slice projection because the distinct command filters the results from
+ * the executor using the dotted field name. Using $slice will re-order the documents in the
+ * array in the results.
+ */
+BSONObj getDistinctProjection(const std::string& field) {
+ std::string projectedField(field);
+
+ bool isID = false;
+ if ("_id" == field) {
+ isID = true;
+ } else if (str::contains(field, '.')) {
+ projectedField = getProjectedDottedField(field, &isID);
+ }
+ BSONObjBuilder bob;
+ if (!isID) {
+ bob.append("_id", 0);
+ }
+ bob.append(projectedField, 1);
+ return bob.obj();
+}
+} // namespace
+
StatusWith<BSONObj> ParsedDistinct::asAggregationCommand() const {
BSONObjBuilder aggregationBuilder;
@@ -120,7 +188,8 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
const NamespaceString& nss,
const BSONObj& cmdObj,
const ExtensionsCallback& extensionsCallback,
- bool isExplain) {
+ bool isExplain,
+ const CollatorInterface* defaultCollator) {
IDLParserErrorContext ctx("distinct");
DistinctCommand parsedDistinct(nss);
@@ -132,6 +201,10 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
auto qr = stdx::make_unique<QueryRequest>(nss);
+ // Create a projection on the fields needed by the distinct command, so that the query planner
+ // will produce a covered plan if possible.
+ qr->setProj(getDistinctProjection(std::string(parsedDistinct.getKey())));
+
if (auto query = parsedDistinct.getQuery()) {
qr->setFilter(query.get());
}
@@ -190,6 +263,10 @@ StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
return cq.getStatus();
}
+ if (cq.getValue()->getQueryRequest().getCollation().isEmpty() && defaultCollator) {
+ cq.getValue()->setCollator(defaultCollator->clone());
+ }
+
return ParsedDistinct(std::move(cq.getValue()), parsedDistinct.getKey().toString());
}
diff --git a/src/mongo/db/query/parsed_distinct.h b/src/mongo/db/query/parsed_distinct.h
index 3c4761c985d..2f6837ff8b0 100644
--- a/src/mongo/db/query/parsed_distinct.h
+++ b/src/mongo/db/query/parsed_distinct.h
@@ -84,7 +84,8 @@ public:
const NamespaceString& nss,
const BSONObj& cmdObj,
const ExtensionsCallback& extensionsCallback,
- bool isExplain);
+ bool isExplain,
+ const CollatorInterface* defaultCollator = nullptr);
private:
std::unique_ptr<CanonicalQuery> _query;