summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTess Avitabile <tess.avitabile@mongodb.com>2016-05-27 13:19:43 -0400
committerTess Avitabile <tess.avitabile@mongodb.com>2016-06-06 09:49:54 -0400
commit62f7837d70f2eb9ad63dcae57ae515dd868d339b (patch)
treed785b43e43fd0f7871baff6afa3a7a69c3114486
parente533634d86aae9385d9bdd94e15d992c4c8de622 (diff)
downloadmongo-62f7837d70f2eb9ad63dcae57ae515dd868d339b.tar.gz
SERVER-24175 Distinct command should respect the collation
-rw-r--r--jstests/core/collation_shell_helpers.js23
-rw-r--r--src/mongo/db/commands/distinct.cpp63
-rw-r--r--src/mongo/db/query/get_executor.cpp60
-rw-r--r--src/mongo/db/query/get_executor.h5
-rw-r--r--src/mongo/db/query/parsed_distinct.h69
5 files changed, 160 insertions, 60 deletions
diff --git a/jstests/core/collation_shell_helpers.js b/jstests/core/collation_shell_helpers.js
index 01755a342c8..b823521640d 100644
--- a/jstests/core/collation_shell_helpers.js
+++ b/jstests/core/collation_shell_helpers.js
@@ -242,10 +242,31 @@
assert.eq(1, planStage.advanced);
// Distinct.
- assert.eq(["foo", "bar"], coll.distinct("str", {}, {collation: {locale: "fr"}}));
+ coll.drop();
+ assert.writeOK(coll.insert({_id: 1, str: "foo"}));
+ assert.writeOK(coll.insert({_id: 2, str: "FOO"}));
+
+ // Without an index.
+ var res = coll.distinct("str", {}, {collation: {locale: "en_US", strength: 2}});
+ assert.eq(1, res.length);
+ assert.eq("foo", res[0].toLowerCase());
+ assert.eq(2, coll.distinct("str", {}, {collation: {locale: "en_US", strength: 3}}).length);
+ assert.eq(
+ 2, coll.distinct("_id", {str: "foo"}, {collation: {locale: "en_US", strength: 2}}).length);
+
+ // With an index.
+ coll.createIndex({str: 1}, {collation: {locale: "en_US", strength: 2}});
+ res = coll.distinct("str", {}, {collation: {locale: "en_US", strength: 2}});
+ assert.eq(1, res.length);
+ assert.eq("foo", res[0].toLowerCase());
+ assert.eq(2, coll.distinct("str", {}, {collation: {locale: "en_US", strength: 3}}).length);
+
assert.commandWorked(coll.explain().distinct("str", {}, {collation: {locale: "fr"}}));
// Find command.
+ coll.drop();
+ assert.writeOK(coll.insert({_id: 1, str: "foo"}));
+ assert.writeOK(coll.insert({_id: 2, str: "bar"}));
if (db.getMongo().useReadCommands()) {
// On _id field.
assert.writeOK(coll.insert({_id: "foo"}));
diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp
index c173f950b8a..3171c816ec2 100644
--- a/src/mongo/db/commands/distinct.cpp
+++ b/src/mongo/db/commands/distinct.cpp
@@ -47,9 +47,11 @@
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/instance.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/matcher/extensions_callback_real.h"
#include "mongo/db/query/explain.h"
#include "mongo/db/query/find_common.h"
#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/parsed_distinct.h"
#include "mongo/db/query/plan_summary_stats.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/stdx/memory.h"
@@ -105,27 +107,24 @@ public:
help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }";
}
- /**
- * Used by explain() and run() to get the PlanExecutor for the query.
- */
- StatusWith<unique_ptr<PlanExecutor>> getPlanExecutor(OperationContext* txn,
- Collection* collection,
- const string& ns,
- const BSONObj& cmdObj,
- bool isExplain) const {
+ StatusWith<ParsedDistinct> parse(OperationContext* txn,
+ const NamespaceString& nss,
+ const BSONObj& cmdObj,
+ bool isExplain) const {
// Extract the key field.
BSONElement keyElt;
auto statusKey = bsonExtractTypedField(cmdObj, kKeyField, BSONType::String, &keyElt);
if (!statusKey.isOK()) {
return {statusKey};
}
- string key = keyElt.valuestrsafe();
+ auto key = keyElt.valuestrsafe();
+
+ auto qr = stdx::make_unique<QueryRequest>(nss);
// Extract the query field. If the query field is nonexistent, an empty query is used.
- BSONObj query;
if (BSONElement queryElt = cmdObj[kQueryField]) {
if (queryElt.type() == BSONType::Object) {
- query = queryElt.embeddedObject();
+ qr->setFilter(queryElt.embeddedObject());
} else if (queryElt.type() != BSONType::jstNULL) {
return Status(ErrorCodes::TypeMismatch,
str::stream() << "\"" << kQueryField
@@ -139,9 +138,6 @@ public:
}
// Extract the collation field, if it exists.
- // TODO SERVER-23473: Pass this collation spec object down so that it can be converted into
- // a CollatorInterface.
- BSONObj collation;
if (BSONElement collationElt = cmdObj[kCollationField]) {
if (collationElt.type() != BSONType::Object) {
return Status(ErrorCodes::TypeMismatch,
@@ -151,16 +147,18 @@ public:
<< ", found "
<< typeName(collationElt.type()));
}
- collation = collationElt.embeddedObject();
+ qr->setCollation(collationElt.embeddedObject());
}
- auto executor = getExecutorDistinct(
- txn, collection, ns, query, key, isExplain, PlanExecutor::YIELD_AUTO);
- if (!executor.isOK()) {
- return executor.getStatus();
+ qr->setExplain(isExplain);
+
+ const ExtensionsCallbackReal extensionsCallback(txn, &nss);
+ auto cq = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback);
+ if (!cq.isOK()) {
+ return cq.getStatus();
}
- return std::move(executor.getValue());
+ return ParsedDistinct(std::move(cq.getValue()), std::move(key));
}
virtual Status explain(OperationContext* txn,
@@ -170,12 +168,19 @@ public:
const rpc::ServerSelectionMetadata&,
BSONObjBuilder* out) const {
const string ns = parseNs(dbname, cmdObj);
+ const NamespaceString nss(ns);
+
+ auto parsedDistinct = parse(txn, nss, cmdObj, true);
+ if (!parsedDistinct.isOK()) {
+ return parsedDistinct.getStatus();
+ }
+
AutoGetCollectionForRead ctx(txn, ns);
Collection* collection = ctx.getCollection();
- StatusWith<unique_ptr<PlanExecutor>> executor =
- getPlanExecutor(txn, collection, ns, cmdObj, true);
+ auto executor = getExecutorDistinct(
+ txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO);
if (!executor.isOK()) {
return executor.getStatus();
}
@@ -193,11 +198,21 @@ public:
Timer t;
const string ns = parseNs(dbname, cmdObj);
+ const NamespaceString nss(ns);
+
+ auto parsedDistinct = parse(txn, nss, cmdObj, false);
+ if (!parsedDistinct.isOK()) {
+ return appendCommandStatus(result, parsedDistinct.getStatus());
+ }
+
+ auto collator = parsedDistinct.getValue().getQuery()->getCollator();
+
AutoGetCollectionForRead ctx(txn, ns);
Collection* collection = ctx.getCollection();
- auto executor = getPlanExecutor(txn, collection, ns, cmdObj, false);
+ auto executor = getExecutorDistinct(
+ txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO);
if (!executor.isOK()) {
return appendCommandStatus(result, executor.getStatus());
}
@@ -215,7 +230,7 @@ public:
char* start = bb.buf();
BSONArrayBuilder arr(bb);
- BSONElementSet values;
+ BSONElementSet values(collator);
BSONObj obj;
PlanExecutor::ExecState state;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 2b05871af47..c8778b48c8b 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1060,11 +1060,16 @@ bool turnIxscanIntoCount(QuerySolution* soln) {
*/
bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices,
const std::string& field,
+ const CollatorInterface* collator,
size_t* indexOut) {
invariant(indexOut);
bool isDottedField = str::contains(field, '.');
int minFields = std::numeric_limits<int>::max();
for (size_t i = 0; i < indices.size(); ++i) {
+ // Skip indices with non-matching collator.
+ if (!CollatorInterface::collatorsMatch(indices[i].collator, collator)) {
+ continue;
+ }
// Skip special indices.
if (!IndexNames::findPluginName(indices[i].keyPattern).empty()) {
continue;
@@ -1290,9 +1295,7 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) {
StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
Collection* collection,
const std::string& ns,
- const BSONObj& query,
- const std::string& field,
- bool isExplain,
+ ParsedDistinct* parsedDistinct,
PlanExecutor::YieldPolicy yieldPolicy) {
if (!collection) {
// Treat collections that do not exist as empty collections.
@@ -1320,7 +1323,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
IndexCatalogEntry* ice = ii.catalogEntry(desc);
// The distinct hack can work if any field is in the index but it's not always clear
// if it's a win unless it's the first field.
- if (desc->keyPattern().firstElement().fieldName() == field) {
+ if (desc->keyPattern().firstElement().fieldName() == parsedDistinct->getKey()) {
plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
desc->getAccessMethodName(),
desc->isMultikey(txn),
@@ -1339,16 +1342,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
// If there are no suitable indices for the distinct hack bail out now into regular planning
// with no projection.
if (plannerParams.indices.empty()) {
- auto qr = stdx::make_unique<QueryRequest>(collection->ns());
- qr->setFilter(query);
- qr->setExplain(isExplain);
-
- auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback);
- if (!statusWithCQ.isOK()) {
- return statusWithCQ.getStatus();
- }
-
- return getExecutor(txn, collection, std::move(statusWithCQ.getValue()), yieldPolicy);
+ return getExecutor(txn, collection, parsedDistinct->releaseQuery(), yieldPolicy);
}
//
@@ -1358,11 +1352,9 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
// Applying a projection allows the planner to try to give us covered plans that we can turn
// into the projection hack. getDistinctProjection deals with .find() projection semantics
// (ie _id:1 being implied by default).
- BSONObj projection = getDistinctProjection(field);
+ BSONObj projection = getDistinctProjection(parsedDistinct->getKey());
- auto qr = stdx::make_unique<QueryRequest>(collection->ns());
- qr->setFilter(query);
- qr->setExplain(isExplain);
+ auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest());
qr->setProj(projection);
auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback);
@@ -1376,17 +1368,31 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
// Not every index in plannerParams.indices may be suitable. Refer to
// getDistinctNodeIndex().
size_t distinctNodeIndex = 0;
- if (query.isEmpty() && getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) {
+ if (parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() &&
+ getDistinctNodeIndex(plannerParams.indices,
+ parsedDistinct->getKey(),
+ cq->getCollator(),
+ &distinctNodeIndex)) {
auto dn = stdx::make_unique<DistinctNode>();
dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern;
dn->direction = 1;
IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
dn->fieldNo = 0;
+ // An index with a non-simple collation requires a FETCH stage.
+ std::unique_ptr<QuerySolutionNode> solnRoot = std::move(dn);
+ if (plannerParams.indices[distinctNodeIndex].collator) {
+ if (!solnRoot->fetched()) {
+ auto fetch = stdx::make_unique<FetchNode>();
+ fetch->children.push_back(solnRoot.release());
+ solnRoot = std::move(fetch);
+ }
+ }
+
QueryPlannerParams params;
unique_ptr<QuerySolution> soln(
- QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn.release()));
+ QueryPlannerAnalysis::analyzeDataAccess(*cq, params, solnRoot.release()));
invariant(soln);
unique_ptr<WorkingSet> ws = make_unique<WorkingSet>();
@@ -1415,7 +1421,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
// We look for a solution that has an ixscan we can turn into a distinctixscan
for (size_t i = 0; i < solutions.size(); ++i) {
- if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
+ if (turnIxscanIntoDistinctIxscan(solutions[i], parsedDistinct->getKey())) {
// Great, we can use solutions[i]. Clean up the other QuerySolution(s).
for (size_t j = 0; j < solutions.size(); ++j) {
if (j != i) {
@@ -1450,17 +1456,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
delete solutions[i];
}
- // We drop the projection from the 'cq'. Unfortunately this is not trivial.
- auto qrNoProjection = stdx::make_unique<QueryRequest>(collection->ns());
- qrNoProjection->setFilter(query);
- qrNoProjection->setExplain(isExplain);
-
- statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qrNoProjection), extensionsCallback);
- if (!statusWithCQ.isOK()) {
- return statusWithCQ.getStatus();
- }
-
- return getExecutor(txn, collection, std::move(statusWithCQ.getValue()), yieldPolicy);
+ return getExecutor(txn, collection, parsedDistinct->releaseQuery(), yieldPolicy);
}
} // namespace mongo
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index c15c7144370..d4de2c44146 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -32,6 +32,7 @@
#include "mongo/db/ops/update_driver.h"
#include "mongo/db/ops/update_request.h"
#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/parsed_distinct.h"
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_settings.h"
@@ -112,9 +113,7 @@ StatusWith<std::unique_ptr<PlanExecutor>> getExecutorDistinct(
OperationContext* txn,
Collection* collection,
const std::string& ns,
- const BSONObj& query,
- const std::string& field,
- bool isExplain,
+ ParsedDistinct* parsedDistinct,
PlanExecutor::YieldPolicy yieldPolicy);
/*
diff --git a/src/mongo/db/query/parsed_distinct.h b/src/mongo/db/query/parsed_distinct.h
new file mode 100644
index 00000000000..01f9c372e80
--- /dev/null
+++ b/src/mongo/db/query/parsed_distinct.h
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "mongo/db/query/canonical_query.h"
+
+namespace mongo {
+
+/**
+ * The parsed form of the distinct command request.
+ */
+class ParsedDistinct {
+public:
+ ParsedDistinct(std::unique_ptr<CanonicalQuery> query, const std::string key)
+ : _query(std::move(query)), _key(std::move(key)) {}
+
+ const CanonicalQuery* getQuery() const {
+ return _query.get();
+ }
+
+ /**
+ * Releases ownership of the canonical query to the caller.
+ */
+ std::unique_ptr<CanonicalQuery> releaseQuery() {
+ invariant(_query.get());
+ return std::move(_query);
+ }
+
+ const std::string& getKey() const {
+ return _key;
+ }
+
+private:
+ std::unique_ptr<CanonicalQuery> _query;
+
+ // The field for which we are getting distinct values.
+ const std::string _key;
+};
+
+} // namespace mongo