diff options
author | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-05-27 13:19:43 -0400 |
---|---|---|
committer | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-06-06 09:49:54 -0400 |
commit | 62f7837d70f2eb9ad63dcae57ae515dd868d339b (patch) | |
tree | d785b43e43fd0f7871baff6afa3a7a69c3114486 | |
parent | e533634d86aae9385d9bdd94e15d992c4c8de622 (diff) | |
download | mongo-62f7837d70f2eb9ad63dcae57ae515dd868d339b.tar.gz |
SERVER-24175 Distinct command should respect the collation
-rw-r--r-- | jstests/core/collation_shell_helpers.js | 23 | ||||
-rw-r--r-- | src/mongo/db/commands/distinct.cpp | 63 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 60 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/parsed_distinct.h | 69 |
5 files changed, 160 insertions, 60 deletions
diff --git a/jstests/core/collation_shell_helpers.js b/jstests/core/collation_shell_helpers.js index 01755a342c8..b823521640d 100644 --- a/jstests/core/collation_shell_helpers.js +++ b/jstests/core/collation_shell_helpers.js @@ -242,10 +242,31 @@ assert.eq(1, planStage.advanced); // Distinct. - assert.eq(["foo", "bar"], coll.distinct("str", {}, {collation: {locale: "fr"}})); + coll.drop(); + assert.writeOK(coll.insert({_id: 1, str: "foo"})); + assert.writeOK(coll.insert({_id: 2, str: "FOO"})); + + // Without an index. + var res = coll.distinct("str", {}, {collation: {locale: "en_US", strength: 2}}); + assert.eq(1, res.length); + assert.eq("foo", res[0].toLowerCase()); + assert.eq(2, coll.distinct("str", {}, {collation: {locale: "en_US", strength: 3}}).length); + assert.eq( + 2, coll.distinct("_id", {str: "foo"}, {collation: {locale: "en_US", strength: 2}}).length); + + // With an index. + coll.createIndex({str: 1}, {collation: {locale: "en_US", strength: 2}}); + res = coll.distinct("str", {}, {collation: {locale: "en_US", strength: 2}}); + assert.eq(1, res.length); + assert.eq("foo", res[0].toLowerCase()); + assert.eq(2, coll.distinct("str", {}, {collation: {locale: "en_US", strength: 3}}).length); + assert.commandWorked(coll.explain().distinct("str", {}, {collation: {locale: "fr"}})); // Find command. + coll.drop(); + assert.writeOK(coll.insert({_id: 1, str: "foo"})); + assert.writeOK(coll.insert({_id: 2, str: "bar"})); if (db.getMongo().useReadCommands()) { // On _id field. assert.writeOK(coll.insert({_id: "foo"})); diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp index c173f950b8a..3171c816ec2 100644 --- a/src/mongo/db/commands/distinct.cpp +++ b/src/mongo/db/commands/distinct.cpp @@ -47,9 +47,11 @@ #include "mongo/db/exec/working_set_common.h" #include "mongo/db/instance.h" #include "mongo/db/jsobj.h" +#include "mongo/db/matcher/extensions_callback_real.h" #include "mongo/db/query/explain.h" #include "mongo/db/query/find_common.h" #include "mongo/db/query/get_executor.h" +#include "mongo/db/query/parsed_distinct.h" #include "mongo/db/query/plan_summary_stats.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/stdx/memory.h" @@ -105,27 +107,24 @@ public: help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - /** - * Used by explain() and run() to get the PlanExecutor for the query. - */ - StatusWith<unique_ptr<PlanExecutor>> getPlanExecutor(OperationContext* txn, - Collection* collection, - const string& ns, - const BSONObj& cmdObj, - bool isExplain) const { + StatusWith<ParsedDistinct> parse(OperationContext* txn, + const NamespaceString& nss, + const BSONObj& cmdObj, + bool isExplain) const { // Extract the key field. BSONElement keyElt; auto statusKey = bsonExtractTypedField(cmdObj, kKeyField, BSONType::String, &keyElt); if (!statusKey.isOK()) { return {statusKey}; } - string key = keyElt.valuestrsafe(); + auto key = keyElt.valuestrsafe(); + + auto qr = stdx::make_unique<QueryRequest>(nss); // Extract the query field. If the query field is nonexistent, an empty query is used. - BSONObj query; if (BSONElement queryElt = cmdObj[kQueryField]) { if (queryElt.type() == BSONType::Object) { - query = queryElt.embeddedObject(); + qr->setFilter(queryElt.embeddedObject()); } else if (queryElt.type() != BSONType::jstNULL) { return Status(ErrorCodes::TypeMismatch, str::stream() << "\"" << kQueryField @@ -139,9 +138,6 @@ public: } // Extract the collation field, if it exists. - // TODO SERVER-23473: Pass this collation spec object down so that it can be converted into - // a CollatorInterface. - BSONObj collation; if (BSONElement collationElt = cmdObj[kCollationField]) { if (collationElt.type() != BSONType::Object) { return Status(ErrorCodes::TypeMismatch, @@ -151,16 +147,18 @@ public: << ", found " << typeName(collationElt.type())); } - collation = collationElt.embeddedObject(); + qr->setCollation(collationElt.embeddedObject()); } - auto executor = getExecutorDistinct( - txn, collection, ns, query, key, isExplain, PlanExecutor::YIELD_AUTO); - if (!executor.isOK()) { - return executor.getStatus(); + qr->setExplain(isExplain); + + const ExtensionsCallbackReal extensionsCallback(txn, &nss); + auto cq = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); + if (!cq.isOK()) { + return cq.getStatus(); } - return std::move(executor.getValue()); + return ParsedDistinct(std::move(cq.getValue()), std::move(key)); } virtual Status explain(OperationContext* txn, @@ -170,12 +168,19 @@ public: const rpc::ServerSelectionMetadata&, BSONObjBuilder* out) const { const string ns = parseNs(dbname, cmdObj); + const NamespaceString nss(ns); + + auto parsedDistinct = parse(txn, nss, cmdObj, true); + if (!parsedDistinct.isOK()) { + return parsedDistinct.getStatus(); + } + AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); - StatusWith<unique_ptr<PlanExecutor>> executor = - getPlanExecutor(txn, collection, ns, cmdObj, true); + auto executor = getExecutorDistinct( + txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO); if (!executor.isOK()) { return executor.getStatus(); } @@ -193,11 +198,21 @@ public: Timer t; const string ns = parseNs(dbname, cmdObj); + const NamespaceString nss(ns); + + auto parsedDistinct = parse(txn, nss, cmdObj, false); + if (!parsedDistinct.isOK()) { + return appendCommandStatus(result, parsedDistinct.getStatus()); + } + + auto collator = parsedDistinct.getValue().getQuery()->getCollator(); + AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); - auto executor = getPlanExecutor(txn, collection, ns, cmdObj, false); + auto executor = getExecutorDistinct( + txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO); if (!executor.isOK()) { return appendCommandStatus(result, executor.getStatus()); } @@ -215,7 +230,7 @@ public: char* start = bb.buf(); BSONArrayBuilder arr(bb); - BSONElementSet values; + BSONElementSet values(collator); BSONObj obj; PlanExecutor::ExecState state; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 2b05871af47..c8778b48c8b 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -1060,11 +1060,16 @@ bool turnIxscanIntoCount(QuerySolution* soln) { */ bool getDistinctNodeIndex(const std::vector<IndexEntry>& indices, const std::string& field, + const CollatorInterface* collator, size_t* indexOut) { invariant(indexOut); bool isDottedField = str::contains(field, '.'); int minFields = std::numeric_limits<int>::max(); for (size_t i = 0; i < indices.size(); ++i) { + // Skip indices with non-matching collator. + if (!CollatorInterface::collatorsMatch(indices[i].collator, collator)) { + continue; + } // Skip special indices. if (!IndexNames::findPluginName(indices[i].keyPattern).empty()) { continue; @@ -1290,9 +1295,7 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, const string& field) { StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, Collection* collection, const std::string& ns, - const BSONObj& query, - const std::string& field, - bool isExplain, + ParsedDistinct* parsedDistinct, PlanExecutor::YieldPolicy yieldPolicy) { if (!collection) { // Treat collections that do not exist as empty collections. @@ -1320,7 +1323,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, IndexCatalogEntry* ice = ii.catalogEntry(desc); // The distinct hack can work if any field is in the index but it's not always clear // if it's a win unless it's the first field. - if (desc->keyPattern().firstElement().fieldName() == field) { + if (desc->keyPattern().firstElement().fieldName() == parsedDistinct->getKey()) { plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(txn), @@ -1339,16 +1342,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, // If there are no suitable indices for the distinct hack bail out now into regular planning // with no projection. if (plannerParams.indices.empty()) { - auto qr = stdx::make_unique<QueryRequest>(collection->ns()); - qr->setFilter(query); - qr->setExplain(isExplain); - - auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); - if (!statusWithCQ.isOK()) { - return statusWithCQ.getStatus(); - } - - return getExecutor(txn, collection, std::move(statusWithCQ.getValue()), yieldPolicy); + return getExecutor(txn, collection, parsedDistinct->releaseQuery(), yieldPolicy); } // @@ -1358,11 +1352,9 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, // Applying a projection allows the planner to try to give us covered plans that we can turn // into the projection hack. getDistinctProjection deals with .find() projection semantics // (ie _id:1 being implied by default). - BSONObj projection = getDistinctProjection(field); + BSONObj projection = getDistinctProjection(parsedDistinct->getKey()); - auto qr = stdx::make_unique<QueryRequest>(collection->ns()); - qr->setFilter(query); - qr->setExplain(isExplain); + auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest()); qr->setProj(projection); auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); @@ -1376,17 +1368,31 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, // Not every index in plannerParams.indices may be suitable. Refer to // getDistinctNodeIndex(). size_t distinctNodeIndex = 0; - if (query.isEmpty() && getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) { + if (parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() && + getDistinctNodeIndex(plannerParams.indices, + parsedDistinct->getKey(), + cq->getCollator(), + &distinctNodeIndex)) { auto dn = stdx::make_unique<DistinctNode>(); dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern; dn->direction = 1; IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); dn->fieldNo = 0; + // An index with a non-simple collation requires a FETCH stage. + std::unique_ptr<QuerySolutionNode> solnRoot = std::move(dn); + if (plannerParams.indices[distinctNodeIndex].collator) { + if (!solnRoot->fetched()) { + auto fetch = stdx::make_unique<FetchNode>(); + fetch->children.push_back(solnRoot.release()); + solnRoot = std::move(fetch); + } + } + QueryPlannerParams params; unique_ptr<QuerySolution> soln( - QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn.release())); + QueryPlannerAnalysis::analyzeDataAccess(*cq, params, solnRoot.release())); invariant(soln); unique_ptr<WorkingSet> ws = make_unique<WorkingSet>(); @@ -1415,7 +1421,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, // We look for a solution that has an ixscan we can turn into a distinctixscan for (size_t i = 0; i < solutions.size(); ++i) { - if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { + if (turnIxscanIntoDistinctIxscan(solutions[i], parsedDistinct->getKey())) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { @@ -1450,17 +1456,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, delete solutions[i]; } - // We drop the projection from the 'cq'. Unfortunately this is not trivial. - auto qrNoProjection = stdx::make_unique<QueryRequest>(collection->ns()); - qrNoProjection->setFilter(query); - qrNoProjection->setExplain(isExplain); - - statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qrNoProjection), extensionsCallback); - if (!statusWithCQ.isOK()) { - return statusWithCQ.getStatus(); - } - - return getExecutor(txn, collection, std::move(statusWithCQ.getValue()), yieldPolicy); + return getExecutor(txn, collection, parsedDistinct->releaseQuery(), yieldPolicy); } } // namespace mongo diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h index c15c7144370..d4de2c44146 100644 --- a/src/mongo/db/query/get_executor.h +++ b/src/mongo/db/query/get_executor.h @@ -32,6 +32,7 @@ #include "mongo/db/ops/update_driver.h" #include "mongo/db/ops/update_request.h" #include "mongo/db/query/canonical_query.h" +#include "mongo/db/query/parsed_distinct.h" #include "mongo/db/query/plan_executor.h" #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/query_settings.h" @@ -112,9 +113,7 @@ StatusWith<std::unique_ptr<PlanExecutor>> getExecutorDistinct( OperationContext* txn, Collection* collection, const std::string& ns, - const BSONObj& query, - const std::string& field, - bool isExplain, + ParsedDistinct* parsedDistinct, PlanExecutor::YieldPolicy yieldPolicy); /* diff --git a/src/mongo/db/query/parsed_distinct.h b/src/mongo/db/query/parsed_distinct.h new file mode 100644 index 00000000000..01f9c372e80 --- /dev/null +++ b/src/mongo/db/query/parsed_distinct.h @@ -0,0 +1,69 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> +#include <string> + +#include "mongo/db/query/canonical_query.h" + +namespace mongo { + +/** + * The parsed form of the distinct command request. + */ +class ParsedDistinct { +public: + ParsedDistinct(std::unique_ptr<CanonicalQuery> query, const std::string key) + : _query(std::move(query)), _key(std::move(key)) {} + + const CanonicalQuery* getQuery() const { + return _query.get(); + } + + /** + * Releases ownership of the canonical query to the caller. + */ + std::unique_ptr<CanonicalQuery> releaseQuery() { + invariant(_query.get()); + return std::move(_query); + } + + const std::string& getKey() const { + return _key; + } + +private: + std::unique_ptr<CanonicalQuery> _query; + + // The field for which we are getting distinct values. + const std::string _key; +}; + +} // namespace mongo |