From 069be92b4f2c35140a711cb47d88baf961d74795 Mon Sep 17 00:00:00 2001 From: Sally McNichols Date: Thu, 16 Jun 2016 10:38:29 -0400 Subject: SERVER-23816 Add $sortByCount aggregation stage --- src/mongo/db/pipeline/SConscript | 1 + src/mongo/db/pipeline/document_source.cpp | 6 +- src/mongo/db/pipeline/document_source.h | 42 ++++++++-- .../db/pipeline/document_source_sort_by_count.cpp | 75 +++++++++++++++++ src/mongo/db/pipeline/document_source_test.cpp | 96 ++++++++++++++++++++++ src/mongo/db/pipeline/pipeline.cpp | 35 +++++--- 6 files changed, 234 insertions(+), 21 deletions(-) create mode 100644 src/mongo/db/pipeline/document_source_sort_by_count.cpp diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 4de141c667b..7ce8e880659 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -120,6 +120,7 @@ docSourceEnv.Library( 'document_source_sample_from_random_cursor.cpp', 'document_source_skip.cpp', 'document_source_sort.cpp', + 'document_source_sort_by_count.cpp', 'document_source_unwind.cpp', ], LIBDEPS=[ diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 33e85baa380..430e3cf1496 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -56,8 +56,8 @@ void DocumentSource::registerParser(string name, Parser parser) { parserMap[name] = parser; } -intrusive_ptr DocumentSource::parse(const intrusive_ptr expCtx, - BSONObj stageObj) { +vector> DocumentSource::parse( + const intrusive_ptr expCtx, BSONObj stageObj) { uassert(16435, "A pipeline stage specification object must contain exactly one field.", stageObj.nFields() == 1); @@ -66,9 +66,11 @@ intrusive_ptr DocumentSource::parse(const intrusive_ptrsecond(stageSpec, expCtx); } diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index de60023ec87..3101cbe6882 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -73,17 +73,36 @@ class RecordCursor; * * As an example, if your document source looks like {"$foo": }, with a parsing function * 'createFromBson', you would add this line: - * REGISTER_EXPRESSION(foo, DocumentSourceFoo::createFromBson); + * REGISTER_DOCUMENT_SOURCE(foo, DocumentSourceFoo::createFromBson); */ -#define REGISTER_DOCUMENT_SOURCE(key, parser) \ - MONGO_INITIALIZER(addToDocSourceParserMap_##key)(InitializerContext*) { \ - DocumentSource::registerParser("$" #key, (parser)); \ - return Status::OK(); \ +#define REGISTER_DOCUMENT_SOURCE(key, parser) \ + MONGO_INITIALIZER(addToDocSourceParserMap_##key)(InitializerContext*) { \ + auto parserWrapper = [](BSONElement stageSpec, \ + const boost::intrusive_ptr& expCtx) { \ + return std::vector>{(parser)(stageSpec, expCtx)}; \ + }; \ + DocumentSource::registerParser("$" #key, parserWrapper); \ + return Status::OK(); \ } +/** + * Registers an alias to have the name 'key'. When a stage with name '$key' is found, + * 'parser' will be called to construct a vector of DocumentSources. + * + * As an example, if your document source looks like {"$foo": }, with a parsing function + * 'createFromBson', you would add this line: + * REGISTER_DOCUMENT_SOURCE_ALIAS(foo, DocumentSourceFoo::createFromBson); + */ +#define REGISTER_DOCUMENT_SOURCE_ALIAS(key, parser) \ + MONGO_INITIALIZER(addAliasToDocSourceParserMap_##key)(InitializerContext*) { \ + DocumentSource::registerParser("$" #key, (parser)); \ + return Status::OK(); \ + } + + class DocumentSource : public IntrusiveCounterUnsigned { public: - using Parser = stdx::function( + using Parser = stdx::function>( BSONElement, const boost::intrusive_ptr&)>; virtual ~DocumentSource() {} @@ -209,7 +228,7 @@ public: /** * Create a DocumentSource pipeline stage from 'stageObj'. */ - static boost::intrusive_ptr parse( + static std::vector> parse( const boost::intrusive_ptr expCtx, BSONObj stageObj); /** @@ -1691,4 +1710,13 @@ private: // field, tracking how many results we've returned so far for the current input document. long long _outputIndex; }; + +class DocumentSourceSortByCount final { +public: + static std::vector> createFromBson( + BSONElement elem, const boost::intrusive_ptr& pExpCtx); + +private: + DocumentSourceSortByCount() = default; +}; } diff --git a/src/mongo/db/pipeline/document_source_sort_by_count.cpp b/src/mongo/db/pipeline/document_source_sort_by_count.cpp new file mode 100644 index 00000000000..931a4bacdd7 --- /dev/null +++ b/src/mongo/db/pipeline/document_source_sort_by_count.cpp @@ -0,0 +1,75 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/jsobj.h" +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/expression_context.h" + +namespace mongo { + +using boost::intrusive_ptr; +using std::vector; + +REGISTER_DOCUMENT_SOURCE_ALIAS(sortByCount, DocumentSourceSortByCount::createFromBson); + +vector> DocumentSourceSortByCount::createFromBson( + BSONElement elem, const intrusive_ptr& pExpCtx) { + if (elem.type() == Object) { + // Make sure that the sortByCount field is an expression inside an object + BSONObj innerObj = elem.embeddedObject(); + uassert(40147, + str::stream() << "the sortByCount field must be defined as a $-prefixed path or an " + "expression inside an object", + innerObj.firstElementFieldName()[0] == '$'); + } else if (elem.type() == String) { + // Make sure that the sortByCount field is a $-prefixed path + uassert(40148, + str::stream() << "the sortByCount field must be defined as a $-prefixed path or an " + "expression inside an object", + (elem.valueStringData()[0] == '$')); + } else { + uasserted( + 40149, + str::stream() << "the sortByCount field must be specified as a string or as an object"); + } + + BSONObjBuilder groupExprBuilder; + groupExprBuilder.appendAs(elem, "_id"); + groupExprBuilder.append("count", BSON("$sum" << 1)); + + BSONObj groupObj = BSON("$group" << groupExprBuilder.obj()); + BSONObj sortObj = BSON("$sort" << BSON("count" << -1)); + + auto groupSource = DocumentSourceGroup::createFromBson(groupObj.firstElement(), pExpCtx); + auto sortSource = DocumentSourceSort::createFromBson(sortObj.firstElement(), pExpCtx); + + return {groupSource, sortSource}; +} +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp index b5ed007d1f9..6a409052de8 100644 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ b/src/mongo/db/pipeline/document_source_test.cpp @@ -3417,6 +3417,102 @@ public: }; } +namespace DocumentSourceSortByCount { +using mongo::DocumentSourceSortByCount; +using mongo::DocumentSourceGroup; +using mongo::DocumentSourceSort; +using std::vector; +using boost::intrusive_ptr; + +/** + * Fixture to test that $sortByCount returns a DocumentSourceGroup and DocumentSourceSort. + */ +class SortByCountReturnsGroupAndSort : public Mock::Base, public unittest::Test { +public: + void testCreateFromBsonResult(BSONObj sortByCountSpec, Value expectedGroupExplain) { + vector> result = + DocumentSourceSortByCount::createFromBson(sortByCountSpec.firstElement(), ctx()); + + ASSERT_EQUALS(result.size(), 2UL); + + const auto* groupStage = dynamic_cast(result[0].get()); + ASSERT(groupStage); + + const auto* sortStage = dynamic_cast(result[1].get()); + ASSERT(sortStage); + + // Serialize the DocumentSourceGroup and DocumentSourceSort from $sortByCount so that we can + // check the explain output to make sure $group and $sort have the correct fields. + const bool explain = true; + vector explainedStages; + groupStage->serializeToArray(explainedStages, explain); + sortStage->serializeToArray(explainedStages, explain); + ASSERT_EQUALS(explainedStages.size(), 2UL); + + auto groupExplain = explainedStages[0]; + ASSERT_EQ(groupExplain["$group"], expectedGroupExplain); + + auto sortExplain = explainedStages[1]; + auto expectedSortExplain = Value{Document{{"sortKey", Document{{"count", -1}}}}}; + ASSERT_EQ(sortExplain["$sort"], expectedSortExplain); + } +}; + +TEST_F(SortByCountReturnsGroupAndSort, ExpressionFieldPathSpec) { + BSONObj spec = BSON("$sortByCount" + << "$x"); + Value expectedGroupExplain = + Value{Document{{"_id", "$x"}, {"count", Document{{"$sum", Document{{"$const", 1}}}}}}}; + testCreateFromBsonResult(spec, expectedGroupExplain); +} + +TEST_F(SortByCountReturnsGroupAndSort, ExpressionInObjectSpec) { + BSONObj spec = BSON("$sortByCount" << BSON("$floor" + << "$x")); + Value expectedGroupExplain = + Value{Document{{"_id", Document{{"$floor", Value{BSON_ARRAY("$x")}}}}, + {"count", Document{{"$sum", Document{{"$const", 1}}}}}}}; + testCreateFromBsonResult(spec, expectedGroupExplain); + + spec = BSON("$sortByCount" << BSON("$eq" << BSON_ARRAY("$x" << 15))); + expectedGroupExplain = + Value{Document{{"_id", Document{{"$eq", Value{BSON_ARRAY("$x" << BSON("$const" << 15))}}}}, + {"count", Document{{"$sum", Document{{"$const", 1}}}}}}}; + testCreateFromBsonResult(spec, expectedGroupExplain); +} + +/** + * Fixture to test error cases of the $sortByCount stage. + */ +class InvalidSortByCountSpec : public Mock::Base, public unittest::Test { +public: + vector> createSortByCount(BSONObj sortByCountSpec) { + auto specElem = sortByCountSpec.firstElement(); + return DocumentSourceSortByCount::createFromBson(specElem, ctx()); + } +}; + +TEST_F(InvalidSortByCountSpec, NonObjectNonStringSpec) { + BSONObj spec = BSON("$sortByCount" << 1); + ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40149); + + spec = BSON("$sortByCount" << BSONNULL); + ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40149); +} + +TEST_F(InvalidSortByCountSpec, NonExpressionInObjectSpec) { + BSONObj spec = BSON("$sortByCount" << BSON("field1" + << "$x")); + ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40147); +} + +TEST_F(InvalidSortByCountSpec, NonFieldPathStringSpec) { + BSONObj spec = BSON("$sortByCount" + << "test"); + ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40148); +} +} // namespace DocumentSourceSortByCount + class All : public Suite { public: All() : Suite("documentsource") {} diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 89a76aa9412..4125bc242ff 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -185,20 +185,31 @@ intrusive_ptr Pipeline::parseCommand(string& errmsg, str::stream() << "pipeline element " << iStep << " is not an object", pipeElement.type() == Object); - sources.push_back(DocumentSource::parse(pCtx, pipeElement.Obj())); - if (sources.back()->isValidInitialSource()) { - uassert(28837, - str::stream() << sources.back()->getSourceName() - << " is only valid as the first stage in a pipeline.", - iStep == 0); - } + vector> stepSources = + DocumentSource::parse(pCtx, pipeElement.Obj()); + + // Iterate over the steps in stepSource. stepSource may have more than one step if the + // current step is a DocumentSource alias. + const size_t nStepSources = stepSources.size(); + for (size_t iStepSource = 0; iStepSource < nStepSources; ++iStepSource) { + sources.push_back(stepSources[iStepSource]); + + if (sources.back()->isValidInitialSource()) { + uassert(28837, + str::stream() << sources.back()->getSourceName() + << " is only valid as the first stage in a pipeline.", + iStep == 0 && iStepSource == 0); + } - if (dynamic_cast(sources.back().get())) { - uassert(16991, "$out can only be the final stage in the pipeline", iStep == nSteps - 1); + if (dynamic_cast(sources.back().get())) { + uassert(16991, + "$out can only be the final stage in the pipeline", + iStep == nSteps - 1 && iStepSource == nStepSources - 1); - uassert(ErrorCodes::InvalidOptions, - "$out can only be used with the 'local' read concern level", - readConcernArgs.getLevel() == repl::ReadConcernLevel::kLocalReadConcern); + uassert(ErrorCodes::InvalidOptions, + "$out can only be used with the 'local' read concern level", + readConcernArgs.getLevel() == repl::ReadConcernLevel::kLocalReadConcern); + } } } -- cgit v1.2.1