summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSally McNichols <sally.mcnichols@mongodb.com>2016-06-16 10:38:29 -0400
committerSally McNichols <sally.mcnichols@mongodb.com>2016-06-16 10:38:29 -0400
commit069be92b4f2c35140a711cb47d88baf961d74795 (patch)
treedc1414546c5b5423023d66b227fe6dd242a65357
parente9cd7667471eff437463e7de842af95747847be4 (diff)
downloadmongo-069be92b4f2c35140a711cb47d88baf961d74795.tar.gz
SERVER-23816 Add $sortByCount aggregation stage
-rw-r--r--src/mongo/db/pipeline/SConscript1
-rw-r--r--src/mongo/db/pipeline/document_source.cpp6
-rw-r--r--src/mongo/db/pipeline/document_source.h42
-rw-r--r--src/mongo/db/pipeline/document_source_sort_by_count.cpp75
-rw-r--r--src/mongo/db/pipeline/document_source_test.cpp96
-rw-r--r--src/mongo/db/pipeline/pipeline.cpp35
6 files changed, 234 insertions, 21 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 4de141c667b..7ce8e880659 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -120,6 +120,7 @@ docSourceEnv.Library(
'document_source_sample_from_random_cursor.cpp',
'document_source_skip.cpp',
'document_source_sort.cpp',
+ 'document_source_sort_by_count.cpp',
'document_source_unwind.cpp',
],
LIBDEPS=[
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index 33e85baa380..430e3cf1496 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -56,8 +56,8 @@ void DocumentSource::registerParser(string name, Parser parser) {
parserMap[name] = parser;
}
-intrusive_ptr<DocumentSource> DocumentSource::parse(const intrusive_ptr<ExpressionContext> expCtx,
- BSONObj stageObj) {
+vector<intrusive_ptr<DocumentSource>> DocumentSource::parse(
+ const intrusive_ptr<ExpressionContext> expCtx, BSONObj stageObj) {
uassert(16435,
"A pipeline stage specification object must contain exactly one field.",
stageObj.nFields() == 1);
@@ -66,9 +66,11 @@ intrusive_ptr<DocumentSource> DocumentSource::parse(const intrusive_ptr<Expressi
// Get the registered parser and call that.
auto it = parserMap.find(stageName);
+
uassert(16436,
str::stream() << "Unrecognized pipeline stage name: '" << stageName << "'",
it != parserMap.end());
+
return it->second(stageSpec, expCtx);
}
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index de60023ec87..3101cbe6882 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -73,17 +73,36 @@ class RecordCursor;
*
* As an example, if your document source looks like {"$foo": <args>}, with a parsing function
* 'createFromBson', you would add this line:
- * REGISTER_EXPRESSION(foo, DocumentSourceFoo::createFromBson);
+ * REGISTER_DOCUMENT_SOURCE(foo, DocumentSourceFoo::createFromBson);
*/
-#define REGISTER_DOCUMENT_SOURCE(key, parser) \
- MONGO_INITIALIZER(addToDocSourceParserMap_##key)(InitializerContext*) { \
- DocumentSource::registerParser("$" #key, (parser)); \
- return Status::OK(); \
+#define REGISTER_DOCUMENT_SOURCE(key, parser) \
+ MONGO_INITIALIZER(addToDocSourceParserMap_##key)(InitializerContext*) { \
+ auto parserWrapper = [](BSONElement stageSpec, \
+ const boost::intrusive_ptr<ExpressionContext>& expCtx) { \
+ return std::vector<boost::intrusive_ptr<DocumentSource>>{(parser)(stageSpec, expCtx)}; \
+ }; \
+ DocumentSource::registerParser("$" #key, parserWrapper); \
+ return Status::OK(); \
}
+/**
+ * Registers an alias to have the name 'key'. When a stage with name '$key' is found,
+ * 'parser' will be called to construct a vector of DocumentSources.
+ *
+ * As an example, if your document source looks like {"$foo": <args>}, with a parsing function
+ * 'createFromBson', you would add this line:
+ * REGISTER_DOCUMENT_SOURCE_ALIAS(foo, DocumentSourceFoo::createFromBson);
+ */
+#define REGISTER_DOCUMENT_SOURCE_ALIAS(key, parser) \
+ MONGO_INITIALIZER(addAliasToDocSourceParserMap_##key)(InitializerContext*) { \
+ DocumentSource::registerParser("$" #key, (parser)); \
+ return Status::OK(); \
+ }
+
+
class DocumentSource : public IntrusiveCounterUnsigned {
public:
- using Parser = stdx::function<boost::intrusive_ptr<DocumentSource>(
+ using Parser = stdx::function<std::vector<boost::intrusive_ptr<DocumentSource>>(
BSONElement, const boost::intrusive_ptr<ExpressionContext>&)>;
virtual ~DocumentSource() {}
@@ -209,7 +228,7 @@ public:
/**
* Create a DocumentSource pipeline stage from 'stageObj'.
*/
- static boost::intrusive_ptr<DocumentSource> parse(
+ static std::vector<boost::intrusive_ptr<DocumentSource>> parse(
const boost::intrusive_ptr<ExpressionContext> expCtx, BSONObj stageObj);
/**
@@ -1691,4 +1710,13 @@ private:
// field, tracking how many results we've returned so far for the current input document.
long long _outputIndex;
};
+
+class DocumentSourceSortByCount final {
+public:
+ static std::vector<boost::intrusive_ptr<DocumentSource>> createFromBson(
+ BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+
+private:
+ DocumentSourceSortByCount() = default;
+};
}
diff --git a/src/mongo/db/pipeline/document_source_sort_by_count.cpp b/src/mongo/db/pipeline/document_source_sort_by_count.cpp
new file mode 100644
index 00000000000..931a4bacdd7
--- /dev/null
+++ b/src/mongo/db/pipeline/document_source_sort_by_count.cpp
@@ -0,0 +1,75 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/jsobj.h"
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/expression_context.h"
+
+namespace mongo {
+
+using boost::intrusive_ptr;
+using std::vector;
+
+REGISTER_DOCUMENT_SOURCE_ALIAS(sortByCount, DocumentSourceSortByCount::createFromBson);
+
+vector<intrusive_ptr<DocumentSource>> DocumentSourceSortByCount::createFromBson(
+ BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
+ if (elem.type() == Object) {
+ // Make sure that the sortByCount field is an expression inside an object
+ BSONObj innerObj = elem.embeddedObject();
+ uassert(40147,
+ str::stream() << "the sortByCount field must be defined as a $-prefixed path or an "
+ "expression inside an object",
+ innerObj.firstElementFieldName()[0] == '$');
+ } else if (elem.type() == String) {
+ // Make sure that the sortByCount field is a $-prefixed path
+ uassert(40148,
+ str::stream() << "the sortByCount field must be defined as a $-prefixed path or an "
+ "expression inside an object",
+ (elem.valueStringData()[0] == '$'));
+ } else {
+ uasserted(
+ 40149,
+ str::stream() << "the sortByCount field must be specified as a string or as an object");
+ }
+
+ BSONObjBuilder groupExprBuilder;
+ groupExprBuilder.appendAs(elem, "_id");
+ groupExprBuilder.append("count", BSON("$sum" << 1));
+
+ BSONObj groupObj = BSON("$group" << groupExprBuilder.obj());
+ BSONObj sortObj = BSON("$sort" << BSON("count" << -1));
+
+ auto groupSource = DocumentSourceGroup::createFromBson(groupObj.firstElement(), pExpCtx);
+ auto sortSource = DocumentSourceSort::createFromBson(sortObj.firstElement(), pExpCtx);
+
+ return {groupSource, sortSource};
+}
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp
index b5ed007d1f9..6a409052de8 100644
--- a/src/mongo/db/pipeline/document_source_test.cpp
+++ b/src/mongo/db/pipeline/document_source_test.cpp
@@ -3417,6 +3417,102 @@ public:
};
}
+namespace DocumentSourceSortByCount {
+using mongo::DocumentSourceSortByCount;
+using mongo::DocumentSourceGroup;
+using mongo::DocumentSourceSort;
+using std::vector;
+using boost::intrusive_ptr;
+
+/**
+ * Fixture to test that $sortByCount returns a DocumentSourceGroup and DocumentSourceSort.
+ */
+class SortByCountReturnsGroupAndSort : public Mock::Base, public unittest::Test {
+public:
+ void testCreateFromBsonResult(BSONObj sortByCountSpec, Value expectedGroupExplain) {
+ vector<intrusive_ptr<DocumentSource>> result =
+ DocumentSourceSortByCount::createFromBson(sortByCountSpec.firstElement(), ctx());
+
+ ASSERT_EQUALS(result.size(), 2UL);
+
+ const auto* groupStage = dynamic_cast<DocumentSourceGroup*>(result[0].get());
+ ASSERT(groupStage);
+
+ const auto* sortStage = dynamic_cast<DocumentSourceSort*>(result[1].get());
+ ASSERT(sortStage);
+
+ // Serialize the DocumentSourceGroup and DocumentSourceSort from $sortByCount so that we can
+ // check the explain output to make sure $group and $sort have the correct fields.
+ const bool explain = true;
+ vector<Value> explainedStages;
+ groupStage->serializeToArray(explainedStages, explain);
+ sortStage->serializeToArray(explainedStages, explain);
+ ASSERT_EQUALS(explainedStages.size(), 2UL);
+
+ auto groupExplain = explainedStages[0];
+ ASSERT_EQ(groupExplain["$group"], expectedGroupExplain);
+
+ auto sortExplain = explainedStages[1];
+ auto expectedSortExplain = Value{Document{{"sortKey", Document{{"count", -1}}}}};
+ ASSERT_EQ(sortExplain["$sort"], expectedSortExplain);
+ }
+};
+
+TEST_F(SortByCountReturnsGroupAndSort, ExpressionFieldPathSpec) {
+ BSONObj spec = BSON("$sortByCount"
+ << "$x");
+ Value expectedGroupExplain =
+ Value{Document{{"_id", "$x"}, {"count", Document{{"$sum", Document{{"$const", 1}}}}}}};
+ testCreateFromBsonResult(spec, expectedGroupExplain);
+}
+
+TEST_F(SortByCountReturnsGroupAndSort, ExpressionInObjectSpec) {
+ BSONObj spec = BSON("$sortByCount" << BSON("$floor"
+ << "$x"));
+ Value expectedGroupExplain =
+ Value{Document{{"_id", Document{{"$floor", Value{BSON_ARRAY("$x")}}}},
+ {"count", Document{{"$sum", Document{{"$const", 1}}}}}}};
+ testCreateFromBsonResult(spec, expectedGroupExplain);
+
+ spec = BSON("$sortByCount" << BSON("$eq" << BSON_ARRAY("$x" << 15)));
+ expectedGroupExplain =
+ Value{Document{{"_id", Document{{"$eq", Value{BSON_ARRAY("$x" << BSON("$const" << 15))}}}},
+ {"count", Document{{"$sum", Document{{"$const", 1}}}}}}};
+ testCreateFromBsonResult(spec, expectedGroupExplain);
+}
+
+/**
+ * Fixture to test error cases of the $sortByCount stage.
+ */
+class InvalidSortByCountSpec : public Mock::Base, public unittest::Test {
+public:
+ vector<intrusive_ptr<DocumentSource>> createSortByCount(BSONObj sortByCountSpec) {
+ auto specElem = sortByCountSpec.firstElement();
+ return DocumentSourceSortByCount::createFromBson(specElem, ctx());
+ }
+};
+
+TEST_F(InvalidSortByCountSpec, NonObjectNonStringSpec) {
+ BSONObj spec = BSON("$sortByCount" << 1);
+ ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40149);
+
+ spec = BSON("$sortByCount" << BSONNULL);
+ ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40149);
+}
+
+TEST_F(InvalidSortByCountSpec, NonExpressionInObjectSpec) {
+ BSONObj spec = BSON("$sortByCount" << BSON("field1"
+ << "$x"));
+ ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40147);
+}
+
+TEST_F(InvalidSortByCountSpec, NonFieldPathStringSpec) {
+ BSONObj spec = BSON("$sortByCount"
+ << "test");
+ ASSERT_THROWS_CODE(createSortByCount(spec), UserException, 40148);
+}
+} // namespace DocumentSourceSortByCount
+
class All : public Suite {
public:
All() : Suite("documentsource") {}
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp
index 89a76aa9412..4125bc242ff 100644
--- a/src/mongo/db/pipeline/pipeline.cpp
+++ b/src/mongo/db/pipeline/pipeline.cpp
@@ -185,20 +185,31 @@ intrusive_ptr<Pipeline> Pipeline::parseCommand(string& errmsg,
str::stream() << "pipeline element " << iStep << " is not an object",
pipeElement.type() == Object);
- sources.push_back(DocumentSource::parse(pCtx, pipeElement.Obj()));
- if (sources.back()->isValidInitialSource()) {
- uassert(28837,
- str::stream() << sources.back()->getSourceName()
- << " is only valid as the first stage in a pipeline.",
- iStep == 0);
- }
+ vector<intrusive_ptr<DocumentSource>> stepSources =
+ DocumentSource::parse(pCtx, pipeElement.Obj());
+
+ // Iterate over the steps in stepSource. stepSource may have more than one step if the
+ // current step is a DocumentSource alias.
+ const size_t nStepSources = stepSources.size();
+ for (size_t iStepSource = 0; iStepSource < nStepSources; ++iStepSource) {
+ sources.push_back(stepSources[iStepSource]);
+
+ if (sources.back()->isValidInitialSource()) {
+ uassert(28837,
+ str::stream() << sources.back()->getSourceName()
+ << " is only valid as the first stage in a pipeline.",
+ iStep == 0 && iStepSource == 0);
+ }
- if (dynamic_cast<DocumentSourceOut*>(sources.back().get())) {
- uassert(16991, "$out can only be the final stage in the pipeline", iStep == nSteps - 1);
+ if (dynamic_cast<DocumentSourceOut*>(sources.back().get())) {
+ uassert(16991,
+ "$out can only be the final stage in the pipeline",
+ iStep == nSteps - 1 && iStepSource == nStepSources - 1);
- uassert(ErrorCodes::InvalidOptions,
- "$out can only be used with the 'local' read concern level",
- readConcernArgs.getLevel() == repl::ReadConcernLevel::kLocalReadConcern);
+ uassert(ErrorCodes::InvalidOptions,
+ "$out can only be used with the 'local' read concern level",
+ readConcernArgs.getLevel() == repl::ReadConcernLevel::kLocalReadConcern);
+ }
}
}