summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/pipeline/field_path.h8
-rw-r--r--src/mongo/db/pipeline/field_path_test.cpp9
-rw-r--r--src/mongo/db/query/parsed_distinct.cpp86
-rw-r--r--src/mongo/db/query/parsed_distinct_test.cpp53
4 files changed, 146 insertions, 10 deletions
diff --git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h
index 0f69ebb7043..347b236fb6b 100644
--- a/src/mongo/db/pipeline/field_path.h
+++ b/src/mongo/db/pipeline/field_path.h
@@ -78,6 +78,14 @@ public:
}
/**
+ * Get the subpath including path elements [0, n].
+ */
+ StringData getSubpath(size_t n) const {
+ invariant(n + 1 < _fieldPathDotPosition.size());
+ return StringData(_fieldPath.c_str(), _fieldPathDotPosition[n + 1]);
+ }
+
+ /**
* Return the ith field name from this path using zero-based indexes.
*/
StringData getFieldName(size_t i) const {
diff --git a/src/mongo/db/pipeline/field_path_test.cpp b/src/mongo/db/pipeline/field_path_test.cpp
index 999cb9b8134..b869bc38d27 100644
--- a/src/mongo/db/pipeline/field_path_test.cpp
+++ b/src/mongo/db/pipeline/field_path_test.cpp
@@ -188,5 +188,14 @@ TEST(FieldPathTest, ConstructorAssertsOnDeeplyNestedArrayPath) {
AssertionException,
ErrorCodes::Overflow);
}
+
+// Test FieldPath::getSubpath().
+TEST(FieldPathTest, GetSubpath) {
+ FieldPath path = FieldPath("foo.bar.baz");
+ ASSERT_EQUALS("foo", path.getSubpath(0));
+ ASSERT_EQUALS("foo.bar", path.getSubpath(1));
+ ASSERT_EQUALS("foo.bar.baz", path.getSubpath(2));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/parsed_distinct.cpp b/src/mongo/db/query/parsed_distinct.cpp
index 62dc04ac610..2d5a74af4c0 100644
--- a/src/mongo/db/query/parsed_distinct.cpp
+++ b/src/mongo/db/query/parsed_distinct.cpp
@@ -49,6 +49,57 @@ const char ParsedDistinct::kCollationField[] = "collation";
const char ParsedDistinct::kCommentField[] = "comment";
namespace {
+
+/**
+ * Helper for when converting a distinct() to an aggregation pipeline. This function will add
+ * $unwind stages for each subpath of 'path'.
+ *
+ * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation.
+ */
+void addNestedUnwind(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) {
+ for (size_t i = 0; i < unwindPath.getPathLength(); ++i) {
+ StringData pathPrefix = unwindPath.getSubpath(i);
+ BSONObjBuilder unwindStageBuilder(pipelineBuilder->subobjStart());
+ {
+ BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind"));
+ unwindBuilder.append("path", str::stream() << "$" << pathPrefix);
+ unwindBuilder.append("preserveNullAndEmptyArrays", true);
+ }
+ unwindStageBuilder.doneFast();
+ }
+}
+
+/**
+ * Helper for when converting a distinct() to an aggregation pipeline. This function may add a
+ * $match stage enforcing that intermediate subpaths are objects so that no implicit array
+ * traversal happens later on. The $match stage is only added when the path is dotted (e.g. "a.b"
+ * but for "xyz").
+ *
+ * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation.
+ */
+void addMatchRemovingNestedArrays(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) {
+ if (unwindPath.getPathLength() == 1) {
+ return;
+ }
+ invariant(unwindPath.getPathLength() > 1);
+
+ BSONObjBuilder matchBuilder(pipelineBuilder->subobjStart());
+ BSONObjBuilder predicateBuilder(matchBuilder.subobjStart("$match"));
+
+
+ for (size_t i = 0; i < unwindPath.getPathLength() - 1; ++i) {
+ StringData pathPrefix = unwindPath.getSubpath(i);
+ // Add a clause to the $match predicate requiring that intermediate paths are objects so
+ // that no implicit array traversal happens.
+ predicateBuilder.append(pathPrefix,
+ BSON("$_internalSchemaType"
+ << "object"));
+ }
+
+ predicateBuilder.doneFast();
+ matchBuilder.doneFast();
+}
+
/**
* Checks dotted field for a projection and truncates the field name if we could be projecting on an
* array element. Sets 'isIDOut' to true if the projection is on a sub document of _id. For example,
@@ -124,27 +175,42 @@ StatusWith<BSONObj> ParsedDistinct::asAggregationCommand() const {
aggregationBuilder.append("aggregate", qr.nss().coll());
// Build a pipeline that accomplishes the distinct request. The building code constructs a
- // pipeline that looks like this:
+ // pipeline that looks like this, assuming the distinct is on the key "a.b.c"
//
// [
// { $match: { ... } },
- // { $unwind: { path: "$<key>", preserveNullAndEmptyArrays: true } },
+ // { $unwind: { path: "a", preserveNullAndEmptyArrays: true } },
+ // { $unwind: { path: "a.b", preserveNullAndEmptyArrays: true } },
+ // { $unwind: { path: "a.b.c", preserveNullAndEmptyArrays: true } },
+ // { $match: {"a": {$_internalSchemaType: "object"},
+ // "a.b": {$_internalSchemaType: "object"}}}
// { $group: { _id: null, distinct: { $addToSet: "$<key>" } } }
// ]
+ //
+ // The purpose of the intermediate $unwind stages is to deal with cases where there is an array
+ // along the distinct path. For example, if we're distincting on "a.b" and have a document like
+ // {a: [{b: 1}, {b: 2}]}, distinct() should produce two values: 1 and 2. If we were to only
+ // unwind on "a.b", the document would pass through the $unwind unmodified, and the $group
+ // stage would treat the entire array as a key, rather than each element.
+ //
+ // The reason for the $match with $_internalSchemaType is to deal with cases of nested
+ // arrays. The distinct command will not traverse paths inside of nested arrays. For example, a
+ // distinct on "a.b" with the following document will produce no results:
+ // {a: [[{b: 1}]]
+ //
+ // Any arrays remaining after the $unwinds must have been nested arrays, so in order to match
+ // the behavior of the distinct() command, we filter them out before the $group.
BSONArrayBuilder pipelineBuilder(aggregationBuilder.subarrayStart("pipeline"));
if (!qr.getFilter().isEmpty()) {
BSONObjBuilder matchStageBuilder(pipelineBuilder.subobjStart());
matchStageBuilder.append("$match", qr.getFilter());
matchStageBuilder.doneFast();
}
- BSONObjBuilder unwindStageBuilder(pipelineBuilder.subobjStart());
- {
- BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind"));
- unwindBuilder.append("path", str::stream() << "$" << _key);
- unwindBuilder.append("preserveNullAndEmptyArrays", true);
- unwindBuilder.doneFast();
- }
- unwindStageBuilder.doneFast();
+
+ FieldPath path(_key);
+ addNestedUnwind(&pipelineBuilder, path);
+ addMatchRemovingNestedArrays(&pipelineBuilder, path);
+
BSONObjBuilder groupStageBuilder(pipelineBuilder.subobjStart());
{
BSONObjBuilder groupBuilder(groupStageBuilder.subobjStart("$group"));
diff --git a/src/mongo/db/query/parsed_distinct_test.cpp b/src/mongo/db/query/parsed_distinct_test.cpp
index 844a1af4844..bf48d19439e 100644
--- a/src/mongo/db/query/parsed_distinct_test.cpp
+++ b/src/mongo/db/query/parsed_distinct_test.cpp
@@ -84,6 +84,59 @@ TEST(ParsedDistinctTest, ConvertToAggregationNoQuery) {
SimpleBSONObjComparator::kInstance.makeEqualTo()));
}
+TEST(ParsedDistinctTest, ConvertToAggregationDottedPathNoQuery) {
+ QueryTestServiceContext serviceContext;
+ auto uniqueTxn = serviceContext.makeOperationContext();
+ OperationContext* opCtx = uniqueTxn.get();
+
+ auto pd = ParsedDistinct::parse(opCtx,
+ testns,
+ fromjson("{distinct: 'testcoll', key: 'x.y.z', $db: 'testdb'}"),
+ ExtensionsCallbackNoop(),
+ !isExplain);
+ ASSERT_OK(pd.getStatus());
+
+ auto agg = pd.getValue().asAggregationCommand();
+ ASSERT_OK(agg);
+
+ auto ar = AggregationRequest::parseFromBSON(testns, agg.getValue());
+ ASSERT_OK(ar.getStatus());
+ ASSERT(!ar.getValue().getExplain());
+ ASSERT_EQ(ar.getValue().getBatchSize(), AggregationRequest::kDefaultBatchSize);
+ ASSERT_EQ(ar.getValue().getNamespaceString(), testns);
+ ASSERT_BSONOBJ_EQ(ar.getValue().getCollation(), BSONObj());
+ ASSERT(ar.getValue().getReadConcern().isEmpty());
+ ASSERT(ar.getValue().getUnwrappedReadPref().isEmpty());
+ ASSERT(ar.getValue().getComment().empty());
+ ASSERT_EQUALS(ar.getValue().getMaxTimeMS(), 0u);
+
+ std::vector<BSONObj> expectedPipeline{
+ BSON("$unwind" << BSON("path"
+ << "$x"
+ << "preserveNullAndEmptyArrays"
+ << true)),
+ BSON("$unwind" << BSON("path"
+ << "$x.y"
+ << "preserveNullAndEmptyArrays"
+ << true)),
+ BSON("$unwind" << BSON("path"
+ << "$x.y.z"
+ << "preserveNullAndEmptyArrays"
+ << true)),
+ BSON("$match" << BSON("x" << BSON("$_internalSchemaType"
+ << "object")
+ << "x.y"
+ << BSON("$_internalSchemaType"
+ << "object"))),
+ BSON("$group" << BSON("_id" << BSONNULL << "distinct" << BSON("$addToSet"
+ << "$x.y.z")))};
+ ASSERT(std::equal(expectedPipeline.begin(),
+ expectedPipeline.end(),
+ ar.getValue().getPipeline().begin(),
+ ar.getValue().getPipeline().end(),
+ SimpleBSONObjComparator::kInstance.makeEqualTo()));
+}
+
TEST(ParsedDistinctTest, ConvertToAggregationWithAllOptions) {
QueryTestServiceContext serviceContext;
auto uniqueTxn = serviceContext.makeOperationContext();