4 files changed, 146 insertions, 10 deletions
diff --git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h
index 0f69ebb7043..347b236fb6b 100644
--- a/src/mongo/db/pipeline/field_path.h
+++ b/src/mongo/db/pipeline/field_path.h
@@ -78,6 +78,14 @@ public:
     }
 
     /**
+     * Get the subpath including path elements [0, n].
+     */
+    StringData getSubpath(size_t n) const {
+        invariant(n + 1 < _fieldPathDotPosition.size());
+        return StringData(_fieldPath.c_str(), _fieldPathDotPosition[n + 1]);
+    }
+
+    /**
      * Return the ith field name from this path using zero-based indexes.
      */
     StringData getFieldName(size_t i) const {
diff --git a/src/mongo/db/pipeline/field_path_test.cpp b/src/mongo/db/pipeline/field_path_test.cpp
index 999cb9b8134..b869bc38d27 100644
--- a/src/mongo/db/pipeline/field_path_test.cpp
+++ b/src/mongo/db/pipeline/field_path_test.cpp
@@ -188,5 +188,14 @@ TEST(FieldPathTest, ConstructorAssertsOnDeeplyNestedArrayPath) {
                        AssertionException,
                        ErrorCodes::Overflow);
 }
+
+// Test FieldPath::getSubpath().
+TEST(FieldPathTest, GetSubpath) {
+    FieldPath path = FieldPath("foo.bar.baz");
+    ASSERT_EQUALS("foo", path.getSubpath(0));
+    ASSERT_EQUALS("foo.bar", path.getSubpath(1));
+    ASSERT_EQUALS("foo.bar.baz", path.getSubpath(2));
+}
+
 }  // namespace
 }  // namespace mongo
diff --git a/src/mongo/db/query/parsed_distinct.cpp b/src/mongo/db/query/parsed_distinct.cpp
index 62dc04ac610..2d5a74af4c0 100644
--- a/src/mongo/db/query/parsed_distinct.cpp
+++ b/src/mongo/db/query/parsed_distinct.cpp
@@ -49,6 +49,57 @@ const char ParsedDistinct::kCollationField[] = "collation";
 const char ParsedDistinct::kCommentField[] = "comment";
 
 namespace {
+
+/**
+ * Helper for when converting a distinct() to an aggregation pipeline. This function will add
+ * $unwind stages for each subpath of 'path'.
+ *
+ * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation.
+ */
+void addNestedUnwind(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) {
+    for (size_t i = 0; i < unwindPath.getPathLength(); ++i) {
+        StringData pathPrefix = unwindPath.getSubpath(i);
+        BSONObjBuilder unwindStageBuilder(pipelineBuilder->subobjStart());
+        {
+            BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind"));
+            unwindBuilder.append("path", str::stream() << "$" << pathPrefix);
+            unwindBuilder.append("preserveNullAndEmptyArrays", true);
+        }
+        unwindStageBuilder.doneFast();
+    }
+}
+
+/**
+ * Helper for when converting a distinct() to an aggregation pipeline. This function may add a
+ * $match stage enforcing that intermediate subpaths are objects so that no implicit array
+ * traversal happens later on. The $match stage is only added when the path is dotted (e.g. "a.b"
+ * but for "xyz").
+ *
+ * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation.
+ */
+void addMatchRemovingNestedArrays(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) {
+    if (unwindPath.getPathLength() == 1) {
+        return;
+    }
+    invariant(unwindPath.getPathLength() > 1);
+
+    BSONObjBuilder matchBuilder(pipelineBuilder->subobjStart());
+    BSONObjBuilder predicateBuilder(matchBuilder.subobjStart("$match"));
+
+
+    for (size_t i = 0; i < unwindPath.getPathLength() - 1; ++i) {
+        StringData pathPrefix = unwindPath.getSubpath(i);
+        // Add a clause to the $match predicate requiring that intermediate paths are objects so
+        // that no implicit array traversal happens.
+        predicateBuilder.append(pathPrefix,
+                                BSON("$_internalSchemaType"
+                                     << "object"));
+    }
+
+    predicateBuilder.doneFast();
+    matchBuilder.doneFast();
+}
+
 /**
  * Checks dotted field for a projection and truncates the field name if we could be projecting on an
  * array element. Sets 'isIDOut' to true if the projection is on a sub document of _id. For example,
@@ -124,27 +175,42 @@ StatusWith<BSONObj> ParsedDistinct::asAggregationCommand() const {
     aggregationBuilder.append("aggregate", qr.nss().coll());
 
     // Build a pipeline that accomplishes the distinct request. The building code constructs a
-    // pipeline that looks like this:
+    // pipeline that looks like this, assuming the distinct is on the key "a.b.c"
     //
     //      [
     //          { $match: { ... } },
-    //          { $unwind: { path: "$<key>", preserveNullAndEmptyArrays: true } },
+    //          { $unwind: { path: "a", preserveNullAndEmptyArrays: true } },
+    //          { $unwind: { path: "a.b", preserveNullAndEmptyArrays: true } },
+    //          { $unwind: { path: "a.b.c", preserveNullAndEmptyArrays: true } },
+    //          { $match: {"a": {$_internalSchemaType: "object"},
+    //                     "a.b": {$_internalSchemaType: "object"}}}
     //          { $group: { _id: null, distinct: { $addToSet: "$<key>" } } }
     //      ]
+    //
+    // The purpose of the intermediate $unwind stages is to deal with cases where there is an array
+    // along the distinct path. For example, if we're distincting on "a.b" and have a document like
+    // {a: [{b: 1}, {b: 2}]}, distinct() should produce two values: 1 and 2. If we were to only
+    // unwind on "a.b", the document would pass through the $unwind unmodified, and the $group
+    // stage would treat the entire array as a key, rather than each element.
+    //
+    // The reason for the $match with $_internalSchemaType is to deal with cases of nested
+    // arrays. The distinct command will not traverse paths inside of nested arrays. For example, a
+    // distinct on "a.b" with the following document will produce no results:
+    // {a: [[{b: 1}]]
+    //
+    // Any arrays remaining after the $unwinds must have been nested arrays, so in order to match
+    // the behavior of the distinct() command, we filter them out before the $group.
     BSONArrayBuilder pipelineBuilder(aggregationBuilder.subarrayStart("pipeline"));
     if (!qr.getFilter().isEmpty()) {
         BSONObjBuilder matchStageBuilder(pipelineBuilder.subobjStart());
         matchStageBuilder.append("$match", qr.getFilter());
         matchStageBuilder.doneFast();
     }
-    BSONObjBuilder unwindStageBuilder(pipelineBuilder.subobjStart());
-    {
-        BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind"));
-        unwindBuilder.append("path", str::stream() << "$" << _key);
-        unwindBuilder.append("preserveNullAndEmptyArrays", true);
-        unwindBuilder.doneFast();
-    }
-    unwindStageBuilder.doneFast();
+
+    FieldPath path(_key);
+    addNestedUnwind(&pipelineBuilder, path);
+    addMatchRemovingNestedArrays(&pipelineBuilder, path);
+
     BSONObjBuilder groupStageBuilder(pipelineBuilder.subobjStart());
     {
         BSONObjBuilder groupBuilder(groupStageBuilder.subobjStart("$group"));
diff --git a/src/mongo/db/query/parsed_distinct_test.cpp b/src/mongo/db/query/parsed_distinct_test.cpp
index 844a1af4844..bf48d19439e 100644
--- a/src/mongo/db/query/parsed_distinct_test.cpp
+++ b/src/mongo/db/query/parsed_distinct_test.cpp
@@ -84,6 +84,59 @@ TEST(ParsedDistinctTest, ConvertToAggregationNoQuery) {
                       SimpleBSONObjComparator::kInstance.makeEqualTo()));
 }
 
+TEST(ParsedDistinctTest, ConvertToAggregationDottedPathNoQuery) {
+    QueryTestServiceContext serviceContext;
+    auto uniqueTxn = serviceContext.makeOperationContext();
+    OperationContext* opCtx = uniqueTxn.get();
+
+    auto pd = ParsedDistinct::parse(opCtx,
+                                    testns,
+                                    fromjson("{distinct: 'testcoll', key: 'x.y.z', $db: 'testdb'}"),
+                                    ExtensionsCallbackNoop(),
+                                    !isExplain);
+    ASSERT_OK(pd.getStatus());
+
+    auto agg = pd.getValue().asAggregationCommand();
+    ASSERT_OK(agg);
+
+    auto ar = AggregationRequest::parseFromBSON(testns, agg.getValue());
+    ASSERT_OK(ar.getStatus());
+    ASSERT(!ar.getValue().getExplain());
+    ASSERT_EQ(ar.getValue().getBatchSize(), AggregationRequest::kDefaultBatchSize);
+    ASSERT_EQ(ar.getValue().getNamespaceString(), testns);
+    ASSERT_BSONOBJ_EQ(ar.getValue().getCollation(), BSONObj());
+    ASSERT(ar.getValue().getReadConcern().isEmpty());
+    ASSERT(ar.getValue().getUnwrappedReadPref().isEmpty());
+    ASSERT(ar.getValue().getComment().empty());
+    ASSERT_EQUALS(ar.getValue().getMaxTimeMS(), 0u);
+
+    std::vector<BSONObj> expectedPipeline{
+        BSON("$unwind" << BSON("path"
+                               << "$x"
+                               << "preserveNullAndEmptyArrays"
+                               << true)),
+        BSON("$unwind" << BSON("path"
+                               << "$x.y"
+                               << "preserveNullAndEmptyArrays"
+                               << true)),
+        BSON("$unwind" << BSON("path"
+                               << "$x.y.z"
+                               << "preserveNullAndEmptyArrays"
+                               << true)),
+        BSON("$match" << BSON("x" << BSON("$_internalSchemaType"
+                                          << "object")
+                                  << "x.y"
+                                  << BSON("$_internalSchemaType"
+                                          << "object"))),
+        BSON("$group" << BSON("_id" << BSONNULL << "distinct" << BSON("$addToSet"
+                                                                      << "$x.y.z")))};
+    ASSERT(std::equal(expectedPipeline.begin(),
+                      expectedPipeline.end(),
+                      ar.getValue().getPipeline().begin(),
+                      ar.getValue().getPipeline().end(),
+                      SimpleBSONObjComparator::kInstance.makeEqualTo()));
+}
+
 TEST(ParsedDistinctTest, ConvertToAggregationWithAllOptions) {
     QueryTestServiceContext serviceContext;
     auto uniqueTxn = serviceContext.makeOperationContext();