diff options
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/pipeline/field_path.h | 8 | ||||
-rw-r--r-- | src/mongo/db/pipeline/field_path_test.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/parsed_distinct.cpp | 86 | ||||
-rw-r--r-- | src/mongo/db/query/parsed_distinct_test.cpp | 53 |
4 files changed, 146 insertions, 10 deletions
diff --git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h index 0f69ebb7043..347b236fb6b 100644 --- a/src/mongo/db/pipeline/field_path.h +++ b/src/mongo/db/pipeline/field_path.h @@ -78,6 +78,14 @@ public: } /** + * Get the subpath including path elements [0, n]. + */ + StringData getSubpath(size_t n) const { + invariant(n + 1 < _fieldPathDotPosition.size()); + return StringData(_fieldPath.c_str(), _fieldPathDotPosition[n + 1]); + } + + /** * Return the ith field name from this path using zero-based indexes. */ StringData getFieldName(size_t i) const { diff --git a/src/mongo/db/pipeline/field_path_test.cpp b/src/mongo/db/pipeline/field_path_test.cpp index 999cb9b8134..b869bc38d27 100644 --- a/src/mongo/db/pipeline/field_path_test.cpp +++ b/src/mongo/db/pipeline/field_path_test.cpp @@ -188,5 +188,14 @@ TEST(FieldPathTest, ConstructorAssertsOnDeeplyNestedArrayPath) { AssertionException, ErrorCodes::Overflow); } + +// Test FieldPath::getSubpath(). +TEST(FieldPathTest, GetSubpath) { + FieldPath path = FieldPath("foo.bar.baz"); + ASSERT_EQUALS("foo", path.getSubpath(0)); + ASSERT_EQUALS("foo.bar", path.getSubpath(1)); + ASSERT_EQUALS("foo.bar.baz", path.getSubpath(2)); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/query/parsed_distinct.cpp b/src/mongo/db/query/parsed_distinct.cpp index 62dc04ac610..2d5a74af4c0 100644 --- a/src/mongo/db/query/parsed_distinct.cpp +++ b/src/mongo/db/query/parsed_distinct.cpp @@ -49,6 +49,57 @@ const char ParsedDistinct::kCollationField[] = "collation"; const char ParsedDistinct::kCommentField[] = "comment"; namespace { + +/** + * Helper for when converting a distinct() to an aggregation pipeline. This function will add + * $unwind stages for each subpath of 'path'. + * + * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation. + */ +void addNestedUnwind(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) { + for (size_t i = 0; i < unwindPath.getPathLength(); ++i) { + StringData pathPrefix = unwindPath.getSubpath(i); + BSONObjBuilder unwindStageBuilder(pipelineBuilder->subobjStart()); + { + BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind")); + unwindBuilder.append("path", str::stream() << "$" << pathPrefix); + unwindBuilder.append("preserveNullAndEmptyArrays", true); + } + unwindStageBuilder.doneFast(); + } +} + +/** + * Helper for when converting a distinct() to an aggregation pipeline. This function may add a + * $match stage enforcing that intermediate subpaths are objects so that no implicit array + * traversal happens later on. The $match stage is only added when the path is dotted (e.g. "a.b" + * but for "xyz"). + * + * See comments in ParsedDistinct::asAggregationCommand() for more detailed explanation. + */ +void addMatchRemovingNestedArrays(BSONArrayBuilder* pipelineBuilder, const FieldPath& unwindPath) { + if (unwindPath.getPathLength() == 1) { + return; + } + invariant(unwindPath.getPathLength() > 1); + + BSONObjBuilder matchBuilder(pipelineBuilder->subobjStart()); + BSONObjBuilder predicateBuilder(matchBuilder.subobjStart("$match")); + + + for (size_t i = 0; i < unwindPath.getPathLength() - 1; ++i) { + StringData pathPrefix = unwindPath.getSubpath(i); + // Add a clause to the $match predicate requiring that intermediate paths are objects so + // that no implicit array traversal happens. + predicateBuilder.append(pathPrefix, + BSON("$_internalSchemaType" + << "object")); + } + + predicateBuilder.doneFast(); + matchBuilder.doneFast(); +} + /** * Checks dotted field for a projection and truncates the field name if we could be projecting on an * array element. Sets 'isIDOut' to true if the projection is on a sub document of _id. For example, @@ -124,27 +175,42 @@ StatusWith<BSONObj> ParsedDistinct::asAggregationCommand() const { aggregationBuilder.append("aggregate", qr.nss().coll()); // Build a pipeline that accomplishes the distinct request. The building code constructs a - // pipeline that looks like this: + // pipeline that looks like this, assuming the distinct is on the key "a.b.c" // // [ // { $match: { ... } }, - // { $unwind: { path: "$<key>", preserveNullAndEmptyArrays: true } }, + // { $unwind: { path: "a", preserveNullAndEmptyArrays: true } }, + // { $unwind: { path: "a.b", preserveNullAndEmptyArrays: true } }, + // { $unwind: { path: "a.b.c", preserveNullAndEmptyArrays: true } }, + // { $match: {"a": {$_internalSchemaType: "object"}, + // "a.b": {$_internalSchemaType: "object"}}} // { $group: { _id: null, distinct: { $addToSet: "$<key>" } } } // ] + // + // The purpose of the intermediate $unwind stages is to deal with cases where there is an array + // along the distinct path. For example, if we're distincting on "a.b" and have a document like + // {a: [{b: 1}, {b: 2}]}, distinct() should produce two values: 1 and 2. If we were to only + // unwind on "a.b", the document would pass through the $unwind unmodified, and the $group + // stage would treat the entire array as a key, rather than each element. + // + // The reason for the $match with $_internalSchemaType is to deal with cases of nested + // arrays. The distinct command will not traverse paths inside of nested arrays. For example, a + // distinct on "a.b" with the following document will produce no results: + // {a: [[{b: 1}]] + // + // Any arrays remaining after the $unwinds must have been nested arrays, so in order to match + // the behavior of the distinct() command, we filter them out before the $group. BSONArrayBuilder pipelineBuilder(aggregationBuilder.subarrayStart("pipeline")); if (!qr.getFilter().isEmpty()) { BSONObjBuilder matchStageBuilder(pipelineBuilder.subobjStart()); matchStageBuilder.append("$match", qr.getFilter()); matchStageBuilder.doneFast(); } - BSONObjBuilder unwindStageBuilder(pipelineBuilder.subobjStart()); - { - BSONObjBuilder unwindBuilder(unwindStageBuilder.subobjStart("$unwind")); - unwindBuilder.append("path", str::stream() << "$" << _key); - unwindBuilder.append("preserveNullAndEmptyArrays", true); - unwindBuilder.doneFast(); - } - unwindStageBuilder.doneFast(); + + FieldPath path(_key); + addNestedUnwind(&pipelineBuilder, path); + addMatchRemovingNestedArrays(&pipelineBuilder, path); + BSONObjBuilder groupStageBuilder(pipelineBuilder.subobjStart()); { BSONObjBuilder groupBuilder(groupStageBuilder.subobjStart("$group")); diff --git a/src/mongo/db/query/parsed_distinct_test.cpp b/src/mongo/db/query/parsed_distinct_test.cpp index 844a1af4844..bf48d19439e 100644 --- a/src/mongo/db/query/parsed_distinct_test.cpp +++ b/src/mongo/db/query/parsed_distinct_test.cpp @@ -84,6 +84,59 @@ TEST(ParsedDistinctTest, ConvertToAggregationNoQuery) { SimpleBSONObjComparator::kInstance.makeEqualTo())); } +TEST(ParsedDistinctTest, ConvertToAggregationDottedPathNoQuery) { + QueryTestServiceContext serviceContext; + auto uniqueTxn = serviceContext.makeOperationContext(); + OperationContext* opCtx = uniqueTxn.get(); + + auto pd = ParsedDistinct::parse(opCtx, + testns, + fromjson("{distinct: 'testcoll', key: 'x.y.z', $db: 'testdb'}"), + ExtensionsCallbackNoop(), + !isExplain); + ASSERT_OK(pd.getStatus()); + + auto agg = pd.getValue().asAggregationCommand(); + ASSERT_OK(agg); + + auto ar = AggregationRequest::parseFromBSON(testns, agg.getValue()); + ASSERT_OK(ar.getStatus()); + ASSERT(!ar.getValue().getExplain()); + ASSERT_EQ(ar.getValue().getBatchSize(), AggregationRequest::kDefaultBatchSize); + ASSERT_EQ(ar.getValue().getNamespaceString(), testns); + ASSERT_BSONOBJ_EQ(ar.getValue().getCollation(), BSONObj()); + ASSERT(ar.getValue().getReadConcern().isEmpty()); + ASSERT(ar.getValue().getUnwrappedReadPref().isEmpty()); + ASSERT(ar.getValue().getComment().empty()); + ASSERT_EQUALS(ar.getValue().getMaxTimeMS(), 0u); + + std::vector<BSONObj> expectedPipeline{ + BSON("$unwind" << BSON("path" + << "$x" + << "preserveNullAndEmptyArrays" + << true)), + BSON("$unwind" << BSON("path" + << "$x.y" + << "preserveNullAndEmptyArrays" + << true)), + BSON("$unwind" << BSON("path" + << "$x.y.z" + << "preserveNullAndEmptyArrays" + << true)), + BSON("$match" << BSON("x" << BSON("$_internalSchemaType" + << "object") + << "x.y" + << BSON("$_internalSchemaType" + << "object"))), + BSON("$group" << BSON("_id" << BSONNULL << "distinct" << BSON("$addToSet" + << "$x.y.z")))}; + ASSERT(std::equal(expectedPipeline.begin(), + expectedPipeline.end(), + ar.getValue().getPipeline().begin(), + ar.getValue().getPipeline().end(), + SimpleBSONObjComparator::kInstance.makeEqualTo())); +} + TEST(ParsedDistinctTest, ConvertToAggregationWithAllOptions) { QueryTestServiceContext serviceContext; auto uniqueTxn = serviceContext.makeOperationContext(); |