From f23e588c180cc1a2ae79b9cbf6a4ba2a196ee215 Mon Sep 17 00:00:00 2001 From: Charlie Swanson Date: Tue, 22 Mar 2016 10:52:18 -0400 Subject: SERVER-22881 Expand arrays during $lookup's query. If the value in 'localField' is an array, assume that it corresponds to multiple entries in the foreign collection. The query to the foreign collection will use a $in predicate, except when a regex is present, in which case it will use a series of $eq predicates within an $or. --- jstests/aggregation/bugs/server19095.js | 49 ++++++++++++++++++ src/mongo/db/pipeline/document_source.h | 8 ++- src/mongo/db/pipeline/document_source_lookup.cpp | 65 +++++++++++++++++++++--- src/mongo/db/pipeline/document_source_test.cpp | 36 +++++++++++++ 4 files changed, 149 insertions(+), 9 deletions(-) diff --git a/jstests/aggregation/bugs/server19095.js b/jstests/aggregation/bugs/server19095.js index 7d023ebc271..30d2610aad9 100644 --- a/jstests/aggregation/bugs/server19095.js +++ b/jstests/aggregation/bugs/server19095.js @@ -312,6 +312,55 @@ load("jstests/aggregation/extras/utils.js"); expectedResults = [{_id: 0, a: /a regex/, b: [{_id: 0, b: /a regex/}]}]; testPipeline(pipeline, expectedResults, coll); + // + // A local value of an array. + // + + // Basic array corresponding to multiple documents. + coll.drop(); + assert.writeOK(coll.insert({_id: 0, a: [0, 1, 2]})); + + from.drop(); + assert.writeOK(from.insert({_id: 0})); + assert.writeOK(from.insert({_id: 1})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "_id", + from: "from", + as: "b", + } + }, + ]; + expectedResults = [{_id: 0, a: [0, 1, 2], b: [{_id: 0}, {_id: 1}]}]; + testPipeline(pipeline, expectedResults, coll); + + // Array containing regular expressions. + coll.drop(); + assert.writeOK(coll.insert({_id: 0, a: [/a regex/, /^x/]})); + + from.drop(); + assert.writeOK(from.insert({_id: 0, b: "should not match a regex"})); + assert.writeOK(from.insert({_id: 1, b: "xxxx"})); + assert.writeOK(from.insert({_id: 2, b: /a regex/})); + assert.writeOK(from.insert({_id: 3, b: /^x/})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "b", + from: "from", + as: "b", + } + }, + ]; + expectedResults = + [{_id: 0, a: [/a regex/, /^x/], b: [{_id: 2, b: /a regex/}, {_id: 3, b: /^x/}]}]; + testPipeline(pipeline, expectedResults, coll); + // // Error cases. // diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index f792bc6eeb8..6c9dee2aa39 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -1427,6 +1427,13 @@ public: static boost::intrusive_ptr createFromBson( BSONElement elem, const boost::intrusive_ptr& pExpCtx); + /** + * Build the BSONObj used to query the foreign collection. + */ + static BSONObj queryForInput(const Document& input, + const FieldPath& localFieldName, + const std::string& foreignFieldName); + private: DocumentSourceLookUp(NamespaceString fromNs, std::string as, @@ -1439,7 +1446,6 @@ private: } boost::optional unwindResult(); - BSONObj queryForInput(const Document& input) const; NamespaceString _fromNs; FieldPath _as; diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index 4e63247bef4..2838972d703 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -41,6 +41,7 @@ namespace mongo { using boost::intrusive_ptr; +using std::vector; DocumentSourceLookUp::DocumentSourceLookUp(NamespaceString fromNs, std::string as, @@ -60,6 +61,29 @@ const char* DocumentSourceLookUp::getSourceName() const { return "$lookup"; } +namespace { + +/** + * Constructs a query of the following shape: + * {$or: [ + * {'fieldName': {$eq: 'values[0]'}}, + * {'fieldName': {$eq: 'values[1]'}}, + * ... + * ]} + */ +BSONObj buildEqualityOrQuery(const std::string& fieldName, const vector& values) { + BSONObjBuilder orBuilder; + { + BSONArrayBuilder orPredicatesBuilder(orBuilder.subarrayStart("$or")); + for (auto&& value : values) { + orPredicatesBuilder.append(BSON(fieldName << BSON("$eq" << value))); + } + } + return orBuilder.obj(); +} + +} // namespace + boost::optional DocumentSourceLookUp::getNext() { pExpCtx->checkForInterrupt(); @@ -72,7 +96,7 @@ boost::optional DocumentSourceLookUp::getNext() { boost::optional input = pSource->getNext(); if (!input) return {}; - BSONObj query = queryForInput(*input); + BSONObj query = queryForInput(*input, _localField, _foreignFieldFieldName); std::unique_ptr cursor = _mongod->directClient()->query(_fromNs.ns(), query); std::vector results; @@ -114,16 +138,39 @@ void DocumentSourceLookUp::dispose() { pSource->dispose(); } -BSONObj DocumentSourceLookUp::queryForInput(const Document& input) const { - Value localFieldVal = input.getNestedField(_localField); +BSONObj DocumentSourceLookUp::queryForInput(const Document& input, + const FieldPath& localFieldPath, + const std::string& foreignFieldName) { + Value localFieldVal = input.getNestedField(localFieldPath); + + // Missing values are treated as null. if (localFieldVal.missing()) { localFieldVal = Value(BSONNULL); } - // { _foreignFieldFiedlName : { "$eq" : localFieldValue } } BSONObjBuilder query; - BSONObjBuilder subObj(query.subobjStart(_foreignFieldFieldName)); - subObj << "$eq" << localFieldVal; + BSONObjBuilder subObj(query.subobjStart(foreignFieldName)); + + if (localFieldVal.isArray()) { + // Assume an array value logically corresponds to many documents, rather than logically + // corresponding to one document with an array value. + const vector& localArray = localFieldVal.getArray(); + const bool containsRegex = std::any_of( + localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; }); + + if (containsRegex) { + // A regex inside of an $in will not be treated as an equality comparison, so use an + // $or. + return buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray()); + } + + // { _foreignFieldFieldName : { "$in" : localFieldValue } } + subObj << "$in" << localFieldVal; + } else { + // { _foreignFieldFieldName : { "$eq" : localFieldValue } } + subObj << "$eq" << localFieldVal; + } + subObj.doneFast(); return query.obj(); } @@ -165,7 +212,9 @@ boost::optional DocumentSourceLookUp::unwindResult() { if (!_input) return {}; - _cursor = _mongod->directClient()->query(_fromNs.ns(), queryForInput(*_input)); + _cursor = _mongod->directClient()->query( + _fromNs.ns(), + DocumentSourceLookUp::queryForInput(*_input, _localField, _foreignFieldFieldName)); _cursorIndex = 0; if (_unwindSrc->preserveNullAndEmptyArrays() && !_cursor->more()) { @@ -219,7 +268,7 @@ DocumentSource::GetDepsReturn DocumentSourceLookUp::getDependencies(DepsTracker* return SEE_NEXT; } -boost::intrusive_ptr DocumentSourceLookUp::createFromBson( +intrusive_ptr DocumentSourceLookUp::createFromBson( BSONElement elem, const boost::intrusive_ptr& pExpCtx) { uassert(4569, "the $lookup specification must be an Object", elem.type() == Object); diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp index c1a46c66a44..f950eb8328f 100644 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ b/src/mongo/db/pipeline/document_source_test.cpp @@ -349,6 +349,42 @@ public: } // namespace DocumentSourceLimit +namespace DocumentSourceLookup { + +TEST(QueryForInput, NonArrayValueUsesEqQuery) { + Document input = DOC("local" << 1); + BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign"); + ASSERT_EQ(query, BSON("foreign" << BSON("$eq" << 1))); +} + +TEST(QueryForInput, RegexValueUsesEqQuery) { + BSONRegEx regex("^a"); + Document input = DOC("local" << Value(regex)); + BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign"); + ASSERT_EQ(query, BSON("foreign" << BSON("$eq" << regex))); +} + +TEST(QueryForInput, ArrayValueUsesInQuery) { + vector inputArray = {Value(1), Value(2)}; + Document input = DOC("local" << Value(inputArray)); + BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign"); + ASSERT_EQ(query, BSON("foreign" << BSON("$in" << BSON_ARRAY(1 << 2)))); +} + +TEST(QueryForInput, ArrayValueWithRegexUsesOrQuery) { + BSONRegEx regex("^a"); + vector inputArray = {Value(1), Value(regex), Value(2)}; + Document input = DOC("local" << Value(inputArray)); + BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign"); + ASSERT_EQ(query, + BSON("$or" << BSON_ARRAY(BSON("foreign" << BSON("$eq" << Value(1))) + << BSON("foreign" << BSON("$eq" << regex)) + << BSON("foreign" << BSON("$eq" << Value(2)))))); +} + + +} // namespace DocumentSourceLookUp + namespace DocumentSourceGroup { using mongo::DocumentSourceGroup; -- cgit v1.2.1