summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2016-03-22 10:52:18 -0400
committerCharlie Swanson <charlie.swanson@mongodb.com>2016-03-29 15:06:54 -0400
commitf23e588c180cc1a2ae79b9cbf6a4ba2a196ee215 (patch)
tree6b262c6768e0c35d488107e024c0732f041baebf
parente242b49373f0a9ee39f2b02f1a766a84c2c869b6 (diff)
downloadmongo-f23e588c180cc1a2ae79b9cbf6a4ba2a196ee215.tar.gz
SERVER-22881 Expand arrays during $lookup's query.
If the value in 'localField' is an array, assume that it corresponds to multiple entries in the foreign collection. The query to the foreign collection will use a $in predicate, except when a regex is present, in which case it will use a series of $eq predicates within an $or.
-rw-r--r--jstests/aggregation/bugs/server19095.js49
-rw-r--r--src/mongo/db/pipeline/document_source.h8
-rw-r--r--src/mongo/db/pipeline/document_source_lookup.cpp65
-rw-r--r--src/mongo/db/pipeline/document_source_test.cpp36
4 files changed, 149 insertions, 9 deletions
diff --git a/jstests/aggregation/bugs/server19095.js b/jstests/aggregation/bugs/server19095.js
index 7d023ebc271..30d2610aad9 100644
--- a/jstests/aggregation/bugs/server19095.js
+++ b/jstests/aggregation/bugs/server19095.js
@@ -313,6 +313,55 @@ load("jstests/aggregation/extras/utils.js");
testPipeline(pipeline, expectedResults, coll);
//
+ // A local value of an array.
+ //
+
+ // Basic array corresponding to multiple documents.
+ coll.drop();
+ assert.writeOK(coll.insert({_id: 0, a: [0, 1, 2]}));
+
+ from.drop();
+ assert.writeOK(from.insert({_id: 0}));
+ assert.writeOK(from.insert({_id: 1}));
+
+ pipeline = [
+ {
+ $lookup: {
+ localField: "a",
+ foreignField: "_id",
+ from: "from",
+ as: "b",
+ }
+ },
+ ];
+ expectedResults = [{_id: 0, a: [0, 1, 2], b: [{_id: 0}, {_id: 1}]}];
+ testPipeline(pipeline, expectedResults, coll);
+
+ // Array containing regular expressions.
+ coll.drop();
+ assert.writeOK(coll.insert({_id: 0, a: [/a regex/, /^x/]}));
+
+ from.drop();
+ assert.writeOK(from.insert({_id: 0, b: "should not match a regex"}));
+ assert.writeOK(from.insert({_id: 1, b: "xxxx"}));
+ assert.writeOK(from.insert({_id: 2, b: /a regex/}));
+ assert.writeOK(from.insert({_id: 3, b: /^x/}));
+
+ pipeline = [
+ {
+ $lookup: {
+ localField: "a",
+ foreignField: "b",
+ from: "from",
+ as: "b",
+ }
+ },
+ ];
+ expectedResults =
+ [{_id: 0, a: [/a regex/, /^x/], b: [{_id: 2, b: /a regex/}, {_id: 3, b: /^x/}]}];
+ testPipeline(pipeline, expectedResults, coll);
+
+ //
// Error cases.
//
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index f792bc6eeb8..6c9dee2aa39 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -1427,6 +1427,13 @@ public:
static boost::intrusive_ptr<DocumentSource> createFromBson(
BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+ /**
+ * Build the BSONObj used to query the foreign collection.
+ */
+ static BSONObj queryForInput(const Document& input,
+ const FieldPath& localFieldName,
+ const std::string& foreignFieldName);
+
private:
DocumentSourceLookUp(NamespaceString fromNs,
std::string as,
@@ -1439,7 +1446,6 @@ private:
}
boost::optional<Document> unwindResult();
- BSONObj queryForInput(const Document& input) const;
NamespaceString _fromNs;
FieldPath _as;
diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp
index 4e63247bef4..2838972d703 100644
--- a/src/mongo/db/pipeline/document_source_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup.cpp
@@ -41,6 +41,7 @@
namespace mongo {
using boost::intrusive_ptr;
+using std::vector;
DocumentSourceLookUp::DocumentSourceLookUp(NamespaceString fromNs,
std::string as,
@@ -60,6 +61,29 @@ const char* DocumentSourceLookUp::getSourceName() const {
return "$lookup";
}
+namespace {
+
+/**
+ * Constructs a query of the following shape:
+ * {$or: [
+ * {'fieldName': {$eq: 'values[0]'}},
+ * {'fieldName': {$eq: 'values[1]'}},
+ * ...
+ * ]}
+ */
+BSONObj buildEqualityOrQuery(const std::string& fieldName, const vector<Value>& values) {
+ BSONObjBuilder orBuilder;
+ {
+ BSONArrayBuilder orPredicatesBuilder(orBuilder.subarrayStart("$or"));
+ for (auto&& value : values) {
+ orPredicatesBuilder.append(BSON(fieldName << BSON("$eq" << value)));
+ }
+ }
+ return orBuilder.obj();
+}
+
+} // namespace
+
boost::optional<Document> DocumentSourceLookUp::getNext() {
pExpCtx->checkForInterrupt();
@@ -72,7 +96,7 @@ boost::optional<Document> DocumentSourceLookUp::getNext() {
boost::optional<Document> input = pSource->getNext();
if (!input)
return {};
- BSONObj query = queryForInput(*input);
+ BSONObj query = queryForInput(*input, _localField, _foreignFieldFieldName);
std::unique_ptr<DBClientCursor> cursor = _mongod->directClient()->query(_fromNs.ns(), query);
std::vector<Value> results;
@@ -114,16 +138,39 @@ void DocumentSourceLookUp::dispose() {
pSource->dispose();
}
-BSONObj DocumentSourceLookUp::queryForInput(const Document& input) const {
- Value localFieldVal = input.getNestedField(_localField);
+BSONObj DocumentSourceLookUp::queryForInput(const Document& input,
+ const FieldPath& localFieldPath,
+ const std::string& foreignFieldName) {
+ Value localFieldVal = input.getNestedField(localFieldPath);
+
+ // Missing values are treated as null.
if (localFieldVal.missing()) {
localFieldVal = Value(BSONNULL);
}
- // { _foreignFieldFiedlName : { "$eq" : localFieldValue } }
BSONObjBuilder query;
- BSONObjBuilder subObj(query.subobjStart(_foreignFieldFieldName));
- subObj << "$eq" << localFieldVal;
+ BSONObjBuilder subObj(query.subobjStart(foreignFieldName));
+
+ if (localFieldVal.isArray()) {
+ // Assume an array value logically corresponds to many documents, rather than logically
+ // corresponding to one document with an array value.
+ const vector<Value>& localArray = localFieldVal.getArray();
+ const bool containsRegex = std::any_of(
+ localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });
+
+ if (containsRegex) {
+ // A regex inside of an $in will not be treated as an equality comparison, so use an
+ // $or.
+ return buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
+ }
+
+ // { _foreignFieldFieldName : { "$in" : localFieldValue } }
+ subObj << "$in" << localFieldVal;
+ } else {
+ // { _foreignFieldFieldName : { "$eq" : localFieldValue } }
+ subObj << "$eq" << localFieldVal;
+ }
+
subObj.doneFast();
return query.obj();
}
@@ -165,7 +212,9 @@ boost::optional<Document> DocumentSourceLookUp::unwindResult() {
if (!_input)
return {};
- _cursor = _mongod->directClient()->query(_fromNs.ns(), queryForInput(*_input));
+ _cursor = _mongod->directClient()->query(
+ _fromNs.ns(),
+ DocumentSourceLookUp::queryForInput(*_input, _localField, _foreignFieldFieldName));
_cursorIndex = 0;
if (_unwindSrc->preserveNullAndEmptyArrays() && !_cursor->more()) {
@@ -219,7 +268,7 @@ DocumentSource::GetDepsReturn DocumentSourceLookUp::getDependencies(DepsTracker*
return SEE_NEXT;
}
-boost::intrusive_ptr<DocumentSource> DocumentSourceLookUp::createFromBson(
+intrusive_ptr<DocumentSource> DocumentSourceLookUp::createFromBson(
BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx) {
uassert(4569, "the $lookup specification must be an Object", elem.type() == Object);
diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp
index c1a46c66a44..f950eb8328f 100644
--- a/src/mongo/db/pipeline/document_source_test.cpp
+++ b/src/mongo/db/pipeline/document_source_test.cpp
@@ -349,6 +349,42 @@ public:
} // namespace DocumentSourceLimit
+namespace DocumentSourceLookup {
+
+TEST(QueryForInput, NonArrayValueUsesEqQuery) {
+ Document input = DOC("local" << 1);
+ BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign");
+ ASSERT_EQ(query, BSON("foreign" << BSON("$eq" << 1)));
+}
+
+TEST(QueryForInput, RegexValueUsesEqQuery) {
+ BSONRegEx regex("^a");
+ Document input = DOC("local" << Value(regex));
+ BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign");
+ ASSERT_EQ(query, BSON("foreign" << BSON("$eq" << regex)));
+}
+
+TEST(QueryForInput, ArrayValueUsesInQuery) {
+ vector<Value> inputArray = {Value(1), Value(2)};
+ Document input = DOC("local" << Value(inputArray));
+ BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign");
+ ASSERT_EQ(query, BSON("foreign" << BSON("$in" << BSON_ARRAY(1 << 2))));
+}
+
+TEST(QueryForInput, ArrayValueWithRegexUsesOrQuery) {
+ BSONRegEx regex("^a");
+ vector<Value> inputArray = {Value(1), Value(regex), Value(2)};
+ Document input = DOC("local" << Value(inputArray));
+ BSONObj query = DocumentSourceLookUp::queryForInput(input, FieldPath("local"), "foreign");
+ ASSERT_EQ(query,
+ BSON("$or" << BSON_ARRAY(BSON("foreign" << BSON("$eq" << Value(1)))
+ << BSON("foreign" << BSON("$eq" << regex))
+ << BSON("foreign" << BSON("$eq" << Value(2))))));
+}
+
+
+} // namespace DocumentSourceLookUp
+
namespace DocumentSourceGroup {
using mongo::DocumentSourceGroup;