diff options
author | James Wahlin <james@mongodb.com> | 2018-11-27 13:28:27 -0500 |
---|---|---|
committer | James Wahlin <james@mongodb.com> | 2018-12-12 14:41:24 -0500 |
commit | 056d61676f91f6da0a030347ae4b92255d752d8f (patch) | |
tree | 92f5b2d319ce1cd5701be912e6b96cf9a6fdaa4b /jstests | |
parent | d2573d47786b035d5bcdeaf30207bbfcd58bf14e (diff) | |
download | mongo-056d61676f91f6da0a030347ae4b92255d752d8f.tar.gz |
SERVER-32308 Support for $lookup to execute on mongos against a sharded foreign collection
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/aggregation/mongos_merge.js | 39 | ||||
-rw-r--r-- | jstests/aggregation/sources/facet/use_cases.js | 21 | ||||
-rw-r--r-- | jstests/aggregation/sources/graphLookup/sharded.js | 56 | ||||
-rw-r--r-- | jstests/aggregation/sources/lookup/collation_lookup.js | 368 | ||||
-rw-r--r-- | jstests/aggregation/sources/lookup/lookup.js | 1148 | ||||
-rw-r--r-- | jstests/aggregation/sources/lookup/lookup_subpipeline.js | 604 | ||||
-rw-r--r-- | jstests/sharding/collation_lookup.js | 454 | ||||
-rw-r--r-- | jstests/sharding/lookup.js | 618 | ||||
-rw-r--r-- | jstests/sharding/lookup_mongod_unaware.js | 168 | ||||
-rw-r--r-- | jstests/sharding/lookup_stale_mongos.js | 130 |
10 files changed, 2015 insertions, 1591 deletions
diff --git a/jstests/aggregation/mongos_merge.js b/jstests/aggregation/mongos_merge.js index 13ab1b2431e..df22312ce13 100644 --- a/jstests/aggregation/mongos_merge.js +++ b/jstests/aggregation/mongos_merge.js @@ -286,6 +286,45 @@ allowDiskUse: allowDiskUse, expectedCount: 400 }); + + // Test that $lookup is merged on the primary shard when the foreign collection is + // unsharded. + assertMergeOnMongoD({ + testName: "agg_mongos_merge_lookup_unsharded_disk_use_" + allowDiskUse, + pipeline: [ + {$match: {_id: {$gte: -200, $lte: 200}}}, + { + $lookup: { + from: unshardedColl.getName(), + localField: "_id", + foreignField: "_id", + as: "lookupField" + } + } + ], + mergeType: "primaryShard", + allowDiskUse: allowDiskUse, + expectedCount: 400 + }); + + // Test that $lookup is merged on mongoS when the foreign collection is sharded. + assertMergeOnMongoS({ + testName: "agg_mongos_merge_lookup_sharded_disk_use_" + allowDiskUse, + pipeline: [ + {$match: {_id: {$gte: -200, $lte: 200}}}, + { + $lookup: { + from: mongosColl.getName(), + localField: "_id", + foreignField: "_id", + as: "lookupField" + } + } + ], + mergeType: "mongos", + allowDiskUse: allowDiskUse, + expectedCount: 400 + }); } /** diff --git a/jstests/aggregation/sources/facet/use_cases.js b/jstests/aggregation/sources/facet/use_cases.js index 1295f638910..6074e66d810 100644 --- a/jstests/aggregation/sources/facet/use_cases.js +++ b/jstests/aggregation/sources/facet/use_cases.js @@ -115,8 +115,6 @@ populateData(st.s0, nDocs); doExecutionTest(st.s0); - // Test that $facet stage propagates information about involved collections, preventing users - // from doing things like $lookup from a sharded collection. 
const shardedDBName = "sharded"; const shardedCollName = "collection"; const shardedColl = st.getDB(shardedDBName).getCollection(shardedCollName); @@ -126,21 +124,8 @@ assert.commandWorked( st.admin.runCommand({shardCollection: shardedColl.getFullName(), key: {_id: 1}})); - // Test that trying to perform a $lookup on a sharded collection returns an error. - let res = assert.commandFailed(unshardedColl.runCommand({ - aggregate: unshardedColl.getName(), - pipeline: [{ - $lookup: - {from: shardedCollName, localField: "_id", foreignField: "_id", as: "results"} - }], - cursor: {} - })); - assert.eq( - 28769, res.code, "Expected aggregation to fail due to $lookup on a sharded collection"); - - // Test that trying to perform a $lookup on a sharded collection inside a $facet stage still - // returns an error. - res = assert.commandFailed(unshardedColl.runCommand({ + // Test $lookup inside a $facet stage on a sharded collection. + assert.commandWorked(unshardedColl.runCommand({ aggregate: unshardedColl.getName(), pipeline: [{ $facet: { @@ -156,8 +141,6 @@ }], cursor: {} })); - assert.eq( - 28769, res.code, "Expected aggregation to fail due to $lookup on a sharded collection"); // Then run the assertions against a sharded collection. assert.commandWorked(st.admin.runCommand({enableSharding: dbName})); diff --git a/jstests/aggregation/sources/graphLookup/sharded.js b/jstests/aggregation/sources/graphLookup/sharded.js deleted file mode 100644 index b78649d5824..00000000000 --- a/jstests/aggregation/sources/graphLookup/sharded.js +++ /dev/null @@ -1,56 +0,0 @@ -// In SERVER-23725, $graphLookup was introduced. In this file, we test that the expression behaves -// correctly on a sharded collection. -// @tags: [requires_sharding] -load("jstests/aggregation/extras/utils.js"); // For assertErrorCode. 
- -(function() { - "use strict"; - - var st = new ShardingTest({name: "aggregation_graph_lookup", shards: 2, mongos: 1}); - - st.adminCommand({enableSharding: "graphLookup"}); - st.ensurePrimaryShard("graphLookup", st.shard1.shardName); - st.adminCommand({shardCollection: "graphLookup.local", key: {_id: 1}}); - - var foreign = st.getDB("graphLookup").foreign; - var local = st.getDB("graphLookup").local; - - var bulk = foreign.initializeUnorderedBulkOp(); - - for (var i = 0; i < 100; i++) { - bulk.insert({_id: i, next: i + 1}); - } - assert.writeOK(bulk.execute()); - - assert.writeOK(local.insert({})); - - var res = st.s.getDB("graphLookup") - .local - .aggregate({ - $graphLookup: { - from: "foreign", - startWith: {$literal: 0}, - connectToField: "_id", - connectFromField: "next", - as: "number_line" - } - }) - .toArray(); - - assert.eq(res.length, 1); - assert.eq(res[0].number_line.length, 100); - - // Cannot perform a $graphLookup where the "from" collection is sharded. - var pipeline = { - $graphLookup: { - from: "local", - startWith: {$literal: 0}, - connectToField: "_id", - connectFromField: "_id", - as: "out" - } - }; - - assertErrorCode(foreign, pipeline, 28769); - st.stop(); -}()); diff --git a/jstests/aggregation/sources/lookup/collation_lookup.js b/jstests/aggregation/sources/lookup/collation_lookup.js deleted file mode 100644 index 80d173138a6..00000000000 --- a/jstests/aggregation/sources/lookup/collation_lookup.js +++ /dev/null @@ -1,368 +0,0 @@ -// Cannot implicitly shard accessed collections because of collection existing when none expected. -// @tags: [assumes_no_implicit_collection_creation_after_drop] - -/** - * Tests that the $lookup stage respects the collation. - * - * The comparison of string values between the 'localField' and 'foreignField' should use the - * collation either explicitly set on the aggregation operation, or the collation inherited from the - * collection the "aggregate" command was performed on. 
- */ -(function() { - "use strict"; - - load("jstests/aggregation/extras/utils.js"); // for arrayEq - - const caseInsensitive = {collation: {locale: "en_US", strength: 2}}; - - const withDefaultCollationColl = db.collation_lookup_with_default; - const withoutDefaultCollationColl = db.collation_lookup_without_default; - - withDefaultCollationColl.drop(); - withoutDefaultCollationColl.drop(); - - assert.commandWorked(db.createCollection(withDefaultCollationColl.getName(), caseInsensitive)); - assert.writeOK(withDefaultCollationColl.insert({_id: "lowercase", str: "abc"})); - - assert.writeOK(withoutDefaultCollationColl.insert({_id: "lowercase", str: "abc"})); - assert.writeOK(withoutDefaultCollationColl.insert({_id: "uppercase", str: "ABC"})); - assert.writeOK(withoutDefaultCollationColl.insert({_id: "unmatched", str: "def"})); - - // Test that the $lookup stage respects the inherited collation. - let res = withDefaultCollationColl - .aggregate([{ - $lookup: { - from: withoutDefaultCollationColl.getName(), - localField: "str", - foreignField: "str", - as: "matched", - }, - }]) - .toArray(); - assert.eq(1, res.length, tojson(res)); - - let expected = [{_id: "lowercase", str: "abc"}, {_id: "uppercase", str: "ABC"}]; - assert( - arrayEq(expected, res[0].matched), - "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched) + " up to ordering"); - - res = withDefaultCollationColl - .aggregate([{ - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str1: "$str"}, - pipeline: [ - {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str2: "$str"}, - pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], - as: "matched2" - } - } - ], - as: "matched1", - }, - }]) - .toArray(); - assert.eq(1, res.length, tojson(res)); - - expected = [ - { - "_id": "lowercase", - "str": "abc", - "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] - }, - { - "_id": 
"uppercase", - "str": "ABC", - "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] - } - ]; - assert(arrayEq(expected, res[0].matched1), - "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched1) + - " up to ordering. " + tojson(res)); - - // Test that the $lookup stage respects the inherited collation when it optimizes with an - // $unwind stage. - res = withDefaultCollationColl - .aggregate([ - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - localField: "str", - foreignField: "str", - as: "matched", - }, - }, - {$unwind: "$matched"}, - ]) - .toArray(); - assert.eq(2, res.length, tojson(res)); - - expected = [ - {_id: "lowercase", str: "abc", matched: {_id: "lowercase", str: "abc"}}, - {_id: "lowercase", str: "abc", matched: {_id: "uppercase", str: "ABC"}} - ]; - assert(arrayEq(expected, res), - "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); - - res = withDefaultCollationColl - .aggregate([ - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str1: "$str"}, - pipeline: [ - {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str2: "$str"}, - pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], - as: "matched2" - } - }, - {$unwind: "$matched2"}, - ], - as: "matched1", - }, - }, - {$unwind: "$matched1"}, - ]) - .toArray(); - assert.eq(4, res.length, tojson(res)); - - expected = [ - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "lowercase", "str": "abc", "matched2": {"_id": "lowercase", "str": "abc"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "lowercase", "str": "abc", "matched2": {"_id": "uppercase", "str": "ABC"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "uppercase", "str": "ABC", "matched2": {"_id": "lowercase", "str": "abc"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": 
"uppercase", "str": "ABC", "matched2": {"_id": "uppercase", "str": "ABC"}} - } - ]; - assert(arrayEq(expected, res), - "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); - - // Test that the $lookup stage respects an explicit collation on the aggregation operation. - res = withoutDefaultCollationColl - .aggregate( - [ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - localField: "str", - foreignField: "str", - as: "matched", - }, - }, - ], - caseInsensitive) - .toArray(); - assert.eq(1, res.length, tojson(res)); - - expected = [{_id: "lowercase", str: "abc"}, {_id: "uppercase", str: "ABC"}]; - assert( - arrayEq(expected, res[0].matched), - "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched) + " up to ordering"); - - res = withoutDefaultCollationColl - .aggregate( - [ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str1: "$str"}, - pipeline: [ - {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str2: "$str"}, - pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], - as: "matched2" - } - } - ], - as: "matched1", - }, - } - ], - caseInsensitive) - .toArray(); - assert.eq(1, res.length, tojson(res)); - - expected = [ - { - "_id": "lowercase", - "str": "abc", - "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] - }, - { - "_id": "uppercase", - "str": "ABC", - "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] - } - ]; - assert(arrayEq(expected, res[0].matched1), - "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched1) + - " up to ordering"); - - // Test that the $lookup stage respects an explicit collation on the aggregation operation when - // it optimizes with an $unwind stage. 
- res = withoutDefaultCollationColl - .aggregate( - [ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - localField: "str", - foreignField: "str", - as: "matched", - }, - }, - {$unwind: "$matched"}, - ], - caseInsensitive) - .toArray(); - assert.eq(2, res.length, tojson(res)); - - expected = [ - {_id: "lowercase", str: "abc", matched: {_id: "lowercase", str: "abc"}}, - {_id: "lowercase", str: "abc", matched: {_id: "uppercase", str: "ABC"}} - ]; - assert(arrayEq(expected, res), - "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); - - res = withoutDefaultCollationColl - .aggregate( - [ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str1: "$str"}, - pipeline: [ - {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str2: "$str"}, - pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], - as: "matched2" - } - }, - {$unwind: "$matched2"}, - ], - as: "matched1", - }, - }, - {$unwind: "$matched1"}, - ], - caseInsensitive) - .toArray(); - assert.eq(4, res.length, tojson(res)); - - expected = [ - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "lowercase", "str": "abc", "matched2": {"_id": "lowercase", "str": "abc"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "lowercase", "str": "abc", "matched2": {"_id": "uppercase", "str": "ABC"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "uppercase", "str": "ABC", "matched2": {"_id": "lowercase", "str": "abc"}} - }, - { - "_id": "lowercase", - "str": "abc", - "matched1": - {"_id": "uppercase", "str": "ABC", "matched2": {"_id": "uppercase", "str": "ABC"}} - } - ]; - assert(arrayEq(expected, res), - "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); - - // Test that the $lookup stage uses the "simple" collation if a collation isn't set 
on the - // collection or the aggregation operation. - res = withoutDefaultCollationColl - .aggregate([ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withDefaultCollationColl.getName(), - localField: "str", - foreignField: "str", - as: "matched", - }, - }, - ]) - .toArray(); - assert.eq([{_id: "lowercase", str: "abc", matched: [{_id: "lowercase", str: "abc"}]}], res); - - res = withoutDefaultCollationColl - .aggregate([ - {$match: {_id: "lowercase"}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str1: "$str"}, - pipeline: [ - {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, - { - $lookup: { - from: withoutDefaultCollationColl.getName(), - let : {str2: "$str"}, - pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], - as: "matched2" - } - }, - {$unwind: "$matched2"}, - ], - as: "matched1", - }, - }, - ]) - .toArray(); - assert.eq([{ - "_id": "lowercase", - "str": "abc", - "matched1": [{ - "_id": "lowercase", - "str": "abc", - "matched2": {"_id": "lowercase", "str": "abc"} - }] - }], - res); -})(); diff --git a/jstests/aggregation/sources/lookup/lookup.js b/jstests/aggregation/sources/lookup/lookup.js deleted file mode 100644 index 0d29f4eb8a8..00000000000 --- a/jstests/aggregation/sources/lookup/lookup.js +++ /dev/null @@ -1,1148 +0,0 @@ -// Basic $lookup regression tests. -// @tags: [requires_sharding] - -load("jstests/aggregation/extras/utils.js"); // For assertErrorCode. - -(function() { - "use strict"; - - // Used by testPipeline to sort result documents. All _ids must be primitives. 
- function compareId(a, b) { - if (a._id < b._id) { - return -1; - } - if (a._id > b._id) { - return 1; - } - return 0; - } - - function generateNestedPipeline(foreignCollName, numLevels) { - let pipeline = [{"$lookup": {pipeline: [], from: foreignCollName, as: "same"}}]; - - for (let level = 1; level < numLevels; level++) { - pipeline = [{"$lookup": {pipeline: pipeline, from: foreignCollName, as: "same"}}]; - } - - return pipeline; - } - - // Helper for testing that pipeline returns correct set of results. - function testPipeline(pipeline, expectedResult, collection) { - assert.eq(collection.aggregate(pipeline).toArray().sort(compareId), - expectedResult.sort(compareId)); - } - - function runTest(coll, from, thirdColl, fourthColl) { - var db = null; // Using the db variable is banned in this function. - - assert.writeOK(coll.insert({_id: 0, a: 1})); - assert.writeOK(coll.insert({_id: 1, a: null})); - assert.writeOK(coll.insert({_id: 2})); - - assert.writeOK(from.insert({_id: 0, b: 1})); - assert.writeOK(from.insert({_id: 1, b: null})); - assert.writeOK(from.insert({_id: 2})); - - // - // Basic functionality. - // - - // "from" document added to "as" field if a == b, where nonexistent fields are treated as - // null. - var expectedResults = [ - {_id: 0, a: 1, "same": [{_id: 0, b: 1}]}, - {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline([{$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}], - expectedResults, - coll); - - // If localField is nonexistent, it is treated as if it is null. 
- expectedResults = [ - {_id: 0, a: 1, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline( - [{$lookup: {localField: "nonexistent", foreignField: "b", from: "from", as: "same"}}], - expectedResults, - coll); - - // If foreignField is nonexistent, it is treated as if it is null. - expectedResults = [ - {_id: 0, a: 1, "same": []}, - {_id: 1, a: null, "same": [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "same": [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline( - [{$lookup: {localField: "a", foreignField: "nonexistent", from: "from", as: "same"}}], - expectedResults, - coll); - - // If there are no matches or the from coll doesn't exist, the result is an empty array. - expectedResults = - [{_id: 0, a: 1, "same": []}, {_id: 1, a: null, "same": []}, {_id: 2, "same": []}]; - testPipeline( - [{$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}], - expectedResults, - coll); - testPipeline( - [{$lookup: {localField: "a", foreignField: "b", from: "nonexistent", as: "same"}}], - expectedResults, - coll); - - // If field name specified by "as" already exists, it is overwritten. - expectedResults = [ - {_id: 0, "a": [{_id: 0, b: 1}]}, - {_id: 1, "a": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "a": [{_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline([{$lookup: {localField: "a", foreignField: "b", from: "from", as: "a"}}], - expectedResults, - coll); - - // Running multiple $lookups in the same pipeline is allowed. 
- expectedResults = [ - {_id: 0, a: 1, "c": [{_id: 0, b: 1}], "d": [{_id: 0, b: 1}]}, - { - _id: 1, - a: null, "c": [{_id: 1, b: null}, {_id: 2}], "d": [{_id: 1, b: null}, {_id: 2}] - }, - {_id: 2, "c": [{_id: 1, b: null}, {_id: 2}], "d": [{_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline( - [ - {$lookup: {localField: "a", foreignField: "b", from: "from", as: "c"}}, - {$project: {"a": 1, "c": 1}}, - {$lookup: {localField: "a", foreignField: "b", from: "from", as: "d"}} - ], - expectedResults, - coll); - - // - // Coalescing with $unwind. - // - - // A normal $unwind with on the "as" field. - expectedResults = [ - {_id: 0, a: 1, same: {_id: 0, b: 1}}, - {_id: 1, a: null, same: {_id: 1, b: null}}, - {_id: 1, a: null, same: {_id: 2}}, - {_id: 2, same: {_id: 1, b: null}}, - {_id: 2, same: {_id: 2}} - ]; - testPipeline( - [ - {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, - {$unwind: {path: "$same"}} - ], - expectedResults, - coll); - - // An $unwind on the "as" field, with includeArrayIndex. - expectedResults = [ - {_id: 0, a: 1, same: {_id: 0, b: 1}, index: NumberLong(0)}, - {_id: 1, a: null, same: {_id: 1, b: null}, index: NumberLong(0)}, - {_id: 1, a: null, same: {_id: 2}, index: NumberLong(1)}, - {_id: 2, same: {_id: 1, b: null}, index: NumberLong(0)}, - {_id: 2, same: {_id: 2}, index: NumberLong(1)}, - ]; - testPipeline( - [ - {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, - {$unwind: {path: "$same", includeArrayIndex: "index"}} - ], - expectedResults, - coll); - - // Normal $unwind with no matching documents. - expectedResults = []; - testPipeline( - [ - {$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}, - {$unwind: {path: "$same"}} - ], - expectedResults, - coll); - - // $unwind with preserveNullAndEmptyArray with no matching documents. 
- expectedResults = [ - {_id: 0, a: 1}, - {_id: 1, a: null}, - {_id: 2}, - ]; - testPipeline( - [ - {$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}, - {$unwind: {path: "$same", preserveNullAndEmptyArrays: true}} - ], - expectedResults, - coll); - - // $unwind with preserveNullAndEmptyArray, some with matching documents, some without. - expectedResults = [ - {_id: 0, a: 1}, - {_id: 1, a: null, same: {_id: 0, b: 1}}, - {_id: 2}, - ]; - testPipeline( - [ - {$lookup: {localField: "_id", foreignField: "b", from: "from", as: "same"}}, - {$unwind: {path: "$same", preserveNullAndEmptyArrays: true}} - ], - expectedResults, - coll); - - // $unwind with preserveNullAndEmptyArray and includeArrayIndex, some with matching - // documents, some without. - expectedResults = [ - {_id: 0, a: 1, index: null}, - {_id: 1, a: null, same: {_id: 0, b: 1}, index: NumberLong(0)}, - {_id: 2, index: null}, - ]; - testPipeline( - [ - {$lookup: {localField: "_id", foreignField: "b", from: "from", as: "same"}}, - { - $unwind: - {path: "$same", preserveNullAndEmptyArrays: true, includeArrayIndex: "index"} - } - ], - expectedResults, - coll); - - // - // Dependencies. - // - - // If $lookup didn't add "localField" to its dependencies, this test would fail as the - // value of the "a" field would be lost and treated as null. - expectedResults = [ - {_id: 0, "same": [{_id: 0, b: 1}]}, - {_id: 1, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} - ]; - testPipeline( - [ - {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, - {$project: {"same": 1}} - ], - expectedResults, - coll); - - // If $lookup didn't add fields referenced by "let" variables to its dependencies, this test - // would fail as the value of the "a" field would be lost and treated as null. 
- expectedResults = [ - {"_id": 0, "same": [{"_id": 0, "x": 1}, {"_id": 1, "x": 1}, {"_id": 2, "x": 1}]}, - { - "_id": 1, - "same": [{"_id": 0, "x": null}, {"_id": 1, "x": null}, {"_id": 2, "x": null}] - }, - {"_id": 2, "same": [{"_id": 0}, {"_id": 1}, {"_id": 2}]} - ]; - testPipeline( - [ - { - $lookup: { - let : {var1: "$a"}, - pipeline: [{$project: {x: "$$var1"}}], - from: "from", - as: "same" - } - }, - {$project: {"same": 1}} - ], - expectedResults, - coll); - - // - // Dotted field paths. - // - - coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: 1})); - assert.writeOK(coll.insert({_id: 1, a: null})); - assert.writeOK(coll.insert({_id: 2})); - assert.writeOK(coll.insert({_id: 3, a: {c: 1}})); - - from.drop(); - assert.writeOK(from.insert({_id: 0, b: 1})); - assert.writeOK(from.insert({_id: 1, b: null})); - assert.writeOK(from.insert({_id: 2})); - assert.writeOK(from.insert({_id: 3, b: {c: 1}})); - assert.writeOK(from.insert({_id: 4, b: {c: 2}})); - - // Once without a dotted field. - var pipeline = [{$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}]; - expectedResults = [ - {_id: 0, a: 1, "same": [{_id: 0, b: 1}]}, - {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]}, - {_id: 3, a: {c: 1}, "same": [{_id: 3, b: {c: 1}}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // Look up a dotted field. - pipeline = [{$lookup: {localField: "a.c", foreignField: "b.c", from: "from", as: "same"}}]; - // All but the last document in 'coll' have a nullish value for 'a.c'. - expectedResults = [ - {_id: 0, a: 1, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, - {_id: 1, a: null, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, - {_id: 2, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, - {_id: 3, a: {c: 1}, same: [{_id: 3, b: {c: 1}}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // With an $unwind stage. 
- coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: {b: 1}})); - assert.writeOK(coll.insert({_id: 1})); - - from.drop(); - assert.writeOK(from.insert({_id: 0, target: 1})); - - pipeline = [ - { - $lookup: { - localField: "a.b", - foreignField: "target", - from: "from", - as: "same.documents", - } - }, - { - // Expected input to $unwind: - // {_id: 0, a: {b: 1}, same: {documents: [{_id: 0, target: 1}]}} - // {_id: 1, same: {documents: []}} - $unwind: { - path: "$same.documents", - preserveNullAndEmptyArrays: true, - includeArrayIndex: "c.d.e", - } - } - ]; - expectedResults = [ - {_id: 0, a: {b: 1}, same: {documents: {_id: 0, target: 1}}, c: {d: {e: NumberLong(0)}}}, - {_id: 1, same: {}, c: {d: {e: null}}}, - ]; - testPipeline(pipeline, expectedResults, coll); - - // - // Query-like local fields (SERVER-21287) - // - - // This must only do an equality match rather than treating the value as a regex. - coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: /a regex/})); - - from.drop(); - assert.writeOK(from.insert({_id: 0, b: /a regex/})); - assert.writeOK(from.insert({_id: 1, b: "string that matches /a regex/"})); - - pipeline = [ - { - $lookup: { - localField: "a", - foreignField: "b", - from: "from", - as: "b", - } - }, - ]; - expectedResults = [{_id: 0, a: /a regex/, b: [{_id: 0, b: /a regex/}]}]; - testPipeline(pipeline, expectedResults, coll); - - // - // A local value of an array. - // - - // Basic array corresponding to multiple documents. - coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: [0, 1, 2]})); - - from.drop(); - assert.writeOK(from.insert({_id: 0})); - assert.writeOK(from.insert({_id: 1})); - - pipeline = [ - { - $lookup: { - localField: "a", - foreignField: "_id", - from: "from", - as: "b", - } - }, - ]; - expectedResults = [{_id: 0, a: [0, 1, 2], b: [{_id: 0}, {_id: 1}]}]; - testPipeline(pipeline, expectedResults, coll); - - // Basic array corresponding to a single document. 
- coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: [1]})); - - from.drop(); - assert.writeOK(from.insert({_id: 0})); - assert.writeOK(from.insert({_id: 1})); - - pipeline = [ - { - $lookup: { - localField: "a", - foreignField: "_id", - from: "from", - as: "b", - } - }, - ]; - expectedResults = [{_id: 0, a: [1], b: [{_id: 1}]}]; - testPipeline(pipeline, expectedResults, coll); - - // Array containing regular expressions. - coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: [/a regex/, /^x/]})); - assert.writeOK(coll.insert({_id: 1, a: [/^x/]})); - - from.drop(); - assert.writeOK(from.insert({_id: 0, b: "should not match a regex"})); - assert.writeOK(from.insert({_id: 1, b: "xxxx"})); - assert.writeOK(from.insert({_id: 2, b: /a regex/})); - assert.writeOK(from.insert({_id: 3, b: /^x/})); - - pipeline = [ - { - $lookup: { - localField: "a", - foreignField: "b", - from: "from", - as: "b", - } - }, - ]; - expectedResults = [ - {_id: 0, a: [/a regex/, /^x/], b: [{_id: 2, b: /a regex/}, {_id: 3, b: /^x/}]}, - {_id: 1, a: [/^x/], b: [{_id: 3, b: /^x/}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // 'localField' references a field within an array of sub-objects. - coll.drop(); - assert.writeOK(coll.insert({_id: 0, a: [{b: 1}, {b: 2}]})); - - from.drop(); - assert.writeOK(from.insert({_id: 0})); - assert.writeOK(from.insert({_id: 1})); - assert.writeOK(from.insert({_id: 2})); - assert.writeOK(from.insert({_id: 3})); - - pipeline = [ - { - $lookup: { - localField: "a.b", - foreignField: "_id", - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [{"_id": 0, "a": [{"b": 1}, {"b": 2}], "c": [{"_id": 1}, {"_id": 2}]}]; - testPipeline(pipeline, expectedResults, coll); - - // - // Pipeline syntax using 'let' variables. 
- // - coll.drop(); - assert.writeOK(coll.insert({_id: 1, x: 1})); - assert.writeOK(coll.insert({_id: 2, x: 2})); - assert.writeOK(coll.insert({_id: 3, x: 3})); - - from.drop(); - assert.writeOK(from.insert({_id: 1})); - assert.writeOK(from.insert({_id: 2})); - assert.writeOK(from.insert({_id: 3})); - - // Basic non-equi theta join via $project. - pipeline = [ - { - $lookup: { - let : {var1: "$_id"}, - pipeline: [ - {$project: {isMatch: {$gt: ["$$var1", "$_id"]}}}, - {$match: {isMatch: true}}, - {$project: {isMatch: 0}} - ], - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [ - {"_id": 1, x: 1, "c": []}, - {"_id": 2, x: 2, "c": [{"_id": 1}]}, - { - "_id": 3, - x: 3, - "c": [ - {"_id": 1}, - { - "_id": 2, - } - ] - } - ]; - testPipeline(pipeline, expectedResults, coll); - - // Basic non-equi theta join via $match. - pipeline = [ - { - $lookup: { - let : {var1: "$_id"}, - pipeline: [ - {$match: {$expr: {$lt: ["$_id", "$$var1"]}}}, - ], - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [ - {"_id": 1, x: 1, "c": []}, - {"_id": 2, x: 2, "c": [{"_id": 1}]}, - { - "_id": 3, - x: 3, - "c": [ - {"_id": 1}, - { - "_id": 2, - } - ] - } - ]; - testPipeline(pipeline, expectedResults, coll); - - // Multi-level join using $match. - pipeline = [ - { - $lookup: { - let : {var1: "$_id"}, - pipeline: [ - {$match: {$expr: {$eq: ["$_id", "$$var1"]}}}, - { - $lookup: { - let : {var2: "$_id"}, - pipeline: [ - {$match: {$expr: {$gt: ["$_id", "$$var2"]}}}, - ], - from: "from", - as: "d" - } - }, - ], - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [ - {"_id": 1, "x": 1, "c": [{"_id": 1, "d": [{"_id": 2}, {"_id": 3}]}]}, - {"_id": 2, "x": 2, "c": [{"_id": 2, "d": [{"_id": 3}]}]}, - {"_id": 3, "x": 3, "c": [{"_id": 3, "d": []}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // Equijoin with $match that can't be delegated to the query subsystem. 
- pipeline = [ - { - $lookup: { - let : {var1: "$x"}, - pipeline: [ - {$addFields: {newField: 2}}, - {$match: {$expr: {$eq: ["$newField", "$$var1"]}}}, - {$project: {newField: 0}} - ], - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [ - {"_id": 1, "x": 1, "c": []}, - {"_id": 2, "x": 2, "c": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, - {"_id": 3, "x": 3, "c": []} - ]; - testPipeline(pipeline, expectedResults, coll); - - // Multiple variables. - pipeline = [ - { - $lookup: { - let : {var1: "$_id", var2: "$x"}, - pipeline: [ - { - $project: { - isMatch: {$gt: ["$$var1", "$_id"]}, - var2Times2: {$multiply: [2, "$$var2"]} - } - }, - {$match: {isMatch: true}}, - {$project: {isMatch: 0}} - ], - from: "from", - as: "c", - }, - }, - {$project: {x: 1, c: 1}} - ]; - - expectedResults = [ - {"_id": 1, x: 1, "c": []}, - {"_id": 2, x: 2, "c": [{"_id": 1, var2Times2: 4}]}, - {"_id": 3, x: 3, "c": [{"_id": 1, var2Times2: 6}, {"_id": 2, var2Times2: 6}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // Let var as complex expression object. - pipeline = [ - { - $lookup: { - let : {var1: {$mod: ["$x", 3]}}, - pipeline: [ - {$project: {var1Mod3TimesForeignId: {$multiply: ["$$var1", "$_id"]}}}, - ], - from: "from", - as: "c", - } - }, - ]; - - expectedResults = [ - { - "_id": 1, - x: 1, - "c": [ - {_id: 1, var1Mod3TimesForeignId: 1}, - {_id: 2, var1Mod3TimesForeignId: 2}, - {_id: 3, var1Mod3TimesForeignId: 3} - ] - }, - { - "_id": 2, - x: 2, - "c": [ - {_id: 1, var1Mod3TimesForeignId: 2}, - {_id: 2, var1Mod3TimesForeignId: 4}, - {_id: 3, var1Mod3TimesForeignId: 6} - ] - }, - { - "_id": 3, - x: 3, - "c": [ - {_id: 1, var1Mod3TimesForeignId: 0}, - {_id: 2, var1Mod3TimesForeignId: 0}, - {_id: 3, var1Mod3TimesForeignId: 0} - ] - } - ]; - testPipeline(pipeline, expectedResults, coll); - - // 'let' defined variables are available to all nested sub-pipelines. 
- pipeline = [ - {$match: {_id: 1}}, - { - $lookup: { - let : {var1: "ABC", var2: "123"}, - pipeline: [ - {$match: {_id: 1}}, - { - $lookup: { - pipeline: [ - {$match: {_id: 2}}, - {$addFields: {letVar1: "$$var1"}}, - { - $lookup: { - let : {var3: "XYZ"}, - pipeline: [{ - $addFields: { - mergedLetVars: - {$concat: ["$$var1", "$$var2", "$$var3"]} - } - }], - from: "from", - as: "join3" - } - }, - ], - from: "from", - as: "join2" - } - }, - ], - from: "from", - as: "join1", - } - } - ]; - - expectedResults = [{ - "_id": 1, - "x": 1, - "join1": [{ - "_id": 1, - "join2": [{ - "_id": 2, - "letVar1": "ABC", - "join3": [ - {"_id": 1, "mergedLetVars": "ABC123XYZ"}, - {"_id": 2, "mergedLetVars": "ABC123XYZ"}, - {"_id": 3, "mergedLetVars": "ABC123XYZ"} - ] - }] - }] - }]; - testPipeline(pipeline, expectedResults, coll); - - // 'let' variable shadowed by foreign pipeline variable. - pipeline = [ - {$match: {_id: 2}}, - { - $lookup: { - let : {var1: "$_id"}, - pipeline: [ - { - $project: { - shadowedVar: {$let: {vars: {var1: "abc"}, in : "$$var1"}}, - originalVar: "$$var1" - } - }, - { - $lookup: { - pipeline: [{ - $project: { - shadowedVar: {$let: {vars: {var1: "xyz"}, in : "$$var1"}}, - originalVar: "$$var1" - } - }], - from: "from", - as: "d" - } - } - ], - from: "from", - as: "c", - } - } - ]; - - expectedResults = [{ - "_id": 2, - "x": 2, - "c": [ - { - "_id": 1, - "shadowedVar": "abc", - "originalVar": 2, - "d": [ - {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, - {"_id": 2, "shadowedVar": "xyz", "originalVar": 2}, - {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} - ] - }, - { - "_id": 2, - "shadowedVar": "abc", - "originalVar": 2, - "d": [ - {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, - {"_id": 2, "shadowedVar": "xyz", "originalVar": 2}, - {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} - ] - }, - { - "_id": 3, - "shadowedVar": "abc", - "originalVar": 2, - "d": [ - {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, - {"_id": 2, "shadowedVar": "xyz", 
"originalVar": 2}, - {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} - ] - } - ] - }]; - testPipeline(pipeline, expectedResults, coll); - - // Use of undefined variable fails. - assertErrorCode(coll, - [{ - $lookup: { - from: "from", - as: "as", - let : {var1: "$x"}, - pipeline: [{$project: {myVar: "$$nonExistent"}}] - } - }], - 17276); - - // The dotted path offset of a non-object variable is equivalent referencing an undefined - // field. - pipeline = [ - { - $lookup: { - let : {var1: "$x"}, - pipeline: [ - { - $match: { - $expr: { - $eq: [ - "FIELD-IS-NULL", - {$ifNull: ["$$var1.y.z", "FIELD-IS-NULL"]} - ] - } - } - }, - ], - from: "from", - as: "as", - } - }, - {$project: {_id: 0}} - ]; - - expectedResults = [ - {"x": 1, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, - {"x": 2, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, - {"x": 3, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]} - ]; - testPipeline(pipeline, expectedResults, coll); - - // Comparison where a 'let' variable references an array. - coll.drop(); - assert.writeOK(coll.insert({x: [1, 2, 3]})); - - pipeline = [ - { - $lookup: { - let : {var1: "$x"}, - pipeline: [ - {$match: {$expr: {$eq: ["$$var1", [1, 2, 3]]}}}, - ], - from: "from", - as: "as", - } - }, - {$project: {_id: 0}} - ]; - - expectedResults = [{"x": [1, 2, 3], "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}]; - testPipeline(pipeline, expectedResults, coll); - - // - // Pipeline syntax with nested object. - // - coll.drop(); - assert.writeOK(coll.insert({x: {y: {z: 10}}})); - - // Subfields of 'let' variables can be referenced via dotted path. - pipeline = [ - { - $lookup: { - let : {var1: "$x"}, - pipeline: [ - {$project: {z: "$$var1.y.z"}}, - ], - from: "from", - as: "as", - } - }, - {$project: {_id: 0}} - ]; - - expectedResults = [{ - "x": {"y": {"z": 10}}, - "as": [{"_id": 1, "z": 10}, {"_id": 2, "z": 10}, {"_id": 3, "z": 10}] - }]; - testPipeline(pipeline, expectedResults, coll); - - // 'let' variable with dotted field path off of $$ROOT. 
- pipeline = [ - { - $lookup: { - let : {var1: "$$ROOT.x.y.z"}, - pipeline: - [{$match: {$expr: {$eq: ["$$var1", "$$ROOT.x.y.z"]}}}, {$project: {_id: 0}}], - from: "lookUp", - as: "as", - } - }, - {$project: {_id: 0}} - ]; - - expectedResults = [{"x": {"y": {"z": 10}}, "as": [{"x": {"y": {"z": 10}}}]}]; - testPipeline(pipeline, expectedResults, coll); - - // 'let' variable with dotted field path off of $$CURRENT. - pipeline = [ - { - $lookup: { - let : {var1: "$$CURRENT.x.y.z"}, - pipeline: [ - {$match: {$expr: {$eq: ["$$var1", "$$CURRENT.x.y.z"]}}}, - {$project: {_id: 0}} - ], - from: "lookUp", - as: "as", - } - }, - {$project: {_id: 0}} - ]; - - expectedResults = [{"x": {"y": {"z": 10}}, "as": [{"x": {"y": {"z": 10}}}]}]; - testPipeline(pipeline, expectedResults, coll); - - // - // Pipeline syntax with nested $lookup. - // - coll.drop(); - assert.writeOK(coll.insert({_id: 1, w: 1})); - assert.writeOK(coll.insert({_id: 2, w: 2})); - assert.writeOK(coll.insert({_id: 3, w: 3})); - - from.drop(); - assert.writeOK(from.insert({_id: 1, x: 1})); - assert.writeOK(from.insert({_id: 2, x: 2})); - assert.writeOK(from.insert({_id: 3, x: 3})); - - thirdColl.drop(); - assert.writeOK(thirdColl.insert({_id: 1, y: 1})); - assert.writeOK(thirdColl.insert({_id: 2, y: 2})); - assert.writeOK(thirdColl.insert({_id: 3, y: 3})); - - fourthColl.drop(); - assert.writeOK(fourthColl.insert({_id: 1, z: 1})); - assert.writeOK(fourthColl.insert({_id: 2, z: 2})); - assert.writeOK(fourthColl.insert({_id: 3, z: 3})); - - // Nested $lookup pipeline. 
- pipeline = [ - {$match: {_id: 1}}, - { - $lookup: { - pipeline: [ - {$match: {_id: 2}}, - { - $lookup: { - pipeline: [ - {$match: {_id: 3}}, - { - $lookup: { - pipeline: [ - {$match: {_id: 1}}, - ], - from: "fourthColl", - as: "thirdLookup" - } - }, - ], - from: "thirdColl", - as: "secondLookup" - } - }, - ], - from: "from", - as: "firstLookup", - } - } - ]; - - expectedResults = [{ - "_id": 1, - "w": 1, - "firstLookup": [{ - "_id": 2, - x: 2, "secondLookup": [{"_id": 3, y: 3, "thirdLookup": [{_id: 1, z: 1}]}] - }] - }]; - testPipeline(pipeline, expectedResults, coll); - - // Deeply nested $lookup pipeline. Confirm that we can execute an aggregation with nested - // $lookup sub-pipelines up to the maximum depth, but not beyond. - let nestedPipeline = generateNestedPipeline("lookup", 20); - assert.commandWorked(coll.getDB().runCommand( - {aggregate: coll.getName(), pipeline: nestedPipeline, cursor: {}})); - - nestedPipeline = generateNestedPipeline("lookup", 21); - assertErrorCode(coll, nestedPipeline, ErrorCodes.MaxSubPipelineDepthExceeded); - - // Confirm that maximum $lookup sub-pipeline depth is respected when aggregating views whose - // combined nesting depth exceeds the limit. - nestedPipeline = generateNestedPipeline("lookup", 10); - coll.getDB().view1.drop(); - assert.commandWorked( - coll.getDB().runCommand({create: "view1", viewOn: "lookup", pipeline: nestedPipeline})); - - nestedPipeline = generateNestedPipeline("view1", 10); - coll.getDB().view2.drop(); - assert.commandWorked( - coll.getDB().runCommand({create: "view2", viewOn: "view1", pipeline: nestedPipeline})); - - // Confirm that a composite sub-pipeline depth of 20 is allowed. 
- assert.commandWorked( - coll.getDB().runCommand({aggregate: "view2", pipeline: [], cursor: {}})); - - const pipelineWhichExceedsNestingLimit = generateNestedPipeline("view2", 1); - coll.getDB().view3.drop(); - assert.commandWorked(coll.getDB().runCommand( - {create: "view3", viewOn: "view2", pipeline: pipelineWhichExceedsNestingLimit})); - - // Confirm that a composite sub-pipeline depth greater than 20 fails. - assertErrorCode(coll.getDB().view3, [], ErrorCodes.MaxSubPipelineDepthExceeded); - - // - // Error cases. - // - - // 'from', 'as', 'localField' and 'foreignField' must all be specified when run with - // localField/foreignField syntax. - assertErrorCode(coll, - [{$lookup: {foreignField: "b", from: "from", as: "same"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", from: "from", as: "same"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: "b", as: "same"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: "b", from: "from"}}], - ErrorCodes.FailedToParse); - - // localField/foreignField and pipeline/let syntax must not be mixed. 
- assertErrorCode(coll, - [{$lookup: {pipeline: [], foreignField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {pipeline: [], localField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode( - coll, - [{$lookup: {pipeline: [], localField: "b", foreignField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {let : {a: "$b"}, foreignField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {let : {a: "$b"}, localField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode( - coll, - [{ - $lookup: - {let : {a: "$b"}, localField: "b", foreignField: "b", from: "from", as: "as"} - }], - ErrorCodes.FailedToParse); - - // 'from', 'as', 'localField' and 'foreignField' must all be of type string. - assertErrorCode(coll, - [{$lookup: {localField: 1, foreignField: "b", from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: 1, from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: "b", from: 1, as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: "b", from: "from", as: 1}}], - ErrorCodes.FailedToParse); - - // 'pipeline' and 'let' must be of expected type. - assertErrorCode( - coll, [{$lookup: {pipeline: 1, from: "from", as: "as"}}], ErrorCodes.TypeMismatch); - assertErrorCode( - coll, [{$lookup: {pipeline: {}, from: "from", as: "as"}}], ErrorCodes.TypeMismatch); - assertErrorCode(coll, - [{$lookup: {let : 1, pipeline: [], from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - assertErrorCode(coll, - [{$lookup: {let : [], pipeline: [], from: "from", as: "as"}}], - ErrorCodes.FailedToParse); - - // The foreign collection must be a valid namespace. 
- assertErrorCode(coll, - [{$lookup: {localField: "a", foreignField: "b", from: "", as: "as"}}], - ErrorCodes.InvalidNamespace); - // $lookup's field must be an object. - assertErrorCode(coll, [{$lookup: "string"}], ErrorCodes.FailedToParse); - } - - // Run tests on single node. - db.lookUp.drop(); - db.from.drop(); - db.thirdColl.drop(); - db.fourthColl.drop(); - runTest(db.lookUp, db.from, db.thirdColl, db.fourthColl); - - // Run tests in a sharded environment. - var sharded = new ShardingTest({shards: 2, mongos: 1}); - assert(sharded.adminCommand({enableSharding: "test"})); - sharded.getDB('test').lookUp.drop(); - sharded.getDB('test').from.drop(); - sharded.getDB('test').thirdColl.drop(); - sharded.getDB('test').fourthColl.drop(); - assert(sharded.adminCommand({shardCollection: "test.lookUp", key: {_id: 'hashed'}})); - runTest(sharded.getDB('test').lookUp, - sharded.getDB('test').from, - sharded.getDB('test').thirdColl, - sharded.getDB('test').fourthColl); - - // An error is thrown if the from collection is sharded. - assert(sharded.adminCommand({shardCollection: "test.from", key: {_id: 1}})); - assertErrorCode(sharded.getDB('test').lookUp, - [{$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}], - 28769); - - // An error is thrown if nested $lookup from collection is sharded. - assert(sharded.adminCommand({shardCollection: "test.fourthColl", key: {_id: 1}})); - assertErrorCode(sharded.getDB('test').lookUp, - [{ - $lookup: { - pipeline: [{$lookup: {pipeline: [], from: "fourthColl", as: "same"}}], - from: "thirdColl", - as: "same" - } - }], - 28769); - - // Test that a $lookup from an unsharded collection followed by an $out to a sharded collection - // is allowed. 
- const sourceColl = sharded.getDB("test").lookUp; - sourceColl.drop(); - assert(sharded.adminCommand({shardCollection: sourceColl.getFullName(), key: {_id: "hashed"}})); - assert.commandWorked(sourceColl.insert({_id: 0, a: 0})); - - const outColl = sharded.getDB("test").out; - outColl.drop(); - assert(sharded.adminCommand({shardCollection: outColl.getFullName(), key: {_id: "hashed"}})); - - const fromColl = sharded.getDB("test").from; - fromColl.drop(); - assert.commandWorked(fromColl.insert({_id: 0, b: 0})); - - sourceColl.aggregate([ - {$lookup: {localField: "a", foreignField: "b", from: fromColl.getName(), as: "same"}}, - {$out: {to: outColl.getName(), mode: "insertDocuments"}} - ]); - - assert.eq([{a: 0, same: [{_id: 0, b: 0}]}], outColl.find({}, {_id: 0}).toArray()); - - sharded.stop(); -}()); diff --git a/jstests/aggregation/sources/lookup/lookup_subpipeline.js b/jstests/aggregation/sources/lookup/lookup_subpipeline.js new file mode 100644 index 00000000000..abffadf4c0b --- /dev/null +++ b/jstests/aggregation/sources/lookup/lookup_subpipeline.js @@ -0,0 +1,604 @@ +// Tests for the $lookup stage with a sub-pipeline. +(function() { + "use strict"; + + load("jstests/aggregation/extras/utils.js"); // For assertErrorCode. + + const testName = "lookup_subpipeline"; + + const coll = db.lookUp; + const from = db.from; + const thirdColl = db.thirdColl; + const fourthColl = db.fourthColl; + + // Used by testPipeline to sort result documents. All _ids must be primitives. 
+ function compareId(a, b) { + if (a._id < b._id) { + return -1; + } + if (a._id > b._id) { + return 1; + } + return 0; + } + + function generateNestedPipeline(foreignCollName, numLevels) { + let pipeline = [{"$lookup": {pipeline: [], from: foreignCollName, as: "same"}}]; + + for (let level = 1; level < numLevels; level++) { + pipeline = [{"$lookup": {pipeline: pipeline, from: foreignCollName, as: "same"}}]; + } + + return pipeline; + } + + // Helper for testing that pipeline returns correct set of results. + function testPipeline(pipeline, expectedResult, collection) { + assert.eq(collection.aggregate(pipeline).toArray().sort(compareId), + expectedResult.sort(compareId)); + } + + // + // Pipeline syntax using 'let' variables. + // + coll.drop(); + assert.writeOK(coll.insert({_id: 1, x: 1})); + assert.writeOK(coll.insert({_id: 2, x: 2})); + assert.writeOK(coll.insert({_id: 3, x: 3})); + + from.drop(); + assert.writeOK(from.insert({_id: 1})); + assert.writeOK(from.insert({_id: 2})); + assert.writeOK(from.insert({_id: 3})); + + // Basic non-equi theta join via $project. + let pipeline = [ + { + $lookup: { + let : {var1: "$_id"}, + pipeline: [ + {$project: {isMatch: {$gt: ["$$var1", "$_id"]}}}, + {$match: {isMatch: true}}, + {$project: {isMatch: 0}} + ], + from: "from", + as: "c", + } + }, + ]; + + let expectedResults = [ + {"_id": 1, x: 1, "c": []}, + {"_id": 2, x: 2, "c": [{"_id": 1}]}, + { + "_id": 3, + x: 3, + "c": [ + {"_id": 1}, + { + "_id": 2, + } + ] + } + ]; + testPipeline(pipeline, expectedResults, coll); + // Basic non-equi theta join via $match. 
+ pipeline = [ + { + $lookup: { + let : {var1: "$_id"}, + pipeline: [ + {$match: {$expr: {$lt: ["$_id", "$$var1"]}}}, + ], + from: "from", + as: "c", + } + }, + ]; + + expectedResults = [ + {"_id": 1, x: 1, "c": []}, + {"_id": 2, x: 2, "c": [{"_id": 1}]}, + { + "_id": 3, + x: 3, + "c": [ + {"_id": 1}, + { + "_id": 2, + } + ] + } + ]; + testPipeline(pipeline, expectedResults, coll); + + // Multi-level join using $match. + pipeline = [ + { + $lookup: { + let : {var1: "$_id"}, + pipeline: [ + {$match: {$expr: {$eq: ["$_id", "$$var1"]}}}, + { + $lookup: { + let : {var2: "$_id"}, + pipeline: [ + {$match: {$expr: {$gt: ["$_id", "$$var2"]}}}, + ], + from: "from", + as: "d" + } + }, + ], + from: "from", + as: "c", + } + }, + ]; + + expectedResults = [ + {"_id": 1, "x": 1, "c": [{"_id": 1, "d": [{"_id": 2}, {"_id": 3}]}]}, + {"_id": 2, "x": 2, "c": [{"_id": 2, "d": [{"_id": 3}]}]}, + {"_id": 3, "x": 3, "c": [{"_id": 3, "d": []}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // Equijoin with $match that can't be delegated to the query subsystem. + pipeline = [ + { + $lookup: { + let : {var1: "$x"}, + pipeline: [ + {$addFields: {newField: 2}}, + {$match: {$expr: {$eq: ["$newField", "$$var1"]}}}, + {$project: {newField: 0}} + ], + from: "from", + as: "c", + } + }, + ]; + + expectedResults = [ + {"_id": 1, "x": 1, "c": []}, + {"_id": 2, "x": 2, "c": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, + {"_id": 3, "x": 3, "c": []} + ]; + testPipeline(pipeline, expectedResults, coll); + + // Multiple variables. 
+ pipeline = [ + { + $lookup: { + let : {var1: "$_id", var2: "$x"}, + pipeline: [ + { + $project: { + isMatch: {$gt: ["$$var1", "$_id"]}, + var2Times2: {$multiply: [2, "$$var2"]} + } + }, + {$match: {isMatch: true}}, + {$project: {isMatch: 0}} + ], + from: "from", + as: "c", + }, + }, + {$project: {x: 1, c: 1}} + ]; + + expectedResults = [ + {"_id": 1, x: 1, "c": []}, + {"_id": 2, x: 2, "c": [{"_id": 1, var2Times2: 4}]}, + {"_id": 3, x: 3, "c": [{"_id": 1, var2Times2: 6}, {"_id": 2, var2Times2: 6}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // Let var as complex expression object. + pipeline = [ + { + $lookup: { + let : {var1: {$mod: ["$x", 3]}}, + pipeline: [ + {$project: {var1Mod3TimesForeignId: {$multiply: ["$$var1", "$_id"]}}}, + ], + from: "from", + as: "c", + } + }, + ]; + + expectedResults = [ + { + "_id": 1, + x: 1, + "c": [ + {_id: 1, var1Mod3TimesForeignId: 1}, + {_id: 2, var1Mod3TimesForeignId: 2}, + {_id: 3, var1Mod3TimesForeignId: 3} + ] + }, + { + "_id": 2, + x: 2, + "c": [ + {_id: 1, var1Mod3TimesForeignId: 2}, + {_id: 2, var1Mod3TimesForeignId: 4}, + {_id: 3, var1Mod3TimesForeignId: 6} + ] + }, + { + "_id": 3, + x: 3, + "c": [ + {_id: 1, var1Mod3TimesForeignId: 0}, + {_id: 2, var1Mod3TimesForeignId: 0}, + {_id: 3, var1Mod3TimesForeignId: 0} + ] + } + ]; + testPipeline(pipeline, expectedResults, coll); + + // 'let' defined variables are available to all nested sub-pipelines. 
+ pipeline = [ + {$match: {_id: 1}}, + { + $lookup: { + let : {var1: "ABC", var2: "123"}, + pipeline: [ + {$match: {_id: 1}}, + { + $lookup: { + pipeline: [ + {$match: {_id: 2}}, + {$addFields: {letVar1: "$$var1"}}, + { + $lookup: { + let : {var3: "XYZ"}, + pipeline: [{ + $addFields: { + mergedLetVars: + {$concat: ["$$var1", "$$var2", "$$var3"]} + } + }], + from: "from", + as: "join3" + } + }, + ], + from: "from", + as: "join2" + } + }, + ], + from: "from", + as: "join1", + } + } + ]; + + expectedResults = [{ + "_id": 1, + "x": 1, + "join1": [{ + "_id": 1, + "join2": [{ + "_id": 2, + "letVar1": "ABC", + "join3": [ + {"_id": 1, "mergedLetVars": "ABC123XYZ"}, + {"_id": 2, "mergedLetVars": "ABC123XYZ"}, + {"_id": 3, "mergedLetVars": "ABC123XYZ"} + ] + }] + }] + }]; + testPipeline(pipeline, expectedResults, coll); + + // 'let' variable shadowed by foreign pipeline variable. + pipeline = [ + {$match: {_id: 2}}, + { + $lookup: { + let : {var1: "$_id"}, + pipeline: [ + { + $project: { + shadowedVar: {$let: {vars: {var1: "abc"}, in : "$$var1"}}, + originalVar: "$$var1" + } + }, + { + $lookup: { + pipeline: [{ + $project: { + shadowedVar: {$let: {vars: {var1: "xyz"}, in : "$$var1"}}, + originalVar: "$$var1" + } + }], + from: "from", + as: "d" + } + } + ], + from: "from", + as: "c", + } + } + ]; + + expectedResults = [{ + "_id": 2, + "x": 2, + "c": [ + { + "_id": 1, + "shadowedVar": "abc", + "originalVar": 2, + "d": [ + {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, + {"_id": 2, "shadowedVar": "xyz", "originalVar": 2}, + {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} + ] + }, + { + "_id": 2, + "shadowedVar": "abc", + "originalVar": 2, + "d": [ + {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, + {"_id": 2, "shadowedVar": "xyz", "originalVar": 2}, + {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} + ] + }, + { + "_id": 3, + "shadowedVar": "abc", + "originalVar": 2, + "d": [ + {"_id": 1, "shadowedVar": "xyz", "originalVar": 2}, + {"_id": 2, "shadowedVar": "xyz", 
"originalVar": 2}, + {"_id": 3, "shadowedVar": "xyz", "originalVar": 2} + ] + } + ] + }]; + testPipeline(pipeline, expectedResults, coll); + + // Use of undefined variable fails. + assertErrorCode(coll, + [{ + $lookup: { + from: "from", + as: "as", + let : {var1: "$x"}, + pipeline: [{$project: {myVar: "$$nonExistent"}}] + } + }], + 17276); + + // The dotted path offset of a non-object variable is equivalent referencing an undefined + // field. + pipeline = [ + { + $lookup: { + let : {var1: "$x"}, + pipeline: [ + { + $match: { + $expr: + {$eq: ["FIELD-IS-NULL", {$ifNull: ["$$var1.y.z", "FIELD-IS-NULL"]}]} + } + }, + ], + from: "from", + as: "as", + } + }, + {$project: {_id: 0}}, + {$sort: {x: 1}} + ]; + + expectedResults = [ + {"x": 1, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, + {"x": 2, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}, + {"x": 3, "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // Comparison where a 'let' variable references an array. + coll.drop(); + assert.writeOK(coll.insert({x: [1, 2, 3]})); + + pipeline = [ + { + $lookup: { + let : {var1: "$x"}, + pipeline: [ + {$match: {$expr: {$eq: ["$$var1", [1, 2, 3]]}}}, + ], + from: "from", + as: "as", + } + }, + {$project: {_id: 0}} + ]; + + expectedResults = [{"x": [1, 2, 3], "as": [{"_id": 1}, {"_id": 2}, {"_id": 3}]}]; + testPipeline(pipeline, expectedResults, coll); + + // + // Pipeline syntax with nested object. + // + coll.drop(); + assert.writeOK(coll.insert({x: {y: {z: 10}}})); + + // Subfields of 'let' variables can be referenced via dotted path. 
+ pipeline = [ + { + $lookup: { + let : {var1: "$x"}, + pipeline: [ + {$project: {z: "$$var1.y.z"}}, + ], + from: "from", + as: "as", + } + }, + {$project: {_id: 0}} + ]; + + expectedResults = [{ + "x": {"y": {"z": 10}}, + "as": [{"_id": 1, "z": 10}, {"_id": 2, "z": 10}, {"_id": 3, "z": 10}] + }]; + testPipeline(pipeline, expectedResults, coll); + + // 'let' variable with dotted field path off of $$ROOT. + pipeline = [ + { + $lookup: { + let : {var1: "$$ROOT.x.y.z"}, + pipeline: + [{$match: {$expr: {$eq: ["$$var1", "$$ROOT.x.y.z"]}}}, {$project: {_id: 0}}], + from: "lookUp", + as: "as", + } + }, + {$project: {_id: 0}} + ]; + + expectedResults = [{"x": {"y": {"z": 10}}, "as": [{"x": {"y": {"z": 10}}}]}]; + testPipeline(pipeline, expectedResults, coll); + + // 'let' variable with dotted field path off of $$CURRENT. + pipeline = [ + { + $lookup: { + let : {var1: "$$CURRENT.x.y.z"}, + pipeline: + [{$match: {$expr: {$eq: ["$$var1", "$$CURRENT.x.y.z"]}}}, {$project: {_id: 0}}], + from: "lookUp", + as: "as", + } + }, + {$project: {_id: 0}} + ]; + + expectedResults = [{"x": {"y": {"z": 10}}, "as": [{"x": {"y": {"z": 10}}}]}]; + testPipeline(pipeline, expectedResults, coll); + + // + // Pipeline syntax with nested $lookup. + // + coll.drop(); + assert.writeOK(coll.insert({_id: 1, w: 1})); + assert.writeOK(coll.insert({_id: 2, w: 2})); + assert.writeOK(coll.insert({_id: 3, w: 3})); + + from.drop(); + assert.writeOK(from.insert({_id: 1, x: 1})); + assert.writeOK(from.insert({_id: 2, x: 2})); + assert.writeOK(from.insert({_id: 3, x: 3})); + + thirdColl.drop(); + assert.writeOK(thirdColl.insert({_id: 1, y: 1})); + assert.writeOK(thirdColl.insert({_id: 2, y: 2})); + assert.writeOK(thirdColl.insert({_id: 3, y: 3})); + + fourthColl.drop(); + assert.writeOK(fourthColl.insert({_id: 1, z: 1})); + assert.writeOK(fourthColl.insert({_id: 2, z: 2})); + assert.writeOK(fourthColl.insert({_id: 3, z: 3})); + + // Nested $lookup pipeline. 
+ pipeline = [ + {$match: {_id: 1}}, + { + $lookup: { + pipeline: [ + {$match: {_id: 2}}, + { + $lookup: { + pipeline: [ + {$match: {_id: 3}}, + { + $lookup: { + pipeline: [ + {$match: {_id: 1}}, + ], + from: "fourthColl", + as: "thirdLookup" + } + }, + ], + from: "thirdColl", + as: "secondLookup" + } + }, + ], + from: "from", + as: "firstLookup", + } + } + ]; + + expectedResults = [{ + "_id": 1, + "w": 1, + "firstLookup": [{ + "_id": 2, + x: 2, "secondLookup": [{"_id": 3, y: 3, "thirdLookup": [{_id: 1, z: 1}]}] + }] + }]; + testPipeline(pipeline, expectedResults, coll); + + // Deeply nested $lookup pipeline. Confirm that we can execute an aggregation with nested + // $lookup sub-pipelines up to the maximum depth, but not beyond. + let nestedPipeline = generateNestedPipeline("lookup", 20); + assert.commandWorked( + coll.getDB().runCommand({aggregate: coll.getName(), pipeline: nestedPipeline, cursor: {}})); + + nestedPipeline = generateNestedPipeline("lookup", 21); + assertErrorCode(coll, nestedPipeline, ErrorCodes.MaxSubPipelineDepthExceeded); + + // Confirm that maximum $lookup sub-pipeline depth is respected when aggregating views whose + // combined nesting depth exceeds the limit. + nestedPipeline = generateNestedPipeline("lookup", 10); + coll.getDB().view1.drop(); + assert.commandWorked( + coll.getDB().runCommand({create: "view1", viewOn: "lookup", pipeline: nestedPipeline})); + + nestedPipeline = generateNestedPipeline("view1", 10); + coll.getDB().view2.drop(); + assert.commandWorked( + coll.getDB().runCommand({create: "view2", viewOn: "view1", pipeline: nestedPipeline})); + + // Confirm that a composite sub-pipeline depth of 20 is allowed. 
+ assert.commandWorked(coll.getDB().runCommand({aggregate: "view2", pipeline: [], cursor: {}})); + + const pipelineWhichExceedsNestingLimit = generateNestedPipeline("view2", 1); + coll.getDB().view3.drop(); + assert.commandWorked(coll.getDB().runCommand( + {create: "view3", viewOn: "view2", pipeline: pipelineWhichExceedsNestingLimit})); + + // + // Error cases. + // + + // Confirm that a composite sub-pipeline depth greater than 20 fails. + assertErrorCode(coll.getDB().view3, [], ErrorCodes.MaxSubPipelineDepthExceeded); + + // 'pipeline' and 'let' must be of expected type. + assertErrorCode( + coll, [{$lookup: {pipeline: 1, from: "from", as: "as"}}], ErrorCodes.TypeMismatch); + assertErrorCode( + coll, [{$lookup: {pipeline: {}, from: "from", as: "as"}}], ErrorCodes.TypeMismatch); + assertErrorCode(coll, + [{$lookup: {let : 1, pipeline: [], from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {let : [], pipeline: [], from: "from", as: "as"}}], + ErrorCodes.FailedToParse); +}()); diff --git a/jstests/sharding/collation_lookup.js b/jstests/sharding/collation_lookup.js new file mode 100644 index 00000000000..f06e92ab3fc --- /dev/null +++ b/jstests/sharding/collation_lookup.js @@ -0,0 +1,454 @@ +/** + * Tests that the $lookup stage respects the collation when the local and/or foreign collections + * are sharded. + * + * The comparison of string values between the 'localField' and 'foreignField' should use the + * collation either explicitly set on the aggregation operation, or the collation inherited from the + * collection the "aggregate" command was performed on. + */ +(function() { + "use strict"; + + load("jstests/aggregation/extras/utils.js"); // for arrayEq + + function runTests(withDefaultCollationColl, withoutDefaultCollationColl, collation) { + // Test that the $lookup stage respects the inherited collation. 
+ let res = withDefaultCollationColl + .aggregate([{ + $lookup: { + from: withoutDefaultCollationColl.getName(), + localField: "str", + foreignField: "str", + as: "matched", + }, + }]) + .toArray(); + assert.eq(1, res.length, tojson(res)); + + let expected = [{_id: "lowercase", str: "abc"}, {_id: "uppercase", str: "ABC"}]; + assert(arrayEq(expected, res[0].matched), + "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched) + + " up to ordering"); + + res = withDefaultCollationColl + .aggregate([{ + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str1: "$str"}, + pipeline: [ + {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str2: "$str"}, + pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], + as: "matched2" + } + } + ], + as: "matched1", + }, + }]) + .toArray(); + assert.eq(1, res.length, tojson(res)); + + expected = [ + { + "_id": "lowercase", + "str": "abc", + "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] + }, + { + "_id": "uppercase", + "str": "ABC", + "matched2": + [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] + } + ]; + assert(arrayEq(expected, res[0].matched1), + "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched1) + + " up to ordering. " + tojson(res)); + + // Test that the $lookup stage respects the inherited collation when it optimizes with an + // $unwind stage. 
+ res = withDefaultCollationColl + .aggregate([ + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + localField: "str", + foreignField: "str", + as: "matched", + }, + }, + {$unwind: "$matched"}, + ]) + .toArray(); + assert.eq(2, res.length, tojson(res)); + + expected = [ + {_id: "lowercase", str: "abc", matched: {_id: "lowercase", str: "abc"}}, + {_id: "lowercase", str: "abc", matched: {_id: "uppercase", str: "ABC"}} + ]; + assert(arrayEq(expected, res), + "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); + + res = withDefaultCollationColl + .aggregate([ + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str1: "$str"}, + pipeline: [ + {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str2: "$str"}, + pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], + as: "matched2" + } + }, + {$unwind: "$matched2"}, + ], + as: "matched1", + }, + }, + {$unwind: "$matched1"}, + ]) + .toArray(); + assert.eq(4, res.length, tojson(res)); + + expected = [ + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "lowercase", + "str": "abc", + "matched2": {"_id": "lowercase", "str": "abc"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "lowercase", + "str": "abc", + "matched2": {"_id": "uppercase", "str": "ABC"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "uppercase", + "str": "ABC", + "matched2": {"_id": "lowercase", "str": "abc"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "uppercase", + "str": "ABC", + "matched2": {"_id": "uppercase", "str": "ABC"} + } + } + ]; + assert(arrayEq(expected, res), + "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); + + // Test that the $lookup stage respects an explicit collation on the aggregation operation. 
+ res = withoutDefaultCollationColl + .aggregate( + [ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + localField: "str", + foreignField: "str", + as: "matched", + }, + }, + ], + collation) + .toArray(); + assert.eq(1, res.length, tojson(res)); + + expected = [{_id: "lowercase", str: "abc"}, {_id: "uppercase", str: "ABC"}]; + assert(arrayEq(expected, res[0].matched), + "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched) + + " up to ordering"); + + res = withoutDefaultCollationColl + .aggregate( + [ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str1: "$str"}, + pipeline: [ + {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str2: "$str"}, + pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], + as: "matched2" + } + } + ], + as: "matched1", + }, + } + ], + collation) + .toArray(); + assert.eq(1, res.length, tojson(res)); + + expected = [ + { + "_id": "lowercase", + "str": "abc", + "matched2": [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] + }, + { + "_id": "uppercase", + "str": "ABC", + "matched2": + [{"_id": "lowercase", "str": "abc"}, {"_id": "uppercase", "str": "ABC"}] + } + ]; + assert(arrayEq(expected, res[0].matched1), + "Expected " + tojson(expected) + " to equal " + tojson(res[0].matched1) + + " up to ordering"); + + // Test that the $lookup stage respects an explicit collation on the aggregation operation + // when + // it optimizes with an $unwind stage. 
+ res = withoutDefaultCollationColl + .aggregate( + [ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + localField: "str", + foreignField: "str", + as: "matched", + }, + }, + {$unwind: "$matched"}, + ], + collation) + .toArray(); + assert.eq(2, res.length, tojson(res)); + + expected = [ + {_id: "lowercase", str: "abc", matched: {_id: "lowercase", str: "abc"}}, + {_id: "lowercase", str: "abc", matched: {_id: "uppercase", str: "ABC"}} + ]; + assert(arrayEq(expected, res), + "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); + + res = withoutDefaultCollationColl + .aggregate( + [ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str1: "$str"}, + pipeline: [ + {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str2: "$str"}, + pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], + as: "matched2" + } + }, + {$unwind: "$matched2"}, + ], + as: "matched1", + }, + }, + {$unwind: "$matched1"}, + ], + collation) + .toArray(); + assert.eq(4, res.length, tojson(res)); + + expected = [ + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "lowercase", + "str": "abc", + "matched2": {"_id": "lowercase", "str": "abc"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "lowercase", + "str": "abc", + "matched2": {"_id": "uppercase", "str": "ABC"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "uppercase", + "str": "ABC", + "matched2": {"_id": "lowercase", "str": "abc"} + } + }, + { + "_id": "lowercase", + "str": "abc", + "matched1": { + "_id": "uppercase", + "str": "ABC", + "matched2": {"_id": "uppercase", "str": "ABC"} + } + } + ]; + assert(arrayEq(expected, res), + "Expected " + tojson(expected) + " to equal " + tojson(res) + " up to ordering"); + + // Test that the $lookup stage uses the "simple" collation if a 
collation isn't set on the + // collection or the aggregation operation. + res = withoutDefaultCollationColl + .aggregate([ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withDefaultCollationColl.getName(), + localField: "str", + foreignField: "str", + as: "matched", + }, + }, + ]) + .toArray(); + assert.eq([{_id: "lowercase", str: "abc", matched: [{_id: "lowercase", str: "abc"}]}], res); + + res = withoutDefaultCollationColl + .aggregate([ + {$match: {_id: "lowercase"}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str1: "$str"}, + pipeline: [ + {$match: {$expr: {$eq: ["$str", "$$str1"]}}}, + { + $lookup: { + from: withoutDefaultCollationColl.getName(), + let : {str2: "$str"}, + pipeline: [{$match: {$expr: {$eq: ["$str", "$$str1"]}}}], + as: "matched2" + } + }, + {$unwind: "$matched2"}, + ], + as: "matched1", + }, + }, + ]) + .toArray(); + assert.eq([{ + "_id": "lowercase", + "str": "abc", + "matched1": [{ + "_id": "lowercase", + "str": "abc", + "matched2": {"_id": "lowercase", "str": "abc"} + }] + }], + res); + } + + const st = new ShardingTest({shards: 2, config: 1}); + const testName = "collation_lookup"; + const caseInsensitive = {collation: {locale: "en_US", strength: 2}}; + + const mongosDB = st.s0.getDB(testName); + const withDefaultCollationColl = mongosDB[testName + "_with_default"]; + const withoutDefaultCollationColl = mongosDB[testName + "_without_default"]; + + assert.commandWorked( + mongosDB.createCollection(withDefaultCollationColl.getName(), caseInsensitive)); + assert.writeOK(withDefaultCollationColl.insert({_id: "lowercase", str: "abc"})); + + assert.writeOK(withoutDefaultCollationColl.insert({_id: "lowercase", str: "abc"})); + assert.writeOK(withoutDefaultCollationColl.insert({_id: "uppercase", str: "ABC"})); + assert.writeOK(withoutDefaultCollationColl.insert({_id: "unmatched", str: "def"})); + + // + // Sharded collection with default collation and unsharded collection without a default + // collation. 
+ // + assert.commandWorked( + withDefaultCollationColl.createIndex({str: 1}, {collation: {locale: "simple"}})); + + // Enable sharding on the test DB and ensure its primary is shard0000. + assert.commandWorked(mongosDB.adminCommand({enableSharding: mongosDB.getName()})); + st.ensurePrimaryShard(mongosDB.getName(), st.shard0.shardName); + + // Shard the collection with a default collation. + assert.commandWorked(mongosDB.adminCommand({ + shardCollection: withDefaultCollationColl.getFullName(), + key: {str: 1}, + collation: {locale: "simple"} + })); + + // Split the collection into 2 chunks. + assert.commandWorked(mongosDB.adminCommand( + {split: withDefaultCollationColl.getFullName(), middle: {str: "abc"}})); + + // Move the chunk containing {str: "abc"} to shard0001. + assert.commandWorked(mongosDB.adminCommand({ + moveChunk: withDefaultCollationColl.getFullName(), + find: {str: "abc"}, + to: st.shard1.shardName + })); + + runTests(withDefaultCollationColl, withoutDefaultCollationColl, caseInsensitive); + + // TODO: Enable the following tests once SERVER-32536 is fixed. + // + // Sharded collection with default collation and sharded collection without a default + // collation. + // + + // Shard the collection without a default collation. + // assert.commandWorked(mongosDB.adminCommand({ + // shardCollection: withoutDefaultCollationColl.getFullName(), + // key: {_id: 1}, + // })); + + // // Split the collection into 2 chunks. + // assert.commandWorked(mongosDB.adminCommand( + // {split: withoutDefaultCollationColl.getFullName(), middle: {_id: "unmatched"}})); + + // // Move the chunk containing {_id: "lowercase"} to shard0001. 
+ // assert.commandWorked(mongosDB.adminCommand({ + // moveChunk: withoutDefaultCollationColl.getFullName(), + // find: {_id: "lowercase"}, + // to: st.shard1.shardName + // })); + + // runTests(withDefaultCollationColl, withoutDefaultCollationColl, caseInsensitive); + + st.stop(); +})(); diff --git a/jstests/sharding/lookup.js b/jstests/sharding/lookup.js new file mode 100644 index 00000000000..7bb1bca0be5 --- /dev/null +++ b/jstests/sharding/lookup.js @@ -0,0 +1,618 @@ +// Basic $lookup regression tests. + +(function() { + "use strict"; + + load("jstests/aggregation/extras/utils.js"); // For assertErrorCode. + load("jstests/libs/fixture_helpers.js"); // For isSharded. + + const st = new ShardingTest({shards: 2, config: 1, mongos: 1}); + const testName = "lookup_sharded"; + + const mongosDB = st.s0.getDB(testName); + assert.commandWorked(mongosDB.dropDatabase()); + + // Used by testPipeline to sort result documents. All _ids must be primitives. + function compareId(a, b) { + if (a._id < b._id) { + return -1; + } + if (a._id > b._id) { + return 1; + } + return 0; + } + + // Helper for testing that pipeline returns correct set of results. + function testPipeline(pipeline, expectedResult, collection) { + assert.eq(collection.aggregate(pipeline).toArray().sort(compareId), + expectedResult.sort(compareId)); + } + + function runTest(coll, from, thirdColl, fourthColl) { + let db = null; // Using the db variable is banned in this function. + + assert.commandWorked(coll.remove({})); + assert.commandWorked(from.remove({})); + assert.commandWorked(thirdColl.remove({})); + assert.commandWorked(fourthColl.remove({})); + + assert.writeOK(coll.insert({_id: 0, a: 1})); + assert.writeOK(coll.insert({_id: 1, a: null})); + assert.writeOK(coll.insert({_id: 2})); + + assert.writeOK(from.insert({_id: 0, b: 1})); + assert.writeOK(from.insert({_id: 1, b: null})); + assert.writeOK(from.insert({_id: 2})); + + // + // Basic functionality. 
+ // + + // "from" document added to "as" field if a == b, where nonexistent fields are treated as + // null. + let expectedResults = [ + {_id: 0, a: 1, "same": [{_id: 0, b: 1}]}, + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline([{$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}], + expectedResults, + coll); + + // If localField is nonexistent, it is treated as if it is null. + expectedResults = [ + {_id: 0, a: 1, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline( + [{$lookup: {localField: "nonexistent", foreignField: "b", from: "from", as: "same"}}], + expectedResults, + coll); + + // If foreignField is nonexistent, it is treated as if it is null. + expectedResults = [ + {_id: 0, a: 1, "same": []}, + {_id: 1, a: null, "same": [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline( + [{$lookup: {localField: "a", foreignField: "nonexistent", from: "from", as: "same"}}], + expectedResults, + coll); + + // If there are no matches or the from coll doesn't exist, the result is an empty array. + expectedResults = + [{_id: 0, a: 1, "same": []}, {_id: 1, a: null, "same": []}, {_id: 2, "same": []}]; + testPipeline( + [{$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}], + expectedResults, + coll); + testPipeline( + [{$lookup: {localField: "a", foreignField: "b", from: "nonexistent", as: "same"}}], + expectedResults, + coll); + + // If field name specified by "as" already exists, it is overwritten. 
+ expectedResults = [ + {_id: 0, "a": [{_id: 0, b: 1}]}, + {_id: 1, "a": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "a": [{_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline([{$lookup: {localField: "a", foreignField: "b", from: "from", as: "a"}}], + expectedResults, + coll); + + // Running multiple $lookups in the same pipeline is allowed. + expectedResults = [ + {_id: 0, a: 1, "c": [{_id: 0, b: 1}], "d": [{_id: 0, b: 1}]}, + { + _id: 1, + a: null, "c": [{_id: 1, b: null}, {_id: 2}], "d": [{_id: 1, b: null}, {_id: 2}] + }, + {_id: 2, "c": [{_id: 1, b: null}, {_id: 2}], "d": [{_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline( + [ + {$lookup: {localField: "a", foreignField: "b", from: "from", as: "c"}}, + {$project: {"a": 1, "c": 1}}, + {$lookup: {localField: "a", foreignField: "b", from: "from", as: "d"}} + ], + expectedResults, + coll); + + // + // Coalescing with $unwind. + // + + // A normal $unwind on the "as" field. + expectedResults = [ + {_id: 0, a: 1, same: {_id: 0, b: 1}}, + {_id: 1, a: null, same: {_id: 1, b: null}}, + {_id: 1, a: null, same: {_id: 2}}, + {_id: 2, same: {_id: 1, b: null}}, + {_id: 2, same: {_id: 2}} + ]; + testPipeline( + [ + {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, + {$unwind: {path: "$same"}} + ], + expectedResults, + coll); + + // An $unwind on the "as" field, with includeArrayIndex. + expectedResults = [ + {_id: 0, a: 1, same: {_id: 0, b: 1}, index: NumberLong(0)}, + {_id: 1, a: null, same: {_id: 1, b: null}, index: NumberLong(0)}, + {_id: 1, a: null, same: {_id: 2}, index: NumberLong(1)}, + {_id: 2, same: {_id: 1, b: null}, index: NumberLong(0)}, + {_id: 2, same: {_id: 2}, index: NumberLong(1)}, + ]; + testPipeline( + [ + {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, + {$unwind: {path: "$same", includeArrayIndex: "index"}} + ], + expectedResults, + coll); + + // Normal $unwind with no matching documents. 
+ expectedResults = []; + testPipeline( + [ + {$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}, + {$unwind: {path: "$same"}} + ], + expectedResults, + coll); + + // $unwind with preserveNullAndEmptyArrays with no matching documents. + expectedResults = [ + {_id: 0, a: 1}, + {_id: 1, a: null}, + {_id: 2}, + ]; + testPipeline( + [ + {$lookup: {localField: "_id", foreignField: "nonexistent", from: "from", as: "same"}}, + {$unwind: {path: "$same", preserveNullAndEmptyArrays: true}} + ], + expectedResults, + coll); + + // $unwind with preserveNullAndEmptyArrays, some with matching documents, some without. + expectedResults = [ + {_id: 0, a: 1}, + {_id: 1, a: null, same: {_id: 0, b: 1}}, + {_id: 2}, + ]; + testPipeline( + [ + {$lookup: {localField: "_id", foreignField: "b", from: "from", as: "same"}}, + {$unwind: {path: "$same", preserveNullAndEmptyArrays: true}} + ], + expectedResults, + coll); + + // $unwind with preserveNullAndEmptyArrays and includeArrayIndex, some with matching + // documents, some without. + expectedResults = [ + {_id: 0, a: 1, index: null}, + {_id: 1, a: null, same: {_id: 0, b: 1}, index: NumberLong(0)}, + {_id: 2, index: null}, + ]; + testPipeline( + [ + {$lookup: {localField: "_id", foreignField: "b", from: "from", as: "same"}}, + { + $unwind: + {path: "$same", preserveNullAndEmptyArrays: true, includeArrayIndex: "index"} + } + ], + expectedResults, + coll); + + // + // Dependencies. + // + + // If $lookup didn't add "localField" to its dependencies, this test would fail as the + // value of the "a" field would be lost and treated as null. 
+ expectedResults = [ + {_id: 0, "same": [{_id: 0, b: 1}]}, + {_id: 1, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]; + testPipeline( + [ + {$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}, + {$project: {"same": 1}} + ], + expectedResults, + coll); + + // If $lookup didn't add fields referenced by "let" variables to its dependencies, this test + // would fail as the value of the "a" field would be lost and treated as null. + expectedResults = [ + {"_id": 0, "same": [{"_id": 0, "x": 1}, {"_id": 1, "x": 1}, {"_id": 2, "x": 1}]}, + { + "_id": 1, + "same": [{"_id": 0, "x": null}, {"_id": 1, "x": null}, {"_id": 2, "x": null}] + }, + {"_id": 2, "same": [{"_id": 0}, {"_id": 1}, {"_id": 2}]} + ]; + testPipeline( + [ + { + $lookup: { + let : {var1: "$a"}, + pipeline: [{$project: {x: "$$var1"}}], + from: "from", + as: "same" + } + }, + {$project: {"same": 1}} + ], + expectedResults, + coll); + + // + // Dotted field paths. + // + + assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: 1})); + assert.writeOK(coll.insert({_id: 1, a: null})); + assert.writeOK(coll.insert({_id: 2})); + assert.writeOK(coll.insert({_id: 3, a: {c: 1}})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0, b: 1})); + assert.writeOK(from.insert({_id: 1, b: null})); + assert.writeOK(from.insert({_id: 2})); + assert.writeOK(from.insert({_id: 3, b: {c: 1}})); + assert.writeOK(from.insert({_id: 4, b: {c: 2}})); + + // Once without a dotted field. + let pipeline = [{$lookup: {localField: "a", foreignField: "b", from: "from", as: "same"}}]; + expectedResults = [ + {_id: 0, a: 1, "same": [{_id: 0, b: 1}]}, + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 3, a: {c: 1}, "same": [{_id: 3, b: {c: 1}}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // Look up a dotted field. 
+ pipeline = [{$lookup: {localField: "a.c", foreignField: "b.c", from: "from", as: "same"}}]; + // All but the last document in 'coll' have a nullish value for 'a.c'. + expectedResults = [ + {_id: 0, a: 1, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, + {_id: 1, a: null, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, + {_id: 2, same: [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]}, + {_id: 3, a: {c: 1}, same: [{_id: 3, b: {c: 1}}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // With an $unwind stage. + assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: {b: 1}})); + assert.writeOK(coll.insert({_id: 1})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0, target: 1})); + + pipeline = [ + { + $lookup: { + localField: "a.b", + foreignField: "target", + from: "from", + as: "same.documents", + } + }, + { + // Expected input to $unwind: + // {_id: 0, a: {b: 1}, same: {documents: [{_id: 0, target: 1}]}} + // {_id: 1, same: {documents: []}} + $unwind: { + path: "$same.documents", + preserveNullAndEmptyArrays: true, + includeArrayIndex: "c.d.e", + } + } + ]; + expectedResults = [ + {_id: 0, a: {b: 1}, same: {documents: {_id: 0, target: 1}}, c: {d: {e: NumberLong(0)}}}, + {_id: 1, same: {}, c: {d: {e: null}}}, + ]; + testPipeline(pipeline, expectedResults, coll); + + // + // Query-like local fields (SERVER-21287) + // + + // This must only do an equality match rather than treating the value as a regex. 
+ assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: /a regex/})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0, b: /a regex/})); + assert.writeOK(from.insert({_id: 1, b: "string that matches /a regex/"})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "b", + from: "from", + as: "b", + } + }, + ]; + expectedResults = [{_id: 0, a: /a regex/, b: [{_id: 0, b: /a regex/}]}]; + testPipeline(pipeline, expectedResults, coll); + + // + // A local value of an array. + // + + // Basic array corresponding to multiple documents. + assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: [0, 1, 2]})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0})); + assert.writeOK(from.insert({_id: 1})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "_id", + from: "from", + as: "b", + } + }, + ]; + expectedResults = [{_id: 0, a: [0, 1, 2], b: [{_id: 0}, {_id: 1}]}]; + testPipeline(pipeline, expectedResults, coll); + + // Basic array corresponding to a single document. + assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: [1]})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0})); + assert.writeOK(from.insert({_id: 1})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "_id", + from: "from", + as: "b", + } + }, + ]; + expectedResults = [{_id: 0, a: [1], b: [{_id: 1}]}]; + testPipeline(pipeline, expectedResults, coll); + + // Array containing regular expressions. 
+ assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: [/a regex/, /^x/]})); + assert.writeOK(coll.insert({_id: 1, a: [/^x/]})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0, b: "should not match a regex"})); + assert.writeOK(from.insert({_id: 1, b: "xxxx"})); + assert.writeOK(from.insert({_id: 2, b: /a regex/})); + assert.writeOK(from.insert({_id: 3, b: /^x/})); + + pipeline = [ + { + $lookup: { + localField: "a", + foreignField: "b", + from: "from", + as: "b", + } + }, + ]; + expectedResults = [ + {_id: 0, a: [/a regex/, /^x/], b: [{_id: 2, b: /a regex/}, {_id: 3, b: /^x/}]}, + {_id: 1, a: [/^x/], b: [{_id: 3, b: /^x/}]} + ]; + testPipeline(pipeline, expectedResults, coll); + + // 'localField' references a field within an array of sub-objects. + assert.commandWorked(coll.remove({})); + assert.writeOK(coll.insert({_id: 0, a: [{b: 1}, {b: 2}]})); + + assert.commandWorked(from.remove({})); + assert.writeOK(from.insert({_id: 0})); + assert.writeOK(from.insert({_id: 1})); + assert.writeOK(from.insert({_id: 2})); + assert.writeOK(from.insert({_id: 3})); + + pipeline = [ + { + $lookup: { + localField: "a.b", + foreignField: "_id", + from: "from", + as: "c", + } + }, + ]; + + expectedResults = [{"_id": 0, "a": [{"b": 1}, {"b": 2}], "c": [{"_id": 1}, {"_id": 2}]}]; + testPipeline(pipeline, expectedResults, coll); + + // + // Test $lookup when the foreign collection is a view. + // + // TODO SERVER-32548: Allow this test to run when the foreign collection is sharded. + if (!FixtureHelpers.isSharded(from)) { + assert.commandWorked( + coll.getDB().runCommand({create: "fromView", viewOn: "from", pipeline: []})); + pipeline = [ + { + $lookup: { + localField: "a.b", + foreignField: "_id", + from: "fromView", + as: "c", + } + }, + ]; + + expectedResults = + [{"_id": 0, "a": [{"b": 1}, {"b": 2}], "c": [{"_id": 1}, {"_id": 2}]}]; + testPipeline(pipeline, expectedResults, coll); + } + + // + // Error cases. 
+ // + + // 'from', 'as', 'localField' and 'foreignField' must all be specified when run with + // localField/foreignField syntax. + assertErrorCode(coll, + [{$lookup: {foreignField: "b", from: "from", as: "same"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", from: "from", as: "same"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: "b", as: "same"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: "b", from: "from"}}], + ErrorCodes.FailedToParse); + + // localField/foreignField and pipeline/let syntax must not be mixed. + assertErrorCode(coll, + [{$lookup: {pipeline: [], foreignField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {pipeline: [], localField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode( + coll, + [{$lookup: {pipeline: [], localField: "b", foreignField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {let : {a: "$b"}, foreignField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {let : {a: "$b"}, localField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode( + coll, + [{ + $lookup: + {let : {a: "$b"}, localField: "b", foreignField: "b", from: "from", as: "as"} + }], + ErrorCodes.FailedToParse); + + // 'from', 'as', 'localField' and 'foreignField' must all be of type string. 
+ assertErrorCode(coll, + [{$lookup: {localField: 1, foreignField: "b", from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: 1, from: "from", as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: "b", from: 1, as: "as"}}], + ErrorCodes.FailedToParse); + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: "b", from: "from", as: 1}}], + ErrorCodes.FailedToParse); + + // The foreign collection must be a valid namespace. + assertErrorCode(coll, + [{$lookup: {localField: "a", foreignField: "b", from: "", as: "as"}}], + ErrorCodes.InvalidNamespace); + // $lookup's field must be an object. + assertErrorCode(coll, [{$lookup: "string"}], ErrorCodes.FailedToParse); + } + + // + // Test unsharded local collection and unsharded foreign collection. + // + mongosDB.lookUp.drop(); + mongosDB.from.drop(); + mongosDB.thirdColl.drop(); + mongosDB.fourthColl.drop(); + + runTest(mongosDB.lookUp, mongosDB.from, mongosDB.thirdColl, mongosDB.fourthColl); + + // Verify that the command is sent only to the primary shard when both the local and foreign + // collections are unsharded, i.e. the explain output has no "shards" field. + assert(!assert + .commandWorked(mongosDB.lookUp.explain().aggregate([{ + $lookup: { + from: mongosDB.from.getName(), + localField: "a", + foreignField: "b", + as: "results" + } + }])) + .hasOwnProperty("shards")); + // Enable sharding on the test DB and ensure its primary is shard0000. + assert.commandWorked(mongosDB.adminCommand({enableSharding: mongosDB.getName()})); + st.ensurePrimaryShard(mongosDB.getName(), st.shard0.shardName); + + // + // Test unsharded local collection and sharded foreign collection. + // + + // Shard the foreign collection on _id. 
+ st.shardColl(mongosDB.from, {_id: 1}, {_id: 0}, {_id: 1}, mongosDB.getName()); + runTest(mongosDB.lookUp, mongosDB.from, mongosDB.thirdColl, mongosDB.fourthColl); + + // + // Test sharded local collection and unsharded foreign collection. + // + assert(mongosDB.from.drop()); + + // Shard the local collection (the "lookUp" collection passed to runTest) on _id. + st.shardColl(mongosDB.lookUp, {_id: 1}, {_id: 0}, {_id: 1}, mongosDB.getName()); + runTest(mongosDB.lookUp, mongosDB.from, mongosDB.thirdColl, mongosDB.fourthColl); + + // + // Test sharded local and foreign collections. + // + + // Shard the foreign collection on _id. + st.shardColl(mongosDB.from, {_id: 1}, {_id: 0}, {_id: 1}, mongosDB.getName()); + runTest(mongosDB.lookUp, mongosDB.from, mongosDB.thirdColl, mongosDB.fourthColl); + + // Test that a $lookup from an unsharded collection followed by an $out to a sharded collection + // is allowed. + const sourceColl = st.getDB(testName).lookUp; + assert(sourceColl.drop()); + assert(st.adminCommand({shardCollection: sourceColl.getFullName(), key: {_id: "hashed"}})); + assert.commandWorked(sourceColl.insert({_id: 0, a: 0})); + + const outColl = st.getDB(testName).out; + assert(outColl.drop()); + assert(st.adminCommand({shardCollection: outColl.getFullName(), key: {_id: "hashed"}})); + + const fromColl = st.getDB(testName).from; + assert(fromColl.drop()); + assert.commandWorked(fromColl.insert({_id: 0, b: 0})); + + sourceColl.aggregate([ + {$lookup: {localField: "a", foreignField: "b", from: fromColl.getName(), as: "same"}}, + {$out: {to: outColl.getName(), mode: "insertDocuments"}} + ]); + + assert.eq([{a: 0, same: [{_id: 0, b: 0}]}], outColl.find({}, {_id: 0}).toArray()); + + st.stop(); +}()); diff --git a/jstests/sharding/lookup_mongod_unaware.js b/jstests/sharding/lookup_mongod_unaware.js new file mode 100644 index 00000000000..0c6072f8095 --- /dev/null +++ b/jstests/sharding/lookup_mongod_unaware.js @@ -0,0 +1,168 @@ +// Tests the behavior of a $lookup when a shard contains incorrect routing 
information for the +// local and/or foreign collections. This includes when the shard thinks the collection is sharded +// when it's not, and likewise when it thinks the collection is unsharded but is actually sharded. +// +// We restart a mongod to cause it to forget that a collection was sharded. When restarted, we +// expect it to still have all the previous data. +// @tags: [requires_persistence] +(function() { + "use strict"; + + // Restarts the primary shard and ensures that it believes both collections are unsharded. + function restartPrimaryShard(rs, localColl, foreignColl) { + // Returns true if the shard is aware that the collection is sharded. + function hasRoutingInfoForNs(shardConn, coll) { + const res = shardConn.adminCommand({getShardVersion: coll, fullMetadata: true}); + assert.commandWorked(res); + return res.metadata.collVersion != undefined; + } + + rs.restart(0); + rs.awaitSecondaryNodes(); + assert(!hasRoutingInfoForNs(rs.getPrimary(), localColl.getFullName())); + assert(!hasRoutingInfoForNs(rs.getPrimary(), foreignColl.getFullName())); + } + + const testName = "lookup_stale_mongod"; + const st = new ShardingTest({ + shards: 2, + mongos: 2, + rs: {nodes: 1}, + }); + + const mongos0DB = st.s0.getDB(testName); + const mongos0LocalColl = mongos0DB[testName + "_local"]; + const mongos0ForeignColl = mongos0DB[testName + "_foreign"]; + + const mongos1DB = st.s1.getDB(testName); + const mongos1LocalColl = mongos1DB[testName + "_local"]; + const mongos1ForeignColl = mongos1DB[testName + "_foreign"]; + + const pipeline = [ + { + $lookup: + {localField: "a", foreignField: "b", from: mongos0ForeignColl.getName(), as: "same"} + }, + {$sort: {_id: 1}} + ]; + + // The results are expected to be correct if the $lookup stage is executed on the mongos which + // is aware that the collection is sharded. 
+ const expectedResults = [ + {_id: 0, a: 1, "same": [{_id: 0, b: 1}]}, + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]; + + // Ensure that shard0 is the primary shard. + assert.commandWorked(mongos0DB.adminCommand({enableSharding: mongos0DB.getName()})); + st.ensurePrimaryShard(mongos0DB.getName(), st.shard0.shardName); + + assert.writeOK(mongos0LocalColl.insert({_id: 0, a: 1})); + assert.writeOK(mongos0LocalColl.insert({_id: 1, a: null})); + + assert.writeOK(mongos0ForeignColl.insert({_id: 0, b: 1})); + assert.writeOK(mongos0ForeignColl.insert({_id: 1, b: null})); + + // Send writes through mongos1 such that it's aware of the collections and believes they are + // unsharded. + assert.writeOK(mongos1LocalColl.insert({_id: 2})); + assert.writeOK(mongos1ForeignColl.insert({_id: 2})); + + // + // Test unsharded local and sharded foreign collections, with the primary shard unaware that + // the foreign collection is sharded. + // + + // Shard the foreign collection. + assert.commandWorked( + mongos0DB.adminCommand({shardCollection: mongos0ForeignColl.getFullName(), key: {_id: 1}})); + + // Split the collection into 2 chunks: [MinKey, 1), [1, MaxKey). + assert.commandWorked( + mongos0DB.adminCommand({split: mongos0ForeignColl.getFullName(), middle: {_id: 1}})); + + // Move the [MinKey, 1) chunk to shard1. + assert.commandWorked(mongos0DB.adminCommand({ + moveChunk: mongos0ForeignColl.getFullName(), + find: {_id: 0}, + to: st.shard1.shardName, + _waitForDelete: true + })); + + // Verify $lookup results through the fresh mongos. + restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos0LocalColl.aggregate(pipeline).toArray(), expectedResults); + + // Verify $lookup results through mongos1, which is not aware that the foreign + // collection is sharded. 
The results are expected to be incorrect when both the mongos and + // primary shard incorrectly believe that a collection is unsharded. + // TODO: This should be fixed by SERVER-32629, likewise for the other aggregates in this file + // sent to the stale mongos. + restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos1LocalColl.aggregate(pipeline).toArray(), [ + {_id: 0, a: 1, "same": []}, + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]); + + // + // Test sharded local and sharded foreign collections, with the primary shard unaware that + // either collection is sharded. + // + + // Shard the local collection. + assert.commandWorked( + mongos0DB.adminCommand({shardCollection: mongos0LocalColl.getFullName(), key: {_id: 1}})); + + // Split the collection into 2 chunks: [MinKey, 1), [1, MaxKey). + assert.commandWorked( + mongos0DB.adminCommand({split: mongos0LocalColl.getFullName(), middle: {_id: 1}})); + + // Move the [MinKey, 1) chunk to shard1. + assert.commandWorked(mongos0DB.adminCommand({ + moveChunk: mongos0LocalColl.getFullName(), + find: {_id: 0}, + to: st.shard1.shardName, + _waitForDelete: true + })); + + // Verify $lookup results through the fresh mongos. + restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos0LocalColl.aggregate(pipeline).toArray(), expectedResults); + + // Verify $lookup results through mongos1, which is not aware that the local + // collection is sharded. The results are expected to be incorrect when both the mongos and + // primary shard incorrectly believe that a collection is unsharded. 
+ restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos1LocalColl.aggregate(pipeline).toArray(), [ + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]); + + // + // Test sharded local and unsharded foreign collections, with the primary shard unaware that + // the local collection is sharded. + // + + // Recreate the foreign collection as unsharded. + mongos0ForeignColl.drop(); + assert.writeOK(mongos0ForeignColl.insert({_id: 0, b: 1})); + assert.writeOK(mongos0ForeignColl.insert({_id: 1, b: null})); + assert.writeOK(mongos0ForeignColl.insert({_id: 2})); + + // Verify $lookup results through the fresh mongos. + restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos0LocalColl.aggregate(pipeline).toArray(), expectedResults); + + // Verify $lookup results through mongos1, which is not aware that the local + // collection is sharded. The results are expected to be incorrect when both the mongos and + // primary shard incorrectly believe that a collection is unsharded. + restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); + assert.eq(mongos1LocalColl.aggregate(pipeline).toArray(), [ + {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]}, + {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]} + ]); + + st.stop(); +})(); diff --git a/jstests/sharding/lookup_stale_mongos.js b/jstests/sharding/lookup_stale_mongos.js new file mode 100644 index 00000000000..0dc17958e26 --- /dev/null +++ b/jstests/sharding/lookup_stale_mongos.js @@ -0,0 +1,130 @@ +// Tests the behavior of a $lookup when the mongos contains stale routing information for the +// local and/or foreign collections. This includes when mongos thinks the collection is sharded +// when it's not, and likewise when mongos thinks the collection is unsharded but is actually +// sharded. 
(function() {
    "use strict";

    const testName = "lookup_stale_mongos";

    // Two shards and two routers: s0 performs every metadata change, while s1 is only used for
    // the initial writes and for the $lookup aggregations.
    const st = new ShardingTest({
        shards: 2,
        mongos: 2,
    });

    // Handles obtained through the router that issues the sharding commands.
    const freshDB = st.s0.getDB(testName);
    assert.commandWorked(freshDB.dropDatabase());
    const freshLocal = freshDB[testName + "_local"];
    const freshForeign = freshDB[testName + "_foreign"];

    // Handles obtained through the second router, which is never told about later changes.
    const staleDB = st.s1.getDB(testName);
    const staleLocal = staleDB[testName + "_local"];
    const staleForeign = staleDB[testName + "_foreign"];

    const pipeline = [
        {
            $lookup: {
                localField: "a",
                foreignField: "b",
                from: staleForeign.getName(),
                as: "same"
            }
        },
        {$sort: {_id: 1}}
    ];
    const expectedResults = [
        {_id: 0, a: 1, "same": [{_id: 0, b: 1}]},
        {_id: 1, a: null, "same": [{_id: 1, b: null}, {_id: 2}]},
        {_id: 2, "same": [{_id: 1, b: null}, {_id: 2}]}
    ];

    // Inserts each document individually, in order, asserting that every write succeeds.
    function insertEach(coll, docs) {
        docs.forEach(function(doc) {
            assert.writeOK(coll.insert(doc));
        });
    }

    // Shards 'coll' on {_id: 1} through mongos0, splits it into [MinKey, 1) and [1, MaxKey),
    // and moves the [MinKey, 1) chunk to shard1.
    function shardAndDistribute(coll) {
        const ns = coll.getFullName();
        assert.commandWorked(freshDB.adminCommand({shardCollection: ns, key: {_id: 1}}));
        assert.commandWorked(freshDB.adminCommand({split: ns, middle: {_id: 1}}));
        assert.commandWorked(freshDB.adminCommand(
            {moveChunk: ns, find: {_id: 0}, to: st.shard1.shardName, _waitForDelete: true}));
    }

    // Runs the $lookup through mongos1 and checks that it returns the expected documents.
    function assertLookupThroughMongos1() {
        assert.eq(staleLocal.aggregate(pipeline).toArray(), expectedResults);
    }

    // Make shard0 the primary shard for the test database.
    assert.commandWorked(freshDB.adminCommand({enableSharding: freshDB.getName()}));
    st.ensurePrimaryShard(freshDB.getName(), st.shard0.shardName);

    insertEach(freshLocal, [{_id: 0, a: 1}, {_id: 1, a: null}]);
    insertEach(freshForeign, [{_id: 0, b: 1}, {_id: 1, b: null}]);

    // Write through mongos1 as well, so that it learns about both collections while they are
    // still unsharded.
    insertEach(staleLocal, [{_id: 2}]);
    insertEach(staleForeign, [{_id: 2}]);

    //
    // Unsharded local, sharded foreign: mongos1 does not know the foreign collection has been
    // sharded.
    //
    shardAndDistribute(freshForeign);
    assertLookupThroughMongos1();

    //
    // Sharded local, sharded foreign: mongos1 does not know the local collection has been
    // sharded.
    //
    shardAndDistribute(freshLocal);
    assertLookupThroughMongos1();

    //
    // Sharded local, unsharded foreign: mongos1 does not know the foreign collection has been
    // dropped and recreated as unsharded through mongos0.
    //
    freshForeign.drop();
    insertEach(freshForeign, [{_id: 0, b: 1}, {_id: 1, b: null}, {_id: 2}]);
    assertLookupThroughMongos1();

    //
    // Unsharded local, unsharded foreign: mongos1 does not know the local collection has been
    // dropped and recreated as unsharded through mongos0.
    //
    freshLocal.drop();
    insertEach(freshLocal, [{_id: 0, a: 1}, {_id: 1, a: null}, {_id: 2}]);
    assertLookupThroughMongos1();

    st.stop();
})();