From 3970aa26552a170f55ccf1d9d51607e0a911d8c6 Mon Sep 17 00:00:00 2001 From: Bernard Gorman Date: Wed, 2 Oct 2019 00:49:47 +0000 Subject: SERVER-43350 $lookup with no local default or user-specified collation should explicitly set the simple collation on the foreign expression context (cherry picked from commit d6133a3a5464fac202f512b0310dfeb200c126f9) --- .../sources/graphLookup/collation_graphlookup.js | 35 +++++++- .../aggregation/sources/lookup/lookup_collation.js | 99 ++++++++++++++++++++++ .../db/pipeline/document_source_graph_lookup.cpp | 13 ++- src/mongo/db/pipeline/document_source_lookup.cpp | 9 +- 4 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 jstests/aggregation/sources/lookup/lookup_collation.js diff --git a/jstests/aggregation/sources/graphLookup/collation_graphlookup.js b/jstests/aggregation/sources/graphLookup/collation_graphlookup.js index f3fbcf2ee34..00aa20fc404 100644 --- a/jstests/aggregation/sources/graphLookup/collation_graphlookup.js +++ b/jstests/aggregation/sources/graphLookup/collation_graphlookup.js @@ -46,7 +46,8 @@ assert.eq(1, res.length); assert.eq("erica", res[0].username); assert.eq(2, res[0].friendUsers.length); -// Negative test: ensure that we don't find any friends when the collation is simple. +// Negative test: The local collation does not have a default collation, and so we use the simple +// collation. Ensure that we don't find any friends when the collation is simple. res = coll.aggregate([ {$match: {username: "erica"}}, { @@ -64,6 +65,38 @@ assert.eq(1, res.length); assert.eq("erica", res[0].username); assert.eq(0, res[0].friendUsers.length); +// Create a foreign collection with a case-insensitive default collation. +foreignColl.drop(); +assert.commandWorked(db.createCollection(foreignColl.getName(), caseInsensitiveUS)); +assert.commandWorked(foreignColl.insert([{username: "JEREMY"}, {username: "JIMMY"}])); + +// Insert some more documents into the local collection. +assert.commandWorked(coll.insert({username: "fiona", friends: ["jeremy"]})); +assert.commandWorked(coll.insert({username: "gretchen", friends: ["jimmy"]})); + +// Test that $graphLookup uses the simple collation in the case where the collection on which it is +// run does not have a default collation, and that this collation is used instead of the default +// collation of the foreign collection. Exercises the fix for SERVER-43350. +res = coll.aggregate([{$match: {username: {$in: ["erica", "fiona", "gretchen"]}}}, + { + $graphLookup: { + from: foreignColl.getName(), + startWith: "$friends", + connectFromField: "friends", + connectToField: "username", + as: "friendUsers" + } + } + ]) + .toArray(); +assert.eq(3, res.length, tojson(res)); +for (let i = 0; i < res.length; ++i) { + assert(["erica", "fiona", "gretchen"].includes(res[i].username)); + assert.eq(0, res[i].friendUsers.length); +} + +// Recreate the local collection with a case-insensitive default collation, and the foreign +// collection with a case-sensitive default collation. coll.drop(); assert.commandWorked(db.createCollection(coll.getName(), caseInsensitiveUS)); assert.writeOK(coll.insert({username: "erica", friends: ["jeremy", "jimmy"]})); diff --git a/jstests/aggregation/sources/lookup/lookup_collation.js b/jstests/aggregation/sources/lookup/lookup_collation.js new file mode 100644 index 00000000000..57c00f2550e --- /dev/null +++ b/jstests/aggregation/sources/lookup/lookup_collation.js @@ -0,0 +1,99 @@ +/** + * Tests that $lookup respects the user-specified collation or the inherited local collation + * when performing comparisons on a foreign collection with a different default collation. Exercises + * the fix for SERVER-43350. + * @tags: [assumes_unsharded_collection] + */ +(function() { + +"use strict"; + +const testDB = db.getSiblingDB(jsTestName()); +const noCollationColl = testDB.no_collation; +const caseInsensitiveColl = testDB.case_insensitive; + +caseInsensitiveColl.drop(); +noCollationColl.drop(); + +const caseInsensitiveCollation = { + locale: "en_US", + strength: 1 +}; + +const simpleCollation = { + locale: "simple" +}; + +assert.commandWorked(testDB.createCollection(noCollationColl.getName())); +assert.commandWorked( + testDB.createCollection(caseInsensitiveColl.getName(), {collation: caseInsensitiveCollation})); + +assert.commandWorked( + noCollationColl.insert([{_id: "a"}, {_id: "b"}, {_id: "c"}, {_id: "d"}, {_id: "e"}])); +assert.commandWorked( + caseInsensitiveColl.insert([{_id: "a"}, {_id: "B"}, {_id: "c"}, {_id: "D"}, {_id: "e"}])); + +const lookupWithPipeline = (foreignColl) => { + return { + $lookup: {from: foreignColl.getName(), as: "foreignMatch", let: {l_id: "$_id"}, pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]} + }; +}; +const lookupNoPipeline = (foreignColl) => { + return { + $lookup: {from: foreignColl.getName(), localField: "_id", foreignField: "_id", as: "foreignMatch"} + }; +}; + +for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { + // Verify that a $lookup whose local collection has no default collation uses the simple + // collation for comparisons on a foreign collection with a non-simple default collation. + let results = noCollationColl.aggregate([lookupInto(caseInsensitiveColl)]).toArray(); + assert.docEq(results, [ + {_id: "a", foreignMatch: [{_id: "a"}]}, + {_id: "b", foreignMatch: []}, + {_id: "c", foreignMatch: [{_id: "c"}]}, + {_id: "d", foreignMatch: []}, + {_id: "e", foreignMatch: [{_id: "e"}]} + ]); + + // Verify that a $lookup whose local collection has no default collation but which is running in + // a pipeline with a non-simple user-specified collation uses the latter for comparisons on the + // foreign collection. + results = + noCollationColl + .aggregate([lookupInto(caseInsensitiveColl)], {collation: caseInsensitiveCollation}) + .toArray(); + assert.docEq(results, [ + {_id: "a", foreignMatch: [{_id: "a"}]}, + {_id: "b", foreignMatch: [{_id: "B"}]}, + {_id: "c", foreignMatch: [{_id: "c"}]}, + {_id: "d", foreignMatch: [{_id: "D"}]}, + {_id: "e", foreignMatch: [{_id: "e"}]} + ]); + + // Verify that a $lookup whose local collection has a non-simple collation uses the latter for + // comparisons on a foreign collection with no default collation. + results = caseInsensitiveColl.aggregate([lookupInto(noCollationColl)]).toArray(); + assert.docEq(results, [ + {_id: "a", foreignMatch: [{_id: "a"}]}, + {_id: "B", foreignMatch: [{_id: "b"}]}, + {_id: "c", foreignMatch: [{_id: "c"}]}, + {_id: "D", foreignMatch: [{_id: "d"}]}, + {_id: "e", foreignMatch: [{_id: "e"}]} + ]); + + // Verify that a $lookup whose local collection has a non-simple collation but which is running + // in a pipeline with a user-specified simple collation uses the latter for comparisons on the + // foreign collection. + results = + caseInsensitiveColl.aggregate([lookupInto(noCollationColl)], {collation: simpleCollation}) + .toArray(); + assert.docEq(results, [ + {_id: "a", foreignMatch: [{_id: "a"}]}, + {_id: "B", foreignMatch: []}, + {_id: "c", foreignMatch: [{_id: "c"}]}, + {_id: "D", foreignMatch: []}, + {_id: "e", foreignMatch: [{_id: "e"}]} + ]); +} +})(); \ No newline at end of file diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index a989f8f389d..72d255f5156 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -458,11 +458,20 @@ DocumentSourceGraphLookUp::DocumentSourceGraphLookUp( _cache(pExpCtx->getValueComparator()), _unwind(unwindSrc) { const auto& resolvedNamespace = pExpCtx->getResolvedNamespace(_from); - _fromExpCtx = pExpCtx->copyWith(resolvedNamespace.ns); - _fromPipeline = resolvedNamespace.pipeline; + + // We always set an explicit collator on the copied expression context, even if the collator is + // null (i.e. the simple collation). Otherwise, in the situation where the user did not specify + // a collation and the simple collation was inherited from the local collection, the execution + // machinery will incorrectly interpret the null collator and empty user collation as an + // indication that it should inherit the foreign collection's collation. + _fromExpCtx = + expCtx->copyWith(resolvedNamespace.ns, + boost::none, + expCtx->getCollator() ? expCtx->getCollator()->clone() : nullptr); // We append an additional BSONObj to '_fromPipeline' as a placeholder for the $match stage // we'll eventually construct from the input document. + _fromPipeline = resolvedNamespace.pipeline; _fromPipeline.reserve(_fromPipeline.size() + 1); _fromPipeline.push_back(BSON("$match" << BSONObj())); } diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index ef11b582394..3f5d2eb8efa 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -60,7 +60,14 @@ DocumentSourceLookUp::DocumentSourceLookUp(NamespaceString fromNs, const auto& resolvedNamespace = expCtx->getResolvedNamespace(_fromNs); _resolvedNs = resolvedNamespace.ns; _resolvedPipeline = resolvedNamespace.pipeline; - _fromExpCtx = expCtx->copyWith(_resolvedNs); + + // We always set an explicit collator on the copied expression context, even if the collator is + // null (i.e. the simple collation). Otherwise, in the situation where the user did not specify + // a collation and the simple collation was inherited from the local collection, the execution + // machinery will incorrectly interpret the null collator and empty user collation as an + // indication that it should inherit the foreign collection's collation. + _fromExpCtx = expCtx->copyWith( + _resolvedNs, boost::none, expCtx->getCollator() ? expCtx->getCollator()->clone() : nullptr); _fromExpCtx->subPipelineDepth += 1; uassert(ErrorCodes::MaxSubPipelineDepthExceeded, -- cgit v1.2.1