diff options
author | Irina Yatsenko <irina.yatsenko@mongodb.com> | 2022-03-31 17:02:29 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-31 18:51:58 +0000 |
commit | 176f398bd3cf651f349660bb82285eaf46252650 (patch) | |
tree | 0dc504e4d6541f0fd4cb7b38aa22b92bf0480aa1 | |
parent | 967f1c59872b76b34d83bf5151999f273acdb7e4 (diff) | |
download | mongo-176f398bd3cf651f349660bb82285eaf46252650.tar.gz |
SERVER-64482 Support collation when lowering lookup into SBE
5 files changed, 229 insertions, 146 deletions
diff --git a/jstests/aggregation/sources/lookup/lookup_collation.js b/jstests/aggregation/sources/lookup/lookup_collation.js index 13bacca992c..892ee29f1f4 100644 --- a/jstests/aggregation/sources/lookup/lookup_collation.js +++ b/jstests/aggregation/sources/lookup/lookup_collation.js @@ -2,6 +2,15 @@ * Tests that $lookup respects the user-specified collation or the inherited local collation * when performing comparisons on a foreign collection with a different default collation. Exercises * the fix for SERVER-43350. + * + * Collation can be set at three different levels for $lookup stage + * 1. on the local collection (collation on the foreign collection is always ignored) + * 2. on the $lookup stage via '_internalCollation' property + * 3. on the aggregation command via 'collation' property in options + * + * The three settings have the following precedence: + * 1. '_internalCollation' overrides all others + * 2. 'collation' option overrides local collection's collation */ load("jstests/aggregation/extras/utils.js"); // For anyEq. load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. @@ -15,103 +24,177 @@ load("jstests/libs/fixture_helpers.js"); // For isSharded. const testDB = db.getSiblingDB(jsTestName()); assert.commandWorked(testDB.dropDatabase()); -// TODO SERVER-64482 Reenable this test when SERVER-64482 is done. -if (checkSBEEnabled(testDB, ["featureFlagSBELookupPushdown"])) { - jsTestLog("Skipping test because SBE and SBE $lookup features are both enabled."); - return; -} - -const caseInsensitiveCollation = { +const caseInsensitive = { locale: "en_US", strength: 1 }; -const simpleCollation = { +const caseSensitive = { locale: "simple" }; -assert.commandWorked(testDB.createCollection("no_collation")); -assert.commandWorked( - testDB.createCollection("case_insensitive", {collation: caseInsensitiveCollation})); +// When no collation is specified for a collection, it uses the default, case-sensitive collation. +assert.commandWorked(testDB.createCollection("case_sensitive")); +const collAa = testDB.case_sensitive; +assert.commandWorked(testDB.createCollection("case_sensitive_indexed")); +const collAa_indexed = testDB.case_sensitive_indexed; -const noCollationColl = testDB.no_collation; -const caseInsensitiveColl = testDB.case_insensitive; +assert.commandWorked(testDB.createCollection("case_insensitive", {collation: caseInsensitive})); +const collAA = testDB.case_insensitive; // Do not run the rest of the tests if the foreign collection is implicitly sharded but the flag to // allow $lookup/$graphLookup into a sharded collection is disabled. const getShardedLookupParam = db.adminCommand({getParameter: 1, featureFlagShardedLookup: 1}); const isShardedLookupEnabled = getShardedLookupParam.hasOwnProperty("featureFlagShardedLookup") && getShardedLookupParam.featureFlagShardedLookup.value; -if (FixtureHelpers.isSharded(caseInsensitiveColl) && !isShardedLookupEnabled) { +if (FixtureHelpers.isSharded(collAA) && !isShardedLookupEnabled) { return; } -assert.commandWorked( - noCollationColl.insert([{_id: "a"}, {_id: "b"}, {_id: "c"}, {_id: "d"}, {_id: "e"}])); -assert.commandWorked( - caseInsensitiveColl.insert([{_id: "a"}, {_id: "B"}, {_id: "c"}, {_id: "D"}, {_id: "e"}])); +const records = [{_id: 0, key: "a"}, {_id: 1, key: "A"}]; +assert.commandWorked(collAa.insert(records)); +assert.commandWorked(collAA.insert(records)); +assert.commandWorked(collAa_indexed.insert(records)); +assert.commandWorked(collAa_indexed.createIndex({key: 1})); const lookupWithPipeline = (foreignColl) => { return { - $lookup: {from: foreignColl.getName(), as: "foreignMatch", let: {l_id: "$_id"}, pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]} + $lookup: { + from: foreignColl.getName(), + as: "matched", + let: {l_key: "$key"}, + pipeline: [{$match: {$expr: {$eq: ["$key", "$$l_key"]}}}] + } }; }; const lookupNoPipeline = (foreignColl) => { return { - $lookup: {from: foreignColl.getName(), localField: "_id", foreignField: "_id", as: "foreignMatch"} + $lookup: + {from: foreignColl.getName(), localField: "key", foreignField: "key", as: "matched"} }; }; -for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { - // Verify that a $lookup whose local collection has no default collation uses the simple - // collation for comparisons on a foreign collection with a non-simple default collation. - let results = noCollationColl.aggregate([lookupInto(caseInsensitiveColl)]).toArray(); - assert(anyEq(results, [ - {_id: "a", foreignMatch: [{_id: "a"}]}, - {_id: "b", foreignMatch: []}, - {_id: "c", foreignMatch: [{_id: "c"}]}, - {_id: "d", foreignMatch: []}, - {_id: "e", foreignMatch: [{_id: "e"}]} - ])); - - // Verify that a $lookup whose local collection has no default collation but which is running in - // a pipeline with a non-simple user-specified collation uses the latter for comparisons on the - // foreign collection. - results = - noCollationColl - .aggregate([lookupInto(caseInsensitiveColl)], {collation: caseInsensitiveCollation}) - .toArray(); - assert(anyEq(results, [ - {_id: "a", foreignMatch: [{_id: "a"}]}, - {_id: "b", foreignMatch: [{_id: "B"}]}, - {_id: "c", foreignMatch: [{_id: "c"}]}, - {_id: "d", foreignMatch: [{_id: "D"}]}, - {_id: "e", foreignMatch: [{_id: "e"}]} - ])); - - // Verify that a $lookup whose local collection has a non-simple collation uses the latter for - // comparisons on a foreign collection with no default collation. - results = caseInsensitiveColl.aggregate([lookupInto(noCollationColl)]).toArray(); - assert(anyEq(results, [ - {_id: "a", foreignMatch: [{_id: "a"}]}, - {_id: "B", foreignMatch: [{_id: "b"}]}, - {_id: "c", foreignMatch: [{_id: "c"}]}, - {_id: "D", foreignMatch: [{_id: "d"}]}, - {_id: "e", foreignMatch: [{_id: "e"}]} - ])); - - // Verify that a $lookup whose local collection has a non-simple collation but which is running - // in a pipeline with a user-specified simple collation uses the latter for comparisons on the - // foreign collection. - results = - caseInsensitiveColl.aggregate([lookupInto(noCollationColl)], {collation: simpleCollation}) - .toArray(); - assert(anyEq(results, [ - {_id: "a", foreignMatch: [{_id: "a"}]}, - {_id: "B", foreignMatch: []}, - {_id: "c", foreignMatch: [{_id: "c"}]}, - {_id: "D", foreignMatch: []}, - {_id: "e", foreignMatch: [{_id: "e"}]} - ])); -} +const resultCaseSensistive = [ + {_id: 0, key: "a", matched: [{_id: 0, key: "a"}]}, + {_id: 1, key: "A", matched: [{_id: 1, key: "A"}]}, +]; +const resultCaseInsensitive = [ + {_id: 0, key: "a", matched: [{_id: 0, key: "a"}, {_id: 1, key: "A"}]}, + {_id: 1, key: "A", matched: [{_id: 0, key: "a"}, {_id: 1, key: "A"}]}, +]; +let results = []; + +// Collation on the foreign collection should be ignored. +(function testLocalCollationPrecedence() { + for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { + results = collAa.aggregate([lookupInto(collAA)]).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseSensistive, + extraErrorMsg: " Default collation on local, running: " + tojson(lookupInto) + }); + + results = collAA.aggregate([lookupInto(collAa)]).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on local, running: " + tojson(lookupInto) + }); + + // When lowering to SBE a different join algorithm (HashJoin) is used if 'allowDiskUse' is + // set to true. We only need to verify the collation of HJ once, because it works the same + // independent of how the collation is chosen. + results = collAA.aggregate([lookupInto(collAa)], {allowDiskUse: true}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on local, disk use allowed, running: " + + tojson(lookupInto) + }); + } +})(); + +// Collation at the command level should override collation of the local collection. +(function testCommandCollationPrecedence() { + for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { + results = collAa.aggregate([lookupInto(collAa)], {collation: caseInsensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on command, running: " + tojson(lookupInto) + }); + + results = collAA.aggregate([lookupInto(collAa)], {collation: caseSensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseSensistive, + extraErrorMsg: " Case-sensitive collation on command, running: " + tojson(lookupInto) + }); + } +})(); + +// Collation set on $lookup stage with '_internalCollation' should override collation of the local +// collection and on the command. +(function testStageCollationPrecedence() { + for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { + let lookupStage = lookupInto(collAa); + lookupStage.$lookup._internalCollation = caseInsensitive; + results = collAa.aggregate([lookupStage], {collation: caseSensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on stage, running: " + tojson(lookupInto) + }); + + lookupStage.$lookup._internalCollation = caseSensitive; + results = collAA.aggregate([lookupStage], {collation: caseInsensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseSensistive, + extraErrorMsg: " Case-sensitive collation on stage, running: " + tojson(lookupInto) + }); + } +})(); + +// In presense of indexes lookup might choose a different strategy for the join, that relies on the +// index (INLJ). It should respect the effective collation of $lookup. +(function testCollationWithIndexes() { + // TODO SERVER-65115: integration of collation with INLJ NYI. + if (checkSBEEnabled(testDB, ["featureFlagSBELookupPushdown"])) { + jsTestLog("Skipping test because of SERVER-65115."); + return; + } + + for (let lookupInto of [lookupWithPipeline, lookupNoPipeline]) { + // Local and foreign have different collations. + results = collAA.aggregate([lookupInto(collAa_indexed)]).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on local, foreign is indexed, running: " + + tojson(lookupInto) + }); + + // Command-level collation overrides collection-level collation. + results = + collAa.aggregate([lookupInto(collAa_indexed)], {collation: caseInsensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on command, foreign is indexed, running: " + + tojson(lookupInto) + }); + + // Stage-level collation overrides collection-level and command-level collations. + let lookupStage = lookupInto(collAa_indexed); + lookupStage.$lookup._internalCollation = caseInsensitive; + results = collAa.aggregate([lookupStage], {collation: caseSensitive}).toArray(); + assertArrayEq({ + actual: results, + expected: resultCaseInsensitive, + extraErrorMsg: " Case-insensitive collation on stage, foreign is indexed, running: " + + tojson(lookupInto) + }); + } +})(); })(); diff --git a/jstests/aggregation/sources/lookup/lookup_foreign_collation.js b/jstests/aggregation/sources/lookup/lookup_foreign_collation.js index e7f68372df4..6937fe42854 100644 --- a/jstests/aggregation/sources/lookup/lookup_foreign_collation.js +++ b/jstests/aggregation/sources/lookup/lookup_foreign_collation.js @@ -6,7 +6,6 @@ * ] */ load("jstests/aggregation/extras/utils.js"); // For anyEq. -load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. (function() { @@ -15,13 +14,6 @@ load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. load("jstests/libs/fixture_helpers.js"); // For isSharded. const testDB = db.getSiblingDB(jsTestName()); - -// TODO SERVER-64482 Reenable this test when SERVER-64482 is done. -if (checkSBEEnabled(testDB, ["featureFlagSBELookupPushdown"])) { - jsTestLog("Skipping test because SBE and SBE $lookup features are both enabled."); - return; -} - const localColl = testDB.local_no_collation; const localCaseInsensitiveColl = testDB.local_collation; const foreignColl = testDB.foreign_no_collation; @@ -71,15 +63,15 @@ function setup() { // Pipeline style $lookup with cases insensitive collation. const lookupWithPipeline = (foreignColl) => { return { - $lookup: {from: foreignColl.getName(), as: "foreignMatch", _internalCollation: caseInsensitiveCollation, let: {l_id: "$_id"}, pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]} - }; + $lookup: {from: foreignColl.getName(), as: "foreignMatch", _internalCollation: caseInsensitiveCollation, let: {l_id: "$_id"}, pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]} + }; }; // Local-field foreign-field style $lookup with cases insensitive collation. const lookupWithLocalForeignField = (foreignColl) => { return { - $lookup: {from: foreignColl.getName(), localField: "_id", foreignField: "_id", as: "foreignMatch", _internalCollation: caseInsensitiveCollation} - }; + $lookup: {from: foreignColl.getName(), localField: "_id", foreignField: "_id", as: "foreignMatch", _internalCollation: caseInsensitiveCollation} + }; }; const resultSetCaseInsensitive = [ @@ -110,14 +102,14 @@ function setup() { function assertExpectedResultSet( localColl, foreignColl, commandCollation, lookupCollation, expectedResults) { const lookupWithPipeline = {$lookup: {from: foreignColl.getName(), - as: "foreignMatch", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]}}; + as: "foreignMatch", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}]}}; const lookupWithLocalForeignField = {$lookup: {from: foreignColl.getName(), - localField: "_id", - foreignField: "_id", - as: "foreignMatch"}}; + localField: "_id", + foreignField: "_id", + as: "foreignMatch"}}; if (lookupCollation) { lookupWithPipeline.$lookup._internalCollation = lookupCollation; @@ -162,15 +154,15 @@ function setup() { setup(); const lookupWithPipeline = {$lookup: {from: foreignColl.getName(), - as: "foreignMatch", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}, - {$lookup: {from: localColl.getName(), - as: "foreignMatch2", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], - _internalCollation: simpleCollation}}], - _internalCollation: caseInsensitiveCollation}}; + as: "foreignMatch", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}, + {$lookup: {from: localColl.getName(), + as: "foreignMatch2", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], + _internalCollation: simpleCollation}}], + _internalCollation: caseInsensitiveCollation}}; const resultSet = [ {_id: "a", foreignMatch: [{_id: "a", "foreignMatch2": [{"_id": "a"}]}]}, @@ -190,12 +182,12 @@ function setup() { // A $lookup stage with a collation that differs from the collection and command collation // will not absorb a $match on unwound results. let pipeline = [{$lookup: {from: foreignColl.getName(), - as: "foreignMatch", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], - _internalCollation: caseInsensitiveCollation}}, - {$unwind: "$foreignMatch"}, - {$match: {"foreignMatch._id": "b"}}]; + as: "foreignMatch", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], + _internalCollation: caseInsensitiveCollation}}, + {$unwind: "$foreignMatch"}, + {$match: {"foreignMatch._id": "b"}}]; let results = localColl.aggregate(pipeline).toArray(); assert.eq(0, results.length); @@ -210,12 +202,12 @@ function setup() { // A $lookup stage with a collation that matches the command collation will absorb a $match // stage. pipeline = [{$lookup: {from: foreignColl.getName(), - as: "foreignMatch", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], - _internalCollation: caseInsensitiveCollation}}, - {$unwind: "$foreignMatch"}, - {$match: {"foreignMatch._id": "b"}}]; + as: "foreignMatch", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], + _internalCollation: caseInsensitiveCollation}}, + {$unwind: "$foreignMatch"}, + {$match: {"foreignMatch._id": "b"}}]; let expectedResults = [{"_id": "b", "foreignMatch": {"_id": "B"}}]; @@ -229,12 +221,12 @@ function setup() { // A $lookup stage with a collation that matches the local collection collation will absorb // a $match stage. pipeline = [{$lookup: {from: foreignColl.getName(), - as: "foreignMatch", - let: {l_id: "$_id"}, - pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], - _internalCollation: caseInsensitiveCollation}}, - {$unwind: "$foreignMatch"}, - {$match: {"foreignMatch._id": "b"}}]; + as: "foreignMatch", + let: {l_id: "$_id"}, + pipeline: [{$match: {$expr: {$eq: ["$_id", "$$l_id"]}}}], + _internalCollation: caseInsensitiveCollation}}, + {$unwind: "$foreignMatch"}, + {$match: {"foreignMatch._id": "b"}}]; expectedResults = [{"_id": "b", "foreignMatch": {"_id": "B"}}]; diff --git a/jstests/sharding/query/collation_lookup.js b/jstests/sharding/query/collation_lookup.js index c9fb28e3a15..57f4e8eeeed 100644 --- a/jstests/sharding/query/collation_lookup.js +++ b/jstests/sharding/query/collation_lookup.js @@ -569,13 +569,6 @@ const caseInsensitive = { const mongosDB = st.s0.getDB(testName); -// TODO SERVER-64482 Reenable this test when SERVER-64482 is done. -if (checkSBEEnabled(mongosDB, ["featureFlagSBELookupPushdown"])) { - jsTestLog("Skipping test because SBE and SBE $lookup features are both enabled."); - st.stop(); - return; -} - const withDefaultCollationColl = mongosDB[testName + "_with_default"]; const withoutDefaultCollationColl = mongosDB[testName + "_without_default"]; diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index 00bddd1c88e..4c1f0bbd68b 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -381,6 +381,8 @@ void DocumentSourceLookUp::determineSbeCompatibility() { // than indexes into arrays, which is compatible with SBE.) && !FieldRef(_localField->fullPath()).hasNumericPathComponents() && !FieldRef(_foreignField->fullPath()).hasNumericPathComponents() + // Setting a collator on an individual $lookup stage with _internalCollation isn't supported + && !_hasExplicitCollation // We currently don't lower $lookup against views ('_fromNs' does not correspond to a // view). && pExpCtx->getResolvedNamespace(_fromNs).pipeline.empty(); diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp index 780fa3b10e9..6e7fa784142 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp @@ -263,10 +263,10 @@ std::pair<SlotId /* keyValueSlot */, std::unique_ptr<sbe::PlanStage>> buildForei terminalStagesToUnion.emplace_back(makeLimitCoScanTree(nodeId, 1)); std::unique_ptr<sbe::PlanStage> unionStage = - sbe::makeS<UnionStage>(std::move(terminalStagesToUnion), - std::vector{makeSV(terminalUnwindOutputSlot), makeSV(keyValueSlot)}, - makeSV(unionOutputSlot), - nodeId); + makeS<UnionStage>(std::move(terminalStagesToUnion), + std::vector{makeSV(terminalUnwindOutputSlot), makeSV(keyValueSlot)}, + makeSV(unionOutputSlot), + nodeId); currentStage = makeS<LoopJoinStage>(std::move(currentStage), std::move(unionStage), @@ -304,7 +304,7 @@ std::pair<SlotId /* keyValuesSetSlot */, std::unique_ptr<sbe::PlanStage>> buildK EvalStage{std::move(keyValuesStage), SlotVector{}}, makeSV(), /* groupBy slots - "none" means creating a single group */ makeEM(keyValuesSetSlot, makeFunction("addToSet"_sd, makeVariable(keyValueSlot))), - {} /* collatorSlot */, + boost::none /* we group _all_ key values into a single set, so collator is irrelevant */, false /* allowDiskUse */, nodeId); @@ -321,7 +321,7 @@ std::pair<SlotId /* keyValuesSetSlot */, std::unique_ptr<sbe::PlanStage>> buildK arrayWithNullView->push_back(TypeTags::Null, 0); std::unique_ptr<EExpression> isNonEmptySetExpr = - makeBinaryOp(sbe::EPrimBinary::greater, + makeBinaryOp(EPrimBinary::greater, makeFunction("getArraySize", makeVariable(keyValuesSetSlot)), makeConstant(TypeTags::NumberInt32, 0)); @@ -329,9 +329,9 @@ std::pair<SlotId /* keyValuesSetSlot */, std::unique_ptr<sbe::PlanStage>> buildK packedKeyValuesStage = makeProject(std::move(packedKeyValuesStage), nodeId, nonEmptySetSlot, - sbe::makeE<sbe::EIf>(std::move(isNonEmptySetExpr), - makeVariable(keyValuesSetSlot), - std::move(arrayWithNull))); + makeE<EIf>(std::move(isNonEmptySetExpr), + makeVariable(keyValuesSetSlot), + std::move(arrayWithNull))); keyValuesSetSlot = nonEmptySetSlot; } @@ -363,7 +363,7 @@ std::pair<SlotId /* resultSlot */, std::unique_ptr<sbe::PlanStage>> buildForeign std::move(innerBranch), makeSV(), /* groupBy slots */ makeEM(accumulatorSlot, makeFunction("addToArray"_sd, makeVariable(foreignRecordSlot))), - {} /* collatorSlot */, + {} /* collatorSlot, no collation here because we want to return all matches "as is" */, false /* allowDiskUse */, nodeId); @@ -401,6 +401,7 @@ std::pair<SlotId /* matched docs */, std::unique_ptr<sbe::PlanStage>> buildNljLo std::unique_ptr<sbe::PlanStage> foreignStage, SlotId foreignRecordSlot, StringData foreignFieldName, + boost::optional<SlotId> collatorSlot, const PlanNodeId nodeId, SlotIdGenerator& slotIdGenerator) { // Build the outer branch that produces the set of local key values. @@ -421,16 +422,21 @@ std::pair<SlotId /* matched docs */, std::unique_ptr<sbe::PlanStage>> buildNljLo // Add a filter that only lets through foreign records with non-empty intersection of local and // foreign keys. - std::unique_ptr<EExpression> haveMatchingKeys = makeBinaryOp( - sbe::EPrimBinary::greater, - makeFunction("getArraySize", - sbe::makeE<sbe::EFunction>("setIntersection", - sbe::makeEs(makeE<EVariable>(localKeySlot), - makeE<EVariable>(foreignKeySlot)))), - makeConstant(TypeTags::NumberInt32, 0)); + std::unique_ptr<EExpression> setIntersectionExpr = (collatorSlot) + ? makeFunction("collSetIntersection", + makeVariable(*collatorSlot), + makeVariable(localKeySlot), + makeVariable(foreignKeySlot)) + : makeFunction("setIntersection", makeVariable(localKeySlot), makeVariable(foreignKeySlot)); + std::unique_ptr<EExpression> haveMatchingKeysExpr = + makeBinaryOp(EPrimBinary::greater, + makeFunction("getArraySize", std::move(setIntersectionExpr)), + makeConstant(TypeTags::NumberInt32, 0)); EvalStage innerBranch = makeFilter<false /* IsConst */, false /* IsEof */>( - EvalStage{std::move(foreignKeyStage), SlotVector{}}, std::move(haveMatchingKeys), nodeId); + EvalStage{std::move(foreignKeyStage), SlotVector{}}, + std::move(haveMatchingKeysExpr), + nodeId); // Group the matched foreign documents into a list, stored in the 'innerResultSlot'. // It creates a union stage internally so that when there's no matching foreign records, an @@ -551,6 +557,7 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( const IndexEntry& index, StringMap<const IndexAccessMethod*>& iamMap, PlanYieldPolicySBE* yieldPolicy, + boost::optional<SlotId> collatorSlot, const PlanNodeId nodeId, SlotIdGenerator& slotIdGenerator, FrameIdGenerator& frameIdGenerator, @@ -814,6 +821,7 @@ std::pair<SlotId /*matched docs*/, std::unique_ptr<sbe::PlanStage>> buildHashJoi std::unique_ptr<sbe::PlanStage> foreignStage, SlotId foreignRecordSlot, const FieldPath& foreignFieldName, + boost::optional<SlotId> collatorSlot, const PlanNodeId nodeId, SlotIdGenerator& slotIdGenerator) { @@ -836,16 +844,15 @@ std::pair<SlotId /*matched docs*/, std::unique_ptr<sbe::PlanStage>> buildHashJoi // Build lookup stage that matches the local and foreign rows and aggregates the // foreign values in an array. auto lookupAggSlot = slotIdGenerator.generate(); - auto aggs = - makeEM(lookupAggSlot, - stage_builder::makeFunction("addToArray", makeE<EVariable>(foreignRecordSlot))); + auto aggs = makeEM(lookupAggSlot, + stage_builder::makeFunction("addToArray", makeVariable(foreignRecordSlot))); std::unique_ptr<sbe::PlanStage> hl = makeS<HashLookupStage>(std::move(outerRootStage), std::move(foreignKeyStage), localKeySlot, foreignKeySlot, makeSV(foreignRecordSlot), std::move(aggs), - boost::none /*collatorSlot*/, + collatorSlot, nodeId); // Add a projection that makes so that empty array is returned if no foreign row were matched. @@ -868,6 +875,7 @@ std::pair<SlotId /*matched docs*/, std::unique_ptr<sbe::PlanStage>> buildLookupS std::unique_ptr<sbe::PlanStage> foreignStage, SlotId foreignRecordSlot, const FieldPath& foreignFieldName, + boost::optional<SlotId> collatorSlot, const PlanNodeId nodeId, SlotIdGenerator& slotIdGenerator) { switch (lookupStrategy) { @@ -878,6 +886,7 @@ std::pair<SlotId /*matched docs*/, std::unique_ptr<sbe::PlanStage>> buildLookupS std::move(foreignStage), foreignRecordSlot, foreignFieldName.fullPath(), + collatorSlot, nodeId, slotIdGenerator); case EqLookupNode::LookupStrategy::kHashJoin: @@ -887,6 +896,7 @@ std::pair<SlotId /*matched docs*/, std::unique_ptr<sbe::PlanStage>> buildLookupS std::move(foreignStage), foreignRecordSlot, foreignFieldName, + collatorSlot, nodeId, slotIdGenerator); default: @@ -936,6 +946,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::move(localStage), eqLookupNode->nodeId(), _slotIdGenerator); } + boost::optional<SlotId> collatorSlot = _state.data->env->getSlotIfExists("collator"_sd); switch (eqLookupNode->lookupStrategy) { case EqLookupNode::LookupStrategy::kIndexedLoopJoin: { tassert( @@ -952,6 +963,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder *eqLookupNode->idxEntry, _data.iamMap, _yieldPolicy, + collatorSlot, eqLookupNode->nodeId(), _slotIdGenerator, _frameIdGenerator, @@ -988,6 +1000,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::move(foreignStage), foreignResultSlot, eqLookupNode->joinFieldForeign, + collatorSlot, eqLookupNode->nodeId(), _slotIdGenerator); } |