diff options
author | Irina Yatsenko <irina.yatsenko@mongodb.com> | 2022-05-04 14:11:09 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-09 14:18:16 +0000 |
commit | 5a4e56115f8ffa057c8e7fda5fb0980e85fdb28b (patch) | |
tree | 104a8ec8864c0f419a9d2474dab3dcdaf415e89f | |
parent | 8bdca7b05b91e53309d485c91f7d087a871fafa9 (diff) | |
download | mongo-5a4e56115f8ffa057c8e7fda5fb0980e85fdb28b.tar.gz |
SERVER-66119 Avoid duplicated seek into index for local fields with nested arrays
(cherry picked from commit 96b8052c9e34423534ac610fcb5b11bca7239d12)
3 files changed, 62 insertions, 20 deletions
diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js index a3d77057022..0276f2c5038 100644 --- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js +++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js @@ -119,4 +119,32 @@ runTests(); }); })(); })(); + +/** + * Other miscelaneous tests for INLJ. + */ +(function runMiscelaneousInljTests() { + currentJoinAlgorithm = JoinAlgorithm.INLJ_Asc; + + (function testServer66119() { + const docs = [ + {_id: 0, a: {b: [1, 2, 1]}}, + {_id: 1, a: {b: [1, 3, [1, 2]]}}, + {_id: 2, a: {b: [1, 3, [2, 1]]}}, + {_id: 3, a: {b: [1, 3, [1]]}}, + {_id: 4, a: [{b: 1}, {b: [1, 2]}]}, + {_id: 5, a: [{b: 1}, {b: [[1], 2]}]}, + {_id: 6, a: [{b: [1, 2]}, {b: [3, [1]]}]}, + ]; + + runTest_SingleForeignRecord({ + testDescription: "Nested arrays with the same value as another key value in local", + localRecords: docs, + localField: "a.b", + foreignRecord: {_id: 0, a: 1}, + foreignField: "a", + idsExpectedToMatch: [0, 1, 2, 3, 4, 5, 6] + }); + })(); +})(); })(); diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js index e728b817acc..4710a162cdb 100644 --- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js +++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js @@ -89,6 +89,13 @@ function runTest_SingleForeignRecord( const results = localColl.aggregate(pipeline, aggOptions).toArray(); const explain = localColl.explain().aggregate(pipeline, aggOptions); + // The foreign record should never duplicate in the results (e.g. see SERVER-66119). That is, + // the "matched" field should either be an empty array or contain a single element. + for (let i = 0; i < results.length; i++) { + assert(results[i].matched.length < 2, + testDescription + " Found duplicated match in " + tojson(results[i])); + } + // Build the array of ids for the results that have non-empty array in the "matched" field. const matchedIds = results .filter(function(x) { diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp index 15ebf0a16ad..4b62228edde 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp @@ -473,18 +473,19 @@ std::pair<SlotId /* matched docs */, std::unique_ptr<sbe::PlanStage>> buildForei auto [foreignValueSlot, currentRootStage] = buildForeignKeysStream(foreignRecordSlot, foreignFieldName, nodeId, slotIdGenerator); - currentRootStage = makeLimitTree( - makeS<FilterStage<false>>(std::move(currentRootStage), - collatorSlot ? makeFunction("collIsMember", - makeVariable(*collatorSlot), - makeVariable(foreignValueSlot), - makeVariable(localKeysSetSlot)) - : makeFunction("isMember", - makeVariable(foreignValueSlot), - makeVariable(localKeysSetSlot)), - nodeId), - nodeId, - 1 /* take the foreign record once, even if multiple key values match */); + currentRootStage = + makeLimitTree(makeS<FilterStage<false /*IsConst*/>>( + std::move(currentRootStage), + collatorSlot ? makeFunction("collIsMember", + makeVariable(*collatorSlot), + makeVariable(foreignValueSlot), + makeVariable(localKeysSetSlot)) + : makeFunction("isMember", + makeVariable(foreignValueSlot), + makeVariable(localKeysSetSlot)), + nodeId), + nodeId, + 1 /* take the foreign record once, even if multiple key values match */); currentRootStage = makeS<LoopJoinStage>(std::move(foreignStage) /* outer */, std::move(currentRootStage) /* inner */, @@ -657,8 +658,9 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( // - Array values: // a. If array is empty, [Undefined, Undefined] // b. If array is NOT empty, [array[0], array[0]] (point interval composed from the first - // array element) - // - All other types, single point interval [value, value] + // array element). This is needed to match {_id: 0, a: [[1, 2]]} to {_id: 0, b: [1, 2]}. + // - All other types, including array itself as a value, single point interval [value, value]. + // This is needed for arrays to match {_id: 1, a: [[1, 2]]} to {_id: 0, b: [[1, 2], 42]}. // // To implement these rules, we use the union stage: // union pointValue [ @@ -669,7 +671,7 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( // coscan // , // // Branch 2 - // cfilter isArray(rawValue) + // filter isArray(rawValue) && !isMember(pointValue, localKeyValueSet) // project pointValue = fillEmpty( // getElement(rawValue, 0), // Undefined @@ -690,7 +692,7 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( nodeId, nullBranchOutput, makeConstant(TypeTags::bsonUndefined, 0)); - nullBranch = makeS<FilterStage<true>>( + nullBranch = makeS<FilterStage<true /*IsConst*/>>( std::move(nullBranch), makeFunction("isNull", makeVariable(singleLocalValueSlot)), nodeId); auto arrayBranchOutput = slotIdGenerator.generate(); @@ -703,10 +705,15 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( makeVariable(singleLocalValueSlot), makeConstant(TypeTags::NumberInt32, 0)), makeConstant(TypeTags::bsonUndefined, 0))); - arrayBranch = - makeS<FilterStage<true>>(std::move(arrayBranch), - makeFunction("isArray", makeVariable(singleLocalValueSlot)), - nodeId); + auto shouldProduceSeekForArray = + makeBinaryOp(EPrimBinary::logicAnd, + makeFunction("isArray", makeVariable(singleLocalValueSlot)), + makeUnaryOp(EPrimUnary::logicNot, + makeFunction("isMember", + makeVariable(arrayBranchOutput), + makeVariable(localKeysSetSlot)))); + arrayBranch = makeS<FilterStage<false /*IsConst*/>>( + std::move(arrayBranch), std::move(shouldProduceSeekForArray), nodeId); auto valueBranchOutput = slotIdGenerator.generate(); auto valueBranch = makeProjectStage(makeLimitCoScanTree(nodeId, 1), |