summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIrina Yatsenko <irina.yatsenko@mongodb.com>2022-05-04 14:11:09 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-05-09 14:18:16 +0000
commit5a4e56115f8ffa057c8e7fda5fb0980e85fdb28b (patch)
tree104a8ec8864c0f419a9d2474dab3dcdaf415e89f
parent8bdca7b05b91e53309d485c91f7d087a871fafa9 (diff)
downloadmongo-5a4e56115f8ffa057c8e7fda5fb0980e85fdb28b.tar.gz
SERVER-66119 Avoid duplicated seek into index for local fields with nested arrays
(cherry picked from commit 96b8052c9e34423534ac610fcb5b11bca7239d12)
-rw-r--r--jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js28
-rw-r--r--jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js7
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup.cpp47
3 files changed, 62 insertions, 20 deletions
diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js
index a3d77057022..0276f2c5038 100644
--- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js
+++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_inlj.js
@@ -119,4 +119,32 @@ runTests();
});
})();
})();
+
+/**
+ * Other miscelaneous tests for INLJ.
+ */
+(function runMiscelaneousInljTests() {
+ currentJoinAlgorithm = JoinAlgorithm.INLJ_Asc;
+
+ (function testServer66119() {
+ const docs = [
+ {_id: 0, a: {b: [1, 2, 1]}},
+ {_id: 1, a: {b: [1, 3, [1, 2]]}},
+ {_id: 2, a: {b: [1, 3, [2, 1]]}},
+ {_id: 3, a: {b: [1, 3, [1]]}},
+ {_id: 4, a: [{b: 1}, {b: [1, 2]}]},
+ {_id: 5, a: [{b: 1}, {b: [[1], 2]}]},
+ {_id: 6, a: [{b: [1, 2]}, {b: [3, [1]]}]},
+ ];
+
+ runTest_SingleForeignRecord({
+ testDescription: "Nested arrays with the same value as another key value in local",
+ localRecords: docs,
+ localField: "a.b",
+ foreignRecord: {_id: 0, a: 1},
+ foreignField: "a",
+ idsExpectedToMatch: [0, 1, 2, 3, 4, 5, 6]
+ });
+ })();
+})();
})();
diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
index e728b817acc..4710a162cdb 100644
--- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
+++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
@@ -89,6 +89,13 @@ function runTest_SingleForeignRecord(
const results = localColl.aggregate(pipeline, aggOptions).toArray();
const explain = localColl.explain().aggregate(pipeline, aggOptions);
+ // The foreign record should never duplicate in the results (e.g. see SERVER-66119). That is,
+ // the "matched" field should either be an empty array or contain a single element.
+ for (let i = 0; i < results.length; i++) {
+ assert(results[i].matched.length < 2,
+ testDescription + " Found duplicated match in " + tojson(results[i]));
+ }
+
// Build the array of ids for the results that have non-empty array in the "matched" field.
const matchedIds = results
.filter(function(x) {
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
index 15ebf0a16ad..4b62228edde 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
@@ -473,18 +473,19 @@ std::pair<SlotId /* matched docs */, std::unique_ptr<sbe::PlanStage>> buildForei
auto [foreignValueSlot, currentRootStage] =
buildForeignKeysStream(foreignRecordSlot, foreignFieldName, nodeId, slotIdGenerator);
- currentRootStage = makeLimitTree(
- makeS<FilterStage<false>>(std::move(currentRootStage),
- collatorSlot ? makeFunction("collIsMember",
- makeVariable(*collatorSlot),
- makeVariable(foreignValueSlot),
- makeVariable(localKeysSetSlot))
- : makeFunction("isMember",
- makeVariable(foreignValueSlot),
- makeVariable(localKeysSetSlot)),
- nodeId),
- nodeId,
- 1 /* take the foreign record once, even if multiple key values match */);
+ currentRootStage =
+ makeLimitTree(makeS<FilterStage<false /*IsConst*/>>(
+ std::move(currentRootStage),
+ collatorSlot ? makeFunction("collIsMember",
+ makeVariable(*collatorSlot),
+ makeVariable(foreignValueSlot),
+ makeVariable(localKeysSetSlot))
+ : makeFunction("isMember",
+ makeVariable(foreignValueSlot),
+ makeVariable(localKeysSetSlot)),
+ nodeId),
+ nodeId,
+ 1 /* take the foreign record once, even if multiple key values match */);
currentRootStage = makeS<LoopJoinStage>(std::move(foreignStage) /* outer */,
std::move(currentRootStage) /* inner */,
@@ -657,8 +658,9 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
// - Array values:
// a. If array is empty, [Undefined, Undefined]
// b. If array is NOT empty, [array[0], array[0]] (point interval composed from the first
- // array element)
- // - All other types, single point interval [value, value]
+ // array element). This is needed to match {_id: 0, a: [[1, 2]]} to {_id: 0, b: [1, 2]}.
+ // - All other types, including array itself as a value, single point interval [value, value].
+ // This is needed for arrays to match {_id: 1, a: [[1, 2]]} to {_id: 0, b: [[1, 2], 42]}.
//
// To implement these rules, we use the union stage:
// union pointValue [
@@ -669,7 +671,7 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
// coscan
// ,
// // Branch 2
- // cfilter isArray(rawValue)
+ // filter isArray(rawValue) && !isMember(pointValue, localKeyValueSet)
// project pointValue = fillEmpty(
// getElement(rawValue, 0),
// Undefined
@@ -690,7 +692,7 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
nodeId,
nullBranchOutput,
makeConstant(TypeTags::bsonUndefined, 0));
- nullBranch = makeS<FilterStage<true>>(
+ nullBranch = makeS<FilterStage<true /*IsConst*/>>(
std::move(nullBranch), makeFunction("isNull", makeVariable(singleLocalValueSlot)), nodeId);
auto arrayBranchOutput = slotIdGenerator.generate();
@@ -703,10 +705,15 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
makeVariable(singleLocalValueSlot),
makeConstant(TypeTags::NumberInt32, 0)),
makeConstant(TypeTags::bsonUndefined, 0)));
- arrayBranch =
- makeS<FilterStage<true>>(std::move(arrayBranch),
- makeFunction("isArray", makeVariable(singleLocalValueSlot)),
- nodeId);
+ auto shouldProduceSeekForArray =
+ makeBinaryOp(EPrimBinary::logicAnd,
+ makeFunction("isArray", makeVariable(singleLocalValueSlot)),
+ makeUnaryOp(EPrimUnary::logicNot,
+ makeFunction("isMember",
+ makeVariable(arrayBranchOutput),
+ makeVariable(localKeysSetSlot))));
+ arrayBranch = makeS<FilterStage<false /*IsConst*/>>(
+ std::move(arrayBranch), std::move(shouldProduceSeekForArray), nodeId);
auto valueBranchOutput = slotIdGenerator.generate();
auto valueBranch = makeProjectStage(makeLimitCoScanTree(nodeId, 1),