summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Lapkov <nikita.lapkov@mongodb.com>2022-03-31 14:12:24 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-31 15:12:22 +0000
commit857392e9d225d44e2af5325e84c7ba3ad68fad56 (patch)
tree415ef36ac27b03a74c64a5415f93dc35eb5f0e7d
parent123d582388017c273e0939e644026bce20184739 (diff)
downloadmongo-857392e9d225d44e2af5325e84c7ba3ad68fad56.tar.gz
SERVER-63574 Support all types in the index join strategy of $lookup
-rw-r--r--buildscripts/resmokeconfig/fully_disabled_feature_flags.yml2
-rw-r--r--jstests/core/index_stats.js12
-rw-r--r--jstests/noPassthrough/lookup_pushdown.js37
-rw-r--r--jstests/noPassthrough/lookup_pushdown_semantics.js194
-rw-r--r--jstests/sharding/query/lookup.js6
-rw-r--r--src/mongo/db/exec/sbe/values/bson.h4
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp90
-rw-r--r--src/mongo/db/query/planner_analysis.cpp6
-rw-r--r--src/mongo/db/query/query_feature_flags.idl5
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup.cpp243
10 files changed, 481 insertions, 118 deletions
diff --git a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
index 705f7831fc4..fb6b2875116 100644
--- a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
+++ b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
@@ -8,6 +8,4 @@
# Disable featureFlagRequireTenantID until all paths pass tenant id to TenantNamespace
# and TenantDatabase constructors.
- featureFlagRequireTenantID
-# TODO(SERVER-63574): Remove this feature flag and enable index join in all feature flag variant once all types of local fields are supported.
-- featureFlagSBELookupPushdownIndexJoin
- featureFlagAutoParameterization
diff --git a/jstests/core/index_stats.js b/jstests/core/index_stats.js
index 28d36c9be3b..70718c6f4d9 100644
--- a/jstests/core/index_stats.js
+++ b/jstests/core/index_stats.js
@@ -235,14 +235,10 @@ if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
assert.eq(2,
getUsageCount("_id_", foreignCollection),
"Expected each lookup to be tracked as an index use");
-} else if (checkSBEEnabled(db, ["featureFlagSBELookupPushdownIndexJoin"])) {
+} else {
assert.eq(1,
getUsageCount("_id_", foreignCollection),
"Expected the index join lookup to be tracked as a single index use");
-} else {
- assert.eq(0,
- getUsageCount("_id_", foreignCollection),
- "Expected the nested loop join lookup have no index use");
}
//
@@ -277,14 +273,10 @@ if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
assert.eq(2,
getUsageCount("_id_", foreignCollection),
"Expected each lookup to be tracked as an index use");
-} else if (checkSBEEnabled(db, ["featureFlagSBELookupPushdownIndexJoin"])) {
+} else {
assert.eq(1,
getUsageCount("_id_", foreignCollection),
"Expected the index join lookup to be tracked as a single index use");
-} else {
- assert.eq(0,
- getUsageCount("_id_", foreignCollection),
- "Expected the nested loop join lookup have no index use");
}
const explain = col.explain().aggregate(pipeline);
assert(getAggPlanStage(explain, "$cursor"),
diff --git a/jstests/noPassthrough/lookup_pushdown.js b/jstests/noPassthrough/lookup_pushdown.js
index 52b2a1d26a8..46f11b0a220 100644
--- a/jstests/noPassthrough/lookup_pushdown.js
+++ b/jstests/noPassthrough/lookup_pushdown.js
@@ -14,14 +14,10 @@ const JoinAlgorithm = {
NLJ: 1,
INLJ: 2,
HJ: 3,
- // These joins aren't implemented yet and will throw errors with the corresponding codes.
- INLJHashedIndex: 6357203,
};
// Standalone cases.
-const conn = MongoRunner.runMongod({
- setParameter: {featureFlagSBELookupPushdown: true, featureFlagSBELookupPushdownIndexJoin: true}
-});
+const conn = MongoRunner.runMongod({setParameter: "featureFlagSBELookupPushdown=true"});
assert.neq(null, conn, "mongod was unable to start up");
const name = "lookup_pushdown";
const foreignCollName = "foreign_lookup_pushdown";
@@ -101,14 +97,6 @@ function runTest(coll,
assert.eq(eqLookupNodes.length,
0,
"there should be no lowered EQ_LOOKUP stages; got " + tojson(explain));
- } else if (expectedJoinAlgorithm === JoinAlgorithm.INLJHashedIndex) {
- const result = assert.commandFailedWithCode(response, expectedJoinAlgorithm);
- if (errMsgRegex) {
- const errorMessage = result.errmsg;
- assert(errMsgRegex.test(errorMessage),
- "Error message '" + errorMessage + "' did not match the RegEx '" + errMsgRegex +
- "'");
- }
} else {
assert.commandWorked(response);
const explain = coll.explain().aggregate(pipeline, aggOptions);
@@ -297,13 +285,11 @@ let view = db[viewName];
// join strategy should be used.
(function testIndexNestedLoopJoinHashedIndex() {
assert.commandWorked(foreignColl.dropIndexes());
- assert.commandWorked(foreignColl.createIndex({b: 'hashed'}));
+ assert.commandWorked(foreignColl.createIndex({b: "hashed"}));
runTest(coll,
[{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
- JoinAlgorithm.INLJHashedIndex /* expectedJoinAlgorithm */,
- null /* indexKeyPattern */,
- {} /* aggOptions */,
- /b_hashed//* errMsgRegex */);
+ JoinAlgorithm.INLJ /* expectedJoinAlgorithm */,
+ {b: "hashed"} /* indexKeyPattern */);
assert.commandWorked(foreignColl.dropIndexes());
})();
@@ -361,13 +347,11 @@ let view = db[viewName];
(function testFewerComponentsFavoredOverIndexType() {
assert.commandWorked(foreignColl.dropIndexes());
assert.commandWorked(foreignColl.createIndex({b: 1, c: 1, d: 1}));
- assert.commandWorked(foreignColl.createIndex({b: 'hashed'}));
+ assert.commandWorked(foreignColl.createIndex({b: "hashed"}));
runTest(coll,
[{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
- JoinAlgorithm.INLJHashedIndex /* expectedJoinAlgorithm */,
- null /* indexKeyPattern */,
- {} /* aggOptions */,
- /b_hashed//* errMsgRegex */);
+ JoinAlgorithm.INLJ /* expectedJoinAlgorithm */,
+ {b: "hashed"} /* indexKeyPattern */);
assert.commandWorked(foreignColl.dropIndexes());
}());
@@ -683,12 +667,7 @@ MongoRunner.stopMongod(conn);
const st = new ShardingTest({
shards: 2,
mongos: 1,
- other: {
- shardOptions: {
- setParameter:
- {featureFlagSBELookupPushdown: true, featureFlagSBELookupPushdownIndexJoin: true}
- }
- }
+ other: {shardOptions: {setParameter: "featureFlagSBELookupPushdown=true"}}
});
db = st.s.getDB(name);
diff --git a/jstests/noPassthrough/lookup_pushdown_semantics.js b/jstests/noPassthrough/lookup_pushdown_semantics.js
index 2cb6de34020..1cae12893e7 100644
--- a/jstests/noPassthrough/lookup_pushdown_semantics.js
+++ b/jstests/noPassthrough/lookup_pushdown_semantics.js
@@ -8,9 +8,7 @@ load("jstests/libs/sbe_util.js"); // For 'checkSBEEnabled()'.
load("jstests/aggregation/extras/utils.js");
// Standalone cases.
-const conn = MongoRunner.runMongod({
- setParameter: {featureFlagSBELookupPushdown: true, featureFlagSBELookupPushdownIndexJoin: true}
-});
+const conn = MongoRunner.runMongod({setParameter: "featureFlagSBELookupPushdown=true"});
assert.neq(null, conn, "mongod was unable to start up");
const db = conn.getDB("lookup_pushdown");
if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
@@ -48,6 +46,7 @@ function runTest_SingleForeignRecord({
if (foreignIndex) {
assert.commandWorked(foreignColl.createIndex(foreignIndex));
+ testDescription += ` (foreign index ${tojson(foreignIndex)})`;
}
const results = localColl.aggregate([{
@@ -98,6 +97,7 @@ function runTest_SingleLocalRecord({
if (foreignIndex) {
assert.commandWorked(foreignColl.createIndex(foreignIndex));
+ testDescription += ` (foreign index ${tojson(foreignIndex)})`;
}
const results = localColl.aggregate([{
@@ -148,12 +148,26 @@ function runTest_SingleLocalRecord({
assert.eq(results[0].matched, []);
})();
-(function testMatchingTopLevelFieldToScalar() {
+function testMatchingTopLevelFieldToNonArray(indexType) {
+ // NOTE: There is no shell equivalent for the following BSON types:
+ // - Code (13)
+ // - Symbol (14)
+ // - CodeWScope (15)
const docs = [
{_id: 0, a: NumberInt(0)},
{_id: 1, a: 3.14},
{_id: 2, a: NumberDecimal(3.14)},
{_id: 3, a: "abc"},
+ {_id: 4, a: {b: 1, c: 2, d: 3}},
+ {_id: 5, a: true},
+ {_id: 6, a: false},
+ {_id: 7, a: new ISODate("2022-01-01T00:00:00.00Z")},
+ {_id: 8, a: new Timestamp(1, 123)},
+ {_id: 9, a: new ObjectId("0102030405060708090A0B0C")},
+ {_id: 10, a: new BinData(0, "BBBBBBBBBBBBBBBBBBBBBBBBBBBB")},
+ {_id: 11, a: /hjkl/},
+ {_id: 12, a: /hjkl/g},
+ {_id: 13, a: new DBRef("collection", "id", "database")},
];
docs.forEach(doc => {
@@ -164,7 +178,7 @@ function runTest_SingleLocalRecord({
localField: "a",
foreignRecord: {b: doc.a},
foreignField: "b",
- foreignIndex: {b: 1},
+ foreignIndex: {b: indexType},
idsExpectedToMatch: [doc._id]
});
runTest_SingleLocalRecord({
@@ -174,7 +188,7 @@ function runTest_SingleLocalRecord({
localField: "b",
foreignRecords: docs,
foreignField: "a",
- foreignIndex: {a: 1},
+ foreignIndex: {a: indexType},
idsExpectedToMatch: [doc._id]
});
});
@@ -186,7 +200,7 @@ function runTest_SingleLocalRecord({
localField: "a",
foreignRecord: {b: 'xxx'},
foreignField: "b",
- foreignIndex: {b: 1},
+ foreignIndex: {b: indexType},
idsExpectedToMatch: []
});
runTest_SingleLocalRecord({
@@ -196,10 +210,172 @@ function runTest_SingleLocalRecord({
localField: "b",
foreignRecords: docs,
foreignField: "a",
- foreignIndex: {a: 1},
+ foreignIndex: {a: indexType},
idsExpectedToMatch: []
});
-})();
+}
+
+testMatchingTopLevelFieldToNonArray(1 /* indexType */);
+testMatchingTopLevelFieldToNonArray(-1 /* indexType */);
+testMatchingTopLevelFieldToNonArray("hashed" /* indexType */);
+
+function testMatchingTopLevelFieldToNullAndUndefined(indexType) {
+ const foreignRecords = [
+ {_id: 0, a: null},
+ {_id: 1, a: undefined},
+ ];
+ // We do not currently support hashed indexes on the collections with arrays.
+ if (indexType != "hashed") {
+ foreignRecords.push({_id: 2, a: []}, {_id: 3, a: [[]]});
+ }
+
+ runTest_SingleLocalRecord({
+ testDescription: "Null should match only to null",
+ localRecord: {b: null},
+ localField: "b",
+ foreignRecords,
+ foreignField: "a",
+ foreignIndex: {a: indexType},
+ idsExpectedToMatch: [0]
+ });
+}
+
+testMatchingTopLevelFieldToNullAndUndefined(1 /* indexType */);
+testMatchingTopLevelFieldToNullAndUndefined(-1 /* indexType */);
+testMatchingTopLevelFieldToNullAndUndefined("hashed" /* indexType */);
+
+function testMatchingTopLevelFieldToArrays(indexType) {
+ runTest_SingleLocalRecord({
+ testDescription: "Scalar should match arrays containing that value",
+ localRecord: {b: 1},
+ localField: "b",
+ foreignRecords: [
+ {_id: 0, a: 1},
+ {_id: 1, a: [1]},
+ {_id: 2, a: [1, 2, 3]},
+ {_id: 3, a: [3, 2, 1]},
+ {_id: 4, a: [4, 5, 6]},
+ {_id: 5, a: []},
+ ],
+ foreignField: "a",
+ foreignIndex: {a: indexType},
+ idsExpectedToMatch: [0, 1, 2, 3]
+ });
+
+ runTest_SingleLocalRecord({
+ testDescription: "Empty array should only match to empty array",
+ localRecord: {b: [[]]},
+ localField: "b",
+ foreignRecords: [
+ {_id: 0, a: null},
+ {_id: 1, a: undefined},
+ {_id: 2, a: []},
+ {_id: 3, a: [[]]},
+ {_id: 4, a: [null]},
+ {_id: 5, a: [undefined]},
+ {_id: 6, a: [1]},
+ {_id: 7, a: [1, 2, 3]},
+ ],
+ foreignField: "a",
+ foreignIndex: {a: indexType},
+ idsExpectedToMatch: [2, 3]
+ });
+
+ runTest_SingleLocalRecord({
+ testDescription: "Single element arrays should match only single-element arrays",
+ localRecord: {b: [[1]]},
+ localField: "b",
+ foreignRecords: [
+ {_id: 0, a: 1},
+ {_id: 1, a: [1]},
+ {_id: 2, a: [1, 2, 3]},
+ {_id: 3, a: [3, 2, 1]},
+ {_id: 4, a: [4, 5, 6]},
+ {_id: 5, a: []},
+ ],
+ foreignField: "a",
+ foreignIndex: {a: indexType},
+ idsExpectedToMatch: [1]
+ });
+
+ runTest_SingleLocalRecord({
+ testDescription: "Arrays with multiple elements should only match itself",
+ localRecord: {b: [[1, 2, 3]]},
+ localField: "b",
+ foreignRecords: [
+ {_id: 0, a: 1},
+ {_id: 1, a: [1]},
+ {_id: 2, a: [1, 2, 3]},
+ {_id: 3, a: [3, 2, 1]},
+ {_id: 4, a: [4, 5, 6]},
+ {_id: 5, a: []},
+ ],
+ foreignField: "a",
+ foreignIndex: {a: indexType},
+ idsExpectedToMatch: [2]
+ });
+
+ runTest_SingleLocalRecord({
+ testDescription: "Array queries must work on hashed indexes",
+ localRecord: {b: [[1, 2, 3]]},
+ localField: "b",
+ foreignRecords: [
+ {_id: 0, a: 1},
+ ],
+ foreignField: "a",
+ foreignIndex: {a: "hashed"},
+ idsExpectedToMatch: []
+ });
+}
+
+testMatchingTopLevelFieldToArrays(1 /* indexType */);
+testMatchingTopLevelFieldToArrays(-1 /* indexType */);
+
+function testMatchingWithNestedPaths(indexType) {
+ const foreignRecords = [
+ {_id: 0, a: {b: {c: 1}}},
+ {_id: 1, a: {no_b: 1}},
+ {_id: 2, a: {b: {no_c: 1}}},
+ ];
+ const idsExpectedToMatch = [0];
+
+ // We do not currently support hashed indexes on the collections with arrays.
+ if (indexType != "hashed") {
+ foreignRecords.push({_id: 3, a: {b: {c: [1]}}},
+ {_id: 4, a: [{b: [{c: 1}, {c: 2}]}, {b: [{c: 3}, {c: 4}]}]});
+ idsExpectedToMatch.push(3, 4);
+ }
+
+ runTest_SingleLocalRecord({
+ testDescription: "Index join with nested path in foreign field",
+ localRecord: {b: 1},
+ localField: "b",
+ foreignRecords,
+ foreignField: "a.b.c",
+ foreignIndex: {"a.b.c": indexType},
+ idsExpectedToMatch,
+ });
+
+ runTest_SingleForeignRecord({
+ testDescription: "Index join with nested path in local field",
+ localRecords: [
+ {_id: 0, a: {b: {c: 1}}},
+ {_id: 1, a: {b: {c: [1]}}},
+ {_id: 2, a: [{b: [{c: 1}, {c: 2}]}, {b: [{c: 3}, {c: 4}]}]},
+ {_id: 3, a: {no_b: 1}},
+ {_id: 4, a: {b: {no_c: 1}}},
+ ],
+ localField: "a.b.c",
+ foreignRecord: {b: 1},
+ foreignField: "b",
+ foreignIndex: {b: indexType},
+ idsExpectedToMatch: [0, 1, 2]
+ });
+}
+
+testMatchingWithNestedPaths(1 /* indexType */);
+testMatchingWithNestedPaths(-1 /* indexType */);
+testMatchingWithNestedPaths("hashed" /* indexType */);
MongoRunner.stopMongod(conn);
}());
diff --git a/jstests/sharding/query/lookup.js b/jstests/sharding/query/lookup.js
index f111d6d3773..13e18798c49 100644
--- a/jstests/sharding/query/lookup.js
+++ b/jstests/sharding/query/lookup.js
@@ -3,7 +3,7 @@
(function() {
"use strict";
-load("jstests/aggregation/extras/utils.js"); // For assertErrorCode.
+load("jstests/aggregation/extras/utils.js"); // For assertErrorCode and arrayEq.
load("jstests/libs/fixture_helpers.js"); // For isSharded.
load("jstests/libs/discover_topology.js"); // For findDataBearingNodes.
@@ -28,8 +28,8 @@ function compareId(a, b) {
// Helper for testing that pipeline returns correct set of results.
function testPipeline(pipeline, expectedResult, collection) {
- assert.eq(collection.aggregate(pipeline).toArray().sort(compareId),
- expectedResult.sort(compareId));
+ arrayEq(collection.aggregate(pipeline).toArray().sort(compareId),
+ expectedResult.sort(compareId));
}
function runTest(coll, from, thirdColl, fourthColl) {
diff --git a/src/mongo/db/exec/sbe/values/bson.h b/src/mongo/db/exec/sbe/values/bson.h
index f5c429d6890..0965ba11421 100644
--- a/src/mongo/db/exec/sbe/values/bson.h
+++ b/src/mongo/db/exec/sbe/values/bson.h
@@ -63,6 +63,10 @@ void appendValueToBsonObj(ObjBuilder& builder,
StringData name,
value::TypeTags tag,
value::Value val);
+
+template <class ArrayBuilder>
+void convertToBsonObj(ArrayBuilder& builder, value::ArrayEnumerator arr);
+
} // namespace bson
} // namespace sbe
} // namespace mongo
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 93614841ba2..23446a0b049 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -1663,7 +1663,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewKeyString(Ar
uint32_t orderingBits = value::numericCast<int32_t>(tagOrdering, valOrdering);
BSONObjBuilder bb;
for (size_t i = 0; orderingBits != 0 && i < arity - 3u; ++i, orderingBits >>= 1) {
- bb.append(""_sd, (orderingBits & 1) ? 1 : 0);
+ bb.append(""_sd, (orderingBits & 1) ? -1 : 1);
}
KeyString::HeapBuilder kb{ksVersion, Ordering::make(bb.done())};
@@ -1675,6 +1675,9 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewKeyString(Ar
auto tagCopy = tag;
switch (tag) {
+ case value::TypeTags::Boolean:
+ kb.appendBool(value::bitcastTo<bool>(val));
+ break;
case value::TypeTags::NumberInt32:
kb.appendNumberInt(value::bitcastTo<int32_t>(val));
break;
@@ -1692,6 +1695,27 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewKeyString(Ar
case value::TypeTags::bsonString:
kb.appendString(value::getStringView(tag, val));
break;
+ case value::TypeTags::Null:
+ kb.appendNull();
+ break;
+ case value::TypeTags::bsonUndefined:
+ kb.appendUndefined();
+ break;
+ case value::TypeTags::bsonJavascript:
+ kb.appendCode(value::getBsonJavascriptView(val));
+ break;
+ case value::TypeTags::Date: {
+ auto milliseconds = value::bitcastTo<int64_t>(val);
+ auto duration = stdx::chrono::duration<int64_t, std::milli>(milliseconds);
+ auto date = Date_t::fromDurationSinceEpoch(duration);
+ kb.appendDate(date);
+ break;
+ }
+ case value::TypeTags::Timestamp: {
+ Timestamp ts{value::bitcastTo<uint64_t>(val)};
+ kb.appendTimestamp(ts);
+ break;
+ }
case value::TypeTags::MinKey: {
BSONObjBuilder bob;
bob.appendMinKey("");
@@ -1704,6 +1728,70 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewKeyString(Ar
kb.appendBSONElement(bob.obj().firstElement(), nullptr);
break;
}
+ case value::TypeTags::bsonArray: {
+ BSONObj bson{value::getRawPointerView(val)};
+ kb.appendArray(BSONArray(BSONObj(bson)));
+ break;
+ }
+ case value::TypeTags::Array:
+ case value::TypeTags::ArraySet: {
+ value::ArrayEnumerator enumerator{tag, val};
+ BSONArrayBuilder arrayBuilder;
+ bson::convertToBsonObj(arrayBuilder, enumerator);
+ kb.appendArray(arrayBuilder.arr());
+ break;
+ }
+ case value::TypeTags::bsonObject: {
+ BSONObj bson{value::getRawPointerView(val)};
+ kb.appendObject(bson);
+ break;
+ }
+ case value::TypeTags::Object: {
+ BSONObjBuilder objBuilder;
+ bson::convertToBsonObj(objBuilder, value::getObjectView(val));
+ kb.appendObject(objBuilder.obj());
+ break;
+ }
+ case value::TypeTags::ObjectId: {
+ auto oid = OID::from(value::getObjectIdView(val)->data());
+ kb.appendOID(oid);
+ break;
+ }
+ case value::TypeTags::bsonObjectId: {
+ auto oid = OID::from(value::getRawPointerView(val));
+ kb.appendOID(oid);
+ break;
+ }
+ case value::TypeTags::bsonSymbol: {
+ auto symbolView = value::getStringOrSymbolView(tag, val);
+ kb.appendSymbol(symbolView);
+ break;
+ }
+ case value::TypeTags::bsonBinData: {
+ auto data = value::getBSONBinData(tag, val);
+ auto length = static_cast<int>(value::getBSONBinDataSize(tag, val));
+ auto type = value::getBSONBinDataSubtype(tag, val);
+ BSONBinData binData{data, length, type};
+ kb.appendBinData(binData);
+ break;
+ }
+ case value::TypeTags::bsonRegex: {
+ auto sbeRegex = value::getBsonRegexView(val);
+ BSONRegEx regex{sbeRegex.pattern, sbeRegex.flags};
+ kb.appendRegex(regex);
+ break;
+ }
+ case value::TypeTags::bsonCodeWScope: {
+ auto sbeCodeWScope = value::getBsonCodeWScopeView(val);
+ BSONCodeWScope codeWScope{sbeCodeWScope.code, BSONObj(sbeCodeWScope.scope)};
+ kb.appendCodeWString(codeWScope);
+ break;
+ }
+ case value::TypeTags::bsonDBPointer: {
+ auto dbPointer = value::getBsonDBPointerView(val);
+ BSONDBRef dbRef{dbPointer.ns, OID::from(dbPointer.id)};
+ kb.appendDBRef(dbRef);
+ }
default:
uasserted(4822802, str::stream() << "Unsuppored key string type: " << tagCopy);
break;
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 2f633c3c79b..b825199e964 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -625,10 +625,6 @@ bool isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo) {
internalQueryCollectionMaxStorageSizeBytesToChooseHashJoin.load();
}
-bool isEligibleForIndexedLoopJoin() {
- return feature_flags::gFeatureFlagSBELookupPushdownIndexJoin.isEnabledAndIgnoreFCV();
-}
-
// static
void QueryPlannerAnalysis::determineLookupStrategy(
EqLookupNode* eqLookupNode,
@@ -672,7 +668,7 @@ void QueryPlannerAnalysis::determineLookupStrategy(
return boost::none;
}();
- if (foreignIndex && isEligibleForIndexedLoopJoin()) {
+ if (foreignIndex) {
eqLookupNode->lookupStrategy = EqLookupNode::LookupStrategy::kIndexedLoopJoin;
eqLookupNode->idxEntry = foreignIndex;
} else if (allowDiskUse && isEligibleForHashJoin(foreignCollItr->second)) {
diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl
index 2da74b32cf7..7eca2ef9aef 100644
--- a/src/mongo/db/query/query_feature_flags.idl
+++ b/src/mongo/db/query/query_feature_flags.idl
@@ -133,11 +133,6 @@ feature_flags:
cpp_varname: gFeatureFlagSBELookupPushdown
default: false
- featureFlagSBELookupPushdownIndexJoin:
- description: "Feature flag for allowing SBE $lookup pushdown to execute query using index join strategy"
- cpp_varname: gFeatureFlagSBELookupPushdownIndexJoin
- default: false
-
featureFlagSearchShardedFacets:
description: "Enable use of $$SEARCH_META on sharded collections"
cpp_varname: gFeatureFlagSearchShardedFacets
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
index bbc74bd7f02..780fa3b10e9 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/exec/sbe/stages/hash_agg.h"
#include "mongo/db/exec/sbe/stages/hash_lookup.h"
#include "mongo/db/exec/sbe/stages/ix_scan.h"
+#include "mongo/db/exec/sbe/stages/limit_skip.h"
#include "mongo/db/exec/sbe/stages/loop_join.h"
#include "mongo/db/exec/sbe/stages/scan.h"
#include "mongo/db/exec/sbe/stages/union.h"
@@ -52,6 +53,8 @@
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/logv2/log.h"
+#include "mongo/db/query/sbe_stage_builder_filter.h"
+
namespace mongo::stage_builder {
/**
* Helpers for building $lookup.
@@ -494,36 +497,48 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildLookupResultObject(
/*
* Build $lookup stage using index join strategy. Below is an example plan for the aggregation
* [{$lookup: {localField: "a", foreignField: "b"}}] with an index {b: 1} on the foreign
- * collection.
+ * collection. Note that parts reading the local values and constructing the resulting document are
+ * omitted.
*
- * nlj [localRecord]
+ * nlj [foreignDocument] [foreignDocument]
* left
- * project [localField = getField (localRecord, "a")]
- * scan localRecord
- * right
- * limit 1
- * union [foreignGroup] [
- * group [] [foreignGroupOrNothing = addToArray (foreignRecord)]
- * nlj []
- * left
- * nlj [indexId, indexKeyPattern]
- * left
- * project [lowKey = ks (localField, _, _, kExclusiveBefore),
- * highKey = ks (localField, _, _, kExclusiveAfter),
- * indexId = "b_1",
- * indexKeyPattern = {"b" : 1}]
- * limit 1
- * coscan
- * right
- * ixseek lowKey highKey indexKey foreignRecordId snapshotId _ @"b_1"
- * right
- * limit 1
- * seek foreignRecordId foreignRecord _ snapshotId indexId indexKey indexKeyPattern
- * ,
- * project [emptyArray = []]
+ * nlj
+ * left
+ * nlj [lowKey, highKey]
+ * left
+ * nlj
+ * left
+ * unwind localKeySet localValue
* limit 1
* coscan
- * ]
+ * right
+ * project lowKey = ks (1, 0, valueForIndexBounds, 1),
+ * highKey = ks (1, 0, valueForIndexBounds, 2)
+ * union [valueForIndexBounds] [
+ * cfilter {isNull (localValue)}
+ * project [valueForIndexBounds = undefined]
+ * limit 1
+ * coscan
+ * ,
+ * cfilter {isArray (localValue)}
+ * project [valueForIndexBounds = fillEmpty (getElement (localValue, 0), undefined)]
+ * limit 1
+ * coscan
+ * ,
+ * project [valueForIndexBounds = localValue]
+ * limit 1
+ * coscan
+ * ]
+ * right
+ * ixseek lowKey highKey recordId @"b_1"
+ * right
+ * limit 1
+ * seek s21 foreignDocument recordId @"foreign collection"
+ * right
+ * limit 1
+ * filter {isMember (foreignValue, localValueSet)}
+ * // Below is the tree performing path traversal on the 'foreignDocument' and producing value
+ * // into 'foreignValue'.
*
*/
std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
@@ -531,12 +546,15 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
std::unique_ptr<sbe::PlanStage> localStage,
SlotId localRecordSlot,
const FieldPath& localFieldName,
+ const FieldPath& foreignFieldName,
const CollectionPtr& foreignColl,
const IndexEntry& index,
StringMap<const IndexAccessMethod*>& iamMap,
PlanYieldPolicySBE* yieldPolicy,
const PlanNodeId nodeId,
- SlotIdGenerator& slotIdGenerator) {
+ SlotIdGenerator& slotIdGenerator,
+ FrameIdGenerator& frameIdGenerator,
+ RuntimeEnvironment* env) {
const auto foreignCollUUID = foreignColl->uuid();
const auto indexName = index.identifier.catalogName;
const auto indexDescriptor =
@@ -559,15 +577,98 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
nodeId,
slotIdGenerator);
- // 'localFieldSlot' is a set even if it contains a single item. Extract this single value until
- // SERVER-63574 is implemented.
- SlotId localFieldSlot = slotIdGenerator.generate();
- auto localFieldStage = makeS<UnwindStage>(std::move(localKeysSetStage) /* child stage */,
- localKeysSetSlot,
- localFieldSlot,
- slotIdGenerator.generate() /* outIndex */,
- true /* preserveNullAndEmptyArrays */,
- nodeId);
+ // Unwind local keys one by one into 'singleLocalValueSlot'.
+ auto singleLocalValueSlot = slotIdGenerator.generate();
+ auto unwindLocalKeysStage = makeS<UnwindStage>(makeLimitCoScanTree(nodeId, 1),
+ localKeysSetSlot /* inSlot */,
+ singleLocalValueSlot /* outField */,
+ slotIdGenerator.generate() /* outIndex */,
+ true /* preserveNullAndEmptyArrays */,
+ nodeId);
+
+ // We need to lookup value in 'singleLocalValueSlot' in the index defined on the foreign
+ // collection. To do this, we need to generate set of point intervals corresponding to this
+ // value. Single value can correspond to multiple point intervals:
+ // - Null values:
+ // a. [Null, Null]
+ // b. [Undefined, Undefined]
+ // - Array values:
+ // a. If array is empty, [Undefined, Undefined]
+ // b. If array is NOT empty, [array[0], array[0]] (point interval composed from the first
+ // array element)
+ // - All other types, single point interval [value, value]
+ //
+ // To implement these rules, we use the union stage:
+ // union pointValue [
+ // // Branch 1
+ // cfilter isNull(rawValue)
+ // project pointValue = Undefined
+ // limit 1
+ // coscan
+ // ,
+ // // Branch 2
+ // cfilter isArray(rawValue)
+ // project pointValue = fillEmpty(
+ // getElement(rawValue, 0),
+ // Undefined
+ // )
+ // limit 1
+ // coscan
+ // ,
+ // // Branch 3
+ // project pointValue = rawValue
+ // limit 1
+ // coscan
+ // ]
+ //
+ // For null values, only branches (1) and (3) produce values. For array values, only branches
+ // (2) and (3) produce values. For all other types, only (3) produces value.
+ auto nullBranchOutput = slotIdGenerator.generate();
+ auto nullBranch = makeProjectStage(makeLimitCoScanTree(nodeId, 1),
+ nodeId,
+ nullBranchOutput,
+ makeConstant(TypeTags::bsonUndefined, 0));
+ nullBranch = makeS<FilterStage<true>>(
+ std::move(nullBranch), makeFunction("isNull", makeVariable(singleLocalValueSlot)), nodeId);
+
+ auto arrayBranchOutput = slotIdGenerator.generate();
+ auto arrayBranch =
+ makeProjectStage(makeLimitCoScanTree(nodeId, 1),
+ nodeId,
+ arrayBranchOutput,
+ makeFunction("fillEmpty",
+ makeFunction("getElement",
+ makeVariable(singleLocalValueSlot),
+ makeConstant(TypeTags::NumberInt32, 0)),
+ makeConstant(TypeTags::bsonUndefined, 0)));
+ arrayBranch =
+ makeS<FilterStage<true>>(std::move(arrayBranch),
+ makeFunction("isArray", makeVariable(singleLocalValueSlot)),
+ nodeId);
+
+ auto valueBranchOutput = slotIdGenerator.generate();
+ auto valueBranch = makeProjectStage(makeLimitCoScanTree(nodeId, 1),
+ nodeId,
+ valueBranchOutput,
+ makeVariable(singleLocalValueSlot));
+
+ auto valueForIndexBounds = slotIdGenerator.generate();
+ auto valueGeneratorStage = makeS<UnionStage>(
+ makeSs(std::move(nullBranch), std::move(arrayBranch), std::move(valueBranch)),
+ makeVector(makeSV(nullBranchOutput), makeSV(arrayBranchOutput), makeSV(valueBranchOutput)),
+ makeSV(valueForIndexBounds),
+ nodeId);
+
+ // For hashed indexes, we need to hash value before computing keystrings.
+ if (index.type == INDEX_HASHED) {
+ auto rawValueSlot = valueForIndexBounds;
+ valueForIndexBounds = slotIdGenerator.generate();
+ valueGeneratorStage =
+ makeProjectStage(std::move(valueGeneratorStage),
+ nodeId,
+ valueForIndexBounds,
+ makeFunction("shardHash", makeVariable(rawValueSlot)));
+ }
// Calculate the low key and high key of each individual local field. They are stored in
// 'lowKeySlot' and 'highKeySlot', respectively. These two slots will be made available in
@@ -578,24 +679,23 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
auto indexIdSlot = slotIdGenerator.generate();
auto indexKeyPatternSlot = slotIdGenerator.generate();
auto [_, indexKeyPatternValue] =
- sbe::value::copyValue(sbe::value::TypeTags::bsonObject,
- sbe::value::bitcastFrom<const char*>(index.keyPattern.objdata()));
+ copyValue(TypeTags::bsonObject, bitcastFrom<const char*>(index.keyPattern.objdata()));
auto indexBoundKeyStage = makeProjectStage(
- makeLimitCoScanTree(nodeId, 1),
+ std::move(valueGeneratorStage),
nodeId,
lowKeySlot,
makeFunction(
"ks"_sd,
makeConstant(value::TypeTags::NumberInt64, static_cast<int64_t>(indexVersion)),
makeConstant(value::TypeTags::NumberInt32, indexOrdering.getBits()),
- makeVariable(localFieldSlot),
+ makeVariable(valueForIndexBounds),
makeConstant(value::TypeTags::NumberInt64,
static_cast<int64_t>(KeyString::Discriminator::kExclusiveBefore))),
highKeySlot,
makeFunction("ks"_sd,
makeConstant(value::TypeTags::NumberInt64, static_cast<int64_t>(indexVersion)),
makeConstant(value::TypeTags::NumberInt32, indexOrdering.getBits()),
- makeVariable(localFieldSlot),
+ makeVariable(valueForIndexBounds),
makeConstant(value::TypeTags::NumberInt64,
static_cast<int64_t>(KeyString::Discriminator::kExclusiveAfter))),
indexIdSlot,
@@ -603,6 +703,16 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
indexKeyPatternSlot,
makeConstant(value::TypeTags::bsonObject, indexKeyPatternValue));
+ // To ensure that we compute index bounds for all local values, introduce loop join, where
+ // unwinding of local values happens on the right side and index generation happens on the left
+ // side.
+ indexBoundKeyStage = makeS<LoopJoinStage>(std::move(unwindLocalKeysStage),
+ std::move(indexBoundKeyStage),
+ makeSV() /* outerProjects */,
+ makeSV(singleLocalValueSlot) /* outerCorrelated */,
+ nullptr /* predicate */,
+ nodeId);
+
// Perform the index seek based on the 'lowKeySlot' and 'highKeySlot' from the outer side.
// The foreign record id of the seek is stored in 'foreignRecordIdSlot'. We also keep
// 'indexKeySlot' and 'snapshotIdSlot' for the seek stage later to perform consistency
@@ -650,21 +760,50 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage(
makeSV() /* slotsToForward */,
slotIdGenerator);
+ // Some values are encoded with the same value in BTree index, such undefined, null and empty
+ // array. In hashed indexes, hash collisions are possible. We need to double check that the
+ // results returned from the index scan are what we expect. To do that, we traverse the path in
+ // 'foreignFieldName' and check if the set in 'localKeysSetSlot' contains any of the values
+ // returned.
+ auto [foreignValueSlot, foreignValueStage] =
+ buildForeignKeysStream(foreignRecordSlot, foreignFieldName, nodeId, slotIdGenerator);
+
+ // Check if local keys set contains the value from the foreign document.
+ auto foreignValueFilterStage = makeS<FilterStage<false>>(
+ std::move(foreignValueStage),
+ makeFunction("isMember", makeVariable(foreignValueSlot), makeVariable(localKeysSetSlot)),
+ nodeId);
+
+ // Path traversal of the foreign document may produce multiple values. To ensure that the
+ // foreign document is added only once to the resulting array, we put whole path traversal into
+ // the right branch of loop join and add 'limit 1' stage on top.
+ auto foreignValueMatchesStage = makeLimitTree(std::move(foreignValueFilterStage), nodeId, 1);
+
+ auto filteredForeignRecordsStage =
+ makeS<LoopJoinStage>(std::move(scanNljStage) /* outer */,
+ std::move(foreignValueMatchesStage) /* inner */,
+ makeSV(foreignRecordSlot) /* outerProjects */,
+ makeSV(foreignRecordSlot) /* outerCorrelated */,
+ nullptr,
+ nodeId);
+
// Group the matched foreign documents into a list, stored in the 'foreignGroupSlot'.
// It creates a union stage internally so that when there's no matching foreign records, an
// empty array will be returned.
auto [foreignGroupSlot, foreignGroupStage] = buildForeignMatchedArray(
- {std::move(scanNljStage), makeSV()}, foreignRecordSlot, nodeId, slotIdGenerator);
+ EvalStage{std::move(filteredForeignRecordsStage), makeSV(foreignRecordSlot)},
+ foreignRecordSlot,
+ nodeId,
+ slotIdGenerator);
// The top level loop join stage that joins each local field with the matched foreign
// documents.
- auto nljStage = makeS<LoopJoinStage>(std::move(localFieldStage),
+ auto nljStage = makeS<LoopJoinStage>(std::move(localKeysSetStage),
std::move(foreignGroupStage),
makeSV(localRecordSlot) /* outerProjects */,
- makeSV(localFieldSlot) /* outerCorrelated */,
+ makeSV(localKeysSetSlot) /* outerCorrelated */,
nullptr,
nodeId);
-
return {foreignGroupSlot, std::move(nljStage)};
}
@@ -804,23 +943,19 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
"$lookup using index join should have one child and a populated index entry",
eqLookupNode->children.size() == 1 && eqLookupNode->idxEntry);
- const auto& index = *eqLookupNode->idxEntry;
-
- uassert(6357203,
- str::stream() << "$lookup using index join doesn't work for hashed index '"
- << index.identifier.catalogName << "'",
- index.type != INDEX_HASHED);
-
return buildIndexJoinLookupStage(_state,
std::move(localStage),
localDocumentSlot,
eqLookupNode->joinFieldLocal,
+ eqLookupNode->joinFieldForeign,
foreignColl,
- index,
+ *eqLookupNode->idxEntry,
_data.iamMap,
_yieldPolicy,
eqLookupNode->nodeId(),
- _slotIdGenerator);
+ _slotIdGenerator,
+ _frameIdGenerator,
+ _data.env);
}
case EqLookupNode::LookupStrategy::kNestedLoopJoin:
case EqLookupNode::LookupStrategy::kHashJoin: {