summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Lapkov <nikita.lapkov@mongodb.com>2022-02-21 17:27:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-21 18:00:19 +0000
commit039d9b40b7c1c1df900b39cbb4c72522428359a3 (patch)
tree2497123f2edde403406e6ef1984b3729d7c61796
parent4cdd3f9439ba0eb8e6540465638d5cac3aca1fdf (diff)
downloadmongo-039d9b40b7c1c1df900b39cbb4c72522428359a3.tar.gz
SERVER-63570 Implement index selection for index join
-rw-r--r--jstests/noPassthrough/lookup_pushdown.js103
-rw-r--r--src/mongo/db/query/planner_analysis.cpp42
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup.cpp9
3 files changed, 130 insertions, 24 deletions
diff --git a/jstests/noPassthrough/lookup_pushdown.js b/jstests/noPassthrough/lookup_pushdown.js
index 4066e7f115a..6d381d422af 100644
--- a/jstests/noPassthrough/lookup_pushdown.js
+++ b/jstests/noPassthrough/lookup_pushdown.js
@@ -12,21 +12,28 @@ const JoinAlgorithm = {
NLJ: 5842604,
};
-function runTest(coll, pipeline, expectedCode, aggOptions = {}) {
- const cmd = () => coll.aggregate(pipeline, aggOptions).toArray();
- if (expectedCode) {
- assert.throwsWithCode(cmd, expectedCode);
- } else {
- assert.doesNotThrow(cmd);
- }
-}
-
// Standalone cases.
const conn = MongoRunner.runMongod({setParameter: "internalEnableMultipleAutoGetCollections=true"});
assert.neq(null, conn, "mongod was unable to start up");
const name = "lookup_pushdown";
let db = conn.getDB(name);
+function runTest(coll, pipeline, expectedCode, aggOptions = {}, errMsgRegex = null) {
+ const options = Object.assign({pipeline, cursor: {}}, aggOptions);
+ const response = coll.runCommand("aggregate", options);
+ if (expectedCode) {
+ const result = assert.commandFailedWithCode(response, expectedCode);
+ if (errMsgRegex) {
+ const errorMessage = result.errmsg;
+ assert(errMsgRegex.test(errorMessage),
+ "Error message '" + errorMessage + "' did not match the RegEx '" + errMsgRegex +
+ "'");
+ }
+ } else {
+ assert.commandWorked(response);
+ }
+}
+
if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
jsTestLog("Skipping test because the sbe lookup pushdown feature flag is disabled");
MongoRunner.stopMongod(conn);
@@ -159,7 +166,27 @@ runTest(coll,
assert.commandWorked(foreignColl.createIndex({b: 1}));
runTest(coll,
[{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
- JoinAlgorithm.INLJ /* expectedCode */);
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_1, \)$//* errMsgRegex */);
+
+// Build a hashed index on the foreign collection that matches the foreignField. Indexed nested loop
+// join strategy should be used.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: 'hashed'}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_hashed, \)$//* errMsgRegex */);
+
+// Build a wildcard index on the foreign collection that matches the foreignField. Nested loop join
+// strategy should be used.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({'$**': 1}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.NLJ /* expectedCode */);
// Build a compound index that is prefixed with the foreignField. We should use an indexed
// nested loop join.
@@ -167,7 +194,61 @@ assert.commandWorked(foreignColl.dropIndexes());
assert.commandWorked(foreignColl.createIndex({b: 1, c: 1, a: 1}));
runTest(coll,
[{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
- JoinAlgorithm.INLJ /* expectedCode */);
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_1_c_1_a_1, \)$//* errMsgRegex */);
+
+// Build multiple compound indexes prefixed with the foreignField. We should utilize the index with
+// the least amount of components.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: 1, a: 1}));
+assert.commandWorked(foreignColl.createIndex({b: 1, c: 1, a: 1}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_1_a_1, \)$//* errMsgRegex */);
+
+// In the presence of hashed and BTree indexes with the same number of components, we should select
+// BTree one.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: 1}));
+assert.commandWorked(foreignColl.createIndex({b: 'hashed'}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_1, \)$//* errMsgRegex */);
+
+// While selecting a BTree index is more preferable, we should favor hashed index if it has smaller
+// number of components.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: 1, c: 1, d: 1}));
+assert.commandWorked(foreignColl.createIndex({b: 'hashed'}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_hashed, \)$//* errMsgRegex */);
+
+// If we have two indexes of the same type with the same number of components, index keypattern
+// should be used as a tie breaker.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: 1, c: 1}));
+assert.commandWorked(foreignColl.createIndex({b: 1, a: 1}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.INLJ /* expectedCode */,
+ {} /* aggOptions */,
+ /\(b_1_a_1, \)$//* errMsgRegex */);
+
+// Build a 2d index on the foreign collection that matches the foreignField. In this case, we should
+// use regular nested loop join.
+assert.commandWorked(foreignColl.dropIndexes());
+assert.commandWorked(foreignColl.createIndex({b: '2d'}));
+runTest(coll,
+ [{$lookup: {from: foreignCollName, localField: "a", foreignField: "b", as: "out"}}],
+ JoinAlgorithm.NLJ /* expectedCode */);
// Build a compound index containing the foreignField, but not as the first field. In this case,
// we should use regular nested loop join.
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index dc347cdbe7b..1003d2a6c98 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/index/s2_common.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/db/query/planner_ixselect.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/logv2/log.h"
@@ -624,24 +625,43 @@ void QueryPlannerAnalysis::determineLookupStrategy(
<< foreignCollName,
foreignCollItr != collectionsInfo.end());
- // Does an eligible index exist?
- // TODO SERVER-62913: finalize the logic for indexes analysis.
- const auto& foreignField = eqLookupNode->joinFieldForeign;
- bool foundEligibleIndex = false;
- for (const IndexEntry& idxEntry : foreignCollItr->second.indexes) {
- tassert(5842601, "index key pattern should not be empty", !idxEntry.keyPattern.isEmpty());
- if (idxEntry.keyPattern.firstElement().fieldName() == foreignField) {
- foundEligibleIndex = true;
- break;
+ // Check if an eligible index exists for indexed loop join strategy.
+ const auto foreignIndex = [&]() -> boost::optional<IndexEntry> {
+ // Sort indexes by (# of components, index type, index key pattern) tuple.
+ auto indexes = foreignCollItr->second.indexes;
+ std::sort(
+ indexes.begin(), indexes.end(), [](const IndexEntry& left, const IndexEntry& right) {
+ const auto nFieldsLeft = left.keyPattern.nFields();
+ const auto nFieldsRight = right.keyPattern.nFields();
+ if (nFieldsLeft != nFieldsRight) {
+ return nFieldsLeft < nFieldsRight;
+ } else if (left.type != right.type) {
+ // Here we rely on the fact that 'INDEX_BTREE < INDEX_HASHED'.
+ return left.type < right.type;
+ }
+
+ // This is a completely arbitrary tie breaker to make the selection algorithm
+ // deterministic.
+ return left.keyPattern.woCompare(right.keyPattern) < 0;
+ });
+
+ for (const auto& index : indexes) {
+ if ((index.type == INDEX_BTREE || index.type == INDEX_HASHED) &&
+ index.keyPattern.firstElement().fieldName() == eqLookupNode->joinFieldForeign) {
+ return index;
+ }
}
- }
+
+ return boost::none;
+ }();
// TODO SERVER-63449: make this setting configurable and tighten the HJ check to cover the
// number of records and the storage size of the collection.
static constexpr auto kMaxHashJoinCollectionSize = 100 * 1024 * 1024;
- if (foundEligibleIndex) {
+ if (foreignIndex) {
eqLookupNode->lookupStrategy = EqLookupNode::LookupStrategy::kIndexedLoopJoin;
+ eqLookupNode->idxEntry = foreignIndex;
} else if (allowDiskUse &&
foreignCollItr->second.approximateCollectionSizeBytes < kMaxHashJoinCollectionSize) {
eqLookupNode->lookupStrategy = EqLookupNode::LookupStrategy::kHashJoin;
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
index 4dac1748299..d53a81cb448 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
@@ -226,9 +226,14 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
case EqLookupNode::LookupStrategy::kHashJoin:
uasserted(5842602, "$lookup planning logic picked hash join");
break;
- case EqLookupNode::LookupStrategy::kIndexedLoopJoin:
- uasserted(5842603, "$lookup planning logic picked indexed loop join");
+ case EqLookupNode::LookupStrategy::kIndexedLoopJoin: {
+ const auto& index = *eqLookupNode->idxEntry;
+ uasserted(5842603,
+ str::stream()
+ << "$lookup planning logic picked indexed loop join with index: "
+ << index.identifier.toString());
break;
+ }
case EqLookupNode::LookupStrategy::kNestedLoopJoin:
// TODO SERVER-63533: replace the check for number of children with proper access to the
// foreign collection. The check currently allows us to run unit tests.