summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2022-02-22 17:30:29 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-22 18:15:43 +0000
commitc8ec0f4a1587ce3c221d44799cba47553a629274 (patch)
tree8951eb9e9c3cb3d884bea335e55e324a40a7ca1b
parent7ad05d39663d30ee9f20afafd499905a165127f9 (diff)
downloadmongo-c8ec0f4a1587ce3c221d44799cba47553a629274.tar.gz
SERVER-63636 Fix a bug with collation requirements on rid when merge join is used
-rw-r--r--src/mongo/db/query/optimizer/cascades/implementers.cpp110
-rw-r--r--src/mongo/db/query/optimizer/index_bounds.cpp3
-rw-r--r--src/mongo/db/query/optimizer/index_bounds.h3
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp91
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp23
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h3
6 files changed, 184 insertions, 49 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp
index 412e3528e73..7d7a21c2b52 100644
--- a/src/mongo/db/query/optimizer/cascades/implementers.cpp
+++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp
@@ -460,8 +460,11 @@ public:
}
}
- const auto availableDirections = indexSatisfiesCollation(
- indexDef.getCollationSpec(), candidateIndexEntry, requiredCollation);
+ const auto availableDirections =
+ indexSatisfiesCollation(indexDef.getCollationSpec(),
+ candidateIndexEntry,
+ requiredCollation,
+ ridProjName);
if (!availableDirections._forward && !availableDirections._backward) {
// Failed to satisfy collation.
continue;
@@ -719,9 +722,9 @@ public:
// Split collation between inner and outer side.
const CollationSplitResult& collationLeftRightSplit =
- splitCollationSpec(collationSpec, leftProjections, rightProjections);
+ splitCollationSpec(ridProjName, collationSpec, leftProjections, rightProjections);
const CollationSplitResult& collationRightLeftSplit =
- splitCollationSpec(collationSpec, rightProjections, leftProjections);
+ splitCollationSpec(ridProjName, collationSpec, rightProjections, leftProjections);
// We are propagating the distribution requirements to both sides.
PhysProps leftPhysProps = _physProps;
@@ -1092,7 +1095,8 @@ private:
IndexAvailableDirections indexSatisfiesCollation(
const IndexCollationSpec& indexCollationSpec,
const CandidateIndexEntry& candidateIndexEntry,
- const ProjectionCollationSpec& requiredCollationSpec) {
+ const ProjectionCollationSpec& requiredCollationSpec,
+ const ProjectionName& ridProjName) {
if (requiredCollationSpec.empty()) {
return {true, true};
}
@@ -1102,48 +1106,62 @@ private:
bool indexSuitable = true;
const auto& fieldProjections = candidateIndexEntry._fieldProjectionMap._fieldProjections;
+ const auto updateDirectionsFn = [&result](const CollationOp availableOp,
+ const CollationOp reqOp) {
+ result._forward &= collationOpsCompatible(availableOp, reqOp);
+ result._backward &= collationOpsCompatible(reverseCollationOp(availableOp), reqOp);
+ };
+
// Verify the index is compatible with our collation requirement, and can deliver the right
- // order of paths.
- for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) {
- const bool needsCollation = candidateIndexEntry._fieldsToCollate.count(indexField) > 0;
-
- auto it = fieldProjections.find(encodeIndexKeyName(indexField));
- if (it == fieldProjections.cend()) {
- // No bound projection for this index field.
- if (needsCollation) {
- // We cannot satisfy the rest of the collation requirements.
- indexSuitable = false;
- break;
- }
- continue;
- }
- const ProjectionName& projName = it->second;
+ // order of paths. Note: we are iterating to index one past the size. We assume there is an
+ // implicit rid index field which is collated in increasing order.
+ for (size_t indexField = 0; indexField < indexCollationSpec.size() + 1; indexField++) {
+ const auto& [reqProjName, reqOp] = requiredCollationSpec.at(collationSpecIndex);
+
+ if (indexField < indexCollationSpec.size()) {
+ const bool needsCollation =
+ candidateIndexEntry._fieldsToCollate.count(indexField) > 0;
- if (!needsCollation) {
- // We do not need to collate this field because of equality.
- if (requiredCollationSpec.at(collationSpecIndex).first == projName) {
- // We can satisfy the next collation requirement independent of collation op.
- if (++collationSpecIndex >= requiredCollationSpec.size()) {
+ auto it = fieldProjections.find(encodeIndexKeyName(indexField));
+ if (it == fieldProjections.cend()) {
+ // No bound projection for this index field.
+ if (needsCollation) {
+ // We cannot satisfy the rest of the collation requirements.
+ indexSuitable = false;
break;
}
+ continue;
+ }
+ const ProjectionName& projName = it->second;
+
+ if (!needsCollation) {
+ // We do not need to collate this field because of equality.
+ if (requiredCollationSpec.at(collationSpecIndex).first == projName) {
+ // We can satisfy the next collation requirement independent of collation
+ // op.
+ if (++collationSpecIndex >= requiredCollationSpec.size()) {
+ break;
+ }
+ }
+ continue;
}
- continue;
- }
- // Check if we can satisfy the next collation requirement.
- const auto& [reqProjName, reqOp] = requiredCollationSpec.at(collationSpecIndex);
- if (reqProjName != projName) {
- indexSuitable = false;
- break;
+ if (reqProjName != projName) {
+ indexSuitable = false;
+ break;
+ }
+ updateDirectionsFn(indexCollationSpec.at(indexField)._op, reqOp);
+ } else {
+ // If we fall through here, we are trying to satisfy a trailing collation
+ // requirement on rid.
+ if (reqProjName != ridProjName ||
+ candidateIndexEntry._intervalPrefixSize != indexCollationSpec.size()) {
+ indexSuitable = false;
+ break;
+ }
+ updateDirectionsFn(CollationOp::Ascending, reqOp);
}
- const CollationOp indexOp = indexCollationSpec.at(indexField)._op;
- if (result._forward && !collationOpsCompatible(indexOp, reqOp)) {
- result._forward = false;
- }
- if (result._backward && !collationOpsCompatible(reverseCollationOp(indexOp), reqOp)) {
- result._backward = false;
- }
if (!result._forward && !result._backward) {
indexSuitable = false;
break;
@@ -1311,8 +1329,18 @@ private:
PhysProps leftPhysPropsLocal = leftPhysProps;
PhysProps rightPhysPropsLocal = rightPhysProps;
- setCollationForRIDIntersect(
- collationLeftRightSplit, leftPhysPropsLocal, rightPhysPropsLocal);
+ // Add collation requirement on rid to both sides if needed.
+ CollationSplitResult split = collationLeftRightSplit;
+ if (split._leftCollation.empty() ||
+ split._leftCollation.back().first != ridProjectionName) {
+ split._leftCollation.emplace_back(ridProjectionName, CollationOp::Ascending);
+ }
+ if (split._rightCollation.empty() ||
+ split._rightCollation.back().first != ridProjectionName) {
+ split._rightCollation.emplace_back(ridProjectionName, CollationOp::Ascending);
+ }
+ setCollationForRIDIntersect(split, leftPhysPropsLocal, rightPhysPropsLocal);
+
if (dedupRID) {
getProperty<IndexingRequirement>(leftPhysPropsLocal)
.setDedupRID(true /*dedupRID*/);
diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp
index 5c2ff9fd66b..4ceaf663a94 100644
--- a/src/mongo/db/query/optimizer/index_bounds.cpp
+++ b/src/mongo/db/query/optimizer/index_bounds.cpp
@@ -210,7 +210,8 @@ ResidualRequirement::ResidualRequirement(PartialSchemaKey key,
bool CandidateIndexEntry::operator==(const CandidateIndexEntry& other) const {
return _fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals &&
_residualRequirements == other._residualRequirements &&
- _fieldsToCollate == other._fieldsToCollate;
+ _fieldsToCollate == other._fieldsToCollate &&
+ _intervalPrefixSize == other._intervalPrefixSize;
}
IndexSpecification::IndexSpecification(std::string scanDefName,
diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h
index f172c361292..010285172e0 100644
--- a/src/mongo/db/query/optimizer/index_bounds.h
+++ b/src/mongo/db/query/optimizer/index_bounds.h
@@ -169,6 +169,9 @@ struct CandidateIndexEntry {
// requirements.
// TODO: consider a bitset.
opt::unordered_set<size_t> _fieldsToCollate;
+
+ // Length of prefix of fields with applied intervals.
+ size_t _intervalPrefixSize;
};
using CandidateIndexMap = opt::unordered_map<std::string /*index name*/, CandidateIndexEntry>;
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 00446c214d8..b8ff9d71a13 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -2624,6 +2624,97 @@ TEST(PhysRewriter, CompoundIndex3) {
optimized);
}
+TEST(PhysRewriter, CompoundIndex4Negative) {
+ using namespace properties;
+ PrefixId prefixId;
+
+ ABT scanNode = make<ScanNode>("root", "c1");
+
+ // Create the following expression: {$and: [{a: {$eq: 1}}, {b: {$eq: 2}}]}
+ ABT evalANode = make<EvaluationNode>(
+ "pa",
+ make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
+ std::move(scanNode));
+
+ ABT filterANode = make<FilterNode>(
+ make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(1))),
+ make<Variable>("pa")),
+ std::move(evalANode));
+
+ ABT evalBNode = make<EvaluationNode>(
+ "pb",
+ make<EvalPath>(make<PathGet>("b", make<PathIdentity>()), make<Variable>("root")),
+ std::move(filterANode));
+
+ ABT filterBNode = make<FilterNode>(
+ make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2))),
+ make<Variable>("pb")),
+ std::move(evalBNode));
+
+ ABT rootNode =
+ make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterBNode));
+
+ // Create the following indexes: {a:1, c:1, {name: 'index1'}}, and {b:1, d:1, {name: 'index2'}}
+ OptPhaseManager phaseManager(
+ {OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase,
+ OptPhaseManager::OptPhase::MemoImplementationPhase},
+ prefixId,
+ {{{"c1",
+ ScanDefinition{
+ {},
+ {{"index1",
+ IndexDefinition{{{makeNonMultikeyIndexPath("a"), CollationOp::Ascending},
+ {makeNonMultikeyIndexPath("c"), CollationOp::Descending}},
+ false /*isMultiKey*/}},
+ {"index2",
+ IndexDefinition{{{makeNonMultikeyIndexPath("b"), CollationOp::Ascending},
+ {makeNonMultikeyIndexPath("d"), CollationOp::Ascending}},
+ false /*isMultiKey*/}}}}}}},
+ {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
+
+ ABT optimized = rootNode;
+ ASSERT_TRUE(phaseManager.optimize(optimized));
+ ASSERT_BETWEEN(20, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+
+ // Index intersection via merge join relies on the fact that the RIDs of equal keys are sorted.
+ // Demonstrate that we do not get a merge join when the lookup keys on both intersected indexes
+ // do not cover all field indexes. In this case there is no guarantee that the RIDs of all
+ // matching keys will be sorted, and therefore they cannot be merge-joined.
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | root\n"
+ "| RefBlock: \n"
+ "| Variable [root]\n"
+ "BinaryJoin [joinType: Inner, {rid_0}]\n"
+ "| | Const [true]\n"
+ "| Filter []\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [evalTemp_2]\n"
+ "| | PathTraverse []\n"
+ "| | PathCompare [Eq]\n"
+ "| | Const [2]\n"
+ "| LimitSkip []\n"
+ "| | limitSkip:\n"
+ "| | limit: 1\n"
+ "| | skip: 0\n"
+ "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_2}, c1]\n"
+ "| | BindBlock:\n"
+ "| | [evalTemp_2]\n"
+ "| | Source []\n"
+ "| | [root]\n"
+ "| | Source []\n"
+ "| RefBlock: \n"
+ "| Variable [rid_0]\n"
+ "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const "
+ "[1], Const [1]], (-inf, +inf)}]\n"
+ " BindBlock:\n"
+ " [rid_0]\n"
+ " Source []\n",
+ optimized);
+}
+
TEST(PhysRewriter, IndexBoundsIntersect) {
using namespace properties;
PrefixId prefixId;
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index d0ac280fdbc..933dda604fd 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -145,17 +145,24 @@ bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals
return true;
}
-CollationSplitResult splitCollationSpec(const ProjectionCollationSpec& collationSpec,
+CollationSplitResult splitCollationSpec(const ProjectionName& ridProjName,
+ const ProjectionCollationSpec& collationSpec,
const ProjectionNameSet& leftProjections,
const ProjectionNameSet& rightProjections) {
bool leftSide = true;
ProjectionCollationSpec leftCollationSpec;
ProjectionCollationSpec rightCollationSpec;
- for (const auto& collationEntry : collationSpec) {
+ for (size_t index = 0; index < collationSpec.size(); index++) {
+ const auto& collationEntry = collationSpec[index];
+
const ProjectionName& projectionName = collationEntry.first;
+ if (projectionName == ridProjName) {
+ uassert(6624147, "Collation on RID must be last", index + 1 == collationSpec.size());
- if (leftProjections.count(projectionName) > 0) {
+ // Propagate collation requirement on rid only to left side.
+ leftCollationSpec.emplace_back(collationEntry);
+ } else if (leftProjections.count(projectionName) > 0) {
if (!leftSide) {
// Left and right projections must complement and form prefix and suffix.
return {};
@@ -872,6 +879,7 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
ProjectionNameSet residualRequirementsTempProjections;
ResidualKeyMap residualKeyMap;
opt::unordered_set<size_t> fieldsToCollate;
+ size_t intervalPrefixSize = 0;
PartialSchemaKeySet unsatisfiedKeys;
for (const auto& [key, req] : reqMap) {
@@ -917,7 +925,7 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
} else {
if (indexDef.getPartialReqMap().empty()) {
hasEmptyInterval = true;
- return CandidateIndexMap();
+ return {};
} else {
// This is a partial index, so skip the empty interval, but consider the
// remaining indexes.
@@ -927,7 +935,9 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
}
}
- if (!combineSuccess) {
+ if (combineSuccess) {
+ intervalPrefixSize++;
+ } else {
if (!combineMultiKeyIntervalsDNF(intervals,
IntervalReqExpr::makeSingularDNF())) {
uasserted(6624155, "Cannot combine with an open interval");
@@ -1027,7 +1037,8 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
std::move(residualRequirements),
std::move(residualRequirementsTempProjections),
std::move(residualKeyMap),
- std::move(fieldsToCollate)});
+ std::move(fieldsToCollate),
+ intervalPrefixSize});
}
return result;
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 371dac2a48d..12efe684882 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -122,7 +122,8 @@ struct CollationSplitResult {
* Split a collation requirement between an outer (left) and inner (right) side. The outer side must
* be a prefix in the collation spec, and the right side a suffix.
*/
-CollationSplitResult splitCollationSpec(const ProjectionCollationSpec& collationSpec,
+CollationSplitResult splitCollationSpec(const ProjectionName& ridProjName,
+ const ProjectionCollationSpec& collationSpec,
const ProjectionNameSet& leftProjections,
const ProjectionNameSet& rightProjections);