diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2022-02-22 17:30:29 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-22 18:15:43 +0000 |
commit | c8ec0f4a1587ce3c221d44799cba47553a629274 (patch) | |
tree | 8951eb9e9c3cb3d884bea335e55e324a40a7ca1b | |
parent | 7ad05d39663d30ee9f20afafd499905a165127f9 (diff) | |
download | mongo-c8ec0f4a1587ce3c221d44799cba47553a629274.tar.gz |
SERVER-63636 Fix a bug with collation requirements on rid when merge join is used
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/implementers.cpp | 110 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/index_bounds.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/index_bounds.h | 3 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp | 91 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.h | 3 |
6 files changed, 184 insertions, 49 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp index 412e3528e73..7d7a21c2b52 100644 --- a/src/mongo/db/query/optimizer/cascades/implementers.cpp +++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp @@ -460,8 +460,11 @@ public: } } - const auto availableDirections = indexSatisfiesCollation( - indexDef.getCollationSpec(), candidateIndexEntry, requiredCollation); + const auto availableDirections = + indexSatisfiesCollation(indexDef.getCollationSpec(), + candidateIndexEntry, + requiredCollation, + ridProjName); if (!availableDirections._forward && !availableDirections._backward) { // Failed to satisfy collation. continue; @@ -719,9 +722,9 @@ public: // Split collation between inner and outer side. const CollationSplitResult& collationLeftRightSplit = - splitCollationSpec(collationSpec, leftProjections, rightProjections); + splitCollationSpec(ridProjName, collationSpec, leftProjections, rightProjections); const CollationSplitResult& collationRightLeftSplit = - splitCollationSpec(collationSpec, rightProjections, leftProjections); + splitCollationSpec(ridProjName, collationSpec, rightProjections, leftProjections); // We are propagating the distribution requirements to both sides. PhysProps leftPhysProps = _physProps; @@ -1092,7 +1095,8 @@ private: IndexAvailableDirections indexSatisfiesCollation( const IndexCollationSpec& indexCollationSpec, const CandidateIndexEntry& candidateIndexEntry, - const ProjectionCollationSpec& requiredCollationSpec) { + const ProjectionCollationSpec& requiredCollationSpec, + const ProjectionName& ridProjName) { if (requiredCollationSpec.empty()) { return {true, true}; } @@ -1102,48 +1106,62 @@ private: bool indexSuitable = true; const auto& fieldProjections = candidateIndexEntry._fieldProjectionMap._fieldProjections; + const auto updateDirectionsFn = [&result](const CollationOp availableOp, + const CollationOp reqOp) { + result._forward &= collationOpsCompatible(availableOp, reqOp); + result._backward &= collationOpsCompatible(reverseCollationOp(availableOp), reqOp); + }; + // Verify the index is compatible with our collation requirement, and can deliver the right - // order of paths. - for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) { - const bool needsCollation = candidateIndexEntry._fieldsToCollate.count(indexField) > 0; - - auto it = fieldProjections.find(encodeIndexKeyName(indexField)); - if (it == fieldProjections.cend()) { - // No bound projection for this index field. - if (needsCollation) { - // We cannot satisfy the rest of the collation requirements. - indexSuitable = false; - break; - } - continue; - } - const ProjectionName& projName = it->second; + // order of paths. Note: we are iterating to index one past the size. We assume there is an + // implicit rid index field which is collated in increasing order. + for (size_t indexField = 0; indexField < indexCollationSpec.size() + 1; indexField++) { + const auto& [reqProjName, reqOp] = requiredCollationSpec.at(collationSpecIndex); + + if (indexField < indexCollationSpec.size()) { + const bool needsCollation = + candidateIndexEntry._fieldsToCollate.count(indexField) > 0; - if (!needsCollation) { - // We do not need to collate this field because of equality. - if (requiredCollationSpec.at(collationSpecIndex).first == projName) { - // We can satisfy the next collation requirement independent of collation op. - if (++collationSpecIndex >= requiredCollationSpec.size()) { + auto it = fieldProjections.find(encodeIndexKeyName(indexField)); + if (it == fieldProjections.cend()) { + // No bound projection for this index field. + if (needsCollation) { + // We cannot satisfy the rest of the collation requirements. + indexSuitable = false; break; } + continue; + } + const ProjectionName& projName = it->second; + + if (!needsCollation) { + // We do not need to collate this field because of equality. + if (requiredCollationSpec.at(collationSpecIndex).first == projName) { + // We can satisfy the next collation requirement independent of collation + // op. + if (++collationSpecIndex >= requiredCollationSpec.size()) { + break; + } + } + continue; } - continue; - } - // Check if we can satisfy the next collation requirement. - const auto& [reqProjName, reqOp] = requiredCollationSpec.at(collationSpecIndex); - if (reqProjName != projName) { - indexSuitable = false; - break; + if (reqProjName != projName) { + indexSuitable = false; + break; + } + updateDirectionsFn(indexCollationSpec.at(indexField)._op, reqOp); + } else { + // If we fall through here, we are trying to satisfy a trailing collation + // requirement on rid. + if (reqProjName != ridProjName || + candidateIndexEntry._intervalPrefixSize != indexCollationSpec.size()) { + indexSuitable = false; + break; + } + updateDirectionsFn(CollationOp::Ascending, reqOp); } - const CollationOp indexOp = indexCollationSpec.at(indexField)._op; - if (result._forward && !collationOpsCompatible(indexOp, reqOp)) { - result._forward = false; - } - if (result._backward && !collationOpsCompatible(reverseCollationOp(indexOp), reqOp)) { - result._backward = false; - } if (!result._forward && !result._backward) { indexSuitable = false; break; @@ -1311,8 +1329,18 @@ private: PhysProps leftPhysPropsLocal = leftPhysProps; PhysProps rightPhysPropsLocal = rightPhysProps; - setCollationForRIDIntersect( - collationLeftRightSplit, leftPhysPropsLocal, rightPhysPropsLocal); + // Add collation requirement on rid to both sides if needed. + CollationSplitResult split = collationLeftRightSplit; + if (split._leftCollation.empty() || + split._leftCollation.back().first != ridProjectionName) { + split._leftCollation.emplace_back(ridProjectionName, CollationOp::Ascending); + } + if (split._rightCollation.empty() || + split._rightCollation.back().first != ridProjectionName) { + split._rightCollation.emplace_back(ridProjectionName, CollationOp::Ascending); + } + setCollationForRIDIntersect(split, leftPhysPropsLocal, rightPhysPropsLocal); + if (dedupRID) { getProperty<IndexingRequirement>(leftPhysPropsLocal) .setDedupRID(true /*dedupRID*/); diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp index 5c2ff9fd66b..4ceaf663a94 100644 --- a/src/mongo/db/query/optimizer/index_bounds.cpp +++ b/src/mongo/db/query/optimizer/index_bounds.cpp @@ -210,7 +210,8 @@ ResidualRequirement::ResidualRequirement(PartialSchemaKey key, bool CandidateIndexEntry::operator==(const CandidateIndexEntry& other) const { return _fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals && _residualRequirements == other._residualRequirements && - _fieldsToCollate == other._fieldsToCollate; + _fieldsToCollate == other._fieldsToCollate && + _intervalPrefixSize == other._intervalPrefixSize; } IndexSpecification::IndexSpecification(std::string scanDefName, diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h index f172c361292..010285172e0 100644 --- a/src/mongo/db/query/optimizer/index_bounds.h +++ b/src/mongo/db/query/optimizer/index_bounds.h @@ -169,6 +169,9 @@ struct CandidateIndexEntry { // requirements. // TODO: consider a bitset. opt::unordered_set<size_t> _fieldsToCollate; + + // Length of prefix of fields with applied intervals. + size_t _intervalPrefixSize; }; using CandidateIndexMap = opt::unordered_map<std::string /*index name*/, CandidateIndexEntry>; diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp index 00446c214d8..b8ff9d71a13 100644 --- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp @@ -2624,6 +2624,97 @@ TEST(PhysRewriter, CompoundIndex3) { optimized); } +TEST(PhysRewriter, CompoundIndex4Negative) { + using namespace properties; + PrefixId prefixId; + + ABT scanNode = make<ScanNode>("root", "c1"); + + // Create the following expression: {$and: [{a: {$eq: 1}}, {b: {$eq: 2}}]} + ABT evalANode = make<EvaluationNode>( + "pa", + make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")), + std::move(scanNode)); + + ABT filterANode = make<FilterNode>( + make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(1))), + make<Variable>("pa")), + std::move(evalANode)); + + ABT evalBNode = make<EvaluationNode>( + "pb", + make<EvalPath>(make<PathGet>("b", make<PathIdentity>()), make<Variable>("root")), + std::move(filterANode)); + + ABT filterBNode = make<FilterNode>( + make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2))), + make<Variable>("pb")), + std::move(evalBNode)); + + ABT rootNode = + make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterBNode)); + + // Create the following indexes: {a:1, c:1, {name: 'index1'}}, and {b:1, d:1, {name: 'index2'}} + OptPhaseManager phaseManager( + {OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase, + OptPhaseManager::OptPhase::MemoImplementationPhase}, + prefixId, + {{{"c1", + ScanDefinition{ + {}, + {{"index1", + IndexDefinition{{{makeNonMultikeyIndexPath("a"), CollationOp::Ascending}, + {makeNonMultikeyIndexPath("c"), CollationOp::Descending}}, + false /*isMultiKey*/}}, + {"index2", + IndexDefinition{{{makeNonMultikeyIndexPath("b"), CollationOp::Ascending}, + {makeNonMultikeyIndexPath("d"), CollationOp::Ascending}}, + false /*isMultiKey*/}}}}}}}, + {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests}); + + ABT optimized = rootNode; + ASSERT_TRUE(phaseManager.optimize(optimized)); + ASSERT_BETWEEN(20, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount); + + // Index intersection via merge join relies on the fact that the RIDs of equal keys are sorted. + // Demonstrate that we do not get a merge join when the lookup keys on both intersected indexes + // do not cover all field indexes. In this case there is no guarantee that the RIDs of all + // matching keys will be sorted, and therefore they cannot be merge-joined. + ASSERT_EXPLAIN_V2( + "Root []\n" + "| | projections: \n" + "| | root\n" + "| RefBlock: \n" + "| Variable [root]\n" + "BinaryJoin [joinType: Inner, {rid_0}]\n" + "| | Const [true]\n" + "| Filter []\n" + "| | EvalFilter []\n" + "| | | Variable [evalTemp_2]\n" + "| | PathTraverse []\n" + "| | PathCompare [Eq]\n" + "| | Const [2]\n" + "| LimitSkip []\n" + "| | limitSkip:\n" + "| | limit: 1\n" + "| | skip: 0\n" + "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_2}, c1]\n" + "| | BindBlock:\n" + "| | [evalTemp_2]\n" + "| | Source []\n" + "| | [root]\n" + "| | Source []\n" + "| RefBlock: \n" + "| Variable [rid_0]\n" + "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const " + "[1], Const [1]], (-inf, +inf)}]\n" + " BindBlock:\n" + " [rid_0]\n" + " Source []\n", + optimized); +} + TEST(PhysRewriter, IndexBoundsIntersect) { using namespace properties; PrefixId prefixId; diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index d0ac280fdbc..933dda604fd 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -145,17 +145,24 @@ bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals return true; } -CollationSplitResult splitCollationSpec(const ProjectionCollationSpec& collationSpec, +CollationSplitResult splitCollationSpec(const ProjectionName& ridProjName, + const ProjectionCollationSpec& collationSpec, const ProjectionNameSet& leftProjections, const ProjectionNameSet& rightProjections) { bool leftSide = true; ProjectionCollationSpec leftCollationSpec; ProjectionCollationSpec rightCollationSpec; - for (const auto& collationEntry : collationSpec) { + for (size_t index = 0; index < collationSpec.size(); index++) { + const auto& collationEntry = collationSpec[index]; + const ProjectionName& projectionName = collationEntry.first; + if (projectionName == ridProjName) { + uassert(6624147, "Collation on RID must be last", index + 1 == collationSpec.size()); - if (leftProjections.count(projectionName) > 0) { + // Propagate collation requirement on rid only to left side. + leftCollationSpec.emplace_back(collationEntry); + } else if (leftProjections.count(projectionName) > 0) { if (!leftSide) { // Left and right projections must complement and form prefix and suffix. return {}; @@ -872,6 +879,7 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, ProjectionNameSet residualRequirementsTempProjections; ResidualKeyMap residualKeyMap; opt::unordered_set<size_t> fieldsToCollate; + size_t intervalPrefixSize = 0; PartialSchemaKeySet unsatisfiedKeys; for (const auto& [key, req] : reqMap) { @@ -917,7 +925,7 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, } else { if (indexDef.getPartialReqMap().empty()) { hasEmptyInterval = true; - return CandidateIndexMap(); + return {}; } else { // This is a partial index, so skip the empty interval, but consider the // remaining indexes. @@ -927,7 +935,9 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, } } - if (!combineSuccess) { + if (combineSuccess) { + intervalPrefixSize++; + } else { if (!combineMultiKeyIntervalsDNF(intervals, IntervalReqExpr::makeSingularDNF())) { uasserted(6624155, "Cannot combine with an open interval"); @@ -1027,7 +1037,8 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, std::move(residualRequirements), std::move(residualRequirementsTempProjections), std::move(residualKeyMap), - std::move(fieldsToCollate)}); + std::move(fieldsToCollate), + intervalPrefixSize}); } return result; diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index 371dac2a48d..12efe684882 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -122,7 +122,8 @@ struct CollationSplitResult { * Split a collation requirement between an outer (left) and inner (right) side. The outer side must * be a prefix in the collation spec, and the right side a suffix. */ -CollationSplitResult splitCollationSpec(const ProjectionCollationSpec& collationSpec, +CollationSplitResult splitCollationSpec(const ProjectionName& ridProjName, + const ProjectionCollationSpec& collationSpec, const ProjectionNameSet& leftProjections, const ProjectionNameSet& rightProjections); |