From 0ae6bba3c1483a309640e60ba5fb80f45b16ea67 Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Fri, 19 Aug 2022 19:33:12 +0000 Subject: SERVER-67140 Change query planner to determine column index eligibility based on field projection --- src/mongo/db/exec/projection_executor_utils.cpp | 15 - src/mongo/db/exec/projection_executor_utils.h | 12 +- src/mongo/db/query/collection_query_info.cpp | 13 +- src/mongo/db/query/get_executor.cpp | 28 +- src/mongo/db/query/get_executor.h | 9 + src/mongo/db/query/index_entry.h | 45 ++- src/mongo/db/query/plan_cache_indexability.cpp | 2 +- src/mongo/db/query/planner_wildcard_helpers.cpp | 4 +- src/mongo/db/query/query_planner.cpp | 81 ++++-- src/mongo/db/query/query_planner_columnar_test.cpp | 309 ++++++++++++++++++++- src/mongo/db/query/query_planner_test_lib.cpp | 4 +- src/mongo/db/query/sbe_stage_builder.cpp | 2 +- 12 files changed, 454 insertions(+), 70 deletions(-) diff --git a/src/mongo/db/exec/projection_executor_utils.cpp b/src/mongo/db/exec/projection_executor_utils.cpp index b77b330ff05..074d2bce485 100644 --- a/src/mongo/db/exec/projection_executor_utils.cpp +++ b/src/mongo/db/exec/projection_executor_utils.cpp @@ -37,21 +37,6 @@ bool applyProjectionToOneField(projection_executor::ProjectionExecutor* executor md.setNestedField(fp, Value{1.0}); auto output = executor->applyTransformation(md.freeze()); return !output.getNestedField(fp).missing(); - return false; -} - -stdx::unordered_set applyProjectionToFields( - projection_executor::ProjectionExecutor* executor, - const stdx::unordered_set& fields) { - stdx::unordered_set out; - - for (const auto& field : fields) { - if (applyProjectionToOneField(executor, field)) { - out.insert(field); - } - } - - return out; } namespace { diff --git a/src/mongo/db/exec/projection_executor_utils.h b/src/mongo/db/exec/projection_executor_utils.h index 0b5558bc221..a6f3a042200 100644 --- a/src/mongo/db/exec/projection_executor_utils.h +++ b/src/mongo/db/exec/projection_executor_utils.h 
@@ -45,9 +45,17 @@ bool applyProjectionToOneField(projection_executor::ProjectionExecutor* executor * Applies the projection to each field from the 'fields' set and stores it in the returned set * if the projection would allow that field to remain in a document. **/ +template stdx::unordered_set applyProjectionToFields( - projection_executor::ProjectionExecutor* executor, - const stdx::unordered_set& fields); + projection_executor::ProjectionExecutor* executor, Container const& fields) { + stdx::unordered_set out; + for (const auto& field : fields) { + if (applyProjectionToOneField(executor, field)) { + out.insert(field); + } + } + return out; +} /** * Applies a positional projection on the first array found in the 'path' on a projection diff --git a/src/mongo/db/query/collection_query_info.cpp b/src/mongo/db/query/collection_query_info.cpp index a0b4fe3cd36..55261978b21 100644 --- a/src/mongo/db/query/collection_query_info.cpp +++ b/src/mongo/db/query/collection_query_info.cpp @@ -42,6 +42,7 @@ #include "mongo/db/exec/projection_executor.h" #include "mongo/db/exec/projection_executor_utils.h" #include "mongo/db/fts/fts_spec.h" +#include "mongo/db/index/columns_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/query/classic_plan_cache.h" @@ -127,10 +128,14 @@ void CollectionQueryInfo::computeIndexKeys(OperationContext* opCtx, const Collec const IndexDescriptor* descriptor = entry->descriptor(); const IndexAccessMethod* iam = entry->accessMethod(); - if (descriptor->getAccessMethodName() == IndexNames::WILDCARD) { + if (bool isWildcard = (descriptor->getAccessMethodName() == IndexNames::WILDCARD); + isWildcard || descriptor->getAccessMethodName() == IndexNames::COLUMN) { // Obtain the projection used by the $** index's key generator. - const auto* pathProj = - static_cast(iam)->getWildcardProjection(); + const auto* pathProj = isWildcard + ? 
static_cast( + static_cast(iam)->getWildcardProjection()) + : static_cast( + static_cast(iam)->getColumnstoreProjection()); // If the projection is an exclusion, then we must check the new document's keys on all // updates, since we do not exhaustively know the set of paths to be indexed. if (pathProj->exec()->getType() == @@ -145,8 +150,6 @@ void CollectionQueryInfo::computeIndexKeys(OperationContext* opCtx, const Collec _indexedPaths.addPath(path); } } - } else if (descriptor->getAccessMethodName() == IndexNames::COLUMN) { - _indexedPaths.allPathsIndexed(); } else if (descriptor->getAccessMethodName() == IndexNames::TEXT) { fts::FTSSpec ftsSpec(descriptor->infoObj()); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 9025c6b4cbe..edab86732eb 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -56,6 +56,7 @@ #include "mongo/db/exec/sort_key_generator.h" #include "mongo/db/exec/subplan.h" #include "mongo/db/exec/upsert_stage.h" +#include "mongo/db/index/columns_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/index_names.h" @@ -256,6 +257,30 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, wildcardProjection}; } +ColumnIndexEntry columnIndexEntryFromIndexCatalogEntry(OperationContext* opCtx, + const CollectionPtr& collection, + const IndexCatalogEntry& ice) { + + auto desc = ice.descriptor(); + invariant(desc); + + auto accessMethod = ice.accessMethod(); + invariant(accessMethod); + + auto cam = static_cast(accessMethod); + const auto columnstoreProjection = cam->getColumnstoreProjection(); + + return {desc->keyPattern(), + desc->getIndexType(), + desc->version(), + desc->isSparse(), + desc->unique(), + ColumnIndexEntry::Identifier{desc->indexName()}, + ice.getFilterExpression(), + ice.getCollator(), + columnstoreProjection}; +} + /** * If query supports index filters, filter 
params.indices according to any index filters that have * been configured. In addition, sets that there were indeed index filters applied. @@ -303,7 +328,8 @@ void fillOutIndexEntries(OperationContext* opCtx, continue; if (indexType == IndexType::INDEX_COLUMN) { - columnEntries.emplace_back(ice->descriptor()->indexName()); + columnEntries.emplace_back( + columnIndexEntryFromIndexCatalogEntry(opCtx, collection, *ice)); } else { entries.emplace_back( indexEntryFromIndexCatalogEntry(opCtx, collection, *ice, canonicalQuery)); diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h index e913108679c..75ab5475f2f 100644 --- a/src/mongo/db/query/get_executor.h +++ b/src/mongo/db/query/get_executor.h @@ -127,6 +127,15 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, const IndexCatalogEntry& ice, const CanonicalQuery* canonicalQuery = nullptr); +/** + * Converts the catalog metadata for an index into a ColumnIndexEntry, which is a format that is + * meant to be consumed by the query planner. This function can perform index reads and should not + * be called unless access to the storage engine is permitted. + */ +ColumnIndexEntry columnIndexEntryFromIndexCatalogEntry(OperationContext* opCtx, + const CollectionPtr& collection, + const IndexCatalogEntry& ice); + /** * Determines whether or not to wait for oplog visibility for a query. This is only used for * collection scans on the oplog. 
diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index 40313fdfed6..6d73844f426 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -61,17 +61,17 @@ struct CoreIndexInfo { Identifier ident, const MatchExpression* fe = nullptr, const CollatorInterface* ci = nullptr, - const WildcardProjection* wildcardProj = nullptr) + const IndexPathProjection* indexPathProj = nullptr) : identifier(std::move(ident)), keyPattern(kp), filterExpr(fe), type(type), sparse(sp), collator(ci), - wildcardProjection(wildcardProj) { - // We always expect a projection executor for $** indexes, and none otherwise. - // TODO SERVER-67140: Add columnstoreProjection and invariant - invariant((type == IndexType::INDEX_WILDCARD) == (wildcardProjection != nullptr)); + indexPathProjection(indexPathProj) { + // If a projection executor exists, we expect a wildcard ($**) or column store index. + if (indexPathProjection != nullptr) + invariant(type == IndexType::INDEX_WILDCARD || type == IndexType::INDEX_COLUMN); } virtual ~CoreIndexInfo() = default; @@ -138,8 +138,9 @@ struct CoreIndexInfo { const CollatorInterface* collator = nullptr; // For $** indexes, a pointer to the projection executor owned by the index access method. Null - // unless this IndexEntry represents a wildcard index, in which case this is always non-null. - const WildcardProjection* wildcardProjection = nullptr; + // unless this IndexEntry represents a wildcard or column store index, in which case this is + // always non-null. + const IndexPathProjection* indexPathProjection = nullptr; }; /** @@ -259,12 +260,34 @@ struct IndexEntry : CoreIndexInfo { /** * Represents a columnar index. 
*/ -struct ColumnIndexEntry { - ColumnIndexEntry(std::string catalogName) : catalogName(std::move(catalogName)) {} +struct ColumnIndexEntry : CoreIndexInfo { + ColumnIndexEntry(const BSONObj& keyPattern, + IndexType type, + IndexDescriptor::IndexVersion version, + bool sparse, + bool unique, + Identifier ident, + const MatchExpression* filterExpression, + const CollatorInterface* ci, + const IndexPathProjection* columnstoreProjection) + : CoreIndexInfo(keyPattern, + type, + sparse, + std::move(ident), + filterExpression, + ci, + columnstoreProjection), + version(version), + unique(unique) {} - std::string catalogName; + ColumnIndexEntry(const ColumnIndexEntry&) = default; + ColumnIndexEntry(ColumnIndexEntry&&) = default; + ColumnIndexEntry& operator=(const ColumnIndexEntry&) = default; + ColumnIndexEntry& operator=(ColumnIndexEntry&&) = default; + ~ColumnIndexEntry() = default; - // TODO SERVER-67140: Projection, probably need some kind of disambiguator. + IndexDescriptor::IndexVersion version; + bool unique; }; std::ostream& operator<<(std::ostream& stream, const IndexEntry::Identifier& ident); diff --git a/src/mongo/db/query/plan_cache_indexability.cpp b/src/mongo/db/query/plan_cache_indexability.cpp index 4f1b0c176f8..90b3e68239e 100644 --- a/src/mongo/db/query/plan_cache_indexability.cpp +++ b/src/mongo/db/query/plan_cache_indexability.cpp @@ -118,7 +118,7 @@ void PlanCacheIndexabilityState::processWildcardIndex(const CoreIndexInfo& cii) invariant(cii.type == IndexType::INDEX_WILDCARD); _wildcardIndexDiscriminators.emplace_back( - cii.wildcardProjection->exec(), cii.identifier.catalogName, cii.collator); + cii.indexPathProjection->exec(), cii.identifier.catalogName, cii.collator); } void PlanCacheIndexabilityState::processIndexCollation(const std::string& indexName, diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp index ab2731461e8..bdc229352ee 100644 --- 
a/src/mongo/db/query/planner_wildcard_helpers.cpp +++ b/src/mongo/db/query/planner_wildcard_helpers.cpp @@ -361,7 +361,7 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, invariant(wildcardIndex.multikeyPaths.empty()); // Obtain the projection executor from the parent wildcard IndexEntry. - auto* wildcardProjection = wildcardIndex.wildcardProjection; + auto* wildcardProjection = wildcardIndex.indexPathProjection; invariant(wildcardProjection); const auto projectedFields = @@ -411,7 +411,7 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, wildcardIndex.filterExpr, wildcardIndex.infoObj, wildcardIndex.collator, - wildcardIndex.wildcardProjection); + wildcardIndex.indexPathProjection); invariant("$_path"_sd != fieldName); out->push_back(std::move(entry)); diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index b252117ab75..4420e3f2725 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -40,6 +40,7 @@ #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/catalog/clustered_collection_util.h" #include "mongo/db/exec/bucket_unpacker.h" +#include "mongo/db/exec/projection_executor_utils.h" #include "mongo/db/index/wildcard_key_generator.h" #include "mongo/db/index_names.h" #include "mongo/db/matcher/expression_algo.h" @@ -158,7 +159,6 @@ bool hintMatchesNameOrPattern(const BSONObj& hintObj, firstHintElt.type() == BSONType::String) { // An index name is provided by the hint. return indexName == firstHintElt.valueStringData(); - ; } // An index spec is provided by the hint. @@ -191,10 +191,8 @@ bool hintMatchesClusterKey(const boost::optional& clust */ bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry& columnStoreIndex) { // TODO SERVER-68400: Should be possible to have some other keypattern. 
- return hintMatchesNameOrPattern(hintObj, - columnStoreIndex.catalogName, - BSON("$**" - << "columnstore")); + return hintMatchesNameOrPattern( + hintObj, columnStoreIndex.identifier.catalogName, columnStoreIndex.keyPattern); } /** @@ -258,7 +256,6 @@ std::unique_ptr makeColumnScanPlan( std::unique_ptr residualPredicate) { dassert(columnScanIsPossible(query, params)); - // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. return QueryPlannerAnalysis::analyzeDataAccess( query, params, @@ -288,10 +285,35 @@ Status checkColumnScanFieldLimits( } return Status::OK(); } + +bool checkProjectionCoversQuery(OrderedPathSet& fields, const ColumnIndexEntry& columnStoreIndex) { + const auto projectedFields = projection_executor_utils::applyProjectionToFields( + columnStoreIndex.indexPathProjection->exec(), fields); + // If the number of fields is equal to the number of fields preserved, then the projection + // covers the query. + return projectedFields.size() == fields.size(); +} + /** - * Attempts to build a plan using a columnstore index. Returns a non-OK status if it can't build - * one - * - with the code and message indicating the problem - or a QuerySolution if it can. + * A helper function that returns the number of column store indexes that cover the query, + * as well as an arbitrary, valid column store index for the column scan (nullptr if none). + */ +std::pair getValidColumnIndex( + OrderedPathSet& fields, const std::vector& columnStoreIndexes) { + const ColumnIndexEntry* chosenIndex = nullptr; + int numValid = 0; + for (const auto& columnStoreIndex : columnStoreIndexes) { + if (checkProjectionCoversQuery(fields, columnStoreIndex)) { + chosenIndex = numValid == 0 ? &columnStoreIndex : chosenIndex; + ++numValid; + } + } + return {numValid, chosenIndex}; +} + +/** + * Attempts to build a plan using a column store index. Returns a non-OK status if it can't build + * one with the code and message indicating the problem - or a QuerySolution if it can. 
*/ StatusWith> tryToBuildColumnScan( const QueryPlannerParams& params, @@ -302,16 +324,6 @@ StatusWith> tryToBuildColumnScan( } invariant(params.columnStoreIndexes.size() >= 1); - const auto& columnStoreIndex = hintedIndex.value_or(params.columnStoreIndexes.front()); - if (!hintedIndex && params.columnStoreIndexes.size() > 1) { - // TODO SERVER-67140 only warnn if there is more than one index that is actually eligible - // for use. - LOGV2_DEBUG(6298500, - 2, - "Multiple column store indexes present. Selecting the first " - "one arbitrarily", - "indexName"_attr = columnStoreIndex.catalogName); - } auto [filterDeps, outputDeps] = computeDeps(params, query); auto allFieldsReferenced = set_util::setUnion(filterDeps.fields, outputDeps.fields); @@ -319,7 +331,7 @@ StatusWith> tryToBuildColumnScan( // TODO SERVER-66284 Would like to enable a plan when hinted, even if we need the whole // document. Something like COLUMN_SCAN -> FETCH. return {ErrorCodes::Error{6298501}, - "cannot use columnstore index because the query requires seeing the entire " + "cannot use column store index because the query requires seeing the entire " "document"}; } else if (!hintedIndex && expression::containsOverlappingPaths(allFieldsReferenced)) { // The query needs a path and a parent or ancestor path. For example, the query needs to @@ -333,7 +345,34 @@ StatusWith> tryToBuildColumnScan( << set_util::setToString(allFieldsReferenced)}; } - // TODO SERVER-67140: Check if the columnar index actually provides the fields we need. + // Ensures that hinted index is eligible for the column scan. + if (hintedIndex && !checkProjectionCoversQuery(allFieldsReferenced, *hintedIndex)) { + return {ErrorCodes::Error{6714002}, + "the hinted column store index cannot be used because it does not cover the query"}; + } + + // Check that union of the dependency fields can be successfully projected by at least one + // column store index. 
+ auto [numValid, selectedColumnStoreIndex] = + getValidColumnIndex(allFieldsReferenced, params.columnStoreIndexes); + + // If no columnar index can support the projection, we will not use column scan. + if (numValid == 0) { + return {ErrorCodes::Error{6714001}, + "cannot use column store index because there exists no column store index for this " + "collection that covers the query"}; + } + invariant(selectedColumnStoreIndex); + + if (!hintedIndex && numValid > 1) { + LOGV2_DEBUG(6298500, + 2, + "Multiple column store indexes present. Selecting the first " + "one arbitrarily", + "indexName"_attr = selectedColumnStoreIndex->identifier.catalogName); + } + + const auto& columnStoreIndex = hintedIndex.value_or(*selectedColumnStoreIndex); std::unique_ptr residualPredicate; StringMap> filterSplitByColumn; std::tie(filterSplitByColumn, residualPredicate) = diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp index 490164ee87b..01305e374bc 100644 --- a/src/mongo/db/query/query_planner_columnar_test.cpp +++ b/src/mongo/db/query/query_planner_columnar_test.cpp @@ -29,6 +29,7 @@ #include "mongo/platform/basic.h" +#include "mongo/db/index/column_key_generator.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/inner_pipeline_stage_impl.h" #include "mongo/db/pipeline/inner_pipeline_stage_interface.h" @@ -43,6 +44,8 @@ namespace mongo { const std::string kIndexName = "indexName"; +const BSONObj kKeyPattern = BSON("$**" + << "columnstore"); /** * A specialization of the QueryPlannerTest fixture which makes it easy to present the planner with @@ -68,10 +71,24 @@ protected: kInternalQueryMaxNumberOfFieldsToChooseFilteredColumnScanDefault); } - void addColumnStoreIndexAndEnableFilterSplitting(StringData indexName = kIndexName) { - params.columnStoreIndexes.emplace_back(indexName.toString()); - - params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS; + void 
addColumnStoreIndexAndEnableFilterSplitting(bool genPerColFilter = true, + StringData indexName = kIndexName, + const IndexPathProjection* proj = nullptr, + BSONObj keyPattern = kKeyPattern, + MatchExpression* partialFilterExpr = nullptr, + CollatorInterface* collator = nullptr) { + params.columnStoreIndexes.emplace_back(keyPattern, + IndexType::INDEX_COLUMN, + IndexDescriptor::kLatestIndexVersion, + false /* sparse */, + false /* unique */, + IndexEntry::Identifier{indexName.toString()}, + partialFilterExpr, + collator, + proj ? proj : &_defaultPathProj); + if (genPerColFilter) { + params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS; + } } std::vector> makeInnerPipelineStages( @@ -83,10 +100,18 @@ protected: return stages; } + IndexPathProjection makeProjection(BSONObj columnstoreProjection, + BSONObj keyPattern = kKeyPattern) { + return column_keygen::ColumnKeyGenerator::createProjectionExecutor(keyPattern, + columnstoreProjection); + } + private: // SBE must be enabled in order to test columnar indexes. 
RAIIServerParameterControllerForTest _controllerSBE{"internalQueryFrameworkControl", "trySbeEngine"}; + IndexPathProjection _defaultPathProj = + column_keygen::ColumnKeyGenerator::createProjectionExecutor(kKeyPattern, BSONObj()); }; TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnStoreIndex) { @@ -718,7 +743,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupTest) { node: { column_scan: { filtersByPath: {name: {name: {$eq: 'bob'}}}, - outputFields: ['foo', 'x'], + outputFields: ['foo', 'x'], matchFields: ['name'] } } @@ -758,7 +783,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) { node: { column_scan: { filtersByPath: {name: {name: {$eq: 'bob'}}}, - outputFields: ['foo', 'x', 'name'], + outputFields: ['foo', 'x', 'name'], matchFields: ['name'] } } @@ -831,7 +856,7 @@ TEST_F(QueryPlannerColumnarTest, DottedFieldsWithGroupStageDoesNotRequireProject node: { column_scan: { filtersByPath: {name: {name: {$eq: 'bob'}}}, - outputFields: ['foo.bar', 'x.y', 'name'], + outputFields: ['foo.bar', 'x.y', 'name'], matchFields: ['name'] } } @@ -899,8 +924,8 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) { } TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreIndexes) { - addColumnStoreIndexAndEnableFilterSplitting("first index"_sd); - params.columnStoreIndexes.emplace_back("second index"); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd); + addColumnStoreIndexAndEnableFilterSplitting(false, "second index"_sd); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertSolutionExists(R"({ @@ -911,4 +936,270 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde } })"); } + +TEST_F(QueryPlannerColumnarTest, FullPredicateOption) { + addColumnStoreIndexAndEnableFilterSplitting(false, kIndexName); + + // Filter that could be pushed down, but isn't due to the lack of the + // GENERATE_PER_COLUMN_FILTER flag. 
+ auto predicate = fromjson(R"({ + specialAddress: {$exists: true}, + doNotContact: {$exists: true} + })"); + runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertSolutionExists(R"({ + proj: { + spec: {a: 1, _id: 0}, + node: { + column_scan: { + outputFields: ['a'], + matchFields: ['specialAddress', 'doNotContact'], + postAssemblyFilter: { + specialAddress: {$exists: true}, + doNotContact: {$exists: true} + } + } + } + } + })"); +} + +TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithExactFields) { + auto firstProj = makeProjection(fromjson(R"({"d": true, "b.c": true, "_id": false})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj); + + auto secondProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "_id": false})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj); + + // Should use the second index, despite the third index being valid, because the second index + // was seen first. + auto thirdProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "_id": false})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "third index"_sd, &thirdProj); + + runQuerySortProj( + BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "b.c" << 1 << "_id" << 0)); + + assertNumSolutions(1U); + assertSolutionExists(R"({ + proj: { + spec: {a: 1, 'b.c': 1, _id: 0}, + node: { + column_scan: { + indexName: 'second index', + filtersByPath: {a: {a: {$gt: 3}}}, + outputFields: ['a', 'b.c'], + matchFields: ['a'] + } + } + } + })"); +} + +TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithExtraFields) { + auto firstProj = makeProjection(fromjson( + R"({"a": true, "unsubscribed": true, "test field": true, "another test field": true, "_id": false})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj); + + auto secondProj = makeProjection(fromjson(R"({ + "a": true, + "addresses.zip": true, + "unsubscribed": true, + "specialAddress": true, + 
"doNotContact": true, + "test field": true, + "another test field": true, + "_id": false + })")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj); + + // The {$exists: false} predicates are expected in postAssemblyFilter, not filtersByPath. + auto complexPredicate = fromjson(R"({ + a: {$gte: 0}, + "addresses.zip": "12345", + unsubscribed: false, + specialAddress: {$exists: false}, + doNotContact: {$exists: false} + })"); + runQuerySortProj(complexPredicate, BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertSolutionExists(R"({ + proj: { + spec: {a: 1, _id: 0}, + node: { + column_scan: { + indexName: 'second index', + filtersByPath: { + a: {a: {$gte: 0}}, + 'addresses.zip': {'addresses.zip': {$eq: '12345'}}, + unsubscribed: {unsubscribed: false} + }, + outputFields: ['a'], + postAssemblyFilter: { + specialAddress: {$exists: false}, + doNotContact: {$exists: false} + }, + matchFields: + ['a', 'addresses.zip', 'unsubscribed', 'specialAddress', 'doNotContact'] + } + } + } + })"); +} + +TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithSinglePath) { + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd); + + + runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertNumSolutions(1U); + assertSolutionExists(R"({ + column_scan: { + indexName: 'first index', + filtersByPath: {}, + outputFields: ['a'], + matchFields: [] + } + })"); +} + +TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithAncestorField) { + auto firstProj = makeProjection(fromjson(R"({"foo": true, "x": true, "name": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj); + + auto secondProj = makeProjection(BSONObj(), + BSON("foo.$**" + << "columnstore")); + addColumnStoreIndexAndEnableFilterSplitting(true, + "second index"_sd, + &secondProj, + BSON("foo.$**" + << "columnstore")); + + auto pipeline = Pipeline::parse( + 
{fromjson("{$group: {_id: '$foo.bar', s: {$sum: '$x.y'}, name: {$first: '$name'}}}")}, + expCtx); + + runQueryWithPipeline(BSON("name" + << "bob"), + BSON("foo.bar" << 1 << "x.y" << 1 << "name" << 1 << "_id" << 0), + makeInnerPipelineStages(*pipeline)); + + assertNumSolutions(1U); + assertSolutionExists(R"({ + proj: { + spec: {'foo.bar': 1, 'x.y': 1, name: 1, _id: 0}, + node: { + column_scan: { + indexName: 'first index', + filtersByPath: {name: {name: {$eq: 'bob'}}}, + outputFields: ['foo.bar', 'x.y', 'name'], + matchFields: ['name'] + } + } + } + })"); + + ASSERT(!cq->pipeline().empty()); + auto solution = + QueryPlanner::extendWithAggPipeline(*cq, std::move(solns[0]), {} /* secondaryCollInfos + */); + ASSERT_OK(QueryPlannerTestLib::solutionMatches(R"({ + group: { + key: {_id: '$foo.bar'}, + accs: [{s: {$sum: '$x.y'}}, {name: {$first: '$name'}}], + node: { + column_scan: { + indexName: 'first index', + filtersByPath: {name: {name: {$eq: 'bob'}}}, + outputFields: ['foo.bar', 'x.y', 'name'], + matchFields: ['name'] + } + } + } + })", + solution->root())) + << solution->root()->toString(); +} + +TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreWithSinglePath) { + auto firstProj = makeProjection(BSONObj(), + BSON("a.$**" + << "columnstore")); + addColumnStoreIndexAndEnableFilterSplitting(true, + "first index"_sd, + &firstProj, + BSON("a.$**" + << "columnstore")); + internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); + runQuerySortProj(BSONObj(), BSONObj(), BSON("b" << 1)); + assertNumSolutions(1U); + assertSolutionExists(R"({proj: {spec: {b: 1}, node: {cscan: {dir: 1}}}})"); +} + +TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreMissingField) { + auto firstProj = makeProjection(fromjson(R"({"a": false})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj); + + auto secondProj = makeProjection(fromjson(R"({"b": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj); + + 
runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertNumSolutions(1U); + assertSolutionExists(R"({proj: {spec: {a: 1, _id: 0}, node: {cscan: {dir: 1}}}})"); +} + +TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreMissingMultipleField) { + auto firstProj = makeProjection(fromjson(R"({"a": true, "c": true, "d": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj); + + auto secondProj = makeProjection(fromjson(R"({"b": true, "c": true, "d": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj); + + runQuerySortProj(BSON("a" << 1), BSONObj(), BSON("a" << true << "b" << true)); + assertNumSolutions(1U); + assertSolutionExists( + R"({proj: {spec: {a: 1, b: 1}, node: {cscan: {dir: 1, filter: {a: {$eq: 1}}}}}})"); +} + +TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreSpecifiedSubField) { + auto firstProj = makeProjection(fromjson(R"({"a.b": true, "b.c": true, "c": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first_index"_sd, &firstProj); + + auto secondProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "d": true})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second_index"_sd, &secondProj); + + internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); + runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "c" << 1)); + assertNumSolutions(1U); + assertSolutionExists(R"({proj: {spec: {a: 1, c: 1}, node: {cscan: {dir: 1}}}})"); +} + +TEST_F(QueryPlannerColumnarTest, HintIndexDoesNotCoverQuery) { + // Column Store Index does not cover query. 
+ auto firstProj = makeProjection(BSONObj(), + BSON("b.$**" + << "columnstore")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj); + ASSERT_THROWS(runQuerySortProjSkipLimitHint(BSONObj(), + BSONObj(), + BSON("a" << 1 << "_id" << 0), + 0, + 0, + BSON("$hint" + << "first index")), + unittest::TestAssertionFailureException); +} + +TEST_F(QueryPlannerColumnarTest, NoColumnIndexCoversQuery) { + auto firstProj = makeProjection(fromjson(R"({b: 1, d: 1})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj); + auto secondProj = makeProjection(fromjson(R"({c: 1, d: 1})")); + addColumnStoreIndexAndEnableFilterSplitting(true, "second index", &secondProj); + + // Valid for column scan, but no column store indices that cover the query. + runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1)); + assertNumSolutions(1U); + assertSolutionExists(R"({proj: {spec: {a: 1}, node: {cscan: {dir: 1}}}})"); +} + } // namespace mongo diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 1984edba47b..fec34133cd9 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -1312,8 +1312,8 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, auto obj = expectedElem.Obj(); if (auto indexName = obj["indexName"]) { - if (auto nameStatus = - indexNamesMatch(indexName, actualColumnIxScanNode->indexEntry.catalogName); + if (auto nameStatus = indexNamesMatch( + indexName, actualColumnIxScanNode->indexEntry.identifier.catalogName); !nameStatus.isOK()) { return nameStatus; } diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index a9689b3284f..71d0d82f3d7 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -888,7 +888,7 @@ std::pair, PlanStageSlots> SlotBasedStageBuilder std::unique_ptr stage = 
std::make_unique(getCurrentCollection(reqs)->uuid(), - csn->indexEntry.catalogName, + csn->indexEntry.identifier.catalogName, std::move(paths), std::move(includeInOutput), ridSlot, -- cgit v1.2.1