summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Zhang <justin.zhang@mongodb.com>2022-08-19 19:33:12 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-19 20:53:08 +0000
commit0ae6bba3c1483a309640e60ba5fb80f45b16ea67 (patch)
treef81d9f62f11a86a7154dc6c740bf01b7e8959e39
parentbcebd12f2e6492eef7f609d8b234f81e92c2f683 (diff)
downloadmongo-0ae6bba3c1483a309640e60ba5fb80f45b16ea67.tar.gz
SERVER-67140 Change query planner to determine column index eligibility based on field projection
-rw-r--r--src/mongo/db/exec/projection_executor_utils.cpp15
-rw-r--r--src/mongo/db/exec/projection_executor_utils.h12
-rw-r--r--src/mongo/db/query/collection_query_info.cpp13
-rw-r--r--src/mongo/db/query/get_executor.cpp28
-rw-r--r--src/mongo/db/query/get_executor.h9
-rw-r--r--src/mongo/db/query/index_entry.h45
-rw-r--r--src/mongo/db/query/plan_cache_indexability.cpp2
-rw-r--r--src/mongo/db/query/planner_wildcard_helpers.cpp4
-rw-r--r--src/mongo/db/query/query_planner.cpp81
-rw-r--r--src/mongo/db/query/query_planner_columnar_test.cpp309
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp4
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp2
12 files changed, 454 insertions, 70 deletions
diff --git a/src/mongo/db/exec/projection_executor_utils.cpp b/src/mongo/db/exec/projection_executor_utils.cpp
index b77b330ff05..074d2bce485 100644
--- a/src/mongo/db/exec/projection_executor_utils.cpp
+++ b/src/mongo/db/exec/projection_executor_utils.cpp
@@ -37,21 +37,6 @@ bool applyProjectionToOneField(projection_executor::ProjectionExecutor* executor
md.setNestedField(fp, Value{1.0});
auto output = executor->applyTransformation(md.freeze());
return !output.getNestedField(fp).missing();
- return false;
-}
-
-stdx::unordered_set<std::string> applyProjectionToFields(
- projection_executor::ProjectionExecutor* executor,
- const stdx::unordered_set<std::string>& fields) {
- stdx::unordered_set<std::string> out;
-
- for (const auto& field : fields) {
- if (applyProjectionToOneField(executor, field)) {
- out.insert(field);
- }
- }
-
- return out;
}
namespace {
diff --git a/src/mongo/db/exec/projection_executor_utils.h b/src/mongo/db/exec/projection_executor_utils.h
index 0b5558bc221..a6f3a042200 100644
--- a/src/mongo/db/exec/projection_executor_utils.h
+++ b/src/mongo/db/exec/projection_executor_utils.h
@@ -45,9 +45,17 @@ bool applyProjectionToOneField(projection_executor::ProjectionExecutor* executor
* Applies the projection to each field from the 'fields' set and stores it in the returned set
* if the projection would allow that field to remain in a document.
**/
+template <typename Container>
stdx::unordered_set<std::string> applyProjectionToFields(
- projection_executor::ProjectionExecutor* executor,
- const stdx::unordered_set<std::string>& fields);
+ projection_executor::ProjectionExecutor* executor, Container const& fields) {
+ stdx::unordered_set<std::string> out;
+ for (const auto& field : fields) {
+ if (applyProjectionToOneField(executor, field)) {
+ out.insert(field);
+ }
+ }
+ return out;
+}
/**
* Applies a positional projection on the first array found in the 'path' on a projection
diff --git a/src/mongo/db/query/collection_query_info.cpp b/src/mongo/db/query/collection_query_info.cpp
index a0b4fe3cd36..55261978b21 100644
--- a/src/mongo/db/query/collection_query_info.cpp
+++ b/src/mongo/db/query/collection_query_info.cpp
@@ -42,6 +42,7 @@
#include "mongo/db/exec/projection_executor.h"
#include "mongo/db/exec/projection_executor_utils.h"
#include "mongo/db/fts/fts_spec.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/query/classic_plan_cache.h"
@@ -127,10 +128,14 @@ void CollectionQueryInfo::computeIndexKeys(OperationContext* opCtx, const Collec
const IndexDescriptor* descriptor = entry->descriptor();
const IndexAccessMethod* iam = entry->accessMethod();
- if (descriptor->getAccessMethodName() == IndexNames::WILDCARD) {
+ if (bool isWildcard = (descriptor->getAccessMethodName() == IndexNames::WILDCARD);
+ isWildcard || descriptor->getAccessMethodName() == IndexNames::COLUMN) {
// Obtain the projection used by the $** index's key generator.
- const auto* pathProj =
- static_cast<const WildcardAccessMethod*>(iam)->getWildcardProjection();
+ const auto* pathProj = isWildcard
+ ? static_cast<const IndexPathProjection*>(
+ static_cast<const WildcardAccessMethod*>(iam)->getWildcardProjection())
+ : static_cast<const IndexPathProjection*>(
+ static_cast<const ColumnStoreAccessMethod*>(iam)->getColumnstoreProjection());
// If the projection is an exclusion, then we must check the new document's keys on all
// updates, since we do not exhaustively know the set of paths to be indexed.
if (pathProj->exec()->getType() ==
@@ -145,8 +150,6 @@ void CollectionQueryInfo::computeIndexKeys(OperationContext* opCtx, const Collec
_indexedPaths.addPath(path);
}
}
- } else if (descriptor->getAccessMethodName() == IndexNames::COLUMN) {
- _indexedPaths.allPathsIndexed();
} else if (descriptor->getAccessMethodName() == IndexNames::TEXT) {
fts::FTSSpec ftsSpec(descriptor->infoObj());
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 9025c6b4cbe..edab86732eb 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -56,6 +56,7 @@
#include "mongo/db/exec/sort_key_generator.h"
#include "mongo/db/exec/subplan.h"
#include "mongo/db/exec/upsert_stage.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/index_names.h"
@@ -256,6 +257,30 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx,
wildcardProjection};
}
+ColumnIndexEntry columnIndexEntryFromIndexCatalogEntry(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const IndexCatalogEntry& ice) {
+
+ auto desc = ice.descriptor();
+ invariant(desc);
+
+ auto accessMethod = ice.accessMethod();
+ invariant(accessMethod);
+
+ auto cam = static_cast<const ColumnStoreAccessMethod*>(accessMethod);
+ const auto columnstoreProjection = cam->getColumnstoreProjection();
+
+ return {desc->keyPattern(),
+ desc->getIndexType(),
+ desc->version(),
+ desc->isSparse(),
+ desc->unique(),
+ ColumnIndexEntry::Identifier{desc->indexName()},
+ ice.getFilterExpression(),
+ ice.getCollator(),
+ columnstoreProjection};
+}
+
/**
* If query supports index filters, filter params.indices according to any index filters that have
* been configured. In addition, sets that there were indeed index filters applied.
@@ -303,7 +328,8 @@ void fillOutIndexEntries(OperationContext* opCtx,
continue;
if (indexType == IndexType::INDEX_COLUMN) {
- columnEntries.emplace_back(ice->descriptor()->indexName());
+ columnEntries.emplace_back(
+ columnIndexEntryFromIndexCatalogEntry(opCtx, collection, *ice));
} else {
entries.emplace_back(
indexEntryFromIndexCatalogEntry(opCtx, collection, *ice, canonicalQuery));
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index e913108679c..75ab5475f2f 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -128,6 +128,15 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx,
const CanonicalQuery* canonicalQuery = nullptr);
/**
+ * Converts the catalog metadata for an index into a ColumnIndexEntry, which is a format that is
+ * meant to be consumed by the query planner. This function can perform index reads and should not
+ * be called unless access to the storage engine is permitted.
+ */
+ColumnIndexEntry columnIndexEntryFromIndexCatalogEntry(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const IndexCatalogEntry& ice);
+
+/**
* Determines whether or not to wait for oplog visibility for a query. This is only used for
* collection scans on the oplog.
*/
diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h
index 40313fdfed6..6d73844f426 100644
--- a/src/mongo/db/query/index_entry.h
+++ b/src/mongo/db/query/index_entry.h
@@ -61,17 +61,17 @@ struct CoreIndexInfo {
Identifier ident,
const MatchExpression* fe = nullptr,
const CollatorInterface* ci = nullptr,
- const WildcardProjection* wildcardProj = nullptr)
+ const IndexPathProjection* indexPathProj = nullptr)
: identifier(std::move(ident)),
keyPattern(kp),
filterExpr(fe),
type(type),
sparse(sp),
collator(ci),
- wildcardProjection(wildcardProj) {
- // We always expect a projection executor for $** indexes, and none otherwise.
- // TODO SERVER-67140: Add columnstoreProjection and invariant
- invariant((type == IndexType::INDEX_WILDCARD) == (wildcardProjection != nullptr));
+ indexPathProjection(indexPathProj) {
+ // If a projection executor exists, the index must be a wildcard ($**) or column store index.
+ if (indexPathProjection != nullptr)
+ invariant(type == IndexType::INDEX_WILDCARD || type == IndexType::INDEX_COLUMN);
}
virtual ~CoreIndexInfo() = default;
@@ -138,8 +138,9 @@ struct CoreIndexInfo {
const CollatorInterface* collator = nullptr;
// For $** indexes, a pointer to the projection executor owned by the index access method. Null
- // unless this IndexEntry represents a wildcard index, in which case this is always non-null.
- const WildcardProjection* wildcardProjection = nullptr;
+ // unless this IndexEntry represents a wildcard or column store index, in which case this is
+ // always non-null.
+ const IndexPathProjection* indexPathProjection = nullptr;
};
/**
@@ -259,12 +260,34 @@ struct IndexEntry : CoreIndexInfo {
/**
* Represents a columnar index.
*/
-struct ColumnIndexEntry {
- ColumnIndexEntry(std::string catalogName) : catalogName(std::move(catalogName)) {}
+struct ColumnIndexEntry : CoreIndexInfo {
+ ColumnIndexEntry(const BSONObj& keyPattern,
+ IndexType type,
+ IndexDescriptor::IndexVersion version,
+ bool sparse,
+ bool unique,
+ Identifier ident,
+ const MatchExpression* filterExpression,
+ const CollatorInterface* ci,
+ const IndexPathProjection* columnstoreProjection)
+ : CoreIndexInfo(keyPattern,
+ type,
+ sparse,
+ std::move(ident),
+ filterExpression,
+ ci,
+ columnstoreProjection),
+ version(version),
+ unique(unique) {}
- std::string catalogName;
+ ColumnIndexEntry(const ColumnIndexEntry&) = default;
+ ColumnIndexEntry(ColumnIndexEntry&&) = default;
+ ColumnIndexEntry& operator=(const ColumnIndexEntry&) = default;
+ ColumnIndexEntry& operator=(ColumnIndexEntry&&) = default;
+ ~ColumnIndexEntry() = default;
- // TODO SERVER-67140: Projection, probably need some kind of disambiguator.
+ IndexDescriptor::IndexVersion version;
+ bool unique;
};
std::ostream& operator<<(std::ostream& stream, const IndexEntry::Identifier& ident);
diff --git a/src/mongo/db/query/plan_cache_indexability.cpp b/src/mongo/db/query/plan_cache_indexability.cpp
index 4f1b0c176f8..90b3e68239e 100644
--- a/src/mongo/db/query/plan_cache_indexability.cpp
+++ b/src/mongo/db/query/plan_cache_indexability.cpp
@@ -118,7 +118,7 @@ void PlanCacheIndexabilityState::processWildcardIndex(const CoreIndexInfo& cii)
invariant(cii.type == IndexType::INDEX_WILDCARD);
_wildcardIndexDiscriminators.emplace_back(
- cii.wildcardProjection->exec(), cii.identifier.catalogName, cii.collator);
+ cii.indexPathProjection->exec(), cii.identifier.catalogName, cii.collator);
}
void PlanCacheIndexabilityState::processIndexCollation(const std::string& indexName,
diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp
index ab2731461e8..bdc229352ee 100644
--- a/src/mongo/db/query/planner_wildcard_helpers.cpp
+++ b/src/mongo/db/query/planner_wildcard_helpers.cpp
@@ -361,7 +361,7 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
invariant(wildcardIndex.multikeyPaths.empty());
// Obtain the projection executor from the parent wildcard IndexEntry.
- auto* wildcardProjection = wildcardIndex.wildcardProjection;
+ auto* wildcardProjection = wildcardIndex.indexPathProjection;
invariant(wildcardProjection);
const auto projectedFields =
@@ -411,7 +411,7 @@ void expandWildcardIndexEntry(const IndexEntry& wildcardIndex,
wildcardIndex.filterExpr,
wildcardIndex.infoObj,
wildcardIndex.collator,
- wildcardIndex.wildcardProjection);
+ wildcardIndex.indexPathProjection);
invariant("$_path"_sd != fieldName);
out->push_back(std::move(entry));
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index b252117ab75..4420e3f2725 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/exec/bucket_unpacker.h"
+#include "mongo/db/exec/projection_executor_utils.h"
#include "mongo/db/index/wildcard_key_generator.h"
#include "mongo/db/index_names.h"
#include "mongo/db/matcher/expression_algo.h"
@@ -158,7 +159,6 @@ bool hintMatchesNameOrPattern(const BSONObj& hintObj,
firstHintElt.type() == BSONType::String) {
// An index name is provided by the hint.
return indexName == firstHintElt.valueStringData();
- ;
}
// An index spec is provided by the hint.
@@ -191,10 +191,8 @@ bool hintMatchesClusterKey(const boost::optional<ClusteredCollectionInfo>& clust
*/
bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry& columnStoreIndex) {
// TODO SERVER-68400: Should be possible to have some other keypattern.
- return hintMatchesNameOrPattern(hintObj,
- columnStoreIndex.catalogName,
- BSON("$**"
- << "columnstore"));
+ return hintMatchesNameOrPattern(
+ hintObj, columnStoreIndex.identifier.catalogName, columnStoreIndex.keyPattern);
}
/**
@@ -258,7 +256,6 @@ std::unique_ptr<QuerySolution> makeColumnScanPlan(
std::unique_ptr<MatchExpression> residualPredicate) {
dassert(columnScanIsPossible(query, params));
- // TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
return QueryPlannerAnalysis::analyzeDataAccess(
query,
params,
@@ -288,10 +285,35 @@ Status checkColumnScanFieldLimits(
}
return Status::OK();
}
+
+/**
+ * Returns true if applying the projection of 'columnStoreIndex' keeps every path in 'fields'.
+ * 'fields' is only read, never modified.
+ */
+bool checkProjectionCoversQuery(const OrderedPathSet& fields,
+ const ColumnIndexEntry& columnStoreIndex) {
+ // The CoreIndexInfo constructor accepts a null projection, so verify it before dereferencing.
+ invariant(columnStoreIndex.indexPathProjection);
+ const auto projectedFields = projection_executor_utils::applyProjectionToFields(
+ columnStoreIndex.indexPathProjection->exec(), fields);
+ // applyProjectionToFields() returns only the paths the projection preserves, so equal sizes
+ // mean that no referenced path was dropped and the projection covers the query.
+ return projectedFields.size() == fields.size();
+}
+
/**
- * Attempts to build a plan using a columnstore index. Returns a non-OK status if it can't build
- * one
- * - with the code and message indicating the problem - or a QuerySolution if it can.
+ * Scans 'columnStoreIndexes' in order and returns a pair holding (a) the number of column store
+ * indexes whose projection covers every path in 'fields' and (b) a pointer to the first such
+ * index, which is an arbitrary but valid choice for the column scan. The pointer is nullptr when
+ * the count is zero.
+ */
+std::pair<int, const ColumnIndexEntry*> getValidColumnIndex(
+ OrderedPathSet& fields, const std::vector<ColumnIndexEntry>& columnStoreIndexes) {
+ // Must start out null: when no index covers the query this pointer is still returned.
+ const ColumnIndexEntry* chosenIndex = nullptr;
+ int numValid = 0;
+ for (const auto& columnStoreIndex : columnStoreIndexes) {
+ if (!checkProjectionCoversQuery(fields, columnStoreIndex)) {
+ continue;
+ }
+ // Remember only the first covering index; later ones just increase the count.
+ if (chosenIndex == nullptr) {
+ chosenIndex = &columnStoreIndex;
+ }
+ ++numValid;
+ }
+ return {numValid, chosenIndex};
+}
+
+/**
+ * Attempts to build a plan using a column store index. Returns a non-OK status if it can't build
+ * one with the code and message indicating the problem - or a QuerySolution if it can.
*/
StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
const QueryPlannerParams& params,
@@ -302,16 +324,6 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
}
invariant(params.columnStoreIndexes.size() >= 1);
- const auto& columnStoreIndex = hintedIndex.value_or(params.columnStoreIndexes.front());
- if (!hintedIndex && params.columnStoreIndexes.size() > 1) {
- // TODO SERVER-67140 only warnn if there is more than one index that is actually eligible
- // for use.
- LOGV2_DEBUG(6298500,
- 2,
- "Multiple column store indexes present. Selecting the first "
- "one arbitrarily",
- "indexName"_attr = columnStoreIndex.catalogName);
- }
auto [filterDeps, outputDeps] = computeDeps(params, query);
auto allFieldsReferenced = set_util::setUnion(filterDeps.fields, outputDeps.fields);
@@ -319,7 +331,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
// TODO SERVER-66284 Would like to enable a plan when hinted, even if we need the whole
// document. Something like COLUMN_SCAN -> FETCH.
return {ErrorCodes::Error{6298501},
- "cannot use columnstore index because the query requires seeing the entire "
+ "cannot use column store index because the query requires seeing the entire "
"document"};
} else if (!hintedIndex && expression::containsOverlappingPaths(allFieldsReferenced)) {
// The query needs a path and a parent or ancestor path. For example, the query needs to
@@ -333,7 +345,34 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
<< set_util::setToString(allFieldsReferenced)};
}
- // TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
+ // Ensures that hinted index is eligible for the column scan.
+ if (hintedIndex && !checkProjectionCoversQuery(allFieldsReferenced, *hintedIndex)) {
+ return {ErrorCodes::Error{6714002},
+ "the hinted column store index cannot be used because it does not cover the query"};
+ }
+
+ // Check that union of the dependency fields can be successfully projected by at least one
+ // column store index.
+ auto [numValid, selectedColumnStoreIndex] =
+ getValidColumnIndex(allFieldsReferenced, params.columnStoreIndexes);
+
+ // If no columnar index can support the projection, we will not use column scan.
+ if (numValid == 0) {
+ return {ErrorCodes::Error{6714001},
+ "cannot use column store index because there exists no column store index for this "
+ "collection that covers the query"};
+ }
+ invariant(selectedColumnStoreIndex);
+
+ if (!hintedIndex && numValid > 1) {
+ LOGV2_DEBUG(6298500,
+ 2,
+ "Multiple column store indexes present. Selecting the first "
+ "one arbitrarily",
+ "indexName"_attr = selectedColumnStoreIndex->identifier.catalogName);
+ }
+
+ const auto& columnStoreIndex = hintedIndex.value_or(*selectedColumnStoreIndex);
std::unique_ptr<MatchExpression> residualPredicate;
StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn;
std::tie(filterSplitByColumn, residualPredicate) =
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 490164ee87b..01305e374bc 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -29,6 +29,7 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/index/column_key_generator.h"
#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/inner_pipeline_stage_impl.h"
#include "mongo/db/pipeline/inner_pipeline_stage_interface.h"
@@ -43,6 +44,8 @@
namespace mongo {
const std::string kIndexName = "indexName";
+const BSONObj kKeyPattern = BSON("$**"
+ << "columnstore");
/**
* A specialization of the QueryPlannerTest fixture which makes it easy to present the planner with
@@ -68,10 +71,24 @@ protected:
kInternalQueryMaxNumberOfFieldsToChooseFilteredColumnScanDefault);
}
- void addColumnStoreIndexAndEnableFilterSplitting(StringData indexName = kIndexName) {
- params.columnStoreIndexes.emplace_back(indexName.toString());
-
- params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS;
+ void addColumnStoreIndexAndEnableFilterSplitting(bool genPerColFilter = true,
+ StringData indexName = kIndexName,
+ const IndexPathProjection* proj = nullptr,
+ BSONObj keyPattern = kKeyPattern,
+ MatchExpression* partialFilterExpr = nullptr,
+ CollatorInterface* collator = nullptr) {
+ params.columnStoreIndexes.emplace_back(keyPattern,
+ IndexType::INDEX_COLUMN,
+ IndexDescriptor::kLatestIndexVersion,
+ false /* sparse */,
+ false /* unique */,
+ IndexEntry::Identifier{indexName.toString()},
+ partialFilterExpr,
+ collator,
+ proj ? proj : &_defaultPathProj);
+ if (genPerColFilter) {
+ params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS;
+ }
}
std::vector<std::unique_ptr<InnerPipelineStageInterface>> makeInnerPipelineStages(
@@ -83,10 +100,18 @@ protected:
return stages;
}
+ IndexPathProjection makeProjection(BSONObj columnstoreProjection,
+ BSONObj keyPattern = kKeyPattern) {
+ return column_keygen::ColumnKeyGenerator::createProjectionExecutor(keyPattern,
+ columnstoreProjection);
+ }
+
private:
// SBE must be enabled in order to test columnar indexes.
RAIIServerParameterControllerForTest _controllerSBE{"internalQueryFrameworkControl",
"trySbeEngine"};
+ IndexPathProjection _defaultPathProj =
+ column_keygen::ColumnKeyGenerator::createProjectionExecutor(kKeyPattern, BSONObj());
};
TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnStoreIndex) {
@@ -718,7 +743,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupTest) {
node: {
column_scan: {
filtersByPath: {name: {name: {$eq: 'bob'}}},
- outputFields: ['foo', 'x'],
+ outputFields: ['foo', 'x'],
matchFields: ['name']
}
}
@@ -758,7 +783,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) {
node: {
column_scan: {
filtersByPath: {name: {name: {$eq: 'bob'}}},
- outputFields: ['foo', 'x', 'name'],
+ outputFields: ['foo', 'x', 'name'],
matchFields: ['name']
}
}
@@ -831,7 +856,7 @@ TEST_F(QueryPlannerColumnarTest, DottedFieldsWithGroupStageDoesNotRequireProject
node: {
column_scan: {
filtersByPath: {name: {name: {$eq: 'bob'}}},
- outputFields: ['foo.bar', 'x.y', 'name'],
+ outputFields: ['foo.bar', 'x.y', 'name'],
matchFields: ['name']
}
}
@@ -899,8 +924,8 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) {
}
TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreIndexes) {
- addColumnStoreIndexAndEnableFilterSplitting("first index"_sd);
- params.columnStoreIndexes.emplace_back("second index");
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd);
+ addColumnStoreIndexAndEnableFilterSplitting(false, "second index"_sd);
runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0));
assertSolutionExists(R"({
@@ -911,4 +936,270 @@ TEST_F(QueryPlannerColumnarTest, SelectsFirstFromMultipleEligibleColumnStoreInde
}
})");
}
+
+TEST_F(QueryPlannerColumnarTest, FullPredicateOption) {
+ addColumnStoreIndexAndEnableFilterSplitting(false, kIndexName);
+
+ // Filter that could be pushed down, but isn't due to the lack of the
+ // GENERATE_PER_COLUMN_FILTER flag.
+ auto predicate = fromjson(R"({
+ specialAddress: {$exists: true},
+ doNotContact: {$exists: true}
+ })");
+ runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
+ assertSolutionExists(R"({
+ proj: {
+ spec: {a: 1, _id: 0},
+ node: {
+ column_scan: {
+ outputFields: ['a'],
+ matchFields: ['specialAddress', 'doNotContact'],
+ postAssemblyFilter: {
+ specialAddress: {$exists: true},
+ doNotContact: {$exists: true}
+ }
+ }
+ }
+ }
+ })");
+}
+
+TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithExactFields) {
+ auto firstProj = makeProjection(fromjson(R"({"d": true, "b.c": true, "_id": false})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj);
+
+ auto secondProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "_id": false})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj);
+
+ // Should use the second index, despite the third index being valid, because the second index
+ // was seen first.
+ auto thirdProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "_id": false})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "third index"_sd, &thirdProj);
+
+ runQuerySortProj(
+ BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "b.c" << 1 << "_id" << 0));
+
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ proj: {
+ spec: {a: 1, 'b.c': 1, _id: 0},
+ node: {
+ column_scan: {
+ indexName: 'second index',
+ filtersByPath: {a: {a: {$gt: 3}}},
+ outputFields: ['a', 'b.c'],
+ matchFields: ['a']
+ }
+ }
+ }
+ })");
+}
+
+TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithExtraFields) {
+ auto firstProj = makeProjection(fromjson(
+ R"({"a": true, "unsubscribed": true, "test field": true, "another test field": true, "_id": false})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj);
+
+ auto secondProj = makeProjection(fromjson(R"({
+ "a": true,
+ "addresses.zip": true,
+ "unsubscribed": true,
+ "specialAddress": true,
+ "doNotContact": true,
+ "test field": true,
+ "another test field": true,
+ "_id": false
+ })"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj);
+
+ // The first index's projection omits several of the referenced paths, so only the second index
+ // covers the query even though it also includes paths the query never uses. The
+ // {$exists: false} predicates are expected in the post-assembly filter rather than in the
+ // per-path filters.
+ auto complexPredicate = fromjson(R"({
+ a: {$gte: 0},
+ "addresses.zip": "12345",
+ unsubscribed: false,
+ specialAddress: {$exists: false},
+ doNotContact: {$exists: false}
+ })");
+ runQuerySortProj(complexPredicate, BSONObj(), BSON("a" << 1 << "_id" << 0));
+ assertSolutionExists(R"({
+ proj: {
+ spec: {a: 1, _id: 0},
+ node: {
+ column_scan: {
+ indexName: 'second index',
+ filtersByPath: {
+ a: {a: {$gte: 0}},
+ 'addresses.zip': {'addresses.zip': {$eq: '12345'}},
+ unsubscribed: {unsubscribed: false}
+ },
+ outputFields: ['a'],
+ postAssemblyFilter: {
+ specialAddress: {$exists: false},
+ doNotContact: {$exists: false}
+ },
+ matchFields:
+ ['a', 'addresses.zip', 'unsubscribed', 'specialAddress', 'doNotContact']
+ }
+ }
+ }
+ })");
+}
+
+TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithSinglePath) {
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd);
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd);
+
+
+ runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ column_scan: {
+ indexName: 'first index',
+ filtersByPath: {},
+ outputFields: ['a'],
+ matchFields: []
+ }
+ })");
+}
+
+TEST_F(QueryPlannerColumnarTest, UseColumnStoreWithAncestorField) {
+ auto firstProj = makeProjection(fromjson(R"({"foo": true, "x": true, "name": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj);
+
+ auto secondProj = makeProjection(BSONObj(),
+ BSON("foo.$**"
+ << "columnstore"));
+ addColumnStoreIndexAndEnableFilterSplitting(true,
+ "second index"_sd,
+ &secondProj,
+ BSON("foo.$**"
+ << "columnstore"));
+
+ auto pipeline = Pipeline::parse(
+ {fromjson("{$group: {_id: '$foo.bar', s: {$sum: '$x.y'}, name: {$first: '$name'}}}")},
+ expCtx);
+
+ runQueryWithPipeline(BSON("name"
+ << "bob"),
+ BSON("foo.bar" << 1 << "x.y" << 1 << "name" << 1 << "_id" << 0),
+ makeInnerPipelineStages(*pipeline));
+
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ proj: {
+ spec: {'foo.bar': 1, 'x.y': 1, name: 1, _id: 0},
+ node: {
+ column_scan: {
+ indexName: 'first index',
+ filtersByPath: {name: {name: {$eq: 'bob'}}},
+ outputFields: ['foo.bar', 'x.y', 'name'],
+ matchFields: ['name']
+ }
+ }
+ }
+ })");
+
+ ASSERT(!cq->pipeline().empty());
+ auto solution =
+ QueryPlanner::extendWithAggPipeline(*cq, std::move(solns[0]), {} /* secondaryCollInfos
+ */);
+ ASSERT_OK(QueryPlannerTestLib::solutionMatches(R"({
+ group: {
+ key: {_id: '$foo.bar'},
+ accs: [{s: {$sum: '$x.y'}}, {name: {$first: '$name'}}],
+ node: {
+ column_scan: {
+ indexName: 'first index',
+ filtersByPath: {name: {name: {$eq: 'bob'}}},
+ outputFields: ['foo.bar', 'x.y', 'name'],
+ matchFields: ['name']
+ }
+ }
+ }
+ })",
+ solution->root()))
+ << solution->root()->toString();
+}
+
+TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreWithSinglePath) {
+ auto firstProj = makeProjection(BSONObj(),
+ BSON("a.$**"
+ << "columnstore"));
+ addColumnStoreIndexAndEnableFilterSplitting(true,
+ "first index"_sd,
+ &firstProj,
+ BSON("a.$**"
+ << "columnstore"));
+ internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
+ runQuerySortProj(BSONObj(), BSONObj(), BSON("b" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({proj: {spec: {b: 1}, node: {cscan: {dir: 1}}}})");
+}
+
+TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreMissingField) {
+ auto firstProj = makeProjection(fromjson(R"({"a": false})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj);
+
+ auto secondProj = makeProjection(fromjson(R"({"b": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj);
+
+ runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({proj: {spec: {a: 1, _id: 0}, node: {cscan: {dir: 1}}}})");
+}
+
+TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreMissingMultipleField) {
+ auto firstProj = makeProjection(fromjson(R"({"a": true, "c": true, "d": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index"_sd, &firstProj);
+
+ auto secondProj = makeProjection(fromjson(R"({"b": true, "c": true, "d": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index"_sd, &secondProj);
+
+ runQuerySortProj(BSON("a" << 1), BSONObj(), BSON("a" << true << "b" << true));
+ assertNumSolutions(1U);
+ assertSolutionExists(
+ R"({proj: {spec: {a: 1, b: 1}, node: {cscan: {dir: 1, filter: {a: {$eq: 1}}}}}})");
+}
+
+TEST_F(QueryPlannerColumnarTest, DontUseColumnStoreSpecifiedSubField) {
+ auto firstProj = makeProjection(fromjson(R"({"a.b": true, "b.c": true, "c": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first_index"_sd, &firstProj);
+
+ auto secondProj = makeProjection(fromjson(R"({"a": true, "b.c": true, "d": true})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second_index"_sd, &secondProj);
+
+ internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
+ runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "c" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({proj: {spec: {a: 1, c: 1}, node: {cscan: {dir: 1}}}})");
+}
+
+TEST_F(QueryPlannerColumnarTest, HintIndexDoesNotCoverQuery) {
+ // Column Store Index does not cover query.
+ auto firstProj = makeProjection(BSONObj(),
+ BSON("b.$**"
+ << "columnstore"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj);
+ ASSERT_THROWS(runQuerySortProjSkipLimitHint(BSONObj(),
+ BSONObj(),
+ BSON("a" << 1 << "_id" << 0),
+ 0,
+ 0,
+ BSON("$hint"
+ << "first index")),
+ unittest::TestAssertionFailureException);
+}
+
+TEST_F(QueryPlannerColumnarTest, NoColumnIndexCoversQuery) {
+ auto firstProj = makeProjection(fromjson(R"({b: 1, d: 1})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "first index", &firstProj);
+ auto secondProj = makeProjection(fromjson(R"({c: 1, d: 1})"));
+ addColumnStoreIndexAndEnableFilterSplitting(true, "second index", &secondProj);
+
+ // Valid for column scan, but no column store indices that cover the query.
+ runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({proj: {spec: {a: 1}, node: {cscan: {dir: 1}}}})");
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index 1984edba47b..fec34133cd9 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -1312,8 +1312,8 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln,
auto obj = expectedElem.Obj();
if (auto indexName = obj["indexName"]) {
- if (auto nameStatus =
- indexNamesMatch(indexName, actualColumnIxScanNode->indexEntry.catalogName);
+ if (auto nameStatus = indexNamesMatch(
+ indexName, actualColumnIxScanNode->indexEntry.identifier.catalogName);
!nameStatus.isOK()) {
return nameStatus;
}
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index a9689b3284f..71d0d82f3d7 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -888,7 +888,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
std::unique_ptr<sbe::PlanStage> stage =
std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(),
- csn->indexEntry.catalogName,
+ csn->indexEntry.identifier.catalogName,
std::move(paths),
std::move(includeInOutput),
ridSlot,