From a3b36dabe82aea53d2d16d82cd20f95d19e67932 Mon Sep 17 00:00:00 2001 From: Ian Boros Date: Fri, 4 Feb 2022 18:05:09 +0000 Subject: SERVER-62984 Rudimentary query planning for columnar indexes --- src/mongo/db/query/SConscript | 1 + src/mongo/db/query/classic_stage_builder.cpp | 1 + src/mongo/db/query/index_entry.h | 11 +++ src/mongo/db/query/query_planner.cpp | 12 +++ src/mongo/db/query/query_planner_columnar_test.cpp | 106 +++++++++++++++++++++ src/mongo/db/query/query_planner_params.h | 3 + src/mongo/db/query/query_planner_test_lib.cpp | 38 ++++++++ src/mongo/db/query/query_solution.cpp | 15 +++ src/mongo/db/query/query_solution.h | 37 +++++++ src/mongo/db/query/stage_types.cpp | 1 + src/mongo/db/query/stage_types.h | 1 + 11 files changed, 226 insertions(+) create mode 100644 src/mongo/db/query/query_planner_columnar_test.cpp diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 77d2fabc948..2332f06e8e3 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -373,6 +373,7 @@ env.CppUnitTest( "projection_test.cpp", "query_planner_array_test.cpp", "query_planner_collation_test.cpp", + "query_planner_columnar_test.cpp", "query_planner_geo_test.cpp", "query_planner_group_pushdown_test.cpp", "query_planner_hashed_index_test.cpp", diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp index 99e730cf9a0..dfa3f119a8f 100644 --- a/src/mongo/db/query/classic_stage_builder.cpp +++ b/src/mongo/db/query/classic_stage_builder.cpp @@ -417,6 +417,7 @@ std::unique_ptr ClassicStageBuilder::build(const QuerySolutionNode* r case STAGE_UNKNOWN: case STAGE_UNPACK_TIMESERIES_BUCKET: case STAGE_SENTINEL: + case STAGE_COLUMN_IXSCAN: case STAGE_UPDATE: { LOGV2_WARNING(4615604, "Can't build exec tree for node", "node"_attr = *root); } diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index 7c7ba04a532..6f69d388c9d 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -254,6 +254,17 @@ struct IndexEntry : CoreIndexInfo { BSONObj infoObj; }; +/** + * Represents a columnar index. + */ +struct ColumnIndexEntry { + ColumnIndexEntry(std::string catalogName) : catalogName(std::move(catalogName)) {} + + std::string catalogName; + + // TODO SERVER-63123: Projection, probably need some kind of disambiguator. +}; + std::ostream& operator<<(std::ostream& stream, const IndexEntry::Identifier& ident); StringBuilder& operator<<(StringBuilder& builder, const IndexEntry::Identifier& ident); } // namespace mongo diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index ef51a3f7ba5..26a605c081f 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -1193,6 +1193,18 @@ StatusWith>> QueryPlanner::plan( } } + // Check whether we're eligible to use the columnar index, assuming no other indexes can be + // used. + if (out.empty() && !params.columnarIndexes.empty() && query.getProj() && + !query.getProj()->requiresDocument()) { + // TODO SERVER-63123: Check if the columnar index actually provides the fields we need. + auto columnScan = std::make_unique(params.columnarIndexes.front()); + columnScan->fields = query.getProj()->getRequiredFields(); + columnScan->filter = query.root()->shallowClone(); + auto plan = QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(columnScan)); + out.push_back(std::move(plan)); + } + // The caller can explicitly ask for a collscan. bool collscanRequested = (params.options & QueryPlannerParams::INCLUDE_COLLSCAN); diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp new file mode 100644 index 00000000000..dbca638506c --- /dev/null +++ b/src/mongo/db/query/query_planner_columnar_test.cpp @@ -0,0 +1,106 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collator_interface_mock.h" +#include "mongo/db/query/query_planner_test_fixture.h" +#include "mongo/unittest/death_test.h" + +namespace mongo { +const std::string kIndexName = "indexName"; + +/** + * A specialization of the QueryPlannerTest fixture which makes it easy to present the planner with + * a view of the available column indexes. + */ +class QueryPlannerColumnarTest : public QueryPlannerTest { +protected: + void setUp() final { + QueryPlannerTest::setUp(); + + // We're interested in testing plans that use a columnar index, so don't generate collection + // scans. + params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN; + } + + void addColumnarIndex() { + params.columnarIndexes.emplace_back(kIndexName); + } +}; + +TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) { + addColumnarIndex(); + + runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "_id" << 0)); + + assertNumSolutions(1U); + assertSolutionExists( + "{proj: {spec: {a: 1, _id: 0}, node: {column_ixscan: {filter: {a: {$gt: 3}}," + "fields: {a: 1}}}}}"); +} + +TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAndBlockingSort) { + addColumnarIndex(); + + runQuerySortProj(BSONObj(), BSON("a" << 1), BSON("a" << 1 << "_id" << 0)); + + assertNumSolutions(1U); + assertSolutionExists( + "{sort: {pattern: {a: 1}, limit: 0, node: {proj: {spec: {a: 1, _id: 0}, node: " + "{column_ixscan: " + "{fields: {a: 1}}}}}}}"); +} + +TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) { + addColumnarIndex(); + + runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 0 << "_id" << 0)); + assertNumSolutions(1U); + assertSolutionExists("{proj: {spec: {a: 0, _id: 0}, node: {cscan: {dir: 1}}}}"); +} + +TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) { + addColumnarIndex(); + + runQuerySortProj(BSON("a" << 1), BSONObj(), BSONObj()); + assertNumSolutions(1U); + assertSolutionExists("{cscan: {dir: 1, filter: {a: {$eq: 1}}}}"); +} + +TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) { + addColumnarIndex(); + addIndex(BSON("a" << 1)); + + runQuerySortProj(BSON("a" << 5), BSONObj(), BSON("a" << 1 << "_id" << 0)); + + assertNumSolutions(1U); + assertSolutionExists("{proj: {spec: {a:1, _id: 0}, node: {ixscan: {pattern: {a: 1}}}}}"); +} +} // namespace mongo diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 54697f2cc9a..4fb56c7c213 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -142,6 +142,9 @@ struct QueryPlannerParams { // What indices are available for planning? std::vector indices; + // Columnar indexes available. + std::vector columnarIndexes; + // What's our shard key? If INCLUDE_SHARD_FILTER is set we will create a shard filtering // stage. If we know the shard key, we can perform covering analysis instead of always // forcing a fetch. diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 4d4359ccf08..de41618357d 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -1208,6 +1208,44 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, "actual solution. Expected: " << testSoln << " Found: " << actualSentinelNode}; } + return Status::OK(); + } else if (STAGE_COLUMN_IXSCAN == trueSoln->getType()) { + const auto* actualColumnIxScanNode = static_cast(trueSoln); + auto expectedElem = testSoln["column_ixscan"]; + if (expectedElem.eoo() || !expectedElem.isABSONObj()) { + return {ErrorCodes::Error{5842490}, + "found a 'column_ixscan' object in the test solution but no corresponding " + "'column_ixscan' " + "object " + "in the expected JSON"}; + } + auto obj = expectedElem.Obj(); + + if (!obj["fields"].eoo()) { + std::set expectedFields; + for (auto& field : obj["fields"].Obj()) { + expectedFields.insert(field.fieldName()); + } + + std::set actualFields(actualColumnIxScanNode->fields.begin(), + actualColumnIxScanNode->fields.end()); + if (expectedFields != std::set(actualFields.begin(), actualFields.end())) { + return {ErrorCodes::Error{5842491}, str::stream() << "fields mismatch."}; + } + } + + if (!actualColumnIxScanNode->children.empty()) { + return { + ErrorCodes::Error{5842492}, + str::stream() << "found a column_ixscan stage with more than zero children in the " + "actual solution:"}; + } + + if (auto filter = obj["filter"]) { + return filterMatches(filter.Obj(), BSONObj(), trueSoln) + .withContext("mismatching 'filter' for 'column_ixscan' node"); + } + return Status::OK(); } return {ErrorCodes::Error{5698301}, diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 779fa866d0e..f98c056bbaa 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -1042,6 +1042,21 @@ bool IndexScanNode::operator==(const IndexScanNode& other) const { bounds == other.bounds; } +// +// ColumnIndexScanNode +// +ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry) + : indexEntry(std::move(indexEntry)) {} + +void ColumnIndexScanNode::appendToString(str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "COLUMN_IX_SCAN\n"; + addIndent(ss, indent + 1); + *ss << "fields = [" << boost::algorithm::join(fields, ", "); + *ss << "]\n"; + addCommon(ss, indent); +} + // // ReturnKeyNode // diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index cca5fd1dd94..e91bb18f7ab 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -482,6 +482,43 @@ struct CollectionScanNode : public QuerySolutionNodeWithSortSet { bool stopApplyingFilterAfterFirstMatch = false; }; +struct ColumnIndexScanNode : public QuerySolutionNode { + ColumnIndexScanNode(ColumnIndexEntry); + + virtual StageType getType() const { + return STAGE_COLUMN_IXSCAN; + } + + void appendToString(str::stream* ss, int indent) const override; + + bool fetched() const { + return false; + } + FieldAvailability getFieldAvailability(const std::string& field) const { + for (const auto& availableField : fields) { + if (field == availableField) { + return FieldAvailability::kFullyProvided; + } + } + return FieldAvailability::kNotProvided; + } + bool sortedByDiskLoc() const { + return true; + } + + const ProvidedSortSet& providedSorts() const { + return kEmptySet; + } + + QuerySolutionNode* clone() const { + return new ColumnIndexScanNode(indexEntry); + } + + ColumnIndexEntry indexEntry; + + std::vector fields; +}; + /** * A VirtualScanNode is similar to a collection or an index scan except that it doesn't depend on an * underlying storage implementation. It can be used to represent a virtual diff --git a/src/mongo/db/query/stage_types.cpp b/src/mongo/db/query/stage_types.cpp index 135d4fc1d68..98084672456 100644 --- a/src/mongo/db/query/stage_types.cpp +++ b/src/mongo/db/query/stage_types.cpp @@ -39,6 +39,7 @@ StringData stageTypeToString(StageType stageType) { {STAGE_AND_SORTED, "AND_SORTED"_sd}, {STAGE_CACHED_PLAN, "CACHED_PLAN"}, {STAGE_COLLSCAN, "COLLSCAN"_sd}, + {STAGE_COLUMN_IXSCAN, "COLUMN_IXSCAN"_sd}, {STAGE_COUNT, "COUNT"_sd}, {STAGE_COUNT_SCAN, "COUNT_SCAN"_sd}, {STAGE_DELETE, "DELETE"_sd}, diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index ff26bfba846..4c8e677300e 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -51,6 +51,7 @@ enum StageType { STAGE_AND_SORTED, STAGE_CACHED_PLAN, STAGE_COLLSCAN, + STAGE_COLUMN_IXSCAN, // A virtual scan stage that simulates a collection scan and doesn't depend on underlying // storage. -- cgit v1.2.1