summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2022-08-16 19:53:04 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-16 21:53:42 +0000
commit39e4b0c6ac51e8127c2afdaf7cc39742df6df5e1 (patch)
treeb2c16235af0a337eef16841ab4f8a1dd72743567 /src/mongo
parent5e77e5fbd3871c65ee20ade0a0cf1410c6b47a9c (diff)
downloadmongo-39e4b0c6ac51e8127c2afdaf7cc39742df6df5e1.tar.gz
SERVER-67264 Avoid COLUMN_SCAN if query needs overlapping fields
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp16
-rw-r--r--src/mongo/db/matcher/expression_algo.h12
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp19
-rw-r--r--src/mongo/db/query/query_planner.cpp41
-rw-r--r--src/mongo/db/query/query_planner_columnar_test.cpp104
-rw-r--r--src/mongo/db/query/query_solution.cpp8
-rw-r--r--src/mongo/db/query/query_solution.h2
-rw-r--r--src/mongo/db/query/util/set_util.h17
8 files changed, 199 insertions, 20 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index 90453c66db0..ba286cd4323 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -796,6 +796,22 @@ bool containsDependency(const OrderedPathSet& testSet, const OrderedPathSet& pre
return false;
}
+bool containsOverlappingPaths(const OrderedPathSet& testSet) {
+ // We will take advantage of the fact that paths with common ancestors are ordered together in
+ // our ordering. Thus if there are any paths that contain a common ancestor, they will be right
+ // next to each other - unless there are multiple pairs, in which case at least one pair will be
+ // right next to each other.
+ if (testSet.empty()) {
+ return false;
+ }
+ for (auto it = std::next(testSet.begin()); it != testSet.end(); ++it) {
+ if (isPathPrefixOf(*std::prev(it), *it)) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool areIndependent(const OrderedPathSet& pathSet1, const OrderedPathSet& pathSet2) {
return !containsDependency(pathSet1, pathSet2) && !containsDependency(pathSet2, pathSet1);
}
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index 814974ba881..2e310a52d95 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -98,12 +98,20 @@ bool isSplittableBy(const MatchExpression& expr, const OrderedPathSet& pathSet);
bool areIndependent(const OrderedPathSet& pathSet1, const OrderedPathSet& pathSet2);
/**
- * Return true if any of the fieldPaths in prefixCandidates are identical to or an ancestor of any
- * of the fieldpaths in testSet. The order of the parameters matters -- it's not commutative.
+ * Return true if any of the paths in 'prefixCandidates' are identical to or an ancestor of any
+ * of the paths in 'testSet'. The order of the parameters matters -- it's not commutative.
*/
bool containsDependency(const OrderedPathSet& testSet, const OrderedPathSet& prefixCandidates);
/**
+ * Returns true if any of the paths in 'testSet' are an ancestor of any of the other paths in
+ * 'testSet'. Examples:
+ * containsOverlappingPaths([a.b, a]) --> true
+ * containsOverlappingPaths([ab, a, a-b]) --> false
+ */
+bool containsOverlappingPaths(const OrderedPathSet& testSet);
+
+/**
* Determine if 'expr' is reliant upon any path from 'pathSet'.
*/
bool isIndependentOf(const MatchExpression& expr, const OrderedPathSet& pathSet);
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index 73aa479bd2a..96117ff162a 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -950,6 +950,25 @@ TEST(IsIndependent, EmptyDependencySetsPassIsOnlyDependentOn) {
ASSERT_TRUE(expression::isOnlyDependentOn(*matchExpression.get(), {}));
}
+TEST(ContainsOverlappingPaths, Basics) {
+ // No overlap cases.
+ ASSERT_FALSE(expression::containsOverlappingPaths({}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a"}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a.b"}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a", "b"}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a", "ab", "a-b"}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a.b", "ab", "a-b"}));
+ ASSERT_FALSE(expression::containsOverlappingPaths({"a.b", "a.c", "a.d"}));
+ ASSERT_FALSE(
+ expression::containsOverlappingPaths({"users.address.zip", "users.address.state"}));
+
+ // Overlap cases
+ ASSERT_TRUE(expression::containsOverlappingPaths({"a.b", "ab", "a-b", "a"}));
+ ASSERT_TRUE(expression::containsOverlappingPaths({"a.b", "ab", "a-b", "a.b.c"}));
+ ASSERT_TRUE(expression::containsOverlappingPaths({"a", "ab", "a-b", "a.b.c"}));
+ ASSERT_TRUE(expression::containsOverlappingPaths({"users.address", "users.address.state"}));
+}
+
TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 059ba11244f..9c05a0b3cf9 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -28,6 +28,7 @@
*/
+#include "mongo/db/pipeline/dependencies.h"
#include "mongo/platform/basic.h"
#include <boost/optional.hpp>
@@ -223,7 +224,8 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params
return {std::move(filterDeps), std::move(outputDeps)};
}
-Status columnScanIsPossibleStatus(const CanonicalQuery& query, const QueryPlannerParams& params) {
+Status computeColumnScanIsPossibleStatus(const CanonicalQuery& query,
+ const QueryPlannerParams& params) {
if (params.columnStoreIndexes.empty()) {
return {ErrorCodes::InvalidOptions, "No columnstore indexes available"};
}
@@ -241,7 +243,7 @@ Status columnScanIsPossibleStatus(const CanonicalQuery& query, const QueryPlanne
}
bool columnScanIsPossible(const CanonicalQuery& query, const QueryPlannerParams& params) {
- return columnScanIsPossibleStatus(query, params).isOK();
+ return computeColumnScanIsPossibleStatus(query, params).isOK();
}
std::unique_ptr<QuerySolution> makeColumnScanPlan(
@@ -250,6 +252,7 @@ std::unique_ptr<QuerySolution> makeColumnScanPlan(
const ColumnIndexEntry& columnStoreIndex,
DepsTracker filterDeps,
DepsTracker outputDeps,
+ OrderedPathSet allFieldsReferenced,
StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn,
std::unique_ptr<MatchExpression> residualPredicate) {
dassert(columnScanIsPossible(query, params));
@@ -261,6 +264,7 @@ std::unique_ptr<QuerySolution> makeColumnScanPlan(
std::make_unique<ColumnIndexScanNode>(columnStoreIndex,
std::move(outputDeps.fields),
std::move(filterDeps.fields),
+ std::move(allFieldsReferenced),
std::move(filterSplitByColumn),
std::move(residualPredicate)));
}
@@ -269,16 +273,14 @@ std::unique_ptr<QuerySolution> makeColumnScanPlan(
* A helper function which applies a heuristic to determine if a COLUMN_SCAN plan would examine few
* enough fields to be considered faster than a COLLSCAN.
*/
-Status checkFieldLimits(const OrderedPathSet& filterDeps,
- const OrderedPathSet& outputDeps,
- const StringMap<std::unique_ptr<MatchExpression>>& filterSplitByColumn) {
+Status checkColumnScanFieldLimits(
+ size_t nReferencedFields,
+ const StringMap<std::unique_ptr<MatchExpression>>& filterSplitByColumn) {
- const int nReferencedFields =
- static_cast<int>(set_util::setUnion(filterDeps, outputDeps).size());
const int maxNumFields = filterSplitByColumn.size() > 0
? internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.load()
: internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.load();
- if (nReferencedFields > maxNumFields) {
+ if (static_cast<int>(nReferencedFields) > maxNumFields) {
return Status{ErrorCodes::Error{6430508},
str::stream() << "referenced too many fields. nReferenced="
<< nReferencedFields << ", limit=" << maxNumFields};
@@ -294,7 +296,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
const QueryPlannerParams& params,
const CanonicalQuery& query,
const boost::optional<ColumnIndexEntry>& hintedIndex = boost::none) {
- if (auto status = columnScanIsPossibleStatus(query, params); !status.isOK()) {
+ if (auto status = computeColumnScanIsPossibleStatus(query, params); !status.isOK()) {
return status;
}
@@ -311,13 +313,23 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
}
auto [filterDeps, outputDeps] = computeDeps(params, query);
+ auto allFieldsReferenced = set_util::setUnion(filterDeps.fields, outputDeps.fields);
if (filterDeps.needWholeDocument || outputDeps.needWholeDocument) {
- // We only want to use the columnar index if we can avoid fetching the whole document.
// TODO SERVER-66284 Would like to enable a plan when hinted, even if we need the whole
// document. Something like COLUMN_SCAN -> FETCH.
return {ErrorCodes::Error{6298501},
"cannot use columnstore index because the query requires seeing the entire "
"document"};
+ } else if (!hintedIndex && expression::containsOverlappingPaths(allFieldsReferenced)) {
+ // The query needs a path and a parent or ancestor path. For example, the query needs to
+ // access both "a" and "a.b". This is a heuristic, but generally we would not expect this to
+ // benefit from the column store index. This kind of dependency pattern is probably an
+ // indication that the parent/ancestor path will be an object or array of objects, which
+ // will require us to fall back to the rowstore and remove any benefit of using the index.
+ return {ErrorCodes::Error{6726400},
+ str::stream() << "cannot use columnstore index because the query requires paths "
+ "which are a prefix of each other: "
+ << set_util::setToString(allFieldsReferenced)};
}
// TODO SERVER-67140: Check if the columnar index actually provides the fields we need.
@@ -326,7 +338,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
std::tie(filterSplitByColumn, residualPredicate) =
expression::splitMatchExpressionForColumns(query.root());
auto fieldLimitStatus =
- checkFieldLimits(filterDeps.fields, outputDeps.fields, filterSplitByColumn);
+ checkColumnScanFieldLimits(allFieldsReferenced.size(), filterSplitByColumn);
if (fieldLimitStatus.isOK() || hintedIndex) {
// We have a hint, or few enough dependencies that we suspect a column scan is still
@@ -336,6 +348,7 @@ StatusWith<std::unique_ptr<QuerySolution>> tryToBuildColumnScan(
columnStoreIndex,
std::move(filterDeps),
std::move(outputDeps),
+ std::move(allFieldsReferenced),
std::move(filterSplitByColumn),
std::move(residualPredicate));
}
@@ -1444,8 +1457,12 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
// Check whether we're eligible to use the columnar index, assuming no other indexes can be
// used.
if (out.empty()) {
- if (auto statusWithSoln = tryToBuildColumnScan(params, query); statusWithSoln.isOK()) {
+ auto statusWithSoln = tryToBuildColumnScan(params, query);
+ if (statusWithSoln.isOK()) {
out.emplace_back(std::move(statusWithSoln.getValue()));
+ } else {
+ LOGV2_DEBUG(
+ 6726401, 4, "Not using a column scan", "reason"_attr = statusWithSoln.getStatus());
}
}
diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp
index 914b9ae76f4..490164ee87b 100644
--- a/src/mongo/db/query/query_planner_columnar_test.cpp
+++ b/src/mongo/db/query/query_planner_columnar_test.cpp
@@ -280,6 +280,110 @@ TEST_F(QueryPlannerColumnarTest,
assertSolutionExists(R"({proj: {spec: {a: 1, b: 1, c: 1}, node: {cscan: {dir: 1}}}})");
}
+// Tests that a query which depends on overlapping parent/child fields like 'a.b' and 'a' will not
+// use the column store index.
+TEST_F(QueryPlannerColumnarTest, QueryWithOverlappingDependenciesDoesNotUseColumnarIndex) {
+ addColumnStoreIndexAndEnableFilterSplitting();
+
+ runQuerySortProj(BSONObj(), BSON("a.b" << 1 << "a.c" << 1), BSON("a" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ sort: {
+ pattern: {"a.b": 1, "a.c": 1},
+ limit: 0,
+ node: {
+ proj: {
+ spec: {a: 1},
+ node: {
+ cscan: {dir: 1}
+ }
+ }
+ }
+ }
+ })");
+}
+
+TEST_F(QueryPlannerColumnarTest, QueryWithConflictingAncestralDependenciesDoesNotUseColumnarIndex) {
+ addColumnStoreIndexAndEnableFilterSplitting();
+
+ runQuerySortProj(BSONObj(), BSON("a.b.c" << 1), BSON("a" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ sort: {
+ pattern: {"a.b.c": 1},
+ limit: 0,
+ node: {
+ proj: {
+ spec: {a: 1},
+ node: {
+ cscan: {dir: 1}
+ }
+ }
+ }
+ }
+ })");
+}
+
+// Test like those above, but proving that we do the prefix detection correctly and don't mistake
+// regular (non-path) prefixes.
+TEST_F(QueryPlannerColumnarTest, QueryWithSimilarDependenciesDoesUseColumnarIndex) {
+ addColumnStoreIndexAndEnableFilterSplitting();
+
+ runQuerySortProj(BSONObj(), BSON("abc" << 1), BSON("a" << 1));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ proj: {
+ spec: {a: 1, _id: 1},
+ node: {
+ sort: {
+ pattern: {"abc": 1},
+ limit: 0,
+ node: {
+ column_scan: {
+ filtersByPath: {},
+ outputFields: ['_id', 'a', 'abc'],
+ matchFields: []
+ }
+ }
+ }
+ }
+ }
+ })");
+}
+
+// Test that adding a hint will allow you to use the column store index for a query with overlapping
+// parent/child dependencies.
+TEST_F(QueryPlannerColumnarTest, HintOverridesOverlappingFieldsCheck) {
+ addColumnStoreIndexAndEnableFilterSplitting();
+
+ runQuerySortProjSkipLimitHint(BSONObj(),
+ BSON("a.b.c" << 1),
+ BSON("a" << 1),
+ 0,
+ 0,
+ BSON("$**"
+ << "columnstore"));
+ assertNumSolutions(1U);
+ assertSolutionExists(R"({
+ sort: {
+ pattern: {"a.b.c": 1},
+ limit: 0,
+ node: {
+ proj: {
+ spec: {a: 1, _id: 1},
+ node: {
+ column_scan: {
+ filtersByPath: {},
+ outputFields: ['_id', 'a', 'a.b.c'],
+ matchFields: []
+ }
+ }
+ }
+ }
+ }
+ })");
+}
+
TEST_F(QueryPlannerColumnarTest, HintOverridesFieldLimitUnfiltered) {
addColumnStoreIndexAndEnableFilterSplitting();
internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2);
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index f1f2295a727..32653070dea 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -1100,17 +1100,15 @@ bool IndexScanNode::operator==(const IndexScanNode& other) const {
ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry,
OrderedPathSet outputFieldsIn,
OrderedPathSet matchFieldsIn,
+ OrderedPathSet allFieldsIn,
StringMap<std::unique_ptr<MatchExpression>> filtersByPath,
std::unique_ptr<MatchExpression> postAssemblyFilter)
: indexEntry(std::move(indexEntry)),
outputFields(std::move(outputFieldsIn)),
matchFields(std::move(matchFieldsIn)),
- allFields(set_util::setUnion(outputFields, matchFields)),
+ allFields(std::move(allFieldsIn)),
filtersByPath(std::move(filtersByPath)),
- postAssemblyFilter(std::move(postAssemblyFilter)) {
- allFields = outputFields;
- allFields.insert(matchFields.begin(), matchFields.end());
-}
+ postAssemblyFilter(std::move(postAssemblyFilter)) {}
void ColumnIndexScanNode::appendToString(str::stream* ss, int indent) const {
addIndent(ss, indent);
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index dc84b041fb1..56a6eb92bf4 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -513,6 +513,7 @@ struct ColumnIndexScanNode : public QuerySolutionNode {
ColumnIndexScanNode(ColumnIndexEntry,
OrderedPathSet outputFields,
OrderedPathSet matchFields,
+ OrderedPathSet allFields,
StringMap<std::unique_ptr<MatchExpression>> filtersByPath,
std::unique_ptr<MatchExpression> postAssemblyFilter);
@@ -545,6 +546,7 @@ struct ColumnIndexScanNode : public QuerySolutionNode {
return std::make_unique<ColumnIndexScanNode>(indexEntry,
outputFields,
matchFields,
+ allFields,
std::move(clonedFiltersByPath),
postAssemblyFilter->shallowClone());
}
diff --git a/src/mongo/db/query/util/set_util.h b/src/mongo/db/query/util/set_util.h
index ab513eead71..a68f9a581f1 100644
--- a/src/mongo/db/query/util/set_util.h
+++ b/src/mongo/db/query/util/set_util.h
@@ -27,10 +27,12 @@
* it in the license file.
*/
+#pragma once
+
#include <algorithm>
#include <set>
-#pragma once
+#include "mongo/bson/util/builder_fwd.h"
namespace mongo::set_util {
@@ -43,4 +45,17 @@ std::set<T, Comparator> setUnion(const std::set<T, Comparator>& set1,
return out;
}
+template <typename T, typename Comparator>
+std::string setToString(const std::set<T, Comparator>& set) {
+ StackStringBuilder sb;
+ sb << "{";
+ for (auto it = set.begin(); it != set.end(); ++it) {
+ sb << *it;
+ if (std::next(it) != set.end())
+ sb << ",";
+ }
+ sb << "}";
+ return sb.str();
+}
+
} // namespace mongo::set_util