summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorIan Boros <puppyofkosh@gmail.com>2019-04-23 17:18:56 -0400
committerIan Boros <puppyofkosh@gmail.com>2019-05-03 15:00:35 -0400
commitb1a9c9adea89b475fb05660e2a1cad00971e6899 (patch)
tree770badc8cbb4d1c18f7f849600d5c9496edc9841 /src/mongo
parent6ef5b5618f934c4f94fe293ad9701fd98620f133 (diff)
downloadmongo-b1a9c9adea89b475fb05660e2a1cad00971e6899.tar.gz
SERVER-40465 $group will not distinct scan multikey index
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/pipeline/pipeline_d.cpp7
-rw-r--r--src/mongo/db/query/get_executor.cpp51
-rw-r--r--src/mongo/db/query/get_executor.h19
-rw-r--r--src/mongo/db/query/get_executor_test.cpp28
4 files changed, 105 insertions, 0 deletions
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index fa47f804077..6a1e3c54946 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -220,6 +220,13 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe
// possible, we return nullptr, and the caller is responsible for trying again without
// passing a 'groupIdForDistinctScan' value.
ParsedDistinct parsedDistinct(std::move(cq.getValue()), *groupIdForDistinctScan);
+
+ // Note that we request a "strict" distinct plan because:
+ // 1) We do not want to have to de-duplicate the results of the plan.
+ //
+ // 2) We not want a plan that will return separate values for each array element. For
+ // example, if we have a document {a: [1,2]} and group by "a" a DISTINCT_SCAN on an "a"
+ // index would produce one result for '1' and another for '2', which would be incorrect.
auto distinctExecutor =
getExecutorDistinct(opCtx,
collection,
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 0a8a6cc6080..d7b93fd6ad1 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -124,6 +124,35 @@ namespace wcp = ::mongo::wildcard_planning;
bool turnIxscanIntoCount(QuerySolution* soln);
} // namespace
+bool isAnyComponentOfPathMultikey(const BSONObj& indexKeyPattern,
+ bool isMultikey,
+ const MultikeyPaths& indexMultikeyInfo,
+ StringData path) {
+ if (!isMultikey) {
+ return false;
+ }
+
+ size_t keyPatternFieldIndex = 0;
+ bool found = false;
+ if (indexMultikeyInfo.empty()) {
+ // There is no path-level multikey information available, so we must assume 'path' is
+ // multikey.
+ return true;
+ }
+
+ for (auto&& elt : indexKeyPattern) {
+ if (elt.fieldNameStringData() == path) {
+ found = true;
+ break;
+ }
+ keyPatternFieldIndex++;
+ }
+ invariant(found);
+
+ invariant(indexMultikeyInfo.size() > keyPatternFieldIndex);
+ return !indexMultikeyInfo[keyPatternFieldIndex].empty();
+}
+
IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx,
const IndexCatalogEntry& ice,
const CanonicalQuery* canonicalQuery) {
@@ -1455,6 +1484,9 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
QueryPlannerParams plannerParams;
plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions;
+ // If the caller did not request a "strict" distinct scan then we may choose a plan which
+ // unwinds arrays and treats each element in an array as its own key.
+ const bool mayUnwindArrays = !(plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY);
std::unique_ptr<IndexCatalog::IndexIterator> ii =
collection->getIndexCatalog()->getIndexIterator(opCtx, false);
auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter();
@@ -1462,6 +1494,17 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
const IndexCatalogEntry* ice = ii->next();
const IndexDescriptor* desc = ice->descriptor();
if (desc->keyPattern().hasField(parsedDistinct.getKey())) {
+ if (!mayUnwindArrays && isAnyComponentOfPathMultikey(desc->keyPattern(),
+ desc->isMultikey(opCtx),
+ desc->getMultikeyPaths(opCtx),
+ parsedDistinct.getKey())) {
+ // If the caller requested "strict" distinct that does not "pre-unwind" arrays,
+ // then an index which is multikey on the distinct field may not be used. This is
+ // because when indexing an array each element gets inserted individually. Any plan
+ // which involves scanning the index will have effectively "unwound" all arrays.
+ continue;
+ }
+
plannerParams.indices.push_back(
indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery()));
} else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) {
@@ -1472,6 +1515,14 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
plannerParams.indices.push_back(
indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery()));
}
+
+ // It is not necessary to do any checks about 'mayUnwindArrays' in this case, because:
+ // 1) If there is no predicate on the distinct(), a wildcard indices may not be used.
+ // 2) distinct() _with_ a predicate may not be answered with a DISTINCT_SCAN on _any_
+ // multikey index.
+
+ // So, we will not distinct scan a wildcard index that's multikey on the distinct()
+ // field, regardless of the value of 'mayUnwindArrays'.
}
}
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 1f5115ce3be..f81457d2faf 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -67,6 +67,17 @@ void fillOutPlannerParams(OperationContext* opCtx,
QueryPlannerParams* plannerParams);
/**
+ * Return whether or not any component of the path 'path' is multikey given an index key pattern
+ * and multikeypaths. If no multikey metdata is available for the index, and the index is marked
+ * multikey, conservatively assumes that a component of 'path' _is_ multikey. The 'isMultikey'
+ * property of an index is false for indexes that definitely have no multikey paths.
+ */
+bool isAnyComponentOfPathMultikey(const BSONObj& indexKeyPattern,
+ bool isMultikey,
+ const MultikeyPaths& indexMultikeyInfo,
+ StringData path);
+
+/**
* Converts the catalog metadata for an index into an IndexEntry, which is a format that is meant to
* be consumed by the query planner. This function can perform index reads and should not be called
* unless access to the storage engine is permitted.
@@ -167,6 +178,14 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln,
* DISTINCT_SCAN to filter some but not all duplicates (so that de-duplication is still necessary
* after query execution), or it may fall back to a regular IXSCAN.
*
+ * Providing QueryPlannerParams::STRICT_DISTINCT_ONLY also implies that the resulting plan may not
+ * "unwind" arrays. That is, it will not return separate values for each element in an array. For
+ * example, in a collection with documents {a: [10, 11]}, {a: 12}, a distinct command on field 'a'
+ * can process the "unwound" values 10, 11, and 12, but a $group by 'a' needs to see documents for
+ * the original [10, 11] and 12 values. In the latter case (in which the caller provides a
+ * STRICT_DISTINCT_ONLY), a DISTINCT_SCAN is not possible, and the caller would have to fall back
+ * to a different plan.
+ *
* Note that this function uses the projection in 'parsedDistinct' to produce a covered query when
* possible, but when a covered query is not possible, the resulting plan may elide the projection
* stage (instead returning entire fetched documents).
diff --git a/src/mongo/db/query/get_executor_test.cpp b/src/mongo/db/query/get_executor_test.cpp
index 50b50275a4c..16cdf77016a 100644
--- a/src/mongo/db/query/get_executor_test.cpp
+++ b/src/mongo/db/query/get_executor_test.cpp
@@ -263,4 +263,32 @@ TEST(GetExecutorTest, GetAllowedPathSpecifiedWildcardIndicesByName) {
{"a.$**_1"});
}
+TEST(GetExecutorTest, isComponentOfPathMultikeyNoMetadata) {
+ BSONObj indexKey = BSON("a" << 1 << "b.c" << -1);
+ MultikeyPaths multikeyInfo = {};
+
+ ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "a"));
+ ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "b.c"));
+
+ ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfo, "a"));
+ ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfo, "b.c"));
+}
+
+TEST(GetExecutorTest, isComponentOfPathMultikeyWithMetadata) {
+ BSONObj indexKey = BSON("a" << 1 << "b.c" << -1);
+ MultikeyPaths multikeyInfo = {{}, {1}};
+
+ ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "a"));
+ ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "b.c"));
+}
+
+TEST(GetExecutorTest, isComponentOfPathMultikeyWithEmptyMetadata) {
+ BSONObj indexKey = BSON("a" << 1 << "b.c" << -1);
+
+
+ MultikeyPaths multikeyInfoAllPathsScalar = {{}, {}};
+ ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfoAllPathsScalar, "a"));
+ ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfoAllPathsScalar, "b.c"));
+}
+
} // namespace