diff options
author | Ian Boros <puppyofkosh@gmail.com> | 2019-04-23 17:18:56 -0400 |
---|---|---|
committer | Ian Boros <puppyofkosh@gmail.com> | 2019-05-03 15:00:35 -0400 |
commit | b1a9c9adea89b475fb05660e2a1cad00971e6899 (patch) | |
tree | 770badc8cbb4d1c18f7f849600d5c9496edc9841 /src/mongo | |
parent | 6ef5b5618f934c4f94fe293ad9701fd98620f133 (diff) | |
download | mongo-b1a9c9adea89b475fb05660e2a1cad00971e6899.tar.gz |
SERVER-40465 $group will not distinct scan multikey index
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 51 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.h | 19 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor_test.cpp | 28 |
4 files changed, 105 insertions, 0 deletions
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index fa47f804077..6a1e3c54946 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -220,6 +220,13 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe // possible, we return nullptr, and the caller is responsible for trying again without // passing a 'groupIdForDistinctScan' value. ParsedDistinct parsedDistinct(std::move(cq.getValue()), *groupIdForDistinctScan); + + // Note that we request a "strict" distinct plan because: + // 1) We do not want to have to de-duplicate the results of the plan. + // + // 2) We not want a plan that will return separate values for each array element. For + // example, if we have a document {a: [1,2]} and group by "a" a DISTINCT_SCAN on an "a" + // index would produce one result for '1' and another for '2', which would be incorrect. auto distinctExecutor = getExecutorDistinct(opCtx, collection, diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 0a8a6cc6080..d7b93fd6ad1 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -124,6 +124,35 @@ namespace wcp = ::mongo::wildcard_planning; bool turnIxscanIntoCount(QuerySolution* soln); } // namespace +bool isAnyComponentOfPathMultikey(const BSONObj& indexKeyPattern, + bool isMultikey, + const MultikeyPaths& indexMultikeyInfo, + StringData path) { + if (!isMultikey) { + return false; + } + + size_t keyPatternFieldIndex = 0; + bool found = false; + if (indexMultikeyInfo.empty()) { + // There is no path-level multikey information available, so we must assume 'path' is + // multikey. + return true; + } + + for (auto&& elt : indexKeyPattern) { + if (elt.fieldNameStringData() == path) { + found = true; + break; + } + keyPatternFieldIndex++; + } + invariant(found); + + invariant(indexMultikeyInfo.size() > keyPatternFieldIndex); + return !indexMultikeyInfo[keyPatternFieldIndex].empty(); +} + IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, const IndexCatalogEntry& ice, const CanonicalQuery* canonicalQuery) { @@ -1455,6 +1484,9 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx, QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions; + // If the caller did not request a "strict" distinct scan then we may choose a plan which + // unwinds arrays and treats each element in an array as its own key. + const bool mayUnwindArrays = !(plannerOptions & QueryPlannerParams::STRICT_DISTINCT_ONLY); std::unique_ptr<IndexCatalog::IndexIterator> ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter(); @@ -1462,6 +1494,17 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx, const IndexCatalogEntry* ice = ii->next(); const IndexDescriptor* desc = ice->descriptor(); if (desc->keyPattern().hasField(parsedDistinct.getKey())) { + if (!mayUnwindArrays && isAnyComponentOfPathMultikey(desc->keyPattern(), + desc->isMultikey(opCtx), + desc->getMultikeyPaths(opCtx), + parsedDistinct.getKey())) { + // If the caller requested "strict" distinct that does not "pre-unwind" arrays, + // then an index which is multikey on the distinct field may not be used. This is + // because when indexing an array each element gets inserted individually. Any plan + // which involves scanning the index will have effectively "unwound" all arrays. + continue; + } + plannerParams.indices.push_back( indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery())); } else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) { @@ -1472,6 +1515,14 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx, plannerParams.indices.push_back( indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery())); } + + // It is not necessary to do any checks about 'mayUnwindArrays' in this case, because: + // 1) If there is no predicate on the distinct(), a wildcard indices may not be used. + // 2) distinct() _with_ a predicate may not be answered with a DISTINCT_SCAN on _any_ + // multikey index. + + // So, we will not distinct scan a wildcard index that's multikey on the distinct() + // field, regardless of the value of 'mayUnwindArrays'. } } diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h index 1f5115ce3be..f81457d2faf 100644 --- a/src/mongo/db/query/get_executor.h +++ b/src/mongo/db/query/get_executor.h @@ -67,6 +67,17 @@ void fillOutPlannerParams(OperationContext* opCtx, QueryPlannerParams* plannerParams); /** + * Return whether or not any component of the path 'path' is multikey given an index key pattern + * and multikeypaths. If no multikey metdata is available for the index, and the index is marked + * multikey, conservatively assumes that a component of 'path' _is_ multikey. The 'isMultikey' + * property of an index is false for indexes that definitely have no multikey paths. + */ +bool isAnyComponentOfPathMultikey(const BSONObj& indexKeyPattern, + bool isMultikey, + const MultikeyPaths& indexMultikeyInfo, + StringData path); + +/** * Converts the catalog metadata for an index into an IndexEntry, which is a format that is meant to * be consumed by the query planner. This function can perform index reads and should not be called * unless access to the storage engine is permitted. @@ -167,6 +178,14 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, * DISTINCT_SCAN to filter some but not all duplicates (so that de-duplication is still necessary * after query execution), or it may fall back to a regular IXSCAN. * + * Providing QueryPlannerParams::STRICT_DISTINCT_ONLY also implies that the resulting plan may not + * "unwind" arrays. That is, it will not return separate values for each element in an array. For + * example, in a collection with documents {a: [10, 11]}, {a: 12}, a distinct command on field 'a' + * can process the "unwound" values 10, 11, and 12, but a $group by 'a' needs to see documents for + * the original [10, 11] and 12 values. In the latter case (in which the caller provides a + * STRICT_DISTINCT_ONLY), a DISTINCT_SCAN is not possible, and the caller would have to fall back + * to a different plan. + * * Note that this function uses the projection in 'parsedDistinct' to produce a covered query when * possible, but when a covered query is not possible, the resulting plan may elide the projection * stage (instead returning entire fetched documents). diff --git a/src/mongo/db/query/get_executor_test.cpp b/src/mongo/db/query/get_executor_test.cpp index 50b50275a4c..16cdf77016a 100644 --- a/src/mongo/db/query/get_executor_test.cpp +++ b/src/mongo/db/query/get_executor_test.cpp @@ -263,4 +263,32 @@ TEST(GetExecutorTest, GetAllowedPathSpecifiedWildcardIndicesByName) { {"a.$**_1"}); } +TEST(GetExecutorTest, isComponentOfPathMultikeyNoMetadata) { + BSONObj indexKey = BSON("a" << 1 << "b.c" << -1); + MultikeyPaths multikeyInfo = {}; + + ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "a")); + ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "b.c")); + + ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfo, "a")); + ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfo, "b.c")); +} + +TEST(GetExecutorTest, isComponentOfPathMultikeyWithMetadata) { + BSONObj indexKey = BSON("a" << 1 << "b.c" << -1); + MultikeyPaths multikeyInfo = {{}, {1}}; + + ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "a")); + ASSERT_TRUE(isAnyComponentOfPathMultikey(indexKey, true, multikeyInfo, "b.c")); +} + +TEST(GetExecutorTest, isComponentOfPathMultikeyWithEmptyMetadata) { + BSONObj indexKey = BSON("a" << 1 << "b.c" << -1); + + + MultikeyPaths multikeyInfoAllPathsScalar = {{}, {}}; + ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfoAllPathsScalar, "a")); + ASSERT_FALSE(isAnyComponentOfPathMultikey(indexKey, false, multikeyInfoAllPathsScalar, "b.c")); +} + } // namespace |