diff options
author | Bernard Gorman <bernard.gorman@gmail.com> | 2018-07-10 15:48:30 +0100 |
---|---|---|
committer | Bernard Gorman <bernard.gorman@gmail.com> | 2018-08-09 23:02:51 +0100 |
commit | fa8f737443dbcd604071baee6e7daa148d92ce68 (patch) | |
tree | 364914a4fac03b464bc3cca272c7b6914553892e | |
parent | 55ff5175dfde9be093f69e792bac8408639c4653 (diff) | |
download | mongo-fa8f737443dbcd604071baee6e7daa148d92ce68.tar.gz |
SERVER-35860 Maintain multikey metadata keys for allPaths indexes
37 files changed, 1207 insertions, 198 deletions
diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp index d23da8c5a81..9eacc4887fc 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.cpp +++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp @@ -37,6 +37,7 @@ #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/fts/fts_spec.h" +#include "mongo/db/index/all_paths_key_generator.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_legacy.h" #include "mongo/db/query/plan_cache.h" @@ -88,18 +89,22 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) { while (i.more()) { IndexDescriptor* descriptor = i.next(); - if (descriptor->getAccessMethodName() != IndexNames::TEXT) { - BSONObj key = descriptor->keyPattern(); - const BSONObj& infoObj = descriptor->infoObj(); - if (infoObj.hasField("expireAfterSeconds")) { - _hasTTLIndex = true; - } - BSONObjIterator j(key); - while (j.more()) { - BSONElement e = j.next(); - _indexedPaths.addPath(e.fieldName()); + if (descriptor->getAccessMethodName() == IndexNames::ALLPATHS) { + // Obtain the projection used by the $** index's key generator. + auto pathProj = AllPathsKeyGenerator::createProjectionExec( + descriptor->keyPattern(), descriptor->pathProjection()); + // If the projection is an exclusion, then we must check the new document's keys on all + // updates, since we do not exhaustively know the set of paths to be indexed. + if (pathProj->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection) { + _indexedPaths.allPathsIndexed(); + } else { + // If a subtree was specified in the keyPattern, or if an inclusion projection is + // present, then we need only index the path(s) preserved by the projection. + for (const auto& path : pathProj->getExhaustivePaths()) { + _indexedPaths.addPath(path); + } } - } else { + } else if (descriptor->getAccessMethodName() == IndexNames::TEXT) { fts::FTSSpec ftsSpec(descriptor->infoObj()); if (ftsSpec.wildcard()) { @@ -120,6 +125,17 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) { // language of a subdocument. Add the override field as a path component. _indexedPaths.addPathComponent(ftsSpec.languageOverrideField()); } + } else { + BSONObj key = descriptor->keyPattern(); + const BSONObj& infoObj = descriptor->infoObj(); + if (infoObj.hasField("expireAfterSeconds")) { + _hasTTLIndex = true; + } + BSONObjIterator j(key); + while (j.more()) { + BSONElement e = j.next(); + _indexedPaths.addPath(e.fieldName()); + } } // handle partial indexes diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index 7caf6e1ed86..594b0265a43 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -298,7 +298,7 @@ ValidationStage IndexConsistency::getStage() const { void IndexConsistency::setLastProcessedRecordId(RecordId recordId) { stdx::lock_guard<stdx::mutex> lock(_classMutex); - if (!recordId.isNormal()) { + if (!recordId.isValid()) { _lastProcessedRecordId = boost::none; } else { _lastProcessedRecordId = recordId; diff --git a/src/mongo/db/catalog/index_create_impl.cpp b/src/mongo/db/catalog/index_create_impl.cpp index 24f123e6952..d65eed34084 100644 --- a/src/mongo/db/catalog/index_create_impl.cpp +++ b/src/mongo/db/catalog/index_create_impl.cpp @@ -486,7 +486,7 @@ Status MultiIndexBlockImpl::insert(const BSONObj& doc, const RecordId& loc) { int64_t unused; Status idxStatus(ErrorCodes::InternalError, ""); if (_indexes[i].bulk) { - idxStatus = _indexes[i].bulk->insert(_opCtx, doc, loc, _indexes[i].options, &unused); + idxStatus = _indexes[i].bulk->insert(_opCtx, doc, loc, _indexes[i].options); } else { idxStatus = _indexes[i].real->insert(_opCtx, doc, loc, _indexes[i].options, &unused); } diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp index d4619c81592..cf26dc31084 100644 --- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp +++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp @@ -82,18 +82,19 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, } BSONObjSet documentKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); - // There's no need to compute the prefixes of the indexed fields that cause the - // index to be multikey when validating the index keys. - MultikeyPaths* multikeyPaths = nullptr; + BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + MultikeyPaths multikeyPaths; iam->getKeys(recordBson, IndexAccessMethod::GetKeysMode::kEnforceConstraints, &documentKeySet, - multikeyPaths); + &multikeyMetadataKeys, + &multikeyPaths); - if (!descriptor->isMultikey(_opCtx) && documentKeySet.size() > 1) { + if (!descriptor->isMultikey(_opCtx) && + iam->shouldMarkIndexAsMultikey(documentKeySet, multikeyMetadataKeys, multikeyPaths)) { std::string msg = str::stream() << "Index " << descriptor->indexName() - << " is not multi-key but has more than one" - << " key in document " << recordId; + << " is not multi-key, but a multikey path " + << " is present in document " << recordId; curRecordResults.errors.push_back(msg); curRecordResults.valid = false; } @@ -185,7 +186,7 @@ void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore, Status status = validate(record->id, record->data, &validatedSize); // Checks to ensure isInRecordIdOrder() is being used properly. - if (prevRecordId.isNormal()) { + if (prevRecordId.isValid()) { invariant(prevRecordId < record->id); } @@ -269,4 +270,4 @@ void RecordStoreValidateAdaptor::validateIndexKeyCount(IndexDescriptor* idx, results.warnings.push_back(warning); } } -} // namespace +} // namespace mongo diff --git a/src/mongo/db/exec/projection_exec_agg.cpp b/src/mongo/db/exec/projection_exec_agg.cpp index 65c780fa3b4..d841d5d93f0 100644 --- a/src/mongo/db/exec/projection_exec_agg.cpp +++ b/src/mongo/db/exec/projection_exec_agg.cpp @@ -73,6 +73,12 @@ public: expCtx, projSpec, idPolicy, recursionPolicy, ProjectionParseMode::kBanComputedFields); } + std::set<std::string> getExhaustivePaths() const { + DepsTracker depsTracker; + _projection->addDependencies(&depsTracker); + return depsTracker.fields; + } + ProjectionType getType() const { return (_projection->getType() == TransformerType::kInclusionProjection ? ProjectionType::kInclusionProjection @@ -136,4 +142,7 @@ stdx::unordered_set<std::string> ProjectionExecAgg::applyProjectionToFields( return _exec->applyProjectionToFields(fields); } +std::set<std::string> ProjectionExecAgg::getExhaustivePaths() const { + return _exec->getExhaustivePaths(); +} } // namespace mongo diff --git a/src/mongo/db/exec/projection_exec_agg.h b/src/mongo/db/exec/projection_exec_agg.h index f796a250d37..6989478b812 100644 --- a/src/mongo/db/exec/projection_exec_agg.h +++ b/src/mongo/db/exec/projection_exec_agg.h @@ -62,21 +62,24 @@ public: ~ProjectionExecAgg(); + BSONObj applyProjection(BSONObj inputDoc) const; + + stdx::unordered_set<std::string> applyProjectionToFields( + const stdx::unordered_set<std::string>& fields) const; + + /** + * Returns the exhaustive set of all paths that will be preserved by this projection, or an + * empty set if the exhaustive set cannot be determined. An inclusion will always produce an + * exhaustive set; an exclusion will always produce an empty set. + */ + std::set<std::string> getExhaustivePaths() const; + ProjectionType getType() const; BSONObj getProjectionSpec() const { return _projSpec; } - const BSONObj& getSpec() const { - return _projSpec; - } - - BSONObj applyProjection(BSONObj inputDoc) const; - - stdx::unordered_set<std::string> applyProjectionToFields( - const stdx::unordered_set<std::string>& fields) const; - private: /** * ProjectionExecAgg::ProjectionExecutor wraps all agg-specific calls, and is forward-declared diff --git a/src/mongo/db/exec/projection_exec_agg_test.cpp b/src/mongo/db/exec/projection_exec_agg_test.cpp index 8503e7a50f8..5105677f3f0 100644 --- a/src/mongo/db/exec/projection_exec_agg_test.cpp +++ b/src/mongo/db/exec/projection_exec_agg_test.cpp @@ -32,6 +32,7 @@ #include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" #include "mongo/unittest/unittest.h" #include "mongo/util/assert_util.h" @@ -46,14 +47,20 @@ BSONObj wrapInLiteral(const T& arg) { return BSON("$literal" << arg); } -// Helper to simplify the creation of a ProjectionExecAgg which includes _id and recurses nested -// arrays by default. +// Helper to simplify the creation of a ProjectionExecAgg which includes _id and recurses arrays. std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion( BSONObj projSpec) { return ProjectionExecAgg::create( projSpec, DefaultIdPolicy::kIncludeId, ArrayRecursionPolicy::kRecurseNestedArrays); } +// Helper to simplify the creation of a ProjectionExecAgg which excludes _id and recurses arrays. +std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion( + BSONObj projSpec) { + return ProjectionExecAgg::create( + projSpec, DefaultIdPolicy::kExcludeId, ArrayRecursionPolicy::kRecurseNestedArrays); +} + // // Error cases. // @@ -174,5 +181,132 @@ TEST(ProjectionExecAggType, ShouldAcceptExclusionProjection) { ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); } +// Misc tests. + +TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdInclusion) { + auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion( + fromjson("{a: {b: {c: 1}}, d: 1}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + std::set<std::string> expectedPaths{"_id", "a.b.c", "d"}; + + // Verify that the exhaustive set of paths is as expected. + ASSERT(exhaustivePaths == expectedPaths); +} + +TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusion) { + auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion( + fromjson("{_id: 1, a: {b: {c: 1}}, d: 1}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + std::set<std::string> expectedPaths{"_id", "a.b.c", "d"}; + + // Verify that the exhaustive set of paths is as expected. + ASSERT(exhaustivePaths == expectedPaths); +} + +TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusionIdOnly) { + auto parsedProject = + makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(fromjson("{_id: 1}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + std::set<std::string> expectedPaths{"_id"}; + + // Verify that the exhaustive set of paths is as expected. + ASSERT(exhaustivePaths == expectedPaths); +} + +TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdExclusion) { + auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion( + fromjson("{a: {b: {c: 1}}, d: 1}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + std::set<std::string> expectedPaths{"a.b.c", "d"}; + + // Verify that the exhaustive set of paths is as expected. + ASSERT(exhaustivePaths == expectedPaths); +} + +TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdExclusion) { + auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion( + fromjson("{_id: 0, a: {b: {c: 1}}, d: 1}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + std::set<std::string> expectedPaths{"a.b.c", "d"}; + + // Verify that the exhaustive set of paths is as expected. + ASSERT(exhaustivePaths == expectedPaths); +} + +TEST(ProjectionExecAggTests, ExclusionFieldPathsWithImplicitIdInclusion) { + auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion( + fromjson("{a: {b: {c: 0}}, d: 0}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + + // Verify that the exhaustive set is empty, despite the implicit inclusion of _id. + ASSERT(exhaustivePaths.empty()); +} + +TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdInclusion) { + auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion( + fromjson("{_id: 1, a: {b: {c: 0}}, d: 0}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + + // Verify that the exhaustive set is empty, despite the explicit inclusion of _id. + ASSERT(exhaustivePaths.empty()); +} + +TEST(ProjectionExecAggTests, ExclusionFieldPathsWithImplicitIdExclusion) { + auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion( + fromjson("{a: {b: {c: 0}}, d: 0}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + + // Verify that the exhaustive set is empty. + ASSERT(exhaustivePaths.empty()); +} + +TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdExclusion) { + auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion( + fromjson("{_id: 1, a: {b: {c: 0}}, d: 0}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + + // Verify that the exhaustive set is empty. + ASSERT(exhaustivePaths.empty()); +} + +TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdExclusionIdOnly) { + auto parsedProject = + makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(fromjson("{_id: 0}")); + ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection); + + // Extract the exhaustive set of paths that will be preserved by the projection. + auto exhaustivePaths = parsedProject->getExhaustivePaths(); + + // Verify that the exhaustive set is empty. + ASSERT(exhaustivePaths.empty()); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index 12a12fb81f5..cdd4b2227eb 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -36,7 +36,6 @@ #include "mongo/db/index/index_access_method.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/service_context.h" -#include "mongo/db/service_context.h" namespace mongo { @@ -117,10 +116,12 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx, BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); // There's no need to compute the prefixes of the indexed fields that cause the index to // be multikey when ensuring the keyData is still valid. + BSONObjSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; member->keyData[i].index->getKeys(member->obj.value(), IndexAccessMethod::GetKeysMode::kEnforceConstraints, &keys, + multikeyMetadataKeys, multikeyPaths); if (!keys.count(member->keyData[i].keyData)) { // document would no longer be at this position in the index. diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index 5920b46113a..f1db4b83969 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -50,6 +50,7 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte /** Finds the key objects to put in an index */ void TwoDAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::get2DKeys(obj, _params, keys); } diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index 3ebea0bcf42..1a5451266b1 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -54,10 +54,13 @@ private: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * - * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking - * path-level multikey information. + * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because 2d + * indexes don't support tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; TwoDIndexingParams _params; }; diff --git a/src/mongo/db/index/all_paths_access_method.cpp b/src/mongo/db/index/all_paths_access_method.cpp index 46a3d432e4e..a6e95ce7d89 100644 --- a/src/mongo/db/index/all_paths_access_method.cpp +++ b/src/mongo/db/index/all_paths_access_method.cpp @@ -40,12 +40,16 @@ AllPathsAccessMethod::AllPathsAccessMethod(IndexCatalogEntry* allPathsState, _keyGen( _descriptor->keyPattern(), _descriptor->pathProjection(), _btreeState->getCollator()) {} +bool AllPathsAccessMethod::shouldMarkIndexAsMultikey(const BSONObjSet& keys, + const BSONObjSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const { + return !multikeyMetadataKeys.empty(); +} + void AllPathsAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { - // TODO SERVER-35748: Until MultikeyPaths has been updated to facilitate 'allPaths' indexes, we - // use AllPathsKeyGenerator::MultikeyPathsMock to separate multikey paths from RecordId keys. - auto multikeyPathsMock = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); - _keyGen.generateKeys(obj, keys, &multikeyPathsMock); + _keyGen.generateKeys(obj, keys, multikeyMetadataKeys); } } // namespace mongo diff --git a/src/mongo/db/index/all_paths_access_method.h b/src/mongo/db/index/all_paths_access_method.h index 926d8ed1166..53637bf7ee5 100644 --- a/src/mongo/db/index/all_paths_access_method.h +++ b/src/mongo/db/index/all_paths_access_method.h @@ -42,8 +42,22 @@ class AllPathsAccessMethod : public IndexAccessMethod { public: AllPathsAccessMethod(IndexCatalogEntry* allPathsState, SortedDataInterface* btree); + /** + * Returns 'true' if the index should become multikey on the basis of the passed arguments. + * Because it is possible for a $** index to generate multiple keys per document without any of + * them lying along a multikey (i.e. array) path, this method will only return 'true' if one or + * more multikey metadata keys have been generated; that is, if the 'multikeyMetadataKeys' + * BSONObjSet is non-empty. + */ + bool shouldMarkIndexAsMultikey(const BSONObjSet& keys, + const BSONObjSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const final; + private: - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; const AllPathsKeyGenerator _keyGen; }; diff --git a/src/mongo/db/index/all_paths_key_generator.cpp b/src/mongo/db/index/all_paths_key_generator.cpp index c21c01e9fa2..5d894ac40a8 100644 --- a/src/mongo/db/index/all_paths_key_generator.cpp +++ b/src/mongo/db/index/all_paths_key_generator.cpp @@ -62,7 +62,7 @@ void popPathComponent(BSONElement elem, bool enclosingObjIsArray, FieldRef* path constexpr StringData AllPathsKeyGenerator::kSubtreeSuffix; std::unique_ptr<ProjectionExecAgg> AllPathsKeyGenerator::createProjectionExec( - const BSONObj& keyPattern, const BSONObj& pathProjection) { + BSONObj keyPattern, BSONObj pathProjection) { // We should never have a key pattern that contains more than a single element. invariant(keyPattern.nFields() == 1); @@ -99,17 +99,16 @@ AllPathsKeyGenerator::AllPathsKeyGenerator(BSONObj keyPattern, void AllPathsKeyGenerator::generateKeys(BSONObj inputDoc, BSONObjSet* keys, - MultikeyPathsMock* multikeyPaths) const { - FieldRef workingPath; - _traverseAllPaths( - _projExec->applyProjection(inputDoc), false, &workingPath, keys, multikeyPaths); + BSONObjSet* multikeyPaths) const { + FieldRef rootPath; + _traverseAllPaths(_projExec->applyProjection(inputDoc), false, &rootPath, keys, multikeyPaths); } void AllPathsKeyGenerator::_traverseAllPaths(BSONObj obj, bool objIsArray, FieldRef* path, BSONObjSet* keys, - MultikeyPathsMock* multikeyPaths) const { + BSONObjSet* multikeyPaths) const { for (const auto elem : obj) { // If the element's fieldName contains a ".", fast-path skip it because it's not queryable. if (elem.fieldNameStringData().find('.', 0) != std::string::npos) @@ -167,10 +166,13 @@ void AllPathsKeyGenerator::_addKey(BSONElement elem, keys->insert(bob.obj()); } -void AllPathsKeyGenerator::_addMultiKey(const FieldRef& fullPath, - MultikeyPathsMock* multikeyPaths) const { - // Multikey paths are denoted by an entry of the form { "": 1, "": "path.to.array" }. - multikeyPaths->insert(BSON("" << 1 << "" << fullPath.dottedField())); +void AllPathsKeyGenerator::_addMultiKey(const FieldRef& fullPath, BSONObjSet* multikeyPaths) const { + // Multikey paths are denoted by a key of the form { "": 1, "": "path.to.array" }. The argument + // 'multikeyPaths' may be nullptr if the access method is being used in an operation which does + // not require multikey path generation. + if (multikeyPaths) { + multikeyPaths->insert(BSON("" << 1 << "" << fullPath.dottedField())); + } } } // namespace mongo diff --git a/src/mongo/db/index/all_paths_key_generator.h b/src/mongo/db/index/all_paths_key_generator.h index 9c5b7850fca..1eb0297ba8d 100644 --- a/src/mongo/db/index/all_paths_key_generator.h +++ b/src/mongo/db/index/all_paths_key_generator.h @@ -44,20 +44,12 @@ public: static constexpr StringData kSubtreeSuffix = ".$**"_sd; /** - * Returns an owned ProjectionExecAgg as defined by the 'keyPattern' and 'pathProjection' and - * created with the parameter necessary for allPaths key generation. - */ - static std::unique_ptr<ProjectionExecAgg> createProjectionExec(const BSONObj& keyPattern, - const BSONObj& pathProjection); - - /** - * TODO SERVER-35748: Currently, the MultikeyPaths structure used by IndexAccessMethod is not - * suitable for tracking multikey paths in AllPaths indexes. In order to keep multikey paths - * separate from RecordId keys, and to ensure that both this key generator and the - * AllPathsIndexAccessMethod can be trivially switched over to using the new MultikeyPaths - * tracker once it is implemented, we use a mock MultikeyPaths here. + * Returns an owned ProjectionExecAgg identical to the one that AllPathsKeyGenerator will use + * internally when generating the keys for the $** index, as defined by the 'keyPattern' and + * 'pathProjection' arguments. */ - using MultikeyPathsMock = BSONObjSet; + static std::unique_ptr<ProjectionExecAgg> createProjectionExec(BSONObj keyPattern, + BSONObj pathProjection); AllPathsKeyGenerator(BSONObj keyPattern, BSONObj pathProjection, @@ -71,7 +63,7 @@ public: * document, in the following format: * { '': 1, '': 'path.to.array' } */ - void generateKeys(BSONObj inputDoc, BSONObjSet* keys, MultikeyPathsMock* multikeyPaths) const; + void generateKeys(BSONObj inputDoc, BSONObjSet* keys, BSONObjSet* multikeyPaths) const; private: // Traverses every path of the post-projection document, adding keys to the set as it goes. @@ -79,10 +71,10 @@ private: bool objIsArray, FieldRef* path, BSONObjSet* keys, - MultikeyPathsMock* multikeyPaths) const; + BSONObjSet* multikeyPaths) const; // Helper functions to format the entry appropriately before adding it to the key/path tracker. - void _addMultiKey(const FieldRef& fullPath, MultikeyPathsMock* multikeyPaths) const; + void _addMultiKey(const FieldRef& fullPath, BSONObjSet* multikeyPaths) const; void _addKey(BSONElement elem, const FieldRef& fullPath, BSONObjSet* keys) const; // Helper to check whether the element is a nested array, and conditionally add it to 'keys'. diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index ca6c7fb115c..ee599f5d23e 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -64,6 +64,7 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn void BtreeAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { _keyGenerator->getKeys(obj, keys, multikeyPaths); } diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index caed0eccab5..11b811806a3 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -32,7 +32,6 @@ #include "mongo/base/status.h" #include "mongo/db/index/btree_key_generator.h" #include "mongo/db/index/index_access_method.h" -#include "mongo/db/index/index_access_method.h" #include "mongo/db/jsobj.h" namespace mongo { @@ -48,7 +47,10 @@ public: BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree); private: - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; // Our keys differ for V0 and V1. std::unique_ptr<BtreeKeyGenerator> _keyGenerator; diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 9bb650f2eb6..203e555d52e 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -38,6 +38,7 @@ FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterf void FTSAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); } diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h index 8f843e32bc6..d3148ae3d96 100644 --- a/src/mongo/db/index/fts_access_method.h +++ b/src/mongo/db/index/fts_access_method.h @@ -48,10 +48,13 @@ private: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * - * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking - * path-level multikey information. + * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because text + * indexes don't support tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; fts::FTSSpec _ftsSpec; }; diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index 734b3812ebf..a2e25d71e7d 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -55,6 +55,7 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte void HashAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHashKeys( obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys); diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index fbd35c812b2..99a82454a82 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -51,10 +51,13 @@ private: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * - * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support - * tracking path-level multikey information. + * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because hashed + * indexes don't support tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; // Only one of our fields is hashed. This is the field name for it. std::string _hashedField; diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 843e59d09bb..28b7728c1c2 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -65,6 +65,7 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState, void HaystackAccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); } diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index 4bf8fc41839..a6aef7a2df4 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -72,10 +72,13 @@ private: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * - * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support - * tracking path-level multikey information. + * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because + * geoHaystack indexes don't support tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; std::string _geoField; std::vector<std::string> _otherFields; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 4dab655cfb7..0dbe51ffbc9 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -66,6 +66,10 @@ using IndexVersion = IndexDescriptor::IndexVersion; namespace { +// Reserved RecordId against which multikey metadata keys are indexed. +static const RecordId kMultikeyMetadataKeyId = + RecordId{RecordId::ReservedId::kAllPathsMultikeyMetadataId}; + /** * Returns true if at least one prefix of any of the indexed fields causes the index to be * multikey, and returns false otherwise. This function returns false if the 'multikeyPaths' @@ -77,6 +81,10 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) { [](const std::set<std::size_t>& components) { return !components.empty(); }); } +std::vector<BSONObj> asVector(const BSONObjSet& objSet) { + return {objSet.begin(), objSet.end()}; +} + } // namespace // TODO SERVER-36386: Remove the server parameter @@ -125,7 +133,23 @@ bool IndexAccessMethod::ignoreKeyTooLong(OperationContext* opCtx) { return shouldRelaxConstraints || !failIndexKeyTooLongParam(); } -// Find the keys for obj, put them in the tree pointing to loc +bool IndexAccessMethod::isFatalError(OperationContext* opCtx, Status status, BSONObj key) { + // If the status is Status::OK(), or if it is ErrorCodes::KeyTooLong and the user has chosen to + // ignore this error, return false immediately. + if (status.isOK() || (status == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx))) { + return false; + } + + // A document might be indexed multiple times during a background index build if it moves ahead + // of the cursor (e.g. via an update). We test this scenario and swallow the error accordingly. + if (status == ErrorCodes::DuplicateKeyValue && !_btreeState->isReady(opCtx)) { + LOG(3) << "key " << key << " already in index during background indexing (ok)"; + return false; + } + return true; +} + +// Find the keys for obj, put them in the tree pointing to loc. Status IndexAccessMethod::insert(OperationContext* opCtx, const BSONObj& obj, const RecordId& loc, @@ -133,52 +157,32 @@ Status IndexAccessMethod::insert(OperationContext* opCtx, int64_t* numInserted) { invariant(numInserted); *numInserted = 0; + BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; // Delegate to the subclass. - getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); - - Status ret = Status::OK(); - for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { - Status status = _newInterface->insert(opCtx, *i, loc, options.dupsAllowed); - - // Everything's OK, carry on. - if (status.isOK()) { - ++*numInserted; - IndexKeyEntry indexEntry = IndexKeyEntry(*i, loc); - continue; - } - - // Error cases. - - if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx)) { - IndexKeyEntry indexEntry = IndexKeyEntry(*i, loc); - continue; - } - - if (status.code() == ErrorCodes::DuplicateKeyValue) { - // A document might be indexed multiple times during a background index build - // if it moves ahead of the collection scan cursor (e.g. via an update). - if (!_btreeState->isReady(opCtx)) { - LOG(3) << "key " << *i << " already in index during background indexing (ok)"; - continue; + getKeys(obj, options.getKeysMode, &keys, &multikeyMetadataKeys, &multikeyPaths); + + // Add all new data keys, and all new multikey metadata keys, into the index. When iterating + // over the data keys, each of them should point to the doc's RecordId. When iterating over + // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'. + for (const auto keySet : {&keys, &multikeyMetadataKeys}) { + const auto& recordId = (keySet == &keys ? loc : kMultikeyMetadataKeyId); + for (const auto& key : *keySet) { + Status status = _newInterface->insert(opCtx, key, recordId, options.dupsAllowed); + if (isFatalError(opCtx, status, key)) { + return status; } } - - // Clean up after ourselves. - for (BSONObjSet::const_iterator j = keys.begin(); j != i; ++j) { - removeOneKey(opCtx, *j, loc, options.dupsAllowed); - *numInserted = 0; - } - - return status; } - if (*numInserted > 1 || isMultikeyFromPaths(multikeyPaths)) { + *numInserted = keys.size() + multikeyMetadataKeys.size(); + + if (shouldMarkIndexAsMultikey(keys, multikeyMetadataKeys, multikeyPaths)) { _btreeState->setMultikey(opCtx, multikeyPaths); } - return ret; + return Status::OK(); } void IndexAccessMethod::removeOneKey(OperationContext* opCtx, @@ -188,7 +192,6 @@ void IndexAccessMethod::removeOneKey(OperationContext* opCtx, try { _newInterface->unindex(opCtx, key, loc, dupsAllowed); - IndexKeyEntry indexEntry = IndexKeyEntry(key, loc); } catch (AssertionException& e) { log() << "Assertion failure: _unindex failed " << _descriptor->indexNamespace(); log() << "Assertion failure: _unindex failed: " << redact(e) << " key:" << key.toString() @@ -214,17 +217,20 @@ Status IndexAccessMethod::remove(OperationContext* opCtx, // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when removing a document since the index metadata isn't updated when keys are // deleted. + BSONObjSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; // Relax key constraints on removal when deleting documents with invalid formats, but only // those that don't apply to the partialIndex filter. - getKeys(obj, GetKeysMode::kRelaxConstraintsUnfiltered, &keys, multikeyPaths); + getKeys( + obj, GetKeysMode::kRelaxConstraintsUnfiltered, &keys, multikeyMetadataKeys, multikeyPaths); - for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { - removeOneKey(opCtx, *i, loc, options.dupsAllowed); - ++*numDeleted; + for (const auto& key : keys) { + removeOneKey(opCtx, key, loc, options.dupsAllowed); } + *numDeleted = keys.size(); + return Status::OK(); } @@ -236,12 +242,13 @@ Status IndexAccessMethod::touch(OperationContext* opCtx, const BSONObj& obj) { BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when paging a document's index entries into memory. + BSONObjSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; - getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyMetadataKeys, multikeyPaths); std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(opCtx)); - for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { - cursor->seekExact(*i); + for (const auto& key : keys) { + cursor->seekExact(key); } return Status::OK(); @@ -258,8 +265,13 @@ RecordId IndexAccessMethod::findSingle(OperationContext* opCtx, const BSONObj& r if (_btreeState->getCollator()) { // For performance, call get keys only if there is a non-simple collation. BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + BSONObjSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; - getKeys(requestedKey, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(requestedKey, + GetKeysMode::kEnforceConstraints, + &keys, + multikeyMetadataKeys, + multikeyPaths); invariant(keys.size() == 1); actualKey = *keys.begin(); } else { @@ -346,12 +358,17 @@ Status IndexAccessMethod::validateUpdate(OperationContext* opCtx, // There's no need to compute the prefixes of the indexed fields that possibly caused the // index to be multikey when the old version of the document was written since the index // metadata isn't updated when keys are deleted. + BSONObjSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; - getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyPaths); + getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyMetadataKeys, multikeyPaths); } if (!indexFilter || indexFilter->matchesBSON(to)) { - getKeys(to, options.getKeysMode, &ticket->newKeys, &ticket->newMultikeyPaths); + getKeys(to, + options.getKeysMode, + &ticket->newKeys, + &ticket->newMultikeyMetadataKeys, + &ticket->newMultikeyPaths); } ticket->loc = record; @@ -368,6 +385,8 @@ Status IndexAccessMethod::update(OperationContext* opCtx, const UpdateTicket& ticket, int64_t* numInserted, int64_t* numDeleted) { + invariant(ticket.newKeys.size() == + ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size()); invariant(numInserted); invariant(numDeleted); @@ -378,34 +397,31 @@ Status IndexAccessMethod::update(OperationContext* opCtx, return Status(ErrorCodes::InternalError, "Invalid UpdateTicket in update"); } - if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1 || - isMultikeyFromPaths(ticket.newMultikeyPaths)) { - _btreeState->setMultikey(opCtx, ticket.newMultikeyPaths); - } - - for (size_t i = 0; i < ticket.removed.size(); ++i) { - _newInterface->unindex(opCtx, ticket.removed[i], ticket.loc, ticket.dupsAllowed); - IndexKeyEntry indexEntry = IndexKeyEntry(ticket.removed[i], ticket.loc); + for (const auto& remKey : ticket.removed) { + _newInterface->unindex(opCtx, remKey, ticket.loc, ticket.dupsAllowed); } - for (size_t i = 0; i < ticket.added.size(); ++i) { - Status status = - _newInterface->insert(opCtx, ticket.added[i], ticket.loc, ticket.dupsAllowed); - if (!status.isOK()) { - if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx)) { - // Ignore. - IndexKeyEntry indexEntry = IndexKeyEntry(ticket.added[i], ticket.loc); - continue; + // Add all new data keys, and all new multikey metadata keys, into the index. When iterating + // over the data keys, each of them should point to the doc's RecordId. When iterating over + // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'. + const auto newMultikeyMetadataKeys = asVector(ticket.newMultikeyMetadataKeys); + for (const auto keySet : {&ticket.added, &newMultikeyMetadataKeys}) { + const auto& recordId = (keySet == &ticket.added ? ticket.loc : kMultikeyMetadataKeyId); + for (const auto& key : *keySet) { + Status status = _newInterface->insert(opCtx, key, recordId, ticket.dupsAllowed); + if (isFatalError(opCtx, status, key)) { + return status; } - - return status; } + } - IndexKeyEntry indexEntry = IndexKeyEntry(ticket.added[i], ticket.loc); + if (shouldMarkIndexAsMultikey( + ticket.newKeys, ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) { + _btreeState->setMultikey(opCtx, ticket.newMultikeyPaths); } - *numInserted = ticket.added.size(); *numDeleted = ticket.removed.size(); + *numInserted = ticket.added.size(); return Status::OK(); } @@ -433,14 +449,11 @@ IndexAccessMethod::BulkBuilder::BulkBuilder(const IndexAccessMethod* index, Status IndexAccessMethod::BulkBuilder::insert(OperationContext* opCtx, const BSONObj& obj, const RecordId& loc, - const InsertDeleteOptions& options, - int64_t* numInserted) { + const InsertDeleteOptions& options) { BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; - _real->getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); - - _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1); + _real->getKeys(obj, options.getKeysMode, &keys, &_multikeyMetadataKeys, &multikeyPaths); if (!multikeyPaths.empty()) { if (_indexMultikeyPaths.empty()) { @@ -453,18 +466,24 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* opCtx, } } - for (BSONObjSet::iterator it = keys.begin(); it != keys.end(); ++it) { - _sorter->add(*it, loc); - _keysInserted++; + for (const auto& key : keys) { + _sorter->add(key, loc); + ++_keysInserted; } - if (NULL != numInserted) { - *numInserted += keys.size(); - } + _isMultiKey = + _isMultiKey || _real->shouldMarkIndexAsMultikey(keys, _multikeyMetadataKeys, multikeyPaths); return Status::OK(); } +IndexAccessMethod::BulkBuilder::Sorter::Iterator* IndexAccessMethod::BulkBuilder::done() { + for (const auto& key : _multikeyMetadataKeys) { + _sorter->add(key, kMultikeyMetadataKeyId); + ++_keysInserted; + } + return _sorter->done(); +} Status IndexAccessMethod::commitBulk(OperationContext* opCtx, BulkBuilder* bulk, @@ -473,7 +492,7 @@ Status IndexAccessMethod::commitBulk(OperationContext* opCtx, set<RecordId>* dupsToDrop) { Timer timer; - std::unique_ptr<BulkBuilder::Sorter::Iterator> it(bulk->_sorter->done()); + std::unique_ptr<BulkBuilder::Sorter::Iterator> it(bulk->done()); stdx::unique_lock<Client> lk(*opCtx->getClient()); ProgressMeterHolder pm( @@ -516,8 +535,7 @@ Status IndexAccessMethod::commitBulk(OperationContext* opCtx, return status; } - // If we're here either it's a dup and we're cool with it or the addKey went just - // fine. + // If we're here either it's a dup and we're cool with it or the addKey went just fine. pm.hit(); wunit.commit(); } @@ -543,6 +561,7 @@ void IndexAccessMethod::setIndexIsMultikey(OperationContext* opCtx, MultikeyPath void IndexAccessMethod::getKeys(const BSONObj& obj, GetKeysMode mode, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { static stdx::unordered_set<int> whiteList{ErrorCodes::CannotBuildIndexKeys, // Btree @@ -569,7 +588,7 @@ void IndexAccessMethod::getKeys(const BSONObj& obj, 13026, 13027}; try { - doGetKeys(obj, keys, multikeyPaths); + doGetKeys(obj, keys, multikeyMetadataKeys, multikeyPaths); } catch (const AssertionException& ex) { // Suppress all indexing errors when mode is kRelaxConstraints. if (mode == GetKeysMode::kEnforceConstraints) { @@ -598,8 +617,10 @@ void IndexAccessMethod::getKeys(const BSONObj& obj, } } -bool IndexAccessMethod::BulkBuilder::isMultikey() const { - return _everGeneratedMultipleKeys || isMultikeyFromPaths(_indexMultikeyPaths); +bool IndexAccessMethod::shouldMarkIndexAsMultikey(const BSONObjSet& keys, + const BSONObjSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const { + return (keys.size() > 1 || isMultikeyFromPaths(multikeyPaths)); } } // namespace mongo diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 9e1e3185247..07d8fa28c75 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -197,26 +197,33 @@ public: class BulkBuilder { public: + using Sorter = mongo::Sorter<BSONObj, RecordId>; + /** * Insert into the BulkBuilder as-if inserting into an IndexAccessMethod. */ Status insert(OperationContext* opCtx, const BSONObj& obj, const RecordId& loc, - const InsertDeleteOptions& options, - int64_t* numInserted); + const InsertDeleteOptions& options); const MultikeyPaths& getMultikeyPaths() const { return _indexMultikeyPaths; } - bool isMultikey() const; + bool isMultikey() const { + return _isMultiKey; + } + + /** + * Inserts all multikey metadata keys cached during the BulkBuilder's lifetime into the + * underlying Sorter, finalizes it, and returns an iterator over the sorted dataset. + */ + Sorter::Iterator* done(); private: friend class IndexAccessMethod; - using Sorter = mongo::Sorter<BSONObj, RecordId>; - BulkBuilder(const IndexAccessMethod* index, const IndexDescriptor* descriptor, size_t maxMemoryUsageBytes); @@ -225,13 +232,17 @@ public: const IndexAccessMethod* _real; int64_t _keysInserted = 0; - // Set to true if at least one document causes IndexAccessMethod::getKeys() to return a - // BSONObjSet with size strictly greater than one. - bool _everGeneratedMultipleKeys = false; + // Set to true if any document added to the BulkBuilder causes the index to become multikey. + bool _isMultiKey = false; // Holds the path components that cause this index to be multikey. The '_indexMultikeyPaths' // vector remains empty if this index doesn't support path-level multikey tracking. MultikeyPaths _indexMultikeyPaths; + + // Caches the set of all multikey metadata keys generated during the bulk build process. + // These are inserted into the sorter after all normal data keys have been added, just + // before the bulk build is committed. + BSONObjSet _multikeyMetadataKeys{SimpleBSONObjComparator::kInstance.makeBSONObjSet()}; }; /** @@ -285,13 +296,27 @@ public: * 'multikeyPaths' to have the same number of elements as the index key pattern and fills each * element with the prefixes of the indexed field that would cause this index to be multikey as * a result of inserting 'keys'. + * + * If the 'multikeyMetadataKeys' pointer is non-null, then the function will populate the + * BSONObjSet with any multikey metadata keys generated while processing the document. These + * keys are not associated with the document itself, but instead represent multi-key path + * information that must be stored in a reserved keyspace within the index. */ void getKeys(const BSONObj& obj, GetKeysMode mode, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const; /** + * Given the set of keys, multikeyMetadataKeys and multikeyPaths generated by a particular + * document, return 'true' if the index should be marked as multikey and 'false' otherwise. + */ + virtual bool shouldMarkIndexAsMultikey(const BSONObjSet& keys, + const BSONObjSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const; + + /** * Splits the sets 'left' and 'right' into two vectors, the first containing the elements that * only appeared in 'left', and the second containing only elements that appeared in 'right'. * @@ -311,9 +336,15 @@ protected: * 'multikeyPaths' to have the same number of elements as the index key pattern and fills each * element with the prefixes of the indexed field that would cause this index to be multikey as * a result of inserting 'keys'. + * + * If the 'multikeyMetadataKeys' pointer is non-null, then the function will populate the + * BSONObjSet with any multikey metadata keys generated while processing the document. These + * keys are not associated with the document itself, but instead represent multi-key path + * information that must be stored in a reserved keyspace within the index. */ virtual void doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const = 0; /** @@ -321,6 +352,14 @@ protected: */ bool ignoreKeyTooLong(OperationContext* opCtx); + /** + * Determine whether the given Status represents an exception that should cause the indexing + * process to abort. The 'key' argument is passed in to allow the offending entry to be logged + * in the event that a non-fatal 'ErrorCodes::DuplicateKeyValue' is encountered during a + * background index build. + */ + bool isFatalError(OperationContext* opCtx, Status status, BSONObj key); + IndexCatalogEntry* _btreeState; // owned by IndexCatalogEntry const IndexDescriptor* _descriptor; @@ -340,16 +379,20 @@ private: class UpdateTicket { public: UpdateTicket() - : oldKeys(SimpleBSONObjComparator::kInstance.makeBSONObjSet()), newKeys(oldKeys) {} + : oldKeys(SimpleBSONObjComparator::kInstance.makeBSONObjSet()), + newKeys(oldKeys), + newMultikeyMetadataKeys(newKeys) {} private: friend class IndexAccessMethod; - bool _isValid; + bool _isValid{false}; BSONObjSet oldKeys; BSONObjSet newKeys; + BSONObjSet newMultikeyMetadataKeys; + std::vector<BSONObj> removed; std::vector<BSONObj> added; diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 54f9c94f3a7..6e7c17569f9 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -142,6 +142,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) { void S2AccessMethod::doGetKeys(const BSONObj& obj, BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths); } diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index ad0044dc128..bc4456cabe8 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -63,7 +63,10 @@ private: * and fills each element with the prefixes of the indexed field that would cause this index to * be multikey as a result of inserting 'keys'. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + BSONObjSet* keys, + BSONObjSet* multikeyMetadataKeys, + MultikeyPaths* multikeyPaths) const final; S2IndexingParams _params; diff --git a/src/mongo/db/record_id.h b/src/mongo/db/record_id.h index e0dbf038573..1a903d1d39e 100644 --- a/src/mongo/db/record_id.h +++ b/src/mongo/db/record_id.h @@ -45,6 +45,17 @@ namespace mongo { */ class RecordId { public: + // This set of constants define the boundaries of the 'normal' and 'reserved' id ranges. + static constexpr int64_t kNullRepr = 0; + static constexpr int64_t kMinRepr = LLONG_MIN; + static constexpr int64_t kMaxRepr = LLONG_MAX; + static constexpr int64_t kMinReservedRepr = kMaxRepr - (1024 * 1024); + + /** + * Enumerates all ids in the reserved range that have been allocated for a specific purpose. + */ + enum class ReservedId : int64_t { kAllPathsMultikeyMetadataId = kMinReservedRepr }; + /** * Constructs a Null RecordId. */ @@ -52,6 +63,8 @@ public: explicit RecordId(int64_t repr) : _repr(repr) {} + explicit RecordId(ReservedId repr) : RecordId(static_cast<int64_t>(repr)) {} + /** * Construct a RecordId from two halves. * TODO consider removing. @@ -72,6 +85,13 @@ public: return RecordId(kMaxRepr); } + /** + * Returns the first record in the reserved id range at the top of the RecordId space. + */ + static RecordId minReserved() { + return RecordId(kMinReservedRepr); + } + bool isNull() const { return _repr == 0; } @@ -81,11 +101,27 @@ public: } /** - * Normal RecordIds are the only ones valid for representing Records. All RecordIds outside - * of this range are sentinel values. + * Valid RecordIds are the only ones which may be used to represent Records. The range of valid + * RecordIds includes both "normal" ids that refer to user data, and "reserved" ids that are + * used internally. All RecordIds outside of the valid range are sentinel values. + */ + bool isValid() const { + return isNormal() || isReserved(); + } + + /** + * Normal RecordIds are those which fall within the range used to represent normal user data, + * excluding the reserved range at the top of the RecordId space. */ bool isNormal() const { - return _repr > 0 && _repr < kMaxRepr; + return _repr > 0 && _repr < kMinReservedRepr; + } + + /** + * Returns true if this RecordId falls within the reserved range at the top of the record space. + */ + bool isReserved() const { + return _repr >= kMinReservedRepr && _repr < kMaxRepr; } int compare(RecordId rhs) const { @@ -121,10 +157,6 @@ public: } private: - static const int64_t kMaxRepr = LLONG_MAX; - static const int64_t kNullRepr = 0; - static const int64_t kMinRepr = LLONG_MIN; - int64_t _repr; }; diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 15598202a70..e0775d5c0aa 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -66,6 +66,7 @@ #include "mongo/db/operation_context.h" #include "mongo/db/ops/delete_request.h" #include "mongo/db/ops/parsed_update.h" +#include "mongo/db/ops/update_lifecycle_impl.h" #include "mongo/db/ops/update_request.h" #include "mongo/db/query/get_executor.h" #include "mongo/db/query/internal_plans.h" @@ -879,8 +880,10 @@ Status StorageInterfaceImpl::upsertById(OperationContext* opCtx, // We can create an UpdateRequest now that the collection's namespace has been resolved, in // the event it was specified as a UUID. UpdateRequest request(collection->ns()); + UpdateLifecycleImpl lifeCycle(collection->ns()); request.setQuery(query); request.setUpdates(update); + request.setLifecycle(&lifeCycle); request.setUpsert(true); invariant(!request.isMulti()); // This follows from using an exact _id query. invariant(!request.shouldReturnAnyDocs()); @@ -919,8 +922,10 @@ Status StorageInterfaceImpl::putSingleton(OperationContext* opCtx, const NamespaceString& nss, const TimestampedBSONObj& update) { UpdateRequest request(nss); + UpdateLifecycleImpl lifeCycle(nss); request.setQuery({}); request.setUpdates(update.obj); + request.setLifecycle(&lifeCycle); request.setUpsert(true); return _updateWithQuery(opCtx, request, update.timestamp); } @@ -930,8 +935,10 @@ Status StorageInterfaceImpl::updateSingleton(OperationContext* opCtx, const BSONObj& query, const TimestampedBSONObj& update) { UpdateRequest request(nss); + UpdateLifecycleImpl lifeCycle(nss); request.setQuery(query); request.setUpdates(update.obj); + request.setLifecycle(&lifeCycle); invariant(!request.isUpsert()); return _updateWithQuery(opCtx, request, update.timestamp); } diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp index 3d212b09900..75c30bc0f68 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp @@ -106,7 +106,7 @@ public: return Status(ErrorCodes::KeyTooLong, "key too big"); } - invariant(loc.isNormal()); + invariant(loc.isValid()); invariant(!hasFieldNames(key)); if (!_data->empty()) { @@ -150,7 +150,7 @@ public: const BSONObj& key, const RecordId& loc, bool dupsAllowed) { - invariant(loc.isNormal()); + invariant(loc.isValid()); invariant(!hasFieldNames(key)); if (key.objsize() >= TempKeyMaxSize) { @@ -176,7 +176,7 @@ public: const BSONObj& key, const RecordId& loc, bool dupsAllowed) { - invariant(loc.isNormal()); + invariant(loc.isValid()); invariant(!hasFieldNames(key)); IndexKeyEntry entry(key.getOwned(), loc); diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp index 1cdca41e6ed..f033ae3e576 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp @@ -631,7 +631,7 @@ int64_t EphemeralForTestRecordStore::storageSize(OperationContext* opCtx, RecordId EphemeralForTestRecordStore::allocateLoc() { RecordId out = RecordId(_data->nextId++); - invariant(out < RecordId::max()); + invariant(out.isNormal()); return out; } diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp index 0d87d0dfea4..9e500879ba5 100644 --- a/src/mongo/db/storage/key_string_test.cpp +++ b/src/mongo/db/storage/key_string_test.cpp @@ -1123,7 +1123,7 @@ TEST_F(KeyStringTest, RecordIds) { ASSERT(reader.atEof()); } - if (rid.isNormal()) { + if (rid.isValid()) { ASSERT_GT(ks, KeyString(version, RecordId())); ASSERT_GT(ks, KeyString(version, RecordId::min())); ASSERT_LT(ks, KeyString(version, RecordId::max())); diff --git a/src/mongo/db/storage/mobile/mobile_index.cpp b/src/mongo/db/storage/mobile/mobile_index.cpp index bfd190cb4fd..bf61b8fe5cd 100644 --- a/src/mongo/db/storage/mobile/mobile_index.cpp +++ b/src/mongo/db/storage/mobile/mobile_index.cpp @@ -80,7 +80,7 @@ Status MobileIndex::insert(OperationContext* opCtx, const BSONObj& key, const RecordId& recId, bool dupsAllowed) { - invariant(recId.isNormal()); + invariant(recId.isValid()); invariant(!hasFieldNames(key)); Status status = _checkKeySize(key); @@ -133,7 +133,7 @@ void MobileIndex::unindex(OperationContext* opCtx, const BSONObj& key, const RecordId& recId, bool dupsAllowed) { - invariant(recId.isNormal()); + invariant(recId.isValid()); invariant(!hasFieldNames(key)); return _unindex(opCtx, key, recId, dupsAllowed); @@ -299,7 +299,7 @@ public: virtual ~BulkBuilderBase() {} Status addKey(const BSONObj& key, const RecordId& recId) override { - invariant(recId.isNormal()); + invariant(recId.isValid()); invariant(!hasFieldNames(key)); Status status = _checkKeySize(key); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp index a2d215087bc..ab871b59db3 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp @@ -318,7 +318,7 @@ Status WiredTigerIndex::insert(OperationContext* opCtx, const RecordId& id, bool dupsAllowed) { dassert(opCtx->lockState()->isWriteLocked()); - invariant(id.isNormal()); + invariant(id.isValid()); dassert(!hasFieldNames(key)); Status s = checkKeySize(key); @@ -337,7 +337,7 @@ void WiredTigerIndex::unindex(OperationContext* opCtx, const RecordId& id, bool dupsAllowed) { dassert(opCtx->lockState()->isWriteLocked()); - invariant(id.isNormal()); + invariant(id.isValid()); dassert(!hasFieldNames(key)); WiredTigerCursor curwrap(_uri, _tableId, false, opCtx); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 72070d947c8..e5b1e785527 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -119,7 +119,7 @@ public: void commit(boost::optional<Timestamp>) final { invariant(_bytesInserted >= 0); - invariant(_highestInserted.isNormal()); + invariant(_highestInserted.isValid()); _oplogStones->_currentRecords.addAndFetch(_countInserted); int64_t newCurrentBytes = _oplogStones->_currentBytes.addAndFetch(_bytesInserted); @@ -225,7 +225,7 @@ void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() { : Timestamp::min(); auto stone = _stones.front(); - invariant(stone.lastRecord.isNormal()); + invariant(stone.lastRecord.isValid()); if (static_cast<std::uint64_t>(stone.lastRecord.repr()) < lastStableRecoveryTimestamp.asULL()) { break; @@ -1210,7 +1210,7 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx) { void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp recoveryTimestamp) { Timer timer; while (auto stone = _oplogStones->peekOldestStoneIfNeeded()) { - invariant(stone->lastRecord.isNormal()); + invariant(stone->lastRecord.isValid()); if (static_cast<std::uint64_t>(stone->lastRecord.repr()) >= recoveryTimestamp.asULL()) { // Do not truncate oplogs needed for replication recovery. @@ -1934,7 +1934,7 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() { } _skipNextAdvance = false; - if (!id.isNormal()) { + if (!id.isValid()) { id = getKey(c); } diff --git a/src/mongo/db/views/durable_view_catalog.cpp b/src/mongo/db/views/durable_view_catalog.cpp index b8691d5f15c..c02b091e88c 100644 --- a/src/mongo/db/views/durable_view_catalog.cpp +++ b/src/mongo/db/views/durable_view_catalog.cpp @@ -144,7 +144,7 @@ void DurableViewCatalogImpl::upsert(OperationContext* opCtx, RecordId id = Helpers::findOne(opCtx, systemViews, BSON("_id" << name.ns()), requireIndex); Snapshotted<BSONObj> oldView; - if (!id.isNormal() || !systemViews->findDoc(opCtx, id, &oldView)) { + if (!id.isValid() || !systemViews->findDoc(opCtx, id, &oldView)) { LOG(2) << "insert view " << view << " into " << _db->getSystemViewsName(); uassertStatusOK( systemViews->insertDocument(opCtx, InsertStatement(view), &CurOp::get(opCtx)->debug())); @@ -168,7 +168,7 @@ void DurableViewCatalogImpl::remove(OperationContext* opCtx, const NamespaceStri return; const bool requireIndex = false; RecordId id = Helpers::findOne(opCtx, systemViews, BSON("_id" << name.ns()), requireIndex); - if (!id.isNormal()) + if (!id.isValid()) return; LOG(2) << "remove view " << name << " from " << _db->getSystemViewsName(); diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript index 9a3b1d505ee..1b66fc11627 100644 --- a/src/mongo/dbtests/SConscript +++ b/src/mongo/dbtests/SConscript @@ -52,6 +52,7 @@ env.Library( dbtest = env.Program( target="dbtest", source=[ + 'all_paths_multikey_persistence_test.cpp', 'basictests.cpp', 'clienttests.cpp', 'commandtests.cpp', diff --git a/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp b/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp new file mode 100644 index 00000000000..7f848045d2e --- /dev/null +++ b/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp @@ -0,0 +1,706 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include <memory> + +#include "mongo/db/db_raii.h" +#include "mongo/db/repl/storage_interface_impl.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/log.h" + +namespace mongo { +namespace { + +using namespace unittest; + +static const RecordId kMetadataId = RecordId::minReserved(); + +static const int kIndexVersion = static_cast<int>(IndexDescriptor::kLatestIndexVersion); +static const NamespaceString kDefaultNSS{"all_paths_multikey_persistence.test"}; +static const std::string kDefaultIndexName{"all_paths_multikey"}; +static const BSONObj kDefaultIndexKey = fromjson("{'$**': 1}"); +static const BSONObj kDefaultPathProjection; + +static constexpr auto kIdField = "_id"; + +std::vector<InsertStatement> toInserts(std::vector<BSONObj> docs) { + std::vector<InsertStatement> inserts(docs.size()); + std::transform(docs.cbegin(), docs.cend(), inserts.begin(), [](const BSONObj& doc) { + return InsertStatement(doc); + }); + return inserts; +} + +class AllPathsMultikeyPersistenceTestFixture : public unittest::Test { +public: + AllPathsMultikeyPersistenceTestFixture() { + _origAllPathsKnob = internalQueryAllowAllPathsIndexes.load(); + internalQueryAllowAllPathsIndexes.store(true); + _opCtx = cc().makeOperationContext(); + } + + virtual ~AllPathsMultikeyPersistenceTestFixture() { + internalQueryAllowAllPathsIndexes.store(_origAllPathsKnob); + _opCtx.reset(); + } + + OperationContext* opCtx() { + return _opCtx.get(); + } + +protected: + void assertSetupEnvironment(bool background, + std::vector<BSONObj> initialDocs = {}, + BSONObj indexKey = kDefaultIndexKey, + BSONObj pathProjection = kDefaultPathProjection, + const std::string& indexName = kDefaultIndexName, + const NamespaceString& nss = kDefaultNSS) { + assertRecreateCollection(nss); + assertInsertDocuments(initialDocs, nss); + assertCreateIndexForColl(nss, indexName, indexKey, pathProjection, background); + } + + void assertIndexContentsEquals(std::vector<IndexKeyEntry> expectedKeys, + bool expectIndexIsMultikey = true, + const NamespaceString& nss = kDefaultNSS, + const std::string& indexName = kDefaultIndexName) { + // Subsequent operations must take place under a collection lock. + AutoGetCollectionForRead autoColl(opCtx(), nss); + auto collection = autoColl.getCollection(); + + // Verify whether or not the index has been marked as multikey. + ASSERT_EQ(expectIndexIsMultikey, getIndexDesc(collection, indexName)->isMultikey(opCtx())); + + // Obtain a cursor over the index, and confirm that the keys are present in order. + auto indexCursor = getIndexCursor(collection, indexName); + auto indexKey = indexCursor->seek(kMinBSONKey, true); + try { + for (const auto& expectedKey : expectedKeys) { + ASSERT(indexKey); + ASSERT_BSONOBJ_EQ(expectedKey.key, indexKey->key); + ASSERT_EQ(expectedKey.loc, indexKey->loc); + indexKey = indexCursor->next(); + } + // Confirm that there are no further keys in the index. + ASSERT(!indexCursor->next()); + } catch (const TestAssertionFailureException& ex) { + log() << "Writing remaining index keys to debug log:"; + while (indexKey) { + log() << "{ key: " << indexKey->key << ", loc: " << indexKey->loc << " }"; + indexKey = indexCursor->next(); + } + throw ex; + } + } + + void assertRecreateCollection(const NamespaceString& nss) { + ASSERT_OK(_storage.dropCollection(opCtx(), nss)); + ASSERT_OK(_storage.createCollection(opCtx(), nss, collOptions())); + } + + void assertInsertDocuments(std::vector<BSONObj> docs, + const NamespaceString& nss = kDefaultNSS) { + ASSERT_OK(_storage.insertDocuments(opCtx(), nss, toInserts(docs))); + } + + void assertUpdateDocuments(std::vector<std::pair<BSONObj, BSONObj>> updates, + const NamespaceString& nss = kDefaultNSS) { + for (const auto& update : updates) { + ASSERT_OK(_storage.updateSingleton( + opCtx(), nss, update.first, {update.second, Timestamp(0)})); + } + } + + void assertUpsertDocuments(std::vector<BSONObj> upserts, + const NamespaceString& nss = kDefaultNSS) { + for (const auto& upsert : upserts) { + ASSERT_OK(_storage.upsertById(opCtx(), nss, upsert[kIdField], upsert)); + } + } + + void assertRemoveDocuments(std::vector<BSONObj> docs, + const NamespaceString& nss = kDefaultNSS) { + for (const auto& doc : docs) { + ASSERT_OK(_storage.deleteByFilter(opCtx(), nss, doc)); + } + } + + void assertCreateIndexForColl(const NamespaceString& nss, + const std::string& name, + BSONObj key, + BSONObj pathProjection, + bool background) { + BSONObjBuilder bob = + std::move(BSONObjBuilder() << "ns" << nss.ns() << "name" << name << "key" << key); + + if (!pathProjection.isEmpty()) + bob << IndexDescriptor::kPathProjectionFieldName << pathProjection; + + auto indexSpec = (bob << "v" << kIndexVersion << "background" << background).obj(); + + Lock::DBLock dbLock(opCtx(), nss.db(), MODE_X); + AutoGetCollection autoColl(opCtx(), nss, MODE_X); + auto coll = autoColl.getCollection(); + + MultiIndexBlock indexer(opCtx(), coll); + indexer.allowBackgroundBuilding(); + indexer.allowInterruption(); + + // Initialize the index builder and add all documents currently in the collection. + ASSERT_OK(indexer.init(indexSpec).getStatus()); + ASSERT_OK(indexer.insertAllDocumentsInCollection()); + + WriteUnitOfWork wunit(opCtx()); + indexer.commit(); + wunit.commit(); + } + + std::vector<BSONObj> makeDocs(const std::vector<std::string>& jsonObjs) { + std::vector<BSONObj> docs(jsonObjs.size()); + std::transform( + jsonObjs.cbegin(), jsonObjs.cend(), docs.begin(), [this](const std::string& json) { + return fromjson(json).addField(BSON(kIdField << (_id++))[kIdField]); + }); + return docs; + } + + const IndexDescriptor* getIndexDesc(const Collection* collection, const StringData indexName) { + return collection->getIndexCatalog()->findIndexByName(opCtx(), indexName); + } + + const IndexAccessMethod* getIndex(const Collection* collection, const StringData indexName) { + return collection->getIndexCatalog()->getIndex(getIndexDesc(collection, indexName)); + } + + std::unique_ptr<SortedDataInterface::Cursor> getIndexCursor(const Collection* collection, + const StringData indexName) { + return getIndex(collection, indexName)->newCursor(opCtx()); + } + + CollectionOptions collOptions() { + CollectionOptions collOpts; + collOpts.uuid = UUID::gen(); + return collOpts; + } + +private: + ServiceContext::UniqueOperationContext _opCtx; + repl::StorageInterfaceImpl _storage; + bool _origAllPathsKnob{false}; + int _id{1}; +}; + +TEST_F(AllPathsMultikeyPersistenceTestFixture, RecordMultikeyPathsInBulkIndexBuild) { + // Create the test collection, add some initial documents, and build a foreground $** index. + assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, RecordMultikeyPathsInBackgroundIndexBuild) { + // Create the test collection, add some initial documents, and build a background $** index. + assertSetupEnvironment(true, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DedupMultikeyPathsInBulkIndexBuild) { + // Create the test collection, add some initial documents, and build a foreground $** index. + const auto initialDocs = + makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}"}); + assertSetupEnvironment(false, initialDocs); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DedupMultikeyPathsInBackgroundIndexBuild) { + // Create the test collection, add some initial documents, and build a background $** index. + const auto initialDocs = + makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}"}); + assertSetupEnvironment(true, initialDocs); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, AddAndDedupNewMultikeyPathsOnPostBuildInsertion) { + // Create the test collection, add some initial documents, and build a $** index. + assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Insert some more documents with a mix of new and duplicate multikey paths. + assertInsertDocuments(makeDocs({"{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", "{d: {e: {f: [5]}}}"})); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, AddAndDedupNewMultikeyPathsOnUpsert) { + // Create the test collection, add some initial documents, and build a $** index. + assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Upsert some new documents to add new multikey paths. + assertUpsertDocuments(makeDocs({"{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", "{d: {e: {f: [5]}}}"})); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, AddNewMultikeyPathsOnUpdate) { + // Create the test collection, add some initial documents, and build a $** index. + assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Update the initial document to add a new multikey path. + assertUpdateDocuments( + {{fromjson("{_id: 1}"), fromjson("{$push: {b: {$each: [{d: {f: [4]}}, {g: [5]}]}}}")}}); + + { + // Verify that the updated document appears as expected; + AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS); + Snapshotted<BSONObj> result; + ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(1), &result)); + ASSERT_BSONOBJ_EQ(result.value(), + fromjson("{_id:1, a:1, b:[{c:2}, {d:{e:[3]}}, {d:{f:[4]}}, {g:[5]}]}")); + } + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.f'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.g'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.f', '': 4}"), RecordId(1)}, + {fromjson("{'': 'b.g', '': 5}"), RecordId(1)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, AddNewMultikeyPathsOnReplacement) { + // Create the test collection, add some initial documents, and build a $** index. + assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"})); + + // Update the initial document to modify all existing data keys and add a new multikey path. + assertUpdateDocuments( + {{fromjson("{_id: 1}"), fromjson("{a: 2, b: [{c: 3}, {d: {e: [4], f: [5]}}]}")}}); + + { + // Verify that the updated document appears as expected; + AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS); + Snapshotted<BSONObj> result; + ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(1), &result)); + ASSERT_BSONOBJ_EQ(result.value(), + fromjson("{_id: 1, a: 2, b: [{c: 3}, {d: {e: [4], f: [5]}}]}")); + } + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.f'}"), kMetadataId}, + {fromjson("{'': 'a', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(1)}, + {fromjson("{'': 'b.d.f', '': 5}"), RecordId(1)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotRemoveMultikeyPathsOnDocDeletion) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment(false, docs); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); + + // Now remove all documents in the collection, and verify that only the multikey paths remain. + assertRemoveDocuments(docs); + + expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexKeyPatternSubTreeInBulkBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment(false, docs, fromjson("{'b.d.$**': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexKeyPatternSubTreeInBackgroundBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment(true, docs, fromjson("{'b.d.$**': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsInBulkBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsInBackgroundBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + true, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsOnUpdate) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); + + // Now update RecordId(3), adding one new field 'd.e.g' within the included 'd.e' subpath and + // one new field 'd.h' which lies outside all included subtrees. + assertUpdateDocuments({{fromjson("{_id: 3}"), fromjson("{$set: {'d.e.g': 6, 'd.h': 7}}")}}); + + { + // Verify that the updated document appears as expected; + AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS); + Snapshotted<BSONObj> result; + ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(3), &result)); + ASSERT_BSONOBJ_EQ(result.value(), fromjson("{_id: 3, d: {e: {f: [5], g: 6}, h: 7}}")); + } + + // Verify that only the key {'d.e.g': 6} has been added to the index. + expectedKeys.push_back({fromjson("{'': 'd.e.g', '': 6}"), RecordId(3)}); + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsInBulkBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsInBackgroundBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + true, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsOnUpdate) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); + + // Now update RecordId(3), adding one new field 'd.e.g' within the excluded 'd.e' subpath and + // one new field 'd.h' which lies outside all excluded subtrees. + assertUpdateDocuments({{fromjson("{_id: 3}"), fromjson("{$set: {'d.e.g': 6, 'd.h': 7}}")}}); + + { + // Verify that the updated document appears as expected; + AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS); + Snapshotted<BSONObj> result; + ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(3), &result)); + ASSERT_BSONOBJ_EQ(result.value(), fromjson("{_id: 3, d: {e: {f: [5], g: 6}, h: 7}}")); + } + + // Verify that only the key {'d.h': 7} has been added to the index. + expectedKeys.push_back({fromjson("{'': 'd.h', '': 7}"), RecordId(3)}); + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexIdFieldIfSpecifiedInInclusionProjection) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{_id: 1, 'b.d.e': 1, 'd.e': 1}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId}, + {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}, + {fromjson("{'': '_id', '': 1}"), RecordId(1)}, + {fromjson("{'': '_id', '': 2}"), RecordId(2)}, + {fromjson("{'': '_id', '': 3}"), RecordId(3)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexIdFieldIfSpecifiedInExclusionProjection) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", + "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", + "{d: {e: {f: [5]}}}"}); + assertSetupEnvironment( + false, docs, fromjson("{'$**': 1}"), fromjson("{_id: 1, 'b.d.e': 0, 'd.e': 0}")); + + // Verify that the data and multikey path keys are present in the expected order. + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId}, + {fromjson("{'': '_id', '': 1}"), RecordId(1)}, + {fromjson("{'': '_id', '': 2}"), RecordId(2)}, + {fromjson("{'': '_id', '': 3}"), RecordId(3)}, + {fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}}; + + assertIndexContentsEquals(expectedKeys); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotMarkAsMultikeyIfNoArraysInBulkBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs( + {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"}); + assertSetupEnvironment(false, docs, fromjson("{'$**': 1}")); + + // Verify that the data keys are present in the expected order, and the index is NOT multikey. + const bool expectIndexIsMultikey = false; + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotMarkAsMultikeyIfNoArraysInBackgroundBuild) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs( + {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"}); + assertSetupEnvironment(true, docs, fromjson("{'$**': 1}")); + + // Verify that the data keys are present in the expected order, and the index is NOT multikey. + const bool expectIndexIsMultikey = false; + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey); +} + +TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexShouldBecomeMultikeyIfArrayIsCreatedByUpdate) { + // Create the test collection, add some initial documents, and build a $** index. + const auto docs = makeDocs( + {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"}); + assertSetupEnvironment(false, docs, fromjson("{'$**': 1}")); + + // Verify that the data keys are present in the expected order, and the index is NOT multikey. + bool expectIndexIsMultikey = false; + std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)}, + {fromjson("{'': 'a', '': 2}"), RecordId(2)}, + {fromjson("{'': 'b.c', '': 2}"), RecordId(1)}, + {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}, + {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}, + {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}, + {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}}; + + assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey); + + // Now perform an update that introduces an array into one of the documents... + assertUpdateDocuments({{fromjson("{_id: 1}"), fromjson("{$set: {g: {h: []}}}")}}); + + // ... and confirm that this has caused the index to become multikey. + expectIndexIsMultikey = true; + expectedKeys.insert(expectedKeys.begin(), {fromjson("{'': 1, '': 'g.h'}"), kMetadataId}); + + assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey); +} + +} // namespace +} // namespace mongo |