diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2016-05-08 14:16:24 -0400 |
---|---|---|
committer | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2016-05-08 14:16:24 -0400 |
commit | 3e26b6b80f0a4904d477f8c4bd189945941ee09e (patch) | |
tree | 1a503037b273c922f28209abcda546ac7006938d | |
parent | 3a0d6ee6a2b6f82c5775380b7184501916338331 (diff) | |
download | mongo-3e26b6b80f0a4904d477f8c4bd189945941ee09e.tar.gz |
SERVER-22726 Propagate multikey paths computed during key generation.
28 files changed, 674 insertions, 75 deletions
diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp index 7c359b728cb..40c2cd100a6 100644 --- a/src/mongo/db/catalog/collection.cpp +++ b/src/mongo/db/catalog/collection.cpp @@ -1021,7 +1021,10 @@ public: const IndexAccessMethod* iam = _indexCatalog->getIndex(descriptor); BSONObjSet documentKeySet; - iam->getKeys(recordBson, &documentKeySet); + // There's no need to compute the prefixes of the indexed fields that cause the + // index to be multikey when validating the index keys. + MultikeyPaths* multikeyPaths = nullptr; + iam->getKeys(recordBson, &documentKeySet, multikeyPaths); if (descriptor->isPartial()) { const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor); diff --git a/src/mongo/db/catalog/collection_info_cache.cpp b/src/mongo/db/catalog/collection_info_cache.cpp index 383099aefd8..c2a6890f041 100644 --- a/src/mongo/db/catalog/collection_info_cache.cpp +++ b/src/mongo/db/catalog/collection_info_cache.cpp @@ -156,6 +156,7 @@ void CollectionInfoCache::updatePlanCacheIndexEntries(OperationContext* txn) { indexEntries.emplace_back(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(txn), + ice->getMultikeyPaths(txn), desc->isSparse(), desc->unique(), desc->indexName(), diff --git a/src/mongo/db/catalog/index_catalog_entry.cpp b/src/mongo/db/catalog/index_catalog_entry.cpp index 65b48166249..2a3dfac1ed2 100644 --- a/src/mongo/db/catalog/index_catalog_entry.cpp +++ b/src/mongo/db/catalog/index_catalog_entry.cpp @@ -34,6 +34,8 @@ #include "mongo/db/catalog/index_catalog_entry.h" +#include <algorithm> + #include "mongo/db/catalog/collection_catalog_entry.h" #include "mongo/db/catalog/head_manager.h" #include "mongo/db/concurrency/write_conflict_exception.h" @@ -99,7 +101,12 @@ void IndexCatalogEntry::init(OperationContext* txn, IndexAccessMethod* accessMet _isReady = _catalogIsReady(txn); _head = _catalogHead(txn); - _isMultikey = _catalogIsMultikey(txn); + + { + stdx::lock_guard<stdx::mutex> 
lk(_indexMultikeyPathsMutex); + _isMultikey.store(_catalogIsMultikey(txn, &_indexMultikeyPaths)); + _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty(); + } if (BSONElement filterElement = _descriptor->getInfoElement("partialFilterExpression")) { invariant(filterElement.isABSONObj()); @@ -135,7 +142,12 @@ bool IndexCatalogEntry::isReady(OperationContext* txn) const { } bool IndexCatalogEntry::isMultikey() const { - return _isMultikey; + return _isMultikey.load(); +} + +MultikeyPaths IndexCatalogEntry::getMultikeyPaths(OperationContext* txn) const { + stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex); + return _indexMultikeyPaths; } // --- @@ -167,7 +179,7 @@ void IndexCatalogEntry::setHead(OperationContext* txn, RecordId newHead) { /** * RAII class, which associates a new RecoveryUnit with an OperationContext for the purposes - * of simulating a sub-transaction. Takes ownership of the new recovery unit and frees it at + * of simulating a side-transaction. Takes ownership of the new recovery unit and frees it at * destruction time. */ class RecoveryUnitSwap { @@ -196,43 +208,86 @@ private: const std::unique_ptr<RecoveryUnit> _newRecoveryUnit; }; -void IndexCatalogEntry::setMultikey(OperationContext* txn) { - if (isMultikey()) { +void IndexCatalogEntry::setMultikey(OperationContext* txn, const MultikeyPaths& multikeyPaths) { + if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) { + // If the index is already set as multikey and we don't have any path-level information to + // update, then there's nothing more for us to do. return; } - // Only one thread should set the multi-key value per collection, because the metadata for - // a collection is one large document. 
- Lock::ResourceLock collMDLock(txn->lockState(), ResourceId(RESOURCE_METADATA, _ns), MODE_X); + if (_indexTracksPathLevelMultikeyInfo) { + stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex); + invariant(multikeyPaths.size() == _indexMultikeyPaths.size()); + + bool newPathIsMultikey = false; + for (size_t i = 0; i < multikeyPaths.size(); ++i) { + if (!std::includes(_indexMultikeyPaths[i].begin(), + _indexMultikeyPaths[i].end(), + multikeyPaths[i].begin(), + multikeyPaths[i].end())) { + // If 'multikeyPaths' contains a new path component that causes this index to be + // multikey, then we must update the index metadata in the CollectionCatalogEntry. + newPathIsMultikey = true; + break; + } + } - // Check again in case we blocked on the MD lock and another thread beat us to setting the - // multiKey metadata for this index. - if (isMultikey()) { - return; + if (!newPathIsMultikey) { + // Otherwise, if all the path components in 'multikeyPaths' are already tracked in + // '_indexMultikeyPaths', then there's nothing more for us to do. + return; + } } - // This effectively emulates a sub-transaction off the main transaction, which invoked - // setMultikey. The reason we need is to avoid artificial WriteConflicts, which happen - // with snapshot isolation. { - StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine(); - RecoveryUnitSwap ruSwap(txn, storageEngine->newRecoveryUnit()); - - WriteUnitOfWork wuow(txn); + // Only one thread should set the multi-key value per collection, because the metadata for a + // collection is one large document. + Lock::ResourceLock collMDLock(txn->lockState(), ResourceId(RESOURCE_METADATA, _ns), MODE_X); + + if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) { + // It's possible that we raced with another thread when acquiring the MD lock. If the + // index is already set as multikey and we don't have any path-level information to + // update, then there's nothing more for us to do. 
+ return; + } - // TODO SERVER-22726: Propagate multikey paths computed during index key generation. - if (_collection->setIndexIsMultikey(txn, _descriptor->indexName(), MultikeyPaths{})) { - if (_infoCache) { - LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern() - << " set to multi key."; - _infoCache->clearQueryCache(); + // This effectively emulates a side-transaction off the main transaction, which invoked + // setMultikey. The reason we need this is to avoid artificial WriteConflicts, which happen with + // snapshot isolation. + { + StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine(); + RecoveryUnitSwap ruSwap(txn, storageEngine->newRecoveryUnit()); + + WriteUnitOfWork wuow(txn); + + // It's possible that the index type (e.g. ascending/descending index) supports tracking + // path-level multikey information, but this particular index doesn't. + // CollectionCatalogEntry::setIndexIsMultikey() requires that we discard the path-level + // multikey information in order to avoid unintentionally setting path-level multikey + // information on an index created before 3.4. + if (_collection->setIndexIsMultikey( + txn, + _descriptor->indexName(), + _indexTracksPathLevelMultikeyInfo ? 
multikeyPaths : MultikeyPaths{})) { + if (_infoCache) { + LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern() + << " set to multi key."; + _infoCache->clearQueryCache(); + } } - } - wuow.commit(); + wuow.commit(); + } } - _isMultikey = true; + _isMultikey.store(true); + + if (_indexTracksPathLevelMultikeyInfo) { + stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex); + for (size_t i = 0; i < multikeyPaths.size(); ++i) { + _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end()); + } + } } // ---- @@ -245,8 +300,9 @@ RecordId IndexCatalogEntry::_catalogHead(OperationContext* txn) const { return _collection->getIndexHead(txn, _descriptor->indexName()); } -bool IndexCatalogEntry::_catalogIsMultikey(OperationContext* txn) const { - return _collection->isIndexMultikey(txn, _descriptor->indexName(), nullptr); +bool IndexCatalogEntry::_catalogIsMultikey(OperationContext* txn, + MultikeyPaths* multikeyPaths) const { + return _collection->isIndexMultikey(txn, _descriptor->indexName(), multikeyPaths); } // ------------------ diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 5d9aad728ac..08cad3bcbb4 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -30,12 +30,16 @@ #pragma once +#include <boost/optional.hpp> #include <string> #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/ordering.h" +#include "mongo/db/index/multikey_paths.h" #include "mongo/db/record_id.h" #include "mongo/db/storage/snapshot_name.h" +#include "mongo/platform/atomic_word.h" +#include "mongo/stdx/mutex.h" namespace mongo { @@ -105,9 +109,33 @@ public: // -- + /** + * Returns true if this index is multikey, and returns false otherwise. 
+ */ bool isMultikey() const; - void setMultikey(OperationContext* txn); + /** + * Returns the path components that cause this index to be multikey if this index supports + * path-level multikey tracking, and returns an empty vector if path-level multikey tracking + * isn't supported. + * + * If this index supports path-level multikey tracking but isn't multikey, then this function + * returns a vector with size equal to the number of elements in the index key pattern where + * each element in the vector is an empty set. + */ + MultikeyPaths getMultikeyPaths(OperationContext* txn) const; + + /** + * Sets this index to be multikey. Information regarding which newly detected path components + * cause this index to be multikey can also be specified. + * + * If this index doesn't support path-level multikey tracking, then 'multikeyPaths' is ignored. + * + * If this index supports path-level multikey tracking, then 'multikeyPaths' must be a vector + * with size equal to the number of elements in the index key pattern. Additionally, at least + * one path component of the indexed fields must cause this index to be multikey. + */ + void setMultikey(OperationContext* txn, const MultikeyPaths& multikeyPaths); // if this ready is ready for queries bool isReady(OperationContext* txn) const; @@ -130,7 +158,13 @@ private: bool _catalogIsReady(OperationContext* txn) const; RecordId _catalogHead(OperationContext* txn) const; - bool _catalogIsMultikey(OperationContext* txn) const; + + /** + * Retrieves the multikey information associated with this index from '_collection', + * + * See CollectionCatalogEntry::isIndexMultikey() for more details. 
+ */ + bool _catalogIsMultikey(OperationContext* txn, MultikeyPaths* multikeyPaths) const; // ----- @@ -154,7 +188,28 @@ private: Ordering _ordering; // TODO: this might be b-tree specific bool _isReady; // cache of NamespaceDetails info RecordId _head; // cache of IndexDetails - bool _isMultikey; // cache of NamespaceDetails info + + // Set to true if this index supports path-level multikey tracking. + // '_indexTracksPathLevelMultikeyInfo' is effectively const after IndexCatalogEntry::init() is + // called. + bool _indexTracksPathLevelMultikeyInfo = false; + + // Set to true if this index is multikey. '_isMultikey' serves as a cache of the information + // stored in the NamespaceDetails or KVCatalog. + AtomicWord<bool> _isMultikey; + + // Controls concurrent access to '_indexMultikeyPaths'. We acquire this mutex rather than the + // RESOURCE_METADATA lock as a performance optimization so that it is cheaper to detect whether + // there is actually any path-level multikey information to update or not. + mutable stdx::mutex _indexMultikeyPathsMutex; + + // Non-empty only if '_indexTracksPathLevelMultikeyInfo' is true. + // + // If non-empty, '_indexMultikeyPaths' is a vector with size equal to the number of elements + // in the index key pattern. Each element in the vector is an ordered set of positions (starting + // at 0) into the corresponding indexed field that represent what prefixes of the indexed field + // causes the index to be multikey. + MultikeyPaths _indexMultikeyPaths; // The earliest snapshot that is allowed to read this index. 
boost::optional<SnapshotName> _minVisibleSnapshot; diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp index 0625e1553fc..87554b85f5f 100644 --- a/src/mongo/db/exec/sort_key_generator.cpp +++ b/src/mongo/db/exec/sort_key_generator.cpp @@ -229,8 +229,15 @@ void SortKeyGenerator::getBoundsForSort(OperationContext* txn, params.options = QueryPlannerParams::NO_TABLE_SCAN; // We're creating a "virtual index" with key pattern equal to the sort order. - IndexEntry sortOrder( - sortObj, IndexNames::BTREE, true, false, false, "doesnt_matter", NULL, BSONObj()); + IndexEntry sortOrder(sortObj, + IndexNames::BTREE, + true, + MultikeyPaths{}, + false, + false, + "doesnt_matter", + NULL, + BSONObj()); params.indices.push_back(sortOrder); auto statusWithQueryForSort = CanonicalQuery::canonicalize( diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index 6cbfa82a21a..1981985a831 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -114,7 +114,10 @@ bool WorkingSetCommon::fetch(OperationContext* txn, invariant(!member->keyData.empty()); for (size_t i = 0; i < member->keyData.size(); i++) { BSONObjSet keys; - member->keyData[i].index->getKeys(member->obj.value(), &keys); + // There's no need to compute the prefixes of the indexed fields that cause the index to + // be multikey when ensuring the keyData is still valid. + MultikeyPaths* multikeyPaths = nullptr; + member->keyData[i].index->getKeys(member->obj.value(), &keys, multikeyPaths); if (!keys.count(member->keyData[i].keyData)) { // document would no longer be at this position in the index. 
return false; diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index bc5bad9283b..c4e5e6e8843 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -47,7 +47,9 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte } /** Finds the key objects to put in an index */ -void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { +void TwoDAccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::get2DKeys(obj, _params, keys, NULL); } diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index 644b6addc5b..5d181904ead 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -54,7 +54,13 @@ private: // This really gets the 'locs' from the provided obj. void getKeys(const BSONObj& obj, std::vector<BSONObj>& locs) const; - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + /** + * Fills 'keys' with the keys that should be generated for 'obj' on this index. + * + * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking + * path-level multikey information. + */ + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; TwoDIndexingParams _params; }; diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index a3708e8caed..10cbe4ab6d1 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -62,11 +62,9 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn } } -void BtreeAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { - // SERVER-22726 represents the work to gather and persist the path-level multikey information. 
- // Until that's done, we may as well avoid computing the prefixes of the indexed fields that - // cause the index to be multikey. - MultikeyPaths* multikeyPaths = nullptr; +void BtreeAccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { _keyGenerator->getKeys(obj, keys, multikeyPaths); } diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index 4c20deeb931..5873514c01c 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -48,7 +48,7 @@ public: BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree); private: - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; // Our keys differ for V0 and V1. std::unique_ptr<BtreeKeyGenerator> _keyGenerator; diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index ad99eaa8a77..1f1eb949b06 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -312,6 +312,7 @@ void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames, } if (multikeyPaths) { + invariant(multikeyPaths->empty()); multikeyPaths->resize(fieldNames.size()); } getKeysImplWithArray(fieldNames, fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths); diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 9676fcbec45..e54e1760f55 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -34,7 +34,9 @@ namespace mongo { FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree) : IndexAccessMethod(btreeState, btree), _ftsSpec(btreeState->descriptor()->infoObj()) {} -void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { +void 
FTSAccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); } diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h index 794d1efe360..aa3a7dfc23e 100644 --- a/src/mongo/db/index/fts_access_method.h +++ b/src/mongo/db/index/fts_access_method.h @@ -45,8 +45,13 @@ public: } private: - // Implemented: - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + /** + * Fills 'keys' with the keys that should be generated for 'obj' on this index. + * + * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking + * path-level multikey information. + */ + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; fts::FTSSpec _ftsSpec; }; diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index ef94a249f9c..10339f7eae9 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -51,7 +51,9 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte _collator = btreeState->getCollator(); } -void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { +void HashAccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHashKeys( obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys); } diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index 5af9d0e4153..8fc5db36636 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -46,7 +46,13 @@ public: HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree); private: - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + /** + * Fills 'keys' with the keys that should 
be generated for 'obj' on this index. + * + * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support + * tracking path-level multikey information. + */ + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; // Only one of our fields is hashed. This is the field name for it. std::string _hashedField; diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 946fdc7d444..d6a50fdac88 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -59,7 +59,9 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState, uassert(16774, "no non-geo fields specified", _otherFields.size()); } -void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { +void HaystackAccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); } diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index d79de3bfffc..0f5e519e1e5 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -69,7 +69,13 @@ protected: unsigned limit); private: - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + /** + * Fills 'keys' with the keys that should be generated for 'obj' on this index. + * + * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support + * tracking path-level multikey information. 
+ */ + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; std::string _geoField; std::vector<std::string> _otherFields; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 2eb283a818a..4670a74f59d 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -55,6 +55,20 @@ using std::pair; using std::set; using std::vector; +namespace { + +/** + * Returns true if at least one prefix of any of the indexed fields causes the index to be multikey, + * and returns false otherwise. This function returns false if the 'multikeyPaths' vector is empty. + */ +bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) { + return std::any_of(multikeyPaths.cbegin(), + multikeyPaths.cend(), + [](const std::set<std::size_t>& components) { return !components.empty(); }); +} + +} // namespace + MONGO_EXPORT_SERVER_PARAMETER(failIndexKeyTooLong, bool, true); // @@ -107,8 +121,9 @@ Status IndexAccessMethod::insert(OperationContext* txn, invariant(numInserted); *numInserted = 0; BSONObjSet keys; + MultikeyPaths multikeyPaths; // Delegate to the subclass. - getKeys(obj, &keys); + getKeys(obj, &keys, &multikeyPaths); Status ret = Status::OK(); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -144,8 +159,8 @@ Status IndexAccessMethod::insert(OperationContext* txn, return status; } - if (*numInserted > 1) { - _btreeState->setMultikey(txn); + if (*numInserted > 1 || isMultikeyFromPaths(multikeyPaths)) { + _btreeState->setMultikey(txn, multikeyPaths); } return ret; @@ -184,7 +199,11 @@ Status IndexAccessMethod::remove(OperationContext* txn, invariant(numDeleted); *numDeleted = 0; BSONObjSet keys; - getKeys(obj, &keys); + // There's no need to compute the prefixes of the indexed fields that cause the index to be + // multikey when removing a document since the index metadata isn't updated when keys are + // deleted. 
+ MultikeyPaths* multikeyPaths = nullptr; + getKeys(obj, &keys, multikeyPaths); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { removeOneKey(txn, *i, loc, options.dupsAllowed); @@ -200,7 +219,10 @@ Status IndexAccessMethod::initializeAsEmpty(OperationContext* txn) { Status IndexAccessMethod::touch(OperationContext* txn, const BSONObj& obj) { BSONObjSet keys; - getKeys(obj, &keys); + // There's no need to compute the prefixes of the indexed fields that cause the index to be + // multikey when paging a document's index entries into memory. + MultikeyPaths* multikeyPaths = nullptr; + getKeys(obj, &keys, multikeyPaths); std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(txn)); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -292,10 +314,18 @@ Status IndexAccessMethod::validateUpdate(OperationContext* txn, const InsertDeleteOptions& options, UpdateTicket* ticket, const MatchExpression* indexFilter) { - if (indexFilter == NULL || indexFilter->matchesBSON(from)) - getKeys(from, &ticket->oldKeys); - if (indexFilter == NULL || indexFilter->matchesBSON(to)) - getKeys(to, &ticket->newKeys); + if (!indexFilter || indexFilter->matchesBSON(from)) { + // There's no need to compute the prefixes of the indexed fields that possibly caused the + // index to be multikey when the old version of the document was written since the index + // metadata isn't updated when keys are deleted. 
+ MultikeyPaths* multikeyPaths = nullptr; + getKeys(from, &ticket->oldKeys, multikeyPaths); + } + + if (!indexFilter || indexFilter->matchesBSON(to)) { + getKeys(to, &ticket->newKeys, &ticket->newMultikeyPaths); + } + ticket->loc = record; ticket->dupsAllowed = options.dupsAllowed; @@ -320,8 +350,9 @@ Status IndexAccessMethod::update(OperationContext* txn, return Status(ErrorCodes::InternalError, "Invalid UpdateTicket in update"); } - if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1) { - _btreeState->setMultikey(txn); + if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1 || + isMultikeyFromPaths(ticket.newMultikeyPaths)) { + _btreeState->setMultikey(txn, ticket.newMultikeyPaths); } for (size_t i = 0; i < ticket.removed.size(); ++i) { @@ -370,9 +401,21 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* txn, const InsertDeleteOptions& options, int64_t* numInserted) { BSONObjSet keys; - _real->getKeys(obj, &keys); + MultikeyPaths multikeyPaths; + _real->getKeys(obj, &keys, &multikeyPaths); - _isMultiKey = _isMultiKey || (keys.size() > 1); + _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1); + + if (!multikeyPaths.empty()) { + if (_indexMultikeyPaths.empty()) { + _indexMultikeyPaths = multikeyPaths; + } else { + invariant(_indexMultikeyPaths.size() == multikeyPaths.size()); + for (size_t i = 0; i < multikeyPaths.size(); ++i) { + _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end()); + } + } + } for (BSONObjSet::iterator it = keys.begin(); it != keys.end(); ++it) { _sorter->add(*it, loc); @@ -408,8 +451,8 @@ Status IndexAccessMethod::commitBulk(OperationContext* txn, MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wunit(txn); - if (bulk->_isMultiKey) { - _btreeState->setMultikey(txn); + if (bulk->_everGeneratedMultipleKeys || isMultikeyFromPaths(bulk->_indexMultikeyPaths)) { + _btreeState->setMultikey(txn, bulk->_indexMultikeyPaths); } 
builder.reset(_newInterface->getBulkBuilder(txn, dupsAllowed)); diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 85da9227586..35455f5425c 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -216,7 +216,14 @@ public: std::unique_ptr<Sorter> _sorter; const IndexAccessMethod* _real; int64_t _keysInserted = 0; - bool _isMultiKey = false; + + // Set to true if at least one document causes IndexAccessMethod::getKeys() to return a + // BSONObjSet with size strictly greater than one. + bool _everGeneratedMultipleKeys = false; + + // Holds the path components that cause this index to be multikey. The '_indexMultikeyPaths' + // vector remains empty if this index doesn't support path-level multikey tracking. + MultikeyPaths _indexMultikeyPaths; }; /** @@ -245,8 +252,16 @@ public: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. + * + * If the 'multikeyPaths' pointer is non-null, then it must point to an empty vector. If this + * index type supports tracking path-level multikey information, then this function resizes + * 'multikeyPaths' to have the same number of elements as the index key pattern and fills each + * element with the prefixes of the indexed field that would cause this index to be multikey as + * a result of inserting 'keys'. */ - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const = 0; + virtual void getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const = 0; /** * Splits the sets 'left' and 'right' into two vectors, the first containing the elements that @@ -294,6 +309,11 @@ private: RecordId loc; bool dupsAllowed; + + // Holds the path components that would cause this index to be multikey as a result of inserting + // 'newKeys'. The 'newMultikeyPaths' vector remains empty if this index doesn't support + // path-level multikey tracking. 
+ MultikeyPaths newMultikeyPaths; }; /** diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 6c3c5c18302..1abc17c0979 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -103,7 +103,9 @@ BSONObj S2AccessMethod::fixSpec(const BSONObj& specObj) { return specObj; } -void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { +void S2AccessMethod::getKeys(const BSONObj& obj, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys); } diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index db615977461..7bc819adcce 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -49,7 +49,16 @@ public: static BSONObj fixSpec(const BSONObj& specObj); private: - virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + /** + * Fills 'keys' with the keys that should be generated for 'obj' on this index. + * + * This function ignores the 'multikeyPaths' pointer because 2dsphere indexes don't support tracking + * path-level multikey information. + * + * TODO SERVER-23114: Return prefixes of the indexed fields that cause the index to be multikey + * as a result of inserting 'keys'. 
+ */ + void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; S2IndexingParams _params; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 33e6f533fb3..dceff9d9c85 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -137,6 +137,7 @@ void fillOutPlannerParams(OperationContext* txn, plannerParams->indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(txn), + ice->getMultikeyPaths(txn), desc->isSparse(), desc->unique(), desc->indexName(), @@ -1318,6 +1319,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn, plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(txn), + ice->getMultikeyPaths(txn), desc->isSparse(), desc->unique(), desc->indexName(), diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index 185b7ae6bed..a8d51c7c5d8 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -50,6 +50,7 @@ struct IndexEntry { IndexEntry(const BSONObj& kp, const std::string& accessMethod, bool mk, + const MultikeyPaths& mkp, bool sp, bool unq, const std::string& n, @@ -57,6 +58,7 @@ struct IndexEntry { const BSONObj& io) : keyPattern(kp), multikey(mk), + multikeyPaths(mkp), sparse(sp), unique(unq), name(n), diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp index 10a853ced75..aafcd1b0c3e 100644 --- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp +++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp @@ -100,7 +100,10 @@ bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn, invariant(offset >= 0); const bool tracksPathLevelMultikeyInfo = !md.indexes[offset].multikeyPaths.empty(); - if (!tracksPathLevelMultikeyInfo) { + if (tracksPathLevelMultikeyInfo) { + 
invariant(!multikeyPaths.empty()); + invariant(multikeyPaths.size() == md.indexes[offset].multikeyPaths.size()); + } else { invariant(multikeyPaths.empty()); if (md.indexes[offset].multikey) { @@ -112,9 +115,7 @@ bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn, md.indexes[offset].multikey = true; - if (tracksPathLevelMultikeyInfo && !multikeyPaths.empty()) { - invariant(multikeyPaths.size() == md.indexes[offset].multikeyPaths.size()); - + if (tracksPathLevelMultikeyInfo) { bool newPathIsMultikey = false; bool somePathIsMultikey = false; diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp index 57729826aa4..a37eb986256 100644 --- a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp +++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp @@ -239,6 +239,16 @@ TEST_F(KVCollectionCatalogEntryTest, CanSetMultipleFieldsAndComponentsAsMultikey } DEATH_TEST_F(KVCollectionCatalogEntryTest, + CannotOmitPathLevelMultikeyInfoWithBtreeIndex, + "Invariant failure !multikeyPaths.empty()") { + std::string indexName = createIndex(BSON("a" << 1 << "b" << 1)); + CollectionCatalogEntry* collEntry = getCollectionCatalogEntry(); + + auto opCtx = newOperationContext(); + collEntry->setIndexIsMultikey(opCtx.get(), indexName, MultikeyPaths{}); +} + +DEATH_TEST_F(KVCollectionCatalogEntryTest, AtLeastOnePathComponentMustCauseIndexToBeMultikey, "Invariant failure somePathIsMultikey") { std::string indexName = createIndex(BSON("a" << 1 << "b" << 1)); diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript index 23c0f1258f5..0a4301fa180 100644 --- a/src/mongo/dbtests/SConscript +++ b/src/mongo/dbtests/SConscript @@ -73,6 +73,7 @@ dbtest = env.Program( 'mmaptests.cpp', 'mock_dbclient_conn_test.cpp', 'mock_replica_set_test.cpp', + 'multikey_paths_test.cpp', 'namespacetests.cpp', 'oplogstarttests.cpp', 'pdfiletests.cpp', diff --git 
a/src/mongo/dbtests/multikey_paths_test.cpp b/src/mongo/dbtests/multikey_paths_test.cpp new file mode 100644 index 00000000000..429bc9447eb --- /dev/null +++ b/src/mongo/dbtests/multikey_paths_test.cpp @@ -0,0 +1,353 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include <iostream> +#include <string> + +#include "mongo/db/client.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/catalog/index_create.h" +#include "mongo/db/index/multikey_paths.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/service_context.h" +#include "mongo/dbtests/dbtests.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { +namespace { + +/** + * Fixture for testing correctness of multikey paths. + * + * Has helper functions for creating indexes and asserting that the multikey paths after performing + * write operations are as expected. + */ +class MultikeyPathsTest : public unittest::Test { +public: + MultikeyPathsTest() : _nss("unittests.multikey_paths") {} + + void setUp() final { + AutoGetOrCreateDb autoDb(_opCtx.get(), _nss.db(), MODE_X); + Database* database = autoDb.getDb(); + { + WriteUnitOfWork wuow(_opCtx.get()); + ASSERT(database->createCollection(_opCtx.get(), _nss.ns())); + wuow.commit(); + } + } + + void tearDown() final { + AutoGetDb autoDb(_opCtx.get(), _nss.db(), MODE_X); + Database* database = autoDb.getDb(); + if (database) { + WriteUnitOfWork wuow(_opCtx.get()); + ASSERT_OK(database->dropCollection(_opCtx.get(), _nss.ns())); + wuow.commit(); + } + } + + Status createIndex(Collection* collection, BSONObj indexSpec) { + return dbtests::createIndexFromSpec(_opCtx.get(), collection->ns().ns(), indexSpec); + } + + void assertMultikeyPaths(Collection* collection, + BSONObj keyPattern, + const MultikeyPaths& expectedMultikeyPaths) { + IndexCatalog* indexCatalog = collection->getIndexCatalog(); + IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_opCtx.get(), keyPattern); + const IndexCatalogEntry* ice = indexCatalog->getEntry(desc); + + auto actualMultikeyPaths = ice->getMultikeyPaths(_opCtx.get()); + if (storageEngineSupportsPathLevelMultikeyTracking()) { + ASSERT_FALSE(actualMultikeyPaths.empty()); + const bool match = 
(expectedMultikeyPaths == actualMultikeyPaths); + if (!match) { + FAIL(str::stream() << "Expected: " << dumpMultikeyPaths(expectedMultikeyPaths) + << ", Actual: " << dumpMultikeyPaths(actualMultikeyPaths)); + } + ASSERT_TRUE(match); + } else { + ASSERT_TRUE(actualMultikeyPaths.empty()); + } + } + +protected: + const ServiceContext::UniqueOperationContext _opCtx = cc().makeOperationContext(); + const NamespaceString _nss; + +private: + bool storageEngineSupportsPathLevelMultikeyTracking() { + // Path-level multikey tracking is supported for all storage engines that use the KVCatalog. + // MMAPv1 is the only storage engine that does not. + // + // TODO SERVER-22727: Store path-level multikey information in MMAPv1 index catalog. + return !getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1(); + } + + /** Renders 'multikeyPaths' as a bracketed, space-separated string for test failure messages. */ + std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) { + std::stringstream ss; + + ss << "[ "; + for (const auto& multikeyComponents : multikeyPaths) { + ss << "[ "; + for (const auto multikeyComponent : multikeyComponents) { + ss << multikeyComponent << " "; + } + ss << "] "; + } + ss << "]"; + + return ss.str(); + } +}; + +TEST_F(MultikeyPathsTest, PathsUpdatedOnIndexCreation) { + AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X); + Collection* collection = autoColl.getCollection(); + invariant(collection); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + BSONObj keyPattern = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPattern)); + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}}); +} + +TEST_F(MultikeyPathsTest, PathsUpdatedOnIndexCreationWithMultipleDocuments) { + AutoGetCollection 
autoColl(_opCtx.get(), _nss, MODE_X); + Collection* collection = autoColl.getCollection(); + invariant(collection); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)), + nullOpDebug, + enforceQuota)); + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + BSONObj keyPattern = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPattern)); + + assertMultikeyPaths(collection, keyPattern, {{0U}, {0U}}); +} + +TEST_F(MultikeyPathsTest, PathsUpdatedOnDocumentInsert) { + AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X); + Collection* collection = autoColl.getCollection(); + invariant(collection); + + BSONObj keyPattern = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPattern)); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}}); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + assertMultikeyPaths(collection, keyPattern, {{0U}, {0U}}); +} + +TEST_F(MultikeyPathsTest, PathsUpdatedOnDocumentUpdate) { + AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X); + 
Collection* collection = autoColl.getCollection(); + invariant(collection); + + BSONObj keyPattern = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPattern)); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), BSON("_id" << 0 << "a" << 5), nullOpDebug, enforceQuota)); + wuow.commit(); + } + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, std::set<size_t>{}}); + + { + auto cursor = collection->getCursor(_opCtx.get()); + auto record = cursor->next(); + invariant(record); + + auto oldDoc = collection->docFor(_opCtx.get(), record->id); + { + WriteUnitOfWork wuow(_opCtx.get()); + const bool enforceQuota = true; + const bool indexesAffected = true; + OpDebug* opDebug = nullptr; + OplogUpdateEntryArgs args; + collection->updateDocument( + _opCtx.get(), + record->id, + oldDoc, + BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)), + enforceQuota, + indexesAffected, + opDebug, + &args); + wuow.commit(); + } + } + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}}); +} + +TEST_F(MultikeyPathsTest, PathsNotUpdatedOnDocumentDelete) { + AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X); + Collection* collection = autoColl.getCollection(); + invariant(collection); + + BSONObj keyPattern = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPattern)); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}}); + + { + auto cursor = 
collection->getCursor(_opCtx.get()); + auto record = cursor->next(); + invariant(record); + + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + collection->deleteDocument(_opCtx.get(), record->id, nullOpDebug); + wuow.commit(); + } + } + + assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}}); +} + +TEST_F(MultikeyPathsTest, PathsUpdatedForMultipleIndexesOnDocumentInsert) { + AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X); + Collection* collection = autoColl.getCollection(); + invariant(collection); + + BSONObj keyPatternAB = BSON("a" << 1 << "b" << 1); + createIndex(collection, + BSON("name" + << "a_1_b_1" + << "ns" << _nss.ns() << "key" << keyPatternAB)); + + BSONObj keyPatternAC = BSON("a" << 1 << "c" << 1); + createIndex(collection, + BSON("name" + << "a_1_c_1" + << "ns" << _nss.ns() << "key" << keyPatternAC)); + { + WriteUnitOfWork wuow(_opCtx.get()); + OpDebug* const nullOpDebug = nullptr; + const bool enforceQuota = true; + ASSERT_OK(collection->insertDocument( + _opCtx.get(), + BSON("_id" << 0 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5 << "c" << 8), + nullOpDebug, + enforceQuota)); + wuow.commit(); + } + + assertMultikeyPaths(collection, keyPatternAB, {{0U}, std::set<size_t>{}}); + assertMultikeyPaths(collection, keyPatternAC, {{0U}, std::set<size_t>{}}); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index 328045ce868..a7e72788a73 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -590,6 +590,7 @@ IndexBounds ChunkManager::getIndexBoundsForQuery(const BSONObj& key, IndexEntry indexEntry(key, accessMethod, false /* multiKey */, + MultikeyPaths{}, false /* sparse */, false /* unique */, "shardkey", |