diff options
author | Bernard Gorman <bernard.gorman@gmail.com> | 2020-08-29 23:48:59 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-19 03:04:58 +0000 |
commit | 48089c01bcccc193b1d8dd3c50ae5cb3e072ebed (patch) | |
tree | bc7c9798e6c9d8dd697f32fa6d93fb293eea2d95 | |
parent | 30e753ec9ebd7804bc1bab62fb892aec33563e69 (diff) | |
download | mongo-48089c01bcccc193b1d8dd3c50ae5cb3e072ebed.tar.gz |
SERVER-47812 Secondaries persist wildcard multikeypaths out of order
(cherry picked from commit bd320bc2d10cff75756a2c95986cc81ec8a5e7c7)
26 files changed, 459 insertions, 184 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index d34cb3a7754..8f7fe4641e6 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -473,6 +473,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/storage/key_string', ], ) diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index b140a2f3405..9d3973c7fb4 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -435,6 +435,7 @@ public: */ virtual void setMultikeyPaths(OperationContext* const opCtx, const IndexDescriptor* const desc, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) = 0; // ----- data modifiers ------ diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index f65194fe62b..130acb89bfb 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -38,6 +38,7 @@ #include "mongo/bson/timestamp.h" #include "mongo/db/index/multikey_paths.h" #include "mongo/db/record_id.h" +#include "mongo/db/storage/key_string.h" #include "mongo/db/storage/kv/kv_prefix.h" #include "mongo/platform/atomic_word.h" #include "mongo/platform/mutex.h" @@ -128,7 +129,9 @@ public: * namespace, index name, and multikey paths on the OperationContext rather than set the index * as multikey here. */ - virtual void setMultikey(OperationContext* const opCtx, const MultikeyPaths& multikeyPaths) = 0; + virtual void setMultikey(OperationContext* const opCtx, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) = 0; // if this ready is ready for queries virtual bool isReady(OperationContext* const opCtx) const = 0; diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index b8e9bced5e2..266d7ad43c1 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -84,7 +84,7 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, const bool isMultikey = _catalogIsMultikey(opCtx, &_indexMultikeyPaths); _isMultikeyForRead.store(isMultikey); _isMultikeyForWrite.store(isMultikey); - _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty(); + _indexTracksMultikeyPathsInCatalog = !_indexMultikeyPaths.empty(); } const BSONObj& collation = _descriptor->collation(); @@ -182,14 +182,19 @@ void IndexCatalogEntryImpl::setIsReady(bool newIsReady) { } void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) { - if (!_indexTracksPathLevelMultikeyInfo && _isMultikeyForWrite.load()) { - // If the index is already set as multikey and we don't have any path-level information to - // update, then there's nothing more for us to do. + // An index can either track path-level multikey information in the catalog or as metadata keys + // in the index itself, but not both. + invariant(!(_indexTracksMultikeyPathsInCatalog && multikeyMetadataKeys.size() > 0)); + // If the index is already set as multikey and we don't have any path-level information to + // update, then there's nothing more for us to do. + bool hasNoPathLevelInfo = (!_indexTracksMultikeyPathsInCatalog && multikeyMetadataKeys.empty()); + if (hasNoPathLevelInfo && _isMultikeyForWrite.load()) { return; } - if (_indexTracksPathLevelMultikeyInfo) { + if (_indexTracksMultikeyPathsInCatalog) { stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex); invariant(multikeyPaths.size() == _indexMultikeyPaths.size()); @@ -213,7 +218,7 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, } } - MultikeyPaths paths = _indexTracksPathLevelMultikeyInfo ? multikeyPaths : MultikeyPaths{}; + MultikeyPaths paths = _indexTracksMultikeyPathsInCatalog ? multikeyPaths : MultikeyPaths{}; // On a primary, we can simply assign this write the same timestamp as the index creation, // insert, or update that caused this index to become multikey. This is because if two @@ -236,14 +241,20 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, // it multikey, that write will be marked as "isTrackingMultikeyPathInfo" on the applier's // OperationContext and we can safely defer that write to the end of the batch. if (MultikeyPathTracker::get(opCtx).isTrackingMultikeyPathInfo()) { - MultikeyPathInfo info; - info.nss = ns(); - info.indexName = _descriptor->indexName(); - info.multikeyPaths = paths; - MultikeyPathTracker::get(opCtx).addMultikeyPathInfo(info); + MultikeyPathTracker::get(opCtx).addMultikeyPathInfo( + {ns(), _descriptor->indexName(), multikeyMetadataKeys, std::move(paths)}); return; } + // If multikeyMetadataKeys is non-empty, we must insert these keys into the index itself. We do + // not have to account for potential dupes, since all metadata keys are indexed against a single + // RecordId. An attempt to write a duplicate key will therefore be ignored. + if (!multikeyMetadataKeys.empty()) { + uassertStatusOK( + accessMethod()->insertKeys(opCtx, multikeyMetadataKeys, {}, {}, {}, nullptr)); + } + + // Mark the catalog as multikey, and record the multikey paths if applicable. if (opCtx->inMultiDocumentTransaction()) { auto status = _setMultikeyInMultiDocumentTransaction(opCtx, paths); // Retry without side transaction. @@ -359,7 +370,7 @@ void IndexCatalogEntryImpl::_catalogSetMultikey(OperationContext* opCtx, // multikey can be undone. Alternatively, one could use a counter instead of a boolean to avoid // that problem. _isMultikeyForRead.store(true); - if (_indexTracksPathLevelMultikeyInfo) { + if (_indexTracksMultikeyPathsInCatalog) { stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex); for (size_t i = 0; i < multikeyPaths.size(); ++i) { _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end()); diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index 91d4762b826..8d6077423e5 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -161,7 +161,9 @@ public: * namespace, index name, and multikey paths on the OperationContext rather than set the index * as multikey here. */ - void setMultikey(OperationContext* opCtx, const MultikeyPaths& multikeyPaths) final; + void setMultikey(OperationContext* opCtx, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) final; // if this ready is ready for queries bool isReady(OperationContext* opCtx) const final; @@ -235,10 +237,9 @@ private: bool _isFrozen; AtomicWord<bool> _isDropped; // Whether the index drop is committed. - // Set to true if this index supports path-level multikey tracking. - // '_indexTracksPathLevelMultikeyInfo' is effectively const after IndexCatalogEntry::init() is - // called. - bool _indexTracksPathLevelMultikeyInfo = false; + // Set to true if this index can track path-level multikey information in the catalog. This + // member is effectively const after IndexCatalogEntry::init() is called. + bool _indexTracksMultikeyPathsInCatalog = false; // Set to true if this index may contain multikey data. AtomicWord<bool> _isMultikeyForRead; diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index ddbb4ca7f02..25f1a07e239 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -1097,13 +1097,14 @@ MultikeyPaths IndexCatalogImpl::getMultikeyPaths(OperationContext* opCtx, void IndexCatalogImpl::setMultikeyPaths(OperationContext* const opCtx, const IndexDescriptor* desc, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) { IndexCatalogEntry* entry = _readyIndexes.find(desc); if (!entry) { entry = _buildingIndexes.find(desc); } invariant(entry); - entry->setMultikey(opCtx, multikeyPaths); + entry->setMultikey(opCtx, multikeyMetadataKeys, multikeyPaths); }; // --------------------------- @@ -1333,7 +1334,7 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx, Status IndexCatalogImpl::_indexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const BSONObj& obj, @@ -1365,15 +1366,8 @@ Status IndexCatalogImpl::_indexKeys(OperationContext* opCtx, } } else { int64_t numInserted; - status = index->accessMethod()->insertKeys( - opCtx, - keys, - {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()}, - multikeyPaths, - loc, - options, - nullptr, - &numInserted); + status = index->accessMethod()->insertKeysAndUpdateMultikeyPaths( + opCtx, keys, multikeyMetadataKeys, multikeyPaths, loc, options, nullptr, &numInserted); if (keysInsertedOut) { *keysInsertedOut += numInserted; } @@ -1413,7 +1407,7 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx, Status status = _indexKeys(opCtx, index, - {keys.begin(), keys.end()}, + keys, multikeyMetadataKeys, multikeyPaths, *bsonRecord.docPtr, @@ -1497,7 +1491,7 @@ Status IndexCatalogImpl::_updateRecord(OperationContext* const opCtx, void IndexCatalogImpl::_unindexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const BSONObj& obj, RecordId loc, bool logIfError, @@ -1583,7 +1577,7 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, return; } } - _unindexKeys(opCtx, entry, {keys.begin(), keys.end()}, obj, loc, logIfError, keysDeletedOut); + _unindexKeys(opCtx, entry, keys, obj, loc, logIfError, keysDeletedOut); } Status IndexCatalogImpl::indexRecords(OperationContext* opCtx, diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index 36709dba180..fac98a9e365 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -240,6 +240,7 @@ public: void setMultikeyPaths(OperationContext* const opCtx, const IndexDescriptor* desc, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) override; // ----- data modifiers ------ @@ -308,7 +309,7 @@ private: Status _indexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const BSONObj& obj, @@ -336,7 +337,7 @@ private: void _unindexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const BSONObj& obj, RecordId loc, bool logIfError, diff --git a/src/mongo/db/catalog/index_catalog_noop.h b/src/mongo/db/catalog/index_catalog_noop.h index ebe91ec2b72..7361c0f8a47 100644 --- a/src/mongo/db/catalog/index_catalog_noop.h +++ b/src/mongo/db/catalog/index_catalog_noop.h @@ -193,6 +193,7 @@ public: void setMultikeyPaths(OperationContext* const opCtx, const IndexDescriptor* const desc, + const std::KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) override {} Status indexRecords(OperationContext* const opCtx, diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 8e666b4d21e..c7aede5a0fc 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -766,37 +766,47 @@ Status MultiIndexBlock::commit(OperationContext* opCtx, onCreateEach(_indexes[i].block->getSpec()); // Do this before calling success(), which unsets the interceptor pointer on the index - // catalog entry. + // catalog entry. The interceptor will write multikey metadata keys into the index during + // IndexBuildInterceptor::sideWrite, so we only need to pass the cached MultikeyPaths into + // IndexCatalogEntry::setMultikey here. auto interceptor = _indexes[i].block->getEntry()->indexBuildInterceptor(); if (interceptor) { auto multikeyPaths = interceptor->getMultikeyPaths(); if (multikeyPaths) { - _indexes[i].block->getEntry()->setMultikey(opCtx, multikeyPaths.get()); + _indexes[i].block->getEntry()->setMultikey(opCtx, {}, multikeyPaths.get()); } } _indexes[i].block->success(opCtx, collection); - // The bulk builder will track multikey information itself. Non-bulk builders re-use the - // code path that a typical insert/update uses. State is altered on the non-bulk build - // path to accumulate the multikey information on the `MultikeyPathTracker`. + // The bulk builder will track multikey information itself, and will write cached multikey + // metadata keys into the index just before committing. We therefore only need to pass the + // MultikeyPaths into IndexCatalogEntry::setMultikey here. if (_indexes[i].bulk) { const auto& bulkBuilder = _indexes[i].bulk; if (bulkBuilder->isMultikey()) { - _indexes[i].block->getEntry()->setMultikey(opCtx, bulkBuilder->getMultikeyPaths()); + _indexes[i].block->getEntry()->setMultikey( + opCtx, {}, bulkBuilder->getMultikeyPaths()); } } else { - auto multikeyPaths = - boost::optional<MultikeyPaths>(MultikeyPathTracker::get(opCtx).getMultikeyPathInfo( - collection->ns(), _indexes[i].block->getIndexName())); - if (multikeyPaths) { + // Non-bulk builders re-use the code path that a typical insert/update uses. State is + // altered on the non-bulk build path to accumulate the multikey information on the + // MultikeyPathTracker. + auto multikeyPaths = MultikeyPathTracker::get(opCtx).getMultikeyPathInfo( + collection->ns(), _indexes[i].block->getIndexName()); + auto multikeyMetadataKeys = MultikeyPathTracker::get(opCtx).getMultikeyMetadataKeys( + collection->ns(), _indexes[i].block->getIndexName()); + if (multikeyPaths || multikeyMetadataKeys) { // Upon reaching this point, multikeyPaths must either have at least one (possibly // empty) element unless it is of an index with a type that doesn't support tracking // multikeyPaths via the multikeyPaths array or has "special" multikey semantics. - invariant(!multikeyPaths.get().empty() || + invariant(multikeyMetadataKeys || !multikeyPaths.get().empty() || !IndexBuildInterceptor::typeCanFastpathMultikeyUpdates( _indexes[i].block->getEntry()->descriptor()->getIndexType())); - _indexes[i].block->getEntry()->setMultikey(opCtx, *multikeyPaths); + _indexes[i].block->getEntry()->setMultikey( + opCtx, + multikeyMetadataKeys.value_or(KeyStringSet{}), + multikeyPaths.value_or(MultikeyPaths{{}})); } } diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index 2a47dd62f82..54cb62a16ee 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -115,9 +115,7 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, if (!descriptor->isMultikey() && iam->shouldMarkIndexAsMultikey( - documentKeySet.size(), - {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()}, - documentMultikeyPaths)) { + documentKeySet.size(), multikeyMetadataKeys, documentMultikeyPaths)) { std::string msg = str::stream() << "Index " << descriptor->indexName() << " is not multi-key but has more than one" << " key in document " << recordId; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 5f18d0c6c03..f88d721ae16 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -82,10 +82,6 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) { multikeyPaths.cend(), [](const std::set<std::size_t>& components) { return !components.empty(); }); } - -std::vector<KeyString::Value> asVector(const KeyStringSet& keySet) { - return {keySet.begin(), keySet.end()}; -} } // namespace struct BtreeExternalSortComparison { @@ -145,53 +141,68 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx, loc, kNoopOnSuppressedErrorFn); - return insertKeys(opCtx, - {keys.begin(), keys.end()}, - {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()}, - multikeyPaths, - loc, - options, - std::move(onDuplicateKey), - numInserted); + return insertKeysAndUpdateMultikeyPaths(opCtx, + keys, + multikeyMetadataKeys, + multikeyPaths, + loc, + options, + std::move(onDuplicateKey), + numInserted); +} + +Status AbstractIndexAccessMethod::insertKeysAndUpdateMultikeyPaths( + OperationContext* opCtx, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths, + const RecordId& loc, + const InsertDeleteOptions& options, + KeyHandlerFn&& onDuplicateKey, + int64_t* numInserted) { + // Insert the specified data keys into the index. + auto status = insertKeys(opCtx, keys, loc, options, std::move(onDuplicateKey), numInserted); + if (!status.isOK()) { + return status; + } + // If these keys should cause the index to become multikey, pass them into the catalog. + if (shouldMarkIndexAsMultikey(keys.size(), multikeyMetadataKeys, multikeyPaths)) { + _indexCatalogEntry->setMultikey(opCtx, multikeyMetadataKeys, multikeyPaths); + } + // If we have some multikey metadata keys, they should have been added while marking the index + // as multikey in the catalog. Add them to the count of keys inserted for completeness. + if (numInserted && !multikeyMetadataKeys.empty()) { + *numInserted += multikeyMetadataKeys.size(); + } + return Status::OK(); } Status AbstractIndexAccessMethod::insertKeys(OperationContext* opCtx, - const vector<KeyString::Value>& keys, - const vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, KeyHandlerFn&& onDuplicateKey, int64_t* numInserted) { - // Add all new data keys, and all new multikey metadata keys, into the index. When iterating - // over the data keys, each of them should point to the doc's RecordId. When iterating over - // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'. - for (const auto keyVec : {&keys, &multikeyMetadataKeys}) { - for (const auto& keyString : *keyVec) { - bool unique = _descriptor->unique(); - Status status = _newInterface->insert(opCtx, keyString, !unique /* dupsAllowed */); - - // When duplicates are encountered and allowed, retry with dupsAllowed. Call - // onDuplicateKey() with the inserted duplicate key. - if (ErrorCodes::DuplicateKey == status.code() && options.dupsAllowed) { - invariant(unique); - status = _newInterface->insert(opCtx, keyString, true /* dupsAllowed */); - - if (status.isOK() && onDuplicateKey) - status = onDuplicateKey(keyString); - } - if (isFatalError(opCtx, status, keyString)) { - return status; - } + // Add all new keys into the index. The RecordId for each is already encoded in the KeyString. + for (const auto& keyString : keys) { + bool unique = _descriptor->unique(); + Status status = _newInterface->insert(opCtx, keyString, !unique /* dupsAllowed */); + + // When duplicates are encountered and allowed, retry with dupsAllowed. Call + // onDuplicateKey() with the inserted duplicate key. + if (ErrorCodes::DuplicateKey == status.code() && options.dupsAllowed) { + invariant(unique); + status = _newInterface->insert(opCtx, keyString, true /* dupsAllowed */); + + if (status.isOK() && onDuplicateKey) + status = onDuplicateKey(keyString); + } + if (isFatalError(opCtx, status, keyString)) { + return status; } } - if (numInserted) { - *numInserted = keys.size() + multikeyMetadataKeys.size(); - } - - if (shouldMarkIndexAsMultikey(keys.size(), multikeyMetadataKeys, multikeyPaths)) { - _indexCatalogEntry->setMultikey(opCtx, multikeyPaths); + *numInserted = keys.size(); } return Status::OK(); } @@ -228,7 +239,7 @@ std::unique_ptr<SortedDataInterface::Cursor> AbstractIndexAccessMethod::newCurso } Status AbstractIndexAccessMethod::removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) { @@ -310,13 +321,14 @@ long long AbstractIndexAccessMethod::getSpaceUsedBytes(OperationContext* opCtx) return _newInterface->getSpaceUsedBytes(opCtx); } -pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMethod::setDifference( - const KeyStringSet& left, const KeyStringSet& right, Ordering ordering) { +pair<KeyStringSet, KeyStringSet> AbstractIndexAccessMethod::setDifference(const KeyStringSet& left, + const KeyStringSet& right, + Ordering ordering) { // Two iterators to traverse the two sets in sorted order. auto leftIt = left.begin(); auto rightIt = right.begin(); - vector<KeyString::Value> onlyLeft; - vector<KeyString::Value> onlyRight; + KeyStringSet onlyLeft; + KeyStringSet onlyRight; while (leftIt != left.end() && rightIt != right.end()) { const int cmp = leftIt->compare(*rightIt); @@ -330,24 +342,24 @@ pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMeth auto rightKey = KeyString::toBson( rightIt->getBuffer(), rightIt->getSize(), ordering, rightIt->getTypeBits()); if (!leftKey.binaryEqual(rightKey)) { - onlyLeft.push_back(*leftIt); - onlyRight.push_back(*rightIt); + onlyLeft.insert(*leftIt); + onlyRight.insert(*rightIt); } ++leftIt; ++rightIt; continue; } else if (cmp > 0) { - onlyRight.push_back(*rightIt); + onlyRight.insert(*rightIt); ++rightIt; } else { - onlyLeft.push_back(*leftIt); + onlyLeft.insert(*leftIt); ++leftIt; } } // Add the rest of 'left' to 'onlyLeft', and the rest of 'right' to 'onlyRight', if any. - onlyLeft.insert(onlyLeft.end(), leftIt, left.end()); - onlyRight.insert(onlyRight.end(), rightIt, right.end()); + onlyLeft.insert(leftIt, left.end()); + onlyRight.insert(rightIt, right.end()); return {std::move(onlyLeft), std::move(onlyRight)}; } @@ -421,28 +433,25 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx, _newInterface->unindex(opCtx, remKey, ticket.dupsAllowed); } - // Add all new data keys, and all new multikey metadata keys, into the index. When iterating - // over the data keys, each of them should point to the doc's RecordId. When iterating over - // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'. - const auto newMultikeyMetadataKeys = asVector(ticket.newMultikeyMetadataKeys); - for (const auto keySet : {&ticket.added, &newMultikeyMetadataKeys}) { - for (const auto& keyString : *keySet) { - Status status = _newInterface->insert(opCtx, keyString, ticket.dupsAllowed); - if (isFatalError(opCtx, status, keyString)) { - return status; - } + // Add all new data keys into the index. + for (const auto keyString : ticket.added) { + Status status = _newInterface->insert(opCtx, keyString, ticket.dupsAllowed); + if (isFatalError(opCtx, status, keyString)) { + return status; } } + // If these keys should cause the index to become multikey, pass them into the catalog. if (shouldMarkIndexAsMultikey( - ticket.newKeys.size(), - {ticket.newMultikeyMetadataKeys.begin(), ticket.newMultikeyMetadataKeys.end()}, - ticket.newMultikeyPaths)) { - _indexCatalogEntry->setMultikey(opCtx, ticket.newMultikeyPaths); + ticket.newKeys.size(), ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) { + _indexCatalogEntry->setMultikey( + opCtx, ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths); } + // If we have some multikey metadata keys, they should have been added while marking the index + // as multikey in the catalog. Add them to the count of keys inserted for completeness. + *numInserted = ticket.added.size() + ticket.newMultikeyMetadataKeys.size(); *numDeleted = ticket.removed.size(); - *numInserted = ticket.added.size(); return Status::OK(); } @@ -564,9 +573,7 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt _isMultiKey = _isMultiKey || _indexCatalogEntry->accessMethod()->shouldMarkIndexAsMultikey( - keys.size(), - {_multikeyMetadataKeys.begin(), _multikeyMetadataKeys.end()}, - multikeyPaths); + keys.size(), _multikeyMetadataKeys, multikeyPaths); return Status::OK(); } @@ -684,8 +691,10 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx, return Status::OK(); } -void AbstractIndexAccessMethod::setIndexIsMultikey(OperationContext* opCtx, MultikeyPaths paths) { - _indexCatalogEntry->setMultikey(opCtx, paths); +void AbstractIndexAccessMethod::setIndexIsMultikey(OperationContext* opCtx, + KeyStringSet multikeyMetadataKeys, + MultikeyPaths paths) { + _indexCatalogEntry->setMultikey(opCtx, multikeyMetadataKeys, paths); } IndexAccessMethod::OnSuppressedErrorFn IndexAccessMethod::kNoopOnSuppressedErrorFn = @@ -763,7 +772,7 @@ void AbstractIndexAccessMethod::getKeys(const BSONObj& obj, bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey( size_t numberOfKeys, - const vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const { return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths); } diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index e47b916ef03..63be0cf3186 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -93,10 +93,26 @@ public: KeyHandlerFn&& onDuplicateKey, int64_t* numInserted) = 0; + /** + * Inserts the specified keys into the index. and determines whether these keys should cause the + * index to become multikey. If so, this method also handles the task of marking the index as + * multikey in the catalog, and sets the path-level multikey information if applicable. + */ + virtual Status insertKeysAndUpdateMultikeyPaths(OperationContext* opCtx, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths, + const RecordId& loc, + const InsertDeleteOptions& options, + KeyHandlerFn&& onDuplicateKey, + int64_t* numInserted) = 0; + + /** + * Inserts the specified keys into the index. Does not attempt to determine whether the + * insertion of these keys should cause the index to become multikey. + */ virtual Status insertKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, KeyHandlerFn&& onDuplicateKey, @@ -107,7 +123,7 @@ public: * 'numDeleted' will be set to the number of keys removed from the index for the provided keys. */ virtual Status removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) = 0; @@ -194,7 +210,9 @@ public: /** * Sets this index as multikey with the provided paths. */ - virtual void setIndexIsMultikey(OperationContext* opCtx, MultikeyPaths paths) = 0; + virtual void setIndexIsMultikey(OperationContext* opCtx, + KeyStringSet multikeyMetadataKeys, + MultikeyPaths paths) = 0; // // Bulk operations support @@ -316,10 +334,9 @@ public: * Given the set of keys, multikeyMetadataKeys and multikeyPaths generated by a particular * document, return 'true' if the index should be marked as multikey and 'false' otherwise. */ - virtual bool shouldMarkIndexAsMultikey( - size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths) const = 0; + virtual bool shouldMarkIndexAsMultikey(size_t numberOfKeys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const = 0; /** * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the @@ -380,8 +397,8 @@ struct UpdateTicket { KeyStringSet newMultikeyMetadataKeys; - std::vector<KeyString::Value> removed; - std::vector<KeyString::Value> added; + KeyStringSet removed; + KeyStringSet added; RecordId loc; bool dupsAllowed; @@ -432,8 +449,9 @@ public: * setDifference({BSON("a" << 0.0)}, {BSON("a" << 0LL)}) would result in the pair * ( {BSON("a" << 0.0)}, {BSON("a" << 0LL)} ). */ - static std::pair<std::vector<KeyString::Value>, std::vector<KeyString::Value>> setDifference( - const KeyStringSet& left, const KeyStringSet& right, Ordering ordering); + static std::pair<KeyStringSet, KeyStringSet> setDifference(const KeyStringSet& left, + const KeyStringSet& right, + Ordering ordering); AbstractIndexAccessMethod(IndexCatalogEntry* btreeState, std::unique_ptr<SortedDataInterface> btree); @@ -446,16 +464,23 @@ public: int64_t* numInserted) final; Status insertKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, KeyHandlerFn&& onDuplicateKey, int64_t* numInserted) final; + Status insertKeysAndUpdateMultikeyPaths(OperationContext* opCtx, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths, + const RecordId& loc, + const InsertDeleteOptions& options, + KeyHandlerFn&& onDuplicateKey, + int64_t* numInserted) final; + Status removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) final; @@ -493,7 +518,9 @@ public: Status compact(OperationContext* opCtx) final; - void setIndexIsMultikey(OperationContext* opCtx, MultikeyPaths paths) final; + void setIndexIsMultikey(OperationContext* opCtx, + KeyStringSet multikeyMetadataKeys, + MultikeyPaths paths) final; std::unique_ptr<BulkBuilder> initiateBulk(size_t maxMemoryUsageBytes) final; @@ -513,7 +540,7 @@ public: OnSuppressedErrorFn onSuppressedError) const final; bool shouldMarkIndexAsMultikey(size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const override; SortedDataInterface* getSortedDataInterface() const override final; diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp index 9f5f6ddc27a..e377ffa14c2 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -282,18 +282,19 @@ Status IndexBuildInterceptor::_applyWrite(OperationContext* opCtx, auto accessMethod = _indexCatalogEntry->accessMethod(); if (opType == Op::kInsert) { int64_t numInserted; - auto status = accessMethod->insertKeys(opCtx, - {keySet.begin(), keySet.end()}, - {}, - MultikeyPaths{}, - opRecordId, - options, - [=](const KeyString::Value& duplicateKey) { - return trackDups == TrackDuplicates::kTrack - ? recordDuplicateKey(opCtx, duplicateKey) - : Status::OK(); - }, - &numInserted); + auto status = accessMethod->insertKeysAndUpdateMultikeyPaths( + opCtx, + keySet, + {}, + MultikeyPaths{}, + opRecordId, + options, + [=](const KeyString::Value& duplicateKey) { + return trackDups == TrackDuplicates::kTrack + ? recordDuplicateKey(opCtx, duplicateKey) + : Status::OK(); + }, + &numInserted); if (!status.isOK()) { return status; } @@ -307,8 +308,7 @@ Status IndexBuildInterceptor::_applyWrite(OperationContext* opCtx, invariant(strcmp(operation.getStringField("op"), "d") == 0); int64_t numDeleted; - Status s = accessMethod->removeKeys( - opCtx, {keySet.begin(), keySet.end()}, opRecordId, options, &numDeleted); + Status s = accessMethod->removeKeys(opCtx, keySet, opRecordId, options, &numDeleted); if (!s.isOK()) { return s; } @@ -376,7 +376,7 @@ boost::optional<MultikeyPaths> IndexBuildInterceptor::getMultikeyPaths() const { } Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, RecordId loc, diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index a5733401411..3f872830331 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -86,7 +86,7 @@ public: * On success, `numKeysOut` if non-null will contain the number of keys added or removed. */ Status sideWrite(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, RecordId loc, diff --git a/src/mongo/db/index/wildcard_access_method.cpp b/src/mongo/db/index/wildcard_access_method.cpp index 8d38ce5d8e8..9c4d8043560 100644 --- a/src/mongo/db/index/wildcard_access_method.cpp +++ b/src/mongo/db/index/wildcard_access_method.cpp @@ -46,10 +46,9 @@ WildcardAccessMethod::WildcardAccessMethod(IndexCatalogEntry* wildcardState, getSortedDataInterface()->getKeyStringVersion(), getSortedDataInterface()->getOrdering()) {} -bool WildcardAccessMethod::shouldMarkIndexAsMultikey( - size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths) const { +bool WildcardAccessMethod::shouldMarkIndexAsMultikey(size_t numberOfKeys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const { return !multikeyMetadataKeys.empty(); } diff --git a/src/mongo/db/index/wildcard_access_method.h b/src/mongo/db/index/wildcard_access_method.h index 0d87d6bbb99..df89943db36 100644 --- a/src/mongo/db/index/wildcard_access_method.h +++ b/src/mongo/db/index/wildcard_access_method.h @@ -67,7 +67,7 @@ public: * vector is non-empty. */ bool shouldMarkIndexAsMultikey(size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final; /** diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp index 342e571ea0f..230e3c758c1 100644 --- a/src/mongo/db/multi_key_path_tracker.cpp +++ b/src/mongo/db/multi_key_path_tracker.cpp @@ -33,6 +33,7 @@ #include "mongo/db/multi_key_path_tracker.h" +#include "mongo/db/storage/key_string.h" #include "mongo/util/assert_util.h" #include "mongo/util/str.h" @@ -100,6 +101,8 @@ void MultikeyPathTracker::addMultikeyPathInfo(MultikeyPathInfo info) { } mergeMultikeyPaths(&existingChanges.multikeyPaths, info.multikeyPaths); + existingChanges.multikeyMetadataKeys.insert(info.multikeyMetadataKeys.begin(), + info.multikeyMetadataKeys.end()); return; } @@ -122,6 +125,17 @@ const boost::optional<MultikeyPaths> MultikeyPathTracker::getMultikeyPathInfo( return boost::none; } +const boost::optional<KeyStringSet> MultikeyPathTracker::getMultikeyMetadataKeys( + const NamespaceString& nss, const std::string& indexName) { + for (const auto& multikeyPathInfo : _multikeyPathInfo) { + if (multikeyPathInfo.nss == nss && multikeyPathInfo.indexName == indexName) { + return multikeyPathInfo.multikeyMetadataKeys; + } + } + + return boost::none; +} + void MultikeyPathTracker::startTrackingMultikeyPathInfo() { _trackMultikeyPathInfo = true; } diff --git a/src/mongo/db/multi_key_path_tracker.h b/src/mongo/db/multi_key_path_tracker.h index d6efe1e71db..b8e63bd7fcc 100644 --- a/src/mongo/db/multi_key_path_tracker.h +++ b/src/mongo/db/multi_key_path_tracker.h @@ -35,12 +35,14 @@ #include "mongo/db/index/multikey_paths.h" #include "mongo/db/operation_context.h" +#include "mongo/db/storage/key_string.h" namespace mongo { struct MultikeyPathInfo { NamespaceString nss; std::string indexName; + KeyStringSet multikeyMetadataKeys; MultikeyPaths multikeyPaths; }; @@ -96,6 +98,12 @@ public: const std::string& indexName); /** + * Returns the multikey metadata keys for the given inputs, or boost::none if none exist. + */ + const boost::optional<KeyStringSet> getMultikeyMetadataKeys(const NamespaceString& nss, + const std::string& indexName); + + /** * Specifies that we should track multikey path information on this MultikeyPathTracker. This is * only expected to be called during oplog application on secondaries. We cannot simply check * 'canAcceptWritesFor' because background index builds use their own OperationContext and diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp index 4f541ddfeb4..3ab1d0a2eba 100644 --- a/src/mongo/db/repl/oplog_applier_impl.cpp +++ b/src/mongo/db/repl/oplog_applier_impl.cpp @@ -768,8 +768,12 @@ StatusWith<OpTime> OplogApplierImpl::_applyOplogBatch(OperationContext* opCtx, // the first timestamp in the batch since we do not have enough information to find out // the timestamp of the first write that set the given multikey path. fassert(50686, - _storageInterface->setIndexIsMultikey( - opCtx, info.nss, info.indexName, info.multikeyPaths, firstTimeInBatch)); + _storageInterface->setIndexIsMultikey(opCtx, + info.nss, + info.indexName, + info.multikeyMetadataKeys, + info.multikeyPaths, + firstTimeInBatch)); } } diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h index bcc253283ac..a8458a967e7 100644 --- a/src/mongo/db/repl/storage_interface.h +++ b/src/mongo/db/repl/storage_interface.h @@ -185,6 +185,7 @@ public: virtual Status setIndexIsMultikey(OperationContext* opCtx, const NamespaceString& nss, const std::string& indexName, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& paths, Timestamp ts) = 0; /** diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 08ebbee9ede..48cbd22e8cc 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -559,6 +559,7 @@ Status StorageInterfaceImpl::renameCollection(OperationContext* opCtx, Status StorageInterfaceImpl::setIndexIsMultikey(OperationContext* opCtx, const NamespaceString& nss, const std::string& indexName, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& paths, Timestamp ts) { if (ts.isNull()) { @@ -589,7 +590,7 @@ Status StorageInterfaceImpl::setIndexIsMultikey(OperationContext* opCtx, str::stream() << "Could not find index " << indexName << " in " << nss.ns() << " to set to multikey."); } - collection->getIndexCatalog()->setMultikeyPaths(opCtx, idx, paths); + collection->getIndexCatalog()->setMultikeyPaths(opCtx, idx, multikeyMetadataKeys, paths); wunit.commit(); return Status::OK(); }); diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h index 2951ba998a7..37d72abfb95 100644 --- a/src/mongo/db/repl/storage_interface_impl.h +++ b/src/mongo/db/repl/storage_interface_impl.h @@ -95,6 +95,7 @@ public: Status setIndexIsMultikey(OperationContext* opCtx, const NamespaceString& nss, const std::string& indexName, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& paths, Timestamp ts) override; diff --git a/src/mongo/db/repl/storage_interface_impl_test.cpp b/src/mongo/db/repl/storage_interface_impl_test.cpp index 9656b154a88..6a7ab527f0d 100644 --- a/src/mongo/db/repl/storage_interface_impl_test.cpp +++ b/src/mongo/db/repl/storage_interface_impl_test.cpp @@ -2607,7 +2607,7 @@ TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsNamespaceNotFoundForMi StorageInterfaceImpl storage; auto nss = makeNamespace(_agent); ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, - storage.setIndexIsMultikey(opCtx, nss, "foo", {}, Timestamp(3, 3))); + storage.setIndexIsMultikey(opCtx, nss, "foo", {}, {}, Timestamp(3, 3))); } TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsNamespaceNotFoundForMissingCollection) { @@ -2617,7 +2617,7 @@ TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsNamespaceNotFoundForMi NamespaceString wrongColl(nss.db(), "wrongColl"_sd); ASSERT_OK(storage.createCollection(opCtx, nss, CollectionOptions())); ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, - storage.setIndexIsMultikey(opCtx, wrongColl, "foo", {}, Timestamp(3, 3))); + storage.setIndexIsMultikey(opCtx, wrongColl, "foo", {}, {}, Timestamp(3, 3))); } TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsIndexNotFoundForMissingIndex) { @@ -2626,7 +2626,7 @@ TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsIndexNotFoundForMissin auto nss = makeNamespace(_agent); ASSERT_OK(storage.createCollection(opCtx, nss, CollectionOptions())); ASSERT_EQUALS(ErrorCodes::IndexNotFound, - storage.setIndexIsMultikey(opCtx, nss, "foo", {}, Timestamp(3, 3))); + storage.setIndexIsMultikey(opCtx, nss, "foo", {}, {}, Timestamp(3, 3))); } TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsInvalidOptionsForNullTimestamp) { @@ -2635,7 +2635,7 @@ TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeyReturnsInvalidOptionsForNullT auto nss = makeNamespace(_agent); ASSERT_OK(storage.createCollection(opCtx, nss, CollectionOptions())); ASSERT_EQUALS(ErrorCodes::InvalidOptions, - storage.setIndexIsMultikey(opCtx, nss, "foo", {}, Timestamp())); + storage.setIndexIsMultikey(opCtx, nss, "foo", {}, {}, Timestamp())); } TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeySucceeds) { @@ -2650,7 +2650,7 @@ TEST_F(StorageInterfaceImplTest, SetIndexIsMultikeySucceeds) { ASSERT_EQUALS(_createIndexOnEmptyCollection(opCtx, nss, indexSpec), 2); MultikeyPaths paths = {{1}}; - ASSERT_OK(storage.setIndexIsMultikey(opCtx, nss, indexName, paths, Timestamp(3, 3))); + ASSERT_OK(storage.setIndexIsMultikey(opCtx, nss, indexName, {}, paths, Timestamp(3, 3))); AutoGetCollectionForReadCommand autoColl(opCtx, nss); ASSERT_TRUE(autoColl.getCollection()); auto indexCatalog = autoColl.getCollection()->getIndexCatalog(); diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h index 1133186a6bc..0459d4357e5 100644 --- a/src/mongo/db/repl/storage_interface_mock.h +++ b/src/mongo/db/repl/storage_interface_mock.h @@ -191,6 +191,7 @@ public: Status setIndexIsMultikey(OperationContext* opCtx, const NamespaceString& nss, const std::string& indexName, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& paths, Timestamp ts) override { diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index 132b2ef5be1..ff771064454 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -52,6 +52,7 @@ #include "mongo/db/global_settings.h" #include "mongo/db/index/index_build_interceptor.h" #include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/index_builds_coordinator.h" #include "mongo/db/logical_clock.h" #include "mongo/db/multi_key_path_tracker.h" @@ -1343,6 +1344,200 @@ public: } }; +class SecondarySetWildcardIndexMultikeyOnInsert : public StorageTimestampTest { + +public: + void run() { + // Pretend to be a secondary. + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnInsert"); + reset(nss); + UUID uuid = UUID::gen(); + { + AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX); + uuid = autoColl.getCollection()->uuid(); + } + auto indexName = "a_1"; + auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v" + << static_cast<int>(kIndexVersion)); + ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const LogicalTime insertTime0 = _clock->reserveTicks(1); + const LogicalTime insertTime1 = _clock->reserveTicks(1); + const LogicalTime insertTime2 = _clock->reserveTicks(1); + + BSONObj doc0 = BSON("_id" << 0 << "a" << 3); + BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2)); + BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2)); + auto op0 = repl::OplogEntry( + BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "i" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0)); + auto op1 = repl::OplogEntry( + BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "i" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1)); + auto op2 = repl::OplogEntry( + BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "i" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2)); + + // Coerce oplog application to apply op2 before op1. + std::vector<repl::OplogEntry> ops = {op0, op2, op1}; + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto writerPool = repl::makeReplWriterPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, // task executor. not required for applyOplogBatch(). + nullptr, // oplog buffer. not required for applyOplogBatch(). + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), + writerPool.get()); + + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); + + AutoGetCollectionForRead autoColl(_opCtx, nss); + auto wildcardIndexDescriptor = + autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName); + const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection() + ->getIndexCatalog() + ->getEntry(wildcardIndexDescriptor) + ->accessMethod(); + { + // Verify that, even though op2 was applied first, the multikey state is observed in all + // WiredTiger transactions that can contain the data written by op1. + OneOffRead oor(_opCtx, insertTime1.asTimestamp()); + const WildcardAccessMethod* wam = + dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod); + MultikeyMetadataAccessStats stats; + std::set<FieldRef> paths = wam->getMultikeyPathSet(_opCtx, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + { + // Oplog application conservatively uses the first optime in the batch, insertTime0, as + // the point at which the index became multikey, despite the fact that the earliest op + // which caused the index to become multikey did not occur until insertTime1. This works + // because if we construct a query plan that incorrectly believes a particular path to + // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if + // we were to construct a query plan that incorrectly believes a path is NOT multikey, + // it could produce incorrect results. + OneOffRead oor(_opCtx, insertTime0.asTimestamp()); + const WildcardAccessMethod* wam = + dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod); + MultikeyMetadataAccessStats stats; + std::set<FieldRef> paths = wam->getMultikeyPathSet(_opCtx, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + } +}; + +class SecondarySetWildcardIndexMultikeyOnUpdate : public StorageTimestampTest { + +public: + void run() { + // Pretend to be a secondary. + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnUpdate"); + reset(nss); + UUID uuid = UUID::gen(); + { + AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX); + uuid = autoColl.getCollection()->uuid(); + } + auto indexName = "a_1"; + auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v" + << static_cast<int>(kIndexVersion)); + ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const LogicalTime insertTime0 = _clock->reserveTicks(1); + const LogicalTime updateTime1 = _clock->reserveTicks(1); + const LogicalTime updateTime2 = _clock->reserveTicks(1); + + BSONObj doc0 = BSON("_id" << 0 << "a" << 3); + BSONObj doc1 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2))); + BSONObj doc2 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2))); + auto op0 = repl::OplogEntry( + BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "i" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0)); + auto op1 = repl::OplogEntry( + BSON("ts" << updateTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "u" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1 + << "o2" << BSON("_id" << 0))); + auto op2 = repl::OplogEntry( + BSON("ts" << updateTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op" + << "u" + << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2 + << "o2" << BSON("_id" << 0))); + + // Coerce oplog application to apply op2 before op1. + std::vector<repl::OplogEntry> ops = {op0, op2, op1}; + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto writerPool = repl::makeReplWriterPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, // task executor. not required for applyOplogBatch(). + nullptr, // oplog buffer. not required for applyOplogBatch(). + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering), + writerPool.get()); + + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); + + AutoGetCollectionForRead autoColl(_opCtx, nss); + auto wildcardIndexDescriptor = + autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName); + const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection() + ->getIndexCatalog() + ->getEntry(wildcardIndexDescriptor) + ->accessMethod(); + { + // Verify that, even though op2 was applied first, the multikey state is observed in all + // WiredTiger transactions that can contain the data written by op1. + OneOffRead oor(_opCtx, updateTime1.asTimestamp()); + const WildcardAccessMethod* wam = + dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod); + MultikeyMetadataAccessStats stats; + std::set<FieldRef> paths = wam->getMultikeyPathSet(_opCtx, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + { + // Oplog application conservatively uses the first optime in the batch, insertTime0, as + // the point at which the index became multikey, despite the fact that the earliest op + // which caused the index to become multikey did not occur until updateTime1. This works + // because if we construct a query plan that incorrectly believes a particular path to + // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if + // we were to construct a query plan that incorrectly believes a path is NOT multikey, + // it could produce incorrect results. + OneOffRead oor(_opCtx, insertTime0.asTimestamp()); + const WildcardAccessMethod* wam = + dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod); + MultikeyMetadataAccessStats stats; + std::set<FieldRef> paths = wam->getMultikeyPathSet(_opCtx, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + } +}; + class InitialSyncSetIndexMultikeyOnInsert : public StorageTimestampTest { public: @@ -4030,6 +4225,8 @@ public: addIf<SecondaryCreateCollectionBetweenInserts>(); addIf<PrimaryCreateCollectionInApplyOps>(); addIf<SecondarySetIndexMultikeyOnInsert>(); + addIf<SecondarySetWildcardIndexMultikeyOnInsert>(); + addIf<SecondarySetWildcardIndexMultikeyOnUpdate>(); addIf<InitialSyncSetIndexMultikeyOnInsert>(); addIf<PrimarySetIndexMultikeyOnInsert>(); addIf<PrimarySetIndexMultikeyOnInsertUnreplicated>(); diff --git a/src/mongo/dbtests/validate_tests.cpp b/src/mongo/dbtests/validate_tests.cpp index 39f99f37c18..60baea48b63 100644 --- a/src/mongo/dbtests/validate_tests.cpp +++ b/src/mongo/dbtests/validate_tests.cpp @@ -895,8 +895,7 @@ public: id1, IndexAccessMethod::kNoopOnSuppressedErrorFn); - auto removeStatus = - iam->removeKeys(&_opCtx, {keys.begin(), keys.end()}, id1, options, &numDeleted); + auto removeStatus = iam->removeKeys(&_opCtx, keys, id1, options, &numDeleted); auto insertStatus = iam->insert(&_opCtx, badKey, id1, options, nullptr, &numInserted); ASSERT_EQUALS(numDeleted, 1); @@ -1309,8 +1308,7 @@ public: nullptr, rid, IndexAccessMethod::kNoopOnSuppressedErrorFn); - auto removeStatus = - iam->removeKeys(&_opCtx, {keys.begin(), keys.end()}, rid, options, &numDeleted); + auto removeStatus = iam->removeKeys(&_opCtx, keys, rid, options, &numDeleted); ASSERT_EQUALS(numDeleted, 1); ASSERT_OK(removeStatus); @@ -1542,8 +1540,7 @@ public: nullptr, rid, IndexAccessMethod::kNoopOnSuppressedErrorFn); - auto removeStatus = - iam->removeKeys(&_opCtx, {keys.begin(), keys.end()}, rid, options, &numDeleted); + auto removeStatus = iam->removeKeys(&_opCtx, keys, rid, options, &numDeleted); ASSERT_EQUALS(numDeleted, 1); ASSERT_OK(removeStatus); @@ -1578,8 +1575,7 @@ public: nullptr, rid, IndexAccessMethod::kNoopOnSuppressedErrorFn); - auto removeStatus = - iam->removeKeys(&_opCtx, {keys.begin(), keys.end()}, rid, options, &numDeleted); + auto removeStatus = iam->removeKeys(&_opCtx, keys, rid, options, &numDeleted); ASSERT_EQUALS(numDeleted, 1); ASSERT_OK(removeStatus); @@ -1705,9 +1701,7 @@ public: int64_t numInserted; auto insertStatus = iam->insertKeys( &_opCtx, - {keys.begin(), keys.end()}, - {}, - MultikeyPaths{}, + keys, swRecordId.getValue(), options, [this, &interceptor](const KeyString::Value& duplicateKey) { @@ -1746,9 +1740,7 @@ public: ASSERT_EQ(1, keys.size()); auto insertStatus = iam->insertKeys( &_opCtx, - {keys.begin(), keys.end()}, - {}, - MultikeyPaths{}, + keys, swRecordId.getValue(), options, [this, &interceptor](const KeyString::Value& duplicateKey) { |