diff options
author | Louis Williams <louis.williams@mongodb.com> | 2021-02-02 14:48:40 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-02-08 20:02:32 +0000 |
commit | ecd41f8b3bfe2154921cbcede9040d535a46e0c5 (patch) | |
tree | 49ba16820940612d8a771ec64322edbe5b79b559 | |
parent | 8147d05d7038c6bf08a0b523fd69a44c13553cf1 (diff) | |
download | mongo-ecd41f8b3bfe2154921cbcede9040d535a46e0c5.tar.gz |
SERVER-47694: fix multikey. again
Split the single _isMultikey variable on an IndexCatalogEntry(Impl) into two
separate variables: _isMultikeyForReader and _isMultikeyForWriter.
_isMultikeyForReader is flipped as early as possible. Readers concurrent
with multikey flipping may forgo a possible optimization when their snapshot
sees no multikey data.
_isMultikeyForWriter is flipped after the storage engine commits a multikey
change to the on-disk catalog. At this point, writers may, under some
circumstances, optimize away some catalog writes.
Move logic for optimizing readers (multikey paths, clearing query cache) outside
of the onCommit.
Adds a failpoint widenWUOWChangesWindow which sleeps transaction commit and
onCommit/onRollback handlers.
Have validate assert multikey paths are set correctly for the documents observed
during its collection scan.
(cherry picked from commit 3566db153ea61fb10d3ef11ea917fc7bc93eac4d)
-rw-r--r-- | src/mongo/db/catalog/index_catalog_entry_impl.cpp | 143 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_catalog_entry_impl.h | 9 | ||||
-rw-r--r-- | src/mongo/db/multi_key_path_tracker.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/multi_key_path_tracker.h | 5 |
4 files changed, 96 insertions, 74 deletions
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index 325689f9060..0537f6565d1 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -74,7 +74,9 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, { stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex); - _isMultikey.store(_catalogIsMultikey(opCtx, &_indexMultikeyPaths)); + const bool isMultikey = _catalogIsMultikey(opCtx, &_indexMultikeyPaths); + _isMultikeyForRead.store(isMultikey); + _isMultikeyForWrite.store(isMultikey); _indexTracksMultikeyPathsInCatalog = !_indexMultikeyPaths.empty(); } @@ -138,7 +140,7 @@ bool IndexCatalogEntryImpl::isReady(OperationContext* opCtx) const { } bool IndexCatalogEntryImpl::isMultikey(OperationContext* opCtx) const { - auto ret = _isMultikey.load(); + auto ret = _isMultikeyForRead.load(); if (ret) { return true; } @@ -207,7 +209,7 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, // If the index is already set as multikey and we don't have any path-level information to // update, then there's nothing more for us to do. bool hasNoPathLevelInfo = (!_indexTracksMultikeyPathsInCatalog && multikeyMetadataKeys.empty()); - if (hasNoPathLevelInfo && _isMultikey.load()) { + if (hasNoPathLevelInfo && _isMultikeyForWrite.load()) { return; } @@ -273,32 +275,35 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, opCtx, multikeyMetadataKeys, kMultikeyMetadataKeyId, {}, {}, nullptr)); } - // It's possible that the index type (e.g. ascending/descending index) supports tracking - // path-level multikey information, but this particular index doesn't. - // CollectionCatalogEntry::setIndexIsMultikey() requires that we discard the path-level - // multikey information in order to avoid unintentionally setting path-level multikey - // information on an index created before 3.4. - bool indexMetadataHasChanged; - - // The commit handler for a transaction that sets the multikey flag. When the recovery unit - // commits, update the multikey paths if needed and clear the plan cache if the index metadata - // has changed. - auto onMultikeyCommitFn = [this, multikeyPaths](bool indexMetadataHasChanged) { - _isMultikey.store(true); - - if (_indexTracksMultikeyPathsInCatalog) { - stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex); - for (size_t i = 0; i < multikeyPaths.size(); ++i) { - _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end()); - } + // In the absense of using the storage engine to read from the catalog, we must set multikey + // prior to the storage engine transaction committing. + // + // Moreover, there must not be an `onRollback` handler to reset this back to false. Given a long + // enough pause in processing `onRollback` handlers, a later writer that successfully flipped + // multikey can be undone. Alternatively, one could use a counter instead of a boolean to avoid + // that problem. + _isMultikeyForRead.store(true); + if (_indexTracksMultikeyPathsInCatalog) { + stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex); + for (size_t i = 0; i < multikeyPaths.size(); ++i) { + _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end()); } + } + + if (!opCtx->inMultiDocumentTransaction()) { + const bool indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey( + opCtx, _ns, _descriptor->indexName(), paths); if (indexMetadataHasChanged && _infoCache) { LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern() << " set to multi key."; _infoCache->clearQueryCache(); } - }; + + opCtx->recoveryUnit()->onCommit( + [this](boost::optional<Timestamp>) { _isMultikeyForWrite.store(true); }); + return; + } // If we are inside a multi-document transaction, we write the on-disk multikey update in a // separate transaction so that it will not generate prepare conflicts with other operations @@ -307,59 +312,51 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, // multikey flag write and the parent transaction. We can do this write separately and commit it // before the parent transaction commits. auto txnParticipant = TransactionParticipant::get(opCtx); - if (opCtx->inMultiDocumentTransaction()) { - TransactionParticipant::SideTransactionBlock sideTxn(opCtx); - writeConflictRetry(opCtx, "set index multikey", _ns.ns(), [&] { - WriteUnitOfWork wuow(opCtx); - - // If we have a prepare optime for recovery, then we always use that. During recovery of - // prepared transactions, the logical clock may not yet be initialized, so we use the - // prepare timestamp of the transaction for this write. This is safe since the prepare - // timestamp is always <= the commit timestamp of a transaction, which satisfies the - // correctness requirement for multikey writes i.e. they must occur at or before the - // first write that set the multikey flag. - auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery(); - Timestamp writeTs = recoveryPrepareOpTime.isNull() - ? LogicalClock::get(opCtx)->getClusterTime().asTimestamp() - : recoveryPrepareOpTime.getTimestamp(); - - auto status = opCtx->recoveryUnit()->setTimestamp(writeTs); - if (status.code() == ErrorCodes::BadValue) { - log() << "Temporarily could not timestamp the multikey catalog write, retrying. " - << status.reason(); - throw WriteConflictException(); - } - fassert(31164, status); - indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey( - opCtx, _ns, _descriptor->indexName(), paths); - opCtx->recoveryUnit()->onCommit( - [onMultikeyCommitFn, indexMetadataHasChanged](boost::optional<Timestamp>) { - onMultikeyCommitFn(indexMetadataHasChanged); - }); - wuow.commit(); - }); - } else { - indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey( + TransactionParticipant::SideTransactionBlock sideTxn(opCtx); + writeConflictRetry(opCtx, "set index multikey", _ns.ns(), [&] { + WriteUnitOfWork wuow(opCtx); + + // If we have a prepare optime for recovery, then we always use that. During recovery of + // prepared transactions, the logical clock may not yet be initialized, so we use the + // prepare timestamp of the transaction for this write. This is safe since the prepare + // timestamp is always <= the commit timestamp of a transaction, which satisfies the + // correctness requirement for multikey writes i.e. they must occur at or before the + // first write that set the multikey flag. + auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery(); + Timestamp writeTs = recoveryPrepareOpTime.isNull() + ? LogicalClock::get(opCtx)->getClusterTime().asTimestamp() + : recoveryPrepareOpTime.getTimestamp(); + + auto status = opCtx->recoveryUnit()->setTimestamp(writeTs); + if (status.code() == ErrorCodes::BadValue) { + log() << "Temporarily could not timestamp the multikey catalog write, retrying. " + << status.reason(); + throw WriteConflictException(); + } + fassert(31164, status); + const bool indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey( opCtx, _ns, _descriptor->indexName(), paths); - } - opCtx->recoveryUnit()->onCommit( - [onMultikeyCommitFn, indexMetadataHasChanged](boost::optional<Timestamp>) { - onMultikeyCommitFn(indexMetadataHasChanged); - }); - - // Within a multi-document transaction, reads should be able to see the effect of previous - // writes done within that transaction. If a previous write in a transaction has set the index - // to be multikey, then a subsequent read MUST know that fact in order to return correct - // results. This is true in general for multikey writes. Since we don't update the in-memory - // multikey flag until after the transaction commits, we track extra information here to let - // subsequent readers within the same transaction know if this index was set as multikey by a - // previous write in the transaction. - if (opCtx->inMultiDocumentTransaction()) { - invariant(txnParticipant); + if (indexMetadataHasChanged && _infoCache) { + LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern() + << " set to multi key."; + _infoCache->clearQueryCache(); + } + + opCtx->recoveryUnit()->onCommit( + [this](boost::optional<Timestamp>) { _isMultikeyForWrite.store(true); }); + wuow.commit(); + + // Within a multi-document transaction, reads should be able to see the effect of previous + // writes done within that transaction. If a previous write in a transaction has set the + // index to be multikey, then a subsequent read MUST know that fact in order to return + // correct results. This is true in general for multikey writes. Since we don't update the + // in-memory multikey flag until after the transaction commits, we track extra information + // here to let subsequent readers within the same transaction know if this index was set as + // multikey by a previous write in the transaction. txnParticipant.addUncommittedMultikeyPathInfo(MultikeyPathInfo{ _ns, _descriptor->indexName(), multikeyMetadataKeys, std::move(paths)}); - } + }); } void IndexCatalogEntryImpl::setNs(NamespaceString ns) { @@ -379,6 +376,10 @@ bool IndexCatalogEntryImpl::_catalogIsPresent(OperationContext* opCtx) const { bool IndexCatalogEntryImpl::_catalogIsMultikey(OperationContext* opCtx, MultikeyPaths* multikeyPaths) const { + if (_isMultikeyForRead.load()) { + return true; + } + return DurableCatalog::get(opCtx)->isIndexMultikey( opCtx, _ns, _descriptor->indexName(), multikeyPaths); } diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index 88a5d666888..0b1b7b6f2bd 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -216,9 +216,12 @@ private: // member is effectively const after IndexCatalogEntry::init() is called. bool _indexTracksMultikeyPathsInCatalog = false; - // Set to true if this index is multikey. '_isMultikey' serves as a cache of the information - // stored in the NamespaceDetails or DurableCatalog. - AtomicWord<bool> _isMultikey; + // Set to true if this index may contain multikey data. + AtomicWord<bool> _isMultikeyForRead; + + // Set to true after a transaction commit successfully updates multikey on the catalog data. At + // this point, future writers do not need to update the catalog. + AtomicWord<bool> _isMultikeyForWrite; // Controls concurrent access to '_indexMultikeyPaths'. We acquire this mutex rather than the // RESOURCE_METADATA lock as a performance optimization so that it is cheaper to detect whether diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp index 52ac75d58f5..33112ced2b2 100644 --- a/src/mongo/db/multi_key_path_tracker.cpp +++ b/src/mongo/db/multi_key_path_tracker.cpp @@ -68,6 +68,19 @@ void MultikeyPathTracker::mergeMultikeyPaths(MultikeyPaths* toMergeInto, } } +bool MultikeyPathTracker::covers(const MultikeyPaths& parent, const MultikeyPaths& child) { + for (size_t idx = 0; idx < parent.size(); ++idx) { + auto& parentPath = parent[idx]; + auto& childPath = child[idx]; + for (auto&& item : childPath) { + if (parentPath.find(item) == parentPath.end()) { + return false; + } + } + } + return true; +} + void MultikeyPathTracker::addMultikeyPathInfo(MultikeyPathInfo info) { invariant(_trackMultikeyPathInfo); // Merge the `MultikeyPathInfo` input into the accumulated value being tracked for the diff --git a/src/mongo/db/multi_key_path_tracker.h b/src/mongo/db/multi_key_path_tracker.h index 949a7a7d319..51c2ee800d0 100644 --- a/src/mongo/db/multi_key_path_tracker.h +++ b/src/mongo/db/multi_key_path_tracker.h @@ -64,6 +64,11 @@ public: static void mergeMultikeyPaths(MultikeyPaths* toMergeInto, const MultikeyPaths& newPaths); + /** + * Return true iff the child's paths are a subset of the parent. + */ + static bool covers(const MultikeyPaths& parent, const MultikeyPaths& child); + // Decoration requires a default constructor. MultikeyPathTracker() = default; |