summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2021-02-02 14:48:40 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-02-08 20:02:32 +0000
commitecd41f8b3bfe2154921cbcede9040d535a46e0c5 (patch)
tree49ba16820940612d8a771ec64322edbe5b79b559
parent8147d05d7038c6bf08a0b523fd69a44c13553cf1 (diff)
downloadmongo-ecd41f8b3bfe2154921cbcede9040d535a46e0c5.tar.gz
SERVER-47694: fix multikey. again
Split the single _isMultikey variable on an IndexCatalogEntry(Impl) into two separate variables: _isMultikeyForReader and _isMultikeyForWriter. _isMultikeyForReader is flipped as early as possible. Readers concurrent with multikey flipping may forgo a possible optimization when their snapshot sees no multikey data. _isMultikeyForWriter is flipped after the storage engine commits a multikey change to the on-disk catalog. At this point, writers may, under some circumstances, optimize away some catalog writes. Move logic for optimizing readers (multikey paths, clearing query cache) outside of the onCommit. Adds a failpoint widenWUOWChangesWindow which sleeps transaction commit and onCommit/onRollback handlers. Have validate assert multikey paths are set correctly for the documents observed during its collection scan. (cherry picked from commit 3566db153ea61fb10d3ef11ea917fc7bc93eac4d)
-rw-r--r--src/mongo/db/catalog/index_catalog_entry_impl.cpp143
-rw-r--r--src/mongo/db/catalog/index_catalog_entry_impl.h9
-rw-r--r--src/mongo/db/multi_key_path_tracker.cpp13
-rw-r--r--src/mongo/db/multi_key_path_tracker.h5
4 files changed, 96 insertions, 74 deletions
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
index 325689f9060..0537f6565d1 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
@@ -74,7 +74,9 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx,
{
stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex);
- _isMultikey.store(_catalogIsMultikey(opCtx, &_indexMultikeyPaths));
+ const bool isMultikey = _catalogIsMultikey(opCtx, &_indexMultikeyPaths);
+ _isMultikeyForRead.store(isMultikey);
+ _isMultikeyForWrite.store(isMultikey);
_indexTracksMultikeyPathsInCatalog = !_indexMultikeyPaths.empty();
}
@@ -138,7 +140,7 @@ bool IndexCatalogEntryImpl::isReady(OperationContext* opCtx) const {
}
bool IndexCatalogEntryImpl::isMultikey(OperationContext* opCtx) const {
- auto ret = _isMultikey.load();
+ auto ret = _isMultikeyForRead.load();
if (ret) {
return true;
}
@@ -207,7 +209,7 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
// If the index is already set as multikey and we don't have any path-level information to
// update, then there's nothing more for us to do.
bool hasNoPathLevelInfo = (!_indexTracksMultikeyPathsInCatalog && multikeyMetadataKeys.empty());
- if (hasNoPathLevelInfo && _isMultikey.load()) {
+ if (hasNoPathLevelInfo && _isMultikeyForWrite.load()) {
return;
}
@@ -273,32 +275,35 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
opCtx, multikeyMetadataKeys, kMultikeyMetadataKeyId, {}, {}, nullptr));
}
- // It's possible that the index type (e.g. ascending/descending index) supports tracking
- // path-level multikey information, but this particular index doesn't.
- // CollectionCatalogEntry::setIndexIsMultikey() requires that we discard the path-level
- // multikey information in order to avoid unintentionally setting path-level multikey
- // information on an index created before 3.4.
- bool indexMetadataHasChanged;
-
- // The commit handler for a transaction that sets the multikey flag. When the recovery unit
- // commits, update the multikey paths if needed and clear the plan cache if the index metadata
- // has changed.
- auto onMultikeyCommitFn = [this, multikeyPaths](bool indexMetadataHasChanged) {
- _isMultikey.store(true);
-
- if (_indexTracksMultikeyPathsInCatalog) {
- stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex);
- for (size_t i = 0; i < multikeyPaths.size(); ++i) {
- _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end());
- }
+ // In the absense of using the storage engine to read from the catalog, we must set multikey
+ // prior to the storage engine transaction committing.
+ //
+ // Moreover, there must not be an `onRollback` handler to reset this back to false. Given a long
+ // enough pause in processing `onRollback` handlers, a later writer that successfully flipped
+ // multikey can be undone. Alternatively, one could use a counter instead of a boolean to avoid
+ // that problem.
+ _isMultikeyForRead.store(true);
+ if (_indexTracksMultikeyPathsInCatalog) {
+ stdx::lock_guard<Latch> lk(_indexMultikeyPathsMutex);
+ for (size_t i = 0; i < multikeyPaths.size(); ++i) {
+ _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end());
}
+ }
+
+ if (!opCtx->inMultiDocumentTransaction()) {
+ const bool indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey(
+ opCtx, _ns, _descriptor->indexName(), paths);
if (indexMetadataHasChanged && _infoCache) {
LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern()
<< " set to multi key.";
_infoCache->clearQueryCache();
}
- };
+
+ opCtx->recoveryUnit()->onCommit(
+ [this](boost::optional<Timestamp>) { _isMultikeyForWrite.store(true); });
+ return;
+ }
// If we are inside a multi-document transaction, we write the on-disk multikey update in a
// separate transaction so that it will not generate prepare conflicts with other operations
@@ -307,59 +312,51 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
// multikey flag write and the parent transaction. We can do this write separately and commit it
// before the parent transaction commits.
auto txnParticipant = TransactionParticipant::get(opCtx);
- if (opCtx->inMultiDocumentTransaction()) {
- TransactionParticipant::SideTransactionBlock sideTxn(opCtx);
- writeConflictRetry(opCtx, "set index multikey", _ns.ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
-
- // If we have a prepare optime for recovery, then we always use that. During recovery of
- // prepared transactions, the logical clock may not yet be initialized, so we use the
- // prepare timestamp of the transaction for this write. This is safe since the prepare
- // timestamp is always <= the commit timestamp of a transaction, which satisfies the
- // correctness requirement for multikey writes i.e. they must occur at or before the
- // first write that set the multikey flag.
- auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
- Timestamp writeTs = recoveryPrepareOpTime.isNull()
- ? LogicalClock::get(opCtx)->getClusterTime().asTimestamp()
- : recoveryPrepareOpTime.getTimestamp();
-
- auto status = opCtx->recoveryUnit()->setTimestamp(writeTs);
- if (status.code() == ErrorCodes::BadValue) {
- log() << "Temporarily could not timestamp the multikey catalog write, retrying. "
- << status.reason();
- throw WriteConflictException();
- }
- fassert(31164, status);
- indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey(
- opCtx, _ns, _descriptor->indexName(), paths);
- opCtx->recoveryUnit()->onCommit(
- [onMultikeyCommitFn, indexMetadataHasChanged](boost::optional<Timestamp>) {
- onMultikeyCommitFn(indexMetadataHasChanged);
- });
- wuow.commit();
- });
- } else {
- indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey(
+ TransactionParticipant::SideTransactionBlock sideTxn(opCtx);
+ writeConflictRetry(opCtx, "set index multikey", _ns.ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+
+ // If we have a prepare optime for recovery, then we always use that. During recovery of
+ // prepared transactions, the logical clock may not yet be initialized, so we use the
+ // prepare timestamp of the transaction for this write. This is safe since the prepare
+ // timestamp is always <= the commit timestamp of a transaction, which satisfies the
+ // correctness requirement for multikey writes i.e. they must occur at or before the
+ // first write that set the multikey flag.
+ auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
+ Timestamp writeTs = recoveryPrepareOpTime.isNull()
+ ? LogicalClock::get(opCtx)->getClusterTime().asTimestamp()
+ : recoveryPrepareOpTime.getTimestamp();
+
+ auto status = opCtx->recoveryUnit()->setTimestamp(writeTs);
+ if (status.code() == ErrorCodes::BadValue) {
+ log() << "Temporarily could not timestamp the multikey catalog write, retrying. "
+ << status.reason();
+ throw WriteConflictException();
+ }
+ fassert(31164, status);
+ const bool indexMetadataHasChanged = DurableCatalog::get(opCtx)->setIndexIsMultikey(
opCtx, _ns, _descriptor->indexName(), paths);
- }
- opCtx->recoveryUnit()->onCommit(
- [onMultikeyCommitFn, indexMetadataHasChanged](boost::optional<Timestamp>) {
- onMultikeyCommitFn(indexMetadataHasChanged);
- });
-
- // Within a multi-document transaction, reads should be able to see the effect of previous
- // writes done within that transaction. If a previous write in a transaction has set the index
- // to be multikey, then a subsequent read MUST know that fact in order to return correct
- // results. This is true in general for multikey writes. Since we don't update the in-memory
- // multikey flag until after the transaction commits, we track extra information here to let
- // subsequent readers within the same transaction know if this index was set as multikey by a
- // previous write in the transaction.
- if (opCtx->inMultiDocumentTransaction()) {
- invariant(txnParticipant);
+ if (indexMetadataHasChanged && _infoCache) {
+ LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern()
+ << " set to multi key.";
+ _infoCache->clearQueryCache();
+ }
+
+ opCtx->recoveryUnit()->onCommit(
+ [this](boost::optional<Timestamp>) { _isMultikeyForWrite.store(true); });
+ wuow.commit();
+
+ // Within a multi-document transaction, reads should be able to see the effect of previous
+ // writes done within that transaction. If a previous write in a transaction has set the
+ // index to be multikey, then a subsequent read MUST know that fact in order to return
+ // correct results. This is true in general for multikey writes. Since we don't update the
+ // in-memory multikey flag until after the transaction commits, we track extra information
+ // here to let subsequent readers within the same transaction know if this index was set as
+ // multikey by a previous write in the transaction.
txnParticipant.addUncommittedMultikeyPathInfo(MultikeyPathInfo{
_ns, _descriptor->indexName(), multikeyMetadataKeys, std::move(paths)});
- }
+ });
}
void IndexCatalogEntryImpl::setNs(NamespaceString ns) {
@@ -379,6 +376,10 @@ bool IndexCatalogEntryImpl::_catalogIsPresent(OperationContext* opCtx) const {
bool IndexCatalogEntryImpl::_catalogIsMultikey(OperationContext* opCtx,
MultikeyPaths* multikeyPaths) const {
+ if (_isMultikeyForRead.load()) {
+ return true;
+ }
+
return DurableCatalog::get(opCtx)->isIndexMultikey(
opCtx, _ns, _descriptor->indexName(), multikeyPaths);
}
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h
index 88a5d666888..0b1b7b6f2bd 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.h
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.h
@@ -216,9 +216,12 @@ private:
// member is effectively const after IndexCatalogEntry::init() is called.
bool _indexTracksMultikeyPathsInCatalog = false;
- // Set to true if this index is multikey. '_isMultikey' serves as a cache of the information
- // stored in the NamespaceDetails or DurableCatalog.
- AtomicWord<bool> _isMultikey;
+ // Set to true if this index may contain multikey data.
+ AtomicWord<bool> _isMultikeyForRead;
+
+ // Set to true after a transaction commit successfully updates multikey on the catalog data. At
+ // this point, future writers do not need to update the catalog.
+ AtomicWord<bool> _isMultikeyForWrite;
// Controls concurrent access to '_indexMultikeyPaths'. We acquire this mutex rather than the
// RESOURCE_METADATA lock as a performance optimization so that it is cheaper to detect whether
diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp
index 52ac75d58f5..33112ced2b2 100644
--- a/src/mongo/db/multi_key_path_tracker.cpp
+++ b/src/mongo/db/multi_key_path_tracker.cpp
@@ -68,6 +68,19 @@ void MultikeyPathTracker::mergeMultikeyPaths(MultikeyPaths* toMergeInto,
}
}
+bool MultikeyPathTracker::covers(const MultikeyPaths& parent, const MultikeyPaths& child) {
+ for (size_t idx = 0; idx < parent.size(); ++idx) {
+ auto& parentPath = parent[idx];
+ auto& childPath = child[idx];
+ for (auto&& item : childPath) {
+ if (parentPath.find(item) == parentPath.end()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
void MultikeyPathTracker::addMultikeyPathInfo(MultikeyPathInfo info) {
invariant(_trackMultikeyPathInfo);
// Merge the `MultikeyPathInfo` input into the accumulated value being tracked for the
diff --git a/src/mongo/db/multi_key_path_tracker.h b/src/mongo/db/multi_key_path_tracker.h
index 949a7a7d319..51c2ee800d0 100644
--- a/src/mongo/db/multi_key_path_tracker.h
+++ b/src/mongo/db/multi_key_path_tracker.h
@@ -64,6 +64,11 @@ public:
static void mergeMultikeyPaths(MultikeyPaths* toMergeInto, const MultikeyPaths& newPaths);
+ /**
+ * Return true iff the child's paths are a subset of the parent.
+ */
+ static bool covers(const MultikeyPaths& parent, const MultikeyPaths& child);
+
// Decoration requires a default constructor.
MultikeyPathTracker() = default;