summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2022-11-04 16:14:31 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-09 14:47:09 +0000
commitdefa7826e96929cabbaa204f91a39d6bd6e30239 (patch)
tree579409d514767795c9bfc23e337b965fe7b3a057
parentd85c4a5b97b52e1bc87554fa43ffcb7fd61e0c63 (diff)
downloadmongo-defa7826e96929cabbaa204f91a39d6bd6e30239.tar.gz
SERVER-70879 Fix race where multiple threads are turning an index multikey concurrently
Fix race where the writes to the durable catalog are serialized but the second writer is using a stale in-memory state due to the commit handler not being executed yet. This interleaving does not cause a write conflict as the writes are serialized but the second writer overwrote the multikey paths set by the first writer. To handle it we always read the latest state from the durable catalog before performing multikey writes. This guarantees that we always are operating on the latest state. Some care was needed to ensure index offsets remain stable. (cherry picked from commit 74bf9118e38ff4ee63db8bb17146dabe3ef07392)
-rw-r--r--src/mongo/db/catalog/collection_impl.cpp22
-rw-r--r--src/mongo/db/storage/kv/durable_catalog_test.cpp121
2 files changed, 142 insertions, 1 deletions
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 5f1423e0da9..9b444a5a351 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -2029,11 +2029,31 @@ bool CollectionImpl::setIndexIsMultikey(OperationContext* opCtx,
}
BSONCollectionCatalogEntry::MetaData* metadata = nullptr;
bool hasSetMultikey = false;
+
if (auto it = uncommittedMultikeys->find(this); it != uncommittedMultikeys->end()) {
metadata = &it->second;
hasSetMultikey = setMultikey(*metadata);
} else {
- BSONCollectionCatalogEntry::MetaData metadataLocal(*_metadata);
+ // First time this OperationContext needs to change multikey information for this
+ // collection. We cannot use the cached metadata in this collection as we may have just
+ // committed a multikey change concurrently to the storage engine without being able to
+ // observe it if its onCommit handlers haven't run yet.
+ auto metadataLocal = *DurableCatalog::get(opCtx)->getMetaData(opCtx, getCatalogId());
+ // When reading from the durable catalog the index offsets are different because when
+ // removing indexes in-memory just zeros out the slot instead of actually removing it. We
+ // must adjust the entries so they match how they are stored in _metadata so we can rely on
+ // the index offsets being stable. The order of valid indexes are the same, so we can
+ // iterate from the end and move them into the right positions.
+ int localIdx = metadataLocal.indexes.size() - 1;
+ metadataLocal.indexes.resize(_metadata->indexes.size());
+ for (int i = _metadata->indexes.size() - 1; i >= 0 && localIdx != i; --i) {
+ if (!_metadata->indexes[i].spec.isEmpty()) {
+ metadataLocal.indexes[i] = std::move(metadataLocal.indexes[localIdx]);
+ metadataLocal.indexes[localIdx] = {};
+ --localIdx;
+ }
+ }
+
hasSetMultikey = setMultikey(metadataLocal);
if (hasSetMultikey) {
metadata = &uncommittedMultikeys->emplace(this, std::move(metadataLocal)).first->second;
diff --git a/src/mongo/db/storage/kv/durable_catalog_test.cpp b/src/mongo/db/storage/kv/durable_catalog_test.cpp
index 595acc34bb1..a2e2ed7b884 100644
--- a/src/mongo/db/storage/kv/durable_catalog_test.cpp
+++ b/src/mongo/db/storage/kv/durable_catalog_test.cpp
@@ -465,6 +465,127 @@ TEST_F(DurableCatalogTest, NoOpWhenEntireIndexAlreadySetAsMultikey) {
}
}
+class ConcurrentMultikeyTest : public DurableCatalogTest {
+public:
+ void testConcurrentMultikey(BSONObj keyPattern,
+ const MultikeyPaths& first,
+ const MultikeyPaths& second,
+ const MultikeyPaths& expected) {
+ /*
+ * This test verifies that we can set multikey on two threads concurrently with the
+ * following interleaving that do not cause a WCE from the storage engine:
+ *
+ * T1: open storage snapshot
+ *
+ * T1: set multikey paths to {first}
+ *
+ * T1: commit
+ *
+ * T2: open storage snapshot
+ *
+ * T2: set multikey paths to {second}
+ *
+ * T2: commit
+ *
+ * T1: onCommit handler
+ *
+ * T2: onCommit handler
+ *
+ */
+
+ auto indexEntry = createIndex(keyPattern);
+ auto collection = getCollection();
+
+ mongo::Mutex mutex;
+ stdx::condition_variable cv;
+ int numMultikeyCalls = 0;
+
+ // Start a thread that will set multikey paths to 'first'. It will commit the change to the
+ // storage engine but block before running the onCommit handler that updates the in-memory
+ // state in the Collection instance.
+ stdx::thread t([svcCtx = getServiceContext(),
+ &collection,
+ &indexEntry,
+ &first,
+ &mutex,
+ &cv,
+ &numMultikeyCalls] {
+ ThreadClient client(svcCtx);
+ auto opCtx = client->makeOperationContext();
+
+ Lock::GlobalLock globalLock{opCtx.get(), MODE_IX};
+ WriteUnitOfWork wuow(opCtx.get());
+
+ // Register a onCommit that will block until the main thread has committed its multikey
+ // write. This onCommit handler is registered before any writes and will thus be
+ // performed first, blocking all other onCommit handlers.
+ opCtx->recoveryUnit()->onCommit(
+ [&mutex, &cv, &numMultikeyCalls](boost::optional<Timestamp> commitTime) {
+ stdx::unique_lock lock(mutex);
+
+ // Let the main thread now we have committed to the storage engine
+ numMultikeyCalls = 1;
+ cv.notify_all();
+
+ // Wait until the main thread has committed its multikey write
+ cv.wait(lock, [&numMultikeyCalls]() { return numMultikeyCalls == 2; });
+ });
+
+ // Set the index to multikey with 'first' as paths.
+ collection->setIndexIsMultikey(
+ opCtx.get(), indexEntry->descriptor()->indexName(), first);
+ wuow.commit();
+ });
+
+ // Wait for the thread above to commit its multikey write to the storage engine
+ {
+ stdx::unique_lock lock(mutex);
+ cv.wait(lock, [&numMultikeyCalls]() { return numMultikeyCalls == 1; });
+ }
+
+ // Set the index to multikey with 'second' as paths. This will not cause a WCE as the write
+ // in the thread is fully committed to the storage engine.
+ {
+ Lock::GlobalLock globalLock{operationContext(), MODE_IX};
+ WriteUnitOfWork wuow(operationContext());
+ collection->setIndexIsMultikey(
+ operationContext(), indexEntry->descriptor()->indexName(), second);
+ wuow.commit();
+ }
+
+ // Notify the thread that our multikey write is committed
+ {
+ stdx::unique_lock lock(mutex);
+ numMultikeyCalls = 2;
+ cv.notify_all();
+ }
+ t.join();
+
+ // Verify that our Collection instance has 'expected' as multikey paths for this index
+ {
+ MultikeyPaths multikeyPaths;
+ ASSERT(collection->isIndexMultikey(
+ operationContext(), indexEntry->descriptor()->indexName(), &multikeyPaths));
+ assertMultikeyPathsAreEqual(multikeyPaths, expected);
+ }
+
+ // Verify that the durable catalog has 'expected' as multikey paths for this index
+ Lock::GlobalLock globalLock{operationContext(), MODE_IS};
+ auto md = getCatalog()->getMetaData(operationContext(), collection->getCatalogId());
+
+ auto indexOffset = md->findIndexOffset(indexEntry->descriptor()->indexName());
+ assertMultikeyPathsAreEqual(md->indexes[indexOffset].multikeyPaths, expected);
+ }
+};
+
+TEST_F(ConcurrentMultikeyTest, MultikeyPathsConcurrentSecondSubset) {
+ testConcurrentMultikey(BSON("a.b" << 1), {{0U, 1U}}, {{0U}}, {{0U, 1U}});
+}
+
+TEST_F(ConcurrentMultikeyTest, MultikeyPathsConcurrentDistinct) {
+ testConcurrentMultikey(BSON("a.b" << 1), {{0U}}, {{1U}}, {{0U, 1U}});
+}
+
TEST_F(DurableCatalogTest, SinglePhaseIndexBuild) {
auto indexEntry = createIndex(BSON("a" << 1));
auto collection = getCollection();