From a6d55fca3a30a7f41e78904a4a851f1bd6ed8d02 Mon Sep 17 00:00:00 2001 From: Antonio Fuschetto Date: Wed, 26 May 2021 22:42:56 +0000 Subject: SERVER-57102 Fixing invariants on ShardServerCatalogCacheLoader to consider different terms --- .../db/s/shard_server_catalog_cache_loader.cpp | 17 ++++++++++-- .../s/shard_server_catalog_cache_loader_test.cpp | 32 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) (limited to 'src/mongo/db/s') diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp index 83e4a081bec..5f8dc38a0bb 100644 --- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp +++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp @@ -63,6 +63,7 @@ using CollectionAndChangedChunks = CatalogCacheLoader::CollectionAndChangedChunk namespace { MONGO_FAIL_POINT_DEFINE(hangPersistCollectionAndChangedChunksAfterDropChunks); +MONGO_FAIL_POINT_DEFINE(hangCollectionFlush); AtomicWord taskIdGenerator{0}; @@ -984,6 +985,11 @@ void ShardServerCatalogCacheLoader::_ensureMajorityPrimaryAndScheduleDbTask(Oper void ShardServerCatalogCacheLoader::_runCollAndChunksTasks(const NamespaceString& nss) { auto context = _contexts.makeOperationContext(*Client::getCurrent()); + if (MONGO_unlikely(hangCollectionFlush.shouldFail())) { + LOGV2(5710200, "Hit hangCollectionFlush failpoint"); + hangCollectionFlush.pauseWhileSet(); + } + bool taskFinished = false; bool inShutdown = false; try { @@ -1233,10 +1239,10 @@ ShardServerCatalogCacheLoader::_getCompletePersistedMetadataForSecondarySinceVer LOGV2_FOR_CATALOG_REFRESH( 24114, 1, - "Cache loader read meatadata while updates were being applied: this metadata may be " + "Cache loader read metadata while updates were being applied: this metadata may be " "incomplete. Retrying. Refresh state before read: {beginRefreshState}. 
Current refresh " "state: {endRefreshState}", - "Cache loader read meatadata while updates were being applied: this metadata may be " + "Cache loader read metadata while updates were being applied: this metadata may be " "incomplete. Retrying", "beginRefreshState"_attr = beginRefreshState, "endRefreshState"_attr = endRefreshState); @@ -1284,6 +1290,11 @@ void ShardServerCatalogCacheLoader::CollAndChunkTaskList::addTask(collAndChunkTa } const auto& lastTask = _tasks.back(); + if (lastTask.termCreated != task.termCreated) { + _tasks.emplace_back(std::move(task)); + return; + } + if (task.dropped) { invariant(lastTask.maxQueryVersion == task.minQueryVersion, str::stream() << "The version of the added task is not contiguous with that of " @@ -1402,7 +1413,7 @@ ShardServerCatalogCacheLoader::CollAndChunkTaskList::getEnqueuedMetadataForTerm( // Make sure we do not append a duplicate chunk. The diff query is GTE, so there can // be duplicates of the same exact versioned chunk across tasks. This is no problem // for our diff application algorithms, but it can return unpredictable numbers of - // chunks for testing purposes. Eliminate unpredicatable duplicates for testing + // chunks for testing purposes. Eliminate unpredictable duplicates for testing // stability. 
auto taskCollectionAndChangedChunksIt = task.collectionAndChangedChunks->changedChunks.begin(); diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp index 902b09c3a64..3122220654a 100644 --- a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp +++ b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp @@ -79,6 +79,8 @@ public: */ vector setUpChunkLoaderWithFiveChunks(); + void refreshCollectionEpochOnRemoteLoader(); + const KeyPattern kKeyPattern = KeyPattern(BSON(kPattern << 1)); const std::function)> kDoNothingCallbackFn = []( @@ -563,5 +565,35 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun } } +void ShardServerCatalogCacheLoaderTest::refreshCollectionEpochOnRemoteLoader() { + ChunkVersion collectionVersion(1, 2, OID::gen()); + CollectionType collectionType = makeCollectionType(collectionVersion); + vector chunks = makeFiveChunks(collectionVersion); + _remoteLoaderMock->setCollectionRefreshReturnValue(collectionType); + _remoteLoaderMock->setChunkRefreshReturnValue(chunks); +} + +TEST_F(ShardServerCatalogCacheLoaderTest, CollAndChunkTasksConsistency) { + // Put some metadata in the persisted cache (config.cache.chunks.*) + refreshCollectionEpochOnRemoteLoader(); + _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get(); + _shardLoader->waitForCollectionFlush(operationContext(), kNss); + + // Pause the thread processing the pending updates on metadata + FailPointEnableBlock failPoint("hangCollectionFlush"); + + // Put a first task in the list of pending updates on metadata (in-memory) + refreshCollectionEpochOnRemoteLoader(); + _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get(); + + // Bump the shard's term + _shardLoader->onStepUp(); + + // Putting a second task causes a verification of the contiguous versions in the list of + // pending updates on metadata + 
refreshCollectionEpochOnRemoteLoader(); + _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get(); +} + } // namespace } // namespace mongo -- cgit v1.2.1