summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntonio Fuschetto <antonio.fuschetto@mongodb.com>2021-05-26 22:44:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-28 08:10:53 +0000
commitb0a976653a626bb469754e658c978635fae87c8a (patch)
treedb384775c6a4b2bda98d81dca9159429e7d9bd45
parent1d2530b5fa056a1b389acb14dbd2c931b1dd6cc0 (diff)
downloadmongo-b0a976653a626bb469754e658c978635fae87c8a.tar.gz
SERVER-57102 Fixing invariants on ShardServerCatalogCacheLoader to consider different terms
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader.cpp17
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp32
2 files changed, 47 insertions, 2 deletions
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
index c1efae18af7..c9dbd9dd531 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
@@ -48,6 +48,7 @@
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
#include "mongo/stdx/memory.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
namespace mongo {
@@ -58,6 +59,8 @@ using CollectionAndChangedChunks = CatalogCacheLoader::CollectionAndChangedChunk
namespace {
+MONGO_FAIL_POINT_DEFINE(hangCollectionFlush);
+
AtomicUInt64 taskIdGenerator{0};
/**
@@ -930,6 +933,11 @@ Status ShardServerCatalogCacheLoader::_ensureMajorityPrimaryAndScheduleDbTask(
void ShardServerCatalogCacheLoader::_runCollAndChunksTasks(const NamespaceString& nss) {
auto context = _contexts.makeOperationContext(*Client::getCurrent());
+ if (MONGO_unlikely(hangCollectionFlush.shouldFail())) {
+ LOG(0) << "Hit hangCollectionFlush failpoint";
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangCollectionFlush);
+ }
+
bool taskFinished = false;
try {
_updatePersistedCollAndChunksMetadata(context.opCtx(), nss);
@@ -1111,7 +1119,7 @@ ShardServerCatalogCacheLoader::_getCompletePersistedMetadataForSecondarySinceVer
}
LOG_CATALOG_REFRESH(1)
- << "Cache loader read meatadata while updates were being applied: this metadata may"
+ << "Cache loader read metadata while updates were being applied: this metadata may"
<< " be incomplete. Retrying. Refresh state before read: " << beginRefreshState
<< ". Current refresh state: '" << endRefreshState << "'.";
}
@@ -1158,6 +1166,11 @@ void ShardServerCatalogCacheLoader::CollAndChunkTaskList::addTask(collAndChunkTa
}
const auto& lastTask = _tasks.back();
+ if (lastTask.termCreated != task.termCreated) {
+ _tasks.emplace_back(std::move(task));
+ return;
+ }
+
if (task.dropped) {
invariant(lastTask.maxQueryVersion == task.minQueryVersion,
str::stream() << "The version of the added task is not contiguous with that of "
@@ -1282,7 +1295,7 @@ ShardServerCatalogCacheLoader::CollAndChunkTaskList::getEnqueuedMetadataForTerm(
// Make sure we do not append a duplicate chunk. The diff query is GTE, so there can
// be duplicates of the same exact versioned chunk across tasks. This is no problem
// for our diff application algorithms, but it can return unpredictable numbers of
- // chunks for testing purposes. Eliminate unpredicatable duplicates for testing
+ // chunks for testing purposes. Eliminate unpredictable duplicates for testing
// stability.
auto taskCollectionAndChangedChunksIt =
task.collectionAndChangedChunks->changedChunks.begin();
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
index 56f33dfaacf..c36fc9e505e 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
@@ -80,6 +80,8 @@ public:
*/
vector<ChunkType> setUpChunkLoaderWithFiveChunks();
+ void refreshCollectionEpochOnRemoteLoader();
+
const KeyPattern kKeyPattern = KeyPattern(BSON(kPattern << 1));
const stdx::function<void(OperationContext*, StatusWith<CollectionAndChangedChunks>)>
kDoNothingCallbackFn = [](
@@ -564,5 +566,35 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun
}
}
+void ShardServerCatalogCacheLoaderTest::refreshCollectionEpochOnRemoteLoader() {
+ ChunkVersion collectionVersion(1, 2, OID::gen());
+ CollectionType collectionType = makeCollectionType(collectionVersion);
+ vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
+ _remoteLoaderMock->setCollectionRefreshReturnValue(collectionType);
+ _remoteLoaderMock->setChunkRefreshReturnValue(chunks);
+}
+
+TEST_F(ShardServerCatalogCacheLoaderTest, CollAndChunkTasksConsistency) {
+ // Put some metadata in the persisted cache (config.cache.chunks.*)
+ refreshCollectionEpochOnRemoteLoader();
+ _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get();
+ _shardLoader->waitForCollectionFlush(operationContext(), kNss);
+
+ // Pause the thread processing the pending updates on metadata
+ FailPointEnableBlock failPoint("hangCollectionFlush");
+
+ // Put a first task in the list of pending updates on metadata (in-memory)
+ refreshCollectionEpochOnRemoteLoader();
+ _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get();
+
+ // Bump the shard's term
+ _shardLoader->onStepUp();
+
+ // Putting a second task causes a verification of the contiguous versions in the list pending
+ // updates on metadata
+ refreshCollectionEpochOnRemoteLoader();
+ _shardLoader->getChunksSince(kNss, ChunkVersion::UNSHARDED(), kDoNothingCallbackFn)->get();
+}
+
} // namespace
} // namespace mongo