diff options
author | Jack Mulrow <jack.mulrow@mongodb.com> | 2018-01-24 17:32:05 -0500 |
---|---|---|
committer | Jack Mulrow <jack.mulrow@mongodb.com> | 2018-01-30 14:36:18 -0500 |
commit | 153610cb7439546ef8897a2f4eda05b7fb50af5c (patch) | |
tree | ba91c8ba31d16a306e68293a4f3c7f4846feadf1 /src/mongo/db | |
parent | a3c08f54f5364418e4dc91f7cb645286daa95a95 (diff) | |
download | mongo-153610cb7439546ef8897a2f4eda05b7fb50af5c.tar.gz |
SERVER-32593 Stepdown after failed moveChunk commit shouldn't crash source shard primary
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/s/migration_source_manager.cpp | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp index b49cdad4392..6c831b1a0ae 100644 --- a/src/mongo/db/s/migration_source_manager.cpp +++ b/src/mongo/db/s/migration_source_manager.cpp @@ -36,6 +36,7 @@ #include "mongo/db/catalog/catalog_raii.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/operation_context.h" +#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/migration_chunk_cloner_source_legacy.h" #include "mongo/db/s/migration_util.h" #include "mongo/db/s/shard_metadata_util.h" @@ -477,6 +478,25 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC shutdown(waitForShutdown()); } + // If we failed to get the latest config optime because we stepped down as primary, then it + // is safe to fail without crashing because the new primary will fetch the latest optime + // when it recovers the sharding state recovery document, as long as we also clear the + // metadata for this collection, forcing subsequent callers to do a full refresh. Check if + // this node can accept writes for this collection as a proxy for it being primary. + if (!status.isOK()) { + AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X); + if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, getNss())) { + CollectionShardingState::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr); + uassertStatusOK(status.withContext( + str::stream() << "Unable to verify migration commit for chunk: " + << redact(_args.toString()) + << " because the node's replication role changed. Metadata " + "was cleared for: " + << getNss().ns() + << ", so it will get a full refresh when accessed again.")); + } + } + fassertStatusOK( 40137, status.withContext( |