summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Jack Mulrow <jack.mulrow@mongodb.com> 2018-01-24 17:32:05 -0500
committer Jack Mulrow <jack.mulrow@mongodb.com> 2018-01-30 14:36:18 -0500
commit 153610cb7439546ef8897a2f4eda05b7fb50af5c (patch)
tree ba91c8ba31d16a306e68293a4f3c7f4846feadf1 /src
parent a3c08f54f5364418e4dc91f7cb645286daa95a95 (diff)
download mongo-153610cb7439546ef8897a2f4eda05b7fb50af5c.tar.gz
SERVER-32593 Stepdown after failed moveChunk commit shouldn't crash source shard primary
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/s/migration_source_manager.cpp 20
1 file changed, 20 insertions, 0 deletions
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index b49cdad4392..6c831b1a0ae 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/catalog/catalog_raii.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/migration_chunk_cloner_source_legacy.h"
#include "mongo/db/s/migration_util.h"
#include "mongo/db/s/shard_metadata_util.h"
@@ -477,6 +478,25 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
shutdown(waitForShutdown());
}
+ // If we failed to get the latest config optime because we stepped down as primary, then it
+ // is safe to fail without crashing because the new primary will fetch the latest optime
+ // when it recovers the sharding state recovery document, as long as we also clear the
+ // metadata for this collection, forcing subsequent callers to do a full refresh. Check if
+ // this node can accept writes for this collection as a proxy for it being primary.
+ if (!status.isOK()) {
+ AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
+ if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, getNss())) {
+ CollectionShardingState::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+ uassertStatusOK(status.withContext(
+ str::stream() << "Unable to verify migration commit for chunk: "
+ << redact(_args.toString())
+ << " because the node's replication role changed. Metadata "
+ "was cleared for: "
+ << getNss().ns()
+ << ", so it will get a full refresh when accessed again."));
+ }
+ }
+
fassertStatusOK(
40137,
status.withContext(