diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2021-11-17 03:26:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-17 03:51:33 +0000 |
commit | 963c540bdc58509e4938f2d5981f13c66e012b59 (patch) | |
tree | f9ffa5b8b08366522b6e904377cedc8fce0a6b08 /src/mongo | |
parent | 4c57f6bca3334bee1118a695d871db5346c75ff5 (diff) | |
download | mongo-963c540bdc58509e4938f2d5981f13c66e012b59.tar.gz |
SERVER-61482 Avoid resharding holding open an oplog hole on configsvr.
(cherry picked from commit 2d3592d80fb851e00b519bfeeea513aaff6d0c95)
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/s/resharding/resharding_op_observer.cpp | 26 |
1 file changed, 19 insertions, 7 deletions
```diff
diff --git a/src/mongo/db/s/resharding/resharding_op_observer.cpp b/src/mongo/db/s/resharding/resharding_op_observer.cpp
index 326b249c4a4..d299c498984 100644
--- a/src/mongo/db/s/resharding/resharding_op_observer.cpp
+++ b/src/mongo/db/s/resharding/resharding_op_observer.cpp
@@ -216,14 +216,26 @@ void ReshardingOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateEn
     if (args.nss == NamespaceString::kConfigReshardingOperationsNamespace) {
         auto newCoordinatorDoc = ReshardingCoordinatorDocument::parse(
             IDLParserErrorContext("reshardingCoordinatorDoc"), args.updateArgs.updatedDoc);
-        auto reshardingId = BSON(ReshardingCoordinatorDocument::kReshardingUUIDFieldName
-                                 << newCoordinatorDoc.getReshardingUUID());
-        auto observer = getReshardingCoordinatorObserver(opCtx, reshardingId);
-        opCtx->recoveryUnit()->onCommit(
-            [observer = std::move(observer), newCoordinatorDoc = std::move(newCoordinatorDoc)](
-                boost::optional<Timestamp> unusedCommitTime) mutable {
+        opCtx->recoveryUnit()->onCommit([opCtx, newCoordinatorDoc = std::move(newCoordinatorDoc)](
+                                            boost::optional<Timestamp> unusedCommitTime) mutable {
+            try {
+                // It is possible that the ReshardingCoordinatorService is still being rebuilt. We
+                // must defer calling ReshardingCoordinator::lookup() until after our storage
+                // transaction has committed to ensure we aren't holding open an oplog hole and
+                // preventing replication from making progress while we wait.
+                auto reshardingId = BSON(ReshardingCoordinatorDocument::kReshardingUUIDFieldName
+                                         << newCoordinatorDoc.getReshardingUUID());
+                auto observer = getReshardingCoordinatorObserver(opCtx, reshardingId);
                 observer->onReshardingParticipantTransition(newCoordinatorDoc);
-            });
+            } catch (const DBException& ex) {
+                LOGV2_INFO(6148200,
+                           "Interrupted while waiting for resharding coordinator to be rebuilt;"
+                           " will retry on new primary",
+                           "namespace"_attr = newCoordinatorDoc.getSourceNss(),
+                           "reshardingUUID"_attr = newCoordinatorDoc.getReshardingUUID(),
+                           "error"_attr = redact(ex.toStatus()));
+            }
+        });
     } else if (args.nss.isTemporaryReshardingCollection()) {
         const std::vector<InsertStatement> updateDoc{InsertStatement{args.updateArgs.updatedDoc}};
         assertCanExtractShardKeyFromDocs(opCtx, args.nss, updateDoc.begin(), updateDoc.end());
```