author      Jordi Serra Torrens <jordi.serra-torrens@mongodb.com>    2021-06-17 16:43:15 +0000
committer   Evergreen Agent <no-reply@evergreen.mongodb.com>         2021-07-22 08:13:16 +0000
commit      5ca770642be40552592e00a6290ee7a4c5c1addb (patch)
tree        4e22acc6e46b7202ea4c8aefb824da76a2810def
parent      85b7fdfe5973c41c6835e89b932640b32d173503 (diff)
download    mongo-5ca770642be40552592e00a6290ee7a4c5c1addb.tar.gz
SERVER-56648 Make dropCollection resilient to network partitions
(cherry picked from commit 210c9d9582f0833d6a15a928779a02f4588f168b)
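In brief, the patch makes the drop-collection DDL coordinator's side effects safe to replay after a failover or network partition: config.collections and config.chunks entries are deleted by collection UUID, config.tags entries are removed through the retryable _configsvrRemoveTags command, _shardsvrDropCollectionParticipant is sent with the coordinator's logical session id and txnNumber, and a no-op retryable write is performed on all participants (including the config server) when the coordinator is recovered from disk. If a participant still runs an older 5.0 binary that rejects the sessioned command, the new NotARetryableWriteCommand error is caught and the command is resent without session info.

The scheme leans on the server's ordinary retryable-write semantics: re-sending a statement with the same lsid and txnNumber is deduplicated rather than executed twice. Below is a minimal shell sketch of that property; it is illustrative only and not part of this patch (the collection name and txnNumber are arbitrary, and it assumes a replica set or sharded cluster, since standalone nodes reject txnNumber):

    const session = db.getMongo().startSession();
    const cmd = {
        insert: "retryDemo",              // hypothetical collection, not from the patch
        documents: [{_id: 1}],
        lsid: session.getSessionId(),     // same logical session id on both attempts
        txnNumber: NumberLong(1),         // same txnNumber marks the second send as a retry
    };
    // The first attempt executes the insert; the identical re-send is recognized as a retry
    // and returns the stored result instead of executing again (otherwise it would fail with
    // a DuplicateKey error on _id).
    assert.commandWorked(db.runCommand(cmd));
    assert.commandWorked(db.runCommand(cmd));
    assert.eq(1, db.retryDemo.countDocuments({_id: 1}));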
-rw-r--r-- | jstests/sharding/verify_sessions_expiration_sharded.js                   |  33
-rw-r--r-- | src/mongo/base/error_codes.yml                                           |   1
-rw-r--r-- | src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp |   3
-rw-r--r-- | src/mongo/db/s/drop_collection_coordinator.cpp                           |  89
-rw-r--r-- | src/mongo/db/s/drop_collection_coordinator.h                             |   3
-rw-r--r-- | src/mongo/db/s/drop_database_coordinator.cpp                             |   5
-rw-r--r-- | src/mongo/db/s/rename_collection_coordinator.cpp                         |   4
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.h                                |  12
-rw-r--r-- | src/mongo/db/s/sharding_ddl_util.cpp                                     | 118
-rw-r--r-- | src/mongo/db/s/sharding_ddl_util.h                                       |  36
-rw-r--r-- | src/mongo/db/transaction_validation.cpp                                  |  19
11 files changed, 241 insertions, 82 deletions
diff --git a/jstests/sharding/verify_sessions_expiration_sharded.js b/jstests/sharding/verify_sessions_expiration_sharded.js
index 96cd020aadd..58d868a0c76 100644
--- a/jstests/sharding/verify_sessions_expiration_sharded.js
+++ b/jstests/sharding/verify_sessions_expiration_sharded.js
@@ -31,11 +31,23 @@ const startSession = {
 };
 const failPointName = "waitAfterPinningCursorBeforeGetMoreBatch";
 
-function refreshSessionsAndVerifyCount(mongosConfig, shardConfig, expectedCount) {
+/*
+ * Refresh logical session cache on mongos and shard and check that each one of the session IDs in
+ * the 'expectedSessionIDs' array exist. If 'expectToExist' is false, checks that they don't exist.
+ */
+function refreshSessionsAndVerifyExistence(
+    mongosConfig, shardConfig, expectedSessionIDs, expectToExist = true) {
     mongosConfig.runCommand(refresh);
     shardConfig.runCommand(refresh);
 
-    assert.eq(mongosConfig.system.sessions.count(), expectedCount);
+    const sessionIDs = mongosConfig.system.sessions.find().toArray().map(s => s._id.id);
+
+    // Assert that 'expectedSessionIDs' is a subset of 'sessionIDs'
+    assert(expectedSessionIDs.every(expectedId => {
+        return sessionIDs.some(s => {
+            return bsonBinaryEqual(s, expectedId);
+        }) == expectToExist;
+    }));
 }
 
 function verifyOpenCursorCount(db, expectedCount) {
@@ -59,15 +71,17 @@ let mongosConfig = mongos.getDB("config");
 let shardConfig = shardingTest.rs0.getPrimary().getDB("config");
 
 // 1. Verify that sessions expire from config.system.sessions after the timeout has passed.
+let sessionIDs = [];
 for (let i = 0; i < 5; i++) {
     let res = db.runCommand(startSession);
     assert.commandWorked(res, "unable to start session");
+    sessionIDs.push(res.id.id);
 }
-refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 5);
+refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, sessionIDs);
 
 // Manually delete entries in config.system.sessions to simulate TTL expiration.
 assert.commandWorked(mongosConfig.system.sessions.remove({}));
-refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 0);
+refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, sessionIDs, false /* expectToExist */);
 
 // 2. Verify that getMores after finds will update the 'lastUse' field on documents in the
 // config.system.sessions collection.
@@ -76,15 +90,17 @@ for (let i = 0; i < 10; i++) {
 }
 
 let cursors = [];
+sessionIDs = [];
 for (let i = 0; i < 5; i++) {
     let session = mongos.startSession({});
     assert.commandWorked(session.getDatabase("admin").runCommand({usersInfo: 1}),
                          "initialize the session");
     cursors.push(session.getDatabase(dbName)[testCollName].find({b: 1}).batchSize(1));
     assert(cursors[i].hasNext());
+    sessionIDs.push(session.getSessionId().id);
 }
 
-refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 5);
+refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, sessionIDs);
 verifyOpenCursorCount(mongosConfig, 5);
 
 let sessionsCollectionArray;
@@ -94,7 +110,7 @@ for (let i = 0; i < 3; i++) {
         cursors[j].next();
     }
 
-    refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 5);
+    refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, sessionIDs);
     verifyOpenCursorCount(mongosConfig, 5);
 
     sessionsCollectionArray = getSessions(mongosConfig);
@@ -115,7 +131,7 @@ for (let i = 0; i < 3; i++) {
 // cursors.
 assert.commandWorked(mongosConfig.system.sessions.remove({}));
 
-refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 0);
+refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, sessionIDs, false /* expectToExist */);
 verifyOpenCursorCount(mongosConfig, 0);
 
 for (let i = 0; i < cursors.length; i++) {
@@ -128,6 +144,7 @@ for (let i = 0; i < cursors.length; i++) {
 // 4. Verify that an expired session (simulated by manual deletion) that has a currently
 // running operation will be vivified during the logical session cache refresh.
 let pinnedCursorSession = mongos.startSession();
+let pinnedCursorSessionID = pinnedCursorSession.getSessionId().id;
 let pinnedCursorDB = pinnedCursorSession.getDatabase(dbName);
 
 withPinnedCursor({
@@ -138,7 +155,7 @@ withPinnedCursor({
         assert.commandWorked(mongosConfig.system.sessions.remove({}));
         verifyOpenCursorCount(mongosConfig, 1);
 
-        refreshSessionsAndVerifyCount(mongosConfig, shardConfig, 1);
+        refreshSessionsAndVerifyExistence(mongosConfig, shardConfig, [pinnedCursorSessionID]);
 
         let db = coll.getDB();
         assert.commandWorked(db.runCommand({killCursors: coll.getName(), cursors: [cursorId]}));
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index e809121f4f3..78930ec3a73 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -465,4 +465,5 @@ error_codes:
     - {code: 14031,name: OutOfDiskSpace}
     - {code: 17280,name: OBSOLETE_KeyTooLong}
     - {code: 46841,name: ClientMarkedKilled,categories: [Interruption,CancellationError]}
+    - {code: 50768,name: NotARetryableWriteCommand}
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
index 1af7c6d40f3..c4f0beb533d 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
@@ -486,7 +486,8 @@ void ShardingCatalogManager::renameShardedMetadata(
     } else {
         // Remove stale CSRS metadata in case the source collection is unsharded and the
         // target collection was sharded
-        sharding_ddl_util::removeCollMetadataFromConfig(opCtx, to);
+        sharding_ddl_util::removeCollAndChunksMetadataFromConfig_notIdempotent(opCtx, to);
+        sharding_ddl_util::removeTagsMetadataFromConfig_notIdempotent(opCtx, to);
         ShardingLogging::get(opCtx)->logChange(opCtx,
                                                "renameCollection.metadata",
                                                str::stream()
diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp
index 2136e1f2f96..c8d132b3745 100644
--- a/src/mongo/db/s/drop_collection_coordinator.cpp
+++ b/src/mongo/db/s/drop_collection_coordinator.cpp
@@ -42,6 +42,31 @@
 #include "mongo/s/request_types/sharded_ddl_commands_gen.h"
 
 namespace mongo {
+namespace {
+
+void sendDropCollectionParticipantCommandToShards(
+    OperationContext* opCtx,
+    const NamespaceString& nss,
+    const std::vector<ShardId>& shardIds,
+    const std::shared_ptr<executor::TaskExecutor>& executor,
+    const OperationSessionInfo& osi) {
+    const ShardsvrDropCollectionParticipant dropCollectionParticipant(nss);
+    const auto cmdObj =
+        CommandHelpers::appendMajorityWriteConcern(dropCollectionParticipant.toBSON({}));
+
+    try {
+        sharding_ddl_util::sendAuthenticatedCommandToShards(
+            opCtx, nss.db(), cmdObj.addFields(osi.toBSON()), shardIds, executor);
+    } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
+        // Older 5.0 binaries don't support running the _shardsvrDropCollectionParticipant
+        // command as a retryable write yet. In that case, retry without attaching session
+        // info.
+        sharding_ddl_util::sendAuthenticatedCommandToShards(
+            opCtx, nss.db(), cmdObj, shardIds, executor);
+    }
+}
+
+} // namespace
 
 DropCollectionCoordinator::DropCollectionCoordinator(ShardingDDLCoordinatorService* service,
                                                      const BSONObj& initialState)
@@ -109,6 +134,20 @@ void DropCollectionCoordinator::_enterPhase(Phase newPhase) {
     _doc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
 }
 
+void DropCollectionCoordinator::_performNoopRetryableWriteOnParticipants(
+    OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor) {
+    auto shardsAndConfigsvr = [&] {
+        const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
+        auto participants = shardRegistry->getAllShardIds(opCtx);
+        participants.emplace_back(shardRegistry->getConfigShard()->getId());
+        return participants;
+    }();
+
+    _doc = _updateSession(opCtx, _doc);
+    sharding_ddl_util::performNoopRetryableWriteOnShards(
+        opCtx, shardsAndConfigsvr, getCurrentSession(_doc), executor);
+}
+
 ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
     std::shared_ptr<executor::ScopedTaskExecutor> executor,
     const CancellationToken& token) noexcept {
@@ -120,16 +159,31 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
                 auto* opCtx = opCtxHolder.get();
                 getForwardableOpMetadata().setOn(opCtx);
 
-                ShardingLogging::get(opCtx)->logChange(opCtx, "dropCollection.start", nss().ns());
-
                 try {
-                    sharding_ddl_util::stopMigrations(opCtx, nss());
                     auto coll = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, nss());
                     _doc.setCollInfo(std::move(coll));
-                } catch (ExceptionFor<ErrorCodes::NamespaceNotSharded>&) {
+                } catch (const ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
                     // The collection is not sharded or doesn't exist.
                     _doc.setCollInfo(boost::none);
                 }
+
+                BSONObjBuilder logChangeDetail;
+                if (_doc.getCollInfo()) {
+                    logChangeDetail.append("collectionUUID",
+                                           _doc.getCollInfo()->getUuid().toBSON());
+                }
+
+                ShardingLogging::get(opCtx)->logChange(
+                    opCtx, "dropCollection.start", nss().ns(), logChangeDetail.obj());
+
+                // Persist the collection info before sticking to using it's uuid. This ensures this
+                // node is still the RS primary, so it was also the primary at the moment we read
+                // the collection metadata.
+                _doc = _updateStateDocument(opCtx, StateDoc(_doc));
+
+                if (_doc.getCollInfo()) {
+                    sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollInfo()->getUuid());
+                }
             }))
         .then(_executePhase(
             Phase::kDropCollection,
@@ -138,6 +192,10 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
                 auto* opCtx = opCtxHolder.get();
                 getForwardableOpMetadata().setOn(opCtx);
 
+                if (_recoveredFromDisk) {
+                    _performNoopRetryableWriteOnParticipants(opCtx, **executor);
+                }
+
                 const auto collIsSharded = bool(_doc.getCollInfo());
 
                 LOGV2_DEBUG(5390504,
@@ -149,22 +207,20 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
                 if (collIsSharded) {
                     invariant(_doc.getCollInfo());
                     const auto& coll = _doc.getCollInfo().get();
-                    sharding_ddl_util::removeCollMetadataFromConfig(opCtx, coll);
-                } else {
-                    // The collection is not sharded or didn't exist, just remove tags
-                    sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss());
+                    sharding_ddl_util::removeCollAndChunksMetadataFromConfig(opCtx, coll);
                 }
 
+                // Remove tags even if the collection is not sharded or didn't exist
+                _doc = _updateSession(opCtx, _doc);
+                sharding_ddl_util::removeTagsMetadataFromConfig(
+                    opCtx, nss(), getCurrentSession(_doc));
+
                 // get a Lsid and an incremented txnNumber. Ensures we are the primary
                 _doc = _updateSession(opCtx, _doc);
 
                 const auto primaryShardId = ShardingState::get(opCtx)->shardId();
 
-                const ShardsvrDropCollectionParticipant dropCollectionParticipant(nss());
-                const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(
-                    dropCollectionParticipant.toBSON({}));
-
-                sharding_ddl_util::sendAuthenticatedCommandToShards(
-                    opCtx, nss().db(), cmdObj, {primaryShardId}, **executor);
+                sendDropCollectionParticipantCommandToShards(
+                    opCtx, nss(), {primaryShardId}, **executor, getCurrentSession(_doc));
 
                 // We need to send the drop to all the shards because both movePrimary and
                 // moveChunk leave garbage behind for sharded collections.
@@ -173,8 +229,9 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
                 participants.erase(
                     std::remove(participants.begin(), participants.end(), primaryShardId),
                     participants.end());
-                sharding_ddl_util::sendAuthenticatedCommandToShards(
-                    opCtx, nss().db(), cmdObj, participants, **executor);
+
+                sendDropCollectionParticipantCommandToShards(
+                    opCtx, nss(), participants, **executor, getCurrentSession(_doc));
 
                 ShardingLogging::get(opCtx)->logChange(opCtx, "dropCollection", nss().ns());
                 LOGV2(5390503, "Collection dropped", "namespace"_attr = nss());
diff --git a/src/mongo/db/s/drop_collection_coordinator.h b/src/mongo/db/s/drop_collection_coordinator.h
index 1f43b204de4..ca102598895 100644
--- a/src/mongo/db/s/drop_collection_coordinator.h
+++ b/src/mongo/db/s/drop_collection_coordinator.h
@@ -83,6 +83,9 @@ private:
 
     void _enterPhase(Phase newPhase);
 
+    void _performNoopRetryableWriteOnParticipants(
+        OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor);
+
     DropCollectionCoordinatorDocument _doc;
 };
diff --git a/src/mongo/db/s/drop_database_coordinator.cpp b/src/mongo/db/s/drop_database_coordinator.cpp
index e20a72dbab1..ca12068d7f3 100644
--- a/src/mongo/db/s/drop_database_coordinator.cpp
+++ b/src/mongo/db/s/drop_database_coordinator.cpp
@@ -48,7 +48,8 @@ namespace {
 void dropShardedCollection(OperationContext* opCtx,
                            const CollectionType& coll,
                            std::shared_ptr<executor::ScopedTaskExecutor> executor) {
-    sharding_ddl_util::removeCollMetadataFromConfig(opCtx, coll);
+    sharding_ddl_util::removeCollAndChunksMetadataFromConfig(opCtx, coll);
+    sharding_ddl_util::removeTagsMetadataFromConfig_notIdempotent(opCtx, coll.getNss());
 
     const auto primaryShardId = ShardingState::get(opCtx)->shardId();
     const ShardsvrDropCollectionParticipant dropCollectionParticipant(coll.getNss());
@@ -166,7 +167,7 @@ ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
                     const auto& nss = coll.getNss();
                     LOGV2_DEBUG(5494505, 2, "Dropping collection", "namespace"_attr = nss);
 
-                    sharding_ddl_util::stopMigrations(opCtx, nss);
+                    sharding_ddl_util::stopMigrations(opCtx, nss, boost::none);
 
                     auto newStateDoc = _doc;
                     newStateDoc.setCollInfo(coll);
diff --git a/src/mongo/db/s/rename_collection_coordinator.cpp b/src/mongo/db/s/rename_collection_coordinator.cpp
index cf943e4a559..f0e35dc6afb 100644
--- a/src/mongo/db/s/rename_collection_coordinator.cpp
+++ b/src/mongo/db/s/rename_collection_coordinator.cpp
@@ -219,11 +219,11 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
 
                 // Block migrations on involved sharded collections
                 if (_doc.getOptShardedCollInfo()) {
-                    sharding_ddl_util::stopMigrations(opCtx, fromNss);
+                    sharding_ddl_util::stopMigrations(opCtx, fromNss, boost::none);
                 }
 
                 if (_doc.getTargetIsSharded()) {
-                    sharding_ddl_util::stopMigrations(opCtx, toNss);
+                    sharding_ddl_util::stopMigrations(opCtx, toNss, boost::none);
                 }
             }))
         .then(_executePhase(Phase::kBlockCrudAndRename,
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index 49372a81080..335e9f5f45c 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -160,6 +160,18 @@ protected:
         return _updateStateDocument(opCtx, std::move(newDoc));
     }
 
+    template <typename StateDoc>
+    OperationSessionInfo getCurrentSession(StateDoc const& doc) const {
+        invariant(doc.getShardingDDLCoordinatorMetadata().getSession());
+        ShardingDDLSession shardingDDLSession =
+            *doc.getShardingDDLCoordinatorMetadata().getSession();
+
+        OperationSessionInfo osi;
+        osi.setSessionId(shardingDDLSession.getLsid());
+        osi.setTxnNumber(shardingDDLSession.getTxnNumber());
+        return osi;
+    }
+
 protected:
     ShardingDDLCoordinatorService* _service;
     const ShardingDDLCoordinatorId _coordId;
diff --git a/src/mongo/db/s/sharding_ddl_util.cpp b/src/mongo/db/s/sharding_ddl_util.cpp
index 1b43e958599..47757e304a0 100644
--- a/src/mongo/db/s/sharding_ddl_util.cpp
+++ b/src/mongo/db/s/sharding_ddl_util.cpp
@@ -47,6 +47,7 @@
 #include "mongo/s/catalog/type_collection.h"
 #include "mongo/s/catalog/type_tags.h"
 #include "mongo/s/grid.h"
+#include "mongo/s/request_types/remove_tags_gen.h"
 #include "mongo/s/request_types/set_allow_migrations_gen.h"
 #include "mongo/s/write_ops/batch_write_exec.h"
@@ -83,19 +84,8 @@ void updateTags(OperationContext* opCtx,
     uassertStatusOK(response.toStatus());
 }
 
-void deleteChunks(OperationContext* opCtx, const NamespaceStringOrUUID& nssOrUUID) {
+void deleteChunks(OperationContext* opCtx, const UUID& collectionUUID) {
     // Remove config.chunks entries
-    const auto chunksQuery = [&]() {
-        auto optUUID = nssOrUUID.uuid();
-        if (optUUID) {
-            return BSON(ChunkType::collectionUUID << *optUUID);
-        }
-
-        auto optNss = nssOrUUID.nss();
-        invariant(optNss);
-        return BSON(ChunkType::ns(optNss->ns()));
-    }();
-
     // TODO SERVER-57221 don't use hint if not relevant anymore for delete performances
     auto hint = BSON(ChunkType::collectionUUID() << 1 << ChunkType::min() << 1);
 
@@ -103,7 +93,7 @@ void deleteChunks(OperationContext* opCtx, const NamespaceStringOrUUI
     write_ops::DeleteCommandRequest deleteOp(ChunkType::ConfigNS);
     deleteOp.setDeletes({[&] {
         write_ops::DeleteOpEntry entry;
-        entry.setQ(chunksQuery);
+        entry.setQ(BSON(ChunkType::collectionUUID << collectionUUID));
         entry.setHint(hint);
         entry.setMulti(true);
         return entry;
@@ -120,15 +110,16 @@ void deleteChunks(OperationContext* opCtx, const NamespaceStringOrUUI
     uassertStatusOK(response.toStatus());
 }
 
-void deleteCollection(OperationContext* opCtx, const NamespaceString& nss) {
+void deleteCollection(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid) {
     const auto catalogClient = Grid::get(opCtx)->catalogClient();
 
-    // Remove config.collection entry
-    uassertStatusOK(
-        catalogClient->removeConfigDocuments(opCtx,
-                                             CollectionType::ConfigNS,
-                                             BSON(CollectionType::kNssFieldName << nss.ns()),
-                                             ShardingCatalogClient::kMajorityWriteConcern));
+    // Remove config.collection entry. Query by 'ns' AND 'uuid' so that the remove can be resolved
+    // with an IXSCAN (thanks to the index on '_id') and is idempotent (thanks to the 'uuid')
+    uassertStatusOK(catalogClient->removeConfigDocuments(
+        opCtx,
+        CollectionType::ConfigNS,
+        BSON(CollectionType::kNssFieldName << nss.ns() << CollectionType::kUuidFieldName << uuid),
+        ShardingCatalogClient::kMajorityWriteConcern));
 }
 
 } // namespace
@@ -168,7 +159,29 @@ void sendAuthenticatedCommandToShards(OperationContext* opCtx,
     sharding_util::sendCommandToShards(opCtx, dbName, authenticatedCommand, shardIds, executor);
 }
 
-void removeTagsMetadataFromConfig(OperationContext* opCtx, const NamespaceString& nss) {
+void removeTagsMetadataFromConfig(OperationContext* opCtx,
+                                  const NamespaceString& nss,
+                                  const OperationSessionInfo& osi) {
+    auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+    // Remove config.tags entries
+    ConfigsvrRemoveTags configsvrRemoveTagsCmd(nss);
+    configsvrRemoveTagsCmd.setDbName(NamespaceString::kAdminDb);
+
+    const auto swRemoveTagsResult = configShard->runCommandWithFixedRetryAttempts(
+        opCtx,
+        ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+        NamespaceString::kAdminDb.toString(),
+        CommandHelpers::appendMajorityWriteConcern(configsvrRemoveTagsCmd.toBSON(osi.toBSON())),
+        Shard::RetryPolicy::kIdempotent);
+
+    uassertStatusOKWithContext(
+        Shard::CommandResponse::getEffectiveStatus(std::move(swRemoveTagsResult)),
+        str::stream() << "Error removing tags for collection " << nss.toString());
+}
+
+void removeTagsMetadataFromConfig_notIdempotent(OperationContext* opCtx,
+                                                const NamespaceString& nss) {
     // Remove config.tags entries
     const auto query = BSON(TagsType::ns(nss.ns()));
     const auto hint = BSON(TagsType::ns() << 1 << TagsType::min() << 1);
@@ -194,25 +207,21 @@ void removeTagsMetadataFromConfig(OperationContext* opCtx, const NamespaceString
     uassertStatusOK(response.toStatus());
 }
 
-void removeCollMetadataFromConfig(OperationContext* opCtx, const CollectionType& coll) {
+void removeCollAndChunksMetadataFromConfig(OperationContext* opCtx, const CollectionType& coll) {
     IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
     const auto& nss = coll.getNss();
+    const auto& uuid = coll.getUuid();
 
     ON_BLOCK_EXIT(
         [&] { Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss); });
 
-    const NamespaceStringOrUUID nssOrUUID = coll.getTimestamp()
-        ? NamespaceStringOrUUID(nss.db().toString(), coll.getUuid())
-        : NamespaceStringOrUUID(nss);
+    deleteCollection(opCtx, nss, uuid);
 
-    deleteCollection(opCtx, nss);
-
-    deleteChunks(opCtx, nssOrUUID);
-
-    removeTagsMetadataFromConfig(opCtx, nss);
+    deleteChunks(opCtx, uuid);
 }
 
-bool removeCollMetadataFromConfig(OperationContext* opCtx, const NamespaceString& nss) {
+bool removeCollAndChunksMetadataFromConfig_notIdempotent(OperationContext* opCtx,
+                                                         const NamespaceString& nss) {
     IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx);
 
     const auto catalogClient = Grid::get(opCtx)->catalogClient();
@@ -221,11 +230,10 @@ bool removeCollMetadataFromConfig(OperationContext* opCtx, const NamespaceString
 
     try {
         auto coll = catalogClient->getCollection(opCtx, nss);
-        removeCollMetadataFromConfig(opCtx, coll);
+        removeCollAndChunksMetadataFromConfig(opCtx, coll);
         return true;
     } catch (ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
-        // The collection is not sharded or doesn't exist, just tags need to be removed
-        removeTagsMetadataFromConfig(opCtx, nss);
+        // The collection is not sharded or doesn't exist
        return false;
     }
 }
@@ -237,6 +245,7 @@ void shardedRenameMetadata(OperationContext* opCtx,
     auto catalogClient = Grid::get(opCtx)->catalogClient();
 
     auto fromNss = fromCollType.getNss();
+    auto fromUUID = fromCollType.getUuid();
 
     // Delete eventual TO chunk/collection entries referring a dropped collection
     try {
@@ -248,13 +257,16 @@
         }
 
         // Delete TO chunk/collection entries referring a dropped collection
-        removeCollMetadataFromConfig(opCtx, toNss);
+        removeCollAndChunksMetadataFromConfig_notIdempotent(opCtx, toNss);
     } catch (ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
         // The TO collection is not sharded or doesn't exist
     }
 
+    // Delete TO tags, even if the TO collection is not sharded or doesn't exist
+    removeTagsMetadataFromConfig_notIdempotent(opCtx, toNss);
+
     // Delete FROM collection entry
-    deleteCollection(opCtx, fromNss);
+    deleteCollection(opCtx, fromNss, fromUUID);
 
     // Update FROM tags to TO
     updateTags(opCtx, fromNss, toNss);
@@ -360,9 +372,12 @@ boost::optional<CreateCollectionResponse> checkIfCollectionAlreadySharded(
     return response;
 }
 
-void stopMigrations(OperationContext* opCtx, const NamespaceString& nss) {
-    const ConfigsvrSetAllowMigrations configsvrSetAllowMigrationsCmd(nss,
-                                                                     false /* allowMigrations */);
+void stopMigrations(OperationContext* opCtx,
+                    const NamespaceString& nss,
+                    const boost::optional<UUID>& expectedCollectionUUID) {
+    ConfigsvrSetAllowMigrations configsvrSetAllowMigrationsCmd(nss, false /* allowMigrations */);
+    configsvrSetAllowMigrationsCmd.setCollectionUUID(expectedCollectionUUID);
+
     const auto swSetAllowMigrationsResult =
         Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
             opCtx,
@@ -384,5 +399,30 @@ boost::optional<UUID> getCollectionUUID(OperationContext* opCtx, const Namespace
     AutoGetCollection autoColl(opCtx, nss, MODE_IS, AutoGetCollectionViewMode::kViewsForbidden);
     return autoColl ? boost::make_optional(autoColl->uuid()) : boost::none;
 }
+
+void performNoopRetryableWriteOnShards(OperationContext* opCtx,
+                                       const std::vector<ShardId>& shardIds,
+                                       const OperationSessionInfo& osi,
+                                       const std::shared_ptr<executor::TaskExecutor>& executor) {
+    write_ops::UpdateCommandRequest updateOp(NamespaceString::kServerConfigurationNamespace);
+    auto queryFilter = BSON("_id"
+                            << "shardingDDLCoordinatorRecoveryDoc");
+    auto updateModification =
+        write_ops::UpdateModification(write_ops::UpdateModification::parseFromClassicUpdate(
+            BSON("$inc" << BSON("noopWriteCount" << 1))));
+
+    write_ops::UpdateOpEntry updateEntry(queryFilter, updateModification);
+    updateEntry.setMulti(false);
+    updateEntry.setUpsert(true);
+    updateOp.setUpdates({updateEntry});
+
+    sharding_ddl_util::sendAuthenticatedCommandToShards(
+        opCtx,
+        updateOp.getDbName(),
+        CommandHelpers::appendMajorityWriteConcern(updateOp.toBSON(osi.toBSON())),
+        shardIds,
+        executor);
+}
+
 } // namespace sharding_ddl_util
 } // namespace mongo
diff --git a/src/mongo/db/s/sharding_ddl_util.h b/src/mongo/db/s/sharding_ddl_util.h
index 54f2e41dd66..dc6aa2fb106 100644
--- a/src/mongo/db/s/sharding_ddl_util.h
+++ b/src/mongo/db/s/sharding_ddl_util.h
@@ -55,24 +55,36 @@ void sendAuthenticatedCommandToShards(OperationContext* opCtx,
                                       const std::shared_ptr<executor::TaskExecutor>& executor);
 
 /**
+ * Erase tags metadata from config server for the given namespace, using the _configsvrRemoveTags
+ * command as a retryable write to ensure idempotency.
+ */
+void removeTagsMetadataFromConfig(OperationContext* opCtx,
+                                  const NamespaceString& nss,
+                                  const OperationSessionInfo& osi);
+
+/**
  * Erase tags metadata from config server for the given namespace.
+ * TODO SERVER-56649 remove this
  */
-void removeTagsMetadataFromConfig(OperationContext* opCtx, const NamespaceString& nss);
+void removeTagsMetadataFromConfig_notIdempotent(OperationContext* opCtx,
+                                                const NamespaceString& nss);
 
 /**
  * Erase collection metadata from config server and invalidate the locally cached one.
- * In particular remove chunks, tags and the description associated with the given namespace.
+ * In particular remove the collection and chunks metadata associated with the given namespace.
  */
-void removeCollMetadataFromConfig(OperationContext* opCtx, const CollectionType& coll);
+void removeCollAndChunksMetadataFromConfig(OperationContext* opCtx, const CollectionType& coll);
 
 /**
  * Erase collection metadata from config server and invalidate the locally cached one.
- * In particular remove chunks, tags and the description associated with the given namespace.
+ * In particular remove the collection and chunks metadata associated with the given namespace.
  *
  * Returns true if the collection existed before being removed.
+ * TODO SERVER-56649 remove this
  */
-bool removeCollMetadataFromConfig(OperationContext* opCtx, const NamespaceString& nss);
+bool removeCollAndChunksMetadataFromConfig_notIdempotent(OperationContext* opCtx,
+                                                         const NamespaceString& nss);
 
 /**
  * Rename sharded collection metadata as part of a renameCollection operation.
@@ -120,8 +132,11 @@ boost::optional<CreateCollectionResponse> checkIfCollectionAlreadySharded(
 
 /**
  * Stops ongoing migrations and prevents future ones to start for the given nss.
+ * If expectedCollectionUUID is set and doesn't match that of that collection, then this is a no-op.
  */
-void stopMigrations(OperationContext* opCtx, const NamespaceString& nss);
+void stopMigrations(OperationContext* opCtx,
+                    const NamespaceString& nss,
+                    const boost::optional<UUID>& expectedCollectionUUID);
 
 /*
 * Returns the UUID of the collection (if exists) using the catalog. It does not provide any locking
@@ -129,5 +144,14 @@
 **/
 boost::optional<UUID> getCollectionUUID(OperationContext* opCtx, const NamespaceString& nss);
 
+/*
+ * Performs a noop retryable write on the given shards using the session and txNumber specified in
+ * 'osi'
+ */
+void performNoopRetryableWriteOnShards(OperationContext* opCtx,
+                                       const std::vector<ShardId>& shardIds,
+                                       const OperationSessionInfo& osi,
+                                       const std::shared_ptr<executor::TaskExecutor>& executor);
+
 } // namespace sharding_ddl_util
 } // namespace mongo
diff --git a/src/mongo/db/transaction_validation.cpp b/src/mongo/db/transaction_validation.cpp
index d0ceebc6edb..d23d2de6e5d 100644
--- a/src/mongo/db/transaction_validation.cpp
+++ b/src/mongo/db/transaction_validation.cpp
@@ -45,13 +45,16 @@ using namespace fmt::literals;
 
 namespace {
 
-const StringMap<int> retryableWriteCommands = {{"delete", 1},
-                                               {"findandmodify", 1},
-                                               {"findAndModify", 1},
-                                               {"insert", 1},
-                                               {"update", 1},
-                                               {"_recvChunkStart", 1},
-                                               {"_configsvrRemoveTags", 1}};
+const StringMap<int> retryableWriteCommands = {
+    {"delete", 1},
+    {"findandmodify", 1},
+    {"findAndModify", 1},
+    {"insert", 1},
+    {"update", 1},
+    {"_recvChunkStart", 1},
+    {"_configsvrRemoveTags", 1},
+    {"_shardsvrDropCollectionParticipant", 1},
+};
 
 // Commands that can be sent with session info but should not check out a session.
 const StringMap<int> skipSessionCheckoutList = {
@@ -97,7 +100,7 @@ void validateSessionOptions(const OperationSessionInfoFromClient& sessionOptions
     }
 
     if (!sessionOptions.getAutocommit() && sessionOptions.getTxnNumber()) {
-        uassert(50768,
+        uassert(ErrorCodes::NotARetryableWriteCommand,
                 "txnNumber may only be provided for multi-document transactions and retryable "
                 "write commands. autocommit:false was not provided, and {} is not a retryable "
                 "write command."_format(cmdName),