diff options
author | Tommaso Tocci <tommaso.tocci@mongodb.com> | 2023-01-04 18:00:16 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-01-05 00:49:31 +0000 |
commit | 928850f2d3e755950b387d8b0541af3450e05f82 (patch) | |
tree | cb6c5b8bea3ba7ef2615eb68ebbaa8d5be8bb9d3 | |
parent | d5670b47e3cf3aff1c8ec057aa0c8dd5a2849a14 (diff) | |
download | mongo-928850f2d3e755950b387d8b0541af3450e05f82.tar.gz |
SERVER-70323 converts many sharding invariants to tassert
(cherry picked from commit 40747744d90190e44986561119b35d6e791c12dd)
-rw-r--r-- | src/mongo/db/s/collection_sharding_runtime.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/s/recoverable_critical_section_service.cpp | 109 | ||||
-rw-r--r-- | src/mongo/db/s/shard_server_catalog_cache_loader.cpp | 45 | ||||
-rw-r--r-- | src/mongo/s/catalog_cache.cpp | 69 |
4 files changed, 154 insertions, 96 deletions
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp index fb6af030514..81a70c4f9a4 100644 --- a/src/mongo/db/s/collection_sharding_runtime.cpp +++ b/src/mongo/db/s/collection_sharding_runtime.cpp @@ -102,8 +102,9 @@ ScopedCollectionFilter CollectionShardingRuntime::getOwnershipFilter( if (!supportNonVersionedOperations) { optReceivedShardVersion = getOperationReceivedVersion(opCtx, _nss); // No operations should be calling getOwnershipFilter without a shard version - invariant(optReceivedShardVersion, - "getOwnershipFilter called by operation that doesn't specify shard version"); + tassert(7032300, + "getOwnershipFilter called by operation that doesn't specify shard version", + optReceivedShardVersion); } auto metadata = @@ -112,10 +113,11 @@ ScopedCollectionFilter CollectionShardingRuntime::getOwnershipFilter( supportNonVersionedOperations); if (!supportNonVersionedOperations) { - invariant(!ChunkVersion::isIgnoredVersion(*optReceivedShardVersion) || - !metadata->get().allowMigrations() || !metadata->get().isSharded(), - "For sharded collections getOwnershipFilter cannot be relied on without a valid " - "shard version"); + tassert(7032301, + "For sharded collections getOwnershipFilter cannot be relied on without a valid " + "shard version", + !ChunkVersion::isIgnoredVersion(*optReceivedShardVersion) || + !metadata->get().allowMigrations() || !metadata->get().isSharded()); } return {std::move(metadata)}; @@ -202,8 +204,9 @@ void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx, void CollectionShardingRuntime::setFilteringMetadata_withLock(OperationContext* opCtx, CollectionMetadata newMetadata, const CSRLock& csrExclusiveLock) { - invariant(!newMetadata.isSharded() || !_nss.isNamespaceAlwaysUnsharded(), - str::stream() << "Namespace " << _nss.ns() << " must never be sharded."); + tassert(7032302, + str::stream() << "Namespace " << _nss.ns() << " must never be sharded.", + !newMetadata.isSharded() || !_nss.isNamespaceAlwaysUnsharded()); stdx::lock_guard lk(_metadataManagerLock); @@ -479,7 +482,9 @@ CollectionCriticalSection::CollectionCriticalSection(OperationContext* opCtx, Milliseconds(migrationLockAcquisitionMaxWaitMS.load())); auto* const csr = CollectionShardingRuntime::get(_opCtx, _nss); auto csrLock = CollectionShardingRuntime::CSRLock::lockExclusive(opCtx, csr); - invariant(csr->getCurrentMetadataIfKnown()); + tassert(7032305, + "Collection metadata unknown when entering critical section", + csr->getCurrentMetadataIfKnown()); csr->enterCriticalSectionCatchUpPhase(csrLock, _reason); } @@ -500,7 +505,9 @@ void CollectionCriticalSection::enterCommitPhase() { Milliseconds(migrationLockAcquisitionMaxWaitMS.load())); auto* const csr = CollectionShardingRuntime::get(_opCtx, _nss); auto csrLock = CollectionShardingRuntime::CSRLock::lockExclusive(_opCtx, csr); - invariant(csr->getCurrentMetadataIfKnown()); + tassert(7032304, + "Collection metadata unknown when entering critical section commit phase", + csr->getCurrentMetadataIfKnown()); csr->enterCriticalSectionCommitPhase(csrLock, _reason); } diff --git a/src/mongo/db/s/recoverable_critical_section_service.cpp b/src/mongo/db/s/recoverable_critical_section_service.cpp index 39406025140..6d10fdb4ef7 100644 --- a/src/mongo/db/s/recoverable_critical_section_service.cpp +++ b/src/mongo/db/s/recoverable_critical_section_service.cpp @@ -29,6 +29,7 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding +#include <fmt/format.h> #include <set> #include "mongo/platform/basic.h" @@ -92,7 +93,12 @@ void RecoverableCriticalSectionService::acquireRecoverableCriticalSectionBlockWr "reason"_attr = reason, "writeConcern"_attr = writeConcern); - invariant(!opCtx->lockState()->isLocked()); + tassert(7032360, + fmt::format("Can't acquire recoverable critical section for collection '{}' with " + "reason '{}' while holding locks", + nss.toString(), + reason.toString()), + !opCtx->lockState()->isLocked()); { Lock::GlobalLock lk(opCtx, MODE_IX); @@ -110,12 +116,14 @@ void RecoverableCriticalSectionService::acquireRecoverableCriticalSectionBlockWr const auto collCSDoc = CollectionCriticalSectionDocument::parse( IDLParserErrorContext("AcquireRecoverableCSBW"), bsonObj); - invariant(collCSDoc.getReason().woCompare(reason) == 0, - str::stream() - << "Trying to acquire a critical section blocking writes for namespace " - << nss << " and reason " << reason - << " but it is already taken by another operation with different reason " - << collCSDoc.getReason()); + tassert(7032368, + fmt::format("Trying to acquire a critical section blocking writes for " + "namespace '{}' and reason '{}' but it is already taken by another " + "operation with different reason '{}'", + nss.toString(), + reason.toString(), + collCSDoc.getReason().toString()), + collCSDoc.getReason().woCompare(reason) == 0); LOGV2_DEBUG( 5656601, @@ -149,10 +157,13 @@ void RecoverableCriticalSectionService::acquireRecoverableCriticalSectionBlockWr BatchedCommandResponse batchedResponse; std::string unusedErrmsg; batchedResponse.parseBSON(commandReply, &unusedErrmsg); - invariant(batchedResponse.getN() > 0, - str::stream() << "Insert did not add any doc to collection " - << NamespaceString::kCollectionCriticalSectionsNamespace - << " for namespace " << nss << " and reason " << reason); + tassert(7032369, + fmt::format("Insert did not add any doc to collection '{}' for namespace '{}' " + "and reason '{}'", + nss.toString(), + reason.toString(), + NamespaceString::kCollectionCriticalSectionsNamespace.toString()), + batchedResponse.getN() > 0); } WriteConcernResult ignoreResult; @@ -179,7 +190,12 @@ void RecoverableCriticalSectionService::promoteRecoverableCriticalSectionToBlock "reason"_attr = reason, "writeConcern"_attr = writeConcern); - invariant(!opCtx->lockState()->isLocked()); + tassert(7032364, + fmt::format("Can't promote recoverable critical section for collection '{}' with " + "reason '{}' while holding locks", + nss.toString(), + reason.toString()), + !opCtx->lockState()->isLocked()); { AutoGetCollection cCollLock(opCtx, nss, MODE_X); @@ -190,21 +206,26 @@ void RecoverableCriticalSectionService::promoteRecoverableCriticalSectionToBlock BSON(CollectionCriticalSectionDocument::kNssFieldName << nss.toString())); auto cursor = dbClient.find(std::move(findRequest)); - invariant( - cursor->more(), - str::stream() << "Trying to acquire a critical section blocking reads for namespace " - << nss << " and reason " << reason - << " but the critical section wasn't acquired first blocking writers."); + tassert(7032361, + fmt::format( + "Trying to acquire a critical section blocking reads for namespace '{}' and " + "reason '{}' but the critical section wasn't acquired first blocking writers.", + nss.toString(), + reason.toString()), + cursor->more()); BSONObj bsonObj = cursor->next(); const auto collCSDoc = CollectionCriticalSectionDocument::parse( IDLParserErrorContext("AcquireRecoverableCSBR"), bsonObj); - invariant( - collCSDoc.getReason().woCompare(reason) == 0, - str::stream() << "Trying to acquire a critical section blocking reads for namespace " - << nss << " and reason " << reason - << " but it is already taken by another operation with different reason " - << collCSDoc.getReason()); + tassert(7032362, + fmt::format( + "Trying to acquire a critical section blocking reads for namespace '{}' and " + "reason " + "'{}' but it is already taken by another operation with different reason '{}'", + nss.toString(), + reason.toString(), + collCSDoc.getReason().toString()), + collCSDoc.getReason().woCompare(reason) == 0); // if there is a document with the same nss, reason and blocking reads -> do nothing, the CS // is already taken! @@ -249,10 +270,13 @@ void RecoverableCriticalSectionService::promoteRecoverableCriticalSectionToBlock BatchedCommandResponse batchedResponse; std::string unusedErrmsg; batchedResponse.parseBSON(commandReply, &unusedErrmsg); - invariant(batchedResponse.getNModified() > 0, - str::stream() << "Update did not modify any doc from collection " - << NamespaceString::kCollectionCriticalSectionsNamespace - << " for namespace " << nss << " and reason " << reason); + tassert(7032363, + fmt::format("Update did not modify any doc from collection '{}' for namespace '{}' " + "and reason '{}'", + NamespaceString::kCollectionCriticalSectionsNamespace.toString(), + nss.toString(), + reason.toString()), + batchedResponse.getNModified() > 0); } WriteConcernResult ignoreResult; @@ -279,7 +303,12 @@ void RecoverableCriticalSectionService::releaseRecoverableCriticalSection( "reason"_attr = reason, "writeConcern"_attr = writeConcern); - invariant(!opCtx->lockState()->isLocked()); + tassert(7032365, + fmt::format("Can't release recoverable critical section for collection '{}' with " + "reason '{}' while holding locks", + nss.toString(), + reason.toString()), + !opCtx->lockState()->isLocked()); { AutoGetCollection collLock(opCtx, nss, MODE_X); @@ -307,12 +336,13 @@ void RecoverableCriticalSectionService::releaseRecoverableCriticalSection( const auto collCSDoc = CollectionCriticalSectionDocument::parse( IDLParserErrorContext("ReleaseRecoverableCS"), bsonObj); - invariant( - collCSDoc.getReason().woCompare(reason) == 0, - str::stream() << "Trying to release a critical for namespace " << nss << " and reason " - << reason - << " but it is already taken by another operation with different reason " - << collCSDoc.getReason()); + tassert(7032366, + fmt::format("Trying to release a critical for namespace '{}' and reason '{}' but " + "it is already taken by another operation with different reason '{}'", + nss.toString(), + reason.toString(), + collCSDoc.getReason().toString()), + collCSDoc.getReason().woCompare(reason) == 0); // The collection critical section is taken (in any phase), try to release it. @@ -342,10 +372,13 @@ void RecoverableCriticalSectionService::releaseRecoverableCriticalSection( BatchedCommandResponse batchedResponse; std::string unusedErrmsg; batchedResponse.parseBSON(commandReply, &unusedErrmsg); - invariant(batchedResponse.getN() > 0, - str::stream() << "Delete did not remove any doc from collection " - << NamespaceString::kCollectionCriticalSectionsNamespace - << " for namespace " << nss << " and reason " << reason); + tassert(7032367, + fmt::format("Delete did not remove any doc from collection '{}' for namespace '{}' " + "and reason '{}'", + NamespaceString::kCollectionCriticalSectionsNamespace.toString(), + nss.toString(), + reason.toString()), + batchedResponse.getN() > 0); } WriteConcernResult ignoreResult; diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp index f7fbbd89bb3..0a016956fb1 100644 --- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp +++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp @@ -37,6 +37,7 @@ #include "mongo/db/s/shard_server_catalog_cache_loader.h" +#include <fmt/format.h> #include <memory> #include "mongo/db/catalog/rename_collection.h" @@ -778,9 +779,10 @@ ShardServerCatalogCacheLoader::_schedulePrimaryGetChunksSince( } // After finding metadata remotely, we must have found metadata locally. - invariant(!collAndChunks.changedChunks.empty(), - str::stream() << "No chunks metadata found for collection '" << nss - << "' despite the config server returned actual information"); + tassert(7032350, + str::stream() << "No chunks metadata found for collection '" << nss + << "' despite the config server returned actual information", + !collAndChunks.changedChunks.empty()); return swCollectionAndChangedChunks; }; @@ -1175,7 +1177,9 @@ void ShardServerCatalogCacheLoader::_updatePersistedCollAndChunksMetadata( stdx::unique_lock<Latch> lock(_mutex); const CollAndChunkTask& task = _collAndChunkTaskLists[nss].front(); - invariant(task.dropped || !task.collectionAndChangedChunks->changedChunks.empty()); + tassert(7032351, + "Invalid CollAndChunkTask state", + task.dropped || !task.collectionAndChangedChunks->changedChunks.empty()); // If this task is from an old term and no longer valid, do not execute and return true so that // the task gets removed from the task list @@ -1301,7 +1305,9 @@ ShardServerCatalogCacheLoader::CollAndChunkTask::CollAndChunkTask( termCreated(currentTerm) { if (statusWithCollectionAndChangedChunks.isOK()) { collectionAndChangedChunks = std::move(statusWithCollectionAndChangedChunks.getValue()); - invariant(!collectionAndChangedChunks->changedChunks.empty()); + tassert(7032354, + "Found no chunks in retrieved collection metadata", + !collectionAndChangedChunks->changedChunks.empty()); const auto highestVersion = collectionAndChangedChunks->changedChunks.back().getVersion(); // Note that due to the way Phase 1 of the FCV upgrade writes timestamps to chunks // (non-atomically), it is possible that chunks exist with timestamps, but the @@ -1313,7 +1319,10 @@ ShardServerCatalogCacheLoader::CollAndChunkTask::CollAndChunkTask( highestVersion.epoch(), collectionAndChangedChunks->timestamp); } else { - invariant(statusWithCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound); + tassert(7032358, + fmt::format("Encountered unexpected error while fetching collection metadata: {}", + statusWithCollectionAndChangedChunks.getStatus().toString()), + statusWithCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound); dropped = true; maxQueryVersion = ChunkVersion::UNSHARDED(); } @@ -1325,7 +1334,10 @@ ShardServerCatalogCacheLoader::DBTask::DBTask(StatusWith<DatabaseType> swDatabas if (swDatabaseType.isOK()) { dbType = std::move(swDatabaseType.getValue()); } else { - invariant(swDatabaseType == ErrorCodes::NamespaceNotFound); + tassert(7032355, + fmt::format("Encountered unexpected error while fetching database metadata: {}", + swDatabaseType.getStatus().toString()), + swDatabaseType == ErrorCodes::NamespaceNotFound); } } @@ -1348,10 +1360,11 @@ void ShardServerCatalogCacheLoader::CollAndChunkTaskList::addTask(CollAndChunkTa } if (task.dropped) { - invariant(lastTask.maxQueryVersion == task.minQueryVersion, - str::stream() << "The version of the added task is not contiguous with that of " - << "the previous one: LastTask {" << lastTask.toString() << "}, " - << "AddedTask {" << task.toString() << "}"); + tassert(7032356, + str::stream() << "The version of the added task is not contiguous with that of " + << "the previous one: LastTask {" << lastTask.toString() << "}, " + << "AddedTask {" << task.toString() << "}", + lastTask.maxQueryVersion == task.minQueryVersion); // As an optimization, on collection drop, clear any pending tasks in order to prevent any // throw-away work from executing. Because we have no way to differentiate whether the @@ -1365,11 +1378,11 @@ void ShardServerCatalogCacheLoader::CollAndChunkTaskList::addTask(CollAndChunkTa } } else { // Tasks must have contiguous versions, unless a complete reload occurs. - invariant(lastTask.maxQueryVersion == task.minQueryVersion || !task.minQueryVersion.isSet(), - str::stream() << "The added task is not the first and its version is not " - << "contiguous with that of the previous one: LastTask {" - << lastTask.toString() << "}, AddedTask {" << task.toString() - << "}"); + tassert(7032357, + str::stream() << "The added task is not the first and its version is not " + << "contiguous with that of the previous one: LastTask {" + << lastTask.toString() << "}, AddedTask {" << task.toString() << "}", + lastTask.maxQueryVersion == task.minQueryVersion || !task.minQueryVersion.isSet()); _tasks.emplace_back(std::move(task)); } diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp index 60314d67ef3..75ac34818ad 100644 --- a/src/mongo/s/catalog_cache.cpp +++ b/src/mongo/s/catalog_cache.cpp @@ -37,6 +37,8 @@ #include "mongo/s/catalog_cache.h" +#include <fmt/format.h> + #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/query/collation/collator_factory_interface.h" #include "mongo/db/repl/optime_with.h" @@ -81,28 +83,31 @@ std::shared_ptr<RoutingTableHistory> createUpdatedRoutingTableHistory( if (isIncremental && collectionAndChunks.changedChunks.size() == 1 && collectionAndChunks.changedChunks[0].getVersion() == existingHistory->optRt->getVersion()) { - invariant(collectionAndChunks.allowMigrations == existingHistory->optRt->allowMigrations(), - str::stream() << "allowMigrations field of " << nss - << " collection changed without changing the collection version " - << existingHistory->optRt->getVersion().toString() - << ". Old value: " << existingHistory->optRt->allowMigrations() - << ", new value: " << collectionAndChunks.allowMigrations); + tassert(7032310, + fmt::format("allowMigrations field of collection '{}' changed without changing the " + "collection version {}. Old value: {}, new value: {}", + nss.toString(), + existingHistory->optRt->getVersion().toString(), + existingHistory->optRt->allowMigrations(), + collectionAndChunks.allowMigrations), + collectionAndChunks.allowMigrations == existingHistory->optRt->allowMigrations()); const auto& oldReshardingFields = existingHistory->optRt->getReshardingFields(); const auto& newReshardingFields = collectionAndChunks.reshardingFields; - invariant( - [&] { - if (oldReshardingFields && newReshardingFields) - return oldReshardingFields->toBSON().woCompare(newReshardingFields->toBSON()) == - 0; - else - return !oldReshardingFields && !newReshardingFields; - }(), - str::stream() << "reshardingFields field of " << nss - << " collection changed without changing the collection version " - << existingHistory->optRt->getVersion().toString() - << ". Old value: " << oldReshardingFields->toBSON() - << ", new value: " << newReshardingFields->toBSON()); + tassert(7032311, + fmt::format("reshardingFields field of collection '{}' changed without changing " + "the collection version {}. Old value: {}, new value: {}", + nss.toString(), + existingHistory->optRt->getVersion().toString(), + oldReshardingFields->toBSON().toString(), + newReshardingFields->toBSON().toString()), + [&] { + if (oldReshardingFields && newReshardingFields) + return oldReshardingFields->toBSON().woCompare( + newReshardingFields->toBSON()) == 0; + else + return !oldReshardingFields && !newReshardingFields; + }()); return existingHistory->optRt; } @@ -114,7 +119,11 @@ std::shared_ptr<RoutingTableHistory> createUpdatedRoutingTableHistory( return 0; } if (collectionAndChunks.maxChunkSizeBytes) { - invariant(collectionAndChunks.maxChunkSizeBytes.get() > 0); + tassert(7032312, + fmt::format("Invalid maxChunkSizeBytes value {} for collection '{}'", + nss.toString(), + collectionAndChunks.maxChunkSizeBytes.get()), + collectionAndChunks.maxChunkSizeBytes.get() > 0); return uint64_t(*collectionAndChunks.maxChunkSizeBytes); } return boost::none; @@ -252,13 +261,11 @@ CatalogCache::~CatalogCache() { StatusWith<CachedDatabaseInfo> CatalogCache::getDatabase(OperationContext* opCtx, StringData dbName, bool allowLocks) { - if (!allowLocks) { - invariant( - !opCtx->lockState() || !opCtx->lockState()->isLocked(), + tassert(7032313, "Do not hold a lock while refreshing the catalog cache. Doing so would potentially " "hold the lock during a network call, and can lead to a deadlock as described in " - "SERVER-37398."); - } + "SERVER-37398.", + allowLocks || !opCtx->lockState() || !opCtx->lockState()->isLocked()); try { auto dbEntry = _databaseCache.acquire(opCtx, dbName, CacheCausalConsistency::kLatestKnown); @@ -277,13 +284,11 @@ StatusWith<ChunkManager> CatalogCache::_getCollectionRoutingInfoAt( const NamespaceString& nss, boost::optional<Timestamp> atClusterTime, bool allowLocks) { - if (!allowLocks) { - invariant(!opCtx->lockState() || !opCtx->lockState()->isLocked(), - "Do not hold a lock while refreshing the catalog cache. Doing so would " - "potentially hold " - "the lock during a network call, and can lead to a deadlock as described in " - "SERVER-37398."); - } + tassert(7032314, + "Do not hold a lock while refreshing the catalog cache. Doing so would potentially " + "hold the lock during a network call, and can lead to a deadlock as described in " + "SERVER-37398.", + allowLocks || !opCtx->lockState() || !opCtx->lockState()->isLocked()); try { const auto swDbInfo = getDatabase(opCtx, nss.db(), allowLocks); |