Diffstat (limited to 'src')
-rw-r--r--  src/mongo/s/balancer_configuration.cpp               |  25
-rw-r--r--  src/mongo/s/catalog/replset_dist_lock_manager.cpp    | 236
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_impl.cpp |   5
-rw-r--r--  src/mongo/s/catalog_cache.cpp                        | 129
-rw-r--r--  src/mongo/s/chunk_manager_index_bounds_test.cpp      |  13
-rw-r--r--  src/mongo/s/client/parallel.cpp                      | 277
-rw-r--r--  src/mongo/s/client/shard.cpp                         |  35
-rw-r--r--  src/mongo/s/client/shard_connection.cpp              |  20
-rw-r--r--  src/mongo/s/client/shard_registry.cpp                |  82
9 files changed, 456 insertions(+), 366 deletions(-)
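All of the hunks below apply the same structured-logging (LOGV2) cleanup: each log call keeps its numeric ID, gains a short constant message next to the format string, and replaces the auto-generated attribute names derived from C++ expressions (for example "causedBy_status"_attr or "mdata"_attr) with stable, descriptive names such as "error"_attr and "connectionState"_attr, with causedBy(...) strings turned into redacted "error"_attr attributes. The sketch below shows the before/after shape for orientation only; the log IDs, the wrapper function, and the variable names are placeholders invented for illustration, and the component define and header are assumed to be the usual per-file logv2 setup rather than code taken from this changeset.

    // Illustrative sketch of the LOGV2 cleanup applied throughout this changeset.
    // The log IDs 999990/999991 and logLockAcquired() are invented placeholders.
    #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding

    #include "mongo/base/string_data.h"
    #include "mongo/bson/oid.h"
    #include "mongo/logv2/log.h"

    namespace mongo {

    void logLockAcquired(StringData name, const OID& lockSessionID) {
        // Old style: a format string only, with attribute names generated from the
        // C++ expressions that were interpolated into it.
        LOGV2(999990,
              "distributed lock '{name}' acquired, ts : {lockSessionID}",
              "name"_attr = name,
              "lockSessionID"_attr = lockSessionID);

        // New style: the format string is followed by a constant message, and the
        // attributes get stable, human-readable names.
        LOGV2(999991,
              "Acquired distributed lock {lockName} with sessionId {lockSessionId}",
              "Acquired distributed lock",
              "lockName"_attr = name,
              "lockSessionId"_attr = lockSessionID);
    }

    }  // namespace mongo

Structured-log consumers key on the constant message and the new attribute names rather than on the formatted text, which is why the renames in the hunks below matter beyond cosmetics.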
diff --git a/src/mongo/s/balancer_configuration.cpp b/src/mongo/s/balancer_configuration.cpp index be3887de2ed..f52a9c5a4bb 100644 --- a/src/mongo/s/balancer_configuration.cpp +++ b/src/mongo/s/balancer_configuration.cpp @@ -243,11 +243,10 @@ Status BalancerConfiguration::_refreshChunkSizeSettings(OperationContext* opCtx) if (settings.getMaxChunkSizeBytes() != getMaxChunkSizeBytes()) { LOGV2(22640, - "MaxChunkSize changing from {getMaxChunkSizeBytes_1024_1024}MB to " - "{settings_getMaxChunkSizeBytes_1024_1024}MB", - "getMaxChunkSizeBytes_1024_1024"_attr = getMaxChunkSizeBytes() / (1024 * 1024), - "settings_getMaxChunkSizeBytes_1024_1024"_attr = - settings.getMaxChunkSizeBytes() / (1024 * 1024)); + "Changing MaxChunkSize setting to {newMaxChunkSizeMB}MB from {oldMaxChunkSizeMB}MB", + "Changing MaxChunkSize setting", + "newMaxChunkSizeMB"_attr = settings.getMaxChunkSizeBytes() / (1024 * 1024), + "oldMaxChunkSizeMB"_attr = getMaxChunkSizeBytes() / (1024 * 1024)); _maxChunkSizeBytes.store(settings.getMaxChunkSizeBytes()); } @@ -273,9 +272,10 @@ Status BalancerConfiguration::_refreshAutoSplitSettings(OperationContext* opCtx) if (settings.getShouldAutoSplit() != getShouldAutoSplit()) { LOGV2(22641, - "ShouldAutoSplit changing from {getShouldAutoSplit} to {settings_getShouldAutoSplit}", - "getShouldAutoSplit"_attr = getShouldAutoSplit(), - "settings_getShouldAutoSplit"_attr = settings.getShouldAutoSplit()); + "Changing ShouldAutoSplit setting to {newShouldAutoSplit} from {oldShouldAutoSplit}", + "Changing ShouldAutoSplit setting", + "newShouldAutoSplit"_attr = settings.getShouldAutoSplit(), + "oldShouldAutoSplit"_attr = getShouldAutoSplit()); _shouldAutoSplit.store(settings.getShouldAutoSplit()); } @@ -404,11 +404,10 @@ bool BalancerSettingsType::isTimeInBalancingWindow(const boost::posix_time::ptim }; LOGV2_DEBUG(24094, 1, - "inBalancingWindow: now: {timeToString_now} startTime: " - "{timeToString_activeWindowStart} stopTime: {timeToString_activeWindowStop}", - "timeToString_now"_attr = timeToString(now), - "timeToString_activeWindowStart"_attr = timeToString(*_activeWindowStart), - "timeToString_activeWindowStop"_attr = timeToString(*_activeWindowStop)); + "inBalancingWindow", + "now"_attr = timeToString(now), + "activeWindowStart"_attr = timeToString(*_activeWindowStart), + "activeWindowStop"_attr = timeToString(*_activeWindowStop)); if (*_activeWindowStop > *_activeWindowStart) { if ((now >= *_activeWindowStart) && (now <= *_activeWindowStop)) { diff --git a/src/mongo/s/catalog/replset_dist_lock_manager.cpp b/src/mongo/s/catalog/replset_dist_lock_manager.cpp index 3368f305815..09745931f44 100644 --- a/src/mongo/s/catalog/replset_dist_lock_manager.cpp +++ b/src/mongo/s/catalog/replset_dist_lock_manager.cpp @@ -109,10 +109,10 @@ void ReplSetDistLockManager::shutDown(OperationContext* opCtx) { auto status = _catalog->stopPing(opCtx, _processID); if (!status.isOK()) { LOGV2_WARNING(22667, - "error encountered while cleaning up distributed ping entry for " - "{processID}{causedBy_status}", - "processID"_attr = _processID, - "causedBy_status"_attr = causedBy(redact(status))); + "Error cleaning up distributed ping entry for {processId} caused by {error}", + "Error cleaning up distributed ping entry", + "processId"_attr = _processID, + "error"_attr = redact(status)); } } @@ -127,9 +127,10 @@ bool ReplSetDistLockManager::isShutDown() { void ReplSetDistLockManager::doTask() { LOGV2(22649, - "creating distributed lock ping thread for process {processID} (sleeping for " - "{pingInterval})", - 
"processID"_attr = _processID, + "Creating distributed lock ping thread for process {processId} with ping interval " + "{pingInterval}", + "Creating distributed lock ping thread", + "processId"_attr = _processID, "pingInterval"_attr = _pingInterval); Timer elapsedSincelastPing(_serviceContext->getTickSource()); @@ -142,16 +143,18 @@ void ReplSetDistLockManager::doTask() { if (!pingStatus.isOK() && pingStatus != ErrorCodes::NotMaster) { LOGV2_WARNING(22668, - "pinging failed for distributed lock pinger{causedBy_pingStatus}", - "causedBy_pingStatus"_attr = causedBy(pingStatus)); + "Pinging failed for distributed lock pinger caused by {error}", + "Pinging failed for distributed lock pinger", + "error"_attr = pingStatus); } const Milliseconds elapsed(elapsedSincelastPing.millis()); if (elapsed > 10 * _pingInterval) { LOGV2_WARNING(22669, - "Lock pinger for proc: {processID} was inactive for {elapsed} ms", - "processID"_attr = _processID, - "elapsed"_attr = elapsed); + "Lock pinger for process {processId} was inactive for {duration}", + "Lock pinger was inactive for multiple intervals", + "processId"_attr = _processID, + "duration"_attr = elapsed); } elapsedSincelastPing.reset(); @@ -162,36 +165,33 @@ void ReplSetDistLockManager::doTask() { } for (const auto& toUnlock : toUnlockBatch) { - std::string nameMessage = ""; Status unlockStatus(ErrorCodes::NotYetInitialized, "status unlock not initialized!"); if (toUnlock.second) { // A non-empty _id (name) field was provided, unlock by ts (sessionId) and _id. unlockStatus = _catalog->unlock(opCtx.get(), toUnlock.first, *toUnlock.second); - nameMessage = " and " + LocksType::name() + ": " + *toUnlock.second; } else { unlockStatus = _catalog->unlock(opCtx.get(), toUnlock.first); } if (!unlockStatus.isOK()) { LOGV2_WARNING(22670, - "Failed to unlock lock with {LocksType_lockID}: " - "{toUnlock_first}{nameMessage}{causedBy_unlockStatus}", - "LocksType_lockID"_attr = LocksType::lockID(), - "toUnlock_first"_attr = toUnlock.first, - "nameMessage"_attr = nameMessage, - "causedBy_unlockStatus"_attr = causedBy(unlockStatus)); + "Error unlocking distributed lock {lockName} with sessionID " + "{lockSessionId} caused by {error}", + "Error unlocking distributed lock", + "lockName"_attr = toUnlock.second, + "lockSessionId"_attr = toUnlock.first, + "error"_attr = unlockStatus); // Queue another attempt, unless the problem was no longer being primary. 
if (unlockStatus != ErrorCodes::NotMaster) { queueUnlock(toUnlock.first, toUnlock.second); } } else { LOGV2(22650, - "distributed lock with {LocksType_lockID}: {toUnlock_first}{nameMessage} " - "unlocked.", - "LocksType_lockID"_attr = LocksType::lockID(), - "toUnlock_first"_attr = toUnlock.first, - "nameMessage"_attr = nameMessage); + "Unlocked distributed lock {lockName} with sessionID {lockSessionId}", + "Unlocked distributed lock", + "lockName"_attr = toUnlock.second, + "lockSessionId"_attr = toUnlock.first); } if (isShutDown()) { @@ -267,11 +267,12 @@ StatusWith<bool> ReplSetDistLockManager::isLockExpired(OperationContext* opCtx, LOGV2_DEBUG(22651, 1, - "checking last ping for lock '{lockDoc_getName}' against last seen process " - "{pingInfo_processId} and ping {pingInfo_lastPing}", - "lockDoc_getName"_attr = lockDoc.getName(), - "pingInfo_processId"_attr = pingInfo->processId, - "pingInfo_lastPing"_attr = pingInfo->lastPing); + "Checking last ping for lock {lockName} against last seen process {processId} and " + "ping {lastPing}", + "Checking last ping for lock", + "lockName"_attr = lockDoc.getName(), + "processId"_attr = pingInfo->processId, + "lastPing"_attr = pingInfo->lastPing); if (pingInfo->lastPing != pingValue || // ping is active @@ -291,34 +292,34 @@ StatusWith<bool> ReplSetDistLockManager::isLockExpired(OperationContext* opCtx, if (configServerLocalTime < pingInfo->configLocalTime) { LOGV2_WARNING(22671, - "config server local time went backwards, from last seen: " - "{pingInfo_configLocalTime} to {configServerLocalTime}", - "pingInfo_configLocalTime"_attr = pingInfo->configLocalTime, - "configServerLocalTime"_attr = configServerLocalTime); + "Config server local time went backwards, new value " + "{newConfigServerLocalTime}, old value {oldConfigServerLocalTime}", + "Config server local time went backwards", + "newConfigServerLocalTime"_attr = configServerLocalTime, + "oldConfigServerLocalTime"_attr = pingInfo->configLocalTime); return false; } Milliseconds elapsedSinceLastPing(configServerLocalTime - pingInfo->configLocalTime); if (elapsedSinceLastPing >= lockExpiration) { LOGV2(22652, - "forcing lock '{lockDoc_getName}' because elapsed time {elapsedSinceLastPing} >= " - "takeover time {lockExpiration}", - "lockDoc_getName"_attr = lockDoc.getName(), + "Forcing lock {lockName} because elapsed time {elapsedSinceLastPing} >= " + "takeover time {lockExpirationTimeout}", + "Forcing lock because too much time has passed from last ping", + "lockName"_attr = lockDoc.getName(), "elapsedSinceLastPing"_attr = elapsedSinceLastPing, - "lockExpiration"_attr = lockExpiration); + "lockExpirationTimeout"_attr = lockExpiration); return true; } LOGV2_DEBUG(22653, 1, - "could not force lock '{lockDoc_getName}' because elapsed time " - "{durationCount_Milliseconds_elapsedSinceLastPing} < takeover time " - "{durationCount_Milliseconds_lockExpiration} ms", - "lockDoc_getName"_attr = lockDoc.getName(), - "durationCount_Milliseconds_elapsedSinceLastPing"_attr = - durationCount<Milliseconds>(elapsedSinceLastPing), - "durationCount_Milliseconds_lockExpiration"_attr = - durationCount<Milliseconds>(lockExpiration)); + "Could not force lock of {lockName} because elapsed time {elapsedSinceLastPing} < " + "takeover time {lockExpirationTimeout}", + "Could not force lock because too little time has passed from last ping", + "lockName"_attr = lockDoc.getName(), + "elapsedSinceLastPing"_attr = elapsedSinceLastPing, + "lockExpirationTimeout"_attr = lockExpiration); return false; } @@ -351,18 +352,19 @@ 
StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo LOGV2_DEBUG(22654, 1, - "trying to acquire new distributed lock for {name} ( lock timeout : " - "{durationCount_Milliseconds_lockExpiration} ms, ping interval : " - "{durationCount_Milliseconds_pingInterval} ms, process : {processID} ) with " - "lockSessionID: {lockSessionID}, why: {whyMessage}", - "name"_attr = name, - "durationCount_Milliseconds_lockExpiration"_attr = - durationCount<Milliseconds>(lockExpiration), - "durationCount_Milliseconds_pingInterval"_attr = - durationCount<Milliseconds>(_pingInterval), - "processID"_attr = _processID, - "lockSessionID"_attr = lockSessionID, - "whyMessage"_attr = whyMessage.toString()); + "Trying to acquire new distributed lock for {lockName} ( " + "lockSessionID: {lockSessionId}, " + "process : {processId}, " + "lock timeout : {lockExpirationTimeout}, " + "ping interval : {pingInterval}, " + "reason: {reason} )", + "Trying to acquire new distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID, + "processId"_attr = _processID, + "lockExpirationTimeout"_attr = lockExpiration, + "pingInterval"_attr = _pingInterval, + "reason"_attr = whyMessage); auto lockResult = _catalog->grabLock( opCtx, name, lockSessionID, who, _processID, Date_t::now(), whyMessage.toString()); @@ -373,10 +375,12 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo // Lock is acquired since findAndModify was able to successfully modify // the lock document. LOGV2(22655, - "distributed lock '{name}' acquired for '{whyMessage}', ts : {lockSessionID}", - "name"_attr = name, - "whyMessage"_attr = whyMessage.toString(), - "lockSessionID"_attr = lockSessionID); + "Acquired distributed lock {lockName} with session ID {lockSessionId} for " + "{reason}", + "Acquired distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID, + "reason"_attr = whyMessage); return lockSessionID; } @@ -385,10 +389,13 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo networkErrorRetries < kMaxNumLockAcquireRetries) { LOGV2_DEBUG(22656, 1, - "Failed to acquire distributed lock because of retriable error. Retrying " - "acquisition by first unlocking the stale entry, which possibly exists " - "now{causedBy_status}", - "causedBy_status"_attr = causedBy(redact(status))); + "Error acquiring distributed lock because of retryable error. " + "Retrying acquisition by first unlocking the stale entry, which possibly " + "exists now. Caused by {error}", + "Error acquiring distributed lock because of retryable error. " + "Retrying acquisition by first unlocking the stale entry, which possibly " + "exists now", + "error"_attr = redact(status)); networkErrorRetries++; @@ -403,9 +410,9 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo LOGV2_DEBUG(22657, 1, - "Failed to retry acquisition of distributed lock. No more attempts will be " - "made{causedBy_status}", - "causedBy_status"_attr = causedBy(redact(status))); + "Last attempt to acquire distributed lock failed with {error}", + "Last attempt to acquire distributed lock failed", + "error"_attr = redact(status)); } if (status != ErrorCodes::LockStateChangeFailed) { @@ -449,11 +456,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo // Lock is acquired since findAndModify was able to successfully modify // the lock document. 
- LOGV2(22658, "lock '{name}' successfully forced", "name"_attr = name); - LOGV2(22659, - "distributed lock '{name}' acquired, ts : {lockSessionID}", - "name"_attr = name, - "lockSessionID"_attr = lockSessionID); + LOGV2(22658, + "Acquired distributed lock {lockName} with sessionId {lockSessionId}", + "Acquired distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID); return lockSessionID; } @@ -466,7 +473,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo } } - LOGV2_DEBUG(22660, 1, "distributed lock '{name}' was not acquired.", "name"_attr = name); + LOGV2_DEBUG(22660, + 1, + "Distributed lock {lockName} was not acquired", + "Distributed lock was not acquired", + "lockName"_attr = name); if (waitFor == Milliseconds::zero()) { break; @@ -475,10 +486,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo // Periodically message for debugging reasons if (msgTimer.seconds() > 10) { LOGV2(22661, - "waited {timer_seconds}s for distributed lock {name} for {whyMessage}", - "timer_seconds"_attr = timer.seconds(), - "name"_attr = name, - "whyMessage"_attr = whyMessage.toString()); + "Waited {elapsed} for distributed lock {lockName} for {reason}", + "Waiting for distributed lock", + "lockName"_attr = name, + "elapsed"_attr = Seconds(timer.seconds()), + "reason"_attr = whyMessage); msgTimer.reset(); } @@ -501,18 +513,19 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::tryLockWithLocalWriteConcern( LOGV2_DEBUG(22662, 1, - "trying to acquire new distributed lock for {name} ( lock timeout : " - "{durationCount_Milliseconds_lockExpiration} ms, ping interval : " - "{durationCount_Milliseconds_pingInterval} ms, process : {processID} ) with " - "lockSessionID: {lockSessionID}, why: {whyMessage}", - "name"_attr = name, - "durationCount_Milliseconds_lockExpiration"_attr = - durationCount<Milliseconds>(_lockExpiration), - "durationCount_Milliseconds_pingInterval"_attr = - durationCount<Milliseconds>(_pingInterval), - "processID"_attr = _processID, - "lockSessionID"_attr = lockSessionID, - "whyMessage"_attr = whyMessage.toString()); + "Trying to acquire new distributed lock for {lockName} ( " + "process : {processId}, " + "lockSessionID: {lockSessionId}, " + "lock timeout : {lockExpirationTimeout}, " + "ping interval : {pingInterval}, " + "reason: {reason} )", + "Trying to acquire new distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID, + "processId"_attr = _processID, + "lockExpirationTimeout"_attr = _lockExpiration, + "pingInterval"_attr = _pingInterval, + "reason"_attr = whyMessage); auto lockStatus = _catalog->grabLock(opCtx, name, @@ -525,14 +538,20 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::tryLockWithLocalWriteConcern( if (lockStatus.isOK()) { LOGV2(22663, - "distributed lock '{name}' acquired for '{whyMessage}', ts : {lockSessionID}", - "name"_attr = name, - "whyMessage"_attr = whyMessage.toString(), - "lockSessionID"_attr = lockSessionID); + "Acquired distributed lock {lockName} with session ID {lockSessionId} for " + "{reason}", + "Acquired distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID, + "reason"_attr = whyMessage); return lockSessionID; } - LOGV2_DEBUG(22664, 1, "distributed lock '{name}' was not acquired.", "name"_attr = name); + LOGV2_DEBUG(22664, + 1, + "Distributed lock {lockName} was not acquired", + "Distributed lock was not acquired", + "lockName"_attr = name); if (lockStatus == ErrorCodes::LockStateChangeFailed) 
{ return {ErrorCodes::LockBusy, str::stream() << "Unable to acquire " << name}; @@ -548,9 +567,9 @@ void ReplSetDistLockManager::unlock(OperationContext* opCtx, const DistLockHandl queueUnlock(lockSessionID, boost::none); } else { LOGV2(22665, - "distributed lock with {LocksType_lockID}: {lockSessionID}' unlocked.", - "LocksType_lockID"_attr = LocksType::lockID(), - "lockSessionID"_attr = lockSessionID); + "Unlocked distributed lock with sessionID {lockSessionId}", + "Unlocked distributed lock", + "lockSessionId"_attr = lockSessionID); } } @@ -563,21 +582,22 @@ void ReplSetDistLockManager::unlock(OperationContext* opCtx, queueUnlock(lockSessionID, name.toString()); } else { LOGV2(22666, - "distributed lock with {LocksType_lockID}: '{lockSessionID}' and {LocksType_name}: " - "'{name}' unlocked.", - "LocksType_lockID"_attr = LocksType::lockID(), - "lockSessionID"_attr = lockSessionID, - "LocksType_name"_attr = LocksType::name(), - "name"_attr = name.toString()); + "Unlocked distributed lock {lockName} with sessionID {lockSessionId}", + "Unlocked distributed lock", + "lockName"_attr = name, + "lockSessionId"_attr = lockSessionID); } } void ReplSetDistLockManager::unlockAll(OperationContext* opCtx, const std::string& processID) { Status status = _catalog->unlockAll(opCtx, processID); if (!status.isOK()) { - LOGV2_WARNING(22672, - "Error while trying to unlock existing distributed locks{causedBy_status}", - "causedBy_status"_attr = causedBy(redact(status))); + LOGV2_WARNING( + 22672, + "Error unlocking all distributed locks for process {processId} caused by {error}", + "Error unlocking all existing distributed locks for a process", + "processId"_attr = processID, + "error"_attr = redact(status)); } } diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp index a1e3da49eb9..1bd8d8afa52 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp +++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp @@ -724,8 +724,9 @@ Status ShardingCatalogClientImpl::applyChunkOpsDeprecated(OperationContext* opCt LOGV2_WARNING( 22675, - "chunk operation commit failed and metadata will be revalidated{causedBy_status}", - "causedBy_status"_attr = causedBy(redact(status))); + "Error committing chunk operation, metadata will be revalidated. Caused by {error}", + "Error committing chunk operation, metadata will be revalidated", + "error"_attr = redact(status)); // Look for the chunk in this shard whose version got bumped. We assume that if that // mod made it to the config server, then transaction was successful. diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp index 2a93e98518c..d80d5befe14 100644 --- a/src/mongo/s/catalog_cache.cpp +++ b/src/mongo/s/catalog_cache.cpp @@ -346,8 +346,10 @@ void CatalogCache::onStaleDatabaseVersion(const StringData dbName, } else if (!itDbEntry->second->dbt || databaseVersion::equal(itDbEntry->second->dbt->getVersion(), databaseVersion)) { // If the versions match, the cached database info is stale, so mark it as needs refresh. 
- LOGV2( - 22642, "Marking cached database entry for '{dbName}' as stale", "dbName"_attr = dbName); + LOGV2(22642, + "Marking cached database entry for {db} as stale", + "Marking cached database entry as stale", + "db"_attr = dbName); itDbEntry->second->needsRefresh = true; } } @@ -467,22 +469,26 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { stdx::lock_guard<Latch> lg(_mutex); LOGV2(22643, - "Starting to invalidate databases and collections with data on shard: {shardId}", + "Starting to invalidate databases and collections with data on shard {shardId}", + "Starting to invalidate databases and collections referencing a specific shard", "shardId"_attr = shardId); // Invalidate databases with this shard as their primary. for (const auto& [dbNs, dbInfoEntry] : _databases) { LOGV2_DEBUG(22644, 3, - "Checking if database {dbNs}has primary shard: {shardId}", - "dbNs"_attr = dbNs, - "shardId"_attr = shardId); + "Checking if database {db} has primary shard: {primaryShardId}", + "Checking if database matches primary shard", + "db"_attr = dbNs, + "primaryShardId"_attr = shardId); if (!dbInfoEntry->needsRefresh && dbInfoEntry->dbt->getPrimary() == shardId) { LOGV2_DEBUG(22645, 3, - "Database {dbNs}has primary shard {shardId}, invalidating cache entry", - "dbNs"_attr = dbNs, - "shardId"_attr = shardId); + "Invalidating cache entry for database {db} that has primary shard " + "{primaryShardId}", + "Invalidating database cache entry", + "db"_attr = dbNs, + "primaryShardId"_attr = shardId); dbInfoEntry->needsRefresh = true; } } @@ -493,8 +499,9 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { LOGV2_DEBUG(22646, 3, - "Checking if {collNs}has data on shard: {shardId}", - "collNs"_attr = collNs, + "Checking if collection {namespace} has data on shard {shardId}", + "Checking if collection has data on specific shard", + "namespace"_attr = collNs, "shardId"_attr = shardId); if (!collRoutingInfoEntry->needsRefresh) { @@ -506,8 +513,10 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { shardsOwningDataForCollection.end()) { LOGV2_DEBUG(22647, 3, - "{collNs}has data on shard {shardId}, invalidating cache entry", - "collNs"_attr = collNs, + "Invalidating cached collection {namespace} that has data " + "on shard {shardId}", + "Invalidating cached collection", + "namespace"_attr = collNs, "shardId"_attr = shardId); collRoutingInfoEntry->needsRefresh = true; @@ -519,6 +528,7 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { LOGV2(22648, "Finished invalidating databases and collections with data on shard: {shardId}", + "Finished invalidating databases and collections that reference specific shard", "shardId"_attr = shardId); } @@ -573,11 +583,12 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk, if (!swDbt.isOK()) { LOGV2_OPTIONS(24100, {logv2::LogComponent::kShardingCatalogRefresh}, - "Refresh for database {dbName} took {t_millis} ms and " - "failed{causedBy_swDbt_getStatus}", - "dbName"_attr = dbName, - "t_millis"_attr = t.millis(), - "causedBy_swDbt_getStatus"_attr = causedBy(redact(swDbt.getStatus()))); + "Error refreshing cached database entry for {db}. 
Took {duration} and " + "failed due to {error}", + "Error refreshing cached database entry", + "db"_attr = dbName, + "duration"_attr = Milliseconds(t.millis()), + "error"_attr = redact(swDbt.getStatus())); return; } @@ -591,14 +602,13 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk, LOGV2_FOR_CATALOG_REFRESH( 24101, logSeverityV1toV2(logLevel).toInt(), - "Refresh for database {dbName} from version " - "{dbEntry_dbt_dbEntry_dbt_getVersion_BSONObj} to version {dbVersionAfterRefresh} " - "took {t_millis} ms", - "dbName"_attr = dbName, - "dbEntry_dbt_dbEntry_dbt_getVersion_BSONObj"_attr = - (dbEntry->dbt ? dbEntry->dbt->getVersion().toBSON() : BSONObj()), - "dbVersionAfterRefresh"_attr = dbVersionAfterRefresh.toBSON(), - "t_millis"_attr = t.millis()); + "Refreshed cached database entry for {db} to version {newDbVersion} from version " + "{oldDbVersion}. Took {duration}", + "Refreshed cached database entry", + "db"_attr = dbName, + "newDbVersion"_attr = dbVersionAfterRefresh.toBSON(), + "oldDbVersion"_attr = (dbEntry->dbt ? dbEntry->dbt->getVersion().toBSON() : BSONObj()), + "duration"_attr = Milliseconds(t.millis())); }; // Invoked if getDatabase resulted in error or threw and exception @@ -636,14 +646,14 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk, dbEntry->dbt = std::move(swDbt.getValue()); }; - LOGV2_FOR_CATALOG_REFRESH( - 24102, - 1, - "Refreshing cached database entry for {dbName}; current cached database info is " - "{dbEntry_dbt_dbEntry_dbt_BSONObj}", - "dbName"_attr = dbName, - "dbEntry_dbt_dbEntry_dbt_BSONObj"_attr = - (dbEntry->dbt ? dbEntry->dbt->toBSON() : BSONObj())); + LOGV2_FOR_CATALOG_REFRESH(24102, + 1, + "Refreshing cached database entry for {db}; current cached database " + "info is {currentDbInfo}", + "Refreshing cached database entry", + "db"_attr = dbName, + "currentDbInfo"_attr = + (dbEntry->dbt ? dbEntry->dbt->toBSON() : BSONObj())); try { _cacheLoader.getDatabase(dbName, refreshCallback); @@ -685,13 +695,14 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk, if (!status.isOK()) { _stats.countFailedRefreshes.addAndFetch(1); - LOGV2_OPTIONS( - 24103, - {logv2::LogComponent::kShardingCatalogRefresh}, - "Refresh for collection {nss} took {t_millis} ms and failed{causedBy_status}", - "nss"_attr = nss, - "t_millis"_attr = t.millis(), - "causedBy_status"_attr = causedBy(redact(status))); + LOGV2_OPTIONS(24103, + {logv2::LogComponent::kShardingCatalogRefresh}, + "Error refreshing cached collection {namespace}; Took {duration} and " + "failed due to {error}", + "Error refreshing cached collection", + "namespace"_attr = nss, + "duration"_attr = Milliseconds(t.millis()), + "error"_attr = redact(status)); } else if (routingInfoAfterRefresh) { const int logLevel = (!existingRoutingInfo || @@ -702,25 +713,24 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk, LOGV2_FOR_CATALOG_REFRESH( 24104, logSeverityV1toV2(logLevel).toInt(), - "Refresh for collection " - "{nss}{existingRoutingInfo_from_version_existingRoutingInfo_getVersion} to version " - "{routingInfoAfterRefresh_getVersion} took {t_millis} ms", - "nss"_attr = nss.toString(), - "existingRoutingInfo_from_version_existingRoutingInfo_getVersion"_attr = + "Refreshed cached collection {namespace} to version {newVersion} from version " + "{oldVersion}. Took {duration}", + "Refreshed cached collection", + "namespace"_attr = nss, + "newVersion"_attr = routingInfoAfterRefresh->getVersion(), + "oldVersion"_attr = (existingRoutingInfo ? 
(" from version " + existingRoutingInfo->getVersion().toString()) : ""), - "routingInfoAfterRefresh_getVersion"_attr = - routingInfoAfterRefresh->getVersion().toString(), - "t_millis"_attr = t.millis()); + "duration"_attr = Milliseconds(t.millis())); } else { - LOGV2_OPTIONS( - 24105, - {logv2::LogComponent::kShardingCatalogRefresh}, - "Refresh for collection {nss} took {t_millis} ms and found the collection is not " - "sharded", - "nss"_attr = nss, - "t_millis"_attr = t.millis()); + LOGV2_OPTIONS(24105, + {logv2::LogComponent::kShardingCatalogRefresh}, + "Collection {namespace} was found to be unsharded after refresh that " + "took {duration}", + "Collection has found to be unsharded after refresh", + "namespace"_attr = nss, + "duration"_attr = Milliseconds(t.millis())); } }; @@ -786,10 +796,9 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk, LOGV2_FOR_CATALOG_REFRESH( 24106, 1, - "Refreshing chunks for collection {nss}; current collection version is " - "{startingCollectionVersion}", - "nss"_attr = nss, - "startingCollectionVersion"_attr = startingCollectionVersion); + "Refreshing cached collection {namespace} with version {currentCollectionVersion}", + "namespace"_attr = nss, + "currentCollectionVersion"_attr = startingCollectionVersion); try { _cacheLoader.getChunksSince(nss, startingCollectionVersion, refreshCallback); diff --git a/src/mongo/s/chunk_manager_index_bounds_test.cpp b/src/mongo/s/chunk_manager_index_bounds_test.cpp index fa30a2795d5..3ef9ca2c551 100644 --- a/src/mongo/s/chunk_manager_index_bounds_test.cpp +++ b/src/mongo/s/chunk_manager_index_bounds_test.cpp @@ -89,9 +89,9 @@ protected: if (Interval::INTERVAL_EQUALS != oil.intervals[i].compare(expectedOil.intervals[i])) { LOGV2(22676, - "{oil_intervals_i} != {expectedOil_intervals_i}", - "oil_intervals_i"_attr = oil.intervals[i], - "expectedOil_intervals_i"_attr = expectedOil.intervals[i]); + "Found mismatching field interval", + "queryFieldInterval"_attr = oil.intervals[i], + "expectedFieldInterval"_attr = expectedOil.intervals[i]); } ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[i].compare(expectedOil.intervals[i])); @@ -111,9 +111,10 @@ protected: const OrderedIntervalList& oil = indexBounds.fields.front(); if (oil.intervals.size() != expectedOil.intervals.size()) { - for (size_t i = 0; i < oil.intervals.size(); i++) { - LOGV2(22677, "{oil_intervals_i}", "oil_intervals_i"_attr = oil.intervals[i]); - } + LOGV2(22677, + "Found mismatching field intervals", + "queryFieldInterval"_attr = oil, + "expectedFieldInterval"_attr = expectedOil); } ASSERT_EQUALS(oil.intervals.size(), expectedOil.intervals.size()); diff --git a/src/mongo/s/client/parallel.cpp b/src/mongo/s/client/parallel.cpp index cda8113ff33..5161808c140 100644 --- a/src/mongo/s/client/parallel.cpp +++ b/src/mongo/s/client/parallel.cpp @@ -378,9 +378,11 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( dassert(repl); LOGV2_WARNING( 22695, - "Primary for {repl_getServerAddress} was down before, bypassing " + "Primary for {replicaSetAddress} was down before, bypassing " "setShardVersion. 
The local replica set view and targeting may be stale.", - "repl_getServerAddress"_attr = repl->getServerAddress()); + "Bypassing setShardVersion because replica set view and targeting may be stale " + "from primary having been down", + "replicaSetAddress"_attr = repl->getServerAddress()); } return; @@ -392,8 +394,9 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( LOGV2_DEBUG(22678, 2, "pcursor: needed to set remote version on connection to value compatible " - "with {vinfo}", - "vinfo"_attr = vinfo); + "with {shardVersion}", + "pcursor: needed to set remote version on connection", + "shardVersion"_attr = vinfo); } } catch (const DBException& dbExcep) { auto errCode = dbExcep.code(); @@ -408,10 +411,13 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(state->conn->getRawConn()); dassert(repl); - LOGV2_WARNING(22696, - "Cannot contact primary for {repl_getServerAddress} to check shard " - "version. The local replica set view and targeting may be stale.", - "repl_getServerAddress"_attr = repl->getServerAddress()); + LOGV2_WARNING( + 22696, + "Cannot contact replica set {replicaSetAdress} to check shard version. " + "The local replica set view and targeting may be stale", + "Cannot contact replica set to check shard version. " + "The local replica set view and targeting may be stale", + "replicaSetAddress"_attr = repl->getServerAddress()); } } else { throw; @@ -433,10 +439,11 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { } LOGV2_DEBUG(22679, 2, - "pcursor: {prefix} pcursor over {qSpec} and {cInfo}", + "pcursor: {prefix} pcursor over {query} and {command}", + "pcursor", "prefix"_attr = prefix, - "qSpec"_attr = _qSpec, - "cInfo"_attr = _cInfo); + "query"_attr = _qSpec, + "command"_attr = _cInfo); shared_ptr<ChunkManager> manager; shared_ptr<Shard> primary; @@ -484,9 +491,10 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { LOGV2_DEBUG(22680, 2, "pcursor: closing cursor on shard {shardId} as the connection is no longer " - "required by {vinfo}", + "required by {shardVersion}", + "pcursor: closing cursor as the connection is no longer required", "shardId"_attr = shardId, - "vinfo"_attr = vinfo); + "shardVersion"_attr = vinfo); cmEntry.second.cleanup(true); } @@ -494,9 +502,10 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { LOGV2_DEBUG(22681, 2, - "pcursor: initializing over {shardIds_size} shards required by {vinfo}", - "shardIds_size"_attr = shardIds.size(), - "vinfo"_attr = vinfo); + "pcursor: initializing over {shardsNumber} shards required by {shardVersion}", + "pcursor: initializing for all shards", + "shardsNumber"_attr = shardIds.size(), + "shardVersion"_attr = vinfo); // Don't retry indefinitely for whatever reason _totalTries++; @@ -507,9 +516,11 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { LOGV2_DEBUG(22682, 2, - "pcursor: initializing on shard {shardId}, current connection state is {mdata}", + "pcursor: initializing on shard {shardId}, current connection state is " + "{connectionState}", + "pcursor: initializing on shard", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + "connectionState"_attr = mdata.toBSON()); // This may be the first time connecting to this shard, if so we can get an error here try { @@ -633,12 +644,13 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { LOGV2_DEBUG(22683, 2, - "pcursor: initialized 
{isCommand_command_query}{lazyInit_lazily_full}on " - "shard {shardId}, current connection state is {mdata}", - "isCommand_command_query"_attr = (isCommand() ? "command " : "query "), - "lazyInit_lazily_full"_attr = (lazyInit ? "(lazily) " : "(full) "), + "pcursor: initialized {commandType} ({lazyInit}) on shard {shardId}, " + "current connection state is {connectionState}", + "pcursor: initialized command", + "commandType"_attr = (isCommand() ? "command" : "query"), + "lazyInit"_attr = (lazyInit ? "lazily" : "full"), "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + "connectionState"_attr = mdata.toBSON()); } catch (StaleConfigException& e) { // Our version isn't compatible with the current version anymore on at least one shard, // need to retry immediately @@ -650,20 +662,22 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { ->invalidateShardOrEntireCollectionEntryForShardedCollection( opCtx, nss, e->getVersionWanted(), e->getVersionReceived(), e->getShardId()); - LOGV2_DEBUG( - 22684, - 1, - "stale config of ns {staleNS} during initialization, will retry{causedBy_e}", - "staleNS"_attr = staleNS, - "causedBy_e"_attr = causedBy(redact(e))); + LOGV2_DEBUG(22684, + 1, + "Error initializing pcursor, stale config for namespace {namespace}, " + "caused by {error}, will retry", + "Error initializing pcursor, will retry", + "namespace"_attr = staleNS, + "error"_attr = redact(e)); // This is somewhat strange if (staleNS != nss) { - LOGV2_WARNING( - 22700, - "versioned ns {nss_ns} doesn't match stale config namespace {staleNS}", - "nss_ns"_attr = nss.ns(), - "staleNS"_attr = staleNS); + LOGV2_WARNING(22700, + "Versioned namespace {namespace} doesn't match stale config " + "namespace {staleNamespace}", + "Versioned namespace doesn't match stale config namespace", + "namespace"_attr = nss.ns(), + "staleNamespace"_attr = staleNS); } // Restart with new chunk manager @@ -671,11 +685,12 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { return; } catch (NetworkException& e) { LOGV2_WARNING(22701, - "socket exception when initializing on {shardId}, current connection " - "state is {mdata}{causedBy_e}", + "Error initializing pcursor on {shardId} with current connection state " + "{connectionState} caused by {error}", + "Error initializing pcursor", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(redact(e))); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e)); mdata.errored = true; if (returnPartial) { mdata.cleanup(true); @@ -684,11 +699,12 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { throw; } catch (DBException& e) { LOGV2_WARNING(22702, - "db exception when initializing on {shardId}, current connection state " - "is {mdata}{causedBy_e}", + "Error initializing pcursor on {shardId} with current connection state " + "{connectionState} caused by {error}", + "Error initializing pcursor", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(redact(e))); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e)); mdata.errored = true; if (returnPartial && e.code() == 15925 /* From above! 
*/) { mdata.cleanup(true); @@ -697,19 +713,22 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { throw; } catch (std::exception& e) { LOGV2_WARNING(22703, - "exception when initializing on {shardId}, current connection state is " - "{mdata}{causedBy_e}", + "Error initializing pcursor on {shardId} with current connection state " + "{connectionState} caused by {error}", + "Error initializing pcursor", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(e)); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e.what())); mdata.errored = true; throw; } catch (...) { LOGV2_WARNING(22704, - "unknown exception when initializing on {shardId}, current connection " - "state is {mdata}", + "Error initializing pcursor on {shardId} with current connection state " + "{connectionState} caused by {error}", + "Error initializing pcursor", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = "uknownError"); mdata.errored = true; throw; } @@ -760,8 +779,9 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { LOGV2_DEBUG(22685, 2, - "pcursor: finishing over {cursorMap_size} shards", - "cursorMap_size"_attr = _cursorMap.size()); + "pcursor: finishing over {shardsCount} shards", + "pcursor: finishing initialization", + "shardsCount"_attr = _cursorMap.size()); for (auto& cmEntry : _cursorMap) { const auto& shardId = cmEntry.first; @@ -769,9 +789,11 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { LOGV2_DEBUG(22686, 2, - "pcursor: finishing on shard {shardId}, current connection state is {mdata}", + "pcursor: finishing initialization on shard {shardId}, current connection " + "state is {connectionState}", + "pcursor: finishing initialization for shard", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + "connectionState"_attr = mdata.toBSON()); // Ignore empty conns for now if (!mdata.pcState) @@ -818,12 +840,13 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { // Finalize state state->cursor->attach(state->conn.get()); // Closes connection for us - LOGV2_DEBUG( - 22687, - 2, - "pcursor: finished on shard {shardId}, current connection state is {mdata}", - "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + LOGV2_DEBUG(22687, + 2, + "pcursor: finished on shard {shardId}, current connection state is " + "{connectionState}", + "pcursor: finished initialization for shard", + "shardId"_attr = shardId, + "connectionState"_attr = mdata.toBSON()); } } catch (StaleConfigException& e) { retry = true; @@ -838,11 +861,12 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { continue; } catch (NetworkException& e) { LOGV2_WARNING(22705, - "socket exception when finishing on {shardId}, current connection state " - "is {mdata}{causedBy_e}", + "Error finalizing pcursor initialization for shard {shardId} with " + "connection state {connectionState} caused by {error}", + "Error finalizing pcursor initialization for shard", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(redact(e))); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e)); mdata.errored = true; if (returnPartial) { mdata.cleanup(true); @@ -854,11 +878,12 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { // ABOVE if (e.code() == 15988) { LOGV2_WARNING(22706, - "exception when receiving data from {shardId}, current connection " - 
"state is {mdata}{causedBy_e}", + "Error finalizing pcursor initialization for shard {shardId} with " + "connection state {connectionState} caused by {error}", + "Error finalizing pcursor initialization for shard", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(redact(e))); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e)); mdata.errored = true; if (returnPartial) { @@ -870,36 +895,42 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { // the InvalidBSON exception indicates that the BSON is malformed -> // don't print/call "mdata.toBSON()" to avoid unexpected errors e.g. a segfault if (e.code() == ErrorCodes::InvalidBSON) + LOGV2_WARNING(22707, + "Error finalizing pcursor initialization for shard " + "{shardId} caused by {error}", + "Error finalizing pcursor initialization for shard", + "shardId"_attr = shardId, + "error"_attr = redact(e)); + else LOGV2_WARNING( - 22707, - "bson is malformed :: db exception when finishing on {shardId}{causedBy_e}", + 22708, + "Error finalizing pcursor initialization for shard {shardId} with " + "connection state {connectionState} caused by {error}", + "Error finalizing pcursor initialization for shard", "shardId"_attr = shardId, - "causedBy_e"_attr = causedBy(redact(e))); - else - LOGV2_WARNING(22708, - "db exception when finishing on {shardId}, current connection " - "state is {mdata}{causedBy_e}", - "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(redact(e))); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e)); mdata.errored = true; throw; } } catch (std::exception& e) { LOGV2_WARNING(22709, - "exception when finishing on {shardId}, current connection state is " - "{mdata}{causedBy_e}", + "Error finalizing pcursor initialization for shard {shardId} with " + "connection state {connectionState} caused by {error}", + "Error finalizing pcursor initialization for shard", "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON(), - "causedBy_e"_attr = causedBy(e)); + "connectionState"_attr = mdata.toBSON(), + "error"_attr = redact(e.what())); mdata.errored = true; throw; } catch (...) 
{ - LOGV2_WARNING(22710, - "unknown exception when finishing on {shardId}, current connection state " - "is {mdata}", - "shardId"_attr = shardId, - "mdata"_attr = mdata.toBSON()); + LOGV2_WARNING( + 22710, + "Unknown error finalizing pcursor initialization for shard {shardId} with " + "connection state {connectionState}", + "Unknown error finalizing pcursor initialization for shard", + "shardId"_attr = shardId, + "connectionState"_attr = mdata.toBSON()); mdata.errored = true; throw; } @@ -923,20 +954,22 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { staleNS, *ex->getShardId()); } - LOGV2_DEBUG( - 22688, - 1, - "stale config of ns {staleNS} on finishing query, will retry{causedBy_ex}", - "staleNS"_attr = staleNS, - "causedBy_ex"_attr = causedBy(redact(ex))); + LOGV2_DEBUG(22688, + 1, + "Found stale config for namespace {namespace} on finishing query " + "caused by {error}", + "Found stale config for namespace", + "namespace"_attr = staleNS, + "error"_attr = redact(ex)); // This is somewhat strange if (staleNS.ns() != ns) { - LOGV2_WARNING( - 22711, - "versioned ns {ns} doesn't match stale config namespace {staleNS}", - "ns"_attr = ns, - "staleNS"_attr = staleNS); + LOGV2_WARNING(22711, + "Versioned namespace {namespace} doesn't match stale config " + "namespace {staleNamespace}", + "Versioned namespace doesn't match stale config namespace", + "namespace"_attr = ns, + "staleNamespace"_attr = staleNS); } } } @@ -953,7 +986,10 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { // Erase empty stuff if (!mdata.pcState) { - LOGV2(22689, "PCursor erasing empty state {mdata}", "mdata"_attr = mdata.toBSON()); + LOGV2(22689, + "PCursor erasing empty connection state {connectionState}", + "PCursor erasing empty connection state", + "connectionState"_attr = mdata.toBSON()); _cursorMap.erase(i++); continue; } else { @@ -1043,16 +1079,19 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) { bool firstPass = retryQueries.size() == 0; if (!firstPass) { + vector<StringData> serverHostsToRetry; + serverHostsToRetry.reserve(retryQueries.size()); + transform(retryQueries.begin(), + retryQueries.end(), + std::back_inserter(serverHostsToRetry), + [&](const auto& query) { return serverHosts[query]; }); LOGV2(22690, - "retrying {returnPartial_partial}parallel connection to ", - "returnPartial_partial"_attr = (returnPartial ? 
"(partial) " : "")); - for (set<int>::const_iterator it = retryQueries.begin(); it != retryQueries.end(); - ++it) { - LOGV2(22691, "{serverHosts_it}, ", "serverHosts_it"_attr = serverHosts[*it]); - } - LOGV2(22692, - "{finishedQueries} finished queries.", - "finishedQueries"_attr = finishedQueries); + "Finished queries {finishedQueriesCount}, retrying parallel connection to " + "{serverHostsToRetry}", + "Retrying parallel connection", + "returnPartial"_attr = returnPartial, + "finishedQueriesCount"_attr = finishedQueries, + "serverHostsToRetry"_attr = serverHostsToRetry); } size_t num = 0; @@ -1099,10 +1138,9 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) { LOGV2_DEBUG(22693, 5, - "ParallelSortClusteredCursor::init server:{serverHost} ns:{ns} " - "query:{query} fields:{fields} options: {options}", + "ParallelSortClusteredCursor initialization", "serverHost"_attr = serverHost, - "ns"_attr = _ns, + "namespace"_attr = _ns, "query"_attr = redact(_query), "fields"_attr = redact(_fields), "options"_attr = _options); @@ -1157,9 +1195,10 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) { try { if (!_cursors[i].get()->initLazyFinish(retry)) { LOGV2_WARNING(22712, - "invalid result from {conns_i_getHost}{retry_retrying}", - "conns_i_getHost"_attr = conns[i]->getHost(), - "retry_retrying"_attr = (retry ? ", retrying" : "")); + "Got invalid result from {host}", + "Got invalid result", + "host"_attr = conns[i]->getHost(), + "retrying"_attr = retry); _cursors[i].reset(nullptr, nullptr); if (!retry) { @@ -1263,13 +1302,17 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) { } else if (throwException) { uasserted(14827, errMsg.str()); } else { - LOGV2_WARNING(22713, "{errMsg_str}", "errMsg_str"_attr = redact(errMsg.str())); + LOGV2_WARNING(22713, + "Error while initializing cursor across all shards caused by {error}", + "Error while initializing cursor across all shards", + "error"_attr = redact(errMsg.str())); } } if (retries > 0) LOGV2(22694, - "successfully finished parallel query after {retries} retries", + "Successfully finished parallel query after {retries} retries", + "Successfully finished parallel query", "retries"_attr = retries); } @@ -1363,9 +1406,9 @@ void ParallelConnectionMetadata::cleanup(bool full) { bool retry = false; pcState->cursor->initLazyFinish(retry); } catch (std::exception&) { - LOGV2_WARNING(22714, "exception closing cursor"); + LOGV2_WARNING(22714, "Exception closing cursor"); } catch (...) { - LOGV2_WARNING(22715, "unknown exception closing cursor"); + LOGV2_WARNING(22715, "Unknown exception closing cursor"); } } } diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp index 5aa8d72671d..a9a0c51b67a 100644 --- a/src/mongo/s/client/shard.cpp +++ b/src/mongo/s/client/shard.cpp @@ -125,12 +125,13 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* opCtx, auto swResponse = _runCommand(opCtx, readPref, dbName, maxTimeMSOverride, cmdObj); auto status = CommandResponse::getEffectiveStatus(swResponse); if (isRetriableError(status.code(), retryPolicy)) { - LOGV2_DEBUG( - 22719, - 2, - "Command {cmdObj} failed with retriable error and will be retried{causedBy_status}", - "cmdObj"_attr = redact(cmdObj), - "causedBy_status"_attr = causedBy(redact(status))); + LOGV2_DEBUG(22719, + 2, + "Command {command} failed with retryable error and will be retried. 
Caused " + "by {error}", + "Command failed with retryable error and will be retried", + "command"_attr = redact(cmdObj), + "error"_attr = redact(status)); continue; } @@ -165,12 +166,13 @@ StatusWith<Shard::CommandResponse> Shard::runCommandWithFixedRetryAttempts( auto swResponse = _runCommand(opCtx, readPref, dbName, maxTimeMSOverride, cmdObj); auto status = CommandResponse::getEffectiveStatus(swResponse); if (retry < kOnErrorNumRetries && isRetriableError(status.code(), retryPolicy)) { - LOGV2_DEBUG( - 22720, - 2, - "Command {cmdObj} failed with retriable error and will be retried{causedBy_status}", - "cmdObj"_attr = redact(cmdObj), - "causedBy_status"_attr = causedBy(redact(status))); + LOGV2_DEBUG(22720, + 2, + "Command {command} failed with retryable error and will be retried. Caused " + "by {error}", + "Command failed with retryable error and will be retried", + "command"_attr = redact(cmdObj), + "error"_attr = redact(status)); continue; } @@ -215,10 +217,11 @@ BatchedCommandResponse Shard::runBatchWriteCommand(OperationContext* opCtx, if (retry < kOnErrorNumRetries && isRetriableError(writeStatus.code(), retryPolicy)) { LOGV2_DEBUG(22721, 2, - "Batch write command to {getId} failed with retriable error and will be " - "retried{causedBy_writeStatus}", - "getId"_attr = getId(), - "causedBy_writeStatus"_attr = causedBy(redact(writeStatus))); + "Batch write command to shard {shardId} failed with retryable error " + "and will be retried. Caused by {error}", + "Batch write command failed with retryable error and will be retried", + "shardId"_attr = getId(), + "error"_attr = redact(writeStatus)); continue; } diff --git a/src/mongo/s/client/shard_connection.cpp b/src/mongo/s/client/shard_connection.cpp index 2a4b4e020a9..10584f04e57 100644 --- a/src/mongo/s/client/shard_connection.cpp +++ b/src/mongo/s/client/shard_connection.cpp @@ -186,8 +186,9 @@ public: if (s->avail != nullptr) { LOGV2_WARNING( 22717, - "Detected additional sharded connection in the thread local pool for {addr}", - "addr"_attr = addr); + "Detected additional sharded connection in the thread local pool for {address}", + "Detected additional sharded connection in the thread local pool", + "address"_attr = addr); if (DBException::traceExceptions.load()) { // There shouldn't be more than one connection checked out to the same @@ -249,11 +250,11 @@ public: versionManager.checkShardVersionCB(opCtx, s->avail, ns, false, 1); } catch (const DBException& ex) { - LOGV2_WARNING( - 22718, - "Problem while initially checking shard versions on {shardId}{causedBy_ex}", - "shardId"_attr = shardId, - "causedBy_ex"_attr = causedBy(redact(ex))); + LOGV2_WARNING(22718, + "Error checking shard version on {shardId} caused by {error}", + "Error checking shard version", + "shardId"_attr = shardId, + "error"_attr = redact(ex)); // NOTE: This is only a heuristic, to avoid multiple stale version retries across // multiple shards, and does not affect correctness. 
@@ -405,8 +406,9 @@ ShardConnection::~ShardConnection() { } else { // see done() comments above for why we log this line LOGV2(22716, - "sharded connection to {conn_getServerAddress} not being returned to the pool", - "conn_getServerAddress"_attr = _conn->getServerAddress()); + "Sharded connection to {address} not being returned to the pool", + "Sharded connection not being returned to the pool", + "address"_attr = _conn->getServerAddress()); kill(); } diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index 23ecde36a8c..9bfa02546e9 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -241,8 +241,9 @@ void ShardRegistry::startup(OperationContext* opCtx) { if (!status.isOK()) { LOGV2_FATAL(22737, - "Can't schedule ShardRegistry reload due to {causedBy_status_getStatus}", - "causedBy_status_getStatus"_attr = causedBy(status.getStatus())); + "Error scheduling shard registry reload caused by {error}", + "Error scheduling shard registry reload", + "error"_attr = redact(status.getStatus())); fassertFailed(40252); } } @@ -251,8 +252,9 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) { LOGV2_DEBUG(22726, 1, "Reloading shardRegistry"); if (!cbArgs.status.isOK()) { LOGV2_WARNING(22734, - "cant reload ShardRegistry {causedBy_cbArgs_status}", - "causedBy_cbArgs_status"_attr = causedBy(cbArgs.status)); + "Error reloading shard registry caused by {error}", + "Error reloading shard registry", + "error"_attr = redact(cbArgs.status)); return; } @@ -264,10 +266,11 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) { reload(opCtx.get()); } catch (const DBException& e) { LOGV2(22727, - "Periodic reload of shard registry failed {causedBy_e}; will retry after " - "{kRefreshPeriod}", - "causedBy_e"_attr = causedBy(e), - "kRefreshPeriod"_attr = kRefreshPeriod); + "Error running periodic reload of shard registry caused by {error}; will retry after " + "{shardRegistryReloadInterval}", + "Error running periodic reload of shard registry", + "error"_attr = redact(e), + "shardRegistryReloadInterval"_attr = kRefreshPeriod); } // reschedule itself @@ -276,14 +279,16 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) { [this](const CallbackArgs& cbArgs) { _internalReload(cbArgs); }); if (status.getStatus() == ErrorCodes::ShutdownInProgress) { - LOGV2_DEBUG(22728, 1, "Cant schedule ShardRegistry reload. Executor shutdown in progress"); + LOGV2_DEBUG( + 22728, 1, "Error scheduling shard registry reload. 
Executor shutdown in progress"); return; } if (!status.isOK()) { LOGV2_FATAL(22738, - "Can't schedule ShardRegistry reload due to {causedBy_status_getStatus}", - "causedBy_status_getStatus"_attr = causedBy(status.getStatus())); + "Error scheduling shard registry reload caused by {error}", + "Error scheduling shard registry reload", + "error"_attr = redact(status.getStatus())); fassertFailed(40253); } } @@ -307,8 +312,9 @@ bool ShardRegistry::reload(OperationContext* opCtx) { } catch (const DBException& e) { LOGV2_DEBUG(22729, 1, - "ShardRegistry reload is interrupted due to: {e_toStatus}", - "e_toStatus"_attr = redact(e.toStatus())); + "Error reloading shard registry caused by {error}", + "Error reloading shard registry", + "error"_attr = redact(e)); return false; } @@ -377,9 +383,10 @@ void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContex if (!s) { LOGV2_DEBUG(22730, 1, - "shard not found for set: {connStr} when attempting to inform config servers " - "of updated set membership", - "connStr"_attr = connStr); + "Error updating replica set on config server. Couldn't find shard for " + "replica set {replicaSetConnectionStr}", + "Error updating replica set on config servers. Couldn't find shard", + "replicaSetConnectionStr"_attr = connStr); return; } @@ -398,10 +405,11 @@ void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContex auto status = swWasUpdated.getStatus(); if (!status.isOK()) { LOGV2_ERROR(22736, - "RSChangeWatcher: could not update config db with connection string " - "{connStr}{causedBy_status}", - "connStr"_attr = connStr, - "causedBy_status"_attr = causedBy(redact(status))); + "Error updating replica set {replicaSetConnectionStr} on config server caused " + "by {error}", + "Error updating replica set on config server", + "replicaSetConnectionStr"_attr = connStr, + "error"_attr = redact(status)); } } @@ -428,10 +436,11 @@ ShardRegistryData::ShardRegistryData(OperationContext* opCtx, ShardFactory* shar LOGV2_DEBUG(22731, 1, - "found {shards_size} shards listed on config server(s) with lastVisibleOpTime: " - "{reloadOpTime}", - "shards_size"_attr = shards.size(), - "reloadOpTime"_attr = reloadOpTime.toBSON()); + "Found {shardsNumber} shards listed on config server(s) with lastVisibleOpTime: " + "{lastVisibleOpTime}", + "Succesfully retrieved updated shard list from config server", + "shardsNumber"_attr = shards.size(), + "lastVisibleOpTime"_attr = reloadOpTime); // Ensure targeter exists for all shards and take shard connection string from the targeter. 
// Do this before re-taking the mutex to avoid deadlock with the ReplicaSetMonitor updating @@ -444,9 +453,9 @@ ShardRegistryData::ShardRegistryData(OperationContext* opCtx, ShardFactory* shar auto shardHostStatus = ConnectionString::parse(shardType.getHost()); if (!shardHostStatus.isOK()) { LOGV2_WARNING(22735, - "Unable to parse shard host {shardHostStatus_getStatus}", - "shardHostStatus_getStatus"_attr = - shardHostStatus.getStatus().toString()); + "Error parsing shard host caused by {error}", + "Error parsing shard host", + "error"_attr = redact(shardHostStatus.getStatus())); continue; } @@ -589,11 +598,12 @@ void ShardRegistryData::_addShard(WithLock lk, if (oldConnString.toString() != connString.toString()) { LOGV2(22732, - "Updating ShardRegistry connection string for shard {currentShard_getId} from: " - "{oldConnString} to: {connString}", - "currentShard_getId"_attr = currentShard->getId(), - "oldConnString"_attr = oldConnString.toString(), - "connString"_attr = connString.toString()); + "Updating shard registry connection string for shard {shardId} to " + "{newShardConnectionString} from {oldShardConnectionString}", + "Updating shard connection string on shard registry", + "shardId"_attr = currentShard->getId(), + "newShardConnectionString"_attr = connString, + "oldShardConnectionString"_attr = oldConnString); } for (const auto& host : oldConnString.getServers()) { @@ -607,9 +617,11 @@ void ShardRegistryData::_addShard(WithLock lk, LOGV2_DEBUG(22733, 3, - "Adding shard {shard_getId}, with CS {connString}", - "shard_getId"_attr = shard->getId(), - "connString"_attr = connString.toString()); + "Adding new shard {shardId} with connection string {shardConnectionString} to " + "shard registry", + "Adding new shard to shard registry", + "shardId"_attr = shard->getId(), + "shardConnectionString"_attr = connString); if (connString.type() == ConnectionString::SET) { _rsLookup[connString.getSetName()] = shard; } else if (connString.type() == ConnectionString::CUSTOM) { |