Diffstat (limited to 'src')
-rw-r--r--  src/mongo/s/balancer_configuration.cpp               |  25
-rw-r--r--  src/mongo/s/catalog/replset_dist_lock_manager.cpp     | 236
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_impl.cpp  |   5
-rw-r--r--  src/mongo/s/catalog_cache.cpp                         | 129
-rw-r--r--  src/mongo/s/chunk_manager_index_bounds_test.cpp       |  13
-rw-r--r--  src/mongo/s/client/parallel.cpp                       | 277
-rw-r--r--  src/mongo/s/client/shard.cpp                          |  35
-rw-r--r--  src/mongo/s/client/shard_connection.cpp               |  20
-rw-r--r--  src/mongo/s/client/shard_registry.cpp                 |  82
9 files changed, 456 insertions(+), 366 deletions(-)
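The hunks below all apply the same LOGV2 structured-logging cleanup: each call gains a stable
message string alongside its format string, auto-generated attribute names are replaced with
descriptive ones, and statuses are passed through redact() rather than causedBy(). A minimal
sketch of the before/after shape, assuming the transitional LOGV2(id, format, message, attrs...)
overload used throughout this diff; the log ID and variable names here are illustrative only,
not taken from any hunk:

    // Before: format string only, attribute names auto-derived from the expressions.
    LOGV2(12345,
          "Refresh for collection {nss} took {t_millis} ms",
          "nss"_attr = nss,
          "t_millis"_attr = t.millis());

    // After: format string plus a constant message, descriptive attribute names,
    // and a typed duration attribute instead of a raw millisecond count.
    LOGV2(12345,
          "Refreshed collection {namespace}. Took {duration}",
          "Refreshed collection",
          "namespace"_attr = nss,
          "duration"_attr = Milliseconds(t.millis()));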
diff --git a/src/mongo/s/balancer_configuration.cpp b/src/mongo/s/balancer_configuration.cpp
index be3887de2ed..f52a9c5a4bb 100644
--- a/src/mongo/s/balancer_configuration.cpp
+++ b/src/mongo/s/balancer_configuration.cpp
@@ -243,11 +243,10 @@ Status BalancerConfiguration::_refreshChunkSizeSettings(OperationContext* opCtx)
if (settings.getMaxChunkSizeBytes() != getMaxChunkSizeBytes()) {
LOGV2(22640,
- "MaxChunkSize changing from {getMaxChunkSizeBytes_1024_1024}MB to "
- "{settings_getMaxChunkSizeBytes_1024_1024}MB",
- "getMaxChunkSizeBytes_1024_1024"_attr = getMaxChunkSizeBytes() / (1024 * 1024),
- "settings_getMaxChunkSizeBytes_1024_1024"_attr =
- settings.getMaxChunkSizeBytes() / (1024 * 1024));
+ "Changing MaxChunkSize setting to {newMaxChunkSizeMB}MB from {oldMaxChunkSizeMB}MB",
+ "Changing MaxChunkSize setting",
+ "newMaxChunkSizeMB"_attr = settings.getMaxChunkSizeBytes() / (1024 * 1024),
+ "oldMaxChunkSizeMB"_attr = getMaxChunkSizeBytes() / (1024 * 1024));
_maxChunkSizeBytes.store(settings.getMaxChunkSizeBytes());
}
@@ -273,9 +272,10 @@ Status BalancerConfiguration::_refreshAutoSplitSettings(OperationContext* opCtx)
if (settings.getShouldAutoSplit() != getShouldAutoSplit()) {
LOGV2(22641,
- "ShouldAutoSplit changing from {getShouldAutoSplit} to {settings_getShouldAutoSplit}",
- "getShouldAutoSplit"_attr = getShouldAutoSplit(),
- "settings_getShouldAutoSplit"_attr = settings.getShouldAutoSplit());
+ "Changing ShouldAutoSplit setting to {newShouldAutoSplit} from {oldShouldAutoSplit}",
+ "Changing ShouldAutoSplit setting",
+ "newShouldAutoSplit"_attr = settings.getShouldAutoSplit(),
+ "oldShouldAutoSplit"_attr = getShouldAutoSplit());
_shouldAutoSplit.store(settings.getShouldAutoSplit());
}
@@ -404,11 +404,10 @@ bool BalancerSettingsType::isTimeInBalancingWindow(const boost::posix_time::ptim
};
LOGV2_DEBUG(24094,
1,
- "inBalancingWindow: now: {timeToString_now} startTime: "
- "{timeToString_activeWindowStart} stopTime: {timeToString_activeWindowStop}",
- "timeToString_now"_attr = timeToString(now),
- "timeToString_activeWindowStart"_attr = timeToString(*_activeWindowStart),
- "timeToString_activeWindowStop"_attr = timeToString(*_activeWindowStop));
+ "inBalancingWindow",
+ "now"_attr = timeToString(now),
+ "activeWindowStart"_attr = timeToString(*_activeWindowStart),
+ "activeWindowStop"_attr = timeToString(*_activeWindowStop));
if (*_activeWindowStop > *_activeWindowStart) {
if ((now >= *_activeWindowStart) && (now <= *_activeWindowStop)) {
diff --git a/src/mongo/s/catalog/replset_dist_lock_manager.cpp b/src/mongo/s/catalog/replset_dist_lock_manager.cpp
index 3368f305815..09745931f44 100644
--- a/src/mongo/s/catalog/replset_dist_lock_manager.cpp
+++ b/src/mongo/s/catalog/replset_dist_lock_manager.cpp
@@ -109,10 +109,10 @@ void ReplSetDistLockManager::shutDown(OperationContext* opCtx) {
auto status = _catalog->stopPing(opCtx, _processID);
if (!status.isOK()) {
LOGV2_WARNING(22667,
- "error encountered while cleaning up distributed ping entry for "
- "{processID}{causedBy_status}",
- "processID"_attr = _processID,
- "causedBy_status"_attr = causedBy(redact(status)));
+ "Error cleaning up distributed ping entry for {processId} caused by {error}",
+ "Error cleaning up distributed ping entry",
+ "processId"_attr = _processID,
+ "error"_attr = redact(status));
}
}
@@ -127,9 +127,10 @@ bool ReplSetDistLockManager::isShutDown() {
void ReplSetDistLockManager::doTask() {
LOGV2(22649,
- "creating distributed lock ping thread for process {processID} (sleeping for "
- "{pingInterval})",
- "processID"_attr = _processID,
+ "Creating distributed lock ping thread for process {processId} with ping interval "
+ "{pingInterval}",
+ "Creating distributed lock ping thread",
+ "processId"_attr = _processID,
"pingInterval"_attr = _pingInterval);
Timer elapsedSincelastPing(_serviceContext->getTickSource());
@@ -142,16 +143,18 @@ void ReplSetDistLockManager::doTask() {
if (!pingStatus.isOK() && pingStatus != ErrorCodes::NotMaster) {
LOGV2_WARNING(22668,
- "pinging failed for distributed lock pinger{causedBy_pingStatus}",
- "causedBy_pingStatus"_attr = causedBy(pingStatus));
+ "Pinging failed for distributed lock pinger caused by {error}",
+ "Pinging failed for distributed lock pinger",
+ "error"_attr = pingStatus);
}
const Milliseconds elapsed(elapsedSincelastPing.millis());
if (elapsed > 10 * _pingInterval) {
LOGV2_WARNING(22669,
- "Lock pinger for proc: {processID} was inactive for {elapsed} ms",
- "processID"_attr = _processID,
- "elapsed"_attr = elapsed);
+ "Lock pinger for process {processId} was inactive for {duration}",
+ "Lock pinger was inactive for multiple intervals",
+ "processId"_attr = _processID,
+ "duration"_attr = elapsed);
}
elapsedSincelastPing.reset();
@@ -162,36 +165,33 @@ void ReplSetDistLockManager::doTask() {
}
for (const auto& toUnlock : toUnlockBatch) {
- std::string nameMessage = "";
Status unlockStatus(ErrorCodes::NotYetInitialized,
"status unlock not initialized!");
if (toUnlock.second) {
// A non-empty _id (name) field was provided, unlock by ts (sessionId) and _id.
unlockStatus = _catalog->unlock(opCtx.get(), toUnlock.first, *toUnlock.second);
- nameMessage = " and " + LocksType::name() + ": " + *toUnlock.second;
} else {
unlockStatus = _catalog->unlock(opCtx.get(), toUnlock.first);
}
if (!unlockStatus.isOK()) {
LOGV2_WARNING(22670,
- "Failed to unlock lock with {LocksType_lockID}: "
- "{toUnlock_first}{nameMessage}{causedBy_unlockStatus}",
- "LocksType_lockID"_attr = LocksType::lockID(),
- "toUnlock_first"_attr = toUnlock.first,
- "nameMessage"_attr = nameMessage,
- "causedBy_unlockStatus"_attr = causedBy(unlockStatus));
+ "Error unlocking distributed lock {lockName} with sessionID "
+ "{lockSessionId} caused by {error}",
+ "Error unlocking distributed lock",
+ "lockName"_attr = toUnlock.second,
+ "lockSessionId"_attr = toUnlock.first,
+ "error"_attr = unlockStatus);
// Queue another attempt, unless the problem was no longer being primary.
if (unlockStatus != ErrorCodes::NotMaster) {
queueUnlock(toUnlock.first, toUnlock.second);
}
} else {
LOGV2(22650,
- "distributed lock with {LocksType_lockID}: {toUnlock_first}{nameMessage} "
- "unlocked.",
- "LocksType_lockID"_attr = LocksType::lockID(),
- "toUnlock_first"_attr = toUnlock.first,
- "nameMessage"_attr = nameMessage);
+ "Unlocked distributed lock {lockName} with sessionID {lockSessionId}",
+ "Unlocked distributed lock",
+ "lockName"_attr = toUnlock.second,
+ "lockSessionId"_attr = toUnlock.first);
}
if (isShutDown()) {
@@ -267,11 +267,12 @@ StatusWith<bool> ReplSetDistLockManager::isLockExpired(OperationContext* opCtx,
LOGV2_DEBUG(22651,
1,
- "checking last ping for lock '{lockDoc_getName}' against last seen process "
- "{pingInfo_processId} and ping {pingInfo_lastPing}",
- "lockDoc_getName"_attr = lockDoc.getName(),
- "pingInfo_processId"_attr = pingInfo->processId,
- "pingInfo_lastPing"_attr = pingInfo->lastPing);
+ "Checking last ping for lock {lockName} against last seen process {processId} and "
+ "ping {lastPing}",
+ "Checking last ping for lock",
+ "lockName"_attr = lockDoc.getName(),
+ "processId"_attr = pingInfo->processId,
+ "lastPing"_attr = pingInfo->lastPing);
if (pingInfo->lastPing != pingValue || // ping is active
@@ -291,34 +292,34 @@ StatusWith<bool> ReplSetDistLockManager::isLockExpired(OperationContext* opCtx,
if (configServerLocalTime < pingInfo->configLocalTime) {
LOGV2_WARNING(22671,
- "config server local time went backwards, from last seen: "
- "{pingInfo_configLocalTime} to {configServerLocalTime}",
- "pingInfo_configLocalTime"_attr = pingInfo->configLocalTime,
- "configServerLocalTime"_attr = configServerLocalTime);
+ "Config server local time went backwards, new value "
+ "{newConfigServerLocalTime}, old value {oldConfigServerLocalTime}",
+ "Config server local time went backwards",
+ "newConfigServerLocalTime"_attr = configServerLocalTime,
+ "oldConfigServerLocalTime"_attr = pingInfo->configLocalTime);
return false;
}
Milliseconds elapsedSinceLastPing(configServerLocalTime - pingInfo->configLocalTime);
if (elapsedSinceLastPing >= lockExpiration) {
LOGV2(22652,
- "forcing lock '{lockDoc_getName}' because elapsed time {elapsedSinceLastPing} >= "
- "takeover time {lockExpiration}",
- "lockDoc_getName"_attr = lockDoc.getName(),
+ "Forcing lock {lockName} because elapsed time {elapsedSinceLastPing} >= "
+ "takeover time {lockExpirationTimeout}",
+ "Forcing lock because too much time has passed from last ping",
+ "lockName"_attr = lockDoc.getName(),
"elapsedSinceLastPing"_attr = elapsedSinceLastPing,
- "lockExpiration"_attr = lockExpiration);
+ "lockExpirationTimeout"_attr = lockExpiration);
return true;
}
LOGV2_DEBUG(22653,
1,
- "could not force lock '{lockDoc_getName}' because elapsed time "
- "{durationCount_Milliseconds_elapsedSinceLastPing} < takeover time "
- "{durationCount_Milliseconds_lockExpiration} ms",
- "lockDoc_getName"_attr = lockDoc.getName(),
- "durationCount_Milliseconds_elapsedSinceLastPing"_attr =
- durationCount<Milliseconds>(elapsedSinceLastPing),
- "durationCount_Milliseconds_lockExpiration"_attr =
- durationCount<Milliseconds>(lockExpiration));
+ "Could not force lock of {lockName} because elapsed time {elapsedSinceLastPing} < "
+ "takeover time {lockExpirationTimeout}",
+ "Could not force lock because too little time has passed from last ping",
+ "lockName"_attr = lockDoc.getName(),
+ "elapsedSinceLastPing"_attr = elapsedSinceLastPing,
+ "lockExpirationTimeout"_attr = lockExpiration);
return false;
}
@@ -351,18 +352,19 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
LOGV2_DEBUG(22654,
1,
- "trying to acquire new distributed lock for {name} ( lock timeout : "
- "{durationCount_Milliseconds_lockExpiration} ms, ping interval : "
- "{durationCount_Milliseconds_pingInterval} ms, process : {processID} ) with "
- "lockSessionID: {lockSessionID}, why: {whyMessage}",
- "name"_attr = name,
- "durationCount_Milliseconds_lockExpiration"_attr =
- durationCount<Milliseconds>(lockExpiration),
- "durationCount_Milliseconds_pingInterval"_attr =
- durationCount<Milliseconds>(_pingInterval),
- "processID"_attr = _processID,
- "lockSessionID"_attr = lockSessionID,
- "whyMessage"_attr = whyMessage.toString());
+ "Trying to acquire new distributed lock for {lockName} ( "
+ "lockSessionID: {lockSessionId}, "
+ "process : {processId}, "
+ "lock timeout : {lockExpirationTimeout}, "
+ "ping interval : {pingInterval}, "
+ "reason: {reason} )",
+ "Trying to acquire new distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID,
+ "processId"_attr = _processID,
+ "lockExpirationTimeout"_attr = lockExpiration,
+ "pingInterval"_attr = _pingInterval,
+ "reason"_attr = whyMessage);
auto lockResult = _catalog->grabLock(
opCtx, name, lockSessionID, who, _processID, Date_t::now(), whyMessage.toString());
@@ -373,10 +375,12 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
// Lock is acquired since findAndModify was able to successfully modify
// the lock document.
LOGV2(22655,
- "distributed lock '{name}' acquired for '{whyMessage}', ts : {lockSessionID}",
- "name"_attr = name,
- "whyMessage"_attr = whyMessage.toString(),
- "lockSessionID"_attr = lockSessionID);
+ "Acquired distributed lock {lockName} with session ID {lockSessionId} for "
+ "{reason}",
+ "Acquired distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID,
+ "reason"_attr = whyMessage);
return lockSessionID;
}
@@ -385,10 +389,13 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
networkErrorRetries < kMaxNumLockAcquireRetries) {
LOGV2_DEBUG(22656,
1,
- "Failed to acquire distributed lock because of retriable error. Retrying "
- "acquisition by first unlocking the stale entry, which possibly exists "
- "now{causedBy_status}",
- "causedBy_status"_attr = causedBy(redact(status)));
+ "Error acquiring distributed lock because of retryable error. "
+ "Retrying acquisition by first unlocking the stale entry, which possibly "
+ "exists now. Caused by {error}",
+ "Error acquiring distributed lock because of retryable error. "
+ "Retrying acquisition by first unlocking the stale entry, which possibly "
+ "exists now",
+ "error"_attr = redact(status));
networkErrorRetries++;
@@ -403,9 +410,9 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
LOGV2_DEBUG(22657,
1,
- "Failed to retry acquisition of distributed lock. No more attempts will be "
- "made{causedBy_status}",
- "causedBy_status"_attr = causedBy(redact(status)));
+ "Last attempt to acquire distributed lock failed with {error}",
+ "Last attempt to acquire distributed lock failed",
+ "error"_attr = redact(status));
}
if (status != ErrorCodes::LockStateChangeFailed) {
@@ -449,11 +456,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
// Lock is acquired since findAndModify was able to successfully modify
// the lock document.
- LOGV2(22658, "lock '{name}' successfully forced", "name"_attr = name);
- LOGV2(22659,
- "distributed lock '{name}' acquired, ts : {lockSessionID}",
- "name"_attr = name,
- "lockSessionID"_attr = lockSessionID);
+ LOGV2(22658,
+ "Acquired distributed lock {lockName} with sessionId {lockSessionId}",
+ "Acquired distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID);
return lockSessionID;
}
@@ -466,7 +473,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
}
}
- LOGV2_DEBUG(22660, 1, "distributed lock '{name}' was not acquired.", "name"_attr = name);
+ LOGV2_DEBUG(22660,
+ 1,
+ "Distributed lock {lockName} was not acquired",
+ "Distributed lock was not acquired",
+ "lockName"_attr = name);
if (waitFor == Milliseconds::zero()) {
break;
@@ -475,10 +486,11 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::lockWithSessionID(OperationCo
// Periodically message for debugging reasons
if (msgTimer.seconds() > 10) {
LOGV2(22661,
- "waited {timer_seconds}s for distributed lock {name} for {whyMessage}",
- "timer_seconds"_attr = timer.seconds(),
- "name"_attr = name,
- "whyMessage"_attr = whyMessage.toString());
+ "Waited {elapsed} for distributed lock {lockName} for {reason}",
+ "Waiting for distributed lock",
+ "lockName"_attr = name,
+ "elapsed"_attr = Seconds(timer.seconds()),
+ "reason"_attr = whyMessage);
msgTimer.reset();
}
@@ -501,18 +513,19 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::tryLockWithLocalWriteConcern(
LOGV2_DEBUG(22662,
1,
- "trying to acquire new distributed lock for {name} ( lock timeout : "
- "{durationCount_Milliseconds_lockExpiration} ms, ping interval : "
- "{durationCount_Milliseconds_pingInterval} ms, process : {processID} ) with "
- "lockSessionID: {lockSessionID}, why: {whyMessage}",
- "name"_attr = name,
- "durationCount_Milliseconds_lockExpiration"_attr =
- durationCount<Milliseconds>(_lockExpiration),
- "durationCount_Milliseconds_pingInterval"_attr =
- durationCount<Milliseconds>(_pingInterval),
- "processID"_attr = _processID,
- "lockSessionID"_attr = lockSessionID,
- "whyMessage"_attr = whyMessage.toString());
+ "Trying to acquire new distributed lock for {lockName} ( "
+ "process : {processId}, "
+ "lockSessionID: {lockSessionId}, "
+ "lock timeout : {lockExpirationTimeout}, "
+ "ping interval : {pingInterval}, "
+ "reason: {reason} )",
+ "Trying to acquire new distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID,
+ "processId"_attr = _processID,
+ "lockExpirationTimeout"_attr = _lockExpiration,
+ "pingInterval"_attr = _pingInterval,
+ "reason"_attr = whyMessage);
auto lockStatus = _catalog->grabLock(opCtx,
name,
@@ -525,14 +538,20 @@ StatusWith<DistLockHandle> ReplSetDistLockManager::tryLockWithLocalWriteConcern(
if (lockStatus.isOK()) {
LOGV2(22663,
- "distributed lock '{name}' acquired for '{whyMessage}', ts : {lockSessionID}",
- "name"_attr = name,
- "whyMessage"_attr = whyMessage.toString(),
- "lockSessionID"_attr = lockSessionID);
+ "Acquired distributed lock {lockName} with session ID {lockSessionId} for "
+ "{reason}",
+ "Acquired distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID,
+ "reason"_attr = whyMessage);
return lockSessionID;
}
- LOGV2_DEBUG(22664, 1, "distributed lock '{name}' was not acquired.", "name"_attr = name);
+ LOGV2_DEBUG(22664,
+ 1,
+ "Distributed lock {lockName} was not acquired",
+ "Distributed lock was not acquired",
+ "lockName"_attr = name);
if (lockStatus == ErrorCodes::LockStateChangeFailed) {
return {ErrorCodes::LockBusy, str::stream() << "Unable to acquire " << name};
@@ -548,9 +567,9 @@ void ReplSetDistLockManager::unlock(OperationContext* opCtx, const DistLockHandl
queueUnlock(lockSessionID, boost::none);
} else {
LOGV2(22665,
- "distributed lock with {LocksType_lockID}: {lockSessionID}' unlocked.",
- "LocksType_lockID"_attr = LocksType::lockID(),
- "lockSessionID"_attr = lockSessionID);
+ "Unlocked distributed lock with sessionID {lockSessionId}",
+ "Unlocked distributed lock",
+ "lockSessionId"_attr = lockSessionID);
}
}
@@ -563,21 +582,22 @@ void ReplSetDistLockManager::unlock(OperationContext* opCtx,
queueUnlock(lockSessionID, name.toString());
} else {
LOGV2(22666,
- "distributed lock with {LocksType_lockID}: '{lockSessionID}' and {LocksType_name}: "
- "'{name}' unlocked.",
- "LocksType_lockID"_attr = LocksType::lockID(),
- "lockSessionID"_attr = lockSessionID,
- "LocksType_name"_attr = LocksType::name(),
- "name"_attr = name.toString());
+ "Unlocked distributed lock {lockName} with sessionID {lockSessionId}",
+ "Unlocked distributed lock",
+ "lockName"_attr = name,
+ "lockSessionId"_attr = lockSessionID);
}
}
void ReplSetDistLockManager::unlockAll(OperationContext* opCtx, const std::string& processID) {
Status status = _catalog->unlockAll(opCtx, processID);
if (!status.isOK()) {
- LOGV2_WARNING(22672,
- "Error while trying to unlock existing distributed locks{causedBy_status}",
- "causedBy_status"_attr = causedBy(redact(status)));
+ LOGV2_WARNING(
+ 22672,
+ "Error unlocking all distributed locks for process {processId} caused by {error}",
+ "Error unlocking all existing distributed locks for a process",
+ "processId"_attr = processID,
+ "error"_attr = redact(status));
}
}
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
index a1e3da49eb9..1bd8d8afa52 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
@@ -724,8 +724,9 @@ Status ShardingCatalogClientImpl::applyChunkOpsDeprecated(OperationContext* opCt
LOGV2_WARNING(
22675,
- "chunk operation commit failed and metadata will be revalidated{causedBy_status}",
- "causedBy_status"_attr = causedBy(redact(status)));
+ "Error committing chunk operation, metadata will be revalidated. Caused by {error}",
+ "Error committing chunk operation, metadata will be revalidated",
+ "error"_attr = redact(status));
// Look for the chunk in this shard whose version got bumped. We assume that if that
// mod made it to the config server, then transaction was successful.
diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp
index 2a93e98518c..d80d5befe14 100644
--- a/src/mongo/s/catalog_cache.cpp
+++ b/src/mongo/s/catalog_cache.cpp
@@ -346,8 +346,10 @@ void CatalogCache::onStaleDatabaseVersion(const StringData dbName,
} else if (!itDbEntry->second->dbt ||
databaseVersion::equal(itDbEntry->second->dbt->getVersion(), databaseVersion)) {
// If the versions match, the cached database info is stale, so mark it as needs refresh.
- LOGV2(
- 22642, "Marking cached database entry for '{dbName}' as stale", "dbName"_attr = dbName);
+ LOGV2(22642,
+ "Marking cached database entry for {db} as stale",
+ "Marking cached database entry as stale",
+ "db"_attr = dbName);
itDbEntry->second->needsRefresh = true;
}
}
@@ -467,22 +469,26 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
stdx::lock_guard<Latch> lg(_mutex);
LOGV2(22643,
- "Starting to invalidate databases and collections with data on shard: {shardId}",
+ "Starting to invalidate databases and collections with data on shard {shardId}",
+ "Starting to invalidate databases and collections referencing a specific shard",
"shardId"_attr = shardId);
// Invalidate databases with this shard as their primary.
for (const auto& [dbNs, dbInfoEntry] : _databases) {
LOGV2_DEBUG(22644,
3,
- "Checking if database {dbNs}has primary shard: {shardId}",
- "dbNs"_attr = dbNs,
- "shardId"_attr = shardId);
+ "Checking if database {db} has primary shard: {primaryShardId}",
+ "Checking if database matches primary shard",
+ "db"_attr = dbNs,
+ "primaryShardId"_attr = shardId);
if (!dbInfoEntry->needsRefresh && dbInfoEntry->dbt->getPrimary() == shardId) {
LOGV2_DEBUG(22645,
3,
- "Database {dbNs}has primary shard {shardId}, invalidating cache entry",
- "dbNs"_attr = dbNs,
- "shardId"_attr = shardId);
+ "Invalidating cache entry for database {db} that has primary shard "
+ "{primaryShardId}",
+ "Invalidating database cache entry",
+ "db"_attr = dbNs,
+ "primaryShardId"_attr = shardId);
dbInfoEntry->needsRefresh = true;
}
}
@@ -493,8 +499,9 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
LOGV2_DEBUG(22646,
3,
- "Checking if {collNs}has data on shard: {shardId}",
- "collNs"_attr = collNs,
+ "Checking if collection {namespace} has data on shard {shardId}",
+ "Checking if collection has data on specific shard",
+ "namespace"_attr = collNs,
"shardId"_attr = shardId);
if (!collRoutingInfoEntry->needsRefresh) {
@@ -506,8 +513,10 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
shardsOwningDataForCollection.end()) {
LOGV2_DEBUG(22647,
3,
- "{collNs}has data on shard {shardId}, invalidating cache entry",
- "collNs"_attr = collNs,
+ "Invalidating cached collection {namespace} that has data "
+ "on shard {shardId}",
+ "Invalidating cached collection",
+ "namespace"_attr = collNs,
"shardId"_attr = shardId);
collRoutingInfoEntry->needsRefresh = true;
@@ -519,6 +528,7 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) {
LOGV2(22648,
"Finished invalidating databases and collections with data on shard: {shardId}",
+ "Finished invalidating databases and collections that reference specific shard",
"shardId"_attr = shardId);
}
@@ -573,11 +583,12 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk,
if (!swDbt.isOK()) {
LOGV2_OPTIONS(24100,
{logv2::LogComponent::kShardingCatalogRefresh},
- "Refresh for database {dbName} took {t_millis} ms and "
- "failed{causedBy_swDbt_getStatus}",
- "dbName"_attr = dbName,
- "t_millis"_attr = t.millis(),
- "causedBy_swDbt_getStatus"_attr = causedBy(redact(swDbt.getStatus())));
+ "Error refreshing cached database entry for {db}. Took {duration} and "
+ "failed due to {error}",
+ "Error refreshing cached database entry",
+ "db"_attr = dbName,
+ "duration"_attr = Milliseconds(t.millis()),
+ "error"_attr = redact(swDbt.getStatus()));
return;
}
@@ -591,14 +602,13 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk,
LOGV2_FOR_CATALOG_REFRESH(
24101,
logSeverityV1toV2(logLevel).toInt(),
- "Refresh for database {dbName} from version "
- "{dbEntry_dbt_dbEntry_dbt_getVersion_BSONObj} to version {dbVersionAfterRefresh} "
- "took {t_millis} ms",
- "dbName"_attr = dbName,
- "dbEntry_dbt_dbEntry_dbt_getVersion_BSONObj"_attr =
- (dbEntry->dbt ? dbEntry->dbt->getVersion().toBSON() : BSONObj()),
- "dbVersionAfterRefresh"_attr = dbVersionAfterRefresh.toBSON(),
- "t_millis"_attr = t.millis());
+ "Refreshed cached database entry for {db} to version {newDbVersion} from version "
+ "{oldDbVersion}. Took {duration}",
+ "Refreshed cached database entry",
+ "db"_attr = dbName,
+ "newDbVersion"_attr = dbVersionAfterRefresh.toBSON(),
+ "oldDbVersion"_attr = (dbEntry->dbt ? dbEntry->dbt->getVersion().toBSON() : BSONObj()),
+ "duration"_attr = Milliseconds(t.millis()));
};
// Invoked if getDatabase resulted in error or threw an exception
@@ -636,14 +646,14 @@ void CatalogCache::_scheduleDatabaseRefresh(WithLock lk,
dbEntry->dbt = std::move(swDbt.getValue());
};
- LOGV2_FOR_CATALOG_REFRESH(
- 24102,
- 1,
- "Refreshing cached database entry for {dbName}; current cached database info is "
- "{dbEntry_dbt_dbEntry_dbt_BSONObj}",
- "dbName"_attr = dbName,
- "dbEntry_dbt_dbEntry_dbt_BSONObj"_attr =
- (dbEntry->dbt ? dbEntry->dbt->toBSON() : BSONObj()));
+ LOGV2_FOR_CATALOG_REFRESH(24102,
+ 1,
+ "Refreshing cached database entry for {db}; current cached database "
+ "info is {currentDbInfo}",
+ "Refreshing cached database entry",
+ "db"_attr = dbName,
+ "currentDbInfo"_attr =
+ (dbEntry->dbt ? dbEntry->dbt->toBSON() : BSONObj()));
try {
_cacheLoader.getDatabase(dbName, refreshCallback);
@@ -685,13 +695,14 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk,
if (!status.isOK()) {
_stats.countFailedRefreshes.addAndFetch(1);
- LOGV2_OPTIONS(
- 24103,
- {logv2::LogComponent::kShardingCatalogRefresh},
- "Refresh for collection {nss} took {t_millis} ms and failed{causedBy_status}",
- "nss"_attr = nss,
- "t_millis"_attr = t.millis(),
- "causedBy_status"_attr = causedBy(redact(status)));
+ LOGV2_OPTIONS(24103,
+ {logv2::LogComponent::kShardingCatalogRefresh},
+ "Error refreshing cached collection {namespace}; Took {duration} and "
+ "failed due to {error}",
+ "Error refreshing cached collection",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()),
+ "error"_attr = redact(status));
} else if (routingInfoAfterRefresh) {
const int logLevel =
(!existingRoutingInfo ||
@@ -702,25 +713,24 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk,
LOGV2_FOR_CATALOG_REFRESH(
24104,
logSeverityV1toV2(logLevel).toInt(),
- "Refresh for collection "
- "{nss}{existingRoutingInfo_from_version_existingRoutingInfo_getVersion} to version "
- "{routingInfoAfterRefresh_getVersion} took {t_millis} ms",
- "nss"_attr = nss.toString(),
- "existingRoutingInfo_from_version_existingRoutingInfo_getVersion"_attr =
+ "Refreshed cached collection {namespace} to version {newVersion} from version "
+ "{oldVersion}. Took {duration}",
+ "Refreshed cached collection",
+ "namespace"_attr = nss,
+ "newVersion"_attr = routingInfoAfterRefresh->getVersion(),
+ "oldVersion"_attr =
(existingRoutingInfo
? (" from version " + existingRoutingInfo->getVersion().toString())
: ""),
- "routingInfoAfterRefresh_getVersion"_attr =
- routingInfoAfterRefresh->getVersion().toString(),
- "t_millis"_attr = t.millis());
+ "duration"_attr = Milliseconds(t.millis()));
} else {
- LOGV2_OPTIONS(
- 24105,
- {logv2::LogComponent::kShardingCatalogRefresh},
- "Refresh for collection {nss} took {t_millis} ms and found the collection is not "
- "sharded",
- "nss"_attr = nss,
- "t_millis"_attr = t.millis());
+ LOGV2_OPTIONS(24105,
+ {logv2::LogComponent::kShardingCatalogRefresh},
+ "Collection {namespace} was found to be unsharded after refresh that "
+ "took {duration}",
+ "Collection has found to be unsharded after refresh",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()));
}
};
@@ -786,10 +796,9 @@ void CatalogCache::_scheduleCollectionRefresh(WithLock lk,
LOGV2_FOR_CATALOG_REFRESH(
24106,
1,
- "Refreshing chunks for collection {nss}; current collection version is "
- "{startingCollectionVersion}",
- "nss"_attr = nss,
- "startingCollectionVersion"_attr = startingCollectionVersion);
+ "Refreshing cached collection {namespace} with version {currentCollectionVersion}",
+ "namespace"_attr = nss,
+ "currentCollectionVersion"_attr = startingCollectionVersion);
try {
_cacheLoader.getChunksSince(nss, startingCollectionVersion, refreshCallback);
diff --git a/src/mongo/s/chunk_manager_index_bounds_test.cpp b/src/mongo/s/chunk_manager_index_bounds_test.cpp
index fa30a2795d5..3ef9ca2c551 100644
--- a/src/mongo/s/chunk_manager_index_bounds_test.cpp
+++ b/src/mongo/s/chunk_manager_index_bounds_test.cpp
@@ -89,9 +89,9 @@ protected:
if (Interval::INTERVAL_EQUALS !=
oil.intervals[i].compare(expectedOil.intervals[i])) {
LOGV2(22676,
- "{oil_intervals_i} != {expectedOil_intervals_i}",
- "oil_intervals_i"_attr = oil.intervals[i],
- "expectedOil_intervals_i"_attr = expectedOil.intervals[i]);
+ "Found mismatching field interval",
+ "queryFieldInterval"_attr = oil.intervals[i],
+ "expectedFieldInterval"_attr = expectedOil.intervals[i]);
}
ASSERT_EQUALS(Interval::INTERVAL_EQUALS,
oil.intervals[i].compare(expectedOil.intervals[i]));
@@ -111,9 +111,10 @@ protected:
const OrderedIntervalList& oil = indexBounds.fields.front();
if (oil.intervals.size() != expectedOil.intervals.size()) {
- for (size_t i = 0; i < oil.intervals.size(); i++) {
- LOGV2(22677, "{oil_intervals_i}", "oil_intervals_i"_attr = oil.intervals[i]);
- }
+ LOGV2(22677,
+ "Found mismatching field intervals",
+ "queryFieldInterval"_attr = oil,
+ "expectedFieldInterval"_attr = expectedOil);
}
ASSERT_EQUALS(oil.intervals.size(), expectedOil.intervals.size());
diff --git a/src/mongo/s/client/parallel.cpp b/src/mongo/s/client/parallel.cpp
index cda8113ff33..5161808c140 100644
--- a/src/mongo/s/client/parallel.cpp
+++ b/src/mongo/s/client/parallel.cpp
@@ -378,9 +378,11 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
dassert(repl);
LOGV2_WARNING(
22695,
- "Primary for {repl_getServerAddress} was down before, bypassing "
+ "Primary for {replicaSetAddress} was down before, bypassing "
"setShardVersion. The local replica set view and targeting may be stale.",
- "repl_getServerAddress"_attr = repl->getServerAddress());
+ "Bypassing setShardVersion because replica set view and targeting may be stale "
+ "from primary having been down",
+ "replicaSetAddress"_attr = repl->getServerAddress());
}
return;
@@ -392,8 +394,9 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
LOGV2_DEBUG(22678,
2,
"pcursor: needed to set remote version on connection to value compatible "
- "with {vinfo}",
- "vinfo"_attr = vinfo);
+ "with {shardVersion}",
+ "pcursor: needed to set remote version on connection",
+ "shardVersion"_attr = vinfo);
}
} catch (const DBException& dbExcep) {
auto errCode = dbExcep.code();
@@ -408,10 +411,13 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
const DBClientReplicaSet* repl =
dynamic_cast<const DBClientReplicaSet*>(state->conn->getRawConn());
dassert(repl);
- LOGV2_WARNING(22696,
- "Cannot contact primary for {repl_getServerAddress} to check shard "
- "version. The local replica set view and targeting may be stale.",
- "repl_getServerAddress"_attr = repl->getServerAddress());
+ LOGV2_WARNING(
+ 22696,
+ "Cannot contact replica set {replicaSetAdress} to check shard version. "
+ "The local replica set view and targeting may be stale",
+ "Cannot contact replica set to check shard version. "
+ "The local replica set view and targeting may be stale",
+ "replicaSetAddress"_attr = repl->getServerAddress());
}
} else {
throw;
@@ -433,10 +439,11 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
}
LOGV2_DEBUG(22679,
2,
- "pcursor: {prefix} pcursor over {qSpec} and {cInfo}",
+ "pcursor: {prefix} pcursor over {query} and {command}",
+ "pcursor",
"prefix"_attr = prefix,
- "qSpec"_attr = _qSpec,
- "cInfo"_attr = _cInfo);
+ "query"_attr = _qSpec,
+ "command"_attr = _cInfo);
shared_ptr<ChunkManager> manager;
shared_ptr<Shard> primary;
@@ -484,9 +491,10 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
LOGV2_DEBUG(22680,
2,
"pcursor: closing cursor on shard {shardId} as the connection is no longer "
- "required by {vinfo}",
+ "required by {shardVersion}",
+ "pcursor: closing cursor as the connection is no longer required",
"shardId"_attr = shardId,
- "vinfo"_attr = vinfo);
+ "shardVersion"_attr = vinfo);
cmEntry.second.cleanup(true);
}
@@ -494,9 +502,10 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
LOGV2_DEBUG(22681,
2,
- "pcursor: initializing over {shardIds_size} shards required by {vinfo}",
- "shardIds_size"_attr = shardIds.size(),
- "vinfo"_attr = vinfo);
+ "pcursor: initializing over {shardsNumber} shards required by {shardVersion}",
+ "pcursor: initializing for all shards",
+ "shardsNumber"_attr = shardIds.size(),
+ "shardVersion"_attr = vinfo);
// Don't retry indefinitely for whatever reason
_totalTries++;
@@ -507,9 +516,11 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
LOGV2_DEBUG(22682,
2,
- "pcursor: initializing on shard {shardId}, current connection state is {mdata}",
+ "pcursor: initializing on shard {shardId}, current connection state is "
+ "{connectionState}",
+ "pcursor: initializing on shard",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ "connectionState"_attr = mdata.toBSON());
// This may be the first time connecting to this shard, if so we can get an error here
try {
@@ -633,12 +644,13 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
LOGV2_DEBUG(22683,
2,
- "pcursor: initialized {isCommand_command_query}{lazyInit_lazily_full}on "
- "shard {shardId}, current connection state is {mdata}",
- "isCommand_command_query"_attr = (isCommand() ? "command " : "query "),
- "lazyInit_lazily_full"_attr = (lazyInit ? "(lazily) " : "(full) "),
+ "pcursor: initialized {commandType} ({lazyInit}) on shard {shardId}, "
+ "current connection state is {connectionState}",
+ "pcursor: initialized command",
+ "commandType"_attr = (isCommand() ? "command" : "query"),
+ "lazyInit"_attr = (lazyInit ? "lazily" : "full"),
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ "connectionState"_attr = mdata.toBSON());
} catch (StaleConfigException& e) {
// Our version isn't compatible with the current version anymore on at least one shard,
// need to retry immediately
@@ -650,20 +662,22 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
->invalidateShardOrEntireCollectionEntryForShardedCollection(
opCtx, nss, e->getVersionWanted(), e->getVersionReceived(), e->getShardId());
- LOGV2_DEBUG(
- 22684,
- 1,
- "stale config of ns {staleNS} during initialization, will retry{causedBy_e}",
- "staleNS"_attr = staleNS,
- "causedBy_e"_attr = causedBy(redact(e)));
+ LOGV2_DEBUG(22684,
+ 1,
+ "Error initializing pcursor, stale config for namespace {namespace}, "
+ "caused by {error}, will retry",
+ "Error initializing pcursor, will retry",
+ "namespace"_attr = staleNS,
+ "error"_attr = redact(e));
// This is somewhat strange
if (staleNS != nss) {
- LOGV2_WARNING(
- 22700,
- "versioned ns {nss_ns} doesn't match stale config namespace {staleNS}",
- "nss_ns"_attr = nss.ns(),
- "staleNS"_attr = staleNS);
+ LOGV2_WARNING(22700,
+ "Versioned namespace {namespace} doesn't match stale config "
+ "namespace {staleNamespace}",
+ "Versioned namespace doesn't match stale config namespace",
+ "namespace"_attr = nss.ns(),
+ "staleNamespace"_attr = staleNS);
}
// Restart with new chunk manager
@@ -671,11 +685,12 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
return;
} catch (NetworkException& e) {
LOGV2_WARNING(22701,
- "socket exception when initializing on {shardId}, current connection "
- "state is {mdata}{causedBy_e}",
+ "Error initializing pcursor on {shardId} with current connection state "
+ "{connectionState} caused by {error}",
+ "Error initializing pcursor",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(redact(e)));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e));
mdata.errored = true;
if (returnPartial) {
mdata.cleanup(true);
@@ -684,11 +699,12 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
throw;
} catch (DBException& e) {
LOGV2_WARNING(22702,
- "db exception when initializing on {shardId}, current connection state "
- "is {mdata}{causedBy_e}",
+ "Error initializing pcursor on {shardId} with current connection state "
+ "{connectionState} caused by {error}",
+ "Error initializing pcursor",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(redact(e)));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e));
mdata.errored = true;
if (returnPartial && e.code() == 15925 /* From above! */) {
mdata.cleanup(true);
@@ -697,19 +713,22 @@ void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) {
throw;
} catch (std::exception& e) {
LOGV2_WARNING(22703,
- "exception when initializing on {shardId}, current connection state is "
- "{mdata}{causedBy_e}",
+ "Error initializing pcursor on {shardId} with current connection state "
+ "{connectionState} caused by {error}",
+ "Error initializing pcursor",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(e));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e.what()));
mdata.errored = true;
throw;
} catch (...) {
LOGV2_WARNING(22704,
- "unknown exception when initializing on {shardId}, current connection "
- "state is {mdata}",
+ "Error initializing pcursor on {shardId} with current connection state "
+ "{connectionState} caused by {error}",
+ "Error initializing pcursor",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = "uknownError");
mdata.errored = true;
throw;
}
@@ -760,8 +779,9 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
LOGV2_DEBUG(22685,
2,
- "pcursor: finishing over {cursorMap_size} shards",
- "cursorMap_size"_attr = _cursorMap.size());
+ "pcursor: finishing over {shardsCount} shards",
+ "pcursor: finishing initialization",
+ "shardsCount"_attr = _cursorMap.size());
for (auto& cmEntry : _cursorMap) {
const auto& shardId = cmEntry.first;
@@ -769,9 +789,11 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
LOGV2_DEBUG(22686,
2,
- "pcursor: finishing on shard {shardId}, current connection state is {mdata}",
+ "pcursor: finishing initialization on shard {shardId}, current connection "
+ "state is {connectionState}",
+ "pcursor: finishing initialization for shard",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ "connectionState"_attr = mdata.toBSON());
// Ignore empty conns for now
if (!mdata.pcState)
@@ -818,12 +840,13 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
// Finalize state
state->cursor->attach(state->conn.get()); // Closes connection for us
- LOGV2_DEBUG(
- 22687,
- 2,
- "pcursor: finished on shard {shardId}, current connection state is {mdata}",
- "shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ LOGV2_DEBUG(22687,
+ 2,
+ "pcursor: finished on shard {shardId}, current connection state is "
+ "{connectionState}",
+ "pcursor: finished initialization for shard",
+ "shardId"_attr = shardId,
+ "connectionState"_attr = mdata.toBSON());
}
} catch (StaleConfigException& e) {
retry = true;
@@ -838,11 +861,12 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
continue;
} catch (NetworkException& e) {
LOGV2_WARNING(22705,
- "socket exception when finishing on {shardId}, current connection state "
- "is {mdata}{causedBy_e}",
+ "Error finalizing pcursor initialization for shard {shardId} with "
+ "connection state {connectionState} caused by {error}",
+ "Error finalizing pcursor initialization for shard",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(redact(e)));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e));
mdata.errored = true;
if (returnPartial) {
mdata.cleanup(true);
@@ -854,11 +878,12 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
// ABOVE
if (e.code() == 15988) {
LOGV2_WARNING(22706,
- "exception when receiving data from {shardId}, current connection "
- "state is {mdata}{causedBy_e}",
+ "Error finalizing pcursor initialization for shard {shardId} with "
+ "connection state {connectionState} caused by {error}",
+ "Error finalizing pcursor initialization for shard",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(redact(e)));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e));
mdata.errored = true;
if (returnPartial) {
@@ -870,36 +895,42 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
// the InvalidBSON exception indicates that the BSON is malformed ->
// don't print/call "mdata.toBSON()" to avoid unexpected errors e.g. a segfault
if (e.code() == ErrorCodes::InvalidBSON)
+ LOGV2_WARNING(22707,
+ "Error finalizing pcursor initialization for shard "
+ "{shardId} caused by {error}",
+ "Error finalizing pcursor initialization for shard",
+ "shardId"_attr = shardId,
+ "error"_attr = redact(e));
+ else
LOGV2_WARNING(
- 22707,
- "bson is malformed :: db exception when finishing on {shardId}{causedBy_e}",
+ 22708,
+ "Error finalizing pcursor initialization for shard {shardId} with "
+ "connection state {connectionState} caused by {error}",
+ "Error finalizing pcursor initialization for shard",
"shardId"_attr = shardId,
- "causedBy_e"_attr = causedBy(redact(e)));
- else
- LOGV2_WARNING(22708,
- "db exception when finishing on {shardId}, current connection "
- "state is {mdata}{causedBy_e}",
- "shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(redact(e)));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e));
mdata.errored = true;
throw;
}
} catch (std::exception& e) {
LOGV2_WARNING(22709,
- "exception when finishing on {shardId}, current connection state is "
- "{mdata}{causedBy_e}",
+ "Error finalizing pcursor initialization for shard {shardId} with "
+ "connection state {connectionState} caused by {error}",
+ "Error finalizing pcursor initialization for shard",
"shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON(),
- "causedBy_e"_attr = causedBy(e));
+ "connectionState"_attr = mdata.toBSON(),
+ "error"_attr = redact(e.what()));
mdata.errored = true;
throw;
} catch (...) {
- LOGV2_WARNING(22710,
- "unknown exception when finishing on {shardId}, current connection state "
- "is {mdata}",
- "shardId"_attr = shardId,
- "mdata"_attr = mdata.toBSON());
+ LOGV2_WARNING(
+ 22710,
+ "Unknown error finalizing pcursor initialization for shard {shardId} with "
+ "connection state {connectionState}",
+ "Unknown error finalizing pcursor initialization for shard",
+ "shardId"_attr = shardId,
+ "connectionState"_attr = mdata.toBSON());
mdata.errored = true;
throw;
}
@@ -923,20 +954,22 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
staleNS, *ex->getShardId());
}
- LOGV2_DEBUG(
- 22688,
- 1,
- "stale config of ns {staleNS} on finishing query, will retry{causedBy_ex}",
- "staleNS"_attr = staleNS,
- "causedBy_ex"_attr = causedBy(redact(ex)));
+ LOGV2_DEBUG(22688,
+ 1,
+ "Found stale config for namespace {namespace} on finishing query "
+ "caused by {error}",
+ "Found stale config for namespace",
+ "namespace"_attr = staleNS,
+ "error"_attr = redact(ex));
// This is somewhat strange
if (staleNS.ns() != ns) {
- LOGV2_WARNING(
- 22711,
- "versioned ns {ns} doesn't match stale config namespace {staleNS}",
- "ns"_attr = ns,
- "staleNS"_attr = staleNS);
+ LOGV2_WARNING(22711,
+ "Versioned namespace {namespace} doesn't match stale config "
+ "namespace {staleNamespace}",
+ "Versioned namespace doesn't match stale config namespace",
+ "namespace"_attr = ns,
+ "staleNamespace"_attr = staleNS);
}
}
}
@@ -953,7 +986,10 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) {
// Erase empty stuff
if (!mdata.pcState) {
- LOGV2(22689, "PCursor erasing empty state {mdata}", "mdata"_attr = mdata.toBSON());
+ LOGV2(22689,
+ "PCursor erasing empty connection state {connectionState}",
+ "PCursor erasing empty connection state",
+ "connectionState"_attr = mdata.toBSON());
_cursorMap.erase(i++);
continue;
} else {
@@ -1043,16 +1079,19 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) {
bool firstPass = retryQueries.size() == 0;
if (!firstPass) {
+ vector<StringData> serverHostsToRetry;
+ serverHostsToRetry.reserve(retryQueries.size());
+ transform(retryQueries.begin(),
+ retryQueries.end(),
+ std::back_inserter(serverHostsToRetry),
+ [&](const auto& query) { return serverHosts[query]; });
LOGV2(22690,
- "retrying {returnPartial_partial}parallel connection to ",
- "returnPartial_partial"_attr = (returnPartial ? "(partial) " : ""));
- for (set<int>::const_iterator it = retryQueries.begin(); it != retryQueries.end();
- ++it) {
- LOGV2(22691, "{serverHosts_it}, ", "serverHosts_it"_attr = serverHosts[*it]);
- }
- LOGV2(22692,
- "{finishedQueries} finished queries.",
- "finishedQueries"_attr = finishedQueries);
+ "Finished queries {finishedQueriesCount}, retrying parallel connection to "
+ "{serverHostsToRetry}",
+ "Retrying parallel connection",
+ "returnPartial"_attr = returnPartial,
+ "finishedQueriesCount"_attr = finishedQueries,
+ "serverHostsToRetry"_attr = serverHostsToRetry);
}
size_t num = 0;
@@ -1099,10 +1138,9 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) {
LOGV2_DEBUG(22693,
5,
- "ParallelSortClusteredCursor::init server:{serverHost} ns:{ns} "
- "query:{query} fields:{fields} options: {options}",
+ "ParallelSortClusteredCursor initialization",
"serverHost"_attr = serverHost,
- "ns"_attr = _ns,
+ "namespace"_attr = _ns,
"query"_attr = redact(_query),
"fields"_attr = redact(_fields),
"options"_attr = _options);
@@ -1157,9 +1195,10 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) {
try {
if (!_cursors[i].get()->initLazyFinish(retry)) {
LOGV2_WARNING(22712,
- "invalid result from {conns_i_getHost}{retry_retrying}",
- "conns_i_getHost"_attr = conns[i]->getHost(),
- "retry_retrying"_attr = (retry ? ", retrying" : ""));
+ "Got invalid result from {host}",
+ "Got invalid result",
+ "host"_attr = conns[i]->getHost(),
+ "retrying"_attr = retry);
_cursors[i].reset(nullptr, nullptr);
if (!retry) {
@@ -1263,13 +1302,17 @@ void ParallelSortClusteredCursor::_oldInit(OperationContext* opCtx) {
} else if (throwException) {
uasserted(14827, errMsg.str());
} else {
- LOGV2_WARNING(22713, "{errMsg_str}", "errMsg_str"_attr = redact(errMsg.str()));
+ LOGV2_WARNING(22713,
+ "Error while initializing cursor across all shards caused by {error}",
+ "Error while initializing cursor across all shards",
+ "error"_attr = redact(errMsg.str()));
}
}
if (retries > 0)
LOGV2(22694,
- "successfully finished parallel query after {retries} retries",
+ "Successfully finished parallel query after {retries} retries",
+ "Successfully finished parallel query",
"retries"_attr = retries);
}
@@ -1363,9 +1406,9 @@ void ParallelConnectionMetadata::cleanup(bool full) {
bool retry = false;
pcState->cursor->initLazyFinish(retry);
} catch (std::exception&) {
- LOGV2_WARNING(22714, "exception closing cursor");
+ LOGV2_WARNING(22714, "Exception closing cursor");
} catch (...) {
- LOGV2_WARNING(22715, "unknown exception closing cursor");
+ LOGV2_WARNING(22715, "Unknown exception closing cursor");
}
}
}
diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp
index 5aa8d72671d..a9a0c51b67a 100644
--- a/src/mongo/s/client/shard.cpp
+++ b/src/mongo/s/client/shard.cpp
@@ -125,12 +125,13 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* opCtx,
auto swResponse = _runCommand(opCtx, readPref, dbName, maxTimeMSOverride, cmdObj);
auto status = CommandResponse::getEffectiveStatus(swResponse);
if (isRetriableError(status.code(), retryPolicy)) {
- LOGV2_DEBUG(
- 22719,
- 2,
- "Command {cmdObj} failed with retriable error and will be retried{causedBy_status}",
- "cmdObj"_attr = redact(cmdObj),
- "causedBy_status"_attr = causedBy(redact(status)));
+ LOGV2_DEBUG(22719,
+ 2,
+ "Command {command} failed with retryable error and will be retried. Caused "
+ "by {error}",
+ "Command failed with retryable error and will be retried",
+ "command"_attr = redact(cmdObj),
+ "error"_attr = redact(status));
continue;
}
@@ -165,12 +166,13 @@ StatusWith<Shard::CommandResponse> Shard::runCommandWithFixedRetryAttempts(
auto swResponse = _runCommand(opCtx, readPref, dbName, maxTimeMSOverride, cmdObj);
auto status = CommandResponse::getEffectiveStatus(swResponse);
if (retry < kOnErrorNumRetries && isRetriableError(status.code(), retryPolicy)) {
- LOGV2_DEBUG(
- 22720,
- 2,
- "Command {cmdObj} failed with retriable error and will be retried{causedBy_status}",
- "cmdObj"_attr = redact(cmdObj),
- "causedBy_status"_attr = causedBy(redact(status)));
+ LOGV2_DEBUG(22720,
+ 2,
+ "Command {command} failed with retryable error and will be retried. Caused "
+ "by {error}",
+ "Command failed with retryable error and will be retried",
+ "command"_attr = redact(cmdObj),
+ "error"_attr = redact(status));
continue;
}
@@ -215,10 +217,11 @@ BatchedCommandResponse Shard::runBatchWriteCommand(OperationContext* opCtx,
if (retry < kOnErrorNumRetries && isRetriableError(writeStatus.code(), retryPolicy)) {
LOGV2_DEBUG(22721,
2,
- "Batch write command to {getId} failed with retriable error and will be "
- "retried{causedBy_writeStatus}",
- "getId"_attr = getId(),
- "causedBy_writeStatus"_attr = causedBy(redact(writeStatus)));
+ "Batch write command to shard {shardId} failed with retryable error "
+ "and will be retried. Caused by {error}",
+ "Batch write command failed with retryable error and will be retried",
+ "shardId"_attr = getId(),
+ "error"_attr = redact(writeStatus));
continue;
}
diff --git a/src/mongo/s/client/shard_connection.cpp b/src/mongo/s/client/shard_connection.cpp
index 2a4b4e020a9..10584f04e57 100644
--- a/src/mongo/s/client/shard_connection.cpp
+++ b/src/mongo/s/client/shard_connection.cpp
@@ -186,8 +186,9 @@ public:
if (s->avail != nullptr) {
LOGV2_WARNING(
22717,
- "Detected additional sharded connection in the thread local pool for {addr}",
- "addr"_attr = addr);
+ "Detected additional sharded connection in the thread local pool for {address}",
+ "Detected additional sharded connection in the thread local pool",
+ "address"_attr = addr);
if (DBException::traceExceptions.load()) {
// There shouldn't be more than one connection checked out to the same
@@ -249,11 +250,11 @@ public:
versionManager.checkShardVersionCB(opCtx, s->avail, ns, false, 1);
} catch (const DBException& ex) {
- LOGV2_WARNING(
- 22718,
- "Problem while initially checking shard versions on {shardId}{causedBy_ex}",
- "shardId"_attr = shardId,
- "causedBy_ex"_attr = causedBy(redact(ex)));
+ LOGV2_WARNING(22718,
+ "Error checking shard version on {shardId} caused by {error}",
+ "Error checking shard version",
+ "shardId"_attr = shardId,
+ "error"_attr = redact(ex));
// NOTE: This is only a heuristic, to avoid multiple stale version retries across
// multiple shards, and does not affect correctness.
@@ -405,8 +406,9 @@ ShardConnection::~ShardConnection() {
} else {
// see done() comments above for why we log this line
LOGV2(22716,
- "sharded connection to {conn_getServerAddress} not being returned to the pool",
- "conn_getServerAddress"_attr = _conn->getServerAddress());
+ "Sharded connection to {address} not being returned to the pool",
+ "Sharded connection not being returned to the pool",
+ "address"_attr = _conn->getServerAddress());
kill();
}
diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp
index 23ecde36a8c..9bfa02546e9 100644
--- a/src/mongo/s/client/shard_registry.cpp
+++ b/src/mongo/s/client/shard_registry.cpp
@@ -241,8 +241,9 @@ void ShardRegistry::startup(OperationContext* opCtx) {
if (!status.isOK()) {
LOGV2_FATAL(22737,
- "Can't schedule ShardRegistry reload due to {causedBy_status_getStatus}",
- "causedBy_status_getStatus"_attr = causedBy(status.getStatus()));
+ "Error scheduling shard registry reload caused by {error}",
+ "Error scheduling shard registry reload",
+ "error"_attr = redact(status.getStatus()));
fassertFailed(40252);
}
}
@@ -251,8 +252,9 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) {
LOGV2_DEBUG(22726, 1, "Reloading shardRegistry");
if (!cbArgs.status.isOK()) {
LOGV2_WARNING(22734,
- "cant reload ShardRegistry {causedBy_cbArgs_status}",
- "causedBy_cbArgs_status"_attr = causedBy(cbArgs.status));
+ "Error reloading shard registry caused by {error}",
+ "Error reloading shard registry",
+ "error"_attr = redact(cbArgs.status));
return;
}
@@ -264,10 +266,11 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) {
reload(opCtx.get());
} catch (const DBException& e) {
LOGV2(22727,
- "Periodic reload of shard registry failed {causedBy_e}; will retry after "
- "{kRefreshPeriod}",
- "causedBy_e"_attr = causedBy(e),
- "kRefreshPeriod"_attr = kRefreshPeriod);
+ "Error running periodic reload of shard registry caused by {error}; will retry after "
+ "{shardRegistryReloadInterval}",
+ "Error running periodic reload of shard registry",
+ "error"_attr = redact(e),
+ "shardRegistryReloadInterval"_attr = kRefreshPeriod);
}
// reschedule itself
@@ -276,14 +279,16 @@ void ShardRegistry::_internalReload(const CallbackArgs& cbArgs) {
[this](const CallbackArgs& cbArgs) { _internalReload(cbArgs); });
if (status.getStatus() == ErrorCodes::ShutdownInProgress) {
- LOGV2_DEBUG(22728, 1, "Cant schedule ShardRegistry reload. Executor shutdown in progress");
+ LOGV2_DEBUG(
+ 22728, 1, "Error scheduling shard registry reload. Executor shutdown in progress");
return;
}
if (!status.isOK()) {
LOGV2_FATAL(22738,
- "Can't schedule ShardRegistry reload due to {causedBy_status_getStatus}",
- "causedBy_status_getStatus"_attr = causedBy(status.getStatus()));
+ "Error scheduling shard registry reload caused by {error}",
+ "Error scheduling shard registry reload",
+ "error"_attr = redact(status.getStatus()));
fassertFailed(40253);
}
}
@@ -307,8 +312,9 @@ bool ShardRegistry::reload(OperationContext* opCtx) {
} catch (const DBException& e) {
LOGV2_DEBUG(22729,
1,
- "ShardRegistry reload is interrupted due to: {e_toStatus}",
- "e_toStatus"_attr = redact(e.toStatus()));
+ "Error reloading shard registry caused by {error}",
+ "Error reloading shard registry",
+ "error"_attr = redact(e));
return false;
}
@@ -377,9 +383,10 @@ void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContex
if (!s) {
LOGV2_DEBUG(22730,
1,
- "shard not found for set: {connStr} when attempting to inform config servers "
- "of updated set membership",
- "connStr"_attr = connStr);
+ "Error updating replica set on config server. Couldn't find shard for "
+ "replica set {replicaSetConnectionStr}",
+ "Error updating replica set on config servers. Couldn't find shard",
+ "replicaSetConnectionStr"_attr = connStr);
return;
}
@@ -398,10 +405,11 @@ void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContex
auto status = swWasUpdated.getStatus();
if (!status.isOK()) {
LOGV2_ERROR(22736,
- "RSChangeWatcher: could not update config db with connection string "
- "{connStr}{causedBy_status}",
- "connStr"_attr = connStr,
- "causedBy_status"_attr = causedBy(redact(status)));
+ "Error updating replica set {replicaSetConnectionStr} on config server caused "
+ "by {error}",
+ "Error updating replica set on config server",
+ "replicaSetConnectionStr"_attr = connStr,
+ "error"_attr = redact(status));
}
}
@@ -428,10 +436,11 @@ ShardRegistryData::ShardRegistryData(OperationContext* opCtx, ShardFactory* shar
LOGV2_DEBUG(22731,
1,
- "found {shards_size} shards listed on config server(s) with lastVisibleOpTime: "
- "{reloadOpTime}",
- "shards_size"_attr = shards.size(),
- "reloadOpTime"_attr = reloadOpTime.toBSON());
+ "Found {shardsNumber} shards listed on config server(s) with lastVisibleOpTime: "
+ "{lastVisibleOpTime}",
+ "Succesfully retrieved updated shard list from config server",
+ "shardsNumber"_attr = shards.size(),
+ "lastVisibleOpTime"_attr = reloadOpTime);
// Ensure targeter exists for all shards and take shard connection string from the targeter.
// Do this before re-taking the mutex to avoid deadlock with the ReplicaSetMonitor updating
@@ -444,9 +453,9 @@ ShardRegistryData::ShardRegistryData(OperationContext* opCtx, ShardFactory* shar
auto shardHostStatus = ConnectionString::parse(shardType.getHost());
if (!shardHostStatus.isOK()) {
LOGV2_WARNING(22735,
- "Unable to parse shard host {shardHostStatus_getStatus}",
- "shardHostStatus_getStatus"_attr =
- shardHostStatus.getStatus().toString());
+ "Error parsing shard host caused by {error}",
+ "Error parsing shard host",
+ "error"_attr = redact(shardHostStatus.getStatus()));
continue;
}
@@ -589,11 +598,12 @@ void ShardRegistryData::_addShard(WithLock lk,
if (oldConnString.toString() != connString.toString()) {
LOGV2(22732,
- "Updating ShardRegistry connection string for shard {currentShard_getId} from: "
- "{oldConnString} to: {connString}",
- "currentShard_getId"_attr = currentShard->getId(),
- "oldConnString"_attr = oldConnString.toString(),
- "connString"_attr = connString.toString());
+ "Updating shard registry connection string for shard {shardId} to "
+ "{newShardConnectionString} from {oldShardConnectionString}",
+ "Updating shard connection string on shard registry",
+ "shardId"_attr = currentShard->getId(),
+ "newShardConnectionString"_attr = connString,
+ "oldShardConnectionString"_attr = oldConnString);
}
for (const auto& host : oldConnString.getServers()) {
@@ -607,9 +617,11 @@ void ShardRegistryData::_addShard(WithLock lk,
LOGV2_DEBUG(22733,
3,
- "Adding shard {shard_getId}, with CS {connString}",
- "shard_getId"_attr = shard->getId(),
- "connString"_attr = connString.toString());
+ "Adding new shard {shardId} with connection string {shardConnectionString} to "
+ "shard registry",
+ "Adding new shard to shard registry",
+ "shardId"_attr = shard->getId(),
+ "shardConnectionString"_attr = connString);
if (connString.type() == ConnectionString::SET) {
_rsLookup[connString.getSetName()] = shard;
} else if (connString.type() == ConnectionString::CUSTOM) {