summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2016-10-25 12:55:37 -0400
committer: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2016-10-27 16:03:23 -0400
commit: e8cbb32624663fdf59f6df752594e5295c55247c (patch)
tree: a2fa9e520e29f3d58149472a98cf23e467a4476b
parent: 604cf316385ddb9e6bc0361d0bb300024b4bc98c (diff)
download: mongo-e8cbb32624663fdf59f6df752594e5295c55247c.tar.gz
SERVER-26777 Improve logging around chunk refresh
Unifies the logging messages between mongos and mongod and adds timing information.
-rw-r--r--  src/mongo/db/s/metadata_loader.cpp       52
-rw-r--r--  src/mongo/db/s/sharding_state.cpp        59
-rw-r--r--  src/mongo/s/catalog/type_collection.cpp  11
-rw-r--r--  src/mongo/s/chunk_manager.cpp            33
4 files changed, 78 insertions, 77 deletions
diff --git a/src/mongo/db/s/metadata_loader.cpp b/src/mongo/db/s/metadata_loader.cpp
index 1e43c4bd73f..ea9a0ed87a4 100644
--- a/src/mongo/db/s/metadata_loader.cpp
+++ b/src/mongo/db/s/metadata_loader.cpp
@@ -95,9 +95,9 @@ Status MetadataLoader::makeCollectionMetadata(OperationContext* txn,
const string& shard,
const CollectionMetadata* oldMetadata,
CollectionMetadata* metadata) {
- Status status = _initCollection(txn, catalogClient, ns, shard, metadata);
- if (!status.isOK() || metadata->getKeyPattern().isEmpty()) {
- return status;
+ Status initCollectionStatus = _initCollection(txn, catalogClient, ns, shard, metadata);
+ if (!initCollectionStatus.isOK()) {
+ return initCollectionStatus;
}
return _initChunks(txn, catalogClient, ns, shard, oldMetadata, metadata);
@@ -183,11 +183,6 @@ Status MetadataLoader::_initChunks(OperationContext* txn,
nullptr,
repl::ReadConcernLevel::kMajorityReadConcern);
if (!status.isOK()) {
- if (status == ErrorCodes::HostUnreachable) {
- // Make our metadata invalid
- metadata->_collVersion = ChunkVersion(0, 0, OID());
- metadata->_chunksMap.clear();
- }
return status;
}
@@ -221,35 +216,22 @@ Status MetadataLoader::_initChunks(OperationContext* txn,
// TODO: drop the config.collections entry *before* the chunks and eliminate this
// ambiguity
- string errMsg = str::stream()
- << "no chunks found when reloading " << ns << ", previous version was "
- << metadata->_collVersion.toString() << (fullReload ? ", this is a drop" : "");
-
- warning() << errMsg;
-
- metadata->_collVersion = ChunkVersion(0, 0, OID());
- metadata->_chunksMap.clear();
-
- return fullReload ? Status(ErrorCodes::NamespaceNotFound, errMsg)
- : Status(ErrorCodes::RemoteChangeDetected, errMsg);
+ return {fullReload ? ErrorCodes::NamespaceNotFound : ErrorCodes::RemoteChangeDetected,
+ str::stream() << "No chunks found when reloading " << ns
+ << ", previous version was "
+ << metadata->_collVersion.toString()
+ << (fullReload ? ", this is a drop" : "")};
} else {
- // Invalid chunks found, our epoch may have changed because we dropped/recreated
- // the collection.
- string errMsg = str::stream()
- << "invalid chunks found when reloading " << ns << ", previous version was "
- << metadata->_collVersion.toString() << ", this should be rare";
- warning() << errMsg;
-
- metadata->_collVersion = ChunkVersion(0, 0, OID());
- metadata->_chunksMap.clear();
-
- return Status(ErrorCodes::RemoteChangeDetected, errMsg);
+ // Invalid chunks found, our epoch may have changed because we dropped/recreated the
+ // collection
+ return {ErrorCodes::RemoteChangeDetected,
+ str::stream() << "Invalid chunks found when reloading " << ns
+ << ", previous version was "
+ << metadata->_collVersion.toString()
+ << ", this should be rare"};
}
- } catch (const DBException& e) {
- // We deliberately do not return connPtr to the pool, since it was involved with the
- // error here.
- return Status(ErrorCodes::HostUnreachable,
- str::stream() << "problem querying chunks metadata" << causedBy(e));
+ } catch (const DBException& ex) {
+ return ex.toStatus();
}
}
diff --git a/src/mongo/db/s/sharding_state.cpp b/src/mongo/db/s/sharding_state.cpp
index 7bb45f2a904..bd6867ddc6d 100644
--- a/src/mongo/db/s/sharding_state.cpp
+++ b/src/mongo/db/s/sharding_state.cpp
@@ -668,49 +668,56 @@ StatusWith<ChunkVersion> ShardingState::_refreshMetadata(
}
}
- // The _configServerTickets serializes this process such that only a small number of threads
- // can try to refresh at the same time
+ // The _configServerTickets serializes this process such that only a small number of threads can
+ // try to refresh at the same time in order to avoid overloading the config server
_configServerTickets.waitForTicket();
TicketHolderReleaser needTicketFrom(&_configServerTickets);
- LOG(1) << "Remotely refreshing metadata for " << nss.ns() << ", based on collection version "
- << (metadataForDiff ? metadataForDiff->getCollVersion().toString() : "(empty)");
+ Timer t;
- std::unique_ptr<CollectionMetadata> remoteMetadata(stdx::make_unique<CollectionMetadata>());
+ log() << "MetadataLoader loading chunks for " << nss.ns() << " based on: "
+ << (metadataForDiff ? metadataForDiff->getCollVersion().toString() : "(empty)");
- {
- Timer refreshTimer;
+ std::unique_ptr<CollectionMetadata> remoteMetadata(stdx::make_unique<CollectionMetadata>());
- Status status = MetadataLoader::makeCollectionMetadata(txn,
- grid.catalogClient(txn),
- nss.ns(),
- getShardName(),
- metadataForDiff,
- remoteMetadata.get());
+ Status status = MetadataLoader::makeCollectionMetadata(txn,
+ grid.catalogClient(txn),
+ nss.ns(),
+ getShardName(),
+ metadataForDiff,
+ remoteMetadata.get());
- if (status.code() == ErrorCodes::NamespaceNotFound) {
- remoteMetadata.reset();
- } else if (!status.isOK()) {
- warning() << "Could not remotely refresh metadata for " << nss.ns()
- << causedBy(redact(status));
+ if (!status.isOK() && status != ErrorCodes::NamespaceNotFound) {
+ warning() << "MetadataLoader failed after " << t.millis() << " ms"
+ << causedBy(redact(status));
- return status;
- }
+ return status;
}
- // Exclusive collection lock needed since we're now potentially changing the metadata, and
- // don't want reads/writes to be ongoing
+ // Exclusive collection lock needed since we're now changing the metadata
ScopedTransaction transaction(txn, MODE_IX);
AutoGetCollection autoColl(txn, nss, MODE_IX, MODE_X);
auto css = CollectionShardingState::get(txn, nss);
- // Resolve newer pending chunks with the remote metadata, finish construction
- css->refreshMetadata(txn, std::move(remoteMetadata));
+ if (status.isOK()) {
+ css->refreshMetadata(txn, std::move(remoteMetadata));
+
+ auto metadata = css->getMetadata();
+
+ log() << "MetadataLoader took " << t.millis() << " ms and found version "
+ << metadata->getCollVersion();
+
+ return metadata->getShardVersion();
+ }
+
+ invariant(status == ErrorCodes::NamespaceNotFound);
+
+ css->refreshMetadata(txn, nullptr);
- auto metadata = css->getMetadata();
+ log() << "MetadataLoader took " << t.millis() << " ms and did not find the namespace";
- return (metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED());
+ return ChunkVersion::UNSHARDED();
}
StatusWith<ScopedRegisterDonateChunk> ShardingState::registerDonateChunk(
diff --git a/src/mongo/s/catalog/type_collection.cpp b/src/mongo/s/catalog/type_collection.cpp
index 5c94eb3fc69..aed93093462 100644
--- a/src/mongo/s/catalog/type_collection.cpp
+++ b/src/mongo/s/catalog/type_collection.cpp
@@ -105,8 +105,15 @@ StatusWith<CollectionType> CollectionType::fromBSON(const BSONObj& source) {
}
coll._keyPattern = KeyPattern(obj.getOwned());
- } else if ((status == ErrorCodes::NoSuchKey) && coll.getDropped()) {
- // Sharding key can be missing if the collection is dropped
+ } else if (status == ErrorCodes::NoSuchKey) {
+ // Sharding key can only be missing if the collection is dropped
+ if (!coll.getDropped()) {
+ return {status.code(),
+ str::stream() << "Shard key for collection " << coll._fullNs->ns()
+ << " is missing, but the collection is not marked as "
+ "dropped. This is an indication of corrupted sharding "
+ "metadata."};
+ }
} else {
return status;
}
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index f5dca5e24f8..6979b583880 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -77,8 +77,7 @@ using std::vector;
namespace {
/**
- * This is an adapter so we can use config diffs - mongos and mongod do them slightly
- * differently
+ * This is an adapter so we can use config diffs - mongos and mongod do them slightly differently.
*
* The mongos adapter here tracks all shards, and stores ranges by (max, Chunk) in the map.
*/
@@ -116,7 +115,6 @@ private:
ChunkManager* const _manager;
};
-
bool allOfType(BSONType type, const BSONObj& o) {
BSONObjIterator it(o);
while (it.more()) {
@@ -207,6 +205,8 @@ ChunkManager::ChunkManager(OperationContext* txn, const CollectionType& coll)
}
void ChunkManager::loadExistingRanges(OperationContext* txn, const ChunkManager* oldManager) {
+ invariant(!_version.isSet());
+
int tries = 3;
while (tries--) {
@@ -217,24 +217,29 @@ void ChunkManager::loadExistingRanges(OperationContext* txn, const ChunkManager*
Timer t;
- bool success = _load(txn, chunkMap, shardIds, &shardVersions, oldManager);
- if (success) {
- log() << "ChunkManager: time to load chunks for " << _ns << ": " << t.millis() << "ms"
- << " sequenceNumber: " << _sequenceNumber << " version: " << _version.toString()
- << " based on: "
- << (oldManager ? oldManager->getVersion().toString() : "(empty)");
+ log() << "ChunkManager loading chunks for " << _ns << " sequenceNumber: " << _sequenceNumber
+ << " based on: " << (oldManager ? oldManager->getVersion().toString() : "(empty)");
+ if (_load(txn, chunkMap, shardIds, &shardVersions, oldManager)) {
// TODO: Merge into diff code above, so we validate in one place
if (isChunkMapValid(chunkMap)) {
- _chunkMap.swap(chunkMap);
- _shardIds.swap(shardIds);
- _shardVersions.swap(shardVersions);
+ _chunkMap = std::move(chunkMap);
+ _shardIds = std::move(shardIds);
+ _shardVersions = std::move(shardVersions);
_chunkRangeMap = _constructRanges(_chunkMap);
- return;
+
+ log() << "ChunkManager load took " << t.millis() << " ms and found version "
+ << _version;
+ } else {
+ warning() << "ChunkManager load took " << t.millis()
+ << " ms and found invalid chunk ranges at version " << _version;
}
+
+ return;
}
- warning() << "ChunkManager loaded an invalid config for " << _ns << ", trying again";
+ warning() << "ChunkManager load failed after " << t.millis()
+ << " ms and will be retried up to " << tries << " more times";
sleepmillis(10 * (3 - tries));
}