diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-10-25 12:55:37 -0400 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-10-27 16:03:23 -0400 |
commit | e8cbb32624663fdf59f6df752594e5295c55247c (patch) | |
tree | a2fa9e520e29f3d58149472a98cf23e467a4476b | |
parent | 604cf316385ddb9e6bc0361d0bb300024b4bc98c (diff) | |
download | mongo-e8cbb32624663fdf59f6df752594e5295c55247c.tar.gz |
SERVER-26777 Improve logging around chunk refresh

Unifies the logging messages between mongos and mongod and adds timing information.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/mongo/db/s/metadata_loader.cpp | 52 |
| -rw-r--r-- | src/mongo/db/s/sharding_state.cpp | 59 |
| -rw-r--r-- | src/mongo/s/catalog/type_collection.cpp | 11 |
| -rw-r--r-- | src/mongo/s/chunk_manager.cpp | 33 |

4 files changed, 78 insertions, 77 deletions
diff --git a/src/mongo/db/s/metadata_loader.cpp b/src/mongo/db/s/metadata_loader.cpp index 1e43c4bd73f..ea9a0ed87a4 100644 --- a/src/mongo/db/s/metadata_loader.cpp +++ b/src/mongo/db/s/metadata_loader.cpp @@ -95,9 +95,9 @@ Status MetadataLoader::makeCollectionMetadata(OperationContext* txn, const string& shard, const CollectionMetadata* oldMetadata, CollectionMetadata* metadata) { - Status status = _initCollection(txn, catalogClient, ns, shard, metadata); - if (!status.isOK() || metadata->getKeyPattern().isEmpty()) { - return status; + Status initCollectionStatus = _initCollection(txn, catalogClient, ns, shard, metadata); + if (!initCollectionStatus.isOK()) { + return initCollectionStatus; } return _initChunks(txn, catalogClient, ns, shard, oldMetadata, metadata); @@ -183,11 +183,6 @@ Status MetadataLoader::_initChunks(OperationContext* txn, nullptr, repl::ReadConcernLevel::kMajorityReadConcern); if (!status.isOK()) { - if (status == ErrorCodes::HostUnreachable) { - // Make our metadata invalid - metadata->_collVersion = ChunkVersion(0, 0, OID()); - metadata->_chunksMap.clear(); - } return status; } @@ -221,35 +216,22 @@ Status MetadataLoader::_initChunks(OperationContext* txn, // TODO: drop the config.collections entry *before* the chunks and eliminate this // ambiguity - string errMsg = str::stream() - << "no chunks found when reloading " << ns << ", previous version was " - << metadata->_collVersion.toString() << (fullReload ? ", this is a drop" : ""); - - warning() << errMsg; - - metadata->_collVersion = ChunkVersion(0, 0, OID()); - metadata->_chunksMap.clear(); - - return fullReload ? Status(ErrorCodes::NamespaceNotFound, errMsg) - : Status(ErrorCodes::RemoteChangeDetected, errMsg); + return {fullReload ? ErrorCodes::NamespaceNotFound : ErrorCodes::RemoteChangeDetected, + str::stream() << "No chunks found when reloading " << ns + << ", previous version was " + << metadata->_collVersion.toString() + << (fullReload ? 
", this is a drop" : "")}; } else { - // Invalid chunks found, our epoch may have changed because we dropped/recreated - // the collection. - string errMsg = str::stream() - << "invalid chunks found when reloading " << ns << ", previous version was " - << metadata->_collVersion.toString() << ", this should be rare"; - warning() << errMsg; - - metadata->_collVersion = ChunkVersion(0, 0, OID()); - metadata->_chunksMap.clear(); - - return Status(ErrorCodes::RemoteChangeDetected, errMsg); + // Invalid chunks found, our epoch may have changed because we dropped/recreated the + // collection + return {ErrorCodes::RemoteChangeDetected, + str::stream() << "Invalid chunks found when reloading " << ns + << ", previous version was " + << metadata->_collVersion.toString() + << ", this should be rare"}; } - } catch (const DBException& e) { - // We deliberately do not return connPtr to the pool, since it was involved with the - // error here. - return Status(ErrorCodes::HostUnreachable, - str::stream() << "problem querying chunks metadata" << causedBy(e)); + } catch (const DBException& ex) { + return ex.toStatus(); } } diff --git a/src/mongo/db/s/sharding_state.cpp b/src/mongo/db/s/sharding_state.cpp index 7bb45f2a904..bd6867ddc6d 100644 --- a/src/mongo/db/s/sharding_state.cpp +++ b/src/mongo/db/s/sharding_state.cpp @@ -668,49 +668,56 @@ StatusWith<ChunkVersion> ShardingState::_refreshMetadata( } } - // The _configServerTickets serializes this process such that only a small number of threads - // can try to refresh at the same time + // The _configServerTickets serializes this process such that only a small number of threads can + // try to refresh at the same time in order to avoid overloading the config server _configServerTickets.waitForTicket(); TicketHolderReleaser needTicketFrom(&_configServerTickets); - LOG(1) << "Remotely refreshing metadata for " << nss.ns() << ", based on collection version " - << (metadataForDiff ? 
metadataForDiff->getCollVersion().toString() : "(empty)"); + Timer t; - std::unique_ptr<CollectionMetadata> remoteMetadata(stdx::make_unique<CollectionMetadata>()); + log() << "MetadataLoader loading chunks for " << nss.ns() << " based on: " + << (metadataForDiff ? metadataForDiff->getCollVersion().toString() : "(empty)"); - { - Timer refreshTimer; + std::unique_ptr<CollectionMetadata> remoteMetadata(stdx::make_unique<CollectionMetadata>()); - Status status = MetadataLoader::makeCollectionMetadata(txn, - grid.catalogClient(txn), - nss.ns(), - getShardName(), - metadataForDiff, - remoteMetadata.get()); + Status status = MetadataLoader::makeCollectionMetadata(txn, + grid.catalogClient(txn), + nss.ns(), + getShardName(), + metadataForDiff, + remoteMetadata.get()); - if (status.code() == ErrorCodes::NamespaceNotFound) { - remoteMetadata.reset(); - } else if (!status.isOK()) { - warning() << "Could not remotely refresh metadata for " << nss.ns() - << causedBy(redact(status)); + if (!status.isOK() && status != ErrorCodes::NamespaceNotFound) { + warning() << "MetadataLoader failed after " << t.millis() << " ms" + << causedBy(redact(status)); - return status; - } + return status; } - // Exclusive collection lock needed since we're now potentially changing the metadata, and - // don't want reads/writes to be ongoing + // Exclusive collection lock needed since we're now changing the metadata ScopedTransaction transaction(txn, MODE_IX); AutoGetCollection autoColl(txn, nss, MODE_IX, MODE_X); auto css = CollectionShardingState::get(txn, nss); - // Resolve newer pending chunks with the remote metadata, finish construction - css->refreshMetadata(txn, std::move(remoteMetadata)); + if (status.isOK()) { + css->refreshMetadata(txn, std::move(remoteMetadata)); + + auto metadata = css->getMetadata(); + + log() << "MetadataLoader took " << t.millis() << " ms and found version " + << metadata->getCollVersion(); + + return metadata->getShardVersion(); + } + + invariant(status == 
ErrorCodes::NamespaceNotFound); + + css->refreshMetadata(txn, nullptr); - auto metadata = css->getMetadata(); + log() << "MetadataLoader took " << t.millis() << " ms and did not find the namespace"; - return (metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED()); + return ChunkVersion::UNSHARDED(); } StatusWith<ScopedRegisterDonateChunk> ShardingState::registerDonateChunk( diff --git a/src/mongo/s/catalog/type_collection.cpp b/src/mongo/s/catalog/type_collection.cpp index 5c94eb3fc69..aed93093462 100644 --- a/src/mongo/s/catalog/type_collection.cpp +++ b/src/mongo/s/catalog/type_collection.cpp @@ -105,8 +105,15 @@ StatusWith<CollectionType> CollectionType::fromBSON(const BSONObj& source) { } coll._keyPattern = KeyPattern(obj.getOwned()); - } else if ((status == ErrorCodes::NoSuchKey) && coll.getDropped()) { - // Sharding key can be missing if the collection is dropped + } else if (status == ErrorCodes::NoSuchKey) { + // Sharding key can only be missing if the collection is dropped + if (!coll.getDropped()) { + return {status.code(), + str::stream() << "Shard key for collection " << coll._fullNs->ns() + << " is missing, but the collection is not marked as " + "dropped. This is an indication of corrupted sharding " + "metadata."}; + } } else { return status; } diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index f5dca5e24f8..6979b583880 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -77,8 +77,7 @@ using std::vector; namespace { /** - * This is an adapter so we can use config diffs - mongos and mongod do them slightly - * differently + * This is an adapter so we can use config diffs - mongos and mongod do them slightly differently. * * The mongos adapter here tracks all shards, and stores ranges by (max, Chunk) in the map. 
*/ @@ -116,7 +115,6 @@ private: ChunkManager* const _manager; }; - bool allOfType(BSONType type, const BSONObj& o) { BSONObjIterator it(o); while (it.more()) { @@ -207,6 +205,8 @@ ChunkManager::ChunkManager(OperationContext* txn, const CollectionType& coll) } void ChunkManager::loadExistingRanges(OperationContext* txn, const ChunkManager* oldManager) { + invariant(!_version.isSet()); + int tries = 3; while (tries--) { @@ -217,24 +217,29 @@ void ChunkManager::loadExistingRanges(OperationContext* txn, const ChunkManager* Timer t; - bool success = _load(txn, chunkMap, shardIds, &shardVersions, oldManager); - if (success) { - log() << "ChunkManager: time to load chunks for " << _ns << ": " << t.millis() << "ms" - << " sequenceNumber: " << _sequenceNumber << " version: " << _version.toString() - << " based on: " - << (oldManager ? oldManager->getVersion().toString() : "(empty)"); + log() << "ChunkManager loading chunks for " << _ns << " sequenceNumber: " << _sequenceNumber + << " based on: " << (oldManager ? oldManager->getVersion().toString() : "(empty)"); + if (_load(txn, chunkMap, shardIds, &shardVersions, oldManager)) { // TODO: Merge into diff code above, so we validate in one place if (isChunkMapValid(chunkMap)) { - _chunkMap.swap(chunkMap); - _shardIds.swap(shardIds); - _shardVersions.swap(shardVersions); + _chunkMap = std::move(chunkMap); + _shardIds = std::move(shardIds); + _shardVersions = std::move(shardVersions); _chunkRangeMap = _constructRanges(_chunkMap); - return; + + log() << "ChunkManager load took " << t.millis() << " ms and found version " + << _version; + } else { + warning() << "ChunkManager load took " << t.millis() + << " ms and found invalid chunk ranges at version " << _version; } + + return; } - warning() << "ChunkManager loaded an invalid config for " << _ns << ", trying again"; + warning() << "ChunkManager load failed after " << t.millis() + << " ms and will be retried up to " << tries << " more times"; sleepmillis(10 * (3 - tries)); } |