/**
 * Copyright (C) 2012 10gen Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the GNU Affero General Public License in all respects
 * for all of the code used other than as permitted herein. If you modify
 * file(s) with this exception, you may extend this exception to your
 * version of the file(s), but you are not obligated to do so. If you do not
 * wish to do so, delete this exception statement from your version. If you
 * delete this exception statement from all source files in the program,
 * then also delete it in the license file.
*/ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding #include "mongo/platform/basic.h" #include "mongo/s/metadata_loader.h" #include "mongo/client/connpool.h" #include "mongo/client/dbclientcursor.h" #include "mongo/client/dbclientmockcursor.h" #include "mongo/s/chunk_diff.h" #include "mongo/s/chunk_version.h" #include "mongo/s/collection_metadata.h" #include "mongo/s/range_arithmetic.h" #include "mongo/s/type_chunk.h" #include "mongo/s/type_collection.h" #include "mongo/util/log.h" namespace mongo { using std::make_pair; using std::map; using std::pair; using std::string; /** * This is an adapter so we can use config diffs - mongos and mongod do them slightly * differently. * * The mongod adapter here tracks only a single shard, and stores ranges by (min, max). */ class SCMConfigDiffTracker : public ConfigDiffTracker { public: SCMConfigDiffTracker( const string& currShard ) : _currShard( currShard ) { } virtual bool isTracked( const BSONObj& chunkDoc ) const { return chunkDoc["shard"].type() == String && chunkDoc["shard"].String() == _currShard; } virtual BSONObj maxFrom( const BSONObj& val ) const { return val; } virtual pair rangeFor( const BSONObj& chunkDoc, const BSONObj& min, const BSONObj& max ) const { return make_pair( min, max ); } virtual string shardFor( const string& name ) const { return name; } virtual string nameFrom( const string& shard ) const { return shard; } string _currShard; }; // // MetadataLoader implementation // MetadataLoader::MetadataLoader( const ConnectionString& configLoc ) : _configLoc( configLoc ) { } MetadataLoader::~MetadataLoader() { } Status MetadataLoader::makeCollectionMetadata( const string& ns, const string& shard, const CollectionMetadata* oldMetadata, CollectionMetadata* metadata ) const { Status status = initCollection( ns, shard, metadata ); if ( !status.isOK() || metadata->getKeyPattern().isEmpty() ) return status; return initChunks( ns, shard, oldMetadata, metadata ); } Status 
MetadataLoader::initCollection( const string& ns, const string& shard, CollectionMetadata* metadata ) const { // // Bring collection entry from the config server. // BSONObj collDoc; { try { ScopedDbConnection conn( _configLoc.toString(), 30 ); collDoc = conn->findOne( CollectionType::ConfigNS, QUERY(CollectionType::ns()<_keyPattern = collInfo.getKeyPattern(); metadata->fillKeyPatternFields(); metadata->_shardVersion = ChunkVersion( 0, 0, collInfo.getEpoch() ); metadata->_collVersion = ChunkVersion( 0, 0, collInfo.getEpoch() ); return Status::OK(); } else if ( collInfo.isPrimarySet() && collInfo.getPrimary() == shard ) { // A collection with a non-default primary // Empty primary field not allowed if set dassert( collInfo.getPrimary() != "" ); metadata->_keyPattern = BSONObj(); metadata->fillKeyPatternFields(); metadata->_shardVersion = ChunkVersion( 1, 0, collInfo.getEpoch() ); metadata->_collVersion = metadata->_shardVersion; return Status::OK(); } else { // A collection with a primary that doesn't match this shard or is empty, the primary // may have changed before we loaded. errMsg = // br str::stream() << "collection " << ns << " does not have a shard key " << "and primary " << ( collInfo.isPrimarySet() ? collInfo.getPrimary() : "" ) << " does not match this shard " << shard; warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); return Status( ErrorCodes::RemoteChangeDetected, errMsg ); } } Status MetadataLoader::initChunks( const string& ns, const string& shard, const CollectionMetadata* oldMetadata, CollectionMetadata* metadata ) const { map versionMap; // Preserve the epoch versionMap[shard] = metadata->_shardVersion; OID epoch = metadata->getCollVersion().epoch(); bool fullReload = true; // Check to see if we should use the old version or not. 
if ( oldMetadata ) { // If our epochs are compatible, it's useful to use the old metadata for diffs if ( oldMetadata->getCollVersion().hasEqualEpoch( epoch ) ) { fullReload = false; invariant( oldMetadata->isValid() ); versionMap[shard] = oldMetadata->_shardVersion; metadata->_collVersion = oldMetadata->_collVersion; // TODO: This could be made more efficient if copying not required, but // not as frequently reloaded as in mongos. metadata->_chunksMap = oldMetadata->_chunksMap; LOG( 2 ) << "loading new chunks for collection " << ns << " using old metadata w/ version " << oldMetadata->getShardVersion() << " and " << metadata->_chunksMap.size() << " chunks" << endl; } else { warning() << "reloading collection metadata for " << ns << " with new epoch " << epoch.toString() << ", the current epoch is " << oldMetadata->getCollVersion().epoch().toString() << endl; } } // Exposes the new metadata's range map and version to the "differ," who // would ultimately be responsible of filling them up. SCMConfigDiffTracker differ( shard ); differ.attach( ns, metadata->_chunksMap, metadata->_collVersion, versionMap ); try { ScopedDbConnection conn( _configLoc.toString(), 30 ); auto_ptr cursor = conn->query( ChunkType::ConfigNS, differ.configDiffQuery() ); if ( !cursor.get() ) { // Make our metadata invalid metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return Status( ErrorCodes::HostUnreachable, "problem opening chunk metadata cursor" ); } // // The diff tracker should always find at least one chunk (the highest chunk we saw // last time). If not, something has changed on the config server (potentially between // when we read the collection data and when we read the chunks data). 
// int diffsApplied = differ.calculateConfigDiff( *cursor ); if ( diffsApplied > 0 ) { // Chunks found, return ok LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns << " with version " << metadata->_collVersion << endl; metadata->_shardVersion = versionMap[shard]; metadata->fillRanges(); conn.done(); invariant( metadata->isValid() ); return Status::OK(); } else if ( diffsApplied == 0 ) { // No chunks found, the collection is dropping or we're confused // If this is a full reload, assume it is a drop for backwards compatibility // TODO: drop the config.collections entry *before* the chunks and eliminate this // ambiguity string errMsg = str::stream() << "no chunks found when reloading " << ns << ", previous version was " << metadata->_collVersion.toString() << ( fullReload ? ", this is a drop" : "" ); warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return fullReload ? Status( ErrorCodes::NamespaceNotFound, errMsg ) : Status( ErrorCodes::RemoteChangeDetected, errMsg ); } else { // Invalid chunks found, our epoch may have changed because we dropped/recreated // the collection. string errMsg = // br str::stream() << "invalid chunks found when reloading " << ns << ", previous version was " << metadata->_collVersion.toString() << ", this should be rare"; warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return Status( ErrorCodes::RemoteChangeDetected, errMsg ); } } catch ( const DBException& e ) { string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e ); // We deliberately do not return connPtr to the pool, since it was involved // with the error here. 
    return Status( ErrorCodes::HostUnreachable, errMsg );
}
}

/**
 * Reconciles the pending-chunk ranges recorded in 'afterMetadata' with the
 * chunks freshly loaded into 'remoteMetadata': pending ranges fully contained
 * in loaded chunks are dropped (the migration committed); ranges that only
 * partially overlap loaded chunks indicate an unexpected remote change.
 */
Status MetadataLoader::promotePendingChunks( const CollectionMetadata* afterMetadata,
                                             CollectionMetadata* remoteMetadata ) const
{
    // Ensure pending chunks are applicable: both metadatas present, same epoch,
    // and the remote version not older than the local one.
    bool notApplicable =
        ( NULL == afterMetadata || NULL == remoteMetadata ) ||
        ( afterMetadata->getShardVersion() > remoteMetadata->getShardVersion() ) ||
        ( afterMetadata->getShardVersion().epoch() !=
          remoteMetadata->getShardVersion().epoch() );
    if ( notApplicable ) return Status::OK();

    // The chunks from remoteMetadata are the latest version, and the pending chunks
    // from afterMetadata are the latest version. If no trickery is afoot, pending chunks
    // should match exactly zero or one loaded chunk.
    remoteMetadata->_pendingMap = afterMetadata->_pendingMap;

    // Resolve our pending chunks against the chunks we've loaded
    for ( RangeMap::iterator it = remoteMetadata->_pendingMap.begin();
          it != remoteMetadata->_pendingMap.end(); ) {

        if ( !rangeMapOverlaps( remoteMetadata->_chunksMap, it->first, it->second ) ) {
            // Still pending — no loaded chunk touches this range.
            ++it;
            continue;
        }

        // Our pending range overlaps at least one chunk
        if ( rangeMapContains( remoteMetadata->_chunksMap, it->first, it->second ) ) {

            // Chunk was promoted from pending, successful migration
            LOG( 2 ) << "verified chunk " << rangeToString( it->first, it->second )
                     << " was migrated earlier to this shard" << endl;

            // erase(it++) keeps the iterator valid: 'it' advances before the
            // erased node is removed.
            remoteMetadata->_pendingMap.erase( it++ );
        }
        else {

            // Something strange happened, maybe manual editing of config?
            RangeVector overlap;
            getRangeMapOverlap( remoteMetadata->_chunksMap, it->first, it->second, &overlap );

            string errMsg = str::stream()
                << "the remote metadata changed unexpectedly, pending range "
                << rangeToString( it->first, it->second )
                << " does not exactly overlap loaded chunks " << overlapToString( overlap );

            return Status( ErrorCodes::RemoteChangeDetected, errMsg );
        }
    }

    return Status::OK();
}

} // namespace mongo