/** * Copyright (C) 2012 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects * for all of the code used other than as permitted herein. If you modify * file(s) with this exception, you may extend this exception to your * version of the file(s), but you are not obligated to do so. If you do not * wish to do so, delete this exception statement from your version. If you * delete this exception statement from all source files in the program, * then also delete it in the license file. */ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding #include "mongo/platform/basic.h" #include "mongo/db/s/collection_metadata.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/bson/util/builder.h" #include "mongo/s/catalog/type_chunk.h" #include "mongo/stdx/memory.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" namespace mongo { using std::unique_ptr; using std::make_pair; using std::string; using std::vector; using str::stream; CollectionMetadata::CollectionMetadata() : _pendingMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()), _chunksMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()), _rangesMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()) {} CollectionMetadata::CollectionMetadata(const BSONObj& keyPattern, ChunkVersion collectionVersion) : _collVersion(collectionVersion), _shardVersion(ChunkVersion(0, 0, collectionVersion.epoch())), _keyPattern(keyPattern.getOwned()), _pendingMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()), _chunksMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()), _rangesMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap()) {} CollectionMetadata::~CollectionMetadata() = default; unique_ptr CollectionMetadata::clonePlusChunk( const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& chunkVersion) const { invariant(chunkVersion.epoch() == _shardVersion.epoch()); invariant(chunkVersion.isSet()); invariant(chunkVersion > _shardVersion); invariant(minKey.woCompare(maxKey) < 0); invariant(!rangeMapOverlaps(_chunksMap, minKey, maxKey)); unique_ptr metadata(stdx::make_unique()); metadata->_keyPattern = _keyPattern.getOwned(); metadata->fillKeyPatternFields(); metadata->_pendingMap = _pendingMap; metadata->_chunksMap = _chunksMap; metadata->_chunksMap.insert( make_pair(minKey.getOwned(), CachedChunkInfo(maxKey.getOwned(), chunkVersion))); metadata->_shardVersion = chunkVersion; metadata->_collVersion = chunkVersion > _collVersion ? chunkVersion : _collVersion; metadata->fillRanges(); invariant(metadata->isValid()); return metadata; } std::unique_ptr CollectionMetadata::cloneMinusPending( const ChunkType& chunk) const { invariant(rangeMapContains(_pendingMap, chunk.getMin(), chunk.getMax())); unique_ptr metadata(stdx::make_unique()); metadata->_keyPattern = _keyPattern.getOwned(); metadata->fillKeyPatternFields(); metadata->_pendingMap = _pendingMap; metadata->_pendingMap.erase(chunk.getMin()); metadata->_chunksMap = _chunksMap; metadata->_rangesMap = _rangesMap; metadata->_shardVersion = _shardVersion; metadata->_collVersion = _collVersion; invariant(metadata->isValid()); return metadata; } std::unique_ptr CollectionMetadata::clonePlusPending( const ChunkType& chunk) const { invariant(!rangeMapOverlaps(_chunksMap, chunk.getMin(), chunk.getMax())); unique_ptr metadata(stdx::make_unique()); metadata->_keyPattern = _keyPattern.getOwned(); metadata->fillKeyPatternFields(); metadata->_pendingMap = _pendingMap; metadata->_chunksMap = _chunksMap; metadata->_rangesMap = _rangesMap; metadata->_shardVersion = _shardVersion; metadata->_collVersion = _collVersion; // If there are any pending chunks on the interval to be added this is ok, since pending chunks // aren't officially tracked yet and something may have changed on servers we do not see yet. // // We remove any chunks we overlap because the remote request starting a chunk migration is what // is authoritative. if (rangeMapOverlaps(_pendingMap, chunk.getMin(), chunk.getMax())) { RangeVector pendingOverlap; getRangeMapOverlap(_pendingMap, chunk.getMin(), chunk.getMax(), &pendingOverlap); warning() << "new pending chunk " << redact(rangeToString(chunk.getMin(), chunk.getMax())) << " overlaps existing pending chunks " << redact(overlapToString(pendingOverlap)) << ", a migration may not have completed"; for (RangeVector::iterator it = pendingOverlap.begin(); it != pendingOverlap.end(); ++it) { metadata->_pendingMap.erase(it->first); } } // The pending map entry cannot contain a specific chunk version because we don't know what // version would be generated for it at commit time. That's why we insert an IGNORED value. metadata->_pendingMap.insert( make_pair(chunk.getMin(), CachedChunkInfo(chunk.getMax(), ChunkVersion::IGNORED()))); invariant(metadata->isValid()); return metadata; } bool CollectionMetadata::keyBelongsToMe(const BSONObj& key) const { // For now, collections don't move. So if the collection is not sharded, assume // the document with the given key can be accessed. if (_keyPattern.isEmpty()) { return true; } if (_rangesMap.size() <= 0) { return false; } RangeMap::const_iterator it = _rangesMap.upper_bound(key); if (it != _rangesMap.begin()) it--; return rangeContains(it->first, it->second.getMaxKey(), key); } bool CollectionMetadata::keyIsPending(const BSONObj& key) const { // If we aren't sharded, then the key is never pending (though it belongs-to-me) if (_keyPattern.isEmpty()) { return false; } if (_pendingMap.size() <= 0) { return false; } RangeMap::const_iterator it = _pendingMap.upper_bound(key); if (it != _pendingMap.begin()) it--; bool isPending = rangeContains(it->first, it->second.getMaxKey(), key); return isPending; } bool CollectionMetadata::getNextChunk(const BSONObj& lookupKey, ChunkType* chunk) const { RangeMap::const_iterator upperChunkIt = _chunksMap.upper_bound(lookupKey); RangeMap::const_iterator lowerChunkIt = upperChunkIt; if (upperChunkIt != _chunksMap.begin()) { --lowerChunkIt; } else { lowerChunkIt = _chunksMap.end(); } if (lowerChunkIt != _chunksMap.end() && lowerChunkIt->second.getMaxKey().woCompare(lookupKey) > 0) { chunk->setMin(lowerChunkIt->first); chunk->setMax(lowerChunkIt->second.getMaxKey()); chunk->setVersion(lowerChunkIt->second.getVersion()); return true; } if (upperChunkIt != _chunksMap.end()) { chunk->setMin(upperChunkIt->first); chunk->setMax(upperChunkIt->second.getMaxKey()); chunk->setVersion(upperChunkIt->second.getVersion()); return true; } return false; } bool CollectionMetadata::getDifferentChunk(const BSONObj& chunkMinKey, ChunkType* differentChunk) const { RangeMap::const_iterator upperChunkIt = _chunksMap.end(); RangeMap::const_iterator lowerChunkIt = _chunksMap.begin(); while (lowerChunkIt != upperChunkIt) { if (lowerChunkIt->first.woCompare(chunkMinKey) != 0) { differentChunk->setMin(lowerChunkIt->first); differentChunk->setMax(lowerChunkIt->second.getMaxKey()); differentChunk->setVersion(lowerChunkIt->second.getVersion()); return true; } ++lowerChunkIt; } return false; } Status CollectionMetadata::checkChunkIsValid(const ChunkType& chunk) { ChunkType existingChunk; if (!getNextChunk(chunk.getMin(), &existingChunk)) { return {ErrorCodes::IncompatibleShardingMetadata, str::stream() << "Chunk with bounds " << ChunkRange(chunk.getMin(), chunk.getMax()).toString() << " is not owned by this shard."}; } if (existingChunk.getMin().woCompare(chunk.getMin()) || existingChunk.getMax().woCompare(chunk.getMax())) { return {ErrorCodes::IncompatibleShardingMetadata, str::stream() << "Unable to find chunk with the exact bounds " << ChunkRange(chunk.getMin(), chunk.getMax()).toString() << " at collection version " << getCollVersion().toString()}; } return Status::OK(); } void CollectionMetadata::toBSONBasic(BSONObjBuilder& bb) const { _collVersion.addToBSON(bb, "collVersion"); _shardVersion.addToBSON(bb, "shardVersion"); bb.append("keyPattern", _keyPattern); } void CollectionMetadata::toBSONChunks(BSONArrayBuilder& bb) const { if (_chunksMap.empty()) return; for (RangeMap::const_iterator it = _chunksMap.begin(); it != _chunksMap.end(); ++it) { BSONArrayBuilder chunkBB(bb.subarrayStart()); chunkBB.append(it->first); chunkBB.append(it->second.getMaxKey()); chunkBB.done(); } } void CollectionMetadata::toBSONPending(BSONArrayBuilder& bb) const { if (_pendingMap.empty()) return; for (RangeMap::const_iterator it = _pendingMap.begin(); it != _pendingMap.end(); ++it) { BSONArrayBuilder pendingBB(bb.subarrayStart()); pendingBB.append(it->first); pendingBB.append(it->second.getMaxKey()); pendingBB.done(); } } string CollectionMetadata::toStringBasic() const { return stream() << "collection version: " << _collVersion.toString() << ", shard version: " << _shardVersion.toString(); } bool CollectionMetadata::getNextOrphanRange(const BSONObj& origLookupKey, KeyRange* range) const { if (_keyPattern.isEmpty()) return false; BSONObj lookupKey = origLookupKey; BSONObj maxKey = getMaxKey(); // so we don't keep rebuilding while (lookupKey.woCompare(maxKey) < 0) { RangeMap::const_iterator lowerChunkIt = _chunksMap.end(); RangeMap::const_iterator upperChunkIt = _chunksMap.end(); if (!_chunksMap.empty()) { upperChunkIt = _chunksMap.upper_bound(lookupKey); lowerChunkIt = upperChunkIt; if (upperChunkIt != _chunksMap.begin()) --lowerChunkIt; else lowerChunkIt = _chunksMap.end(); } // If we overlap, continue after the overlap // TODO: Could optimize slightly by finding next non-contiguous chunk if (lowerChunkIt != _chunksMap.end() && lowerChunkIt->second.getMaxKey().woCompare(lookupKey) > 0) { lookupKey = lowerChunkIt->second.getMaxKey(); continue; } RangeMap::const_iterator lowerPendingIt = _pendingMap.end(); RangeMap::const_iterator upperPendingIt = _pendingMap.end(); if (!_pendingMap.empty()) { upperPendingIt = _pendingMap.upper_bound(lookupKey); lowerPendingIt = upperPendingIt; if (upperPendingIt != _pendingMap.begin()) --lowerPendingIt; else lowerPendingIt = _pendingMap.end(); } // If we overlap, continue after the overlap // TODO: Could optimize slightly by finding next non-contiguous chunk if (lowerPendingIt != _pendingMap.end() && lowerPendingIt->second.getMaxKey().woCompare(lookupKey) > 0) { lookupKey = lowerPendingIt->second.getMaxKey(); continue; } // // We know that the lookup key is not covered by a chunk or pending range, and where the // previous chunk and pending chunks are. Now we fill in the bounds as the closest // bounds of the surrounding ranges in both maps. // range->keyPattern = _keyPattern; range->minKey = getMinKey(); range->maxKey = maxKey; if (lowerChunkIt != _chunksMap.end() && lowerChunkIt->second.getMaxKey().woCompare(range->minKey) > 0) { range->minKey = lowerChunkIt->second.getMaxKey(); } if (upperChunkIt != _chunksMap.end() && upperChunkIt->first.woCompare(range->maxKey) < 0) { range->maxKey = upperChunkIt->first; } if (lowerPendingIt != _pendingMap.end() && lowerPendingIt->second.getMaxKey().woCompare(range->minKey) > 0) { range->minKey = lowerPendingIt->second.getMaxKey(); } if (upperPendingIt != _pendingMap.end() && upperPendingIt->first.woCompare(range->maxKey) < 0) { range->maxKey = upperPendingIt->first; } return true; } return false; } BSONObj CollectionMetadata::getMinKey() const { BSONObjIterator it(_keyPattern); BSONObjBuilder minKeyB; while (it.more()) minKeyB << it.next().fieldName() << MINKEY; return minKeyB.obj(); } BSONObj CollectionMetadata::getMaxKey() const { BSONObjIterator it(_keyPattern); BSONObjBuilder maxKeyB; while (it.more()) maxKeyB << it.next().fieldName() << MAXKEY; return maxKeyB.obj(); } bool CollectionMetadata::isValid() const { if (_shardVersion > _collVersion) return false; if (_collVersion.majorVersion() == 0) return false; if (_collVersion.epoch() != _shardVersion.epoch()) return false; if (_shardVersion.majorVersion() > 0) { // Must be chunks if (_rangesMap.size() == 0 || _chunksMap.size() == 0) return false; } else { // No chunks if (_shardVersion.minorVersion() > 0) return false; if (_rangesMap.size() > 0 || _chunksMap.size() > 0) return false; } return true; } bool CollectionMetadata::isValidKey(const BSONObj& key) const { BSONObjIterator it(_keyPattern); while (it.more()) { BSONElement next = it.next(); if (!key.hasField(next.fieldName())) return false; } return key.nFields() == _keyPattern.nFields(); } void CollectionMetadata::fillRanges() { if (_chunksMap.empty()) return; // Load the chunk information, coallesceing their ranges. The version for this shard would be // the highest version for any of the chunks. BSONObj min, max; for (const auto& entry : _chunksMap) { BSONObj currMin = entry.first; BSONObj currMax = entry.second.getMaxKey(); // coalesce the chunk's bounds in ranges if they are adjacent chunks if (min.isEmpty()) { min = currMin; max = currMax; continue; } if (SimpleBSONObjComparator::kInstance.evaluate(max == currMin)) { max = currMax; continue; } _rangesMap.insert(make_pair(min, CachedChunkInfo(max, ChunkVersion::IGNORED()))); min = currMin; max = currMax; } invariant(!min.isEmpty()); invariant(!max.isEmpty()); _rangesMap.insert(make_pair(min, CachedChunkInfo(max, ChunkVersion::IGNORED()))); } void CollectionMetadata::fillKeyPatternFields() { // Parse the shard keys into the states 'keys' and 'keySet' members. BSONObjIterator patternIter = _keyPattern.begin(); while (patternIter.more()) { BSONElement current = patternIter.next(); _keyFields.mutableVector().push_back(new FieldRef); FieldRef* const newFieldRef = _keyFields.mutableVector().back(); newFieldRef->parse(current.fieldNameStringData()); } } } // namespace mongo