author    Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2016-07-18 16:13:54 -0400
committer Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2016-07-21 22:14:51 -0400
commit    ea026e685bc90c102e2305e21b8bdc096475b49b (patch)
tree      9399e67cc27fd5700892ae5b28ec4aca6912440a /src/mongo
parent    0c8085729e7062202bd66076e2ca7751aa338ab6 (diff)
SERVER-24569 Maintain rangesToClean and metadataInUse on chunk migrations
This change rewrites the collection metadata refresh mechanism and puts it entirely under the metadata manager.
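In outline, the refresh path introduced in metadata_manager.cpp below follows a small decision ladder. The following is a simplified, illustrative sketch only; the helper names (clearReceivesAndRangesToClean(), setActive(), resolveReceivingChunks() and friends) are invented stand-ins for the inline logic in the patch:

    // Illustrative sketch of MetadataManager::refreshActiveMetadata()
    void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remote) {
        if (!remote) {
            // Collection is no longer sharded: drop all sharding state
            clearReceivesAndRangesToClean();
            setActive(nullptr);
        } else if (!activeMetadata()) {
            // First metadata seen for this collection
            setActive(std::move(remote));
        } else if (epochChanged(*remote)) {
            // Collection was dropped and recreated: reset everything
            clearReceivesAndRangesToClean();
            setActive(std::move(remote));
        } else if (activeCollVersion() >= remote->getCollVersion()) {
            // We already have newer metadata: ignore the stale refresh
        } else {
            // Same epoch, newer version: resolve chunks we were receiving,
            // dropping completed ones and scheduling failed ones for cleanup
            resolveReceivingChunks(remote.get());
            setActive(std::move(remote));
        }
    }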
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/s/collection_metadata.cpp            |  38
-rw-r--r--  src/mongo/db/s/collection_metadata.h              |  18
-rw-r--r--  src/mongo/db/s/collection_sharding_state.cpp      |  21
-rw-r--r--  src/mongo/db/s/collection_sharding_state.h        |  23
-rw-r--r--  src/mongo/db/s/get_shard_version_command.cpp      |  17
-rw-r--r--  src/mongo/db/s/merge_chunks_command.cpp           |   4
-rw-r--r--  src/mongo/db/s/metadata_loader.cpp                |  50
-rw-r--r--  src/mongo/db/s/metadata_loader.h                  |  25
-rw-r--r--  src/mongo/db/s/metadata_loader_test.cpp           | 198
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp               | 260
-rw-r--r--  src/mongo/db/s/metadata_manager.h                 |  48
-rw-r--r--  src/mongo/db/s/metadata_manager_test.cpp          | 201
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp  |  12
-rw-r--r--  src/mongo/db/s/migration_source_manager.cpp       |  10
-rw-r--r--  src/mongo/db/s/sharding_state.cpp                 | 372
-rw-r--r--  src/mongo/db/s/sharding_state.h                   |  17
-rw-r--r--  src/mongo/db/s/split_chunk_command.cpp            |   6
-rw-r--r--  src/mongo/shell/shardingtest.js                   |   6
18 files changed, 584 insertions, 742 deletions
diff --git a/src/mongo/db/s/collection_metadata.cpp b/src/mongo/db/s/collection_metadata.cpp
index 8b713dd0cbd..1c576851f11 100644
--- a/src/mongo/db/s/collection_metadata.cpp
+++ b/src/mongo/db/s/collection_metadata.cpp
@@ -393,10 +393,10 @@ bool CollectionMetadata::getDifferentChunk(const BSONObj& chunkMinKey,
return false;
}
-BSONObj CollectionMetadata::toBSON() const {
- BSONObjBuilder bb;
- toBSON(bb);
- return bb.obj();
+void CollectionMetadata::toBSONBasic(BSONObjBuilder& bb) const {
+ _collVersion.addToBSON(bb, "collVersion");
+ _shardVersion.addToBSON(bb, "shardVersion");
+ bb.append("keyPattern", _keyPattern);
}
void CollectionMetadata::toBSONChunks(BSONArrayBuilder& bb) const {
@@ -423,18 +423,9 @@ void CollectionMetadata::toBSONPending(BSONArrayBuilder& bb) const {
}
}
-void CollectionMetadata::toBSON(BSONObjBuilder& bb) const {
- _collVersion.addToBSON(bb, "collVersion");
- _shardVersion.addToBSON(bb, "shardVersion");
- bb.append("keyPattern", _keyPattern);
-
- BSONArrayBuilder chunksBB(bb.subarrayStart("chunks"));
- toBSONChunks(chunksBB);
- chunksBB.done();
-
- BSONArrayBuilder pendingBB(bb.subarrayStart("pending"));
- toBSONPending(pendingBB);
- pendingBB.done();
+string CollectionMetadata::toStringBasic() const {
+ return stream() << "Coll version: " << _collVersion.toString()
+ << ", shard version: " << _shardVersion.toString();
}
bool CollectionMetadata::getNextOrphanRange(const BSONObj& origLookupKey, KeyRange* range) const {
@@ -517,21 +508,6 @@ bool CollectionMetadata::getNextOrphanRange(const BSONObj& origLookupKey, KeyRange* range) const {
return false;
}
-string CollectionMetadata::toString() const {
- StringBuilder ss;
- ss << " CollectionManager version: " << _shardVersion.toString() << " key: " << _keyPattern;
- if (_rangesMap.empty()) {
- return ss.str();
- }
-
- RangeMap::const_iterator it = _rangesMap.begin();
- ss << it->first << " -> " << it->second;
- while (it != _rangesMap.end()) {
- ss << ", " << it->first << " -> " << it->second;
- }
- return ss.str();
-}
-
BSONObj CollectionMetadata::getMinKey() const {
BSONObjIterator it(_keyPattern);
BSONObjBuilder minKeyB;
diff --git a/src/mongo/db/s/collection_metadata.h b/src/mongo/db/s/collection_metadata.h
index df0212ca6a4..d15805af20b 100644
--- a/src/mongo/db/s/collection_metadata.h
+++ b/src/mongo/db/s/collection_metadata.h
@@ -175,6 +175,10 @@ public:
return _shardVersion;
}
+ const RangeMap& getChunks() const {
+ return _chunksMap;
+ }
+
BSONObj getKeyPattern() const {
return _keyPattern;
}
@@ -196,14 +200,9 @@ public:
}
/**
- * BSON output of the metadata information.
+ * BSON output of the basic metadata information (collection and shard version).
*/
- BSONObj toBSON() const;
-
- /**
- * BSON output of the metadata information, into a builder.
- */
- void toBSON(BSONObjBuilder& bb) const;
+ void toBSONBasic(BSONObjBuilder& bb) const;
/**
* BSON output of the chunks metadata into a BSONArray
@@ -215,10 +214,7 @@ public:
*/
void toBSONPending(BSONArrayBuilder& bb) const;
- /**
- * std::string output of the metadata information.
- */
- std::string toString() const;
+ std::string toStringBasic() const;
/**
* This method is used only for unit-tests and it returns a new metadata's instance based on
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 9ccf10791e9..d8daa704a2c 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -107,7 +107,7 @@ private:
CollectionShardingState::CollectionShardingState(
NamespaceString nss, std::unique_ptr<CollectionMetadata> initialMetadata)
: _nss(std::move(nss)), _metadataManager{} {
- _metadataManager.setActiveMetadata(std::move(initialMetadata));
+ _metadataManager.refreshActiveMetadata(std::move(initialMetadata));
}
CollectionShardingState::~CollectionShardingState() {
@@ -132,12 +132,19 @@ ScopedCollectionMetadata CollectionShardingState::getMetadata() {
return _metadataManager.getActiveMetadata();
}
-void CollectionShardingState::setMetadata(std::unique_ptr<CollectionMetadata> newMetadata) {
- if (newMetadata) {
- invariant(!newMetadata->getCollVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
- invariant(!newMetadata->getShardVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
- }
- _metadataManager.setActiveMetadata(std::move(newMetadata));
+void CollectionShardingState::refreshMetadata(OperationContext* txn,
+ std::unique_ptr<CollectionMetadata> newMetadata) {
+ invariant(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X));
+
+ _metadataManager.refreshActiveMetadata(std::move(newMetadata));
+}
+
+void CollectionShardingState::beginReceive(const ChunkRange& range) {
+ _metadataManager.beginReceive(range);
+}
+
+void CollectionShardingState::forgetReceive(const ChunkRange& range) {
+ _metadataManager.forgetReceive(range);
}
MigrationSourceManager* CollectionShardingState::getMigrationSourceManager() {
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index ba482b15c74..f4ebb922718 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -79,9 +79,27 @@ public:
ScopedCollectionMetadata getMetadata();
/**
- * Set a new metadata to be used for this collection.
+ * Updates the metadata based on changes received from the config server and also resolves the
+ * pending receives map in case some of these pending receives have completed or have been
+ * abandoned.
+ *
+ * Must always be called with an exclusive collection lock.
+ */
+ void refreshMetadata(OperationContext* txn, std::unique_ptr<CollectionMetadata> newMetadata);
+
+ /**
+ * Modifies the collection's sharding state to indicate that it is beginning to receive the
+ * given ChunkRange.
+ */
+ void beginReceive(const ChunkRange& range);
+
+ /*
+ * Modifies the collection's sharding state to indicate that the previous pending migration
+ * failed. If the range was not previously pending, this function will crash the server.
+ *
+ * This function is the mirror image of beginReceive.
*/
- void setMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+ void forgetReceive(const ChunkRange& range);
/**
* Returns the active migration source manager, if one is available.
@@ -146,6 +164,7 @@ private:
// Namespace to which this state belongs.
const NamespaceString _nss;
+ // Contains all the metadata associated with this collection.
MetadataManager _metadataManager;
// If this collection is serving as a source shard for chunk migration, this value will be
diff --git a/src/mongo/db/s/get_shard_version_command.cpp b/src/mongo/db/s/get_shard_version_command.cpp
index 5e5e1edef97..4420a064fae 100644
--- a/src/mongo/db/s/get_shard_version_command.cpp
+++ b/src/mongo/db/s/get_shard_version_command.cpp
@@ -45,9 +45,6 @@
#include "mongo/util/stringutils.h"
namespace mongo {
-
-using std::shared_ptr;
-
namespace {
class GetShardVersion : public Command {
@@ -126,11 +123,19 @@ public:
}
if (cmdObj["fullMetadata"].trueValue()) {
+ BSONObjBuilder metadataBuilder(result.subobjStart("metadata"));
if (metadata) {
- result.append("metadata", metadata->toBSON());
- } else {
- result.append("metadata", BSONObj());
+ metadata->toBSONBasic(metadataBuilder);
+
+ BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks"));
+ metadata->toBSONChunks(chunksArr);
+ chunksArr.doneFast();
+
+ BSONArrayBuilder pendingArr(metadataBuilder.subarrayStart("pending"));
+ metadata->toBSONPending(pendingArr);
+ pendingArr.doneFast();
}
+ metadataBuilder.doneFast();
}
return true;
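With this change, the fullMetadata output is always nested under a single "metadata" subdocument. Roughly, the resulting shape is as follows (an illustrative sketch only; the exact version fields come from ChunkVersion::addToBSON and all values here are invented):

    // Illustrative response shape for { getShardVersion: ..., fullMetadata: true }:
    // {
    //   ...,
    //   metadata: {
    //     collVersion: <coll version>, shardVersion: <shard version>,
    //     keyPattern: { x: 1 },
    //     chunks:  [ [ { x: MinKey }, { x: 0 } ], ... ],   // [min, max) pairs
    //     pending: [ [ { x: 0 }, { x: 10 } ], ... ]
    //   }
    // }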
diff --git a/src/mongo/db/s/merge_chunks_command.cpp b/src/mongo/db/s/merge_chunks_command.cpp
index f996fda63e9..510f816d56e 100644
--- a/src/mongo/db/s/merge_chunks_command.cpp
+++ b/src/mongo/db/s/merge_chunks_command.cpp
@@ -341,8 +341,8 @@ bool mergeChunks(OperationContext* txn,
auto css = CollectionShardingState::get(txn, nss);
std::unique_ptr<CollectionMetadata> cloned(
- uassertStatusOK(css->getMetadata()->cloneMerge(minKey, maxKey, mergeVersion)));
- css->setMetadata(std::move(cloned));
+ fassertStatusOK(40222, css->getMetadata()->cloneMerge(minKey, maxKey, mergeVersion)));
+ css->refreshMetadata(txn, std::move(cloned));
}
//
diff --git a/src/mongo/db/s/metadata_loader.cpp b/src/mongo/db/s/metadata_loader.cpp
index 8bc72af7f60..876217c5ec2 100644
--- a/src/mongo/db/s/metadata_loader.cpp
+++ b/src/mongo/db/s/metadata_loader.cpp
@@ -170,7 +170,6 @@ Status MetadataLoader::initChunks(OperationContext* txn,
}
}
-
// Exposes the new metadata's range map and version to the "differ," which
// would ultimately be responsible for filling them in.
SCMConfigDiffTracker differ(shard);
@@ -252,53 +251,4 @@ Status MetadataLoader::initChunks(OperationContext* txn,
}
}
-Status MetadataLoader::promotePendingChunks(const CollectionMetadata* afterMetadata,
- CollectionMetadata* remoteMetadata) const {
- // Ensure pending chunks are applicable
- bool notApplicable = (NULL == afterMetadata || NULL == remoteMetadata) ||
- (afterMetadata->getShardVersion() > remoteMetadata->getShardVersion()) ||
- (afterMetadata->getShardVersion().epoch() != remoteMetadata->getShardVersion().epoch());
- if (notApplicable)
- return Status::OK();
-
- // The chunks from remoteMetadata are the latest version, and the pending chunks
- // from afterMetadata are the latest version. If no trickery is afoot, pending chunks
- // should match exactly zero or one loaded chunk.
-
- remoteMetadata->_pendingMap = afterMetadata->_pendingMap;
-
- // Resolve our pending chunks against the chunks we've loaded
- for (RangeMap::iterator it = remoteMetadata->_pendingMap.begin();
- it != remoteMetadata->_pendingMap.end();) {
- if (!rangeMapOverlaps(remoteMetadata->_chunksMap, it->first, it->second)) {
- ++it;
- continue;
- }
-
- // Our pending range overlaps at least one chunk
-
- if (rangeMapContains(remoteMetadata->_chunksMap, it->first, it->second)) {
- // Chunk was promoted from pending, successful migration
- LOG(2) << "verified chunk " << rangeToString(it->first, it->second)
- << " was migrated earlier to this shard";
-
- remoteMetadata->_pendingMap.erase(it++);
- } else {
- // Something strange happened, maybe manual editing of config?
- RangeVector overlap;
- getRangeMapOverlap(remoteMetadata->_chunksMap, it->first, it->second, &overlap);
-
- string errMsg = str::stream()
- << "the remote metadata changed unexpectedly, pending range "
- << rangeToString(it->first, it->second)
- << " does not exactly overlap loaded chunks " << overlapToString(overlap);
-
- return Status(ErrorCodes::RemoteChangeDetected, errMsg);
- }
- }
-
- return Status::OK();
-}
-
-
} // namespace mongo
diff --git a/src/mongo/db/s/metadata_loader.h b/src/mongo/db/s/metadata_loader.h
index bcaae5f4b9f..223a3bd08a5 100644
--- a/src/mongo/db/s/metadata_loader.h
+++ b/src/mongo/db/s/metadata_loader.h
@@ -57,7 +57,6 @@ class OperationContext;
* remoteMetadata = makeCollectionMetadata( beforeMetadata, remoteMetadata );
* DBLock lock(txn, dbname, MODE_X);
* afterMetadata = <get latest local metadata>;
- * promotePendingChunks( afterMetadata, remoteMetadata );
*
* The loader will go out of its way to fetch the smallest possible amount of data
* from the config server without sacrificing the freshness and accuracy of the metadata it
@@ -97,30 +96,6 @@ public:
const CollectionMetadata* oldMetadata,
CollectionMetadata* metadata) const;
- /**
- * Replaces the pending chunks of the remote metadata with the more up-to-date pending
- * chunks of the 'after' metadata (metadata from after the remote load), and removes pending
- * chunks which are now regular chunks.
- *
- * Pending chunks should always correspond to one or zero chunks in the remoteMetadata
- * if the epochs are the same and the remote version is the same or higher, otherwise they
- * are not applicable.
- *
- * Locking note:
- * + Must be called in a DBLock, to ensure validity of afterMetadata
- *
- * Returns OK if pending chunks correctly follow the rule above or are not applicable
- * Returns RemoteChangeDetected if pending chunks do not follow the rule above, indicating
- * either the config server or us has changed unexpectedly.
- * This should only occur with manual editing of the config
- * server.
- *
- * TODO: This is a bit ugly but necessary for now. If/when pending chunk info is stored on
- * the config server, this should go away.
- */
- Status promotePendingChunks(const CollectionMetadata* afterMetadata,
- CollectionMetadata* remoteMetadata) const;
-
private:
/**
* Returns OK and fills in the internal state of 'metadata' with general collection
diff --git a/src/mongo/db/s/metadata_loader_test.cpp b/src/mongo/db/s/metadata_loader_test.cpp
index e33e3142d1a..aef88ecc380 100644
--- a/src/mongo/db/s/metadata_loader_test.cpp
+++ b/src/mongo/db/s/metadata_loader_test.cpp
@@ -421,203 +421,5 @@ TEST_F(MetadataLoaderFixture, NoChunks) {
future.timed_get(kFutureTimeout);
}
-TEST_F(MetadataLoaderFixture, PromotePendingNA) {
- unique_ptr<ChunkType> chunk(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << MINKEY));
- chunk->setMax(BSON("x" << 0));
- chunk->setVersion(ChunkVersion(1, 0, OID::gen()));
-
- OwnedPointerVector<ChunkType> chunks;
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata afterMetadata;
- getMetadataFor(chunks, &afterMetadata);
-
- // Metadata of different epoch
- (*chunks.vector().begin())->setVersion(ChunkVersion(1, 0, OID::gen()));
-
- CollectionMetadata remoteMetadata;
- getMetadataFor(chunks, &remoteMetadata);
-
- Status status = loader().promotePendingChunks(&afterMetadata, &remoteMetadata);
- ASSERT_OK(status);
-
- ChunkType pending;
- pending.setMin(BSON("x" << 0));
- pending.setMax(BSON("x" << 10));
-
- unique_ptr<CollectionMetadata> cloned(afterMetadata.clonePlusPending(pending));
- status = loader().promotePendingChunks(cloned.get(), &remoteMetadata);
- ASSERT_OK(status);
- ASSERT_EQUALS(remoteMetadata.getNumPending(), 0u);
-}
-
-TEST_F(MetadataLoaderFixture, PromotePendingNAVersion) {
- OID epoch = OID::gen();
-
- unique_ptr<ChunkType> chunk(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << MINKEY));
- chunk->setMax(BSON("x" << 0));
- chunk->setVersion(ChunkVersion(1, 1, epoch));
-
- OwnedPointerVector<ChunkType> chunks;
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata afterMetadata;
- getMetadataFor(chunks, &afterMetadata);
-
- // Metadata of same epoch, but lower version
- (*chunks.vector().begin())->setVersion(ChunkVersion(1, 0, epoch));
-
- CollectionMetadata remoteMetadata;
- getMetadataFor(chunks, &remoteMetadata);
-
- Status status = loader().promotePendingChunks(&afterMetadata, &remoteMetadata);
- ASSERT_OK(status);
-
- ChunkType pending;
- pending.setMin(BSON("x" << 0));
- pending.setMax(BSON("x" << 10));
-
- unique_ptr<CollectionMetadata> cloned(afterMetadata.clonePlusPending(pending));
- status = loader().promotePendingChunks(cloned.get(), &remoteMetadata);
- ASSERT_OK(status);
- ASSERT_EQUALS(remoteMetadata.getNumPending(), 0u);
-}
-
-TEST_F(MetadataLoaderFixture, PromotePendingGoodOverlap) {
- OID epoch = OID::gen();
-
- //
- // Setup chunk range for remote metadata
- //
-
- OwnedPointerVector<ChunkType> chunks;
-
- unique_ptr<ChunkType> chunk(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << MINKEY));
- chunk->setMax(BSON("x" << 0));
- chunk->setVersion(ChunkVersion(1, 0, epoch));
- chunks.mutableVector().push_back(chunk.release());
-
- chunk.reset(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << 10));
- chunk->setMax(BSON("x" << 20));
- chunks.mutableVector().push_back(chunk.release());
-
- chunk.reset(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << 30));
- chunk->setMax(BSON("x" << MAXKEY));
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata remoteMetadata;
- getMetadataFor(chunks, &remoteMetadata);
-
- //
- // Setup chunk and pending range for afterMetadata
- //
-
- chunks.clear();
-
- chunk.reset(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << 0));
- chunk->setMax(BSON("x" << 10));
- chunk->setVersion(ChunkVersion(1, 0, epoch));
-
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata afterMetadata;
- getMetadataFor(chunks, &afterMetadata);
-
- ChunkType pending;
- pending.setMin(BSON("x" << MINKEY));
- pending.setMax(BSON("x" << 0));
-
- unique_ptr<CollectionMetadata> cloned(afterMetadata.clonePlusPending(pending));
-
- pending.setMin(BSON("x" << 10));
- pending.setMax(BSON("x" << 20));
-
- cloned = cloned->clonePlusPending(pending);
-
- pending.setMin(BSON("x" << 20));
- pending.setMax(BSON("x" << 30));
-
- cloned = cloned->clonePlusPending(pending);
-
- pending.setMin(BSON("x" << 30));
- pending.setMax(BSON("x" << MAXKEY));
-
- cloned = cloned->clonePlusPending(pending);
-
- Status status = loader().promotePendingChunks(cloned.get(), &remoteMetadata);
- ASSERT_OK(status);
-
- ASSERT_EQUALS(remoteMetadata.getNumPending(), 1u);
- ASSERT(remoteMetadata.keyIsPending(BSON("x" << 25)));
-}
-
-TEST_F(MetadataLoaderFixture, PromotePendingBadOverlap) {
- OID epoch = OID::gen();
-
- //
- // Setup chunk range for remote metadata
- //
-
- OwnedPointerVector<ChunkType> chunks;
-
- unique_ptr<ChunkType> chunk(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << MINKEY));
- chunk->setMax(BSON("x" << 0));
- chunk->setVersion(ChunkVersion(1, 0, epoch));
-
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata remoteMetadata;
- getMetadataFor(chunks, &remoteMetadata);
-
- //
- // Setup chunk and pending range for afterMetadata
- //
-
- chunks.clear();
-
- chunk.reset(new ChunkType());
- chunk->setNS("foo.bar");
- chunk->setShard(ShardId("shard0000"));
- chunk->setMin(BSON("x" << 15));
- chunk->setMax(BSON("x" << MAXKEY));
- chunk->setVersion(ChunkVersion(1, 0, epoch));
-
- chunks.mutableVector().push_back(chunk.release());
-
- CollectionMetadata afterMetadata;
- getMetadataFor(chunks, &afterMetadata);
-
- ChunkType pending;
- pending.setMin(BSON("x" << MINKEY));
- pending.setMax(BSON("x" << 1));
-
- unique_ptr<CollectionMetadata> cloned(afterMetadata.clonePlusPending(pending));
- cloned = cloned->clonePlusPending(pending);
-
- Status status = loader().promotePendingChunks(cloned.get(), &remoteMetadata);
- ASSERT_EQUALS(status.code(), ErrorCodes::RemoteChangeDetected);
-}
-
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index e815e2ef50d..f5b29f3aee2 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -26,11 +26,14 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
#include "mongo/platform/basic.h"
+#include "mongo/db/range_arithmetic.h"
#include "mongo/db/s/metadata_manager.h"
-
#include "mongo/stdx/memory.h"
+#include "mongo/util/log.h"
namespace mongo {
@@ -51,11 +54,185 @@ ScopedCollectionMetadata MetadataManager::getActiveMetadata() {
return ScopedCollectionMetadata(this, _activeMetadataTracker.get());
}
-void MetadataManager::setActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata) {
- invariant(!newMetadata || newMetadata->isValid());
+void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
+ LOG(1) << "Refreshing the active metadata from "
+ << (_activeMetadataTracker->metadata ? _activeMetadataTracker->metadata->toStringBasic()
+ : "(empty)")
+ << ", to " << (remoteMetadata ? remoteMetadata->toStringBasic() : "(empty)");
stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+ // Collection is not sharded anymore
+ if (!remoteMetadata) {
+ log() << "Marking collection as not sharded.";
+
+ _receivingChunks.clear();
+ _rangesToClean.clear();
+
+ _setActiveMetadata_inlock(nullptr);
+ return;
+ }
+
+ invariant(!remoteMetadata->getCollVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
+ invariant(!remoteMetadata->getShardVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
+
+ // Collection is not sharded currently
+ if (!_activeMetadataTracker->metadata) {
+ log() << "Marking collection as sharded with version " << remoteMetadata->toStringBasic();
+
+ invariant(_receivingChunks.empty());
+ invariant(_rangesToClean.empty());
+
+ _setActiveMetadata_inlock(std::move(remoteMetadata));
+ return;
+ }
+
+ // If the metadata being installed has a different epoch from ours, this means the collection
+ // was dropped and recreated, so we must entirely reset the metadata state
+ if (_activeMetadataTracker->metadata->getCollVersion().epoch() !=
+ remoteMetadata->getCollVersion().epoch()) {
+ log() << "Overwriting collection metadata due to epoch change.";
+
+ _receivingChunks.clear();
+ _rangesToClean.clear();
+
+ _setActiveMetadata_inlock(std::move(remoteMetadata));
+ return;
+ }
+
+ // We already have a newer version
+ if (_activeMetadataTracker->metadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
+ LOG(1) << "Attempted to refresh active metadata "
+ << _activeMetadataTracker->metadata->toStringBasic() << " with an older version "
+ << remoteMetadata->toStringBasic();
+
+ return;
+ }
+
+ // Resolve any receiving chunks, which might have completed by now
+ for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
+ const BSONObj min = it->first;
+ const BSONObj max = it->second;
+
+ // Our pending range overlaps at least one chunk
+ if (rangeMapContains(remoteMetadata->getChunks(), min, max)) {
+ // The remote metadata contains a chunk we were earlier in the process of receiving, so
+ // we deem it successfully received.
+ LOG(2) << "Verified chunk " << ChunkRange(min, max).toString()
+ << " was migrated earlier to this shard";
+
+ _receivingChunks.erase(it++);
+ continue;
+ } else if (!rangeMapOverlaps(remoteMetadata->getChunks(), min, max)) {
+ ++it;
+ continue;
+ }
+
+ // Partial overlap indicates that the earlier migration has failed, but the chunk being
+ // migrated underwent some splits and other migrations and ended up here again. In this
+ // case, we will request a full reload of the metadata. Currently this cannot happen,
+ // because all migrations happen with the explicit knowledge of the recipient shard.
+ // However, we leave the option open so that chunk splits can do an empty chunk move
+ // without having to notify the recipient.
+ RangeVector overlappedChunks;
+ getRangeMapOverlap(remoteMetadata->getChunks(), min, max, &overlappedChunks);
+
+ for (const auto& overlapChunkMin : overlappedChunks) {
+ auto itRecv = _receivingChunks.find(overlapChunkMin.first);
+ invariant(itRecv != _receivingChunks.end());
+
+ const ChunkRange receivingRange(itRecv->first, itRecv->second);
+
+ _receivingChunks.erase(itRecv);
+
+ // Make sure any potentially partially copied chunks are scheduled to be cleaned up
+ _addRangeToClean_inlock(receivingRange);
+ }
+
+ // Need to reset the iterator
+ it = _receivingChunks.begin();
+ }
+
+ // For compatibility with the current range deleter, which is driven entirely by the
+ // contents of the CollectionMetadata, update the pending chunks
+ for (const auto& receivingChunk : _receivingChunks) {
+ ChunkType chunk;
+ chunk.setMin(receivingChunk.first);
+ chunk.setMax(receivingChunk.second);
+ remoteMetadata = remoteMetadata->clonePlusPending(chunk);
+ }
+
+ _setActiveMetadata_inlock(std::move(remoteMetadata));
+}
+
+void MetadataManager::beginReceive(const ChunkRange& range) {
+ stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+
+ // Collection is not known to be sharded if the active metadata tracker is null
+ invariant(_activeMetadataTracker);
+
+ // If range is contained within pending chunks, this means a previous migration must have failed
+ // and we need to clean all overlaps
+ RangeVector overlappedChunks;
+ getRangeMapOverlap(_receivingChunks, range.getMin(), range.getMax(), &overlappedChunks);
+
+ for (const auto& overlapChunkMin : overlappedChunks) {
+ auto itRecv = _receivingChunks.find(overlapChunkMin.first);
+ invariant(itRecv != _receivingChunks.end());
+
+ const ChunkRange receivingRange(itRecv->first, itRecv->second);
+
+ _receivingChunks.erase(itRecv);
+
+ // Make sure any potentially partially copied chunks are scheduled to be cleaned up
+ _addRangeToClean_inlock(receivingRange);
+ }
+
+ // Need to ensure that the background range deleter task won't delete the range we are about to
+ // receive
+ _removeRangeToClean_inlock(range);
+ _receivingChunks.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));
+
+ // For compatibility with the current range deleter, update the pending chunks on the collection
+ // metadata to include the chunk being received
+ ChunkType chunk;
+ chunk.setMin(range.getMin());
+ chunk.setMax(range.getMax());
+ _setActiveMetadata_inlock(_activeMetadataTracker->metadata->clonePlusPending(chunk));
+}
+
+void MetadataManager::forgetReceive(const ChunkRange& range) {
+ stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+
+ {
+ auto it = _receivingChunks.find(range.getMin());
+ invariant(it != _receivingChunks.end());
+
+ // Verify entire ChunkRange is identical, not just the min key.
+ invariant(it->second == range.getMax());
+
+ _receivingChunks.erase(it);
+ }
+
+ // This is potentially partially received data, which needs to be cleaned up
+ _addRangeToClean_inlock(range);
+
+ // For compatibility with the current range deleter, update the pending chunks on the collection
+ // metadata to exclude the chunk being received, which was added in beginReceive
+ ChunkType chunk;
+ chunk.setMin(range.getMin());
+ chunk.setMax(range.getMax());
+ _setActiveMetadata_inlock(_activeMetadataTracker->metadata->cloneMinusPending(chunk));
+}
+
+RangeMap MetadataManager::getCopyOfReceivingChunks() {
+ stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+ return _receivingChunks;
+}
+
+void MetadataManager::_setActiveMetadata_inlock(std::unique_ptr<CollectionMetadata> newMetadata) {
+ invariant(!newMetadata || newMetadata->isValid());
+
if (_activeMetadataTracker->usageCounter > 0) {
_metadataInUse.push_front(std::move(_activeMetadataTracker));
}
@@ -80,7 +257,7 @@ void MetadataManager::_removeMetadata_inlock(CollectionMetadataTracker* metadata
MetadataManager::CollectionMetadataTracker::CollectionMetadataTracker(
std::unique_ptr<CollectionMetadata> m)
- : metadata(std::move(m)), usageCounter(0){};
+ : metadata(std::move(m)) {}
ScopedCollectionMetadata::ScopedCollectionMetadata() = default;
@@ -129,7 +306,7 @@ ScopedCollectionMetadata::operator bool() const {
return _tracker && _tracker->metadata.get();
}
-std::map<BSONObj, ChunkRange> MetadataManager::getCopyOfRanges() {
+RangeMap MetadataManager::getCopyOfRangesToClean() {
stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
return _rangesToClean;
}
@@ -140,31 +317,9 @@ void MetadataManager::addRangeToClean(const ChunkRange& range) {
}
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
- auto itLow = _rangesToClean.upper_bound(range.getMin());
- if (itLow != _rangesToClean.begin()) {
- --itLow;
- }
-
- if (itLow != _rangesToClean.end()) {
- const ChunkRange& cr = itLow->second;
- if (cr.getMin() < range.getMin()) {
- // Checks that there is no overlap between range and any other ChunkRange
- // Specifically, checks that the greatest chunk less than or equal to range, if such a
- // chunk exists, does not overlap with the min of range.
- invariant(cr.getMax() <= range.getMin());
- }
- }
-
- auto itHigh = _rangesToClean.lower_bound(range.getMin());
- if (itHigh != _rangesToClean.end()) {
- const ChunkRange& cr = itHigh->second;
- // Checks that there is no overlap between range and any other ChunkRange
- // Specifically, checks that the least chunk greater than or equal to range
- // does not overlap with the max of range.
- invariant(cr.getMin() >= range.getMax());
- }
-
- _rangesToClean.insert(std::make_pair(range.getMin(), range));
+ invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
+ invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));
+ _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));
}
void MetadataManager::removeRangeToClean(const ChunkRange& range) {
@@ -180,34 +335,55 @@ void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) {
--it;
}
- for (; it != _rangesToClean.end() && it->second.getMin() < range.getMax();) {
- if (it->second.getMax() <= range.getMin()) {
+ for (; it != _rangesToClean.end() && it->first < range.getMax();) {
+ if (it->second <= range.getMin()) {
++it;
continue;
}
+
// There's overlap between *it and range so we remove *it
// and then replace with new ranges.
- ChunkRange oldChunk = it->second;
+ BSONObj oldMin = it->first, oldMax = it->second;
_rangesToClean.erase(it++);
- if (oldChunk.getMin() < range.getMin()) {
- ChunkRange newChunk = ChunkRange(oldChunk.getMin(), range.getMin());
- _addRangeToClean_inlock(newChunk);
+ if (oldMin < range.getMin()) {
+ _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin()));
}
- if (oldChunk.getMax() > range.getMax()) {
- ChunkRange newChunk = ChunkRange(range.getMax(), oldChunk.getMax());
- _addRangeToClean_inlock(newChunk);
+
+ if (oldMax > range.getMax()) {
+ _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax));
}
}
}
void MetadataManager::append(BSONObjBuilder* builder) {
- BSONArrayBuilder arr(builder->subarrayStart("rangesToClean"));
stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+
+ BSONArrayBuilder rtcArr(builder->subarrayStart("rangesToClean"));
for (const auto& entry : _rangesToClean) {
BSONObjBuilder obj;
- entry.second.append(&obj);
- arr.append(obj.done());
+ ChunkRange r = ChunkRange(entry.first, entry.second);
+ r.append(&obj);
+ rtcArr.append(obj.done());
+ }
+ rtcArr.done();
+
+ BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
+ for (const auto& entry : _receivingChunks) {
+ BSONObjBuilder obj;
+ ChunkRange r = ChunkRange(entry.first, entry.second);
+ r.append(&obj);
+ pcArr.append(obj.done());
+ }
+ pcArr.done();
+
+ BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
+ for (const auto& entry : _activeMetadataTracker->metadata->getChunks()) {
+ BSONObjBuilder obj;
+ ChunkRange r = ChunkRange(entry.first, entry.second);
+ r.append(&obj);
+ amrArr.append(obj.done());
}
+ amrArr.done();
}
} // namespace mongo
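The reworked _removeRangeToClean_inlock() above splits any partially overlapped entry rather than dropping it wholesale. A small usage sketch of that behavior, mirroring the RemoveRangeInMiddleOfRange test further down:

    // Removing the middle of a scheduled range splits it in two: with
    // _rangesToClean = { [0, 10) }, removing [4, 6) leaves [0, 4) and [6, 10).
    MetadataManager mm;
    mm.addRangeToClean(ChunkRange(BSON("key" << 0), BSON("key" << 10)));
    mm.removeRangeToClean(ChunkRange(BSON("key" << 4), BSON("key" << 6)));
    // mm.getCopyOfRangesToClean() now contains two entries: [0, 4) and [6, 10)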
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index 5953ccb9fdf..319db8180fb 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -57,10 +57,27 @@ public:
ScopedCollectionMetadata getActiveMetadata();
/**
- * Changes the active metadata and if there are current users of the current metadata,
- * puts it in the _metadataInUse set.
+ * Installs the specified metadata as the new active metadata and uses its contents to
+ * resolve any chunks which are currently pending receive.
*/
- void setActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+ void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+
+ /**
+ * Puts the specified range on the list of chunks which are being received, so that the
+ * range deleter process will not clean the partially migrated data.
+ */
+ void beginReceive(const ChunkRange& range);
+
+ /*
+ * Removes a range from the list of chunks which are being received. Used externally to
+ * indicate that a chunk migration failed.
+ */
+ void forgetReceive(const ChunkRange& range);
+
+ /**
+ * Gets a copy of the set of chunk ranges which are being received for this collection. This
+ * method is intended for testing purposes only and should not be used in any production code.
+ */
+ RangeMap getCopyOfReceivingChunks();
/**
* Adds a new range to be cleaned up.
@@ -74,9 +91,10 @@ public:
void removeRangeToClean(const ChunkRange& range);
/**
- * Gets copy of _rangesToClean map (see below).
+ * Gets a copy of the set of chunk ranges which are scheduled for cleanup. This method is intended
+ * for testing purposes only and should not be used in any production code.
*/
- std::map<BSONObj, ChunkRange> getCopyOfRanges();
+ RangeMap getCopyOfRangesToClean();
/*
* Appends information on all the chunk ranges in rangesToClean to builder.
@@ -94,6 +112,7 @@ private:
CollectionMetadataTracker(std::unique_ptr<CollectionMetadata> m);
std::unique_ptr<CollectionMetadata> metadata;
+
uint32_t usageCounter{0};
};
@@ -106,6 +125,11 @@ private:
void _addRangeToClean_inlock(const ChunkRange& range);
void _removeRangeToClean_inlock(const ChunkRange& range);
+ void _setActiveMetadata_inlock(std::unique_ptr<CollectionMetadata> newMetadata);
+
+ // Mutex to protect the state below
+ stdx::mutex _managerLock;
+
// Holds the collection metadata, which is currently active
std::unique_ptr<CollectionMetadataTracker> _activeMetadataTracker;
@@ -113,12 +137,12 @@ private:
// by still active server operations or cursors
std::list<std::unique_ptr<CollectionMetadataTracker>> _metadataInUse;
- // Contains the information of which ranges of sharding keys need to
- // be deleted from the shard. The map is from the minimum value of the
- // range to be deleted (e.g. BSON("key" << 0)) to the entire chunk range.
- std::map<BSONObj, ChunkRange> _rangesToClean;
+ // Chunk ranges which are currently being transferred to this shard. Indexed by the min
+ // key of the range.
+ RangeMap _receivingChunks;
- stdx::mutex _managerLock;
+ // Set of ranges to be deleted. Indexed by the min key of the range.
+ RangeMap _rangesToClean;
};
class ScopedCollectionMetadata {
@@ -146,7 +170,9 @@ public:
CollectionMetadata* operator->();
CollectionMetadata* getMetadata();
- /** True if the ScopedCollectionMetadata stores a metadata (is not empty) */
+ /**
+ * True if the ScopedCollectionMetadata stores a metadata (is not empty)
+ */
operator bool() const;
private:
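Both _receivingChunks and _rangesToClean now share the RangeMap representation, which maps each range's min key to its max key. A short sketch of how an entry is read back into a ChunkRange, matching the updated tests below:

    // A RangeMap entry is a (min key -> max key) pair describing [min, max)
    RangeMap ranges = mm.getCopyOfRangesToClean();
    auto it = ranges.find(BSON("key" << 0));
    if (it != ranges.end()) {
        ChunkRange cr(it->first, it->second);  // rebuild the ChunkRange
    }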
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 45cff494785..412b5e7ba89 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -44,32 +44,35 @@ using unittest::assertGet;
namespace {
-std::unique_ptr<CollectionMetadata> makeMetadata() {
+std::unique_ptr<CollectionMetadata> makeEmptyMetadata() {
return stdx::make_unique<CollectionMetadata>(BSON("key" << 1), ChunkVersion(1, 0, OID::gen()));
}
TEST(MetadataManager, SetAndGetActiveMetadata) {
MetadataManager manager;
- std::unique_ptr<CollectionMetadata> cm = makeMetadata();
+ std::unique_ptr<CollectionMetadata> cm = makeEmptyMetadata();
auto cmPtr = cm.get();
- manager.setActiveMetadata(std::move(cm));
+ manager.refreshActiveMetadata(std::move(cm));
ScopedCollectionMetadata scopedMetadata = manager.getActiveMetadata();
ASSERT_EQ(cmPtr, scopedMetadata.getMetadata());
};
-TEST(MetadataManager, ResetActiveMetadata) {
+TEST(MetadataManager, RefreshActiveMetadata) {
MetadataManager manager;
- manager.setActiveMetadata(makeMetadata());
+ manager.refreshActiveMetadata(makeEmptyMetadata());
ScopedCollectionMetadata scopedMetadata1 = manager.getActiveMetadata();
- std::unique_ptr<CollectionMetadata> cm2 = makeMetadata();
+ ChunkVersion newVersion = scopedMetadata1->getCollVersion();
+ newVersion.incMajor();
+ std::unique_ptr<CollectionMetadata> cm2 =
+ scopedMetadata1->clonePlusChunk(BSON("key" << 0), BSON("key" << 10), newVersion);
auto cm2Ptr = cm2.get();
- manager.setActiveMetadata(std::move(cm2));
+ manager.refreshActiveMetadata(std::move(cm2));
ScopedCollectionMetadata scopedMetadata2 = manager.getActiveMetadata();
ASSERT_EQ(cm2Ptr, scopedMetadata2.getMetadata());
@@ -81,16 +84,17 @@ TEST(MetadataManager, AddAndRemoveRanges) {
ChunkRange cr2 = ChunkRange(BSON("key" << 10), BSON("key" << 20));
mm.addRangeToClean(cr1);
- ASSERT_EQ(mm.getCopyOfRanges().size(), 1UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 1UL);
mm.removeRangeToClean(cr1);
- ASSERT_EQ(mm.getCopyOfRanges().size(), 0UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 0UL);
mm.addRangeToClean(cr1);
mm.addRangeToClean(cr2);
mm.removeRangeToClean(cr1);
- ASSERT_EQ(mm.getCopyOfRanges().size(), 1UL);
- auto ranges = mm.getCopyOfRanges();
- ChunkRange remainingChunk = ranges.find(cr2.getMin())->second;
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 1UL);
+ auto ranges = mm.getCopyOfRangesToClean();
+ auto it = ranges.find(cr2.getMin());
+ ChunkRange remainingChunk = ChunkRange(it->first, it->second);
ASSERT_EQ(remainingChunk.toString(), cr2.toString());
mm.removeRangeToClean(cr2);
}
@@ -103,19 +107,21 @@ TEST(MetadataManager, RemoveRangeInMiddleOfRange) {
mm.addRangeToClean(cr1);
mm.removeRangeToClean(ChunkRange(BSON("key" << 4), BSON("key" << 6)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 2UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 2UL);
- auto ranges = mm.getCopyOfRanges();
+ auto ranges = mm.getCopyOfRangesToClean();
auto it = ranges.find(BSON("key" << 0));
ChunkRange expectedChunk = ChunkRange(BSON("key" << 0), BSON("key" << 4));
- ASSERT_EQ(it->second.toString(), expectedChunk.toString());
+ ChunkRange remainingChunk = ChunkRange(it->first, it->second);
+ ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
it++;
expectedChunk = ChunkRange(BSON("key" << 6), BSON("key" << 10));
- ASSERT_EQ(it->second.toString(), expectedChunk.toString());
+ remainingChunk = ChunkRange(it->first, it->second);
+ ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
mm.removeRangeToClean(cr1);
- ASSERT_EQ(mm.getCopyOfRanges().size(), 0UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 0UL);
}
// Tests removals that overlap with just one ChunkRange.
@@ -125,28 +131,31 @@ TEST(MetadataManager, RemoveRangeWithSingleRangeOverlap) {
mm.addRangeToClean(cr1);
mm.removeRangeToClean(ChunkRange(BSON("key" << 0), BSON("key" << 5)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 1UL);
- auto ranges = mm.getCopyOfRanges();
- ChunkRange remainingChunk = ranges.find(BSON("key" << 5))->second;
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 1UL);
+ auto ranges = mm.getCopyOfRangesToClean();
+ auto it = ranges.find(BSON("key" << 5));
+ ChunkRange remainingChunk = ChunkRange(it->first, it->second);
ChunkRange expectedChunk = ChunkRange(BSON("key" << 5), BSON("key" << 10));
ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
mm.removeRangeToClean(ChunkRange(BSON("key" << 4), BSON("key" << 6)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 1UL);
- ranges = mm.getCopyOfRanges();
- remainingChunk = ranges.find(BSON("key" << 6))->second;
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 1UL);
+ ranges = mm.getCopyOfRangesToClean();
+ it = ranges.find(BSON("key" << 6));
+ remainingChunk = ChunkRange(it->first, it->second);
expectedChunk = ChunkRange(BSON("key" << 6), BSON("key" << 10));
ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
mm.removeRangeToClean(ChunkRange(BSON("key" << 9), BSON("key" << 13)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 1UL);
- ranges = mm.getCopyOfRanges();
- remainingChunk = ranges.find(BSON("key" << 6))->second;
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 1UL);
+ ranges = mm.getCopyOfRangesToClean();
+ it = ranges.find(BSON("key" << 6));
+ remainingChunk = ChunkRange(it->first, it->second);
expectedChunk = ChunkRange(BSON("key" << 6), BSON("key" << 9));
ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
mm.removeRangeToClean(ChunkRange(BSON("key" << 0), BSON("key" << 10)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 0UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 0UL);
}
// Tests removals that overlap with more than one ChunkRange.
@@ -159,20 +168,146 @@ TEST(MetadataManager, RemoveRangeWithMultipleRangeOverlaps) {
mm.addRangeToClean(cr1);
mm.addRangeToClean(cr2);
mm.addRangeToClean(cr3);
- ASSERT_EQ(mm.getCopyOfRanges().size(), 3UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 3UL);
mm.removeRangeToClean(ChunkRange(BSON("key" << 8), BSON("key" << 22)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 2UL);
- auto ranges = mm.getCopyOfRanges();
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 2UL);
+ auto ranges = mm.getCopyOfRangesToClean();
auto it = ranges.find(BSON("key" << 0));
+ ChunkRange remainingChunk = ChunkRange(it->first, it->second);
ChunkRange expectedChunk = ChunkRange(BSON("key" << 0), BSON("key" << 8));
- ASSERT_EQ(it->second.toString(), expectedChunk.toString());
+ ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
it++;
+ remainingChunk = ChunkRange(it->first, it->second);
expectedChunk = ChunkRange(BSON("key" << 22), BSON("key" << 30));
- ASSERT_EQ(it->second.toString(), expectedChunk.toString());
+ ASSERT_EQ(remainingChunk.toString(), expectedChunk.toString());
mm.removeRangeToClean(ChunkRange(BSON("key" << 0), BSON("key" << 30)));
- ASSERT_EQ(mm.getCopyOfRanges().size(), 0UL);
+ ASSERT_EQ(mm.getCopyOfRangesToClean().size(), 0UL);
+}
+
+TEST(MetadataManager, RefreshAfterSuccessfulMigrationSinglePending) {
+ MetadataManager mm;
+ mm.refreshActiveMetadata(makeEmptyMetadata());
+
+ const ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+ mm.beginReceive(cr1);
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 1UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 0UL);
+
+ ChunkVersion version = mm.getActiveMetadata()->getCollVersion();
+ version.incMajor();
+
+ mm.refreshActiveMetadata(
+ mm.getActiveMetadata()->clonePlusChunk(cr1.getMin(), cr1.getMax(), version));
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 0UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 1UL);
+}
+
+TEST(MetadataManager, RefreshAfterSuccessfulMigrationMultiplePending) {
+ MetadataManager mm;
+ mm.refreshActiveMetadata(makeEmptyMetadata());
+
+ const ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+ mm.beginReceive(cr1);
+
+ const ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
+ mm.beginReceive(cr2);
+
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 2UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 0UL);
+
+ {
+ ChunkVersion version = mm.getActiveMetadata()->getCollVersion();
+ version.incMajor();
+
+ mm.refreshActiveMetadata(
+ mm.getActiveMetadata()->clonePlusChunk(cr1.getMin(), cr1.getMax(), version));
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 1UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 1UL);
+ }
+
+ {
+ ChunkVersion version = mm.getActiveMetadata()->getCollVersion();
+ version.incMajor();
+
+ mm.refreshActiveMetadata(
+ mm.getActiveMetadata()->clonePlusChunk(cr2.getMin(), cr2.getMax(), version));
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 0UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 2UL);
+ }
+}
+
+TEST(MetadataManager, RefreshAfterNotYetCompletedMigrationMultiplePending) {
+ MetadataManager mm;
+ mm.refreshActiveMetadata(makeEmptyMetadata());
+
+ const ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+ mm.beginReceive(cr1);
+
+ const ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
+ mm.beginReceive(cr2);
+
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 2UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 0UL);
+
+ ChunkVersion version = mm.getActiveMetadata()->getCollVersion();
+ version.incMajor();
+
+ mm.refreshActiveMetadata(
+ mm.getActiveMetadata()->clonePlusChunk(BSON("key" << 50), BSON("key" << 60), version));
+ ASSERT_EQ(mm.getCopyOfReceivingChunks().size(), 2UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 1UL);
+}
+
+TEST(MetadataManager, BeginReceiveWithOverlappingRange) {
+ MetadataManager mm;
+ mm.refreshActiveMetadata(makeEmptyMetadata());
+
+ const ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+ mm.beginReceive(cr1);
+
+ const ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
+ mm.beginReceive(cr2);
+
+ const ChunkRange crOverlap(BSON("key" << 5), BSON("key" << 35));
+ mm.beginReceive(crOverlap);
+
+ const auto copyOfPending = mm.getCopyOfReceivingChunks();
+
+ ASSERT_EQ(copyOfPending.size(), 1UL);
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 0UL);
+
+ const auto it = copyOfPending.find(BSON("key" << 5));
+ ASSERT(it != copyOfPending.end());
+ ASSERT_EQ(it->second, BSON("key" << 35));
+}
+
+TEST(MetadataManager, RefreshMetadataAfterDropAndRecreate) {
+ MetadataManager mm;
+ mm.refreshActiveMetadata(makeEmptyMetadata());
+
+ {
+ auto metadata = mm.getActiveMetadata();
+ ChunkVersion newVersion = metadata->getCollVersion();
+ newVersion.incMajor();
+
+ mm.refreshActiveMetadata(
+ metadata->clonePlusChunk(BSON("key" << 0), BSON("key" << 10), newVersion));
+ }
+
+ // Now, pretend that the collection was dropped and recreated
+ auto recreateMetadata = makeEmptyMetadata();
+ ChunkVersion newVersion = recreateMetadata->getCollVersion();
+ newVersion.incMajor();
+
+ mm.refreshActiveMetadata(
+ recreateMetadata->clonePlusChunk(BSON("key" << 20), BSON("key" << 30), newVersion));
+ ASSERT_EQ(mm.getActiveMetadata()->getChunks().size(), 1UL);
+
+ const auto chunkEntry = mm.getActiveMetadata()->getChunks().begin();
+ ASSERT_EQ(BSON("key" << 20), chunkEntry->first);
+ ASSERT_EQ(BSON("key" << 30), chunkEntry->second);
}
} // namespace
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 89aab68dcd4..dd70f21a0a9 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -571,7 +571,6 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* txn,
Status status = _notePending(txn, NamespaceString(ns), min, max, epoch);
if (!status.isOK()) {
- warning() << errmsg;
setState(FAIL);
return;
}
@@ -998,11 +997,7 @@ Status MigrationDestinationManager::_notePending(OperationContext* txn,
: ChunkVersion::UNSHARDED().epoch())};
}
- ChunkType chunk;
- chunk.setMin(min);
- chunk.setMax(max);
-
- css->setMetadata(metadata->clonePlusPending(chunk));
+ css->beginReceive(ChunkRange(min, max));
stdx::lock_guard<stdx::mutex> sl(_mutex);
invariant(!_chunkMarkedPending);
@@ -1050,11 +1045,8 @@ Status MigrationDestinationManager::_forgetPending(OperationContext* txn,
: ChunkVersion::UNSHARDED().epoch())};
}
- ChunkType chunk;
- chunk.setMin(min);
- chunk.setMax(max);
+ css->forgetReceive(ChunkRange(min, max));
- css->setMetadata(metadata->cloneMinusPending(chunk));
return Status::OK();
}
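On the recipient side, _notePending() and _forgetPending() now delegate to the collection sharding state. The intended pairing is roughly the following (a hypothetical sketch; cloneChunkData() stands in for the actual migration steps and locking is elided):

    // css is a CollectionShardingState*; min/max bound the incoming chunk
    const ChunkRange range(min, max);
    css->beginReceive(range);          // range deleter will now skip this range
    if (!cloneChunkData().isOK()) {    // hypothetical placeholder for the clone
        css->forgetReceive(range);     // schedules partial data for cleanup
    }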
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 9b54e5d4fe3..df12472c7bb 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -333,14 +333,14 @@ Status MigrationSourceManager::commitDonateChunk(OperationContext* txn) {
// committed, update the collection metadata to the new collection version in the command
// response and forget the migrated chunk.
- ChunkVersion uncommittedCollVersion;
+ ChunkVersion committedCollVersion;
if (controlChunkType) {
- uncommittedCollVersion = fassertStatusOK(
+ committedCollVersion = fassertStatusOK(
40084,
ChunkVersion::parseFromBSONWithFieldForCommands(
commitChunkMigrationResponse.getValue().response, kControlChunkVersionField));
} else {
- uncommittedCollVersion = fassertStatusOK(
+ committedCollVersion = fassertStatusOK(
40083,
ChunkVersion::parseFromBSONWithFieldForCommands(
commitChunkMigrationResponse.getValue().response, kMigratedChunkVersionField));
@@ -353,8 +353,8 @@ Status MigrationSourceManager::commitDonateChunk(OperationContext* txn) {
migratingChunkToForget.setMin(_args.getMinKey());
migratingChunkToForget.setMax(_args.getMaxKey());
auto css = CollectionShardingState::get(txn, _args.getNss().ns());
- css->setMetadata(
- _committedMetadata->cloneMigrate(migratingChunkToForget, uncommittedCollVersion));
+ css->refreshMetadata(
+ txn, _committedMetadata->cloneMigrate(migratingChunkToForget, committedCollVersion));
_committedMetadata = css->getMetadata();
} else {
// This could be an unrelated error (e.g. network error). Check whether the metadata update
diff --git a/src/mongo/db/s/sharding_state.cpp b/src/mongo/db/s/sharding_state.cpp
index 45584790c0f..19b41dcc39d 100644
--- a/src/mongo/db/s/sharding_state.cpp
+++ b/src/mongo/db/s/sharding_state.cpp
@@ -84,47 +84,8 @@ const auto getShardingState = ServiceContext::declareDecoration<ShardingState>()
// Max number of concurrent config server refresh threads
const int kMaxConfigServerRefreshThreads = 3;
-enum class VersionChoice { Local, Remote, Unknown };
-
-/**
- * Compares a remotely-loaded version (remoteVersion) to the latest local version of a collection
- * (localVersion) and returns which one is the newest.
- *
- * Because it isn't clear during epoch changes which epoch is newer, the local version before the
- * reload occurred, 'prevLocalVersion', is used to determine whether the remote epoch is definitely
- * newer, or we're not sure.
- */
-VersionChoice chooseNewestVersion(ChunkVersion prevLocalVersion,
- ChunkVersion localVersion,
- ChunkVersion remoteVersion) {
- OID prevEpoch = prevLocalVersion.epoch();
- OID localEpoch = localVersion.epoch();
- OID remoteEpoch = remoteVersion.epoch();
-
- // Everything changed in-flight, so we need to try again
- if (prevEpoch != localEpoch && localEpoch != remoteEpoch) {
- return VersionChoice::Unknown;
- }
-
- // We're in the same (zero) epoch as the latest metadata, nothing to do
- if (localEpoch == remoteEpoch && !remoteEpoch.isSet()) {
- return VersionChoice::Local;
- }
-
- // We're in the same (non-zero) epoch as the latest metadata, so increment the version
- if (localEpoch == remoteEpoch && remoteEpoch.isSet()) {
- // Use the newer version if possible
- if (localVersion < remoteVersion) {
- return VersionChoice::Remote;
- } else {
- return VersionChoice::Local;
- }
- }
-
- // We're now sure we're installing a new epoch and the epoch didn't change during reload
- dassert(prevEpoch == localEpoch && localEpoch != remoteEpoch);
- return VersionChoice::Remote;
-}
+// Maximum number of times to try to refresh the collection metadata if conflicts are occurring
+const int kMaxNumMetadataRefreshAttempts = 3;
/**
* Updates the config server field of the shardIdentity document with the given connection string
@@ -154,10 +115,6 @@ void updateShardIdentityConfigStringCB(const string& setName, const string& newC
} // namespace
-//
-// ShardingState
-//
-
ShardingState::ShardingState()
: _initializationState(static_cast<uint32_t>(InitializationState::kNew)),
_initializationStatus(Status(ErrorCodes::InternalError, "Uninitialized value")),
@@ -302,56 +259,65 @@ Status ShardingState::onStaleShardVersion(OperationContext* txn,
// Fast path - check if the requested version is at a higher version than the current metadata
// version or a different epoch before verifying against config server.
+ ScopedCollectionMetadata currentMetadata;
+
{
AutoGetCollection autoColl(txn, nss, MODE_IS);
- auto storedMetadata = CollectionShardingState::get(txn, nss)->getMetadata();
- if (storedMetadata) {
- collectionShardVersion = storedMetadata->getShardVersion();
+ currentMetadata = CollectionShardingState::get(txn, nss)->getMetadata();
+ if (currentMetadata) {
+ collectionShardVersion = currentMetadata->getShardVersion();
}
- if (collectionShardVersion >= expectedVersion &&
- collectionShardVersion.epoch() == expectedVersion.epoch()) {
+ if (collectionShardVersion.epoch() == expectedVersion.epoch() &&
+ collectionShardVersion >= expectedVersion) {
// Don't need to remotely reload if we're in the same epoch and the requested version is
// smaller than the one we know about. This means that the remote side is behind.
return Status::OK();
}
}
- // The _configServerTickets serializes this process such that only a small number of threads can
- // try to refresh at the same time
- _configServerTickets.waitForTicket();
- TicketHolderReleaser needTicketFrom(&_configServerTickets);
+ // On the first attempt, try to refresh based on the currently loaded metadata; on
+ // subsequent attempts do a complete metadata reload
+ int numRefreshAttempts = 0;
- //
- // Slow path - remotely reload
- //
- // Cases:
- // A) Initial config load and/or secondary take-over.
- // B) Migration TO this shard finished, notified by mongos.
- // C) Dropping a collection, notified (currently) by mongos.
- // D) Stale client wants to reload metadata with a different *epoch*, so we aren't sure.
-
- if (collectionShardVersion.epoch() != expectedVersion.epoch()) {
- // Need to remotely reload if our epochs aren't the same, to verify
- LOG(1) << "metadata change requested for " << nss.ns() << ", from shard version "
- << collectionShardVersion << " to " << expectedVersion
- << ", need to verify with config server";
- } else {
- // Need to remotely reload since our epochs aren't the same but our version is greater
- LOG(1) << "metadata version update requested for " << nss.ns() << ", from shard version "
- << collectionShardVersion << " to " << expectedVersion
- << ", need to verify with config server";
+ while (true) {
+ numRefreshAttempts++;
+
+ auto refreshStatusAndVersion =
+ _refreshMetadata(txn, nss, (currentMetadata ? currentMetadata.getMetadata() : nullptr));
+ if (refreshStatusAndVersion.isOK()) {
+ LOG(1) << "Successfully refreshed metadata for " << nss.ns() << " to "
+ << refreshStatusAndVersion.getValue();
+ return Status::OK();
+ }
+
+ if (refreshStatusAndVersion == ErrorCodes::RemoteChangeDetected &&
+ numRefreshAttempts < kMaxNumMetadataRefreshAttempts) {
+ currentMetadata = ScopedCollectionMetadata();
+
+ log() << "Refresh failed and will be retried as full reload "
+ << refreshStatusAndVersion.getStatus();
+ continue;
+ }
+
+ return refreshStatusAndVersion.getStatus();
}
- ChunkVersion unusedLatestShardVersion;
- return _refreshMetadata(txn, nss.ns(), expectedVersion, true, &unusedLatestShardVersion);
+
+ MONGO_UNREACHABLE;
}
Status ShardingState::refreshMetadataNow(OperationContext* txn,
const string& ns,
ChunkVersion* latestShardVersion) {
- return _refreshMetadata(txn, ns, ChunkVersion(0, 0, OID()), false, latestShardVersion);
+ auto refreshLatestShardVersionStatus = _refreshMetadata(txn, NamespaceString(ns), nullptr);
+ if (!refreshLatestShardVersionStatus.isOK()) {
+ return refreshLatestShardVersionStatus.getStatus();
+ }
+
+ *latestShardVersion = refreshLatestShardVersionStatus.getValue();
+ return Status::OK();
}
void ShardingState::initializeFromConfigConnString(OperationContext* txn, const string& configSvr) {
@@ -616,258 +582,72 @@ void ShardingState::_signalInitializationComplete(Status status) {
_initializationFinishedCondition.notify_all();
}
-Status ShardingState::_refreshMetadata(OperationContext* txn,
- const string& ns,
- const ChunkVersion& reqShardVersion,
- bool useRequestedVersion,
- ChunkVersion* latestShardVersion) {
+StatusWith<ChunkVersion> ShardingState::_refreshMetadata(
+ OperationContext* txn, const NamespaceString& nss, const CollectionMetadata* metadataForDiff) {
invariant(!txn->lockState()->isLocked());
- Status status = _waitForInitialization(txn->getDeadline());
- if (!status.isOK())
- return status;
+ {
+ Status status = _waitForInitialization(txn->getDeadline());
+ if (!status.isOK())
+ return status;
+ }
// We can't reload if a shard name has not yet been set
{
stdx::lock_guard<stdx::mutex> lk(_mutex);
if (_shardName.empty()) {
- string errMsg = str::stream() << "cannot refresh metadata for " << ns
+ string errMsg = str::stream() << "cannot refresh metadata for " << nss.ns()
<< " before shard name has been set";
-
warning() << errMsg;
- return Status(ErrorCodes::NotYetInitialized, errMsg);
+ return {ErrorCodes::NotYetInitialized, errMsg};
}
}
- const NamespaceString nss(ns);
-
- // The idea here is that we're going to reload the metadata from the config server, but we need
- // to do so outside any locks. When we get our result back, if the current metadata has
- // changed, we may not be able to install the new metadata.
- ScopedCollectionMetadata beforeMetadata;
- {
- ScopedTransaction transaction(txn, MODE_IS);
- AutoGetCollection autoColl(txn, nss, MODE_IS);
-
- beforeMetadata = CollectionShardingState::get(txn, nss)->getMetadata();
- }
-
- ChunkVersion beforeShardVersion;
- ChunkVersion beforeCollVersion;
-
- if (beforeMetadata) {
- beforeShardVersion = beforeMetadata->getShardVersion();
- beforeCollVersion = beforeMetadata->getCollVersion();
- }
-
- *latestShardVersion = beforeShardVersion;
-
- //
- // Determine whether we need to diff or fully reload
- //
-
- bool fullReload = false;
- if (!beforeMetadata) {
- // We don't have any metadata to reload from
- fullReload = true;
- } else if (useRequestedVersion && reqShardVersion.epoch() != beforeShardVersion.epoch()) {
- // It's not useful to use the metadata as a base because we think the epoch will differ
- fullReload = true;
- }
-
- //
- // Load the metadata from the remote server, start construction
- //
-
- LOG(0) << "remotely refreshing metadata for " << ns
- << (useRequestedVersion
- ? string(" with requested shard version ") + reqShardVersion.toString()
- : "")
- << (fullReload ? ", current shard version is " : " based on current shard version ")
- << beforeShardVersion << ", current metadata version is " << beforeCollVersion;
+    // The _configServerTickets ticket holder serializes this process, so that only a small number
+    // of threads can try to refresh against the config server at the same time
+ _configServerTickets.waitForTicket();
+ TicketHolderReleaser needTicketFrom(&_configServerTickets);
- string errMsg;
+ LOG(1) << "Remotely refreshing metadata for " << nss.ns() << ", based on collection version "
+ << (metadataForDiff ? metadataForDiff->getCollVersion().toString() : "(empty)");
- MetadataLoader mdLoader;
std::unique_ptr<CollectionMetadata> remoteMetadata(stdx::make_unique<CollectionMetadata>());
- Timer refreshTimer;
- long long refreshMillis;
-
{
- Status status =
- mdLoader.makeCollectionMetadata(txn,
- grid.catalogClient(txn),
- ns,
- getShardName(),
- fullReload ? nullptr : beforeMetadata.getMetadata(),
- remoteMetadata.get());
- refreshMillis = refreshTimer.millis();
+ Timer refreshTimer;
+
+ MetadataLoader mdLoader;
+ Status status = mdLoader.makeCollectionMetadata(txn,
+ grid.catalogClient(txn),
+ nss.ns(),
+ getShardName(),
+ metadataForDiff,
+ remoteMetadata.get());
if (status.code() == ErrorCodes::NamespaceNotFound) {
remoteMetadata.reset();
} else if (!status.isOK()) {
- warning() << "could not remotely refresh metadata for " << ns
+ warning() << "Could not remotely refresh metadata for " << nss.ns()
<< causedBy(status.reason());
return status;
}
}
- ChunkVersion remoteShardVersion;
- ChunkVersion remoteCollVersion;
- if (remoteMetadata) {
- remoteShardVersion = remoteMetadata->getShardVersion();
- remoteCollVersion = remoteMetadata->getCollVersion();
- }
+    // An exclusive collection lock is needed, since we are now potentially changing the metadata
+    // and do not want reads or writes to be ongoing
+ ScopedTransaction transaction(txn, MODE_IX);
+ AutoGetCollection autoColl(txn, nss, MODE_IX, MODE_X);
- //
- // Get ready to install loaded metadata if needed
- //
+ auto css = CollectionShardingState::get(txn, nss);
- ScopedCollectionMetadata afterMetadata;
- ChunkVersion afterShardVersion;
- ChunkVersion afterCollVersion;
- VersionChoice choice;
+ // Resolve newer pending chunks with the remote metadata, finish construction
+ css->refreshMetadata(txn, std::move(remoteMetadata));
- // If we choose to install the new metadata, this describes the kind of install
- enum InstallType {
- InstallType_New,
- InstallType_Update,
- InstallType_Replace,
- InstallType_Drop,
- InstallType_None
- } installType = InstallType_None; // compiler complains otherwise
+ auto metadata = css->getMetadata();
- {
- // Exclusive collection lock needed since we're now potentially changing the metadata,
- // and don't want reads/writes to be ongoing.
- ScopedTransaction transaction(txn, MODE_IX);
- AutoGetCollection autoColl(txn, nss, MODE_IX, MODE_X);
-
- // Get the metadata now that the load has completed
- auto css = CollectionShardingState::get(txn, nss);
- afterMetadata = css->getMetadata();
-
- if (afterMetadata) {
- afterShardVersion = afterMetadata->getShardVersion();
- afterCollVersion = afterMetadata->getCollVersion();
- }
-
- *latestShardVersion = afterShardVersion;
-
- //
- // Resolve newer pending chunks with the remote metadata, finish construction
- //
-
- Status status =
- mdLoader.promotePendingChunks(afterMetadata.getMetadata(), remoteMetadata.get());
- if (!status.isOK()) {
- warning() << "remote metadata for " << ns
- << " is inconsistent with current pending chunks"
- << causedBy(status.reason());
-
- return status;
- }
-
- //
- // Compare the 'before', 'after', and 'remote' versions/epochs and choose newest
- // Zero-epochs (sentinel value for "dropped" collections), are tested by
- // !epoch.isSet().
- //
-
- choice = chooseNewestVersion(beforeCollVersion, afterCollVersion, remoteCollVersion);
-
- if (choice == VersionChoice::Remote) {
- dassert(
- !remoteCollVersion.epoch().isSet() || remoteShardVersion >= beforeShardVersion ||
- (remoteShardVersion.minorVersion() == 0 && remoteShardVersion.majorVersion() == 0));
-
- if (!afterCollVersion.epoch().isSet()) {
- // First metadata load
- installType = InstallType_New;
- css->setMetadata(std::move(remoteMetadata));
- } else if (remoteCollVersion.epoch().isSet() &&
- remoteCollVersion.epoch() == afterCollVersion.epoch()) {
- // Update to existing metadata
- installType = InstallType_Update;
- css->setMetadata(std::move(remoteMetadata));
- } else if (remoteCollVersion.epoch().isSet()) {
- // New epoch detected, replacing metadata
- installType = InstallType_Replace;
- css->setMetadata(std::move(remoteMetadata));
- } else {
- dassert(!remoteCollVersion.epoch().isSet());
-
- // Drop detected
- installType = InstallType_Drop;
- css->setMetadata(nullptr);
- }
-
- *latestShardVersion = remoteShardVersion;
- }
- }
- // End _mutex
- // End DBWrite
-
- //
- // Do messaging based on what happened above
- //
- string localShardVersionMsg = beforeShardVersion.epoch() == afterShardVersion.epoch()
- ? afterShardVersion.toString()
- : beforeShardVersion.toString() + " / " + afterShardVersion.toString();
-
- if (choice == VersionChoice::Unknown) {
- string errMsg = str::stream()
- << "need to retry loading metadata for " << ns
- << ", collection may have been dropped or recreated during load"
- << " (loaded shard version : " << remoteShardVersion.toString()
- << ", stored shard versions : " << localShardVersionMsg << ", took " << refreshMillis
- << "ms)";
-
- warning() << errMsg;
- return Status(ErrorCodes::RemoteChangeDetected, errMsg);
- }
-
- if (choice == VersionChoice::Local) {
- LOG(0) << "metadata of collection " << ns
- << " already up to date (shard version : " << afterShardVersion.toString()
- << ", took " << refreshMillis << "ms)";
- return Status::OK();
- }
-
- dassert(choice == VersionChoice::Remote);
-
- switch (installType) {
- case InstallType_New:
- LOG(0) << "collection " << ns << " was previously unsharded"
- << ", new metadata loaded with shard version " << remoteShardVersion;
- break;
- case InstallType_Update:
- LOG(0) << "updating metadata for " << ns << " from shard version "
- << localShardVersionMsg << " to shard version " << remoteShardVersion;
- break;
- case InstallType_Replace:
- LOG(0) << "replacing metadata for " << ns << " at shard version "
- << localShardVersionMsg << " with a new epoch (shard version "
- << remoteShardVersion << ")";
- break;
- case InstallType_Drop:
- LOG(0) << "dropping metadata for " << ns << " at shard version " << localShardVersionMsg
- << ", took " << refreshMillis << "ms";
- break;
- default:
- verify(false);
- break;
- }
-
- if (installType != InstallType_Drop) {
- LOG(0) << "collection version was loaded at version " << remoteCollVersion << ", took "
- << refreshMillis << "ms";
- }
-
- return Status::OK();
+ return (metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED());
}
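
Two concurrency choices in the rewritten _refreshMetadata are worth calling out: the
config server round trip is throttled by the _configServerTickets ticket holder and runs
with no locks held, while the exclusive collection lock is taken only to install the
result. A generic sketch of that "load unlocked, install under an exclusive lock"
discipline, using std::shared_mutex and a loadFromRemote() stub as stand-ins for the
collection lock and the MetadataLoader call (assumes C++17):

    #include <memory>
    #include <mutex>
    #include <shared_mutex>

    struct Metadata {
        long long collVersion = 0;
    };

    std::shared_mutex collLock;                // readers shared, installer exclusive
    std::shared_ptr<Metadata> activeMetadata;  // what readers currently see

    // Stand-in for the slow part: network I/O against the config server.
    std::unique_ptr<Metadata> loadFromRemote() {
        return std::make_unique<Metadata>();
    }

    void refresh() {
        // 1) The slow part runs with no lock held, so reads/writes proceed normally.
        std::unique_ptr<Metadata> fresh = loadFromRemote();

        // 2) The exclusive lock is held only for the cheap installation step.
        std::unique_lock<std::shared_mutex> lk(collLock);
        activeMetadata = std::move(fresh);
    }

    long long readCollVersion() {
        std::shared_lock<std::shared_mutex> lk(collLock);  // concurrent readers allowed
        return activeMetadata ? activeMetadata->collVersion : 0;
    }

The narrower the exclusive section, the shorter reads and writes on the collection stall
during a refresh, which is the point of restructuring the function this way.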
StatusWith<ScopedRegisterMigration> ShardingState::registerMigration(const MoveChunkRequest& args) {
diff --git a/src/mongo/db/s/sharding_state.h b/src/mongo/db/s/sharding_state.h
index 8975464136d..23e49047981 100644
--- a/src/mongo/db/s/sharding_state.h
+++ b/src/mongo/db/s/sharding_state.h
@@ -49,6 +49,7 @@ namespace mongo {
class BSONObj;
class BSONObjBuilder;
struct ChunkVersion;
+class CollectionMetadata;
class CollectionShardingState;
class ConnectionString;
class OperationContext;
@@ -313,14 +314,16 @@ private:
void _setInitializationState_inlock(InitializationState newState);
/**
- * Refreshes collection metadata by asking the config server for the latest information. May or
- * may not be based on a requested version.
+     * Refreshes the collection metadata by asking the config server for the latest information
+     * and returns the latest shard version at the time the reload was done. This call performs
+     * network I/O and must never be made while holding a lock.
+     *
+     * The metadataForDiff argument specifies a base metadata from which only the differences
+     * will be loaded. If nullptr is passed, a full reload will be done.
*/
- Status _refreshMetadata(OperationContext* txn,
- const std::string& ns,
- const ChunkVersion& reqShardVersion,
- bool useRequestedVersion,
- ChunkVersion* latestShardVersion);
+ StatusWith<ChunkVersion> _refreshMetadata(OperationContext* txn,
+ const NamespaceString& nss,
+ const CollectionMetadata* metadataForDiff);
// Initializes a TaskExecutor for cleaning up orphaned ranges
void _initializeRangeDeleterTaskExecutor();
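
The header comment above encodes the contract of the new signature: passing a base
metadata requests an incremental (diff) load, while passing nullptr requests a full
reload. A toy model of that contract, with Metadata and refreshMetadata() as illustrative
stand-ins rather than the patch's types:

    #include <iostream>
    #include <memory>

    struct Metadata {
        int collVersion = 0;
    };

    // nullptr base => full reload; non-null base => incremental (diff) refresh.
    std::unique_ptr<Metadata> refreshMetadata(const Metadata* metadataForDiff) {
        auto fresh = std::make_unique<Metadata>();
        fresh->collVersion = metadataForDiff ? metadataForDiff->collVersion + 1 : 1;
        return fresh;
    }

    int main() {
        auto first = refreshMetadata(nullptr);     // full reload (no base)
        auto next = refreshMetadata(first.get());  // diff refresh from that base
        std::cout << first->collVersion << " -> " << next->collVersion << "\n";
        return 0;
    }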
diff --git a/src/mongo/db/s/split_chunk_command.cpp b/src/mongo/db/s/split_chunk_command.cpp
index 90dc2c96427..54cf2e4bb83 100644
--- a/src/mongo/db/s/split_chunk_command.cpp
+++ b/src/mongo/db/s/split_chunk_command.cpp
@@ -432,9 +432,9 @@ public:
// TODO: Revisit this interface, it's a bit clunky
newShardVersion.incMinor();
- std::unique_ptr<CollectionMetadata> cloned(uassertStatusOK(
- css->getMetadata()->cloneSplit(min, max, splitKeys, newShardVersion)));
- css->setMetadata(std::move(cloned));
+ std::unique_ptr<CollectionMetadata> cloned(fassertStatusOK(
+ 40221, css->getMetadata()->cloneSplit(min, max, splitKeys, newShardVersion)));
+ css->refreshMetadata(txn, std::move(cloned));
}
//
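
The split_chunk_command hunk swaps uassertStatusOK for fassertStatusOK(40221, ...): a
failed cloneSplit at this point is no longer treated as a recoverable user error but as a
broken server invariant. Simplified stand-ins for the two assertion styles (not mongo's
real helpers) to show the difference in behavior:

    #include <cstdlib>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct Status {
        bool ok;
        std::string reason;
        bool isOK() const { return ok; }
    };

    // uassert-style: recoverable; surfaces as an error on the current operation.
    void uassertOK(const Status& s) {
        if (!s.isOK())
            throw std::runtime_error(s.reason);
    }

    // fassert-style: a broken invariant; logs the unique id and kills the process.
    void fassertOK(int msgid, const Status& s) {
        if (!s.isOK()) {
            std::cerr << "Fatal assertion " << msgid << ": " << s.reason << std::endl;
            std::abort();
        }
    }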
diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js
index 788fd9b4ddb..2ca4dcfaf69 100644
--- a/src/mongo/shell/shardingtest.js
+++ b/src/mongo/shell/shardingtest.js
@@ -929,7 +929,7 @@ var ShardingTest = function(params) {
var otherParams = Object.merge(params, params.other || {});
var numShards = otherParams.hasOwnProperty('shards') ? otherParams.shards : 2;
- var verboseLevel = otherParams.hasOwnProperty('verbose') ? otherParams.verbose : 1;
+ var mongosVerboseLevel = otherParams.hasOwnProperty('verbose') ? otherParams.verbose : 1;
var numMongos = otherParams.hasOwnProperty('mongos') ? otherParams.mongos : 1;
var numConfigs = otherParams.hasOwnProperty('config') ? otherParams.config : 3;
var waitForCSRSSecondaries = otherParams.hasOwnProperty('waitForCSRSSecondaries')
@@ -1026,7 +1026,7 @@ var ShardingTest = function(params) {
noJournalPrealloc: otherParams.nopreallocj,
oplogSize: 16,
shardsvr: '',
- pathOpts: Object.merge(pathOpts, {shard: i})
+ pathOpts: Object.merge(pathOpts, {shard: i}),
};
rsDefaults = Object.merge(rsDefaults, otherParams.rs);
@@ -1237,7 +1237,7 @@ var ShardingTest = function(params) {
useHostname: otherParams.useHostname,
pathOpts: Object.merge(pathOpts, {mongos: i}),
configdb: this._configDB,
- verbose: verboseLevel || 0,
+ verbose: mongosVerboseLevel,
keyFile: keyFile,
};