diff options
author | Misha Tyulenev <misha@mongodb.com> | 2016-01-29 16:47:14 -0500 |
---|---|---|
committer | Misha Tyulenev <misha@mongodb.com> | 2016-02-01 10:50:22 -0500 |
commit | e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69 (patch) | |
tree | cf4fc2cb09f2b5a81466c7ce5108aad854671956 | |
parent | 6d38a1f04849ae593c0101bfcebb1b017f831003 (diff) | |
download | mongo-e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69.tar.gz |
SERVER-21896 reload ChunkManager cache when shard does not exist
(cherry picked from commit 67bb466b81b162020b0e437a3540a0f57659b183)
-rw-r--r-- | jstests/sharding/remove3.js | 48 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 16 |
2 files changed, 59 insertions, 5 deletions
diff --git a/jstests/sharding/remove3.js b/jstests/sharding/remove3.js new file mode 100644 index 00000000000..1ca64fc3d10 --- /dev/null +++ b/jstests/sharding/remove3.js @@ -0,0 +1,48 @@ +// Validates the remove/drain shard functionality when there is data on the shard being removed +(function() { +'use strict'; + +var st = new ShardingTest({ name: "remove_shard3", shards: 2, mongos: 2 }); + +assert.commandWorked(st.s0.adminCommand({ enableSharding: 'TestDB' })); +st.ensurePrimaryShard('TestDB', 'shard0000'); +assert.commandWorked(st.s0.adminCommand({ shardCollection: 'TestDB.Coll', key: { _id: 1 } })); +assert.commandWorked(st.s0.adminCommand({ split: 'TestDB.Coll', middle: { _id: 0 } })); + +// Insert some documents and make sure there are docs on both shards +st.s0.getDB('TestDB').Coll.insert({ _id: -1, value: 'Negative value' }); +st.s0.getDB('TestDB').Coll.insert({ _id: 1, value: 'Positive value' }); + +assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll', + find: { _id: 1 }, + to: 'shard0001', + _waitForDelete: true })); + +// Make sure both mongos instances know of the latest metadata +assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length); +assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length); + +// Remove shard0001 +var removeRes; +removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' })); +assert.eq('started', removeRes.state); +removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' })); +assert.eq('ongoing', removeRes.state); + +// Move the one chunk off shard0001 +assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll', + find: { _id: 1 }, + to: 'shard0000', + _waitForDelete: true })); + +// Remove shard must succeed now +removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' })); +assert.eq('completed', removeRes.state); + +// Make sure both mongos instance refresh their metadata and do not reference the missing shard 
+assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length); +assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length); + +st.stop(); + +})(); diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 6b1df73fad1..f7b6e866987 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -212,7 +212,12 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, chunkManager->getShardIdsForQuery(txn, query.getParsed().getFilter(), &shardIds); for (auto id : shardIds) { - shards.emplace_back(shardRegistry->getShard(txn, id)); + auto shard = shardRegistry->getShard(txn, id); + if (!shard) { + return {ErrorCodes::ShardNotFound, + str::stream() << "Shard with id: " << id << " is not found."}; + } + shards.emplace_back(shard); } } @@ -377,10 +382,11 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, } auto status = std::move(cursorId.getStatus()); - if (!ErrorCodes::isStaleShardingError(status.code())) { - // Errors other than receiving a stale metadata message from MongoD are fatal to the - // operation. Network errors and replication retries happen at the level of the - // AsyncResultsMerger. + if (!ErrorCodes::isStaleShardingError(status.code()) && + status != ErrorCodes::ShardNotFound) { + // Errors other than trying to reach a non existent shard or receiving a stale + // metadata message from MongoD are fatal to the operation. Network errors and + // replication retries happen at the level of the AsyncResultsMerger. return status; } |