author     Misha Tyulenev <misha@mongodb.com>   2016-01-29 16:47:14 -0500
committer  Misha Tyulenev <misha@mongodb.com>   2016-02-01 10:50:22 -0500
commit     e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69 (patch)
tree       cf4fc2cb09f2b5a81466c7ce5108aad854671956
parent     6d38a1f04849ae593c0101bfcebb1b017f831003 (diff)
download   mongo-e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69.tar.gz
SERVER-21896 reload ChunkManager cache when shard does not exist
(cherry picked from commit 67bb466b81b162020b0e437a3540a0f57659b183)
-rw-r--r--  jstests/sharding/remove3.js         48
-rw-r--r--  src/mongo/s/query/cluster_find.cpp  16
2 files changed, 59 insertions, 5 deletions
diff --git a/jstests/sharding/remove3.js b/jstests/sharding/remove3.js
new file mode 100644
index 00000000000..1ca64fc3d10
--- /dev/null
+++ b/jstests/sharding/remove3.js
@@ -0,0 +1,48 @@
+// Validates the remove/drain shard functionality when there is data on the shard being removed
+(function() {
+'use strict';
+
+var st = new ShardingTest({ name: "remove_shard3", shards: 2, mongos: 2 });
+
+assert.commandWorked(st.s0.adminCommand({ enableSharding: 'TestDB' }));
+st.ensurePrimaryShard('TestDB', 'shard0000');
+assert.commandWorked(st.s0.adminCommand({ shardCollection: 'TestDB.Coll', key: { _id: 1 } }));
+assert.commandWorked(st.s0.adminCommand({ split: 'TestDB.Coll', middle: { _id: 0 } }));
+
+// Insert some documents and make sure there are docs on both shards
+st.s0.getDB('TestDB').Coll.insert({ _id: -1, value: 'Negative value' });
+st.s0.getDB('TestDB').Coll.insert({ _id: 1, value: 'Positive value' });
+
+assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll',
+ find: { _id: 1 },
+ to: 'shard0001',
+ _waitForDelete: true }));
+
+// Make sure both mongos instances know of the latest metadata
+assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length);
+assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length);
+
+// Remove shard0001
+var removeRes;
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('started', removeRes.state);
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('ongoing', removeRes.state);
+
+// Move the one chunk off shard0001
+assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll',
+ find: { _id: 1 },
+ to: 'shard0000',
+ _waitForDelete: true }));
+
+// Remove shard must succeed now
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('completed', removeRes.state);
+
+// Make sure both mongos instances refresh their metadata and do not reference the missing shard
+assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length);
+assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length);
+
+st.stop();
+
+})();
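
A note on what the test above exercises: removeShard is issued twice because draining a shard is asynchronous; the state advances from 'started' to 'ongoing', and only reaches 'completed' once the last chunk has been moved off. The final pair of finds is the point of the test: s1 never took part in the removal, so its cached ChunkManager may still reference the now-missing shard0001. The cluster_find.cpp change below makes that situation recoverable: the shard lookup fails with ShardNotFound, which is now treated as retryable, so the router reloads its metadata and the query succeeds.
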
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 6b1df73fad1..f7b6e866987 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -212,7 +212,12 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn,
         chunkManager->getShardIdsForQuery(txn, query.getParsed().getFilter(), &shardIds);

         for (auto id : shardIds) {
-            shards.emplace_back(shardRegistry->getShard(txn, id));
+            auto shard = shardRegistry->getShard(txn, id);
+            if (!shard) {
+                return {ErrorCodes::ShardNotFound,
+                        str::stream() << "Shard with id: " << id << " is not found."};
+            }
+            shards.emplace_back(shard);
         }
     }
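
The guard above is the core of the fix: ShardRegistry::getShard can return a null pointer once a shard has been removed, and the old code stored that result without checking it. Below is a minimal standalone sketch of the same pattern with stub Shard/ShardRegistry types; the names and signatures are illustrative assumptions for this sketch, not MongoDB's actual API.

// Stub stand-ins for Shard and ShardRegistry; illustrative only.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Shard {
    std::string id;
};

class ShardRegistry {
public:
    // As in the patched code, the lookup yields nullptr when the shard id is
    // unknown -- e.g. after the shard has been removed from the cluster.
    std::shared_ptr<Shard> getShard(const std::string& id) const {
        auto it = _shards.find(id);
        return it == _shards.end() ? nullptr : it->second;
    }

    void add(const std::string& id) {
        _shards[id] = std::make_shared<Shard>(Shard{id});
    }

    void remove(const std::string& id) {
        _shards.erase(id);
    }

private:
    std::map<std::string, std::shared_ptr<Shard>> _shards;
};

// Mirrors the patched loop: report the missing shard by name instead of
// silently storing a null pointer.
bool resolveShards(const ShardRegistry& registry,
                   const std::vector<std::string>& shardIds,
                   std::vector<std::shared_ptr<Shard>>* shards,
                   std::string* error) {
    for (const auto& id : shardIds) {
        auto shard = registry.getShard(id);
        if (!shard) {
            *error = "Shard with id: " + id + " is not found.";
            return false;
        }
        shards->push_back(std::move(shard));
    }
    return true;
}

int main() {
    ShardRegistry registry;
    registry.add("shard0000");
    registry.add("shard0001");
    registry.remove("shard0001");  // the drained shard from the test above

    std::vector<std::shared_ptr<Shard>> shards;
    std::string error;
    if (!resolveShards(registry, {"shard0000", "shard0001"}, &shards, &error)) {
        std::cout << error << '\n';  // "Shard with id: shard0001 is not found."
    }
    return 0;
}
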
@@ -377,10 +382,11 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn,
         }

         auto status = std::move(cursorId.getStatus());

-        if (!ErrorCodes::isStaleShardingError(status.code())) {
-            // Errors other than receiving a stale metadata message from MongoD are fatal to the
-            // operation. Network errors and replication retries happen at the level of the
-            // AsyncResultsMerger.
+        if (!ErrorCodes::isStaleShardingError(status.code()) &&
+            status != ErrorCodes::ShardNotFound) {
+            // Errors other than trying to reach a non-existent shard or receiving a stale
+            // metadata message from MongoD are fatal to the operation. Network errors and
+            // replication retries happen at the level of the AsyncResultsMerger.
             return status;
         }
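
This second hunk widens runQuery's retryable error set so that ShardNotFound is handled like a stale-sharding error: the surrounding retry loop gets another attempt instead of failing the operation. A minimal sketch of that retry shape follows, assuming (per the commit title) that the routing cache is reloaded between attempts; every type and name below is an illustrative stub, not MongoDB's API.

#include <functional>
#include <iostream>
#include <string>

// Minimal stand-ins for the error codes and Status type; illustrative only.
enum class ErrorCode { OK, ShardNotFound, StaleConfig, InternalError };

struct Status {
    ErrorCode code;
    std::string reason;
    bool isOK() const { return code == ErrorCode::OK; }
};

// The patch widens the retryable set from the stale sharding errors alone
// to also include ShardNotFound.
bool isRetryableShardingError(ErrorCode code) {
    return code == ErrorCode::StaleConfig || code == ErrorCode::ShardNotFound;
}

Status runQueryWithRetries(const std::function<Status()>& runOnce,
                           const std::function<void()>& reloadRoutingCache,
                           int maxRetries) {
    Status status{ErrorCode::InternalError, "never ran"};
    for (int attempt = 0; attempt < maxRetries; ++attempt) {
        status = runOnce();
        if (status.isOK() || !isRetryableShardingError(status.code)) {
            return status;  // success, or an error that is fatal to the operation
        }
        // Retryable: the routing data referenced a stale or missing shard,
        // so refresh it before the next attempt.
        reloadRoutingCache();
    }
    return status;
}

int main() {
    int calls = 0;
    bool cacheFresh = false;

    // First attempt sees the stale cache and fails with ShardNotFound;
    // after the reload, the second attempt succeeds.
    auto runOnce = [&]() -> Status {
        ++calls;
        if (!cacheFresh) {
            return {ErrorCode::ShardNotFound, "Shard with id: shard0001 is not found."};
        }
        return {ErrorCode::OK, ""};
    };
    auto reload = [&]() { cacheFresh = true; };

    Status s = runQueryWithRetries(runOnce, reload, 3);
    std::cout << (s.isOK() ? "query succeeded" : s.reason)
              << " after " << calls << " attempt(s)\n";
    return 0;
}

Here the first attempt fails against the stale cache, the reload repairs it, and the second attempt succeeds, which is the user-visible behaviour remove3.js asserts through s1.
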