author     Misha Tyulenev <misha@mongodb.com>   2016-01-29 16:47:14 -0500
committer  Misha Tyulenev <misha@mongodb.com>   2016-02-01 10:50:22 -0500
commit     e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69 (patch)
tree       cf4fc2cb09f2b5a81466c7ce5108aad854671956
parent     6d38a1f04849ae593c0101bfcebb1b017f831003 (diff)
download   mongo-e6cf3f0a2350cfa09fc2557fdb138fb2c7b38b69.tar.gz
SERVER-21896 reload ChunkManager cache when shard does not exist
(cherry picked from commit 67bb466b81b162020b0e437a3540a0f57659b183)
-rw-r--r--  jstests/sharding/remove3.js         48
-rw-r--r--  src/mongo/s/query/cluster_find.cpp  16
2 files changed, 59 insertions, 5 deletions
diff --git a/jstests/sharding/remove3.js b/jstests/sharding/remove3.js
new file mode 100644
index 00000000000..1ca64fc3d10
--- /dev/null
+++ b/jstests/sharding/remove3.js
@@ -0,0 +1,48 @@
+// Validates the remove/drain shard functionality when there is data on the shard being removed
+(function() {
+'use strict';
+
+var st = new ShardingTest({ name: "remove_shard3", shards: 2, mongos: 2 });
+
+assert.commandWorked(st.s0.adminCommand({ enableSharding: 'TestDB' }));
+st.ensurePrimaryShard('TestDB', 'shard0000');
+assert.commandWorked(st.s0.adminCommand({ shardCollection: 'TestDB.Coll', key: { _id: 1 } }));
+assert.commandWorked(st.s0.adminCommand({ split: 'TestDB.Coll', middle: { _id: 0 } }));
+
+// Insert some documents and make sure there are docs on both shards
+st.s0.getDB('TestDB').Coll.insert({ _id: -1, value: 'Negative value' });
+st.s0.getDB('TestDB').Coll.insert({ _id: 1, value: 'Positive value' });
+
+assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll',
+ find: { _id: 1 },
+ to: 'shard0001',
+ _waitForDelete: true }));
+
+// Make sure both mongos instances know of the latest metadata
+assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length);
+assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length);
+
+// Remove shard0001
+var removeRes;
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('started', removeRes.state);
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('ongoing', removeRes.state);
+
+// Move the one chunk off shard0001
+assert.commandWorked(st.s0.adminCommand({ moveChunk: 'TestDB.Coll',
+ find: { _id: 1 },
+ to: 'shard0000',
+ _waitForDelete: true }));
+
+// Remove shard must succeed now
+removeRes = assert.commandWorked(st.s0.adminCommand({ removeShard: 'shard0001' }));
+assert.eq('completed', removeRes.state);
+
+// Make sure both mongos instances refresh their metadata and do not reference the missing shard
+assert.eq(2, st.s0.getDB('TestDB').Coll.find({}).toArray().length);
+assert.eq(2, st.s1.getDB('TestDB').Coll.find({}).toArray().length);
+
+st.stop();
+
+})();
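
A note on what the test above exercises: removeShard is issued twice because draining a shard is asynchronous; the state advances from 'started' to 'ongoing', and only reaches 'completed' once the last chunk has been moved off. The final pair of finds is the point of the test: s1 never took part in the removal, so its cached ChunkManager may still reference the now-missing shard0001. The cluster_find.cpp change below makes that situation recoverable: the shard lookup fails with ShardNotFound, which is now treated as retryable, so the router reloads its metadata and the query succeeds.
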
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 6b1df73fad1..f7b6e866987 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -212,7 +212,12 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn,
         chunkManager->getShardIdsForQuery(txn, query.getParsed().getFilter(), &shardIds);

         for (auto id : shardIds) {
-            shards.emplace_back(shardRegistry->getShard(txn, id));
+            auto shard = shardRegistry->getShard(txn, id);
+            if (!shard) {
+                return {ErrorCodes::ShardNotFound,
+                        str::stream() << "Shard with id: " << id << " is not found."};
+            }
+            shards.emplace_back(shard);
         }
     }
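
The guard above is the core of the fix: ShardRegistry::getShard can return a null pointer once a shard has been removed, and the old code stored that result without checking it. Below is a minimal standalone sketch of the same pattern with stub Shard/ShardRegistry types; the names and signatures are illustrative assumptions for this sketch, not MongoDB's actual API.

// Stub stand-ins for Shard and ShardRegistry; illustrative only.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Shard {
    std::string id;
};

class ShardRegistry {
public:
    // As in the patched code, the lookup yields nullptr when the shard id is
    // unknown -- e.g. after the shard has been removed from the cluster.
    std::shared_ptr<Shard> getShard(const std::string& id) const {
        auto it = _shards.find(id);
        return it == _shards.end() ? nullptr : it->second;
    }

    void add(const std::string& id) {
        _shards[id] = std::make_shared<Shard>(Shard{id});
    }

    void remove(const std::string& id) {
        _shards.erase(id);
    }

private:
    std::map<std::string, std::shared_ptr<Shard>> _shards;
};

// Mirrors the patched loop: report the missing shard by name instead of
// silently storing a null pointer.
bool resolveShards(const ShardRegistry& registry,
                   const std::vector<std::string>& shardIds,
                   std::vector<std::shared_ptr<Shard>>* shards,
                   std::string* error) {
    for (const auto& id : shardIds) {
        auto shard = registry.getShard(id);
        if (!shard) {
            *error = "Shard with id: " + id + " is not found.";
            return false;
        }
        shards->push_back(std::move(shard));
    }
    return true;
}

int main() {
    ShardRegistry registry;
    registry.add("shard0000");
    registry.add("shard0001");
    registry.remove("shard0001");  // the drained shard from the test above

    std::vector<std::shared_ptr<Shard>> shards;
    std::string error;
    if (!resolveShards(registry, {"shard0000", "shard0001"}, &shards, &error)) {
        std::cout << error << '\n';  // "Shard with id: shard0001 is not found."
    }
    return 0;
}
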
@@ -377,10 +382,11 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn,
         }

         auto status = std::move(cursorId.getStatus());

-        if (!ErrorCodes::isStaleShardingError(status.code())) {
-            // Errors other than receiving a stale metadata message from MongoD are fatal to the
-            // operation. Network errors and replication retries happen at the level of the
-            // AsyncResultsMerger.
+        if (!ErrorCodes::isStaleShardingError(status.code()) &&
+            status != ErrorCodes::ShardNotFound) {
+            // Errors other than trying to reach a non-existent shard or receiving a stale
+            // metadata message from MongoD are fatal to the operation. Network errors and
+            // replication retries happen at the level of the AsyncResultsMerger.
             return status;
         }
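
This second hunk widens runQuery's retryable error set so that ShardNotFound is handled like a stale-sharding error: the surrounding retry loop gets another attempt instead of failing the operation. A minimal sketch of that retry shape follows, assuming (per the commit title) that the routing cache is reloaded between attempts; every type and name below is an illustrative stub, not MongoDB's API.

#include <functional>
#include <iostream>
#include <string>

// Minimal stand-ins for the error codes and Status type; illustrative only.
enum class ErrorCode { OK, ShardNotFound, StaleConfig, InternalError };

struct Status {
    ErrorCode code;
    std::string reason;
    bool isOK() const { return code == ErrorCode::OK; }
};

// The patch widens the retryable set from the stale sharding errors alone
// to also include ShardNotFound.
bool isRetryableShardingError(ErrorCode code) {
    return code == ErrorCode::StaleConfig || code == ErrorCode::ShardNotFound;
}

Status runQueryWithRetries(const std::function<Status()>& runOnce,
                           const std::function<void()>& reloadRoutingCache,
                           int maxRetries) {
    Status status{ErrorCode::InternalError, "never ran"};
    for (int attempt = 0; attempt < maxRetries; ++attempt) {
        status = runOnce();
        if (status.isOK() || !isRetryableShardingError(status.code)) {
            return status;  // success, or an error that is fatal to the operation
        }
        // Retryable: the routing data referenced a stale or missing shard,
        // so refresh it before the next attempt.
        reloadRoutingCache();
    }
    return status;
}

int main() {
    int calls = 0;
    bool cacheFresh = false;

    // First attempt sees the stale cache and fails with ShardNotFound;
    // after the reload, the second attempt succeeds.
    auto runOnce = [&]() -> Status {
        ++calls;
        if (!cacheFresh) {
            return {ErrorCode::ShardNotFound, "Shard with id: shard0001 is not found."};
        }
        return {ErrorCode::OK, ""};
    };
    auto reload = [&]() { cacheFresh = true; };

    Status s = runQueryWithRetries(runOnce, reload, 3);
    std::cout << (s.isOK() ? "query succeeded" : s.reason)
              << " after " << calls << " attempt(s)\n";
    return 0;
}

Here the first attempt fails against the stale cache, the reload repairs it, and the second attempt succeeds, which is the user-visible behaviour remove3.js asserts through s1.
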