author     Esha Maharishi <esha.maharishi@mongodb.com>  2017-10-25 12:42:37 -0400
committer  Esha Maharishi <esha.maharishi@mongodb.com>  2017-11-06 13:20:44 -0500
commit     87d4378a2d6749ac1977b0aff05cc9ae22e06d2f (patch)
tree       5c594158f4b848da8662105b6c7895b5461e45ba
parent     0b073092a8127c3723569f37d9ffa9d81e785116 (diff)
SERVER-31712 do not apply ignoredErrors when appending raw responses if no shards have an OK response
-rw-r--r--  jstests/sharding/index_and_collection_option_propagation.js   63
-rw-r--r--  src/mongo/s/commands/cluster_commands_helpers.cpp              87
-rw-r--r--  src/mongo/s/commands/cluster_commands_helpers.h                 4
3 files changed, 120 insertions, 34 deletions
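
In brief: appendRawResponses previously dropped any error whose code appeared in 'ignoredErrors', so a command could report success even when every targeted shard had failed. With this patch the ignored codes are filtered out only when at least one shard returned an OK response. The standalone C++ sketch below illustrates just that rule; it is a simplified illustration, not the MongoDB implementation (plain ints stand in for ErrorCodes::Error and strings for shard identifiers), and the helper name surfacedErrors is hypothetical.

#include <cstddef>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Returns the per-shard errors that should be surfaced to the client.
// Codes in 'ignoredErrorCodes' are dropped only when at least one of the contacted
// shards returned success; if every shard failed, nothing is ignored.
std::vector<std::pair<std::string, int>> surfacedErrors(
    const std::vector<std::pair<std::string, int>>& shardErrors,  // (shard, error code)
    std::size_t numShardsContacted,
    const std::set<int>& ignoredErrorCodes) {
    // Every contacted shard either succeeded or contributed an entry to 'shardErrors',
    // so a size mismatch means at least one shard returned OK.
    const bool someShardReturnedOK = shardErrors.size() != numShardsContacted;

    std::vector<std::pair<std::string, int>> surfaced;
    for (const auto& err : shardErrors) {
        if (someShardReturnedOK && ignoredErrorCodes.count(err.second)) {
            continue;  // Another shard succeeded, so this error is tolerated.
        }
        surfaced.push_back(err);  // No shard succeeded, or the error is not ignorable.
    }
    return surfaced;
}
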
diff --git a/jstests/sharding/index_and_collection_option_propagation.js b/jstests/sharding/index_and_collection_option_propagation.js
index 47c1d2e0695..3599f2507f3 100644
--- a/jstests/sharding/index_and_collection_option_propagation.js
+++ b/jstests/sharding/index_and_collection_option_propagation.js
@@ -4,10 +4,15 @@
* - If called on an unsharded collection, the request is routed only to the primary shard.
* - If called on a sharded collection, the request is broadcast to all shards, but
* NamespaceNotFound and CannotImplicitlyCreateCollection errors do not lead to command failure
- * (though these errors are reported in the 'raw' shard responses).
+ * (though these errors are reported in the 'raw' shard responses) as long as at least one shard
+ * returns success.
*
* This test verifies this behavior.
*/
+
+// This test shuts down a shard.
+TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
+
(function() {
// Helper function that runs listIndexes against shards to check for the existence of an index.
function checkShardIndexes(indexKey, shardsWithIndex, shardsWithoutIndex) {
@@ -156,5 +161,61 @@
assert.eq(res.raw[st.shard2.host].code, ErrorCodes.NamespaceNotFound, tojson(res));
checkShardCollOption("validator", validationOption2, [st.shard0, st.shard1], [st.shard2]);
+ // Check that errors from shards are aggregated correctly.
+
+ // If no shard returns success, then errors that are usually ignored should be reported.
+ res = st.s.getDB(dbName).getCollection("unshardedColl").dropIndex("nonexistentIndex");
+ assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res));
+ assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res));
+ assert.eq(res.code, ErrorCodes.NamespaceNotFound, tojson(res));
+ assert.eq("NamespaceNotFound", res.codeName, tojson(res));
+ assert.neq(null, res.errmsg, tojson(res));
+
+ // If all shards report the same error, the overall command error should be set to that error.
+ res = st.s.getDB(dbName).getCollection(collName).createIndex({});
+ assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res));
+ assert.eq(res.raw[st.shard1.host].ok, 0, tojson(res));
+ assert.eq(res.raw[st.shard2.host].ok, 0, tojson(res));
+ assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res));
+ assert.eq(res.code, res.raw[st.shard1.host].code, tojson(res));
+ assert.eq(res.code, res.raw[st.shard2.host].code, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard1.host].codeName, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard2.host].codeName, tojson(res));
+ assert.eq(res.code, ErrorCodes.CannotCreateIndex, tojson(res));
+ assert.eq("CannotCreateIndex", res.codeName, tojson(res));
+ assert.neq(null, res.errmsg, tojson(res));
+
+ st.rs0.stopSet();
+
+ // If we receive a non-ignorable error, it should be reported as the command error.
+ res = st.s.getDB(dbName).getCollection("unshardedColl").createIndex({"validIdx": 1});
+ assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res));
+ assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res));
+ // We might see 'HostUnreachable' the first time if the mongos's ReplicaSetMonitor does not yet
+ // know that the shard is down.
+ assert(res.code === ErrorCodes.HostUnreachable ||
+ res.code === ErrorCodes.FailedToSatisfyReadPreference,
+ tojson(res));
+ assert(res.codeName === "HostUnreachable" || res.codeName === "FailedToSatisfyReadPreference",
+ tojson(res));
+
+ // If some shard returns a non-ignorable error, it should be reported as the command error, even
+ // if other shards returned ignorable errors.
+ res = st.s.getDB(dbName).getCollection(collName).createIndex({"validIdx": 1});
+ assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res)); // shard was down
+ assert.eq(
+ res.raw[st.shard1.host].ok, 1, tojson(res)); // gets created on shard that owns chunks
+ assert.eq(res.raw[st.shard2.host].ok, 0, tojson(res)); // shard does not own chunks
+ assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res));
+ assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res));
+ // We can expect to see 'FailedToSatisfyReadPreference' this time, because after the previous
+ // createIndexes attempt, mongos's ReplicaSetMonitor should have been updated.
+ assert.eq(res.code, ErrorCodes.FailedToSatisfyReadPreference, tojson(res));
+ assert.eq("FailedToSatisfyReadPreference", res.codeName, tojson(res));
+ assert.neq(null, res.errmsg, tojson(res));
+
st.stop();
})();
diff --git a/src/mongo/s/commands/cluster_commands_helpers.cpp b/src/mongo/s/commands/cluster_commands_helpers.cpp
index c64bab1807d..aea2fa2429f 100644
--- a/src/mongo/s/commands/cluster_commands_helpers.cpp
+++ b/src/mongo/s/commands/cluster_commands_helpers.cpp
@@ -270,9 +270,15 @@ bool appendRawResponses(OperationContext* opCtx,
BSONObjBuilder* output,
std::vector<AsyncRequestsSender::Response> shardResponses,
std::set<ErrorCodes::Error> ignoredErrors) {
- BSONObjBuilder subobj; // Stores raw responses by ConnectionString
- BSONObjBuilder errors; // Stores errors by ConnectionString
- int commonErrCode = -1; // Stores the overall error code
+ // Always include ShardNotFound as an ignored error, since this node may not have realized a
+ // shard has been removed.
+ ignoredErrors.insert(ErrorCodes::ShardNotFound);
+
+ BSONObjBuilder subobj; // Stores raw responses by ConnectionString
+
+ // Stores all errors; we will remove ignoredErrors later if some shard returned success.
+ std::vector<std::pair<std::string, Status>> errors; // Stores errors by ConnectionString
+
BSONElement wcErrorElem; // Stores the first writeConcern error we encounter
ShardId wcErrorShardId; // Stores the shardId for the first writeConcern error we encounter
bool hasWCError = false; // Whether we have encountered a writeConcern error yet
@@ -282,45 +288,43 @@ bool appendRawResponses(OperationContext* opCtx,
const auto swShard =
Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardResponse.shardId);
if (ErrorCodes::ShardNotFound == swShard.getStatus().code()) {
- // If a shard got removed, ignore its response.
+ // Store the error by ShardId, since we cannot know the shard connection string, and it
+ // is only used for reporting.
+ errors.push_back(std::make_pair(shardResponse.shardId.toString(), swShard.getStatus()));
continue;
}
const auto shard = uassertStatusOK(swShard);
const auto shardConnStr = shard->getConnString().toString();
- auto status = shardResponse.swResponse.getStatus();
+ Status sendStatus = shardResponse.swResponse.getStatus();
+ if (!sendStatus.isOK()) {
+ // Convert the error status back into the form of a command result and append it as the
+ // raw response.
+ BSONObjBuilder statusObjBob;
+ Command::appendCommandStatus(statusObjBob, sendStatus);
+ subobj.append(shardConnStr, statusObjBob.obj());
- if (status.isOK()) {
- status = getStatusFromCommandResult(shardResponse.swResponse.getValue().data);
+ errors.push_back(std::make_pair(shardConnStr, sendStatus));
+ continue;
+ }
- // Report the first writeConcern error we see.
- if (!hasWCError) {
- if ((wcErrorElem = shardResponse.swResponse.getValue().data["writeConcernError"])) {
- wcErrorShardId = shardResponse.shardId;
- hasWCError = true;
- }
- }
+ // Got a response from the shard.
- if (status.isOK() || ignoredErrors.find(status.code()) != ignoredErrors.end()) {
- subobj.append(shardConnStr,
- Command::filterCommandReplyForPassthrough(
- shardResponse.swResponse.getValue().data));
- continue;
- }
- }
+ auto& resObj = shardResponse.swResponse.getValue().data;
- errors.append(shardConnStr, status.reason());
+ // Append the shard's raw response.
+ subobj.append(shardConnStr, Command::filterCommandReplyForPassthrough(resObj));
- if (commonErrCode == -1) {
- commonErrCode = status.code();
- } else if (commonErrCode != status.code()) {
- commonErrCode = 0;
+ auto commandStatus = getStatusFromCommandResult(resObj);
+ if (!commandStatus.isOK()) {
+ errors.push_back(std::make_pair(shardConnStr, std::move(commandStatus)));
}
- // Convert the error status back into the format of a command result.
- BSONObjBuilder statusObjBob;
- Command::appendCommandStatus(statusObjBob, status);
- subobj.append(shard->getConnString().toString(), statusObjBob.obj());
+ // Report the first writeConcern error we see.
+ if (!hasWCError && (wcErrorElem = resObj["writeConcernError"])) {
+ wcErrorShardId = shardResponse.shardId;
+ hasWCError = true;
+ }
}
output->append("raw", subobj.done());
@@ -329,7 +333,28 @@ bool appendRawResponses(OperationContext* opCtx,
appendWriteConcernErrorToCmdResponse(wcErrorShardId, wcErrorElem, *output);
}
- BSONObj errobj = errors.done();
+ // If any shard returned success, filter out ignored errors
+ bool someShardReturnedOK = (errors.size() != shardResponses.size());
+
+ BSONObjBuilder errorBob;
+ int commonErrCode = -1;
+ auto it = errors.begin();
+ while (it != errors.end()) {
+ if (someShardReturnedOK && ignoredErrors.find(it->second.code()) != ignoredErrors.end()) {
+ // Ignore the error.
+ it = errors.erase(it);
+ } else {
+ errorBob.append(it->first, it->second.reason());
+ if (commonErrCode == -1) {
+ commonErrCode = it->second.code();
+ } else if (commonErrCode != it->second.code()) {
+ commonErrCode = 0;
+ }
+ ++it;
+ }
+ }
+ BSONObj errobj = errorBob.obj();
+
if (!errobj.isEmpty()) {
*errmsg = errobj.toString();
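
As the hunk above shows, the surfaced errors are also collapsed into a single overall command error code: if every surfaced error carries the same code, that code becomes the command's 'code'; if the codes differ, it degenerates to 0. Below is a minimal standalone sketch of that collapse, again using plain ints rather than ErrorCodes::Error.

#include <vector>

// Mirrors the commonErrCode logic in appendRawResponses: returns -1 if no errors were
// surfaced, the shared code if all surfaced errors agree, or 0 if the codes differ.
int collapseCommonErrCode(const std::vector<int>& surfacedErrorCodes) {
    int commonErrCode = -1;
    for (int code : surfacedErrorCodes) {
        if (commonErrCode == -1) {
            commonErrCode = code;  // First surfaced error: adopt its code.
        } else if (commonErrCode != code) {
            commonErrCode = 0;     // Codes disagree: no single common error.
        }
    }
    return commonErrCode;
}

This is why the test above expects res.code to equal ErrorCodes.CannotCreateIndex when all three shards fail with that same code.
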
diff --git a/src/mongo/s/commands/cluster_commands_helpers.h b/src/mongo/s/commands/cluster_commands_helpers.h
index 7c148f14012..de75019b551 100644
--- a/src/mongo/s/commands/cluster_commands_helpers.h
+++ b/src/mongo/s/commands/cluster_commands_helpers.h
@@ -126,9 +126,9 @@ StatusWith<std::vector<AsyncRequestsSender::Response>> scatterGatherOnlyVersionI
*
* If all shards that errored had the same error, writes the common error code to 'output'. Writes a
* string representation of all errors to 'errmsg'. Error codes in 'ignoredErrors' are not treated
- * as errors.
+ * as errors if any shard returned success.
*
- * Returns true if all the shards reported success.
+ * Returns true if any shard reports success and only ignored errors occur.
*/
bool appendRawResponses(OperationContext* opCtx,
std::string* errmsg,
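...

To make the contract concrete, here is a hedged sketch of how a mongos command might call appendRawResponses after scattering a request to the shards. The wrapper name and surrounding plumbing are hypothetical, and the relevant mongo headers (including cluster_commands_helpers.h) are assumed to be included; only the appendRawResponses signature and the two ignorable error codes come from this patch and its test.

// Hypothetical caller, not a command from this patch. Assumes 'shardResponses' was
// already gathered by a scatter/gather helper such as the one declared above.
bool appendResultsForDdlCommand(OperationContext* opCtx,
                                std::vector<AsyncRequestsSender::Response> shardResponses,
                                std::string* errmsg,
                                BSONObjBuilder* output) {
    // These codes are tolerated only if at least one shard returns success. ShardNotFound
    // no longer needs to be listed: appendRawResponses now inserts it itself.
    std::set<ErrorCodes::Error> ignoredErrors{ErrorCodes::NamespaceNotFound,
                                              ErrorCodes::CannotImplicitlyCreateCollection};

    // Returns true only when some shard succeeded and every reported error was ignorable.
    // On failure, 'output' carries the per-shard "raw" responses and, if the shards agreed,
    // a common error code, while 'errmsg' gets a string form of the errors.
    return appendRawResponses(opCtx, errmsg, output, std::move(shardResponses), ignoredErrors);
}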