diff options
author | Esha Maharishi <esha.maharishi@mongodb.com> | 2017-10-25 12:42:37 -0400 |
---|---|---|
committer | Esha Maharishi <esha.maharishi@mongodb.com> | 2017-11-06 13:20:44 -0500 |
commit | 87d4378a2d6749ac1977b0aff05cc9ae22e06d2f (patch) | |
tree | 5c594158f4b848da8662105b6c7895b5461e45ba | |
parent | 0b073092a8127c3723569f37d9ffa9d81e785116 (diff) | |
download | mongo-87d4378a2d6749ac1977b0aff05cc9ae22e06d2f.tar.gz |
SERVER-31712 do not apply ignoredErrors when appending raw responses if no shards have an OK response
-rw-r--r-- | jstests/sharding/index_and_collection_option_propagation.js | 63 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_commands_helpers.cpp | 87 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_commands_helpers.h | 4 |
3 files changed, 120 insertions, 34 deletions
diff --git a/jstests/sharding/index_and_collection_option_propagation.js b/jstests/sharding/index_and_collection_option_propagation.js index 47c1d2e0695..3599f2507f3 100644 --- a/jstests/sharding/index_and_collection_option_propagation.js +++ b/jstests/sharding/index_and_collection_option_propagation.js @@ -4,10 +4,15 @@ * - If called on an unsharded collection, the request is routed only to the primary shard. * - If called on a sharded collection, the request is broadcast to all shards, but * NamespaceNotFound and CannotImplicitlyCreateCollection errors do not lead to command failure - * (though these errors are reported in the 'raw' shard responses). + * (though these errors are reported in the 'raw' shard responses) as long as at least one shard + * returns success. * * This test verifies this behavior. */ + +// This test shuts down a shard. +TestData.skipCheckingUUIDsConsistentAcrossCluster = true; + (function() { // Helper function that runs listIndexes against shards to check for the existence of an index. function checkShardIndexes(indexKey, shardsWithIndex, shardsWithoutIndex) { @@ -156,5 +161,61 @@ assert.eq(res.raw[st.shard2.host].code, ErrorCodes.NamespaceNotFound, tojson(res)); checkShardCollOption("validator", validationOption2, [st.shard0, st.shard1], [st.shard2]); + // Check that errors from shards are aggregated correctly. + + // If no shard returns success, then errors that are usually ignored should be reported. + res = st.s.getDB(dbName).getCollection("unshardedColl").dropIndex("nonexistentIndex"); + assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res)); + assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res)); + assert.eq(res.code, ErrorCodes.NamespaceNotFound, tojson(res)); + assert.eq("NamespaceNotFound", res.codeName, tojson(res)); + assert.neq(null, res.errmsg, tojson(res)); + + // If all shards report the same error, the overall command error should be set to that error. + res = st.s.getDB(dbName).getCollection(collName).createIndex({}); + assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res)); + assert.eq(res.raw[st.shard1.host].ok, 0, tojson(res)); + assert.eq(res.raw[st.shard2.host].ok, 0, tojson(res)); + assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res)); + assert.eq(res.code, res.raw[st.shard1.host].code, tojson(res)); + assert.eq(res.code, res.raw[st.shard2.host].code, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard1.host].codeName, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard2.host].codeName, tojson(res)); + assert.eq(res.code, ErrorCodes.CannotCreateIndex, tojson(res)); + assert.eq("CannotCreateIndex", res.codeName, tojson(res)); + assert.neq(null, res.errmsg, tojson(res)); + + st.rs0.stopSet(); + + // If we receive a non-ignorable error, it should be reported as the command error. + res = st.s.getDB(dbName).getCollection("unshardedColl").createIndex({"validIdx": 1}); + assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res)); + assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res)); + // We might see 'HostUnreachable' the first time if the mongos's ReplicaSetMonitor does not yet + // know that the shard is down. + assert(res.code === ErrorCodes.HostUnreachable || + res.code === ErrorCodes.FailedToSatisfyReadPreference, + tojson(res)); + assert(res.codeName === "HostUnreachable" || res.codeName === "FailedToSatisfyReadPreference", + tojson(res)); + + // If some shard returns a non-ignorable error, it should be reported as the command error, even + // if other shards returned ignorable errors. + res = st.s.getDB(dbName).getCollection(collName).createIndex({"validIdx": 1}); + assert.eq(res.raw[st.shard0.host].ok, 0, tojson(res)); // shard was down + assert.eq( + res.raw[st.shard1.host].ok, 1, tojson(res)); // gets created on shard that owns chunks + assert.eq(res.raw[st.shard2.host].ok, 0, tojson(res)); // shard does not own chunks + assert.eq(res.code, res.raw[st.shard0.host].code, tojson(res)); + assert.eq(res.codeName, res.raw[st.shard0.host].codeName, tojson(res)); + // We can expect to see 'FailedToSatisfyReadPreference' this time, because after the previous + // createIndexes attempt, mongos's ReplicaSetMonitor should have been updated. + assert.eq(res.code, ErrorCodes.FailedToSatisfyReadPreference, tojson(res)); + assert.eq("FailedToSatisfyReadPreference", res.codeName, tojson(res)); + assert.neq(null, res.errmsg, tojson(res)); + st.stop(); })(); diff --git a/src/mongo/s/commands/cluster_commands_helpers.cpp b/src/mongo/s/commands/cluster_commands_helpers.cpp index c64bab1807d..aea2fa2429f 100644 --- a/src/mongo/s/commands/cluster_commands_helpers.cpp +++ b/src/mongo/s/commands/cluster_commands_helpers.cpp @@ -270,9 +270,15 @@ bool appendRawResponses(OperationContext* opCtx, BSONObjBuilder* output, std::vector<AsyncRequestsSender::Response> shardResponses, std::set<ErrorCodes::Error> ignoredErrors) { - BSONObjBuilder subobj; // Stores raw responses by ConnectionString - BSONObjBuilder errors; // Stores errors by ConnectionString - int commonErrCode = -1; // Stores the overall error code + // Always include ShardNotFound as an ignored error, since this node may not have realized a + // shard has been removed. + ignoredErrors.insert(ErrorCodes::ShardNotFound); + + BSONObjBuilder subobj; // Stores raw responses by ConnectionString + + // Stores all errors; we will remove ignoredErrors later if some shard returned success. + std::vector<std::pair<std::string, Status>> errors; // Stores errors by ConnectionString + BSONElement wcErrorElem; // Stores the first writeConcern error we encounter ShardId wcErrorShardId; // Stores the shardId for the first writeConcern error we encounter bool hasWCError = false; // Whether we have encountered a writeConcern error yet @@ -282,45 +288,43 @@ bool appendRawResponses(OperationContext* opCtx, const auto swShard = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardResponse.shardId); if (ErrorCodes::ShardNotFound == swShard.getStatus().code()) { - // If a shard got removed, ignore its response. + // Store the error by ShardId, since we cannot know the shard connection string, and it + // is only used for reporting. + errors.push_back(std::make_pair(shardResponse.shardId.toString(), swShard.getStatus())); continue; } const auto shard = uassertStatusOK(swShard); const auto shardConnStr = shard->getConnString().toString(); - auto status = shardResponse.swResponse.getStatus(); + Status sendStatus = shardResponse.swResponse.getStatus(); + if (!sendStatus.isOK()) { + // Convert the error status back into the form of a command result and append it as the + // raw response. + BSONObjBuilder statusObjBob; + Command::appendCommandStatus(statusObjBob, sendStatus); + subobj.append(shardConnStr, statusObjBob.obj()); - if (status.isOK()) { - status = getStatusFromCommandResult(shardResponse.swResponse.getValue().data); + errors.push_back(std::make_pair(shardConnStr, sendStatus)); + continue; + } - // Report the first writeConcern error we see. - if (!hasWCError) { - if ((wcErrorElem = shardResponse.swResponse.getValue().data["writeConcernError"])) { - wcErrorShardId = shardResponse.shardId; - hasWCError = true; - } - } + // Got a response from the shard. - if (status.isOK() || ignoredErrors.find(status.code()) != ignoredErrors.end()) { - subobj.append(shardConnStr, - Command::filterCommandReplyForPassthrough( - shardResponse.swResponse.getValue().data)); - continue; - } - } + auto& resObj = shardResponse.swResponse.getValue().data; - errors.append(shardConnStr, status.reason()); + // Append the shard's raw response. + subobj.append(shardConnStr, Command::filterCommandReplyForPassthrough(resObj)); - if (commonErrCode == -1) { - commonErrCode = status.code(); - } else if (commonErrCode != status.code()) { - commonErrCode = 0; + auto commandStatus = getStatusFromCommandResult(resObj); + if (!commandStatus.isOK()) { + errors.push_back(std::make_pair(shardConnStr, std::move(commandStatus))); } - // Convert the error status back into the format of a command result. - BSONObjBuilder statusObjBob; - Command::appendCommandStatus(statusObjBob, status); - subobj.append(shard->getConnString().toString(), statusObjBob.obj()); + // Report the first writeConcern error we see. + if (!hasWCError && (wcErrorElem = resObj["writeConcernError"])) { + wcErrorShardId = shardResponse.shardId; + hasWCError = true; + } } output->append("raw", subobj.done()); @@ -329,7 +333,28 @@ bool appendRawResponses(OperationContext* opCtx, appendWriteConcernErrorToCmdResponse(wcErrorShardId, wcErrorElem, *output); } - BSONObj errobj = errors.done(); + // If any shard returned success, filter out ignored errors + bool someShardReturnedOK = (errors.size() != shardResponses.size()); + + BSONObjBuilder errorBob; + int commonErrCode = -1; + auto it = errors.begin(); + while (it != errors.end()) { + if (someShardReturnedOK && ignoredErrors.find(it->second.code()) != ignoredErrors.end()) { + // Ignore the error. + it = errors.erase(it); + } else { + errorBob.append(it->first, it->second.reason()); + if (commonErrCode == -1) { + commonErrCode = it->second.code(); + } else if (commonErrCode != it->second.code()) { + commonErrCode = 0; + } + ++it; + } + } + BSONObj errobj = errorBob.obj(); + if (!errobj.isEmpty()) { *errmsg = errobj.toString(); diff --git a/src/mongo/s/commands/cluster_commands_helpers.h b/src/mongo/s/commands/cluster_commands_helpers.h index 7c148f14012..de75019b551 100644 --- a/src/mongo/s/commands/cluster_commands_helpers.h +++ b/src/mongo/s/commands/cluster_commands_helpers.h @@ -126,9 +126,9 @@ StatusWith<std::vector<AsyncRequestsSender::Response>> scatterGatherOnlyVersionI * * If all shards that errored had the same error, writes the common error code to 'output'. Writes a * string representation of all errors to 'errmsg.' Errors codes in 'ignoredErrors' are not treated - * as errors. + * as errors if any shard returned success. * - * Returns true if all the shards reported success. + * Returns true if any shard reports success and only ignored errors occur. */ bool appendRawResponses(OperationContext* opCtx, std::string* errmsg, |