diff options
author | Spencer T Brody <spencer@mongodb.com> | 2015-10-15 16:00:31 -0400 |
---|---|---|
committer | Spencer T Brody <spencer@mongodb.com> | 2015-10-16 16:27:53 -0400 |
commit | 7cf6b9bf5a47f1446be71105a4186be924e20a85 (patch) | |
tree | 2468ba86eb63b1cfb29656504a05c3bf2b87e94e | |
parent | af021f6e674f003f1d8227d46f43fe69cdef7606 (diff) | |
download | mongo-7cf6b9bf5a47f1446be71105a4186be924e20a85.tar.gz |
SERVER-20748 Handle epoch mismatch explicitly in cluster find command
-rw-r--r-- | src/mongo/base/error_codes.err | 3 |
-rw-r--r-- | src/mongo/db/commands.cpp | 5 |
-rw-r--r-- | src/mongo/db/query/cursor_response.cpp | 8 |
-rw-r--r-- | src/mongo/s/config.h | 8 |
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 15 |
5 files changed, 34 insertions, 5 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 7044584ab00..d900cb1119e 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -149,6 +149,7 @@ error_code("ExceededMemoryLimit", 146) error_code("ZLibError", 147) error_code("ReadConcernMajorityNotEnabled", 148) error_code("NoConfigMaster", 149) +error_code("StaleEpoch", 150) # Non-sequential error codes (for compatibility only) error_code("RecvStaleConfig", 9996) @@ -171,3 +172,5 @@ error_code("PrepareConfigsFailed", 13104); error_class("NetworkError", ["HostUnreachable", "HostNotFound", "NetworkTimeout"]) error_class("Interruption", ["Interrupted", "InterruptedAtShutdown", "ExceededTimeLimit"]) error_class("NotMasterError", ["NotMaster", "NotMasterNoSlaveOk"]) +error_class("StaleShardingError", + ["RecvStaleConfig", "SendStaleConfig", "StaleShardVersion", "StaleEpoch"]) diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp index ec130328bef..bd3d95e7295 100644 --- a/src/mongo/db/commands.cpp +++ b/src/mongo/db/commands.cpp @@ -424,8 +424,9 @@ void _generateErrorResponse(OperationContext* txn, static_cast<const SendStaleConfigException&>(exception); replyBuilder->setCommandReply(scex.toStatus(), BSON("ns" << scex.getns() << "vReceived" - << scex.getVersionReceived().toBSON() << "vWanted" - << scex.getVersionWanted().toBSON())); + << BSONArray(scex.getVersionReceived().toBSON()) + << "vWanted" + << BSONArray(scex.getVersionWanted().toBSON()))); } else { replyBuilder->setCommandReply(exception.toStatus()); } diff --git a/src/mongo/db/query/cursor_response.cpp b/src/mongo/db/query/cursor_response.cpp index 3c86439f622..2954da683c9 100644 --- a/src/mongo/db/query/cursor_response.cpp +++ b/src/mongo/db/query/cursor_response.cpp @@ -34,6 +34,7 @@ #include "mongo/bson/bsontypes.h" #include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/chunk_version.h" namespace mongo { @@ -97,6 +98,13 @@ CursorResponse& 
CursorResponse::operator=(CursorResponse&& other) { StatusWith<CursorResponse> CursorResponse::parseFromBSON(const BSONObj& cmdResponse) { Status cmdStatus = getStatusFromCommandResult(cmdResponse); if (!cmdStatus.isOK()) { + if (ErrorCodes::isStaleShardingError(cmdStatus.code())) { + auto vWanted = ChunkVersion::fromBSON(cmdResponse, "vWanted"); + auto vReceived = ChunkVersion::fromBSON(cmdResponse, "vReceived"); + if (!vWanted.hasEqualEpoch(vReceived)) { + return Status(ErrorCodes::StaleEpoch, cmdStatus.reason()); + } + } return cmdStatus; } diff --git a/src/mongo/s/config.h b/src/mongo/s/config.h index 39b56f529c1..68a2ca18ae8 100644 --- a/src/mongo/s/config.h +++ b/src/mongo/s/config.h @@ -166,6 +166,10 @@ public: void setPrimary(OperationContext* txn, const std::string& s); + /** + * Returns true if it is successful at loading the DBConfig, false if the database is not found, + * and throws on all other errors. + */ bool load(OperationContext* txn); bool reload(OperationContext* txn); @@ -182,6 +186,10 @@ protected: std::set<ShardId>& shardIds, std::string& errmsg); + /** + * Returns true if it is successful at loading the DBConfig, false if the database is not found, + * and throws on all other errors. 
+ */ bool _load(OperationContext* txn); void _save(OperationContext* txn, bool db = true, bool coll = true); diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 75b1582d085..8b197b81eb5 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -369,8 +369,8 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, } auto status = std::move(cursorId.getStatus()); - if (status != ErrorCodes::SendStaleConfig && status != ErrorCodes::RecvStaleConfig) { - // Errors other than receiving a stale config message from MongoD are fatal to the + if (!ErrorCodes::isStaleShardingError(status.code())) { + // Errors other than receiving a stale metadata message from MongoD are fatal to the // operation. Network errors and replication retries happen at the level of the // AsyncResultsMerger. return status; @@ -379,7 +379,16 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, LOG(1) << "Received error status for query " << query.toStringShort() << " on attempt " << retries << " of " << kMaxStaleConfigRetries << ": " << status; - chunkManager = dbConfig.getValue()->getChunkManagerIfExists(txn, query.nss().ns(), true); + const bool staleEpoch = (status == ErrorCodes::StaleEpoch); + if (staleEpoch) { + if (!dbConfig.getValue()->reload(txn)) { + // If the reload failed that means the database wasn't found, so successfully return + // an empty result set without creating a cursor. + return CursorId(0); + } + } + chunkManager = + dbConfig.getValue()->getChunkManagerIfExists(txn, query.nss().ns(), true, staleEpoch); if (!chunkManager) { dbConfig.getValue()->getChunkManagerOrPrimary( txn, query.nss().ns(), chunkManager, primary); |