summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Spencer T Brody <spencer@mongodb.com> 2015-10-15 16:00:31 -0400
committer: Spencer T Brody <spencer@mongodb.com> 2015-10-16 16:27:53 -0400
commit: 7cf6b9bf5a47f1446be71105a4186be924e20a85 (patch)
tree: 2468ba86eb63b1cfb29656504a05c3bf2b87e94e
parent: af021f6e674f003f1d8227d46f43fe69cdef7606 (diff)
download: mongo-7cf6b9bf5a47f1446be71105a4186be924e20a85.tar.gz
SERVER-20748 Handle epoch mismatch explicitly in cluster find command
-rw-r--r-- src/mongo/base/error_codes.err 3
-rw-r--r-- src/mongo/db/commands.cpp 5
-rw-r--r-- src/mongo/db/query/cursor_response.cpp 8
-rw-r--r-- src/mongo/s/config.h 8
-rw-r--r-- src/mongo/s/query/cluster_find.cpp 15
5 files changed, 34 insertions, 5 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index 7044584ab00..d900cb1119e 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -149,6 +149,7 @@ error_code("ExceededMemoryLimit", 146)
error_code("ZLibError", 147)
error_code("ReadConcernMajorityNotEnabled", 148)
error_code("NoConfigMaster", 149)
+error_code("StaleEpoch", 150)
# Non-sequential error codes (for compatibility only)
error_code("RecvStaleConfig", 9996)
@@ -171,3 +172,5 @@ error_code("PrepareConfigsFailed", 13104);
error_class("NetworkError", ["HostUnreachable", "HostNotFound", "NetworkTimeout"])
error_class("Interruption", ["Interrupted", "InterruptedAtShutdown", "ExceededTimeLimit"])
error_class("NotMasterError", ["NotMaster", "NotMasterNoSlaveOk"])
+error_class("StaleShardingError",
+ ["RecvStaleConfig", "SendStaleConfig", "StaleShardVersion", "StaleEpoch"])
diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp
index ec130328bef..bd3d95e7295 100644
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -424,8 +424,9 @@ void _generateErrorResponse(OperationContext* txn,
static_cast<const SendStaleConfigException&>(exception);
replyBuilder->setCommandReply(scex.toStatus(),
BSON("ns" << scex.getns() << "vReceived"
- << scex.getVersionReceived().toBSON() << "vWanted"
- << scex.getVersionWanted().toBSON()));
+ << BSONArray(scex.getVersionReceived().toBSON())
+ << "vWanted"
+ << BSONArray(scex.getVersionWanted().toBSON())));
} else {
replyBuilder->setCommandReply(exception.toStatus());
}
diff --git a/src/mongo/db/query/cursor_response.cpp b/src/mongo/db/query/cursor_response.cpp
index 3c86439f622..2954da683c9 100644
--- a/src/mongo/db/query/cursor_response.cpp
+++ b/src/mongo/db/query/cursor_response.cpp
@@ -34,6 +34,7 @@
#include "mongo/bson/bsontypes.h"
#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/s/chunk_version.h"
namespace mongo {
@@ -97,6 +98,13 @@ CursorResponse& CursorResponse::operator=(CursorResponse&& other) {
StatusWith<CursorResponse> CursorResponse::parseFromBSON(const BSONObj& cmdResponse) {
Status cmdStatus = getStatusFromCommandResult(cmdResponse);
if (!cmdStatus.isOK()) {
+ if (ErrorCodes::isStaleShardingError(cmdStatus.code())) {
+ auto vWanted = ChunkVersion::fromBSON(cmdResponse, "vWanted");
+ auto vReceived = ChunkVersion::fromBSON(cmdResponse, "vReceived");
+ if (!vWanted.hasEqualEpoch(vReceived)) {
+ return Status(ErrorCodes::StaleEpoch, cmdStatus.reason());
+ }
+ }
return cmdStatus;
}
diff --git a/src/mongo/s/config.h b/src/mongo/s/config.h
index 39b56f529c1..68a2ca18ae8 100644
--- a/src/mongo/s/config.h
+++ b/src/mongo/s/config.h
@@ -166,6 +166,10 @@ public:
void setPrimary(OperationContext* txn, const std::string& s);
+ /**
+ * Returns true if it is successful at loading the DBConfig, false if the database is not found,
+ * and throws on all other errors.
+ */
bool load(OperationContext* txn);
bool reload(OperationContext* txn);
@@ -182,6 +186,10 @@ protected:
std::set<ShardId>& shardIds,
std::string& errmsg);
+ /**
+ * Returns true if it is successful at loading the DBConfig, false if the database is not found,
+ * and throws on all other errors.
+ */
bool _load(OperationContext* txn);
void _save(OperationContext* txn, bool db = true, bool coll = true);
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 75b1582d085..8b197b81eb5 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -369,8 +369,8 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn,
}
auto status = std::move(cursorId.getStatus());
- if (status != ErrorCodes::SendStaleConfig && status != ErrorCodes::RecvStaleConfig) {
- // Errors other than receiving a stale config message from MongoD are fatal to the
+ if (!ErrorCodes::isStaleShardingError(status.code())) {
+ // Errors other than receiving a stale metadata message from MongoD are fatal to the
// operation. Network errors and replication retries happen at the level of the
// AsyncResultsMerger.
return status;
@@ -379,7 +379,16 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn,
LOG(1) << "Received error status for query " << query.toStringShort() << " on attempt "
<< retries << " of " << kMaxStaleConfigRetries << ": " << status;
- chunkManager = dbConfig.getValue()->getChunkManagerIfExists(txn, query.nss().ns(), true);
+ const bool staleEpoch = (status == ErrorCodes::StaleEpoch);
+ if (staleEpoch) {
+ if (!dbConfig.getValue()->reload(txn)) {
+ // If the reload failed that means the database wasn't found, so successfully return
+ // an empty result set without creating a cursor.
+ return CursorId(0);
+ }
+ }
+ chunkManager =
+ dbConfig.getValue()->getChunkManagerIfExists(txn, query.nss().ns(), true, staleEpoch);
if (!chunkManager) {
dbConfig.getValue()->getChunkManagerOrPrimary(
txn, query.nss().ns(), chunkManager, primary);