summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-10-13 15:39:13 -0400
committer Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-10-14 11:31:14 -0400
commit bededc2f0496162a188d66835638a2dc714b8b98 (patch)
tree 84338711ab925d867239428eadfb750d605f57da
parent 5eaf403f3659f7e4b6372be0f50d82863b704b47 (diff)
download mongo-bededc2f0496162a188d66835638a2dc714b8b98.tar.gz
SERVER-20812 Tag vote request connection to skip closing it when relinquishing primary
-rw-r--r-- jstests/replsets/two_nodes_priority_take_over.js 60
-rw-r--r-- src/mongo/db/repl/repl_set_request_votes.cpp 19
-rw-r--r-- src/mongo/db/repl/replset_commands.cpp 17
-rw-r--r-- src/mongo/db/repl/vote_requester.cpp 2
-rw-r--r-- src/mongo/util/net/message_port.cpp 4
5 files changed, 97 insertions, 5 deletions
diff --git a/jstests/replsets/two_nodes_priority_take_over.js b/jstests/replsets/two_nodes_priority_take_over.js
new file mode 100644
index 00000000000..020e2f5bf30
--- /dev/null
+++ b/jstests/replsets/two_nodes_priority_take_over.js
@@ -0,0 +1,60 @@
+// SERVER-20812 Current primary rejects vote request from higher-priority node
+// because of stepping down. In a two node replset, this rejection will prevent
+// smooth priority takeover.
+
+load("jstests/replsets/rslib.js");
+
+(function() {
+
+"use strict";
+var name = "two_nodes_priority_take_over";
+var rst = new ReplSetTest({name: name, nodes: 2});
+
+rst.startSet();
+var conf = rst.getReplSetConfig();
+conf.members[0].priority = 2;
+conf.members[1].priority = 1;
+rst.initiate(conf);
+rst.awaitSecondaryNodes();
+// Set verbosity for replication on all nodes.
+var verbosity = {
+ "setParameter" : 1,
+ "logComponentVerbosity" : {
+ "verbosity": 4,
+ "storage" : { "verbosity" : 1 }
+ }
+}
+rst.nodes.forEach(function (node) {node.adminCommand(verbosity)});
+
+// The first node will be the primary at the beginning.
+rst.waitForState(rst.nodes[0], rst.PRIMARY, 60 * 1000);
+
+// Get the term when replset is stable.
+var res = rst.getPrimary().adminCommand("replSetGetStatus");
+assert.commandWorked(res);
+var stableTerm = res.term;
+
+// Reconfig to change priorities. The current primary remains the same until
+// the higher priority node takes over.
+var conf = rst.getReplSetConfig();
+conf.members[0].priority = 1;
+conf.members[1].priority = 2;
+conf.version = 2;
+reconfig(rst, conf);
+
+// The second node will take over the primary.
+rst.waitForState(rst.nodes[1], rst.PRIMARY, 60 * 1000);
+
+res = rst.getPrimary().adminCommand("replSetGetStatus");
+assert.commandWorked(res);
+var newTerm = res.term;
+
+// Priority takeover should happen smoothly, without a failed election, as there
+// is no competing candidate. If vote requests failed (wrongly) for some reason,
+// nodes would have to start new elections, which increases the term unnecessarily.
+res = rst.getPrimary().adminCommand("replSetGetConfig");
+assert.commandWorked(res);
+if (res.config.protocolVersion == 1) {
+ assert.eq(newTerm, stableTerm + 1);
+}
+})();
diff --git a/src/mongo/db/repl/repl_set_request_votes.cpp b/src/mongo/db/repl/repl_set_request_votes.cpp
index 92a72a1a6c4..0c0f209d1b6 100644
--- a/src/mongo/db/repl/repl_set_request_votes.cpp
+++ b/src/mongo/db/repl/repl_set_request_votes.cpp
@@ -28,10 +28,14 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/client.h"
+#include "mongo/db/operation_context.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/repl_set_command.h"
#include "mongo/db/repl/repl_set_request_votes_args.h"
#include "mongo/db/repl/replication_coordinator_global.h"
+#include "mongo/executor/network_interface.h"
+#include "mongo/util/scopeguard.h"
namespace mongo {
namespace repl {
@@ -58,6 +62,21 @@ private:
return appendCommandStatus(result, status);
}
+ // We want to keep request vote connection open when relinquishing primary.
+ // Tag it here.
+ unsigned originalTag = 0;
+ AbstractMessagingPort* mp = txn->getClient()->port();
+ if (mp) {
+ originalTag = mp->tag;
+ mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
+ }
+ // Untag the connection on exit.
+ ON_BLOCK_EXIT([mp, originalTag]() {
+ if (mp) {
+ mp->tag = originalTag;
+ }
+ });
+
ReplSetRequestVotesResponse response;
status = getGlobalReplicationCoordinator()->processReplSetRequestVotes(
txn, parsedArgs, &response);
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 25892d53ade..929a6d8b729 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -57,6 +57,7 @@
#include "mongo/executor/network_interface.h"
#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
namespace mongo {
namespace repl {
@@ -694,12 +695,20 @@ public:
/* we want to keep heartbeat connections open when relinquishing primary.
tag them here. */
- {
- AbstractMessagingPort* mp = txn->getClient()->port();
- if (mp)
- mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
+ AbstractMessagingPort* mp = txn->getClient()->port();
+ unsigned originalTag = 0;
+ if (mp) {
+ originalTag = mp->tag;
+ mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
}
+ // Unset the tag on block exit
+ ON_BLOCK_EXIT([mp, originalTag]() {
+ if (mp) {
+ mp->tag = originalTag;
+ }
+ });
+
// Process heartbeat based on the version of request. The missing fields in mismatched
// version will be empty.
if (isHeartbeatRequestV1(cmdObj)) {
diff --git a/src/mongo/db/repl/vote_requester.cpp b/src/mongo/db/repl/vote_requester.cpp
index 61b29ac4924..525667abf64 100644
--- a/src/mongo/db/repl/vote_requester.cpp
+++ b/src/mongo/db/repl/vote_requester.cpp
@@ -107,6 +107,8 @@ void VoteRequester::Algorithm::processResponse(const RemoteCommandRequest& reque
}
if (voteResponse.getVoteGranted()) {
+ LOG(3) << "VoteRequester: Got yes vote from " << request.target
+ << ", resp:" << response.getValue().data;
_votes++;
} else {
log() << "VoteRequester: Got no vote from " << request.target
diff --git a/src/mongo/util/net/message_port.cpp b/src/mongo/util/net/message_port.cpp
index 23d1ebac3ee..d3abc93fbb2 100644
--- a/src/mongo/util/net/message_port.cpp
+++ b/src/mongo/util/net/message_port.cpp
@@ -79,8 +79,10 @@ public:
void closeAll(unsigned skip_mask) {
stdx::lock_guard<stdx::mutex> bl(m);
for (std::set<MessagingPort*>::iterator i = ports.begin(); i != ports.end(); i++) {
- if ((*i)->tag & skip_mask)
+ if ((*i)->tag & skip_mask) {
+ LOG(3) << "Skip closing connection # " << (*i)->connectionId();
continue;
+ }
(*i)->shutdown();
}
}