diff options
author | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2015-10-13 15:39:13 -0400 |
---|---|---|
committer | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2015-10-14 11:31:14 -0400 |
commit | bededc2f0496162a188d66835638a2dc714b8b98 (patch) | |
tree | 84338711ab925d867239428eadfb750d605f57da | |
parent | 5eaf403f3659f7e4b6372be0f50d82863b704b47 (diff) | |
download | mongo-bededc2f0496162a188d66835638a2dc714b8b98.tar.gz |
SERVER-20812 Tag vote request connection to skip closing it when relinquishing primary
-rw-r--r-- | jstests/replsets/two_nodes_priority_take_over.js | 60 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_request_votes.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/repl/replset_commands.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/repl/vote_requester.cpp | 2 | ||||
-rw-r--r-- | src/mongo/util/net/message_port.cpp | 4 |
5 files changed, 97 insertions, 5 deletions
diff --git a/jstests/replsets/two_nodes_priority_take_over.js b/jstests/replsets/two_nodes_priority_take_over.js new file mode 100644 index 00000000000..020e2f5bf30 --- /dev/null +++ b/jstests/replsets/two_nodes_priority_take_over.js @@ -0,0 +1,60 @@ +// SERVER-20812 Current primary rejects vote request from higher-priority node +// because of stepping down. In a two node replset, this rejection will prevent +// smooth priority takeover. + +load("jstests/replsets/rslib.js"); + +(function() { + +"use strict"; +var name = "two_nodes_priority_take_over"; +var rst = new ReplSetTest({name: name, nodes: 2}); + +rst.startSet(); +var conf = rst.getReplSetConfig(); +conf.members[0].priority = 2; +conf.members[1].priority = 1; +rst.initiate(conf); +rst.awaitSecondaryNodes(); +// Set verbosity for replication on all nodes. +var verbosity = { + "setParameter" : 1, + "logComponentVerbosity" : { + "verbosity": 4, + "storage" : { "verbosity" : 1 } + } +} +rst.nodes.forEach(function (node) {node.adminCommand(verbosity)}); + +// The first node will be the primary at the beginning. +rst.waitForState(rst.nodes[0], rst.PRIMARY, 60 * 1000); + +// Get the term when replset is stable. +var res = rst.getPrimary().adminCommand("replSetGetStatus"); +assert.commandWorked(res); +var stableTerm = res.term; + +// Reconfig to change priorities. The current primary remains the same until +// the higher priority node takes over. +var conf = rst.getReplSetConfig(); +conf.members[0].priority = 1; +conf.members[1].priority = 2; +conf.version = 2; +reconfig(rst, conf); + +// The second node will take over the primary. +rst.waitForState(rst.nodes[1], rst.PRIMARY, 60 * 1000); + +res = rst.getPrimary().adminCommand("replSetGetStatus"); +assert.commandWorked(res); +var newTerm = res.term; + +// Priority takeover should happen smoothly without failed election as there is +// no current candidate. If vote requests failed (wrongly) for some reason, +// nodes have to start new elections, which increase the term unnecessarily. +res = rst.getPrimary().adminCommand("replSetGetConfig"); +assert.commandWorked(res); +if (res.config.protocolVersion == 1) { + assert.eq(newTerm, stableTerm + 1); +} +})(); diff --git a/src/mongo/db/repl/repl_set_request_votes.cpp b/src/mongo/db/repl/repl_set_request_votes.cpp index 92a72a1a6c4..0c0f209d1b6 100644 --- a/src/mongo/db/repl/repl_set_request_votes.cpp +++ b/src/mongo/db/repl/repl_set_request_votes.cpp @@ -28,10 +28,14 @@ #include "mongo/platform/basic.h" +#include "mongo/db/client.h" +#include "mongo/db/operation_context.h" #include "mongo/db/repl/optime.h" #include "mongo/db/repl/repl_set_command.h" #include "mongo/db/repl/repl_set_request_votes_args.h" #include "mongo/db/repl/replication_coordinator_global.h" +#include "mongo/executor/network_interface.h" +#include "mongo/util/scopeguard.h" namespace mongo { namespace repl { @@ -58,6 +62,21 @@ private: return appendCommandStatus(result, status); } + // We want to keep request vote connection open when relinquishing primary. + // Tag it here. + unsigned originalTag = 0; + AbstractMessagingPort* mp = txn->getClient()->port(); + if (mp) { + originalTag = mp->tag; + mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen; + } + // Untag the connection on exit. + ON_BLOCK_EXIT([mp, originalTag]() { + if (mp) { + mp->tag = originalTag; + } + }); + ReplSetRequestVotesResponse response; status = getGlobalReplicationCoordinator()->processReplSetRequestVotes( txn, parsedArgs, &response); diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp index 25892d53ade..929a6d8b729 100644 --- a/src/mongo/db/repl/replset_commands.cpp +++ b/src/mongo/db/repl/replset_commands.cpp @@ -57,6 +57,7 @@ #include "mongo/executor/network_interface.h" #include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" namespace mongo { namespace repl { @@ -694,12 +695,20 @@ public: /* we want to keep heartbeat connections open when relinquishing primary. tag them here. */ - { - AbstractMessagingPort* mp = txn->getClient()->port(); - if (mp) - mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen; + AbstractMessagingPort* mp = txn->getClient()->port(); + unsigned originalTag = 0; + if (mp) { + originalTag = mp->tag; + mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen; } + // Unset the tag on block exit + ON_BLOCK_EXIT([mp, originalTag]() { + if (mp) { + mp->tag = originalTag; + } + }); + // Process heartbeat based on the version of request. The missing fields in mismatched // version will be empty. if (isHeartbeatRequestV1(cmdObj)) { diff --git a/src/mongo/db/repl/vote_requester.cpp b/src/mongo/db/repl/vote_requester.cpp index 61b29ac4924..525667abf64 100644 --- a/src/mongo/db/repl/vote_requester.cpp +++ b/src/mongo/db/repl/vote_requester.cpp @@ -107,6 +107,8 @@ void VoteRequester::Algorithm::processResponse(const RemoteCommandRequest& reque } if (voteResponse.getVoteGranted()) { + LOG(3) << "VoteRequester: Got yes vote from " << request.target + << ", resp:" << response.getValue().data; _votes++; } else { log() << "VoteRequester: Got no vote from " << request.target diff --git a/src/mongo/util/net/message_port.cpp b/src/mongo/util/net/message_port.cpp index 23d1ebac3ee..d3abc93fbb2 100644 --- a/src/mongo/util/net/message_port.cpp +++ b/src/mongo/util/net/message_port.cpp @@ -79,8 +79,10 @@ public: void closeAll(unsigned skip_mask) { stdx::lock_guard<stdx::mutex> bl(m); for (std::set<MessagingPort*>::iterator i = ports.begin(); i != ports.end(); i++) { - if ((*i)->tag & skip_mask) + if ((*i)->tag & skip_mask) { + LOG(3) << "Skip closing connection # " << (*i)->connectionId(); continue; + } (*i)->shutdown(); } } |