diff options
author | Ben Pfaff <blp@ovn.org> | 2018-11-13 13:25:08 -0800 |
---|---|---|
committer | Ben Pfaff <blp@ovn.org> | 2018-11-19 08:47:49 -0800 |
commit | 17bd414951e7ad5a6e1de9c17b6f18b2658ea351 (patch) | |
tree | cdbc364347439fc929ab1f9363b678509779dd15 /ovsdb/raft.c | |
parent | e8208c6617d314546eff366a6e8d6b5ff42c1f47 (diff) | |
download | openvswitch-17bd414951e7ad5a6e1de9c17b6f18b2658ea351.tar.gz |
raft: Fix notifications when a server leaves the cluster.
When server A sends the leader a request to remove server B from the
cluster, where A != B, the leader sends both A and B a notification when
the removal is complete. Until now, however, the notification (which is a
raft_remove_server_reply message) did not say which server had been
removed, and the receiver did not check. Instead, the receiver assumed
that it had been removed. The result was that B was removed and A stopped
serving out the database even though it was still part of the cluster.
This commit fixes the problem.
Reported-by: ramteja tadishetti <ramtejatadishetti@gmail.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'ovsdb/raft.c')
-rw-r--r-- | ovsdb/raft.c | 50 |
1 files changed, 38 insertions, 12 deletions
diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 07884820e..753881586 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -296,6 +296,7 @@ static void raft_send_remove_server_reply__( struct raft *, const struct uuid *target_sid, const struct uuid *requester_sid, struct unixctl_conn *requester_conn, bool success, const char *comment); +static void raft_finished_leaving_cluster(struct raft *); static void raft_server_init_leader(struct raft *, struct raft_server *); @@ -303,6 +304,7 @@ static bool raft_rpc_is_heartbeat(const union raft_rpc *); static bool raft_is_rpc_synced(const struct raft *, const union raft_rpc *); static void raft_handle_rpc(struct raft *, const union raft_rpc *); + static bool raft_send_at(struct raft *, const union raft_rpc *, int line_number); #define raft_send(raft, rpc) raft_send_at(raft, rpc, __LINE__) @@ -2197,16 +2199,28 @@ raft_send_add_server_reply__(struct raft *raft, const struct uuid *sid, } static void -raft_send_remove_server_reply_rpc(struct raft *raft, const struct uuid *sid, +raft_send_remove_server_reply_rpc(struct raft *raft, + const struct uuid *dst_sid, + const struct uuid *target_sid, bool success, const char *comment) { + if (uuid_equals(&raft->sid, dst_sid)) { + if (success && uuid_equals(&raft->sid, target_sid)) { + raft_finished_leaving_cluster(raft); + } + return; + } + const union raft_rpc rpy = { .remove_server_reply = { .common = { .type = RAFT_RPC_REMOVE_SERVER_REPLY, - .sid = *sid, + .sid = *dst_sid, .comment = CONST_CAST(char *, comment), }, + .target_sid = (uuid_equals(dst_sid, target_sid) + ? 
UUID_ZERO + : *target_sid), .success = success, } }; @@ -2235,6 +2249,9 @@ raft_send_remove_server_reply__(struct raft *raft, } else { char buf[SID_LEN + 1]; ds_put_cstr(&s, raft_get_nickname(raft, target_sid, buf, sizeof buf)); + if (uuid_equals(target_sid, &raft->sid)) { + ds_put_cstr(&s, " (ourselves)"); + } } ds_put_format(&s, " from cluster "CID_FMT" %s", CID_ARGS(&raft->cid), @@ -2251,11 +2268,12 @@ raft_send_remove_server_reply__(struct raft *raft, * allows it to be sure that it's really removed and update its log and * disconnect permanently. */ if (!uuid_is_zero(requester_sid)) { - raft_send_remove_server_reply_rpc(raft, requester_sid, + raft_send_remove_server_reply_rpc(raft, requester_sid, target_sid, success, comment); } if (!uuid_equals(requester_sid, target_sid)) { - raft_send_remove_server_reply_rpc(raft, target_sid, success, comment); + raft_send_remove_server_reply_rpc(raft, target_sid, target_sid, + success, comment); } if (requester_conn) { if (success) { @@ -3559,17 +3577,25 @@ raft_handle_remove_server_request(struct raft *raft, } static void -raft_handle_remove_server_reply(struct raft *raft, - const struct raft_remove_server_reply *rpc) +raft_finished_leaving_cluster(struct raft *raft) { - if (rpc->success) { - VLOG_INFO(SID_FMT": finished leaving cluster "CID_FMT, - SID_ARGS(&raft->sid), CID_ARGS(&raft->cid)); + VLOG_INFO(SID_FMT": finished leaving cluster "CID_FMT, + SID_ARGS(&raft->sid), CID_ARGS(&raft->cid)); - raft_record_note(raft, "left", "this server left the cluster"); + raft_record_note(raft, "left", "this server left the cluster"); + + raft->leaving = false; + raft->left = true; +} - raft->leaving = false; - raft->left = true; +static void +raft_handle_remove_server_reply(struct raft *raft, + const struct raft_remove_server_reply *rpc) +{ + if (rpc->success + && (uuid_is_zero(&rpc->target_sid) + || uuid_equals(&rpc->target_sid, &raft->sid))) { + raft_finished_leaving_cluster(raft); } } |