summaryrefslogtreecommitdiff
path: root/ovsdb
diff options
context:
space:
mode:
authorBen Pfaff <blp@ovn.org>2018-11-13 13:25:08 -0800
committerBen Pfaff <blp@ovn.org>2018-11-19 08:47:49 -0800
commit17bd414951e7ad5a6e1de9c17b6f18b2658ea351 (patch)
treecdbc364347439fc929ab1f9363b678509779dd15 /ovsdb
parente8208c6617d314546eff366a6e8d6b5ff42c1f47 (diff)
downloadopenvswitch-17bd414951e7ad5a6e1de9c17b6f18b2658ea351.tar.gz
raft: Fix notifications when a server leaves the cluster.
When server A sends the leader a request to remove server B from the cluster, where A != B, the leader sends both A and B a notification when the removal is complete. Until now, however, the notification (which is a raft_remove_server_reply message) did not say which server had been removed, and the receiver did not check. Instead, the receiver assumed that it had been removed. The result was that B was removed and A stopped serving out the database even though it was still part of the cluster, This commit fixes the problem. Reported-by: ramteja tadishetti <ramtejatadishetti@gmail.com> Acked-by: Mark Michelson <mmichels@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'ovsdb')
-rw-r--r--ovsdb/raft-rpc.c5
-rw-r--r--ovsdb/raft-rpc.h7
-rw-r--r--ovsdb/raft.c50
3 files changed, 50 insertions, 12 deletions
diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c
index 13aee0c4b..56c07d487 100644
--- a/ovsdb/raft-rpc.c
+++ b/ovsdb/raft-rpc.c
@@ -460,6 +460,10 @@ static void
raft_remove_server_reply_to_jsonrpc(const struct raft_remove_server_reply *rpy,
struct json *args)
{
+ if (!uuid_is_zero(&rpy->target_sid)) {
+ json_object_put_format(args, "target_server",
+ UUID_FMT, UUID_ARGS(&rpy->target_sid));
+ }
json_object_put(args, "success", json_boolean_create(rpy->success));
}
@@ -468,6 +472,7 @@ raft_remove_server_reply_from_jsonrpc(struct ovsdb_parser *p,
struct raft_remove_server_reply *rpy)
{
rpy->success = raft_parse_required_boolean(p, "success");
+ raft_parse_optional_uuid(p, "target_server", &rpy->target_sid);
}
static void
diff --git a/ovsdb/raft-rpc.h b/ovsdb/raft-rpc.h
index 15ddf0128..bdc3429cc 100644
--- a/ovsdb/raft-rpc.h
+++ b/ovsdb/raft-rpc.h
@@ -205,6 +205,13 @@ struct raft_add_server_reply {
struct raft_remove_server_reply {
struct raft_rpc_common common;
bool success;
+
+ /* SID of the removed server, but all-zeros if it is the same as the
+ * destination of the RPC. (Older ovsdb-server did not have 'target_sid'
+ * and assumed that the destination was always the target, so by omitting
+ * 'target_sid' when this is the case we can preserve a small amount of
+ * inter-version compatibility.) */
+ struct uuid target_sid;
};
struct raft_install_snapshot_request {
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 07884820e..753881586 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -296,6 +296,7 @@ static void raft_send_remove_server_reply__(
struct raft *, const struct uuid *target_sid,
const struct uuid *requester_sid, struct unixctl_conn *requester_conn,
bool success, const char *comment);
+static void raft_finished_leaving_cluster(struct raft *);
static void raft_server_init_leader(struct raft *, struct raft_server *);
@@ -303,6 +304,7 @@ static bool raft_rpc_is_heartbeat(const union raft_rpc *);
static bool raft_is_rpc_synced(const struct raft *, const union raft_rpc *);
static void raft_handle_rpc(struct raft *, const union raft_rpc *);
+
static bool raft_send_at(struct raft *, const union raft_rpc *,
int line_number);
#define raft_send(raft, rpc) raft_send_at(raft, rpc, __LINE__)
@@ -2197,16 +2199,28 @@ raft_send_add_server_reply__(struct raft *raft, const struct uuid *sid,
}
static void
-raft_send_remove_server_reply_rpc(struct raft *raft, const struct uuid *sid,
+raft_send_remove_server_reply_rpc(struct raft *raft,
+ const struct uuid *dst_sid,
+ const struct uuid *target_sid,
bool success, const char *comment)
{
+ if (uuid_equals(&raft->sid, dst_sid)) {
+ if (success && uuid_equals(&raft->sid, target_sid)) {
+ raft_finished_leaving_cluster(raft);
+ }
+ return;
+ }
+
const union raft_rpc rpy = {
.remove_server_reply = {
.common = {
.type = RAFT_RPC_REMOVE_SERVER_REPLY,
- .sid = *sid,
+ .sid = *dst_sid,
.comment = CONST_CAST(char *, comment),
},
+ .target_sid = (uuid_equals(dst_sid, target_sid)
+ ? UUID_ZERO
+ : *target_sid),
.success = success,
}
};
@@ -2235,6 +2249,9 @@ raft_send_remove_server_reply__(struct raft *raft,
} else {
char buf[SID_LEN + 1];
ds_put_cstr(&s, raft_get_nickname(raft, target_sid, buf, sizeof buf));
+ if (uuid_equals(target_sid, &raft->sid)) {
+ ds_put_cstr(&s, " (ourselves)");
+ }
}
ds_put_format(&s, " from cluster "CID_FMT" %s",
CID_ARGS(&raft->cid),
@@ -2251,11 +2268,12 @@ raft_send_remove_server_reply__(struct raft *raft,
* allows it to be sure that it's really removed and update its log and
* disconnect permanently. */
if (!uuid_is_zero(requester_sid)) {
- raft_send_remove_server_reply_rpc(raft, requester_sid,
+ raft_send_remove_server_reply_rpc(raft, requester_sid, target_sid,
success, comment);
}
if (!uuid_equals(requester_sid, target_sid)) {
- raft_send_remove_server_reply_rpc(raft, target_sid, success, comment);
+ raft_send_remove_server_reply_rpc(raft, target_sid, target_sid,
+ success, comment);
}
if (requester_conn) {
if (success) {
@@ -3559,17 +3577,25 @@ raft_handle_remove_server_request(struct raft *raft,
}
static void
-raft_handle_remove_server_reply(struct raft *raft,
- const struct raft_remove_server_reply *rpc)
+raft_finished_leaving_cluster(struct raft *raft)
{
- if (rpc->success) {
- VLOG_INFO(SID_FMT": finished leaving cluster "CID_FMT,
- SID_ARGS(&raft->sid), CID_ARGS(&raft->cid));
+ VLOG_INFO(SID_FMT": finished leaving cluster "CID_FMT,
+ SID_ARGS(&raft->sid), CID_ARGS(&raft->cid));
- raft_record_note(raft, "left", "this server left the cluster");
+ raft_record_note(raft, "left", "this server left the cluster");
+
+ raft->leaving = false;
+ raft->left = true;
+}
- raft->leaving = false;
- raft->left = true;
+static void
+raft_handle_remove_server_reply(struct raft *raft,
+ const struct raft_remove_server_reply *rpc)
+{
+ if (rpc->success
+ && (uuid_is_zero(&rpc->target_sid)
+ || uuid_equals(&rpc->target_sid, &raft->sid))) {
+ raft_finished_leaving_cluster(raft);
}
}