summaryrefslogtreecommitdiff
path: root/ovsdb
diff options
context:
space:
mode:
authorIlya Maximets <i.maximets@ovn.org>2022-01-28 19:51:21 +0100
committerIlya Maximets <i.maximets@ovn.org>2022-02-25 14:15:12 +0100
commit999ba294fb4f9a39db77070f0b2045bf166c2287 (patch)
tree617a06ecffd9fb881f137dc386436cfd03daedc1 /ovsdb
parent6de8868d19eadd253f3301bce2bf52c981bc5fc4 (diff)
downloadopenvswitch-999ba294fb4f9a39db77070f0b2045bf166c2287.tar.gz
ovsdb: raft: Fix inability to join the cluster after interrupted attempt.
If the joining server re-connects while catching up (e.g. if it crashed or the connection got closed due to inactivity), the data we sent might be lost, so the server will never reply to an append request or a snapshot installation request. At the same time, the leader will decline all subsequent requests to join from that server with the 'in progress' resolution. At this point the new server will never be able to join the cluster, because it will never receive the raft log while the leader thinks that it was already sent. This happened in practice when one of the servers got preempted for a few seconds, so the leader closed the connection due to inactivity. Destroy the joining server if a disconnection is detected. This will allow the joining to start from scratch when the server re-connects and sends a new join request. We can't track re-connection in raft_conn_run(), because it's an incoming connection and the jsonrpc will not keep it alive or try to reconnect. The next time the server re-connects it will be an entirely new raft conn. Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") Reported-at: https://bugzilla.redhat.com/2033514 Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Acked-by: Dumitru Ceara <dceara@redhat.com>
Diffstat (limited to 'ovsdb')
-rw-r--r--ovsdb/raft.c38
1 file changed, 31 insertions, 7 deletions
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 1a3447a8d..855404808 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -74,6 +74,7 @@ enum raft_failure_test {
FT_CRASH_BEFORE_SEND_EXEC_REQ,
FT_CRASH_AFTER_SEND_EXEC_REQ,
FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE,
+ FT_CRASH_BEFORE_SEND_SNAPSHOT_REP,
FT_DELAY_ELECTION,
FT_DONT_SEND_VOTE_REQUEST,
FT_STOP_RAFT_RPC,
@@ -379,12 +380,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
static void raft_run_reconfigure(struct raft *);
static void raft_set_leader(struct raft *, const struct uuid *sid);
+
static struct raft_server *
raft_find_server(const struct raft *raft, const struct uuid *sid)
{
return raft_server_find(&raft->servers, sid);
}
+static struct raft_server *
+raft_find_new_server(struct raft *raft, const struct uuid *uuid)
+{
+ return raft_server_find(&raft->add_servers, uuid);
+}
+
static char *
raft_make_address_passive(const char *address_)
{
@@ -1867,6 +1875,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid)
static void
raft_conn_close(struct raft_conn *conn)
{
+ VLOG_DBG("closing connection to server %s (%s)",
+ conn->nickname, jsonrpc_session_get_name(conn->js));
jsonrpc_session_close(conn->js);
ovs_list_remove(&conn->list_node);
free(conn->nickname);
@@ -1957,16 +1967,30 @@ raft_run(struct raft *raft)
}
/* Close unneeded sessions. */
+ struct raft_server *server;
struct raft_conn *next;
LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
if (!raft_conn_should_stay_open(raft, conn)) {
+ server = raft_find_new_server(raft, &conn->sid);
+ if (server) {
+ /* We only have one incoming connection from joining servers,
+ * so if it's closed, we need to destroy the record about the
+ * server. This way the process can be started over on the
+ * next join request. */
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) "
+ "disconnected while joining",
+ CID_ARGS(&raft->cid),
+ server->nickname, server->address);
+ hmap_remove(&raft->add_servers, &server->hmap_node);
+ raft_server_destroy(server);
+ }
raft->n_disconnections++;
raft_conn_close(conn);
}
}
/* Open needed sessions. */
- struct raft_server *server;
HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
raft_open_conn(raft, server->address, &server->sid);
}
@@ -3354,12 +3378,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid)
return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL;
}
-static struct raft_server *
-raft_find_new_server(struct raft *raft, const struct uuid *uuid)
-{
- return raft_server_find(&raft->add_servers, uuid);
-}
-
/* Figure 3.1: "If there exists an N such that N > commitIndex, a
* majority of matchIndex[i] >= N, and log[N].term == currentTerm, set
* commitIndex = N (sections 3.5 and 3.6)." */
@@ -4142,6 +4160,10 @@ static void
raft_handle_install_snapshot_request(
struct raft *raft, const struct raft_install_snapshot_request *rq)
{
+ if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) {
+ ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply");
+ }
+
if (raft_handle_install_snapshot_request__(raft, rq)) {
union raft_rpc rpy = {
.install_snapshot_reply = {
@@ -4940,6 +4962,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ;
} else if (!strcmp(test, "crash-after-receiving-append-request-update")) {
failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE;
+ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) {
+ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP;
} else if (!strcmp(test, "delay-election")) {
failure_test = FT_DELAY_ELECTION;
struct raft *raft;