summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBeniamino Galvani <bgalvani@redhat.com>2020-04-03 09:13:51 +0200
committerThomas Haller <thaller@redhat.com>2020-08-17 17:44:36 +0200
commit166ad887f9d2fa361dcc6e0926e21e25c7a4ce2d (patch)
treec766852d9a12940b11ab5f9860728a673012e716
parente62afcf0bd9172825294802ef13de8d55714ccc0 (diff)
downloadNetworkManager-166ad887f9d2fa361dcc6e0926e21e25c7a4ce2d.tar.gz
ovsdb: retry calls in case of communication error with server
When the server is restarted the write to unix socket fails with EPIPE. In such case, don't fail all the calls in queue; instead, after a sync of the ovsdb state (through a monitor call), start processing the queue again, including the call that previously failed. Add a retry counter to avoid that calls are stuck in the queue forever in a hypothetical scenario in which the write always fails. https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/459 (cherry picked from commit db37e530e8f03b7882997194bb325a206555c44d) (cherry picked from commit 54254bf6fe869b83189f43cdf13652c165d5aa71)
-rw-r--r--src/devices/ovs/nm-ovsdb.c81
1 files changed, 55 insertions, 26 deletions
diff --git a/src/devices/ovs/nm-ovsdb.c b/src/devices/ovs/nm-ovsdb.c
index 5782e51851..0af72ef03d 100644
--- a/src/devices/ovs/nm-ovsdb.c
+++ b/src/devices/ovs/nm-ovsdb.c
@@ -78,6 +78,7 @@ typedef struct {
GHashTable *ports; /* port uuid => OpenvswitchPort */
GHashTable *bridges; /* bridge uuid => OpenvswitchBridge */
char *db_uuid;
+ guint num_failures;
} NMOvsdbPrivate;
struct _NMOvsdb {
@@ -101,7 +102,7 @@ NM_DEFINE_SINGLETON_GETTER (NMOvsdb, nm_ovsdb_get, NM_TYPE_OVSDB);
/*****************************************************************************/
static void ovsdb_try_connect (NMOvsdb *self);
-static void ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing);
+static void ovsdb_disconnect (NMOvsdb *self, gboolean retry, gboolean is_disposing);
static void ovsdb_read (NMOvsdb *self);
static void ovsdb_write (NMOvsdb *self);
static void ovsdb_next_command (NMOvsdb *self);
@@ -141,6 +142,8 @@ typedef struct {
};
} OvsdbMethodCall;
+#define OVSDB_MAX_FAILURES 3
+
static void
_call_trace (const char *comment, OvsdbMethodCall *call, json_t *msg)
{
@@ -197,7 +200,8 @@ ovsdb_call_method (NMOvsdb *self, OvsdbCommand command,
const char *ifname,
NMConnection *bridge, NMConnection *port, NMConnection *interface,
NMDevice *bridge_device, NMDevice *interface_device,
- guint32 mtu, OvsdbMethodCallback callback, gpointer user_data)
+ guint32 mtu, OvsdbMethodCallback callback, gpointer user_data,
+ gboolean add_first)
{
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
OvsdbMethodCall *call;
@@ -205,8 +209,13 @@ ovsdb_call_method (NMOvsdb *self, OvsdbCommand command,
/* Ensure we're not unsynchronized before we queue the method call. */
ovsdb_try_connect (self);
- g_array_set_size (priv->calls, priv->calls->len + 1);
- call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ if (add_first) {
+ g_array_prepend_val (priv->calls, (OvsdbMethodCall) {});
+ call = &g_array_index (priv->calls, OvsdbMethodCall, 0);
+ } else {
+ g_array_set_size (priv->calls, priv->calls->len + 1);
+ call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ }
call->id = COMMAND_PENDING;
call->command = command;
call->callback = callback;
@@ -1210,7 +1219,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
"result", &result,
"error", &error) == -1) {
_LOGW ("couldn't grok the message: %s", json_error.text);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
@@ -1221,7 +1230,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
/* It's a method call! */
if (!params) {
_LOGW ("a method call with no params: '%s'", method);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
@@ -1241,13 +1250,13 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
/* This is a response to a method call. */
if (!priv->calls->len) {
_LOGE ("there are no queued calls expecting response %" G_GUINT64_FORMAT, id);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
call = &g_array_index (priv->calls, OvsdbMethodCall, 0);
if (call->id != id) {
_LOGE ("expected a response to call %" G_GUINT64_FORMAT ", not %" G_GUINT64_FORMAT, call->id, id);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
/* Cool, we found a corresponding call. Finish it. */
@@ -1265,6 +1274,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
user_data = call->user_data;
g_array_remove_index (priv->calls, 0);
callback (self, result, local, user_data);
+ priv->num_failures = 0;
/* Don't progress further commands in case the callback hit an error
* and disconnected us. */
@@ -1323,9 +1333,11 @@ ovsdb_read_cb (GObject *source_object, GAsyncResult *res, gpointer user_data)
size = g_input_stream_read_finish (stream, res, &error);
if (size == -1) {
+ /* ovsdb-server was possibly restarted */
_LOGW ("short read from ovsdb: %s", error->message);
+ priv->num_failures++;
g_clear_error (&error);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, priv->num_failures <= OVSDB_MAX_FAILURES, FALSE);
return;
}
@@ -1371,9 +1383,11 @@ ovsdb_write_cb (GObject *source_object, GAsyncResult *res, gpointer user_data)
size = g_output_stream_write_finish (stream, res, &error);
if (size == -1) {
+ /* ovsdb-server was possibly restarted */
_LOGW ("short write to ovsdb: %s", error->message);
+ priv->num_failures++;
g_clear_error (&error);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, priv->num_failures <= OVSDB_MAX_FAILURES, FALSE);
return;
}
@@ -1416,7 +1430,7 @@ ovsdb_write (NMOvsdb *self)
* puts us back in sync.
*/
static void
-ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
+ovsdb_disconnect (NMOvsdb *self, gboolean retry, gboolean is_disposing)
{
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
OvsdbMethodCall *call;
@@ -1424,18 +1438,26 @@ ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
gpointer user_data;
gs_free_error GError *error = NULL;
+ nm_assert (!retry || !is_disposing);
+
if (!priv->client)
return;
- _LOGD ("disconnecting from ovsdb");
- nm_utils_error_set_cancelled (&error, is_disposing, "NMOvsdb");
+ _LOGD ("disconnecting from ovsdb, retry %d", retry);
- while (priv->calls->len) {
- call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
- callback = call->callback;
- user_data = call->user_data;
- g_array_remove_index (priv->calls, priv->calls->len - 1);
- callback (self, NULL, error, user_data);
+ if (retry) {
+ if (priv->calls->len != 0)
+ g_array_index (priv->calls, OvsdbMethodCall, 0).id = COMMAND_PENDING;
+ } else {
+ nm_utils_error_set_cancelled (&error, is_disposing, "NMOvsdb");
+
+ while (priv->calls->len) {
+ call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ callback = call->callback;
+ user_data = call->user_data;
+ g_array_remove_index (priv->calls, priv->calls->len - 1);
+ callback (self, NULL, error, user_data);
+ }
}
priv->bufp = 0;
@@ -1445,6 +1467,9 @@ ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
g_clear_object (&priv->conn);
g_clear_pointer (&priv->db_uuid, g_free);
nm_clear_g_cancellable (&priv->cancellable);
+
+ if (retry)
+ ovsdb_try_connect (self);
}
static void
@@ -1453,7 +1478,7 @@ _monitor_bridges_cb (NMOvsdb *self, json_t *result, GError *error, gpointer user
if (error) {
if (!nm_utils_error_is_cancelled (error, TRUE)) {
_LOGI ("%s", error->message);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
}
return;
}
@@ -1477,7 +1502,7 @@ _client_connect_cb (GObject *source_object, GAsyncResult *res, gpointer user_dat
if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
_LOGI ("%s", error->message);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
g_clear_error (&error);
return;
}
@@ -1518,7 +1543,8 @@ ovsdb_try_connect (NMOvsdb *self)
/* Queue a monitor call before any other command, ensuring that we have an up
* to date view of existing bridged that we need for add and remove ops. */
ovsdb_call_method (self, OVSDB_MONITOR, NULL,
- NULL, NULL, NULL, NULL, NULL, 0, _monitor_bridges_cb, NULL);
+ NULL, NULL, NULL, NULL, NULL, 0,
+ _monitor_bridges_cb, NULL, TRUE);
}
/*****************************************************************************/
@@ -1579,7 +1605,8 @@ nm_ovsdb_add_interface (NMOvsdb *self,
bridge_device, interface_device,
0,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
void
@@ -1589,7 +1616,8 @@ nm_ovsdb_del_interface (NMOvsdb *self, const char *ifname,
ovsdb_call_method (self, OVSDB_DEL_INTERFACE, ifname,
NULL, NULL, NULL, NULL, NULL, 0,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
void nm_ovsdb_set_interface_mtu (NMOvsdb *self, const char *ifname, guint32 mtu,
@@ -1598,7 +1626,8 @@ void nm_ovsdb_set_interface_mtu (NMOvsdb *self, const char *ifname, guint32 mtu,
ovsdb_call_method (self, OVSDB_SET_INTERFACE_MTU, ifname,
NULL, NULL, NULL, NULL, NULL, mtu,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
/*****************************************************************************/
@@ -1680,7 +1709,7 @@ dispose (GObject *object)
NMOvsdb *self = NM_OVSDB (object);
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
- ovsdb_disconnect (self, TRUE);
+ ovsdb_disconnect (self, FALSE, TRUE);
if (priv->input) {
g_string_free (priv->input, TRUE);