summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Haller <thaller@redhat.com> 2022-10-04 12:35:35 +0200
committer Thomas Haller <thaller@redhat.com> 2022-10-04 12:37:42 +0200
commit 6ef929d19fb7622b4430ecb2434b1562221c65ae (patch)
tree 844a5920aea0c07c9e8104ed70a38a512fbf8b92
parent 718392ef5ff484f6b77b3088417a52ef90331a83 (diff)
parent 22f670687a5b91784bf04cedaf0a7f11c4eb2dd5 (diff)
download NetworkManager-6ef929d19fb7622b4430ecb2434b1562221c65ae.tar.gz
bond: merge branch 'th/mlag-bonding-slb'
https://bugzilla.redhat.com/show_bug.cgi?id=2128216 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1385
-rw-r--r-- Makefile.am 2
-rw-r--r-- NEWS 4
-rw-r--r-- src/core/devices/nm-device-bond.c 123
-rw-r--r-- src/core/meson.build 1
-rw-r--r-- src/core/nm-bond-manager.c 967
-rw-r--r-- src/core/nm-bond-manager.h 32
-rw-r--r-- src/core/nm-firewall-utils.c 248
-rw-r--r-- src/core/nm-firewall-utils.h 6
-rw-r--r-- src/libnm-core-impl/nm-setting-bond.c 40
-rw-r--r-- src/libnm-core-public/nm-setting-bond.h 1
10 files changed, 1418 insertions, 6 deletions
diff --git a/Makefile.am b/Makefile.am
index 6b9b82876b..13cadec6dc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2462,6 +2462,8 @@ src_core_libNetworkManagerBase_la_SOURCES = \
src/core/nm-l3cfg.h \
src/core/nm-ip-config.c \
src/core/nm-ip-config.h \
+ src/core/nm-bond-manager.c \
+ src/core/nm-bond-manager.h \
\
src/core/dhcp/nm-dhcp-client.c \
src/core/dhcp/nm-dhcp-client.h \
diff --git a/NEWS b/NEWS
index aa2eb464b4..4bd63ab496 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,10 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE!
in `nmcli connection $operator uuid $uuid`.
* nmtui now supports editing Wi-Fi WPA-Enterprise, Ethernet with 802.1X
authentication and MACsec connection profiles.
+* bond: add "balance-slb" option which implements source load balancing
+ with "balance-xor" mode and "vlan-srcmac" xmit_hash_policy. In this
+ mode, NetworkManager configures nftables to prevent loops in the
+ switch.
=============================================
NetworkManager-1.40
diff --git a/src/core/devices/nm-device-bond.c b/src/core/devices/nm-device-bond.c
index dc5e1d5c7e..72ede7c19c 100644
--- a/src/core/devices/nm-device-bond.c
+++ b/src/core/devices/nm-device-bond.c
@@ -20,6 +20,7 @@
#include "libnm-core-intern/nm-core-internal.h"
#include "nm-manager.h"
#include "nm-setting-bond-port.h"
+#include "nm-bond-manager.h"
#define _NMLOG_DEVICE_TYPE NMDeviceBond
#include "nm-device-logging.h"
@@ -59,7 +60,8 @@
/*****************************************************************************/
struct _NMDeviceBond {
- NMDevice parent;
+ NMDevice parent;
+ NMBondManager *bond_manager;
};
struct _NMDeviceBondClass {
@@ -178,7 +180,9 @@ update_connection(NMDevice *device, NMConnection *connection)
gs_free char *value = NULL;
char *p;
- if (NM_IN_STRSET(option, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE))
+ if (NM_IN_STRSET(option,
+ NM_SETTING_BOND_OPTION_ACTIVE_SLAVE,
+ NM_SETTING_BOND_OPTION_BALANCE_SLB))
continue;
value =
@@ -460,10 +464,97 @@ _platform_lnk_bond_init_from_setting(NMSettingBond *s_bond, NMPlatformLnkBond *p
props->tlb_dynamic_lb_has = NM_IN_SET(props->mode, NM_BOND_MODE_TLB, NM_BOND_MODE_ALB);
}
+static void
+_balance_slb_cb(NMBondManager *bond_manager, NMBondManagerEventType event_type, gpointer user_data)
+{
+ NMDevice *device = user_data;
+ NMDeviceBond *self = NM_DEVICE_BOND(device);
+
+ nm_assert(NM_IS_DEVICE_BOND(self));
+ nm_assert(self->bond_manager == bond_manager);
+
+ switch (event_type) {
+ case NM_BOND_MANAGER_EVENT_TYPE_STATE:
+ switch (nm_bond_manager_get_state(bond_manager)) {
+ case NM_OPTION_BOOL_FALSE:
+ if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED) {
+ _LOGD(LOGD_BOND, "balance-slb: failed");
+ nm_device_state_changed(device,
+ NM_DEVICE_STATE_FAILED,
+ NM_DEVICE_STATE_REASON_CONFIG_FAILED);
+ }
+ return;
+ case NM_OPTION_BOOL_TRUE:
+ if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED
+ && nm_device_devip_get_state(device, AF_UNSPEC) <= NM_DEVICE_IP_STATE_PENDING) {
+ nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_READY, NULL);
+ }
+ return;
+ case NM_OPTION_BOOL_DEFAULT:
+ if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED
+ && nm_device_devip_get_state(device, AF_UNSPEC) == NM_DEVICE_IP_STATE_READY) {
+ /* We are again busy. We can also go back to "pending" from "ready".
+ * If ip-config state is not yet complete, this will further delay it.
+ * Otherwise, it should have no effect. */
+ nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL);
+ }
+ return;
+ }
+ nm_assert_not_reached();
+ return;
+ }
+
+ nm_assert_not_reached();
+}
+
+static void
+_balance_slb_setup(NMDeviceBond *self, NMConnection *connection)
+{
+ int ifindex = nm_device_get_ifindex(NM_DEVICE(self));
+ gboolean balance_slb = FALSE;
+ const char *uuid;
+ NMSettingBond *s_bond;
+
+ if (ifindex > 0 && connection && (s_bond = nm_connection_get_setting_bond(connection)))
+ balance_slb = _v_intbool(s_bond, NM_SETTING_BOND_OPTION_BALANCE_SLB);
+
+ if (!balance_slb) {
+ if (nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy)) {
+ _LOGD(LOGD_BOND, "balance-slb: stopped");
+ nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_NONE, NULL);
+ }
+ return;
+ }
+
+ uuid = nm_connection_get_uuid(connection);
+
+ if (self->bond_manager) {
+ if (nm_bond_manager_get_ifindex(self->bond_manager) == ifindex
+ && nm_streq0(nm_bond_manager_get_connection_uuid(self->bond_manager), uuid)) {
+ _LOGD(LOGD_BOND, "balance-slb: reapply");
+ nm_bond_manager_reapply(self->bond_manager);
+ return;
+ }
+ nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy);
+ _LOGD(LOGD_BOND, "balance-slb: restart");
+ }
+
+ _LOGD(LOGD_BOND, "balance-slb: start");
+ if (nm_device_devip_get_state(NM_DEVICE(self), AF_UNSPEC) < NM_DEVICE_IP_STATE_PENDING)
+ nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL);
+ self->bond_manager = nm_bond_manager_new(nm_device_get_platform(NM_DEVICE(self)),
+ ifindex,
+ uuid,
+ _balance_slb_cb,
+ self);
+ nm_assert(nm_bond_manager_get_state(self->bond_manager) == NM_OPTION_BOOL_DEFAULT);
+}
+
static NMActStageReturn
act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason)
{
- NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS;
+ NMDeviceBond *self = NM_DEVICE_BOND(device);
+ NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS;
NMConnection *connection;
NMSettingBond *s_bond;
NMPlatformLnkBond props;
@@ -476,6 +567,14 @@ act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason)
s_bond = nm_connection_get_setting_bond(connection);
g_return_val_if_fail(s_bond, NM_ACT_STAGE_RETURN_FAILURE);
+ if (nm_device_sys_iface_state_is_external(device))
+ return NM_ACT_STAGE_RETURN_SUCCESS;
+
+ _balance_slb_setup(self, connection);
+
+ if (nm_device_sys_iface_state_is_external_or_assume(device))
+ return NM_ACT_STAGE_RETURN_SUCCESS;
+
_platform_lnk_bond_init_from_setting(s_bond, &props);
/* Interface must be down to set bond options */
@@ -684,7 +783,7 @@ can_reapply_change(NMDevice *device,
const char *name = *option_list;
/* We support changes to these */
- if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL))
+ if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL, NM_SETTING_BOND_OPTION_BALANCE_SLB))
continue;
/* Reject any other changes */
@@ -730,6 +829,16 @@ reapply_connection(NMDevice *device, NMConnection *con_old, NMConnection *con_ne
set_bond_arp_ip_targets(device, s_bond);
set_bond_attrs_or_default(device, s_bond, NM_MAKE_STRV(OPTIONS_REAPPLY_SUBSET));
+
+ _balance_slb_setup(self, con_new);
+}
+
+static void
+deactivate(NMDevice *device)
+{
+ NMDeviceBond *self = NM_DEVICE_BOND(device);
+
+ _balance_slb_setup(self, NULL);
}
/*****************************************************************************/
@@ -768,13 +877,15 @@ nm_device_bond_class_init(NMDeviceBondClass *klass)
device_class->update_connection = update_connection;
device_class->master_update_slave_connection = controller_update_port_connection;
- device_class->create_and_realize = create_and_realize;
- device_class->act_stage1_prepare = act_stage1_prepare;
+ device_class->create_and_realize = create_and_realize;
+ device_class->act_stage1_prepare = act_stage1_prepare;
+ device_class->act_stage1_prepare_also_for_external_or_assume = TRUE;
device_class->get_configured_mtu = nm_device_get_configured_mtu_for_wired;
device_class->attach_port = attach_port;
device_class->detach_port = detach_port;
device_class->can_reapply_change = can_reapply_change;
device_class->reapply_connection = reapply_connection;
+ device_class->deactivate = deactivate;
}
/*****************************************************************************/
diff --git a/src/core/meson.build b/src/core/meson.build
index f3359ad0f5..6f11595aa0 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -53,6 +53,7 @@ libNetworkManagerBase = static_library(
'nm-l3-ipv4ll.c',
'nm-l3-ipv6ll.c',
'nm-l3cfg.c',
+ 'nm-bond-manager.c',
'nm-ip-config.c',
),
dependencies: [
diff --git a/src/core/nm-bond-manager.c b/src/core/nm-bond-manager.c
new file mode 100644
index 0000000000..2d15b0b5a0
--- /dev/null
+++ b/src/core/nm-bond-manager.c
@@ -0,0 +1,967 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "src/core/nm-default-daemon.h"
+
+#include "nm-bond-manager.h"
+
+#include <linux/if.h>
+
+#include "NetworkManagerUtils.h"
+#include "libnm-core-aux-intern/nm-libnm-core-utils.h"
+#include "libnm-glib-aux/nm-str-buf.h"
+#include "libnm-platform/nm-platform.h"
+#include "libnm-platform/nmp-object.h"
+#include "nm-firewall-utils.h"
+
+/*****************************************************************************/
+
+typedef enum _nm_packed {
+ REGISTRATION_STATE_NONE,
+ REGISTRATION_STATE_UPPING,
+ REGISTRATION_STATE_UP,
+ REGISTRATION_STATE_DOWNING,
+} RegistrationState;
+
+struct _NMBondManager {
+ NMPlatform *platform;
+
+ NMBondManagerCallback callback;
+ gpointer user_data;
+
+ /* This is only used for structured logging. */
+ char *connection_uuid;
+
+ GSource *reconfigure_on_idle_source;
+
+ /* During _reconfigure_check() we remember all ifindexes that are part
+ * of the current SLB bond. This is used during _link_changed_cb() to
+ * figure out whether a change on the interface might be relevant to
+ * trigger a _reconfigure_check() on idle. */
+ GHashTable *previous_ifindexes;
+
+ /* We need to keep track of active members that we configured in NFT.
+ * That is, because on update we use "add && flush" to reset the table,
+ * however that leaves empty chains around. If we previously had an active
+ * member, a chain for it was created that we need to clean up.
+ *
+ * Before every NFT call we use this to generate the list of members that
+ * are to be cleaned up. Thereby also adding the new active-members to
+ * the list. When the NFT call returns with success, we can prune the
+ * now deleted member/chain. */
+ GHashTable *previous_members;
+
+ GCancellable *cancellable;
+
+ struct {
+ char *bond_ifname_curr;
+ char *bond_ifname_next;
+ const char **active_members_curr;
+ const char **active_members_next;
+ } dat;
+
+ gulong link_changed_id;
+ int ifindex;
+ RegistrationState reg_state;
+ bool destroyed : 1;
+
+ /* Whether we noticed some changes that require us to _reconfigure_check().
+ * Note that while a NFT call is pending, we postpone the check. */
+ bool reconfigure_check : 1;
+
+ /* Whether a `nft` call is in progress. Usually this corresponds to
+ * having a cancellable, however, we may also cancel and clear the
+ * cancellable while the call is still in progress. */
+ bool nft_in_progress : 1;
+
+ /* Whether the last NFT invocation was good. If not, we may have
+ * an invalid state. Actually unused so far, because it's not
+ * clear what to do about failure to configure NFT (aside from logging
+ * a warning). */
+ bool nft_good : 1;
+
+ /* The overall state. DEFAULT means that an update is pending.
+ * FALSE means that the last "nft" command failed.
+ * TRUE means that the last "nft" command was good. */
+ NMOptionBool state : 3;
+};
+
+#define NM_IS_BOND_MANAGER(self) \
+ ({ \
+ const NMBondManager *_self = (self); \
+ \
+ (_self && NM_IS_PLATFORM(_self->platform)); \
+ })
+
+/*****************************************************************************/
+
+static void _nft_call(NMBondManager *self,
+ gboolean up,
+ const char *bond_ifname,
+ const char *const *bond_ifnames_down,
+ const char *const *active_members);
+
+static void _bond_manager_destroy(NMBondManager *self);
+
+static void _reconfigure_check(NMBondManager *self, gboolean reapply);
+
+/*****************************************************************************/
+
+#define _NMLOG_DOMAIN LOGD_DEVICE
+#define _NMLOG_PREFIX_NAME "mlag"
+#define _NMLOG(level, ...) \
+ G_STMT_START \
+ { \
+ const NMLogLevel _level = (level); \
+ \
+ if (nm_logging_enabled(_level, _NMLOG_DOMAIN)) { \
+ NMBondManager *const _self = (self); \
+ const char *_ifname = nm_platform_link_get_name(_self->platform, _self->ifindex); \
+ char _sbuf[30]; \
+ \
+ _nm_log(_level, \
+ _NMLOG_DOMAIN, \
+ 0, \
+ _ifname, \
+ _self->connection_uuid, \
+ "%s[" NM_HASH_OBFUSCATE_PTR_FMT ", %s]: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \
+ _NMLOG_PREFIX_NAME, \
+ NM_HASH_OBFUSCATE_PTR(_self), \
+ (_ifname ?: nm_sprintf_buf(_sbuf, "(%d)", _self->ifindex)) \
+ _NM_UTILS_MACRO_REST(__VA_ARGS__)); \
+ } \
+ } \
+ G_STMT_END
+
+static const char *
+_log_info(NMStrBuf *strbuf,
+ const char *bond_ifname,
+ const char *const *active_members,
+ const char *const *previous_members)
+{
+ gsize i;
+
+ nm_str_buf_reset(strbuf);
+
+ if (!bond_ifname)
+ nm_str_buf_append(strbuf, "(disabled)");
+ else {
+ nm_str_buf_append_printf(strbuf, "(enabled, \"%s\"", bond_ifname);
+
+ for (i = 0; active_members && active_members[i]; i++) {
+ if (i == 0)
+ nm_str_buf_append(strbuf, ", active-members=[ \"");
+ else
+ nm_str_buf_append(strbuf, "\", \"");
+ nm_str_buf_append(strbuf, active_members[i]);
+ }
+ if (i > 0)
+ nm_str_buf_append(strbuf, "\" ]");
+
+ for (i = 0; previous_members && previous_members[i]; i++) {
+ nm_assert(!nm_strv_contains(active_members, -1, previous_members[i]));
+ if (i == 0)
+ nm_str_buf_append(strbuf, ", previous-members=[ \"");
+ else
+ nm_str_buf_append(strbuf, "\", \"");
+ nm_str_buf_append(strbuf, previous_members[i]);
+ }
+ if (i > 0)
+ nm_str_buf_append(strbuf, "\" ]");
+
+ nm_str_buf_append(strbuf, ")");
+ }
+
+ return nm_str_buf_get_str(strbuf);
+}
+
+/*****************************************************************************/
+
+static gboolean
+_nm_assert_self_(NMBondManager *self)
+{
+ nm_assert(self);
+ nm_assert(NM_IS_PLATFORM(self->platform));
+ nm_assert(!self->cancellable || G_IS_CANCELLABLE(self->cancellable));
+ nm_assert(!self->cancellable || !g_cancellable_is_cancelled(self->cancellable));
+ nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next);
+ nm_assert(!self->cancellable || self->nft_in_progress);
+ nm_assert(!self->reconfigure_on_idle_source || self->reconfigure_check);
+ nm_assert(!self->nft_in_progress || !self->reconfigure_on_idle_source);
+
+ nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr[0]);
+ nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next[0]);
+
+ nm_assert(!self->destroyed || !self->dat.bond_ifname_next);
+ nm_assert(!self->destroyed
+ || NM_IN_SET((RegistrationState) self->reg_state,
+ REGISTRATION_STATE_UPPING,
+ REGISTRATION_STATE_DOWNING));
+
+ switch (self->reg_state) {
+ case REGISTRATION_STATE_NONE:
+ nm_assert(!self->nft_in_progress);
+ nm_assert(!self->cancellable);
+ nm_assert(!self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.bond_ifname_next);
+ break;
+ case REGISTRATION_STATE_UPPING:
+ nm_assert(self->nft_in_progress);
+ nm_assert(self->dat.bond_ifname_curr);
+ break;
+ case REGISTRATION_STATE_UP:
+ nm_assert(!self->nft_in_progress);
+ nm_assert(!self->cancellable);
+ nm_assert(self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.bond_ifname_next);
+ break;
+ case REGISTRATION_STATE_DOWNING:
+ nm_assert(self->nft_in_progress);
+ nm_assert(self->dat.bond_ifname_curr);
+ break;
+ default:
+ nm_assert_not_reached();
+ break;
+ }
+
+ return TRUE;
+}
+
+#define _nm_assert_self(self) nm_assert(_nm_assert_self_(self))
+
+/*****************************************************************************/
+
+static void
+_callback_invoke(NMBondManager *self, NMBondManagerEventType event_type)
+{
+ if (!self->callback)
+ return;
+
+ self->callback(self, event_type, self->user_data);
+}
+
+static void
+_notify_state_change(NMBondManager *self)
+{
+ NMOptionBool state;
+
+ if (self->nft_in_progress)
+ state = NM_OPTION_BOOL_DEFAULT;
+ else
+ state = !!self->nft_good;
+
+ if (state == self->state)
+ return;
+
+ self->state = state;
+ _callback_invoke(self, NM_BOND_MANAGER_EVENT_TYPE_STATE);
+}
+
+/*****************************************************************************/
+
+static void
+_nft_call_cb(GObject *source, GAsyncResult *result, gpointer user_data)
+{
+ nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE);
+ NMBondManager *self;
+ gpointer ptr_up;
+ gs_free const char **previous_members = NULL;
+ gs_free_error GError *error = NULL;
+
+ nm_utils_user_data_unpack(user_data, &self, &ptr_up, &previous_members);
+
+ _nm_assert_self(self);
+
+ self->nft_in_progress = FALSE;
+
+ nm_firewall_nft_call_finish(result, &error);
+
+ if (!error) {
+ gsize i;
+
+ /* On success, we can forget about our previous members that we successfully
+ * deleted. */
+ if (!GPOINTER_TO_INT(ptr_up)) {
+ /* We successfully deleted the NFT table. Forget all previous members. */
+ g_hash_table_remove_all(self->previous_members);
+ } else if (previous_members) {
+ /* These previous members are now forgotten for good. */
+ for (i = 0; previous_members[i]; i++)
+ g_hash_table_remove(self->previous_members, previous_members[i]);
+ }
+ } else {
+ /* If all our NFT calls keep failing, we never actually prune entries from
+ * self->previous_members. That is a problem, however, under normal operation
+ * NFT calls should not continuously fail, and we would have a small fixed
+ * number of active-members. */
+ }
+
+ nm_clear_g_cancellable(&self->cancellable);
+
+ if (nm_utils_error_is_cancelled(error)) {
+ switch (self->reg_state) {
+ case REGISTRATION_STATE_NONE:
+ case REGISTRATION_STATE_UP:
+ case REGISTRATION_STATE_DOWNING:
+ /* It is not expected that we cancel anything in this state. */
+ nm_assert_not_reached();
+ goto out;
+ case REGISTRATION_STATE_UPPING:
+ nm_assert(self->dat.bond_ifname_curr);
+ /* We cancelled while upping. We need to issue another down,
+ * to make sure the data is gone. */
+ if (!self->dat.bond_ifname_next) {
+ /* There is no other name to configure. We just need to down
+ * the current one. */
+ _LOGT("reconfigure: configuration cancelled, deconfigure %s",
+ self->dat.bond_ifname_curr);
+ _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL);
+ self->reg_state = REGISTRATION_STATE_DOWNING;
+ goto out;
+ }
+ /* There is already another configuration. UPPING again. */
+ _LOGT("reconfigure: configuration cancelled, configure %s",
+ _log_info(&strbuf,
+ self->dat.bond_ifname_next,
+ self->dat.active_members_next,
+ NULL));
+ _nft_call(self,
+ TRUE,
+ self->dat.bond_ifname_next,
+ NM_MAKE_STRV(self->dat.bond_ifname_curr),
+ self->dat.active_members_next);
+ self->reg_state = REGISTRATION_STATE_UPPING;
+ nm_clear_g_free(&self->dat.bond_ifname_curr);
+ nm_clear_g_free(&self->dat.active_members_curr);
+ self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next);
+ self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next);
+ goto out;
+ }
+ nm_assert_not_reached();
+ goto out;
+ }
+
+ if (error) {
+ self->nft_good = FALSE;
+ } else {
+ /* Technically, if a previous downing failed, we cannot know that
+ * a successful run now was able to fix that. That is, because
+ * if the interface got renamed, and the downing for the previous
+ * interface name failed, we leak that table and the success now doesn't
+ * fix that.
+ *
+ * That is a bug, but probably not severe because:
+ * - interfaces are not supposed to be renamed.
+ * - if this NFT command succeeds, we expect that also the previous downings worked.
+ *
+ * The problem here is only that nft_good might lie and indicate
+ * no problem. However, when a downing fails, we anyway leak the table already
+ * and the bad thing happened. We cannot fix it if the `nft` command fails.
+ */
+ self->nft_good = TRUE;
+ }
+
+ switch (self->reg_state) {
+ case REGISTRATION_STATE_NONE:
+ case REGISTRATION_STATE_UP:
+ /* Unexpected to get a callback completion in these states. */
+ nm_assert_not_reached();
+ goto out;
+ case REGISTRATION_STATE_UPPING:
+ nm_assert(!self->dat.bond_ifname_next);
+ if (error) {
+ /* Unclear what to do about this error. Just log about it, nothing else. */
+ _LOGW("reconfigure: nft configuration for balance-slb failed: %s", error->message);
+ } else
+ _LOGT("reconfigure: configuration completed");
+ self->reg_state = REGISTRATION_STATE_UP;
+ goto out;
+ case REGISTRATION_STATE_DOWNING:
+ nm_assert(self->dat.bond_ifname_curr);
+ if (!self->dat.bond_ifname_next) {
+ if (error) {
+ /* Unclear what to do about this error. Just log about it, nothing else. */
+ _LOGW("reconfigure: nft deconfiguration for balance-slb failed: %s",
+ error->message);
+ } else
+ _LOGT("reconfigure: deconfiguration completed");
+ nm_clear_g_free(&self->dat.bond_ifname_curr);
+ nm_clear_g_free(&self->dat.active_members_curr);
+ self->reg_state = REGISTRATION_STATE_NONE;
+
+ if (self->destroyed) {
+ _bond_manager_destroy(self);
+ return;
+ }
+
+ goto out;
+ }
+ if (error) {
+ /* Unclear what to do about this error. Just log about it, nothing else. */
+ _LOGW("reconfigure: nft deconfiguration failed before restart: %s", error->message);
+ } else
+ _LOGT("reconfigure: deconfiguration completed before restart");
+ _nft_call(self,
+ TRUE,
+ self->dat.bond_ifname_next,
+ NM_MAKE_STRV(self->dat.bond_ifname_curr),
+ self->dat.active_members_next);
+ nm_clear_g_free(&self->dat.bond_ifname_curr);
+ nm_clear_g_free(&self->dat.active_members_curr);
+ self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next);
+ self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next);
+ self->reg_state = REGISTRATION_STATE_UPPING;
+ goto out;
+ }
+
+ nm_assert_not_reached();
+
+out:
+ if (self->reconfigure_check) {
+ if (self->destroyed)
+ nm_assert_not_reached();
+ else if (!self->nft_in_progress) {
+ nm_assert(!self->reconfigure_on_idle_source);
+ _reconfigure_check(self, FALSE);
+ }
+ }
+
+ _notify_state_change(self);
+}
+
+static void
+_nft_call(NMBondManager *self,
+ gboolean up,
+ const char *bond_ifname,
+ const char *const *bond_ifnames_down,
+ const char *const *active_members)
+{
+ gs_unref_bytes GBytes *stdin_buf = NULL;
+ gs_free const char *const *previous_members_strv = NULL;
+
+ if (up) {
+ gs_unref_ptrarray GPtrArray *arr = NULL;
+ GHashTableIter iter;
+ const char *n;
+ gsize i;
+
+ /* We need to track the active-members that we add, because, when we update the
+ * NFT table without a previously configured member, we use "add && flush", which
+ * leaves empty chains for the previous members around. We need to clean up those
+ * chains, hence the need to track which members we ever added.
+ *
+ * Before making an UP call, we add the newly configured active_members to the list
+ * of previous_members. All the while, passing a list of previous_members_strv
+ * which we currently no longer configure.
+ *
+ * Only when the call succeeds (in _nft_call_cb()), we will forget about previously added
+ * members. This is done by passing the list of members that we are forgetting now
+ * on to the callback below. */
+
+ /* Get the list of previous members that are no longer in the current
+ * active list. */
+ g_hash_table_iter_init(&iter, self->previous_members);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &n, NULL)) {
+ if (nm_strv_contains(active_members, -1, n))
+ continue;
+ if (!arr)
+ arr = g_ptr_array_new();
+ g_ptr_array_add(arr, (gpointer) n);
+ }
+ if (arr) {
+ nm_strv_sort((const char **) arr->pdata, arr->len);
+ previous_members_strv = nm_strv_dup_packed((const char *const *) arr->pdata, arr->len);
+ }
+
+ /* The now active members also get tracked as previous members for the future. */
+ if (active_members) {
+ for (i = 0; active_members[i]; i++)
+ g_hash_table_add(self->previous_members, g_strdup(active_members[i]));
+ }
+ }
+
+ stdin_buf = nm_firewall_nft_stdio_mlag(up,
+ bond_ifname,
+ bond_ifnames_down,
+ active_members,
+ previous_members_strv);
+
+ nm_clear_g_cancellable(&self->cancellable);
+ self->cancellable = g_cancellable_new();
+
+ nm_shutdown_wait_obj_register_cancellable(self->cancellable, "nft-mlag");
+
+ if (_LOGT_ENABLED()) {
+ if (up) {
+ nm_auto_str_buf NMStrBuf strbuf =
+ NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE);
+
+ _LOGT("reconfigure: call nft: %s",
+ _log_info(&strbuf, bond_ifname, active_members, previous_members_strv));
+ } else
+ _LOGT("reconfigure: call nft: disable on \"%s\"", bond_ifname);
+ }
+
+ self->nft_in_progress = TRUE;
+
+ if (self->reconfigure_check)
+ nm_clear_g_source_inst(&self->reconfigure_on_idle_source);
+
+ nm_firewall_nft_call(stdin_buf,
+ self->cancellable,
+ _nft_call_cb,
+ nm_utils_user_data_pack(self,
+ GINT_TO_POINTER(up),
+ g_steal_pointer(&previous_members_strv)));
+}
+
+/*****************************************************************************/
+
+static void
+_reconfigure_do(NMBondManager *self,
+ gboolean reapply,
+ const char *bond_ifname,
+ const char **active_members_take)
+{
+ nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE);
+ gs_free const char **active_members = g_steal_pointer(&active_members_take);
+
+ _nm_assert_self(self);
+ nm_assert(!active_members || bond_ifname);
+ nm_assert(!active_members || active_members[0]);
+
+ /* The difficulty of all of this is "state". In particular, since we make the nft call
+ * async, we need to handle all the possible cases, how an update event can invalidate
+ * a currently pending call. */
+
+ switch (self->reg_state) {
+ case REGISTRATION_STATE_NONE:
+ nm_assert(!self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.active_members_curr);
+ nm_assert(!self->dat.bond_ifname_next);
+ nm_assert(!self->dat.active_members_next);
+ nm_assert(!self->cancellable);
+ nm_assert(!self->nft_in_progress);
+
+ if (!bond_ifname) {
+ /* No configuration done. Nothing to do. */
+ goto out;
+ }
+
+ _LOGT("reconfigure: start configuring (%s)",
+ _log_info(&strbuf, bond_ifname, active_members, NULL));
+ self->dat.bond_ifname_curr = g_strdup(bond_ifname);
+ self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1);
+ _nft_call(self, TRUE, self->dat.bond_ifname_curr, NULL, self->dat.active_members_curr);
+ self->reg_state = REGISTRATION_STATE_UPPING;
+ goto out;
+ case REGISTRATION_STATE_UPPING:
+ nm_assert(self->dat.bond_ifname_curr);
+ nm_assert(self->nft_in_progress);
+
+ /* We are UPPING, we cancel the pending operation and will
+ * handle the rest when the callback completes. */
+ if (!bond_ifname) {
+ if (self->cancellable || self->dat.bond_ifname_next)
+ _LOGT("reconfigure: aborting configuring");
+ nm_clear_g_free(&self->dat.bond_ifname_next);
+ nm_clear_g_free(&self->dat.active_members_next);
+ nm_clear_g_cancellable(&self->cancellable);
+ goto out;
+ }
+ if (!reapply && self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_curr)
+ && nm_strv_equal(active_members, self->dat.active_members_curr)) {
+ /* Nothing to do. We are already upping this setup. */
+ nm_assert(!self->dat.bond_ifname_next);
+ nm_assert(!self->dat.active_members_next);
+ goto out;
+ }
+ if (!reapply && !self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_next)
+ && nm_strv_equal(active_members, self->dat.active_members_next)) {
+ /* We already cancelled the current upping, and have scheduled another
+ * (identical) run. Nothing to do. */
+ goto out;
+ }
+ _LOGT("reconfigure: abort configuring to configure %s",
+ _log_info(&strbuf, bond_ifname, active_members, NULL));
+ nm_clear_g_free(&self->dat.bond_ifname_next);
+ nm_clear_g_free(&self->dat.active_members_next);
+ self->dat.bond_ifname_next = g_strdup(bond_ifname);
+ self->dat.active_members_next = nm_strv_dup_packed(active_members, -1);
+ nm_clear_g_cancellable(&self->cancellable);
+ goto out;
+ case REGISTRATION_STATE_UP:
+ nm_assert(self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.bond_ifname_next);
+ nm_assert(!self->dat.active_members_next);
+ nm_assert(!self->cancellable);
+ nm_assert(!self->nft_in_progress);
+
+ if (!bond_ifname) {
+ _LOGT("reconfigure: deconfigure to disable");
+ _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL);
+ self->reg_state = REGISTRATION_STATE_DOWNING;
+ goto out;
+ }
+ if (!reapply && nm_streq0(bond_ifname, self->dat.bond_ifname_curr)
+ && nm_strv_equal(active_members, self->dat.active_members_curr)) {
+ /* Nothing to do. The current configuration is already active. */
+ goto out;
+ }
+ _LOGT("reconfigure: configure, update to %s",
+ _log_info(&strbuf, bond_ifname, active_members, NULL));
+ _nft_call(self,
+ TRUE,
+ bond_ifname,
+ NM_MAKE_STRV(self->dat.bond_ifname_curr),
+ active_members);
+ self->reg_state = REGISTRATION_STATE_UPPING;
+ nm_clear_g_free(&self->dat.bond_ifname_curr);
+ nm_clear_g_free(&self->dat.active_members_curr);
+ self->dat.bond_ifname_curr = g_strdup(bond_ifname);
+ self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1);
+ goto out;
+ case REGISTRATION_STATE_DOWNING:
+ nm_assert(self->dat.bond_ifname_curr);
+ nm_assert(self->nft_in_progress);
+
+ /* We are already DOWNING. It suffices to replace the scheduled "next"
+ * configuration and wait for the pending call to complete. */
+ if (nm_streq0(bond_ifname, self->dat.bond_ifname_next)
+ && nm_strv_equal(active_members, self->dat.active_members_next)) {
+ /* Nothing to do. */
+ goto out;
+ }
+ _LOGT("reconfigure: deconfiguring and waiting for %s",
+ _log_info(&strbuf, bond_ifname, active_members, NULL));
+ nm_clear_g_free(&self->dat.bond_ifname_next);
+ nm_clear_g_free(&self->dat.active_members_next);
+ if (bond_ifname) {
+ self->dat.bond_ifname_next = g_strdup(bond_ifname);
+ self->dat.active_members_next = nm_strv_dup_packed(active_members, -1);
+ }
+ goto out;
+ }
+ nm_assert_not_reached();
+
+out:
+ _notify_state_change(self);
+}
+/* Re-scan the platform for running ports of our bond and pass the result on to _reconfigure_do(). */
+static void
+_reconfigure_check(NMBondManager *self, gboolean reapply)
+{
+ const NMPlatformLink *plink_ctrl;
+ const NMPlatformLink *plink_port;
+ const NMPlatformLnkBond *plnkbond_ctrl;
+ NMDedupMultiIter pliter;
+ const NMDedupMultiHeadEntry *pl_links_head_entry;
+ const char *active_members_lst_stack[16]; /* avoids a heap allocation for up to 16 ports */
+ gs_free const char **active_members_lst_heap = NULL;
+ const char **active_members_lst = active_members_lst_stack;
+ gsize active_members_alloc = G_N_ELEMENTS(active_members_lst_stack);
+ gsize active_members_n = 0;
+ gs_free const char **active_members_result = NULL;
+ const char *bond_ifname = NULL;
+
+ _nm_assert_self(self);
+ nm_assert(!self->destroyed);
+
+ self->reconfigure_check = FALSE; /* this run covers any recheck that was requested so far */
+ nm_clear_g_source_inst(&self->reconfigure_on_idle_source);
+
+ g_hash_table_remove_all(self->previous_ifindexes); /* rebuilt below from the current ports */
+
+ plnkbond_ctrl = nm_platform_link_get_lnk_bond(self->platform, self->ifindex, &plink_ctrl);
+
+ /* We only do bonding-slb MLAG handling if our ifindex is a bond with
+ * mode=balance-xor && xmit_hash_policy=vlan+srcmac. */
+ if (!plnkbond_ctrl)
+ goto out;
+ if (!plink_ctrl)
+ goto out;
+ if (plink_ctrl->type != NM_LINK_TYPE_BOND)
+ goto out;
+ if (plnkbond_ctrl->mode != NM_BOND_MODE_XOR)
+ goto out;
+ if (plnkbond_ctrl->xmit_hash_policy != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC)
+ goto out;
+
+ /* Find all the connected ports that are IFF_RUNNING. */
+ pl_links_head_entry = nm_platform_lookup_obj_type(self->platform, NMP_OBJECT_TYPE_LINK);
+ nmp_cache_iter_for_each_link (&pliter, pl_links_head_entry, &plink_port) {
+ if (plink_port->master != self->ifindex)
+ continue;
+ if (!NM_FLAGS_HAS(plink_port->n_ifi_flags, IFF_RUNNING))
+ continue;
+
+ g_hash_table_add(self->previous_ifindexes, GINT_TO_POINTER(plink_port->ifindex)); /* looked up by _link_changed_cb() */
+
+ if (active_members_n == active_members_alloc) { /* list is full: double it, moving from stack to heap the first time */
+ active_members_alloc *= 2;
+ active_members_lst_heap =
+ g_renew(const char *, active_members_lst_heap, active_members_alloc);
+ if (active_members_lst == active_members_lst_stack) {
+ memcpy(active_members_lst_heap,
+ active_members_lst_stack,
+ sizeof(const char *) * active_members_n);
+ }
+ active_members_lst = active_members_lst_heap;
+ }
+
+ active_members_lst[active_members_n++] = plink_port->name; /* borrowed from the platform link, not copied */
+ }
+
+ if (active_members_n > 0) {
+ gsize i;
+ gsize j;
+
+ /* We sort the active members by name */
+ g_qsort_with_data(active_members_lst,
+ active_members_n,
+ sizeof(const char *),
+ nm_strcmp_p_with_data,
+ NULL);
+
+ /* There really shouldn't be any duplicates. Nonetheless, check
+ * and drop them. They must be unique, because nm_firewall_nft_stdio_mlag()
+ * relies on that. */
+ for (j = 1, i = 1; i < active_members_n; i++) {
+ if (nm_streq(active_members_lst[j - 1], active_members_lst[i])) {
+ /* Repeated. Skip. */
+ continue;
+ }
+ if (j != i)
+ active_members_lst[j] = active_members_lst[i];
+ j++;
+ }
+ active_members_n = j;
+
+ active_members_result = g_new(const char *, active_members_n + 1u); /* +1 for the NULL terminator */
+ j = 0;
+
+ if (self->dat.active_members_curr) {
+ /* We configured a list earlier. We want to preserve the sort order
+ * from before. Prefer entries that we already had, in their previous
+ * order. */
+ for (i = 0; self->dat.active_members_curr[i]; i++) {
+ gssize idx;
+
+ /* We cannot use binary search, because we steal the elements we found
+ * already. Hence this is O(n^2). We could use binary search if we did
+ * not modify active_members_lst, but then we would need to remember
+ * somehow which elements are already consumed. */
+ idx = nm_strv_find_first(active_members_lst,
+ active_members_n,
+ self->dat.active_members_curr[i]);
+ if (idx >= 0)
+ active_members_result[j++] = g_steal_pointer(&active_members_lst[idx]);
+ }
+ }
+
+ /* append the remaining entries, which are sorted by name. */
+ for (i = 0; i < active_members_n; i++) {
+ if (active_members_lst[i]) /* entries stolen above are NULL by now */
+ active_members_result[j++] = active_members_lst[i];
+ }
+
+ nm_assert(j == active_members_n);
+ active_members_result[j] = NULL;
+ }
+
+ bond_ifname = plink_ctrl->name;
+
+out: /* when reached via goto, bond_ifname and the member list are both NULL */
+ _reconfigure_do(self, reapply, bond_ifname, g_steal_pointer(&active_members_result)); /* stolen, so gs_free does not free it here */
+}
+/* Idle-source callback: runs the recheck that _link_changed_cb() scheduled. */
+static gboolean
+_reconfigure_check_on_idle_cb(gpointer user_data)
+{
+ NMBondManager *self = user_data;
+
+ nm_assert(!self->nft_in_progress); /* _link_changed_cb() only adds the idle source while no nft call is in progress */
+ _reconfigure_check(self, FALSE);
+ return G_SOURCE_CONTINUE; /* _reconfigure_check() already cleared reconfigure_on_idle_source */
+}
+
+/*****************************************************************************/
+/* Platform link-changed handler: schedules a recheck if the event concerns our bond or one of its ports. */
+static void
+_link_changed_cb(NMPlatform *platform,
+ int obj_type_i,
+ int ifindex,
+ const NMPlatformLink *plink,
+ int change_type_i,
+ NMBondManager *self)
+{
+ if (self->reconfigure_check) {
+ /* Recheck already scheduled. */
+ return;
+ }
+
+ if (self->destroyed) {
+ /* We should not get another event at this point. Anyway, ignore. */
+ return;
+ }
+
+ if (ifindex == self->ifindex) /* the bond itself changed */
+ goto schedule;
+
+ if (plink->master == self->ifindex) /* the link is currently a port of our bond */
+ goto schedule;
+
+ if (g_hash_table_contains(self->previous_ifindexes, GINT_TO_POINTER(ifindex)))
+ goto schedule; /* was a running port at the last _reconfigure_check() */
+
+ /* This event is not relevant. Skip. */
+ return;
+
+schedule:
+ self->reconfigure_check = TRUE;
+ if (!self->nft_in_progress) { /* NOTE(review): otherwise only the flag is set; presumably the nft completion path acts on it - not visible here */
+ self->reconfigure_on_idle_source =
+ nm_g_idle_add_source(_reconfigure_check_on_idle_cb, self);
+ }
+}
+
+/*****************************************************************************/
+/* Public entry point: runs _reconfigure_check() right away, with reapply=TRUE. */
+void
+nm_bond_manager_reapply(NMBondManager *self)
+{
+ _reconfigure_check(self, TRUE);
+}
+
+/*****************************************************************************/
+/* Returns the ifindex that was passed to nm_bond_manager_new(). */
+int
+nm_bond_manager_get_ifindex(NMBondManager *self)
+{
+ nm_assert(NM_IS_BOND_MANAGER(self));
+
+ return self->ifindex;
+}
+/* Returns the manager's own copy of the connection UUID; it stays owned by the manager. */
+const char *
+nm_bond_manager_get_connection_uuid(NMBondManager *self)
+{
+ nm_assert(NM_IS_BOND_MANAGER(self));
+
+ return self->connection_uuid;
+}
+/* Returns the current state value; nm_bond_manager_new() initializes it to NM_OPTION_BOOL_DEFAULT. */
+NMOptionBool
+nm_bond_manager_get_state(NMBondManager *self)
+{
+ nm_assert(NM_IS_BOND_MANAGER(self));
+
+ return self->state;
+}
+
+/*****************************************************************************/
+/* Creates a manager for bond @ifindex, subscribes to platform link changes and runs an initial check. */
+NMBondManager *
+nm_bond_manager_new(struct _NMPlatform *platform,
+ int ifindex,
+ const char *connection_uuid,
+ NMBondManagerCallback callback,
+ gpointer user_data)
+{
+ NMBondManager *self;
+
+ nm_assert(NM_IS_PLATFORM(platform));
+ nm_assert(ifindex > 0);
+
+ self = g_slice_new(NMBondManager);
+ *self = (NMBondManager){
+ .platform = g_object_ref(platform), /* unref'ed in _bond_manager_destroy() */
+ .ifindex = ifindex,
+ .reg_state = REGISTRATION_STATE_NONE,
+ .destroyed = FALSE,
+ .nft_good = TRUE,
+ .callback = callback,
+ .user_data = user_data,
+ .previous_ifindexes = g_hash_table_new(nm_direct_hash, NULL), /* keys are ifindexes packed with GINT_TO_POINTER() */
+ .previous_members = g_hash_table_new_full(nm_str_hash, g_str_equal, g_free, NULL), /* the table frees its string keys */
+ .connection_uuid = g_strdup(connection_uuid), /* own copy, freed in _bond_manager_destroy() */
+ .state = NM_OPTION_BOOL_DEFAULT,
+ };
+
+ self->link_changed_id = g_signal_connect(self->platform,
+ NM_PLATFORM_SIGNAL_LINK_CHANGED,
+ G_CALLBACK(_link_changed_cb),
+ self);
+
+ _LOGT("new balance-slb (MLAG) manager for interface %d", self->ifindex);
+
+ _reconfigure_check(self, TRUE); /* initial run, with reapply=TRUE */
+
+ return self;
+}
+/* Tears down the manager; it is freed right away only in REGISTRATION_STATE_NONE. */
+void
+nm_bond_manager_destroy(NMBondManager *self)
+{
+ g_return_if_fail(self);
+ g_return_if_fail(!self->destroyed);
+
+ self->destroyed = TRUE;
+
+ self->callback = NULL;
+ self->user_data = NULL;
+
+ nm_clear_g_signal_handler(self->platform, &self->link_changed_id);
+
+ nm_clear_g_source_inst(&self->reconfigure_on_idle_source);
+ self->reconfigure_check = FALSE;
+
+ nm_clear_g_free(&self->dat.bond_ifname_next);
+ nm_clear_g_free(&self->dat.active_members_next);
+
+ switch (self->reg_state) {
+ case REGISTRATION_STATE_NONE:
+ break; /* nothing registered: fall through to the immediate free below */
+ case REGISTRATION_STATE_UPPING:
+ /* We still have some nfts registered. We need to wrap them up. */
+ _LOGT("destroying but deconfigure pending configuration first");
+ nm_clear_g_free(&self->dat.bond_ifname_next); /* NOTE(review): already cleared above; redundant but harmless */
+ nm_clear_g_free(&self->dat.active_members_next);
+ nm_clear_g_cancellable(&self->cancellable);
+ return; /* NOTE(review): the final _bond_manager_destroy() presumably runs once the nft call completes - not visible here */
+ case REGISTRATION_STATE_UP:
+ _LOGT("destroying but deconfigure first");
+ _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL);
+ self->reg_state = REGISTRATION_STATE_DOWNING;
+ return;
+ case REGISTRATION_STATE_DOWNING:
+ _LOGT("destroying but wait for deconfiguring");
+ return;
+ }
+
+ _bond_manager_destroy(self);
+}
+/* Final teardown: asserts that nothing is registered or pending anymore, then frees all members and @self. */
+static void
+_bond_manager_destroy(NMBondManager *self)
+{
+ _LOGT("destroyed");
+
+ nm_assert(self);
+ nm_assert(self->destroyed);
+ nm_assert(self->reg_state == REGISTRATION_STATE_NONE);
+ nm_assert(self->link_changed_id == 0);
+ nm_assert(!self->cancellable);
+ nm_assert(!self->dat.bond_ifname_curr);
+ nm_assert(!self->dat.active_members_curr);
+ nm_assert(!self->reconfigure_on_idle_source);
+
+ nm_clear_g_free(&self->dat.bond_ifname_next);
+ nm_clear_g_free(&self->dat.active_members_next);
+
+ g_object_unref(self->platform);
+ g_hash_table_unref(self->previous_ifindexes);
+ g_hash_table_unref(self->previous_members);
+ g_free(self->connection_uuid);
+ nm_g_slice_free(self);
+}
diff --git a/src/core/nm-bond-manager.h b/src/core/nm-bond-manager.h
new file mode 100644
index 0000000000..92a89f0b92
--- /dev/null
+++ b/src/core/nm-bond-manager.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#ifndef __NM_BOND_MANAGER_H__
+#define __NM_BOND_MANAGER_H__
+
+typedef struct _NMBondManager NMBondManager;
+
+struct _NMPlatform;
+/* Kind of event that is passed to NMBondManagerCallback. */
+typedef enum {
+ NM_BOND_MANAGER_EVENT_TYPE_STATE,
+} NMBondManagerEventType;
+/* Signature of the user callback that is registered via nm_bond_manager_new(). */
+typedef void (*NMBondManagerCallback)(NMBondManager *self,
+ NMBondManagerEventType event_type,
+ gpointer user_data);
+/* Creates a manager for the bond with the given ifindex; release it with nm_bond_manager_destroy(). */
+NMBondManager *nm_bond_manager_new(struct _NMPlatform *platform,
+ int ifindex,
+ const char *connection_uuid,
+ NMBondManagerCallback callback,
+ gpointer user_data);
+/* Re-runs the configuration check immediately, with the reapply flag set. */
+void nm_bond_manager_reapply(NMBondManager *self);
+/* Tears down the manager; it must not be destroyed a second time. */
+void nm_bond_manager_destroy(NMBondManager *self);
+/* Accessors; the returned connection UUID string stays owned by the manager. */
+int nm_bond_manager_get_ifindex(NMBondManager *self);
+const char *nm_bond_manager_get_connection_uuid(NMBondManager *self);
+NMOptionBool nm_bond_manager_get_state(NMBondManager *self);
+
+#endif /* __NM_BOND_MANAGER_H__ */
diff --git a/src/core/nm-firewall-utils.c b/src/core/nm-firewall-utils.c
index 92c9fd814e..7b5d2f47b6 100644
--- a/src/core/nm-firewall-utils.c
+++ b/src/core/nm-firewall-utils.c
@@ -39,6 +39,71 @@ static const struct {
/*****************************************************************************/
+static const char *
+_nft_ifname_valid(const char *str)
+{
+ gsize i;
+
+ /* `nft -f -` takes certain strings, like "device $IFNAME", but only
+ * from a limited character set. Returns @str itself if it is valid by
+ * those rules and shorter than NMP_IFNAMSIZ, otherwise NULL.
+ *
+ * nftables' src/scanner.l:
+ * digit [0-9]
+ * letter [a-zA-Z]
+ * string ({letter}|[_.])({letter}|{digit}|[/\-_\.])*
+ **/
+
+ if (!str || !str[0]) /* NULL and "" are invalid */
+ return NULL;
+
+ for (i = 0; str[i]; i++) {
+ switch (str[i]) {
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '_':
+ case '.':
+ continue;
+ case '0' ... '9':
+ case '/':
+ case '-':
+ if (i == 0) /* digits, '/' and '-' may not start the string */
+ return NULL;
+ continue;
+ default:
+ return NULL;
+ }
+ }
+ if (i >= NMP_IFNAMSIZ) /* here i is the string length */
+ return NULL;
+
+ return str;
+}
+/* Resets @strbuf to @prefix (optional) plus @str_to_sanitize, with non-alphanumeric chars escaped. */
+static const char *
+_strbuf_set_sanitized(NMStrBuf *strbuf, const char *prefix, const char *str_to_sanitize)
+{
+ nm_str_buf_reset(strbuf);
+
+ if (prefix)
+ nm_str_buf_append(strbuf, prefix);
+
+ for (; str_to_sanitize[0] != '\0'; str_to_sanitize++) {
+ const char ch = str_to_sanitize[0];
+
+ if (g_ascii_isalpha(ch) || g_ascii_isdigit(ch)) {
+ nm_str_buf_append_c(strbuf, ch);
+ continue;
+ }
+ nm_str_buf_append_c(strbuf, '_'); /* escape: '_' followed by the output of nm_str_buf_append_c_hex() */
+ nm_str_buf_append_c_hex(strbuf, ch, FALSE);
+ }
+
+ return nm_str_buf_get_str(strbuf); /* NOTE(review): presumably points into @strbuf - confirm the lifetime */
+}
+
+/*****************************************************************************/
+
#define _SHARE_IPTABLES_SUBNET_TO_STR_LEN (INET_ADDRSTRLEN + 1 + 2 + 1)
static const char *
@@ -701,6 +766,189 @@ _fw_nft_set_shared_construct(gboolean up, const char *ip_iface, in_addr_t addr,
/*****************************************************************************/
+GBytes *
+nm_firewall_nft_stdio_mlag(gboolean up,
+ const char *bond_ifname,
+ const char *const *bond_ifnames_down,
+ const char *const *active_members,
+ const char *const *previous_members)
+{ /* Builds the nft commands for the "nm-mlag-*" netdev table of @bond_ifname; chains, sets and rules are only added if @up. */
+ nm_auto_str_buf NMStrBuf strbuf_table_name =
+ NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_32, FALSE);
+ nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT(NM_UTILS_GET_NEXT_REALLOC_SIZE_1000, FALSE);
+ const char *table_name;
+ gsize i;
+
+ if (NM_MORE_ASSERTS > 10 && active_members) {
+ /* No duplicates. We make certain assumptions here, and we don't
+ * want to check that there are no duplicates. The caller must take
+ * care of this. */
+ for (i = 0; active_members[i]; i++)
+ nm_assert(!nm_strv_contains(&active_members[i + 1], -1, active_members[i]));
+ }
+
+ /* If an interface gets renamed, we need to update the nft tables. Since one nft
+ * invocation is atomic, it is reasonable to drop the previous table(s) at the
+ * same time when creating the new one. */
+ for (; bond_ifnames_down && bond_ifnames_down[0]; bond_ifnames_down++) {
+ if (nm_streq(bond_ifname, bond_ifnames_down[0])) /* the table of @bond_ifname itself is handled below */
+ continue;
+ table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifnames_down[0]);
+ _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, FALSE);
+ }
+
+ table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifname);
+
+ _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, up);
+
+ if (up) {
+ nm_auto_str_buf NMStrBuf strbuf_1 =
+ NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE);
+ const gsize n_active_members = NM_PTRARRAY_LEN(active_members);
+
+ if (!_nft_ifname_valid(bond_ifname)) {
+ /* We cannot meaningfully express this interface name. Ignore all chains
+ * and only create an empty table. */
+ goto out;
+ }
+
+ for (; previous_members && previous_members[0]; previous_members++) {
+ const char *previous_member = previous_members[0];
+ const char *chain_name;
+
+ /* The caller already ensures that the previous member is not part of the new
+ * active members. Avoid the overhead of checking, and assert against that. */
+ nm_assert(!nm_strv_contains(active_members, n_active_members, previous_member));
+
+ if (!_nft_ifname_valid(previous_member))
+ continue;
+
+ chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", previous_member);
+
+ /* We want to atomically update our table, however, we don't want to delete
+ * and recreate it, because then the sets get lost (which we don't want).
+ *
+ * Instead, we only "add && flush" the table, which removes all rules from
+ * the chain. However, as our active-members change, we want to delete
+ * the obsolete chains too.
+ *
+ * nft has no way to delete all chains in a table, we have to name
+ * them one by one. So we keep track of active members that we had
+ * in the past, and which are now no longer in use. For those previous
+ * members we delete the chains (again, with the "add && delete" dance
+ * to avoid failure deleting a non-existing chain, in case our tracking
+ * is wrong or somebody else modified the table in the meantime).
+ *
+ * We need to track the previous members, because we don't want to first
+ * ask nft which chains exist. Doing that would be cumbersome as we would
+ * have to do one async program invocation and parse stdout. */
+ _append(&strbuf,
+ "add chain netdev %s %s {"
+ " type filter hook ingress device %s priority filter; "
+ "}",
+ table_name,
+ chain_name,
+ previous_member);
+ _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name);
+ }
+
+ /* OVS SLB rule 1
+ *
+ * "Open vSwitch avoids packet duplication by accepting multicast and broadcast
+ * packets on only the active member, and dropping multicast and broadcast
+ * packets on all other members."
+ *
+ * primary is first member, we drop on all others */
+ for (i = 0; i < n_active_members; i++) {
+ const char *active_member = active_members[i];
+ const char *chain_name;
+
+ if (!_nft_ifname_valid(active_member))
+ continue;
+
+ chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", active_member);
+
+ _append(&strbuf,
+ "add chain netdev %s %s {"
+ " type filter hook ingress device %s priority filter; "
+ "}",
+ table_name,
+ chain_name,
+ active_member);
+
+ if (i == 0) { /* the first (primary) member gets no drop rule: its chain is deleted again */
+ _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name);
+ continue;
+ }
+
+ _append(&strbuf,
+ "add rule netdev %s %s pkttype {"
+ " broadcast, multicast "
+ "} counter drop",
+ table_name,
+ chain_name);
+ }
+
+ /* OVS SLB rule 2
+ *
+ * "Open vSwitch deals with this case by dropping packets received on any SLB
+ * bonded link that have a source MAC+VLAN that has been learned on any other
+ * port."
+ */
+ _append(&strbuf,
+ "add set netdev %s macset-tagged {"
+ " typeof ether saddr . vlan id; flags timeout; "
+ "}",
+ table_name);
+ _append(&strbuf,
+ "add set netdev %s macset-untagged {"
+ " typeof ether saddr; flags timeout;"
+ "}",
+ table_name);
+
+ _append(&strbuf,
+ "add chain netdev %s tx-snoop-source-mac {"
+ " type filter hook egress device %s priority filter; "
+ "}",
+ table_name,
+ bond_ifname);
+ _append(&strbuf,
+ "add rule netdev %s tx-snoop-source-mac set update ether saddr . vlan id"
+ " timeout 5s @macset-tagged counter return"
+ "", /* tagged */
+ table_name);
+ _append(&strbuf,
+ "add rule netdev %s tx-snoop-source-mac set update ether saddr"
+ " timeout 5s @macset-untagged counter"
+ "", /* untagged */
+ table_name);
+
+ _append(&strbuf,
+ "add chain netdev %s rx-drop-looped-packets {"
+ " type filter hook ingress device %s priority filter; "
+ "}",
+ table_name,
+ bond_ifname);
+ _append(&strbuf,
+ "add rule netdev %s rx-drop-looped-packets ether saddr . vlan id"
+ " @macset-tagged counter drop",
+ table_name);
+ _append(&strbuf,
+ "add rule netdev %s rx-drop-looped-packets ether type vlan counter return"
+ "", /* avoid looking up tagged packets in untagged table */
+ table_name);
+ _append(&strbuf,
+ "add rule netdev %s rx-drop-looped-packets ether saddr @macset-untagged"
+ " counter drop",
+ table_name);
+ }
+
+out:
+ return nm_str_buf_finalize_to_gbytes(&strbuf);
+}
+
+/*****************************************************************************/
+
struct _NMFirewallConfig {
char *ip_iface;
in_addr_t addr;
diff --git a/src/core/nm-firewall-utils.h b/src/core/nm-firewall-utils.h
index 9d883fea7b..ca138ccf78 100644
--- a/src/core/nm-firewall-utils.h
+++ b/src/core/nm-firewall-utils.h
@@ -35,4 +35,10 @@ void nm_firewall_nft_call(GBytes *stdin_buf,
gboolean nm_firewall_nft_call_finish(GAsyncResult *result, GError **error);
+GBytes *nm_firewall_nft_stdio_mlag(gboolean up,
+ const char *bond_ifname,
+ const char *const *bond_ifnames_down, /* may be NULL */
+ const char *const *active_members,
+ const char *const *previous_members); /* may be NULL */
+
#endif /* __NM_FIREWALL_UTILS_H__ */
diff --git a/src/libnm-core-impl/nm-setting-bond.c b/src/libnm-core-impl/nm-setting-bond.c
index a7f64393b9..b03cc455a7 100644
--- a/src/libnm-core-impl/nm-setting-bond.c
+++ b/src/libnm-core-impl/nm-setting-bond.c
@@ -70,6 +70,7 @@ static const char *const valid_options_lst[] = {
NM_SETTING_BOND_OPTION_ARP_INTERVAL,
NM_SETTING_BOND_OPTION_ARP_IP_TARGET,
NM_SETTING_BOND_OPTION_ARP_VALIDATE,
+ NM_SETTING_BOND_OPTION_BALANCE_SLB,
NM_SETTING_BOND_OPTION_PRIMARY,
NM_SETTING_BOND_OPTION_PRIMARY_RESELECT,
NM_SETTING_BOND_OPTION_FAIL_OVER_MAC,
@@ -195,6 +196,7 @@ static NM_UTILS_STRING_TABLE_LOOKUP_STRUCT_DEFINE(
{NM_SETTING_BOND_OPTION_ARP_IP_TARGET, {"", NM_BOND_OPTION_TYPE_IP}},
{NM_SETTING_BOND_OPTION_ARP_VALIDATE,
{"none", NM_BOND_OPTION_TYPE_BOTH, 0, 6, _option_default_strv_arp_validate}},
+ {NM_SETTING_BOND_OPTION_BALANCE_SLB, {"0", NM_BOND_OPTION_TYPE_INT, 0, 1}},
{NM_SETTING_BOND_OPTION_DOWNDELAY, {"0", NM_BOND_OPTION_TYPE_INT, 0, G_MAXINT}},
{NM_SETTING_BOND_OPTION_FAIL_OVER_MAC,
{"none", NM_BOND_OPTION_TYPE_BOTH, 0, 2, _option_default_strv_fail_over_mac}},
@@ -344,6 +346,17 @@ _bond_get_option_normalized(NMSettingBond *self, const char *option, gboolean ge
value = _bond_get_option(self, NM_SETTING_BOND_OPTION_PRIMARY);
if (!value)
value = _bond_get_option(self, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE);
+ } else if (nm_streq(option, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY)) {
+ if (_nm_utils_ascii_str_to_int64(
+ _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB),
+ 10,
+ 0,
+ 1,
+ -1)
+ == 1) {
+ /* balance-slb implies vlan+srcmac */
+ return "5";
+ }
} else
value = _bond_get_option(self, option);
@@ -840,6 +853,7 @@ verify(NMSetting *setting, NMConnection *connection, GError **error)
const char *arp_ip_target = NULL;
const char *lacp_rate;
const char *primary;
+ const char *s;
NMBondMode bond_mode;
guint i;
const NMUtilsNamedValue *n;
@@ -1067,6 +1081,32 @@ verify(NMSetting *setting, NMConnection *connection, GError **error)
return FALSE;
}
+ s = _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB);
+ if (s && _atoi(s) > 0) {
+ if (bond_mode != NM_BOND_MODE_XOR) {
+ g_set_error(error,
+ NM_CONNECTION_ERROR,
+ NM_CONNECTION_ERROR_INVALID_PROPERTY,
+ _("%s requires bond mode \"%s\""),
+ NM_SETTING_BOND_OPTION_BALANCE_SLB,
+ "balance-xor");
+ g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS);
+ return FALSE;
+ }
+ s = _bond_get_option(self, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY);
+ if (s
+ && _nm_setting_bond_xmit_hash_policy_from_string(s)
+ != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC) {
+ g_set_error(error,
+ NM_CONNECTION_ERROR,
+ NM_CONNECTION_ERROR_INVALID_PROPERTY,
+ _("%s requires xmit_hash_policy \"vlan+srcmac\""),
+ NM_SETTING_BOND_OPTION_BALANCE_SLB);
+ g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS);
+ return FALSE;
+ }
+ }
+
if (!_nm_connection_verify_required_interface_name(connection, error))
return FALSE;
diff --git a/src/libnm-core-public/nm-setting-bond.h b/src/libnm-core-public/nm-setting-bond.h
index ed44abbff1..10d703bcaa 100644
--- a/src/libnm-core-public/nm-setting-bond.h
+++ b/src/libnm-core-public/nm-setting-bond.h
@@ -37,6 +37,7 @@ G_BEGIN_DECLS
#define NM_SETTING_BOND_OPTION_ARP_IP_TARGET "arp_ip_target"
#define NM_SETTING_BOND_OPTION_ARP_VALIDATE "arp_validate"
#define NM_SETTING_BOND_OPTION_PRIMARY "primary"
+#define NM_SETTING_BOND_OPTION_BALANCE_SLB "balance-slb"
#define NM_SETTING_BOND_OPTION_PRIMARY_RESELECT "primary_reselect"
#define NM_SETTING_BOND_OPTION_FAIL_OVER_MAC "fail_over_mac"
#define NM_SETTING_BOND_OPTION_USE_CARRIER "use_carrier"