summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Watanabe <watanabe.yu+github@gmail.com>2021-11-08 06:34:43 +0900
committerYu Watanabe <watanabe.yu+github@gmail.com>2021-12-04 15:06:58 +0900
commitb90d0f83b20f54fd52e9f932b5ce24871a9e7dcb (patch)
treef6dafb427699c222933e64ad696a930601703287
parenta2bf1a61bccc41753f582a27ab84a6cc6e710ee6 (diff)
downloadsystemd-b90d0f83b20f54fd52e9f932b5ce24871a9e7dcb.tar.gz
network/netdev: add support to create IPoIB subinterface
-rw-r--r--man/systemd.netdev.xml46
-rw-r--r--man/systemd.network.xml5
-rw-r--r--src/network/meson.build2
-rw-r--r--src/network/netdev/ipoib.c119
-rw-r--r--src/network/netdev/ipoib.h28
-rw-r--r--src/network/netdev/netdev-gperf.gperf4
-rw-r--r--src/network/netdev/netdev.c45
-rw-r--r--src/network/netdev/netdev.h4
-rw-r--r--src/network/netdev/veth.c2
-rw-r--r--src/network/networkd-network-gperf.gperf1
-rw-r--r--src/network/networkd-network.c1
-rw-r--r--test/fuzz/fuzz-netdev-parser/directives.netdev4
-rw-r--r--test/fuzz/fuzz-network-parser/directives.network1
13 files changed, 248 insertions, 14 deletions
diff --git a/man/systemd.netdev.xml b/man/systemd.netdev.xml
index e4e7e611e7..255b85f404 100644
--- a/man/systemd.netdev.xml
+++ b/man/systemd.netdev.xml
@@ -189,6 +189,9 @@
<row><entry><varname>batadv</varname></entry>
<entry><ulink url="https://www.open-mesh.org/projects/open-mesh/wiki">B.A.T.M.A.N. Advanced</ulink> is a routing protocol for multi-hop mobile ad-hoc networks which operates on layer 2.</entry></row>
+
+ <row><entry><varname>ipoib</varname></entry>
+ <entry>An IP over Infiniband subinterface.</entry></row>
</tbody>
</tgroup>
</table>
@@ -2126,6 +2129,49 @@
</refsect1>
<refsect1>
+ <title>[IPoIB] Section Options</title>
+ <para>The [IPoIB] section only applies for netdevs of kind <literal>ipoib</literal> and accepts the
+ following keys:</para>
+
+ <variablelist class='network-directives'>
+ <varlistentry>
+ <term><varname>PartitionKey=</varname></term>
+ <listitem>
+ <para>Takes an integer in the range 1…0xffff, except for 0x8000. Defaults to unset, and the
+ kernel's default is used.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id='ipoib_mode'>
+ <term><varname>Mode=</varname></term>
+ <listitem>
+ <para>Takes one of the special values <literal>datagram</literal> or
+ <literal>connected</literal>. Defaults to unset, and the kernel's default is used.</para>
+
+ <para>When <literal>datagram</literal>, the Infiniband unreliable datagram (UD) transport is
+ used, and so the interface MTU is equal to the IB L2 MTU minus the IPoIB encapsulation
+ header (4 bytes). For example, in a typical IB fabric with a 2K MTU, the IPoIB MTU will be
+ 2048 - 4 = 2044 bytes.</para>
+
+ <para>When <literal>connected</literal>, the Infiniband reliable connected (RC) transport is
+ used. Connected mode takes advantage of the connected nature of the IB transport and allows
+ an MTU up to the maximal IP packet size of 64K, which reduces the number of IP packets needed
+ for handling large UDP datagrams, TCP segments, etc., and increases the performance for large
+ messages.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id='ipoib_umcast'>
+ <term><varname>IgnoreUserspaceMulticastGroups=</varname></term>
+ <listitem>
+ <para>Takes a boolean value. When true, the kernel ignores multicast groups handled by
+ userspace. Defaults to unset, and the kernel's default is used.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
<title>Examples</title>
<example>
<title>/etc/systemd/network/25-bridge.netdev</title>
diff --git a/man/systemd.network.xml b/man/systemd.network.xml
index 50367ecdcd..1de7bb0538 100644
--- a/man/systemd.network.xml
+++ b/man/systemd.network.xml
@@ -902,6 +902,7 @@ Table=1234</programlisting></para>
</listitem>
</varlistentry>
<varlistentry>
+ <term><varname>IPoIB=</varname></term>
<term><varname>IPVLAN=</varname></term>
<term><varname>IPVTAP=</varname></term>
<term><varname>L2TP=</varname></term>
@@ -913,8 +914,8 @@ Table=1234</programlisting></para>
<term><varname>VXLAN=</varname></term>
<term><varname>Xfrm=</varname></term>
<listitem>
- <para>The name of an IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN, VXLAN, or
- Xfrm to be created on the link. See
+ <para>The name of an IPoIB, IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN,
+ VXLAN, or Xfrm to be created on the link. See
<citerefentry><refentrytitle>systemd.netdev</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
This option may be specified more than once.</para>
</listitem>
diff --git a/src/network/meson.build b/src/network/meson.build
index cfa16a8ecf..c1cf227ffc 100644
--- a/src/network/meson.build
+++ b/src/network/meson.build
@@ -13,6 +13,8 @@ sources = files('''
netdev/dummy.h
netdev/ifb.c
netdev/ifb.h
+ netdev/ipoib.c
+ netdev/ipoib.h
netdev/ipvlan.c
netdev/ipvlan.h
netdev/macvlan.c
diff --git a/src/network/netdev/ipoib.c b/src/network/netdev/ipoib.c
new file mode 100644
index 0000000000..b341001bc4
--- /dev/null
+++ b/src/network/netdev/ipoib.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if_arp.h>
+#include <linux/if_link.h>
+
+#include "ipoib.h"
+#include "parse-util.h"
+#include "string-table.h"
+
+assert_cc((int) IP_OVER_INFINIBAND_MODE_DATAGRAM == (int) IPOIB_MODE_DATAGRAM);
+assert_cc((int) IP_OVER_INFINIBAND_MODE_CONNECTED == (int) IPOIB_MODE_CONNECTED);
+
+static void netdev_ipoib_init(NetDev *netdev) { /* .init hook of ipoib_vtable: puts the IPoIB-specific settings into their "unset" state. */
+ IPoIB *ipoib;
+
+ assert(netdev);
+
+ ipoib = IPOIB(netdev);
+
+ assert(ipoib);
+
+ ipoib->mode = _IP_OVER_INFINIBAND_MODE_INVALID; /* negative, so netdev_ipoib_fill_message_create() skips IFLA_IPOIB_MODE */
+ ipoib->umcast = -1; /* negative, so netdev_ipoib_fill_message_create() skips IFLA_IPOIB_UMCAST */
+}
+
+static int netdev_ipoib_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) { /* Appends to m only those IPoIB attributes that are set; returns 0, or the value of log_netdev_error_errno() when an append fails. */
+ IPoIB *ipoib;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(m);
+
+ ipoib = IPOIB(netdev);
+
+ assert(ipoib);
+
+ if (ipoib->pkey > 0) { /* 0 means unset, see config_parse_ipoib_pkey() */
+ r = sd_netlink_message_append_u16(m, IFLA_IPOIB_PKEY, ipoib->pkey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_PKEY attribute: %m");
+ }
+
+ if (ipoib->mode >= 0) { /* unset is _IP_OVER_INFINIBAND_MODE_INVALID (-EINVAL), see netdev_ipoib_init() */
+ r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, ipoib->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_MODE attribute: %m");
+ }
+
+ if (ipoib->umcast >= 0) { /* int tristate, -1 when unset; sent as a u16 once it is non-negative */
+ r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, ipoib->umcast);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m");
+ }
+
+ return 0;
+}
+
+static const char * const ipoib_mode_table[_IP_OVER_INFINIBAND_MODE_MAX] = { /* names for each IPoIBMode value, indexed by the enum */
+ [IP_OVER_INFINIBAND_MODE_DATAGRAM] = "datagram",
+ [IP_OVER_INFINIBAND_MODE_CONNECTED] = "connected",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(ipoib_mode, IPoIBMode); /* presumably generates the string-to-enum lookup over ipoib_mode_table; confirm in string-table.h */
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipoib_mode, ipoib_mode, IPoIBMode, "Failed to parse IPoIB mode"); /* defines config_parse_ipoib_mode(), the parser bound to IPoIB.Mode in netdev-gperf.gperf */
+
+int config_parse_ipoib_pkey( /* Parser bound to IPoIB.PartitionKey: stores the key in the uint16_t that data points to. Every path returns 0; bad values are logged and ignored. */
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t u, *pkey = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) { /* an empty assignment resets the key */
+ *pkey = 0; /* 0 means unset. */
+ return 0;
+ }
+
+ r = safe_atou16(rvalue, &u);
+ if (r < 0) { /* parse failure: warn and leave *pkey untouched */
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse IPoIB pkey '%s', ignoring assignment: %m",
+ rvalue);
+ return 0;
+ }
+ if (u == 0 || u == 0x8000) { /* rejected values: warn and leave *pkey untouched */
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "IPoIB pkey cannot be 0 nor 0x8000, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ *pkey = u;
+ return 0;
+}
+
+
+const NetDevVTable ipoib_vtable = { /* vtable registered for NETDEV_KIND_IPOIB ("ipoib") in netdev.c */
+ .object_size = sizeof(IPoIB),
+ .sections = NETDEV_COMMON_SECTIONS "IPoIB\0", /* adds the [IPoIB] section on top of the common ones */
+ .init = netdev_ipoib_init,
+ .fill_message_create = netdev_ipoib_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .iftype = ARPHRD_INFINIBAND, /* netdev_generate_hw_addr() has a dedicated case for this type */
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/ipoib.h b/src/network/netdev/ipoib.h
new file mode 100644
index 0000000000..d2f5d9350f
--- /dev/null
+++ b/src/network/netdev/ipoib.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+
+#include "conf-parser.h"
+#include "netdev.h"
+
+typedef enum IPoIBMode { /* the two valid values must equal IPOIB_MODE_DATAGRAM/IPOIB_MODE_CONNECTED; ipoib.c enforces this with assert_cc() */
+ IP_OVER_INFINIBAND_MODE_DATAGRAM,
+ IP_OVER_INFINIBAND_MODE_CONNECTED,
+ _IP_OVER_INFINIBAND_MODE_MAX, /* sizes ipoib_mode_table in ipoib.c */
+ _IP_OVER_INFINIBAND_MODE_INVALID = -EINVAL, /* negative sentinel; netdev_ipoib_init() uses it as the unset state */
+} IPoIBMode;
+
+typedef struct IPoIB { /* per-netdev state for kind "ipoib"; fields are filled from the [IPoIB] section via netdev-gperf.gperf */
+ NetDev meta;
+
+ uint16_t pkey; /* PartitionKey=; 0 means unset */
+ IPoIBMode mode; /* Mode=; negative means unset */
+ int umcast; /* IgnoreUserspaceMulticastGroups=, parsed by config_parse_tristate; negative means unset */
+} IPoIB;
+
+DEFINE_NETDEV_CAST(IPOIB, IPoIB);
+extern const NetDevVTable ipoib_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_pkey);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_mode);
diff --git a/src/network/netdev/netdev-gperf.gperf b/src/network/netdev/netdev-gperf.gperf
index 37a0d9fa5d..a948ec2c8a 100644
--- a/src/network/netdev/netdev-gperf.gperf
+++ b/src/network/netdev/netdev-gperf.gperf
@@ -11,6 +11,7 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
#include "conf-parser.h"
#include "fou-tunnel.h"
#include "geneve.h"
+#include "ipoib.h"
#include "ipvlan.h"
#include "l2tp-tunnel.h"
#include "macsec.h"
@@ -253,3 +254,6 @@ BatmanAdvanced.GatewayBandwidthUp, config_parse_badadv_bandwidth,
BatmanAdvanced.HopPenalty, config_parse_uint8, 0, offsetof(BatmanAdvanced, hop_penalty)
BatmanAdvanced.OriginatorIntervalSec, config_parse_sec, 0, offsetof(BatmanAdvanced, originator_interval)
BatmanAdvanced.RoutingAlgorithm, config_parse_batadv_routing_algorithm, 0, offsetof(BatmanAdvanced, routing_algorithm)
+IPoIB.PartitionKey, config_parse_ipoib_pkey, 0, offsetof(IPoIB, pkey)
+IPoIB.Mode, config_parse_ipoib_mode, 0, offsetof(IPoIB, mode)
+IPoIB.IgnoreUserspaceMulticastGroups, config_parse_tristate, 0, offsetof(IPoIB, umcast)
diff --git a/src/network/netdev/netdev.c b/src/network/netdev/netdev.c
index 6c6b4c3068..f673292594 100644
--- a/src/network/netdev/netdev.c
+++ b/src/network/netdev/netdev.c
@@ -18,6 +18,7 @@
#include "fou-tunnel.h"
#include "geneve.h"
#include "ifb.h"
+#include "ipoib.h"
#include "ipvlan.h"
#include "l2tp-tunnel.h"
#include "list.h"
@@ -64,6 +65,7 @@ const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = {
[NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable,
[NETDEV_KIND_IP6TNL] = &ip6tnl_vtable,
[NETDEV_KIND_IPIP] = &ipip_vtable,
+ [NETDEV_KIND_IPOIB] = &ipoib_vtable,
[NETDEV_KIND_IPVLAN] = &ipvlan_vtable,
[NETDEV_KIND_IPVTAP] = &ipvtap_vtable,
[NETDEV_KIND_L2TP] = &l2tptnl_vtable,
@@ -103,6 +105,7 @@ static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = {
[NETDEV_KIND_IP6GRETAP] = "ip6gretap",
[NETDEV_KIND_IP6TNL] = "ip6tnl",
[NETDEV_KIND_IPIP] = "ipip",
+ [NETDEV_KIND_IPOIB] = "ipoib",
[NETDEV_KIND_IPVLAN] = "ipvlan",
[NETDEV_KIND_IPVTAP] = "ipvtap",
[NETDEV_KIND_L2TP] = "l2tp",
@@ -393,6 +396,7 @@ int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) {
int netdev_generate_hw_addr(
NetDev *netdev,
+ Link *parent,
const char *name,
const struct hw_addr_data *hw_addr,
struct hw_addr_data *ret) {
@@ -419,7 +423,7 @@ int netdev_generate_hw_addr(
if (!NETDEV_VTABLE(netdev)->generate_mac)
goto finalize;
- if (NETDEV_VTABLE(netdev)->iftype != ARPHRD_ETHER)
+ if (!IN_SET(NETDEV_VTABLE(netdev)->iftype, ARPHRD_ETHER, ARPHRD_INFINIBAND))
goto finalize;
r = net_get_unique_predictable_data_from_name(name, &HASH_KEY, &result);
@@ -430,21 +434,42 @@ int netdev_generate_hw_addr(
}
a.length = arphrd_to_hw_addr_len(NETDEV_VTABLE(netdev)->iftype);
- assert(a.length <= sizeof(result));
- memcpy(a.bytes, &result, a.length);
- if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
- log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
- "Failed to generate persistent MAC address, ignoring: %m");
- a = HW_ADDR_NULL;
- goto finalize;
+ switch (NETDEV_VTABLE(netdev)->iftype) {
+ case ARPHRD_ETHER:
+ assert(a.length <= sizeof(result));
+ memcpy(a.bytes, &result, a.length);
+
+ if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
+ log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to generate persistent MAC address, ignoring: %m");
+ a = HW_ADDR_NULL;
+ goto finalize;
+ }
+
+ break;
+ case ARPHRD_INFINIBAND:
+ if (result == 0) {
+ log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to generate persistent MAC address: %m");
+ goto finalize;
+ }
+
+ assert(a.length >= sizeof(result));
+ memzero(a.bytes, a.length - sizeof(result));
+ memcpy(a.bytes + a.length - sizeof(result), &result, sizeof(result));
+ break;
+ default:
+ assert_not_reached();
}
+
} else {
a = *hw_addr;
warn_invalid = true;
}
- r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype, NULL, &a);
+ r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype,
+ parent ? &parent->hw_addr : NULL, &a);
if (r < 0)
return r;
@@ -481,7 +506,7 @@ static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handle
if (r < 0)
return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
- r = netdev_generate_hw_addr(netdev, netdev->ifname, &netdev->hw_addr, &hw_addr);
+ r = netdev_generate_hw_addr(netdev, link, netdev->ifname, &netdev->hw_addr, &hw_addr);
if (r < 0)
return r;
diff --git a/src/network/netdev/netdev.h b/src/network/netdev/netdev.h
index b226cf20a7..c7262f550a 100644
--- a/src/network/netdev/netdev.h
+++ b/src/network/netdev/netdev.h
@@ -22,6 +22,7 @@
"-Bridge\0" \
"-FooOverUDP\0" \
"-GENEVE\0" \
+ "-IPoIB\0" \
"-IPVLAN\0" \
"-IPVTAP\0" \
"-L2TP\0" \
@@ -60,6 +61,7 @@ typedef enum NetDevKind {
NETDEV_KIND_IP6GRETAP,
NETDEV_KIND_IP6TNL,
NETDEV_KIND_IPIP,
+ NETDEV_KIND_IPOIB,
NETDEV_KIND_IPVLAN,
NETDEV_KIND_IPVTAP,
NETDEV_KIND_L2TP,
@@ -201,7 +203,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref);
bool netdev_is_managed(NetDev *netdev);
int netdev_get(Manager *manager, const char *name, NetDev **ret);
int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink);
-int netdev_generate_hw_addr(NetDev *netdev, const char *name,
+int netdev_generate_hw_addr(NetDev *netdev, Link *link, const char *name,
const struct hw_addr_data *hw_addr, struct hw_addr_data *ret);
int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb);
diff --git a/src/network/netdev/veth.c b/src/network/netdev/veth.c
index 5dd8586a3a..c946e81fc0 100644
--- a/src/network/netdev/veth.c
+++ b/src/network/netdev/veth.c
@@ -32,7 +32,7 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin
return log_netdev_error_errno(netdev, r, "Failed to add netlink interface name: %m");
}
- r = netdev_generate_hw_addr(netdev, v->ifname_peer, &v->hw_addr_peer, &hw_addr);
+ r = netdev_generate_hw_addr(netdev, NULL, v->ifname_peer, &v->hw_addr_peer, &hw_addr);
if (r < 0)
return r;
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
index df9721a9bc..4ac58a26ad 100644
--- a/src/network/networkd-network-gperf.gperf
+++ b/src/network/networkd-network-gperf.gperf
@@ -87,6 +87,7 @@ Network.BatmanAdvanced, config_parse_ifname,
Network.Bond, config_parse_ifname, 0, offsetof(Network, bond_name)
Network.Bridge, config_parse_ifname, 0, offsetof(Network, bridge_name)
Network.VRF, config_parse_ifname, 0, offsetof(Network, vrf_name)
+Network.IPoIB, config_parse_stacked_netdev, NETDEV_KIND_IPOIB, offsetof(Network, stacked_netdev_names)
Network.IPVLAN, config_parse_stacked_netdev, NETDEV_KIND_IPVLAN, offsetof(Network, stacked_netdev_names)
Network.IPVTAP, config_parse_stacked_netdev, NETDEV_KIND_IPVTAP, offsetof(Network, stacked_netdev_names)
Network.L2TP, config_parse_stacked_netdev, NETDEV_KIND_L2TP, offsetof(Network, stacked_netdev_names)
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
index 443222f610..7640429f46 100644
--- a/src/network/networkd-network.c
+++ b/src/network/networkd-network.c
@@ -852,6 +852,7 @@ int config_parse_stacked_netdev(
assert(rvalue);
assert(data);
assert(IN_SET(kind,
+ NETDEV_KIND_IPOIB,
NETDEV_KIND_IPVLAN,
NETDEV_KIND_IPVTAP,
NETDEV_KIND_L2TP,
diff --git a/test/fuzz/fuzz-netdev-parser/directives.netdev b/test/fuzz/fuzz-netdev-parser/directives.netdev
index e34d16af11..f5fa2418fe 100644
--- a/test/fuzz/fuzz-netdev-parser/directives.netdev
+++ b/test/fuzz/fuzz-netdev-parser/directives.netdev
@@ -241,3 +241,7 @@ GatewayBandwithUp=
GatewayBandwidthDown=
GatewayBandwidthUp=
RoutingAlgorithm=
+[IPoIB]
+PartitionKey=
+Mode=
+IgnoreUserspaceMulticastGroups=
diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network
index 5b5a4f8c60..68cf1ba691 100644
--- a/test/fuzz/fuzz-network-parser/directives.network
+++ b/test/fuzz/fuzz-network-parser/directives.network
@@ -242,6 +242,7 @@ IgnoreCarrierLoss=
KeepConfiguration=
DHCPv6PrefixDelegation=
BatmanAdvanced=
+IPoIB=
[IPv6Prefix]
Prefix=
OnLink=